diff --git a/.github/ISSUE_TEMPLATE/96_installation-issues.md b/.github/ISSUE_TEMPLATE/96_installation-issues.md new file mode 100644 index 00000000000..c322ccc92ce --- /dev/null +++ b/.github/ISSUE_TEMPLATE/96_installation-issues.md @@ -0,0 +1,29 @@ +--- +name: Installation issue +about: Issue with ClickHouse installation from https://clickhouse.com/docs/en/install/ +title: '' +labels: comp-install +assignees: '' + +--- + +**Installation type** + +Packages, docker, single binary, curl? + +**Source of the ClickHouse** + +A link to the source. Or the command you've tried + +**Expected result** + +What you expected + +**The actual result** + +What you get + +**How to reproduce** + +* For Linux-based operating systems: provide a script for clear docker container from the official image +* For anything else: steps to reproduce on as much as possible clear system diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml index 4c8d023f2ec..30a77a9b27f 100644 --- a/.github/workflows/backport_branches.yml +++ b/.github/workflows/backport_branches.yml @@ -466,6 +466,7 @@ jobs: - BuilderDebTsan - BuilderDebDebug runs-on: [self-hosted, style-checker] + if: ${{ success() || failure() }} steps: - name: Set envs run: | @@ -504,6 +505,7 @@ jobs: - BuilderBinDarwin - BuilderBinDarwinAarch64 runs-on: [self-hosted, style-checker] + if: ${{ success() || failure() }} steps: - name: Set envs run: | diff --git a/.github/workflows/cancel.yml b/.github/workflows/cancel.yml index cb06d853219..3c2be767ad2 100644 --- a/.github/workflows/cancel.yml +++ b/.github/workflows/cancel.yml @@ -6,7 +6,7 @@ env: on: # yamllint disable-line rule:truthy workflow_run: - workflows: ["PullRequestCI", "ReleaseCI", "DocsCheck", "BackportPR"] + workflows: ["PullRequestCI", "ReleaseBranchCI", "DocsCheck", "BackportPR"] types: - requested jobs: diff --git a/.github/workflows/debug.yml b/.github/workflows/debug.yml index fa980a95a39..993fa8c0d07 100644 --- a/.github/workflows/debug.yml +++ b/.github/workflows/debug.yml @@ -2,7 +2,7 @@ name: Debug 'on': - [push, pull_request, release, workflow_dispatch] + [push, pull_request, release, workflow_dispatch, workflow_call] jobs: DebugInfo: diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 3d22cb984dd..3d43a960534 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -974,6 +974,7 @@ jobs: - BuilderDebTsan - BuilderDebUBsan runs-on: [self-hosted, style-checker] + if: ${{ success() || failure() }} steps: - name: Set envs run: | @@ -1021,6 +1022,7 @@ jobs: - BuilderBinClangTidy - BuilderDebShared runs-on: [self-hosted, style-checker] + if: ${{ success() || failure() }} steps: - name: Set envs run: | @@ -2992,6 +2994,77 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" +############################################################################################## +###################################### SQLANCER FUZZERS ###################################### +############################################################################################## + SQLancerTestRelease: + needs: [BuilderDebRelease] + runs-on: [self-hosted, fuzzer-unit-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/sqlancer_release + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=SQLancer (release) + REPO_COPY=${{runner.temp}}/sqlancer_release/ClickHouse + EOF + 
- name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: SQLancer + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 sqlancer_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + SQLancerTestDebug: + needs: [BuilderDebDebug] + runs-on: [self-hosted, fuzzer-unit-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/sqlancer_debug + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=SQLancer (debug) + REPO_COPY=${{runner.temp}}/sqlancer_debug/ClickHouse + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: SQLancer + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 sqlancer_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" FinishCheck: needs: - DockerHubPush @@ -3051,6 +3124,8 @@ jobs: - UnitTestsUBsan - UnitTestsReleaseClang - SharedBuildSmokeTest + - SQLancerTestRelease + - SQLancerTestDebug runs-on: [self-hosted, style-checker] steps: - name: Clear repository diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 801f7eda94a..7dff1e205a1 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -10,6 +10,9 @@ env: workflow_dispatch: jobs: + Debug: + # The task for having a preserved ENV and event.json for later investigation + uses: ./.github/workflows/debug.yml DockerHubPushAarch64: runs-on: [self-hosted, style-checker-aarch64] steps: @@ -122,3 +125,58 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" + SonarCloud: + runs-on: [self-hosted, builder] + env: + SONAR_SCANNER_VERSION: 4.7.0.2747 + SONAR_SERVER_URL: "https://sonarcloud.io" + BUILD_WRAPPER_OUT_DIR: build_wrapper_output_directory # Directory where build-wrapper output will be placed + CC: clang-15 + CXX: clang++-15 + steps: + - uses: actions/checkout@v2 + with: + fetch-depth: 0 # Shallow clones should be disabled for a better relevancy of analysis + submodules: true + - name: Set up JDK 11 + uses: actions/setup-java@v1 + with: + java-version: 11 + - name: Download and set up sonar-scanner + env: + SONAR_SCANNER_DOWNLOAD_URL: https://binaries.sonarsource.com/Distribution/sonar-scanner-cli/sonar-scanner-cli-${{ env.SONAR_SCANNER_VERSION }}-linux.zip + run: | + mkdir -p "$HOME/.sonar" + curl -sSLo "$HOME/.sonar/sonar-scanner.zip" "${{ env.SONAR_SCANNER_DOWNLOAD_URL }}" + unzip -o "$HOME/.sonar/sonar-scanner.zip" -d "$HOME/.sonar/" + echo "$HOME/.sonar/sonar-scanner-${{ env.SONAR_SCANNER_VERSION }}-linux/bin" >> "$GITHUB_PATH" + - name: Download 
and set up build-wrapper + env: + BUILD_WRAPPER_DOWNLOAD_URL: ${{ env.SONAR_SERVER_URL }}/static/cpp/build-wrapper-linux-x86.zip + run: | + curl -sSLo "$HOME/.sonar/build-wrapper-linux-x86.zip" "${{ env.BUILD_WRAPPER_DOWNLOAD_URL }}" + unzip -o "$HOME/.sonar/build-wrapper-linux-x86.zip" -d "$HOME/.sonar/" + echo "$HOME/.sonar/build-wrapper-linux-x86" >> "$GITHUB_PATH" + - name: Set Up Build Tools + run: | + sudo apt-get update + sudo apt-get install -yq git cmake ccache python3 ninja-build + sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" + - name: Run build-wrapper + run: | + mkdir build + cd build + cmake .. + cd .. + build-wrapper-linux-x86-64 --out-dir ${{ env.BUILD_WRAPPER_OUT_DIR }} cmake --build build/ + - name: Run sonar-scanner + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }} + run: | + sonar-scanner \ + --define sonar.host.url="${{ env.SONAR_SERVER_URL }}" \ + --define sonar.cfamily.build-wrapper-output="${{ env.BUILD_WRAPPER_OUT_DIR }}" \ + --define sonar.projectKey="ClickHouse_ClickHouse" \ + --define sonar.organization="clickhouse-java" \ + --define sonar.exclusions="**/*.java,**/*.ts,**/*.js,**/*.css,**/*.sql" diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 2795dc62d6d..09ca64977f0 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -112,7 +112,7 @@ jobs: StyleCheck: needs: DockerHubPush runs-on: [self-hosted, style-checker] - if: ${{ success() || failure() }} + if: ${{ success() || failure() || always() }} steps: - name: Set envs run: | @@ -2023,6 +2023,7 @@ jobs: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" TestsBugfixCheck: + needs: [CheckLabels, StyleCheck] runs-on: [self-hosted, stress-tester] steps: - name: Set envs @@ -3490,6 +3491,77 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" +############################################################################################## +###################################### SQLANCER FUZZERS ###################################### +############################################################################################## + SQLancerTestRelease: + needs: [BuilderDebRelease] + runs-on: [self-hosted, fuzzer-unit-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/sqlancer_release + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=SQLancer (release) + REPO_COPY=${{runner.temp}}/sqlancer_release/ClickHouse + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: SQLancer + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 sqlancer_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + SQLancerTestDebug: + needs: [BuilderDebDebug] + runs-on: [self-hosted, fuzzer-unit-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/sqlancer_debug + 
REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=SQLancer (debug) + REPO_COPY=${{runner.temp}}/sqlancer_debug/ClickHouse + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: SQLancer + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 sqlancer_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" ############################################################################################# ###################################### JEPSEN TESTS ######################################### ############################################################################################# @@ -3500,7 +3572,6 @@ jobs: if: contains(github.event.pull_request.labels.*.name, 'jepsen-test') needs: [BuilderBinRelease] uses: ./.github/workflows/jepsen.yml - FinishCheck: needs: - StyleCheck @@ -3575,6 +3646,8 @@ jobs: - SharedBuildSmokeTest - CompatibilityCheck - IntegrationTestsFlakyCheck + - SQLancerTestRelease + - SQLancerTestDebug runs-on: [self-hosted, style-checker] steps: - name: Clear repository diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index 8f42ca92646..abe85d3e72d 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -541,6 +541,7 @@ jobs: - BuilderDebMsan - BuilderDebDebug runs-on: [self-hosted, style-checker] + if: ${{ success() || failure() }} steps: - name: Set envs run: | @@ -580,6 +581,7 @@ jobs: - BuilderBinDarwin - BuilderBinDarwinAarch64 runs-on: [self-hosted, style-checker] + if: ${{ success() || failure() }} steps: - name: Set envs run: | diff --git a/.gitignore b/.gitignore index dd632eba85d..09d3f4a4e33 100644 --- a/.gitignore +++ b/.gitignore @@ -80,6 +80,7 @@ core vgcore* *.deb +*.tar.zst *.build *.upload *.changes @@ -153,3 +154,6 @@ website/package-lock.json /programs/server/metadata /programs/server/store +# temporary test files +tests/queries/0_stateless/test_* +tests/queries/0_stateless/*.binary diff --git a/.gitmodules b/.gitmodules index abd29c38846..293029ad171 100644 --- a/.gitmodules +++ b/.gitmodules @@ -287,3 +287,6 @@ [submodule "contrib/corrosion"] path = contrib/corrosion url = https://github.com/corrosion-rs/corrosion.git +[submodule "contrib/morton-nd"] + path = contrib/morton-nd + url = https://github.com/morton-nd/morton-nd diff --git a/.snyk b/.snyk new file mode 100644 index 00000000000..7acc6b9fbf5 --- /dev/null +++ b/.snyk @@ -0,0 +1,4 @@ +# Snyk (https://snyk.io) policy file +exclude: + global: + - tests/** diff --git a/CHANGELOG.md b/CHANGELOG.md index 56d117d05dd..68767612892 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ ### Table of Contents +**[ClickHouse release v22.10, 2022-10-25](#2210)**
**[ClickHouse release v22.9, 2022-09-22](#229)**
-**[ClickHouse release v22.8, 2022-08-18](#228)**
+**[ClickHouse release v22.8-lts, 2022-08-18](#228)**
**[ClickHouse release v22.7, 2022-07-21](#227)**
**[ClickHouse release v22.6, 2022-06-16](#226)**
**[ClickHouse release v22.5, 2022-05-19](#225)**
@@ -10,10 +11,143 @@ **[ClickHouse release v22.1, 2022-01-18](#221)**
**[Changelog for 2021](https://clickhouse.com/docs/en/whats-new/changelog/2021/)**
+### ClickHouse release 22.10, 2022-10-26 + +#### Backward Incompatible Change +* Rename cache commands: `show caches` -> `show filesystem caches`, `describe cache` -> `describe filesystem cache`. [#41508](https://github.com/ClickHouse/ClickHouse/pull/41508) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Remove support for the `WITH TIMEOUT` section for `LIVE VIEW`. This closes [#40557](https://github.com/ClickHouse/ClickHouse/issues/40557). [#42173](https://github.com/ClickHouse/ClickHouse/pull/42173) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove support for the `{database}` macro from the client's prompt. It was displayed incorrectly if the database was unspecified and it was not updated on `USE` statements. This closes [#25891](https://github.com/ClickHouse/ClickHouse/issues/25891). [#42508](https://github.com/ClickHouse/ClickHouse/pull/42508) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### New Feature +* Composable protocol configuration is added. Now different protocols can be set up with different listen hosts. Protocol wrappers such as PROXYv1 can be set up over any other protocols (TCP, TCP secure, MySQL, Postgres). [#41198](https://github.com/ClickHouse/ClickHouse/pull/41198) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Add `S3` as a new type of the destination of backups. Support BACKUP to S3 with as-is path/data structure. [#42333](https://github.com/ClickHouse/ClickHouse/pull/42333) ([Vitaly Baranov](https://github.com/vitlibar)), [#42232](https://github.com/ClickHouse/ClickHouse/pull/42232) ([Azat Khuzhin](https://github.com/azat)). +* Added functions (`randUniform`, `randNormal`, `randLogNormal`, `randExponential`, `randChiSquared`, `randStudentT`, `randFisherF`, `randBernoulli`, `randBinomial`, `randNegativeBinomial`, `randPoisson`) to generate random values according to the specified distributions. This closes [#21834](https://github.com/ClickHouse/ClickHouse/issues/21834). [#42411](https://github.com/ClickHouse/ClickHouse/pull/42411) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* An improvement for ClickHouse Keeper: add support for uploading snapshots to S3. S3 information can be defined inside `keeper_server.s3_snapshot`. [#41342](https://github.com/ClickHouse/ClickHouse/pull/41342) ([Antonio Andelic](https://github.com/antonio2368)). +* Added an aggregate function `analysisOfVariance` (`anova`) to perform a statistical test over several groups of normally distributed observations to find out whether all groups have the same mean or not. Original PR [#37872](https://github.com/ClickHouse/ClickHouse/issues/37872). [#42131](https://github.com/ClickHouse/ClickHouse/pull/42131) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Support limiting of temporary data stored on disk using settings `max_temporary_data_on_disk_size_for_user`/`max_temporary_data_on_disk_size_for_query` . [#40893](https://github.com/ClickHouse/ClickHouse/pull/40893) ([Vladimir C](https://github.com/vdimir)). +* Add setting `format_json_object_each_row_column_for_object_name` to write/parse object name as column value in JSONObjectEachRow format. [#41703](https://github.com/ClickHouse/ClickHouse/pull/41703) ([Kruglov Pavel](https://github.com/Avogar)). +* Add BLAKE3 hash-function to SQL. [#33435](https://github.com/ClickHouse/ClickHouse/pull/33435) ([BoloniniD](https://github.com/BoloniniD)). +* The function `javaHash` has been extended to integers. 
[#41131](https://github.com/ClickHouse/ClickHouse/pull/41131) ([JackyWoo](https://github.com/JackyWoo)). +* Add OpenTelemetry support to ON CLUSTER DDL (require `distributed_ddl_entry_format_version` to be set to 4). [#41484](https://github.com/ClickHouse/ClickHouse/pull/41484) ([Frank Chen](https://github.com/FrankChen021)). +* Added system table `asynchronous_insert_log`. It contains information about asynchronous inserts (including results of queries in fire-and-forget mode (with `wait_for_async_insert=0`)) for better introspection. [#42040](https://github.com/ClickHouse/ClickHouse/pull/42040) ([Anton Popov](https://github.com/CurtizJ)). +* Add support for methods `lz4`, `bz2`, `snappy` in HTTP's `Accept-Encoding` which is a non-standard extension to HTTP protocol. [#42071](https://github.com/ClickHouse/ClickHouse/pull/42071) ([Nikolay Degterinsky](https://github.com/evillique)). +* Adds Morton Coding (ZCurve) encode/decode functions. [#41753](https://github.com/ClickHouse/ClickHouse/pull/41753) ([Constantine Peresypkin](https://github.com/pkit)). +* Add support for `SET setting_name = DEFAULT`. [#42187](https://github.com/ClickHouse/ClickHouse/pull/42187) ([Filatenkov Artur](https://github.com/FArthur-cmd)). + +#### Experimental Feature +* Added new infrastructure for query analysis and planning under the `allow_experimental_analyzer` setting. [#31796](https://github.com/ClickHouse/ClickHouse/pull/31796) ([Maksim Kita](https://github.com/kitaisreal)). +* Initial implementation of Kusto Query Language. Please don't use it. [#37961](https://github.com/ClickHouse/ClickHouse/pull/37961) ([Yong Wang](https://github.com/kashwy)). + +#### Performance Improvement +* Relax the "Too many parts" threshold. This closes [#6551](https://github.com/ClickHouse/ClickHouse/issues/6551). Now ClickHouse will allow more parts in a partition if the average part size is large enough (at least 10 GiB). This allows to have up to petabytes of data in a single partition of a single table on a single server, which is possible using disk shelves or object storage. [#42002](https://github.com/ClickHouse/ClickHouse/pull/42002) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Implement operator precedence element parser to make the required stack size smaller. [#34892](https://github.com/ClickHouse/ClickHouse/pull/34892) ([Nikolay Degterinsky](https://github.com/evillique)). +* DISTINCT in order optimization leverage sorting properties of data streams. This improvement will enable reading in order for DISTINCT if applicable (before it was necessary to provide ORDER BY for columns in DISTINCT). [#41014](https://github.com/ClickHouse/ClickHouse/pull/41014) ([Igor Nikonov](https://github.com/devcrafter)). +* ColumnVector: optimize UInt8 index with AVX512VBMI. [#41247](https://github.com/ClickHouse/ClickHouse/pull/41247) ([Guo Wangyang](https://github.com/guowangy)). +* Optimize the lock contentions for `ThreadGroupStatus::mutex`. The performance experiments of **SSB** (Star Schema Benchmark) on the ICX device (Intel Xeon Platinum 8380 CPU, 80 cores, 160 threads) shows that this change could bring a **2.95x** improvement of the geomean of all subcases' QPS. [#41675](https://github.com/ClickHouse/ClickHouse/pull/41675) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). +* Add `ldapr` capabilities to AArch64 builds. This is supported from Graviton 2+, Azure and GCP instances. Only appeared in clang-15 [not so long ago](https://github.com/llvm/llvm-project/commit/9609b5daffe9fd28d83d83da895abc5113f76c24). 
[#41778](https://github.com/ClickHouse/ClickHouse/pull/41778) ([Daniel Kutenin](https://github.com/danlark1)). +* Improve performance when comparing strings and one argument is an empty constant string. [#41870](https://github.com/ClickHouse/ClickHouse/pull/41870) ([Jiebin Sun](https://github.com/jiebinn)). +* Optimize `insertFrom` of ColumnAggregateFunction to share Aggregate State in some cases. [#41960](https://github.com/ClickHouse/ClickHouse/pull/41960) ([flynn](https://github.com/ucasfl)). +* Make writing to `azure_blob_storage` disks faster (respect `max_single_part_upload_size` instead of writing a block per each buffer size). Inefficiency mentioned in [#41754](https://github.com/ClickHouse/ClickHouse/issues/41754). [#42041](https://github.com/ClickHouse/ClickHouse/pull/42041) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Make thread ids in the process list and query_log unique to avoid waste. [#42180](https://github.com/ClickHouse/ClickHouse/pull/42180) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Support skipping cache completely (both download to cache and reading cached data) in case the requested read range exceeds the threshold defined by cache setting `bypass_cache_threashold`, requires to be enabled with `enable_bypass_cache_with_threshold`). [#42418](https://github.com/ClickHouse/ClickHouse/pull/42418) ([Han Shukai](https://github.com/KinderRiven)). This helps on slow local disks. + +#### Improvement +* Add setting `allow_implicit_no_password`: in combination with `allow_no_password` it forbids creating a user with no password unless `IDENTIFIED WITH no_password` is explicitly specified. [#41341](https://github.com/ClickHouse/ClickHouse/pull/41341) ([Nikolay Degterinsky](https://github.com/evillique)). +* Embedded Keeper will always start in the background allowing ClickHouse to start without achieving quorum. [#40991](https://github.com/ClickHouse/ClickHouse/pull/40991) ([Antonio Andelic](https://github.com/antonio2368)). +* Made reestablishing a new connection to ZooKeeper more reactive in case of expiration of the previous one. Previously there was a task which spawns every minute by default and thus a table could be in readonly state for about this time. [#41092](https://github.com/ClickHouse/ClickHouse/pull/41092) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Now projections can be used with zero copy replication (zero-copy replication is a non-production feature). [#41147](https://github.com/ClickHouse/ClickHouse/pull/41147) ([alesapin](https://github.com/alesapin)). +* Support expression `(EXPLAIN SELECT ...)` in a subquery. Queries like `SELECT * FROM (EXPLAIN PIPELINE SELECT col FROM TABLE ORDER BY col)` became valid. [#40630](https://github.com/ClickHouse/ClickHouse/pull/40630) ([Vladimir C](https://github.com/vdimir)). +* Allow changing `async_insert_max_data_size` or `async_insert_busy_timeout_ms` in scope of query. E.g. user wants to insert data rarely and she doesn't have access to the server config to tune default settings. [#40668](https://github.com/ClickHouse/ClickHouse/pull/40668) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Improvements for reading from remote filesystems, made threadpool size for reads/writes configurable. Closes [#41070](https://github.com/ClickHouse/ClickHouse/issues/41070). [#41011](https://github.com/ClickHouse/ClickHouse/pull/41011) ([Kseniia Sumarokova](https://github.com/kssenii)). 
+* Support all combinators combination in WindowTransform/arratReduce*/initializeAggregation/aggregate functions versioning. Previously combinators like `ForEach/Resample/Map` didn't work in these places, using them led to exception like`State function ... inserts results into non-state column`. [#41107](https://github.com/ClickHouse/ClickHouse/pull/41107) ([Kruglov Pavel](https://github.com/Avogar)). +* Add function `tryDecrypt` that returns NULL when decrypt fails (e.g. decrypt with incorrect key) instead of throwing an exception. [#41206](https://github.com/ClickHouse/ClickHouse/pull/41206) ([Duc Canh Le](https://github.com/canhld94)). +* Add the `unreserved_space` column to the `system.disks` table to check how much space is not taken by reservations per disk. [#41254](https://github.com/ClickHouse/ClickHouse/pull/41254) ([filimonov](https://github.com/filimonov)). +* Support s3 authorization headers in table function arguments. [#41261](https://github.com/ClickHouse/ClickHouse/pull/41261) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add support for MultiRead in Keeper and internal ZooKeeper client (this is an extension to ZooKeeper protocol, only available in ClickHouse Keeper). [#41410](https://github.com/ClickHouse/ClickHouse/pull/41410) ([Antonio Andelic](https://github.com/antonio2368)). +* Add support for decimal type comparing with floating point literal in IN operator. [#41544](https://github.com/ClickHouse/ClickHouse/pull/41544) ([liang.huang](https://github.com/lhuang09287750)). +* Allow readable size values (like `1TB`) in cache config. [#41688](https://github.com/ClickHouse/ClickHouse/pull/41688) ([Kseniia Sumarokova](https://github.com/kssenii)). +* ClickHouse could cache stale DNS entries for some period of time (15 seconds by default) until the cache won't be updated asynchronously. During these periods ClickHouse can nevertheless try to establish a connection and produce errors. This behavior is fixed. [#41707](https://github.com/ClickHouse/ClickHouse/pull/41707) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Add interactive history search with fzf-like utility (fzf/sk) for `clickhouse-client`/`clickhouse-local` (note you can use `FZF_DEFAULT_OPTS`/`SKIM_DEFAULT_OPTIONS` to additionally configure the behavior). [#41730](https://github.com/ClickHouse/ClickHouse/pull/41730) ([Azat Khuzhin](https://github.com/azat)). +* Only allow clients connecting to a secure server with an invalid certificate only to proceed with the '--accept-certificate' flag. [#41743](https://github.com/ClickHouse/ClickHouse/pull/41743) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Add function `tryBase58Decode`, similar to the existing function `tryBase64Decode`. [#41824](https://github.com/ClickHouse/ClickHouse/pull/41824) ([Robert Schulze](https://github.com/rschu1ze)). +* Improve feedback when replacing partition with different primary key. Fixes [#34798](https://github.com/ClickHouse/ClickHouse/issues/34798). [#41838](https://github.com/ClickHouse/ClickHouse/pull/41838) ([Salvatore](https://github.com/tbsal)). +* Fix parallel parsing: segmentator now checks `max_block_size`. This fixed memory overallocation in case of parallel parsing and small LIMIT. [#41852](https://github.com/ClickHouse/ClickHouse/pull/41852) ([Vitaly Baranov](https://github.com/vitlibar)). +* Don't add "TABLE_IS_DROPPED" exception to `system.errors` if it's happened during SELECT from a system table and was ignored. 
[#41908](https://github.com/ClickHouse/ClickHouse/pull/41908) ([AlfVII](https://github.com/AlfVII)). +* Improve option `enable_extended_results_for_datetime_functions` to return results of type DateTime64 for functions `toStartOfDay`, `toStartOfHour`, `toStartOfFifteenMinutes`, `toStartOfTenMinutes`, `toStartOfFiveMinutes`, `toStartOfMinute` and `timeSlot`. [#41910](https://github.com/ClickHouse/ClickHouse/pull/41910) ([Roman Vasin](https://github.com/rvasin)). +* Improve `DateTime` type inference for text formats. Now it respects setting `date_time_input_format` and doesn't try to infer datetimes from numbers as timestamps. Closes [#41389](https://github.com/ClickHouse/ClickHouse/issues/41389) Closes [#42206](https://github.com/ClickHouse/ClickHouse/issues/42206). [#41912](https://github.com/ClickHouse/ClickHouse/pull/41912) ([Kruglov Pavel](https://github.com/Avogar)). +* Remove confusing warning when inserting with `perform_ttl_move_on_insert` = false. [#41980](https://github.com/ClickHouse/ClickHouse/pull/41980) ([Vitaly Baranov](https://github.com/vitlibar)). +* Allow user to write `countState(*)` similar to `count(*)`. This closes [#9338](https://github.com/ClickHouse/ClickHouse/issues/9338). [#41983](https://github.com/ClickHouse/ClickHouse/pull/41983) ([Amos Bird](https://github.com/amosbird)). +* Fix `rankCorr` size overflow. [#42020](https://github.com/ClickHouse/ClickHouse/pull/42020) ([Duc Canh Le](https://github.com/canhld94)). +* Added an option to specify an arbitrary string as an environment name in the Sentry's config for more handy reports. [#42037](https://github.com/ClickHouse/ClickHouse/pull/42037) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix parsing out-of-range Date from CSV. [#42044](https://github.com/ClickHouse/ClickHouse/pull/42044) ([Andrey Zvonov](https://github.com/zvonand)). +* `parseDataTimeBestEffort` now supports comma between date and time. Closes [#42038](https://github.com/ClickHouse/ClickHouse/issues/42038). [#42049](https://github.com/ClickHouse/ClickHouse/pull/42049) ([flynn](https://github.com/ucasfl)). +* Improved stale replica recovery process for `ReplicatedMergeTree`. If a lost replica has some parts which are absent from a healthy replica, but these parts should appear in the future according to the replication queue of the healthy replica, then the lost replica will keep such parts instead of detaching them. [#42134](https://github.com/ClickHouse/ClickHouse/pull/42134) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add a possibility to use `Date32` arguments for date_diff function. Fix issue in date_diff function when using DateTime64 arguments with a start date before Unix epoch and end date after Unix epoch. [#42308](https://github.com/ClickHouse/ClickHouse/pull/42308) ([Roman Vasin](https://github.com/rvasin)). +* When uploading big parts to Minio, 'Complete Multipart Upload' can take a long time. Minio sends heartbeats every 10 seconds (see https://github.com/minio/minio/pull/7198). But clickhouse times out earlier, because the default send/receive timeout is [set](https://github.com/ClickHouse/ClickHouse/blob/cc24fcd6d5dfb67f5f66f5483e986bd1010ad9cf/src/IO/S3/PocoHTTPClient.cpp#L123) to 5 seconds. [#42321](https://github.com/ClickHouse/ClickHouse/pull/42321) ([filimonov](https://github.com/filimonov)). +* Fix rarely invalid cast of aggregate state types with complex types such as Decimal. This fixes [#42408](https://github.com/ClickHouse/ClickHouse/issues/42408). 
[#42417](https://github.com/ClickHouse/ClickHouse/pull/42417) ([Amos Bird](https://github.com/amosbird)). +* Allow to use `Date32` arguments for `dateName` function. [#42554](https://github.com/ClickHouse/ClickHouse/pull/42554) ([Roman Vasin](https://github.com/rvasin)). +* Now filters with NULL literals will be used during index analysis. [#34063](https://github.com/ClickHouse/ClickHouse/issues/34063). [#41842](https://github.com/ClickHouse/ClickHouse/pull/41842) ([Amos Bird](https://github.com/amosbird)). +* Merge parts if every part in the range is older than a certain threshold. The threshold can be set by using `min_age_to_force_merge_seconds`. This closes [#35836](https://github.com/ClickHouse/ClickHouse/issues/35836). [#42423](https://github.com/ClickHouse/ClickHouse/pull/42423) ([Antonio Andelic](https://github.com/antonio2368)). This is continuation of [#39550i](https://github.com/ClickHouse/ClickHouse/pull/39550) by [@fastio](https://github.com/fastio) who implemented most of the logic. +* Improve the time to recover lost keeper connections. [#42541](https://github.com/ClickHouse/ClickHouse/pull/42541) ([Raúl Marín](https://github.com/Algunenano)). + +#### Build/Testing/Packaging Improvement +* Add fuzzer for table definitions [#40096](https://github.com/ClickHouse/ClickHouse/pull/40096) ([Anton Popov](https://github.com/CurtizJ)). This represents the biggest advancement for ClickHouse testing in this year so far. +* Beta version of the ClickHouse Cloud service is released: [https://clickhouse.cloud/](https://clickhouse.cloud/). It provides the easiest way to use ClickHouse (even slightly easier than the single-command installation). +* Added support of WHERE clause generation to AST Fuzzer and possibility to add or remove ORDER BY and WHERE clause. [#38519](https://github.com/ClickHouse/ClickHouse/pull/38519) ([Ilya Yatsishin](https://github.com/qoega)). +* Aarch64 binaries now require at least ARMv8.2, released in 2016. Most notably, this enables use of ARM LSE, i.e. native atomic operations. Also, CMake build option "NO_ARMV81_OR_HIGHER" has been added to allow compilation of binaries for older ARMv8.0 hardware, e.g. Raspberry Pi 4. [#41610](https://github.com/ClickHouse/ClickHouse/pull/41610) ([Robert Schulze](https://github.com/rschu1ze)). +* Allow building ClickHouse with Musl (small changes after it was already supported but broken). [#41987](https://github.com/ClickHouse/ClickHouse/pull/41987) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add the `$CLICKHOUSE_CRONFILE` file checking to avoid running the `sed` command to get the file not found error on install. [#42081](https://github.com/ClickHouse/ClickHouse/pull/42081) ([Chun-Sheng, Li](https://github.com/peter279k)). +* Update cctz to `2022e` to support the new timezone changes. Palestine transitions are now Saturdays at 02:00. Simplify three Ukraine zones into one. Jordan and Syria switch from +02/+03 with DST to year-round +03. (https://data.iana.org/time-zones/tzdb/NEWS). This closes [#42252](https://github.com/ClickHouse/ClickHouse/issues/42252). [#42327](https://github.com/ClickHouse/ClickHouse/pull/42327) ([Alexey Milovidov](https://github.com/alexey-milovidov)). [#42273](https://github.com/ClickHouse/ClickHouse/pull/42273) ([Dom Del Nano](https://github.com/ddelnano)). +* Add Rust code support into ClickHouse with BLAKE3 hash-function library as an example. [#33435](https://github.com/ClickHouse/ClickHouse/pull/33435) ([BoloniniD](https://github.com/BoloniniD)). 
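The snippet below is a brief, illustrative sketch of a few of the SQL-level additions listed in the 22.10 entries above (the random-distribution functions, the `BLAKE3` hash function, and `SET setting_name = DEFAULT`). It is not part of the upstream diff; argument conventions follow the linked pull requests, and the chosen setting (`max_threads`) is just an example.

```sql
-- Illustrative only; see the linked PRs for exact signatures and defaults.
SELECT
    randNormal(0, 1)          AS sample_normal,   -- value drawn from N(mean = 0, variance = 1)
    randPoisson(10)           AS sample_poisson,  -- value drawn from Poisson(lambda = 10)
    hex(BLAKE3('ClickHouse')) AS blake3_digest;   -- BLAKE3 hash of a string, hex-encoded

SET max_threads = 8;        -- override a setting for the current session
SET max_threads = DEFAULT;  -- new in 22.10: reset it back to the server default
```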
+ +#### Bug Fix (user-visible misbehavior in official stable or prestable release) + +* Choose correct aggregation method for `LowCardinality` with big integer types. [#42342](https://github.com/ClickHouse/ClickHouse/pull/42342) ([Duc Canh Le](https://github.com/canhld94)). +* Several fixes for `web` disk. [#41652](https://github.com/ClickHouse/ClickHouse/pull/41652) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fixes an issue that causes docker run to fail if `https_port` is not present in config. [#41693](https://github.com/ClickHouse/ClickHouse/pull/41693) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Mutations were not cancelled properly on server shutdown or `SYSTEM STOP MERGES` query and cancellation might take long time, it's fixed. [#41699](https://github.com/ClickHouse/ClickHouse/pull/41699) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix wrong result of queries with `ORDER BY` or `GROUP BY` by columns from prefix of sorting key, wrapped into monotonic functions, with enable "read in order" optimization (settings `optimize_read_in_order` and `optimize_aggregation_in_order`). [#41701](https://github.com/ClickHouse/ClickHouse/pull/41701) ([Anton Popov](https://github.com/CurtizJ)). +* Fix possible crash in `SELECT` from `Merge` table with enabled `optimize_monotonous_functions_in_order_by` setting. Fixes [#41269](https://github.com/ClickHouse/ClickHouse/issues/41269). [#41740](https://github.com/ClickHouse/ClickHouse/pull/41740) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fixed "Part ... intersects part ..." error that might happen in extremely rare cases if replica was restarted just after detaching some part as broken. [#41741](https://github.com/ClickHouse/ClickHouse/pull/41741) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Don't allow to create or alter merge tree tables with column name `_row_exists`, which is reserved for lightweight delete. Fixed [#41716](https://github.com/ClickHouse/ClickHouse/issues/41716). [#41763](https://github.com/ClickHouse/ClickHouse/pull/41763) ([Jianmei Zhang](https://github.com/zhangjmruc)). +* Fix a bug that CORS headers are missing in some HTTP responses. [#41792](https://github.com/ClickHouse/ClickHouse/pull/41792) ([Frank Chen](https://github.com/FrankChen021)). +* 22.9 might fail to startup `ReplicatedMergeTree` table if that table was created by 20.3 or older version and was never altered, it's fixed. Fixes [#41742](https://github.com/ClickHouse/ClickHouse/issues/41742). [#41796](https://github.com/ClickHouse/ClickHouse/pull/41796) ([Alexander Tokmakov](https://github.com/tavplubix)). +* When the batch sending fails for some reason, it cannot be automatically recovered, and if it is not processed in time, it will lead to accumulation, and the printed error message will become longer and longer, which will cause the http thread to block. [#41813](https://github.com/ClickHouse/ClickHouse/pull/41813) ([zhongyuankai](https://github.com/zhongyuankai)). +* Fix compact parts with compressed marks setting. Fixes [#41783](https://github.com/ClickHouse/ClickHouse/issues/41783) and [#41746](https://github.com/ClickHouse/ClickHouse/issues/41746). [#41823](https://github.com/ClickHouse/ClickHouse/pull/41823) ([alesapin](https://github.com/alesapin)). +* Old versions of Replicated database don't have a special marker in [Zoo]Keeper. We need to check only whether the node contains come obscure data instead of special mark. 
[#41875](https://github.com/ClickHouse/ClickHouse/pull/41875) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix possible exception in fs cache. [#41884](https://github.com/ClickHouse/ClickHouse/pull/41884) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix `use_environment_credentials` for s3 table function. [#41970](https://github.com/ClickHouse/ClickHouse/pull/41970) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fixed "Directory already exists and is not empty" error on detaching broken part that might prevent `ReplicatedMergeTree` table from starting replication. Fixes [#40957](https://github.com/ClickHouse/ClickHouse/issues/40957). [#41981](https://github.com/ClickHouse/ClickHouse/pull/41981) ([Alexander Tokmakov](https://github.com/tavplubix)). +* `toDateTime64` now returns the same output with negative integer and float arguments. [#42025](https://github.com/ClickHouse/ClickHouse/pull/42025) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix write into `azure_blob_storage`. Partially closes [#41754](https://github.com/ClickHouse/ClickHouse/issues/41754). [#42034](https://github.com/ClickHouse/ClickHouse/pull/42034) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix the `bzip2` decoding issue for specific `bzip2` files. [#42046](https://github.com/ClickHouse/ClickHouse/pull/42046) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix SQL function `toLastDayOfMonth` with setting "enable_extended_results_for_datetime_functions = 1" at the beginning of the extended range (January 1900). - Fix SQL function "toRelativeWeekNum()" with setting "enable_extended_results_for_datetime_functions = 1" at the end of extended range (December 2299). - Improve the performance of for SQL functions "toISOYear()", "toFirstDayNumOfISOYearIndex()" and "toYearWeekOfNewyearMode()" by avoiding unnecessary index arithmetics. [#42084](https://github.com/ClickHouse/ClickHouse/pull/42084) ([Roman Vasin](https://github.com/rvasin)). +* The maximum size of fetches for each table accidentally was set to 8 while the pool size could be bigger. Now the maximum size of fetches for table is equal to the pool size. [#42090](https://github.com/ClickHouse/ClickHouse/pull/42090) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* A table might be shut down and a dictionary might be detached before checking if can be dropped without breaking dependencies between table, it's fixed. Fixes [#41982](https://github.com/ClickHouse/ClickHouse/issues/41982). [#42106](https://github.com/ClickHouse/ClickHouse/pull/42106) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix bad inefficiency of `remote_filesystem_read_method=read` with filesystem cache. Closes [#42125](https://github.com/ClickHouse/ClickHouse/issues/42125). [#42129](https://github.com/ClickHouse/ClickHouse/pull/42129) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix possible timeout exception for distributed queries with use_hedged_requests = 0. [#42130](https://github.com/ClickHouse/ClickHouse/pull/42130) ([Azat Khuzhin](https://github.com/azat)). +* Fixed a minor bug inside function `runningDifference` in case of using it with `Date32` type. Previously `Date` was used and it may cause some logical errors like `Bad cast from type DB::ColumnVector to DB::ColumnVector'`. [#42143](https://github.com/ClickHouse/ClickHouse/pull/42143) ([Alfred Xu](https://github.com/sperlingxx)). +* Fix reusing of files > 4GB from base backup. 
[#42146](https://github.com/ClickHouse/ClickHouse/pull/42146) ([Azat Khuzhin](https://github.com/azat)). +* DISTINCT in order fails with LOGICAL_ERROR if first column in sorting key contains function. [#42186](https://github.com/ClickHouse/ClickHouse/pull/42186) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix a bug with projections and the `aggregate_functions_null_for_empty` setting. This bug is very rare and appears only if you enable the `aggregate_functions_null_for_empty` setting in the server's config. This closes [#41647](https://github.com/ClickHouse/ClickHouse/issues/41647). [#42198](https://github.com/ClickHouse/ClickHouse/pull/42198) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix read from `Buffer` tables with read in order desc. [#42236](https://github.com/ClickHouse/ClickHouse/pull/42236) ([Duc Canh Le](https://github.com/canhld94)). +* Fix a bug which prevents ClickHouse to start when `background_pool_size setting` is set on default profile but `background_merges_mutations_concurrency_ratio` is not. [#42315](https://github.com/ClickHouse/ClickHouse/pull/42315) ([nvartolomei](https://github.com/nvartolomei)). +* `ALTER UPDATE` of attached part (with columns different from table schema) could create an invalid `columns.txt` metadata on disk. Reading from such part could fail with errors or return invalid data. Fixes [#42161](https://github.com/ClickHouse/ClickHouse/issues/42161). [#42319](https://github.com/ClickHouse/ClickHouse/pull/42319) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Setting `additional_table_filters` were not applied to `Distributed` storage. Fixes [#41692](https://github.com/ClickHouse/ClickHouse/issues/41692). [#42322](https://github.com/ClickHouse/ClickHouse/pull/42322) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix a data race in query finish/cancel. This closes [#42346](https://github.com/ClickHouse/ClickHouse/issues/42346). [#42362](https://github.com/ClickHouse/ClickHouse/pull/42362) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* This reverts [#40217](https://github.com/ClickHouse/ClickHouse/issues/40217) which introduced a regression in date/time functions. [#42367](https://github.com/ClickHouse/ClickHouse/pull/42367) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix assert cast in join on falsy condition, Close [#42380](https://github.com/ClickHouse/ClickHouse/issues/42380). [#42407](https://github.com/ClickHouse/ClickHouse/pull/42407) ([Vladimir C](https://github.com/vdimir)). +* Fix buffer overflow in the processing of Decimal data types. This closes [#42451](https://github.com/ClickHouse/ClickHouse/issues/42451). [#42465](https://github.com/ClickHouse/ClickHouse/pull/42465) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* `AggregateFunctionQuantile` now correctly works with UInt128 columns. Previously, the quantile state interpreted `UInt128` columns as `Int128` which could have led to incorrect results. [#42473](https://github.com/ClickHouse/ClickHouse/pull/42473) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix bad_cast assert during INSERT into `Annoy` indexes over non-Float32 columns. `Annoy` indices is an experimental feature. [#42485](https://github.com/ClickHouse/ClickHouse/pull/42485) ([Robert Schulze](https://github.com/rschu1ze)). +* Arithmetic operator with Date or DateTime and 128 or 256-bit integer was referencing uninitialized memory. [#42453](https://github.com/ClickHouse/ClickHouse/issues/42453). 
[#42573](https://github.com/ClickHouse/ClickHouse/pull/42573) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix unexpected table loading error when partition key contains alias function names during server upgrade. [#36379](https://github.com/ClickHouse/ClickHouse/pull/36379) ([Amos Bird](https://github.com/amosbird)). + ### ClickHouse release 22.9, 2022-09-22 #### Backward Incompatible Change + * Upgrade from 20.3 and older to 22.9 and newer should be done through an intermediate version if there are any `ReplicatedMergeTree` tables, otherwise server with the new version will not start. [#40641](https://github.com/ClickHouse/ClickHouse/pull/40641) ([Alexander Tokmakov](https://github.com/tavplubix)). * Remove the functions `accurate_Cast` and `accurate_CastOrNull` (they are different to `accurateCast` and `accurateCastOrNull` by underscore in the name and they are not affected by the value of `cast_keep_nullable` setting). These functions were undocumented, untested, unused, and unneeded. They appeared to be alive due to code generalization. [#40682](https://github.com/ClickHouse/ClickHouse/pull/40682) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Add a test to ensure that every new table function will be documented. See [#40649](https://github.com/ClickHouse/ClickHouse/issues/40649). Rename table function `MeiliSearch` to `meilisearch`. [#40709](https://github.com/ClickHouse/ClickHouse/pull/40709) ([Alexey Milovidov](https://github.com/alexey-milovidov)). @@ -21,6 +155,7 @@ * Make interpretation of YAML configs to be more conventional. [#41044](https://github.com/ClickHouse/ClickHouse/pull/41044) ([Vitaly Baranov](https://github.com/vitlibar)). #### New Feature + * Support `insert_quorum = 'auto'` to use majority number. [#39970](https://github.com/ClickHouse/ClickHouse/pull/39970) ([Sachin](https://github.com/SachinSetiya)). * Add embedded dashboards to ClickHouse server. This is a demo project about how to achieve 90% results with 1% effort using ClickHouse features. [#40461](https://github.com/ClickHouse/ClickHouse/pull/40461) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Added new settings constraint writability kind `changeable_in_readonly`. [#40631](https://github.com/ClickHouse/ClickHouse/pull/40631) ([Sergei Trifonov](https://github.com/serxa)). @@ -38,6 +173,7 @@ * Improvement for in-memory data parts: remove completely processed WAL files. [#40592](https://github.com/ClickHouse/ClickHouse/pull/40592) ([Azat Khuzhin](https://github.com/azat)). #### Performance Improvement + * Implement compression of marks and primary key. Close [#34437](https://github.com/ClickHouse/ClickHouse/issues/34437). [#37693](https://github.com/ClickHouse/ClickHouse/pull/37693) ([zhongyuankai](https://github.com/zhongyuankai)). * Allow to load marks with threadpool in advance. Regulated by setting `load_marks_asynchronously` (default: 0). [#40821](https://github.com/ClickHouse/ClickHouse/pull/40821) ([Kseniia Sumarokova](https://github.com/kssenii)). * Virtual filesystem over s3 will use random object names split into multiple path prefixes for better performance on AWS. [#40968](https://github.com/ClickHouse/ClickHouse/pull/40968) ([Alexey Milovidov](https://github.com/alexey-milovidov)). @@ -58,6 +194,7 @@ * Parallel hash JOIN for Float data types might be suboptimal. Make it better. [#41183](https://github.com/ClickHouse/ClickHouse/pull/41183) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
#### Improvement + * During startup and ATTACH call, `ReplicatedMergeTree` tables will be readonly until the ZooKeeper connection is made and the setup is finished. [#40148](https://github.com/ClickHouse/ClickHouse/pull/40148) ([Antonio Andelic](https://github.com/antonio2368)). * Add `enable_extended_results_for_datetime_functions` option to return results of type Date32 for functions toStartOfYear, toStartOfISOYear, toStartOfQuarter, toStartOfMonth, toStartOfWeek, toMonday and toLastDayOfMonth when argument is Date32 or DateTime64, otherwise results of Date type are returned. For compatibility reasons default value is ‘0’. [#41214](https://github.com/ClickHouse/ClickHouse/pull/41214) ([Roman Vasin](https://github.com/rvasin)). * For security and stability reasons, CatBoost models are no longer evaluated within the ClickHouse server. Instead, the evaluation is now done in the clickhouse-library-bridge, a separate process that loads the catboost library and communicates with the server process via HTTP. [#40897](https://github.com/ClickHouse/ClickHouse/pull/40897) ([Robert Schulze](https://github.com/rschu1ze)). [#39629](https://github.com/ClickHouse/ClickHouse/pull/39629) ([Robert Schulze](https://github.com/rschu1ze)). @@ -108,6 +245,7 @@ * Add `has_lightweight_delete` to system.parts. [#41564](https://github.com/ClickHouse/ClickHouse/pull/41564) ([Kseniia Sumarokova](https://github.com/kssenii)). #### Build/Testing/Packaging Improvement + * Enforce documentation for every setting. [#40644](https://github.com/ClickHouse/ClickHouse/pull/40644) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Enforce documentation for every current metric. [#40645](https://github.com/ClickHouse/ClickHouse/pull/40645) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Enforce documentation for every profile event counter. Write the documentation where it was missing. [#40646](https://github.com/ClickHouse/ClickHouse/pull/40646) ([Alexey Milovidov](https://github.com/alexey-milovidov)). @@ -217,15 +355,16 @@ * Fix read bytes/rows in X-ClickHouse-Summary with materialized views. [#41586](https://github.com/ClickHouse/ClickHouse/pull/41586) ([Raúl Marín](https://github.com/Algunenano)). * Fix possible `pipeline stuck` exception for queries with `OFFSET`. The error was found with `enable_optimize_predicate_expression = 0` and always false condition in `WHERE`. Fixes [#41383](https://github.com/ClickHouse/ClickHouse/issues/41383). [#41588](https://github.com/ClickHouse/ClickHouse/pull/41588) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). - -### ClickHouse release 22.8, 2022-08-18 +### ClickHouse release 22.8-lts, 2022-08-18 #### Backward Incompatible Change + * Extended range of `Date32` and `DateTime64` to support dates from the year 1900 to 2299. In previous versions, the supported interval was only from the year 1925 to 2283. The implementation is using the proleptic Gregorian calendar (which is conformant with [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601):2004 (clause 3.2.1 The Gregorian calendar)) instead of accounting for historical transitions from the Julian to the Gregorian calendar. This change affects implementation-specific behavior for out-of-range arguments. E.g. if in previous versions the value of `1899-01-01` was clamped to `1925-01-01`, in the new version it will be clamped to `1900-01-01`. 
It changes the behavior of rounding with `toStartOfInterval` if you pass `INTERVAL 3 QUARTER` up to one quarter because the intervals are counted from an implementation-specific point of time. Closes [#28216](https://github.com/ClickHouse/ClickHouse/issues/28216), improves [#38393](https://github.com/ClickHouse/ClickHouse/issues/38393). [#39425](https://github.com/ClickHouse/ClickHouse/pull/39425) ([Roman Vasin](https://github.com/rvasin)). * Now, all relevant dictionary sources respect `remote_url_allow_hosts` setting. It was already done for HTTP, Cassandra, Redis. Added ClickHouse, MongoDB, MySQL, PostgreSQL. Host is checked only for dictionaries created from DDL. [#39184](https://github.com/ClickHouse/ClickHouse/pull/39184) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). * Make the remote filesystem cache composable, allow not to evict certain files (regarding idx, mrk, ..), delete old cache version. Now it is possible to configure cache over Azure blob storage disk, over Local disk, over StaticWeb disk, etc. This PR is marked backward incompatible because cache configuration changes and in order for cache to work need to update the config file. Old cache will still be used with new configuration. The server will startup fine with the old cache configuration. Closes https://github.com/ClickHouse/ClickHouse/issues/36140. Closes https://github.com/ClickHouse/ClickHouse/issues/37889. ([Kseniia Sumarokova](https://github.com/kssenii)). [#36171](https://github.com/ClickHouse/ClickHouse/pull/36171)) #### New Feature + * Query parameters can be set in interactive mode as `SET param_abc = 'def'` and transferred via the native protocol as settings. [#39906](https://github.com/ClickHouse/ClickHouse/pull/39906) ([Nikita Taranov](https://github.com/nickitat)). * Quota key can be set in the native protocol ([Yakov Olkhovsky](https://github.com/ClickHouse/ClickHouse/pull/39874)). * Added a setting `exact_rows_before_limit` (0/1). When enabled, ClickHouse will provide exact value for `rows_before_limit_at_least` statistic, but with the cost that the data before limit will have to be read completely. This closes [#6613](https://github.com/ClickHouse/ClickHouse/issues/6613). [#25333](https://github.com/ClickHouse/ClickHouse/pull/25333) ([kevin wan](https://github.com/MaxWk)). @@ -240,12 +379,14 @@ * Add new setting schema_inference_hints that allows to specify structure hints in schema inference for specific columns. Closes [#39569](https://github.com/ClickHouse/ClickHouse/issues/39569). [#40068](https://github.com/ClickHouse/ClickHouse/pull/40068) ([Kruglov Pavel](https://github.com/Avogar)). #### Experimental Feature + * Support SQL standard DELETE FROM syntax on merge tree tables and lightweight delete implementation for merge tree families. [#37893](https://github.com/ClickHouse/ClickHouse/pull/37893) ([Jianmei Zhang](https://github.com/zhangjmruc)) ([Alexander Gololobov](https://github.com/davenger)). Note: this new feature does not make ClickHouse an HTAP DBMS. #### Performance Improvement + * Improved memory usage during memory efficient merging of aggregation results. [#39429](https://github.com/ClickHouse/ClickHouse/pull/39429) ([Nikita Taranov](https://github.com/nickitat)). * Added concurrency control logic to limit total number of concurrent threads created by queries. [#37558](https://github.com/ClickHouse/ClickHouse/pull/37558) ([Sergei Trifonov](https://github.com/serxa)). 
Add `concurrent_threads_soft_limit parameter` to increase performance in case of high QPS by means of limiting total number of threads for all queries. [#37285](https://github.com/ClickHouse/ClickHouse/pull/37285) ([Roman Vasin](https://github.com/rvasin)). -* Add `SLRU` cache policy for uncompressed cache and marks cache. ([Kseniia Sumarokova](https://github.com/kssenii)). [#34651](https://github.com/ClickHouse/ClickHouse/pull/34651) ([alexX512](https://github.com/alexX512)). Decoupling local cache function and cache algorithm [#38048](https://github.com/ClickHouse/ClickHouse/pull/38048) ([Han Shukai](https://github.com/KinderRiven)). +* Add `SLRU` cache policy for uncompressed cache and marks cache. ([Kseniia Sumarokova](https://github.com/kssenii)). [#34651](https://github.com/ClickHouse/ClickHouse/pull/34651) ([alexX512](https://github.com/alexX512)). Decoupling local cache function and cache algorithm [#38048](https://github.com/ClickHouse/ClickHouse/pull/38048) ([Han Shukai](https://github.com/KinderRiven)). * Intel® In-Memory Analytics Accelerator (Intel® IAA) is a hardware accelerator available in the upcoming generation of Intel® Xeon® Scalable processors ("Sapphire Rapids"). Its goal is to speed up common operations in analytics like data (de)compression and filtering. ClickHouse gained the new "DeflateQpl" compression codec which utilizes the Intel® IAA offloading technology to provide a high-performance DEFLATE implementation. The codec uses the [Intel® Query Processing Library (QPL)](https://github.com/intel/qpl) which abstracts access to the hardware accelerator, respectively to a software fallback in case the hardware accelerator is not available. DEFLATE provides in general higher compression rates than ClickHouse's LZ4 default codec, and as a result, offers less disk I/O and lower main memory consumption. [#36654](https://github.com/ClickHouse/ClickHouse/pull/36654) ([jasperzhu](https://github.com/jinjunzh)). [#39494](https://github.com/ClickHouse/ClickHouse/pull/39494) ([Robert Schulze](https://github.com/rschu1ze)). * `DISTINCT` in order with `ORDER BY`: Deduce way to sort based on input stream sort description. Skip sorting if input stream is already sorted. [#38719](https://github.com/ClickHouse/ClickHouse/pull/38719) ([Igor Nikonov](https://github.com/devcrafter)). Improve memory usage (significantly) and query execution time + use `DistinctSortedChunkTransform` for final distinct when `DISTINCT` columns match `ORDER BY` columns, but rename to `DistinctSortedStreamTransform` in `EXPLAIN PIPELINE` → this improves memory usage significantly + remove unnecessary allocations in hot loop in `DistinctSortedChunkTransform`. [#39432](https://github.com/ClickHouse/ClickHouse/pull/39432) ([Igor Nikonov](https://github.com/devcrafter)). Use `DistinctSortedTransform` only when sort description is applicable to DISTINCT columns, otherwise fall back to ordinary DISTINCT implementation + it allows making less checks during `DistinctSortedTransform` execution. [#39528](https://github.com/ClickHouse/ClickHouse/pull/39528) ([Igor Nikonov](https://github.com/devcrafter)). Fix: `DistinctSortedTransform` didn't take advantage of sorting. It never cleared HashSet since clearing_columns were detected incorrectly (always empty). So, it basically worked as ordinary `DISTINCT` (`DistinctTransform`). The fix reduces memory usage significantly. [#39538](https://github.com/ClickHouse/ClickHouse/pull/39538) ([Igor Nikonov](https://github.com/devcrafter)). 
* Use local node as first priority to get structure of remote table when executing `cluster` and similar table functions. [#39440](https://github.com/ClickHouse/ClickHouse/pull/39440) ([Mingliang Pan](https://github.com/liangliangpan)). @@ -256,6 +397,7 @@ * Improve bytes to bits mask transform for SSE/AVX/AVX512. [#39586](https://github.com/ClickHouse/ClickHouse/pull/39586) ([Guo Wangyang](https://github.com/guowangy)). #### Improvement + * Normalize `AggregateFunction` types and state representations because optimizations like [#35788](https://github.com/ClickHouse/ClickHouse/pull/35788) will treat `count(not null columns)` as `count()`, which might confuse distributed interpreters with the following error: `Conversion from AggregateFunction(count) to AggregateFunction(count, Int64) is not supported`. [#39420](https://github.com/ClickHouse/ClickHouse/pull/39420) ([Amos Bird](https://github.com/amosbird)). The functions with identical states can be used in materialized views interchangeably. * Rework and simplify the `system.backups` table, remove the `internal` column, allow the user to set the ID of the operation, add columns `num_files`, `uncompressed_size`, `compressed_size`, `start_time`, `end_time`. [#39503](https://github.com/ClickHouse/ClickHouse/pull/39503) ([Vitaly Baranov](https://github.com/vitlibar)). * Improved structure of DDL query result table for `Replicated` database (separate columns with shard and replica name, clearer status) - `CREATE TABLE ... ON CLUSTER` queries can be normalized on initiator first if `distributed_ddl_entry_format_version` is set to 3 (default value). It means that `ON CLUSTER` queries may not work if the initiator does not belong to the cluster that is specified in the query. Fixes [#37318](https://github.com/ClickHouse/ClickHouse/issues/37318), [#39500](https://github.com/ClickHouse/ClickHouse/issues/39500) - Ignore `ON CLUSTER` clause if database is `Replicated` and cluster name equals the database name. Related to [#35570](https://github.com/ClickHouse/ClickHouse/issues/35570) - Miscellaneous minor fixes for `Replicated` database engine - Check metadata consistency when starting up `Replicated` database, start replica recovery in case of mismatch of local metadata and metadata in Keeper. Resolves [#24880](https://github.com/ClickHouse/ClickHouse/issues/24880). [#37198](https://github.com/ClickHouse/ClickHouse/pull/37198) ([Alexander Tokmakov](https://github.com/tavplubix)). @@ -294,6 +436,7 @@ * Add support for LARGE_BINARY/LARGE_STRING with Arrow (Closes [#32401](https://github.com/ClickHouse/ClickHouse/issues/32401)). [#40293](https://github.com/ClickHouse/ClickHouse/pull/40293) ([Josh Taylor](https://github.com/joshuataylor)). #### Build/Testing/Packaging Improvement + * [ClickFiddle](https://fiddle.clickhouse.com/): A new tool for testing ClickHouse versions in read/write mode (**Igor Baliuk**). * ClickHouse binary is made self-extracting [#35775](https://github.com/ClickHouse/ClickHouse/pull/35775) ([Yakov Olkhovskiy, Arthur Filatenkov](https://github.com/yakov-olkhovskiy)). * Update tzdata to 2022b to support the new timezone changes. See https://github.com/google/cctz/pull/226. Chile's 2022 DST start is delayed from September 4 to September 11. Iran plans to stop observing DST permanently, after it falls back on 2022-09-21. There are corrections of the historical time zone of Asia/Tehran in the year 1977: Iran adopted standard time in 1935, not 1946.
In 1977 it observed DST from 03-21 23:00 to 10-20 24:00; its 1978 transitions were on 03-24 and 08-05, not 03-20 and 10-20; and its spring 1979 transition was on 05-27, not 03-21 (https://data.iana.org/time-zones/tzdb/NEWS). ([Alexey Milovidov](https://github.com/alexey-milovidov)). @@ -308,6 +451,7 @@ * Docker: Now entrypoint.sh in docker image creates and executes `chown` for all folders it finds in the config for multidisk setup [#17717](https://github.com/ClickHouse/ClickHouse/issues/17717). [#39121](https://github.com/ClickHouse/ClickHouse/pull/39121) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). #### Bug Fix + * Fix possible segfault in `CapnProto` input format. This bug was found and sent through the ClickHouse bug-bounty [program](https://github.com/ClickHouse/ClickHouse/issues/38986) by *kiojj*. [#40241](https://github.com/ClickHouse/ClickHouse/pull/40241) ([Kruglov Pavel](https://github.com/Avogar)). * Fix a very rare case of incorrect behavior of array subscript operator. This closes [#28720](https://github.com/ClickHouse/ClickHouse/issues/28720). [#40185](https://github.com/ClickHouse/ClickHouse/pull/40185) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Fix insufficient argument check for encryption functions (found by query fuzzer). This closes [#39987](https://github.com/ClickHouse/ClickHouse/issues/39987). [#40194](https://github.com/ClickHouse/ClickHouse/pull/40194) ([Alexey Milovidov](https://github.com/alexey-milovidov)). @@ -358,16 +502,17 @@ * A fix for reverse DNS resolution. [#40134](https://github.com/ClickHouse/ClickHouse/pull/40134) ([Arthur Passos](https://github.com/arthurpassos)). * Fix unexpected result of `arrayDifference` for `Array(UInt32)`. [#40211](https://github.com/ClickHouse/ClickHouse/pull/40211) ([Duc Canh Le](https://github.com/canhld94)). - ### ClickHouse release 22.7, 2022-07-21 #### Upgrade Notes + * Enable setting `enable_positional_arguments` by default. It allows queries like `SELECT ... ORDER BY 1, 2` where 1, 2 are references to the select clause. If you need to return the old behavior, disable this setting. [#38204](https://github.com/ClickHouse/ClickHouse/pull/38204) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Disable `format_csv_allow_single_quotes` by default. See [#37096](https://github.com/ClickHouse/ClickHouse/issues/37096). ([Kruglov Pavel](https://github.com/Avogar)). * `Ordinary` database engine and old storage definition syntax for `*MergeTree` tables are deprecated. By default it's not possible to create new databases with `Ordinary` engine. If `system` database has `Ordinary` engine it will be automatically converted to `Atomic` on server startup. There are settings to keep old behavior (`allow_deprecated_database_ordinary` and `allow_deprecated_syntax_for_merge_tree`), but these settings may be removed in future releases. [#38335](https://github.com/ClickHouse/ClickHouse/pull/38335) ([Alexander Tokmakov](https://github.com/tavplubix)). * Force rewriting comma join to inner by default (set default value `cross_to_inner_join_rewrite = 2`). To keep the old behavior, set `cross_to_inner_join_rewrite = 1`. [#39326](https://github.com/ClickHouse/ClickHouse/pull/39326) ([Vladimir C](https://github.com/vdimir)). If you face any incompatibilities, you can turn this setting back. #### New Feature + * Support expressions with window functions. Closes [#19857](https://github.com/ClickHouse/ClickHouse/issues/19857).
[#37848](https://github.com/ClickHouse/ClickHouse/pull/37848) ([Dmitry Novik](https://github.com/novikd)). * Add new `direct` join algorithm for `EmbeddedRocksDB` tables, see [#33582](https://github.com/ClickHouse/ClickHouse/issues/33582). [#35363](https://github.com/ClickHouse/ClickHouse/pull/35363) ([Vladimir C](https://github.com/vdimir)). * Added full sorting merge join algorithm. [#35796](https://github.com/ClickHouse/ClickHouse/pull/35796) ([Vladimir C](https://github.com/vdimir)). @@ -395,9 +540,11 @@ * Add `clickhouse-diagnostics` binary to the packages. [#38647](https://github.com/ClickHouse/ClickHouse/pull/38647) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). #### Experimental Feature + * Adds new setting `implicit_transaction` to run standalone queries inside a transaction. It handles both creation and closing (via COMMIT if the query succeeded or ROLLBACK if it didn't) of the transaction automatically. [#38344](https://github.com/ClickHouse/ClickHouse/pull/38344) ([Raúl Marín](https://github.com/Algunenano)). #### Performance Improvement + * Distinct optimization for sorted columns. Use specialized distinct transformation in case input stream is sorted by column(s) in distinct. Optimization can be applied to pre-distinct, final distinct, or both. Initial implementation by @dimarub2000. [#37803](https://github.com/ClickHouse/ClickHouse/pull/37803) ([Igor Nikonov](https://github.com/devcrafter)). * Improve performance of `ORDER BY`, `MergeTree` merges, window functions using batch version of `BinaryHeap`. [#38022](https://github.com/ClickHouse/ClickHouse/pull/38022) ([Maksim Kita](https://github.com/kitaisreal)). * More parallel execution for queries with `FINAL` [#36396](https://github.com/ClickHouse/ClickHouse/pull/36396) ([Nikita Taranov](https://github.com/nickitat)). @@ -407,7 +554,7 @@ * Improve performance of insertion to columns of type `JSON`. [#38320](https://github.com/ClickHouse/ClickHouse/pull/38320) ([Anton Popov](https://github.com/CurtizJ)). * Optimized insertion and lookups in the HashTable. [#38413](https://github.com/ClickHouse/ClickHouse/pull/38413) ([Nikita Taranov](https://github.com/nickitat)). * Fix performance degradation from [#32493](https://github.com/ClickHouse/ClickHouse/issues/32493). [#38417](https://github.com/ClickHouse/ClickHouse/pull/38417) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Improve performance of joining with numeric columns using SIMD instructions. [#37235](https://github.com/ClickHouse/ClickHouse/pull/37235) ([zzachimed](https://github.com/zzachimed)). [#38565](https://github.com/ClickHouse/ClickHouse/pull/38565) ([Maksim Kita](https://github.com/kitaisreal)). +* Improve performance of joining with numeric columns using SIMD instructions. [#37235](https://github.com/ClickHouse/ClickHouse/pull/37235) ([zzachimed](https://github.com/zzachimed)). [#38565](https://github.com/ClickHouse/ClickHouse/pull/38565) ([Maksim Kita](https://github.com/kitaisreal)). * Norm and Distance functions for arrays speed up 1.2-2 times. [#38740](https://github.com/ClickHouse/ClickHouse/pull/38740) ([Alexander Gololobov](https://github.com/davenger)). * Add AVX-512 VBMI optimized `copyOverlap32Shuffle` for LZ4 decompression. In other words, LZ4 decompression performance is improved. [#37891](https://github.com/ClickHouse/ClickHouse/pull/37891) ([Guo Wangyang](https://github.com/guowangy)). * `ORDER BY (a, b)` will use all the same benefits as `ORDER BY a, b`. 
[#38873](https://github.com/ClickHouse/ClickHouse/pull/38873) ([Igor Nikonov](https://github.com/devcrafter)). @@ -419,6 +566,7 @@ * The table `system.asynchronous_metric_log` is further optimized for storage space. This closes [#38134](https://github.com/ClickHouse/ClickHouse/issues/38134). See the [YouTube video](https://www.youtube.com/watch?v=0fSp9SF8N8A). [#38428](https://github.com/ClickHouse/ClickHouse/pull/38428) ([Alexey Milovidov](https://github.com/alexey-milovidov)). #### Improvement + * Support SQL standard CREATE INDEX and DROP INDEX syntax. [#35166](https://github.com/ClickHouse/ClickHouse/pull/35166) ([Jianmei Zhang](https://github.com/zhangjmruc)). * Send profile events for INSERT queries (previously only SELECT was supported). [#37391](https://github.com/ClickHouse/ClickHouse/pull/37391) ([Azat Khuzhin](https://github.com/azat)). * Implement in order aggregation (`optimize_aggregation_in_order`) for fully materialized projections. [#37469](https://github.com/ClickHouse/ClickHouse/pull/37469) ([Azat Khuzhin](https://github.com/azat)). @@ -464,6 +612,7 @@ * Allow to declare `RabbitMQ` queue without default arguments `x-max-length` and `x-overflow`. [#39259](https://github.com/ClickHouse/ClickHouse/pull/39259) ([rnbondarenko](https://github.com/rnbondarenko)). #### Build/Testing/Packaging Improvement + * Apply Clang Thread Safety Analysis (TSA) annotations to ClickHouse. [#38068](https://github.com/ClickHouse/ClickHouse/pull/38068) ([Robert Schulze](https://github.com/rschu1ze)). * Adapt universal installation script for FreeBSD. [#39302](https://github.com/ClickHouse/ClickHouse/pull/39302) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Preparation for building on `s390x` platform. [#39193](https://github.com/ClickHouse/ClickHouse/pull/39193) ([Harry Lee](https://github.com/HarryLeeIBM)). @@ -473,6 +622,7 @@ * Change `all|noarch` packages to architecture-dependent - Fix some documentation for it - Push aarch64|arm64 packages to artifactory and release assets - Fixes [#36443](https://github.com/ClickHouse/ClickHouse/issues/36443). [#38580](https://github.com/ClickHouse/ClickHouse/pull/38580) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). #### Bug Fix (user-visible misbehavior in official stable or prestable release) + * Fix rounding for `Decimal128/Decimal256` with more than 19-digits long scale. [#38027](https://github.com/ClickHouse/ClickHouse/pull/38027) ([Igor Nikonov](https://github.com/devcrafter)). * Fixed crash caused by data race in storage `Hive` (integration table engine). [#38887](https://github.com/ClickHouse/ClickHouse/pull/38887) ([lgbo](https://github.com/lgbo-ustc)). * Fix crash when executing GRANT ALL ON *.* with ON CLUSTER. It was broken in https://github.com/ClickHouse/ClickHouse/pull/35767. This closes [#38618](https://github.com/ClickHouse/ClickHouse/issues/38618). [#38674](https://github.com/ClickHouse/ClickHouse/pull/38674) ([Vitaly Baranov](https://github.com/vitlibar)). @@ -529,6 +679,7 @@ ### ClickHouse release 22.6, 2022-06-16 #### Backward Incompatible Change + * Remove support for octal number literals in SQL. In previous versions they were parsed as Float64. [#37765](https://github.com/ClickHouse/ClickHouse/pull/37765) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). * Changes how settings using `seconds` as type are parsed to support floating point values (for example: `max_execution_time=0.5`). Infinity or NaN values will throw an exception. 
[#37187](https://github.com/ClickHouse/ClickHouse/pull/37187) ([Raúl Marín](https://github.com/Algunenano)). * Changed format of binary serialization of columns of experimental type `Object`. New format is more convenient to implement by third-party clients. [#37482](https://github.com/ClickHouse/ClickHouse/pull/37482) ([Anton Popov](https://github.com/CurtizJ)). @@ -537,6 +688,7 @@ * If you run different ClickHouse versions on a cluster with AArch64 CPU or mix AArch64 and amd64 on a cluster, and use distributed queries with GROUP BY multiple keys of fixed-size type that fit in 256 bits but don't fit in 64 bits, and the size of the result is huge, the data will not be fully aggregated in the result of these queries during upgrade. Workaround: upgrade with downtime instead of a rolling upgrade. #### New Feature + * Add `GROUPING` function. It allows to disambiguate the records in the queries with `ROLLUP`, `CUBE` or `GROUPING SETS`. Closes [#19426](https://github.com/ClickHouse/ClickHouse/issues/19426). [#37163](https://github.com/ClickHouse/ClickHouse/pull/37163) ([Dmitry Novik](https://github.com/novikd)). * A new codec [FPC](https://userweb.cs.txstate.edu/~burtscher/papers/dcc07a.pdf) algorithm for floating point data compression. [#37553](https://github.com/ClickHouse/ClickHouse/pull/37553) ([Mikhail Guzov](https://github.com/koloshmet)). * Add new columnar JSON formats: `JSONColumns`, `JSONCompactColumns`, `JSONColumnsWithMetadata`. Closes [#36338](https://github.com/ClickHouse/ClickHouse/issues/36338) Closes [#34509](https://github.com/ClickHouse/ClickHouse/issues/34509). [#36975](https://github.com/ClickHouse/ClickHouse/pull/36975) ([Kruglov Pavel](https://github.com/Avogar)). @@ -557,11 +709,13 @@ * Added `SYSTEM UNFREEZE` query that deletes the whole backup regardless if the corresponding table is deleted or not. [#36424](https://github.com/ClickHouse/ClickHouse/pull/36424) ([Vadim Volodin](https://github.com/PolyProgrammist)). #### Experimental Feature + * Enables `POPULATE` for `WINDOW VIEW`. [#36945](https://github.com/ClickHouse/ClickHouse/pull/36945) ([vxider](https://github.com/Vxider)). * `ALTER TABLE ... MODIFY QUERY` support for `WINDOW VIEW`. [#37188](https://github.com/ClickHouse/ClickHouse/pull/37188) ([vxider](https://github.com/Vxider)). * This PR changes the behavior of the `ENGINE` syntax in `WINDOW VIEW`, to make it like in `MATERIALIZED VIEW`. [#37214](https://github.com/ClickHouse/ClickHouse/pull/37214) ([vxider](https://github.com/Vxider)). #### Performance Improvement + * Added numerous optimizations for ARM NEON [#38093](https://github.com/ClickHouse/ClickHouse/pull/38093)([Daniel Kutenin](https://github.com/danlark1)), ([Alexandra Pilipyuk](https://github.com/chalice19)) Note: if you run different ClickHouse versions on a cluster with ARM CPU and use distributed queries with GROUP BY multiple keys of fixed-size type that fit in 256 bits but don't fit in 64 bits, the result of the aggregation query will be wrong during upgrade. Workaround: upgrade with downtime instead of a rolling upgrade. * Improve performance and memory usage for select of subset of columns for formats Native, Protobuf, CapnProto, JSONEachRow, TSKV, all formats with suffixes WithNames/WithNamesAndTypes. Previously while selecting only subset of columns from files in these formats all columns were read and stored in memory. Now only required columns are read. 
This PR enables setting `input_format_skip_unknown_fields` by default, because otherwise in case of select of subset of columns exception will be thrown. [#37192](https://github.com/ClickHouse/ClickHouse/pull/37192) ([Kruglov Pavel](https://github.com/Avogar)). * Now more filters can be pushed down for join. [#37472](https://github.com/ClickHouse/ClickHouse/pull/37472) ([Amos Bird](https://github.com/amosbird)). @@ -592,6 +746,7 @@ * In function: CompressedWriteBuffer::nextImpl(), there is an unnecessary write-copy step that would happen frequently during inserting data. Below shows the differentiation with this patch: - Before: 1. Compress "working_buffer" into "compressed_buffer" 2. write-copy into "out" - After: Directly Compress "working_buffer" into "out". [#37242](https://github.com/ClickHouse/ClickHouse/pull/37242) ([jasperzhu](https://github.com/jinjunzh)). #### Improvement + * Support types with non-standard defaults in ROLLUP, CUBE, GROUPING SETS. Closes [#37360](https://github.com/ClickHouse/ClickHouse/issues/37360). [#37667](https://github.com/ClickHouse/ClickHouse/pull/37667) ([Dmitry Novik](https://github.com/novikd)). * Fix stack traces collection on ARM. Closes [#37044](https://github.com/ClickHouse/ClickHouse/issues/37044). Closes [#15638](https://github.com/ClickHouse/ClickHouse/issues/15638). [#37797](https://github.com/ClickHouse/ClickHouse/pull/37797) ([Maksim Kita](https://github.com/kitaisreal)). * Client will try every IP address returned by DNS resolution until successful connection. [#37273](https://github.com/ClickHouse/ClickHouse/pull/37273) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). @@ -633,6 +788,7 @@ * Add implicit grants with grant option too. For example `GRANT CREATE TABLE ON test.* TO A WITH GRANT OPTION` now allows `A` to execute `GRANT CREATE VIEW ON test.* TO B`. [#38017](https://github.com/ClickHouse/ClickHouse/pull/38017) ([Vitaly Baranov](https://github.com/vitlibar)). #### Build/Testing/Packaging Improvement + * Use `clang-14` and LLVM infrastructure version 14 for builds. This closes [#34681](https://github.com/ClickHouse/ClickHouse/issues/34681). [#34754](https://github.com/ClickHouse/ClickHouse/pull/34754) ([Alexey Milovidov](https://github.com/alexey-milovidov)). Note: `clang-14` has [a bug](https://github.com/google/sanitizers/issues/1540) in ThreadSanitizer that makes our CI work worse. * Allow to drop privileges at startup. This simplifies Docker images. Closes [#36293](https://github.com/ClickHouse/ClickHouse/issues/36293). [#36341](https://github.com/ClickHouse/ClickHouse/pull/36341) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Add docs spellcheck to CI. [#37790](https://github.com/ClickHouse/ClickHouse/pull/37790) ([Vladimir C](https://github.com/vdimir)). @@ -690,7 +846,6 @@ * Fix possible heap-use-after-free error when reading system.projection_parts and system.projection_parts_columns . This fixes [#37184](https://github.com/ClickHouse/ClickHouse/issues/37184). [#37185](https://github.com/ClickHouse/ClickHouse/pull/37185) ([Amos Bird](https://github.com/amosbird)). * Fixed `DateTime64` fractional seconds behavior prior to Unix epoch. [#37697](https://github.com/ClickHouse/ClickHouse/pull/37697) ([Andrey Zvonov](https://github.com/zvonand)). [#37039](https://github.com/ClickHouse/ClickHouse/pull/37039) ([李扬](https://github.com/taiyang-li)). - ### ClickHouse release 22.5, 2022-05-19 #### Upgrade Notes @@ -743,7 +898,7 @@ * Implement partial GROUP BY key for optimize_aggregation_in_order. 
[#35111](https://github.com/ClickHouse/ClickHouse/pull/35111) ([Azat Khuzhin](https://github.com/azat)). #### Improvement - + * Show names of erroneous files in case of parsing errors while executing table functions `file`, `s3` and `url`. [#36314](https://github.com/ClickHouse/ClickHouse/pull/36314) ([Anton Popov](https://github.com/CurtizJ)). * Allowed to increase the number of threads for executing background operations (merges, mutations, moves and fetches) at runtime if they are specified at top level config. [#36425](https://github.com/ClickHouse/ClickHouse/pull/36425) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). * Now date time conversion functions that generates time before 1970-01-01 00:00:00 with partial hours/minutes timezones will be saturated to zero instead of overflow. This is the continuation of https://github.com/ClickHouse/ClickHouse/pull/29953 which addresses https://github.com/ClickHouse/ClickHouse/pull/29953#discussion_r800550280 . Mark as improvement because it's implementation defined behavior (and very rare case) and we are allowed to break it. [#36656](https://github.com/ClickHouse/ClickHouse/pull/36656) ([Amos Bird](https://github.com/amosbird)). @@ -852,7 +1007,6 @@ * Fix ALTER DROP COLUMN of nested column with compact parts (i.e. `ALTER TABLE x DROP COLUMN n`, when there is column `n.d`). [#35797](https://github.com/ClickHouse/ClickHouse/pull/35797) ([Azat Khuzhin](https://github.com/azat)). * Fix substring function range error length when `offset` and `length` is negative constant and `s` is not constant. [#33861](https://github.com/ClickHouse/ClickHouse/pull/33861) ([RogerYK](https://github.com/RogerYK)). - ### ClickHouse release 22.4, 2022-04-19 #### Backward Incompatible Change @@ -1004,8 +1158,7 @@ * Fix mutations in tables with enabled sparse columns. [#35284](https://github.com/ClickHouse/ClickHouse/pull/35284) ([Anton Popov](https://github.com/CurtizJ)). * Do not delay final part writing by default (fixes possible `Memory limit exceeded` during `INSERT` by adding `max_insert_delayed_streams_for_parallel_write` with default to 1000 for writes to s3 and disabled as before otherwise). [#34780](https://github.com/ClickHouse/ClickHouse/pull/34780) ([Azat Khuzhin](https://github.com/azat)). - -## ClickHouse release v22.3-lts, 2022-03-17 +### ClickHouse release v22.3-lts, 2022-03-17 #### Backward Incompatible Change @@ -1132,7 +1285,6 @@ * Fix incorrect result of trivial count query when part movement feature is used [#34089](https://github.com/ClickHouse/ClickHouse/issues/34089). [#34385](https://github.com/ClickHouse/ClickHouse/pull/34385) ([nvartolomei](https://github.com/nvartolomei)). * Fix inconsistency of `max_query_size` limitation in distributed subqueries. [#34078](https://github.com/ClickHouse/ClickHouse/pull/34078) ([Chao Ma](https://github.com/godliness)). - ### ClickHouse release v22.2, 2022-02-17 #### Upgrade Notes @@ -1308,7 +1460,6 @@ * Fix issue [#18206](https://github.com/ClickHouse/ClickHouse/issues/18206). [#33977](https://github.com/ClickHouse/ClickHouse/pull/33977) ([Vitaly Baranov](https://github.com/vitlibar)). * This PR allows using multiple LDAP storages in the same list of user directories. It worked earlier but was broken because LDAP tests are disabled (they are part of the testflows tests). [#33574](https://github.com/ClickHouse/ClickHouse/pull/33574) ([Vitaly Baranov](https://github.com/vitlibar)). 
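To make the `ALTER TABLE x DROP COLUMN n` bug-fix entry above (nested column with compact parts, listed under 22.5) easier to picture, here is a minimal sketch; the table definition and the `min_bytes_for_wide_part` value are illustrative assumptions, chosen only so that the parts stay compact.

```sql
-- Hypothetical table, for illustration only: the Nested column `n`
-- is stored as subcolumns such as `n.d` and `n.v`.
CREATE TABLE x
(
    key UInt64,
    n Nested(d Date, v UInt32)
)
ENGINE = MergeTree
ORDER BY key
SETTINGS min_bytes_for_wide_part = 1000000000;  -- assumed value, keeps parts compact

INSERT INTO x VALUES (1, ['2022-01-01'], [42]);

-- Previously this could fail on compact parts because of the `n.d`/`n.v` subcolumns.
ALTER TABLE x DROP COLUMN n;
```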
- ### ClickHouse release v22.1, 2022-01-18 #### Upgrade Notes @@ -1335,7 +1486,6 @@ * Add function `decodeURLFormComponent` slightly different to `decodeURLComponent`. Close [#10298](https://github.com/ClickHouse/ClickHouse/issues/10298). [#33451](https://github.com/ClickHouse/ClickHouse/pull/33451) ([SuperDJY](https://github.com/cmsxbc)). * Allow to split `GraphiteMergeTree` rollup rules for plain/tagged metrics (optional rule_type field). [#33494](https://github.com/ClickHouse/ClickHouse/pull/33494) ([Michail Safronov](https://github.com/msaf1980)). - #### Performance Improvement * Support moving conditions to `PREWHERE` (setting `optimize_move_to_prewhere`) for tables of `Merge` engine if its all underlying tables supports `PREWHERE`. [#33300](https://github.com/ClickHouse/ClickHouse/pull/33300) ([Anton Popov](https://github.com/CurtizJ)). @@ -1351,7 +1501,6 @@ * Optimize selecting of MergeTree parts that can be moved between volumes. [#33225](https://github.com/ClickHouse/ClickHouse/pull/33225) ([OnePiece](https://github.com/zhongyuankai)). * Fix `sparse_hashed` dict performance with sequential keys (wrong hash function). [#32536](https://github.com/ClickHouse/ClickHouse/pull/32536) ([Azat Khuzhin](https://github.com/azat)). - #### Experimental Feature * Parallel reading from multiple replicas within a shard during distributed query without using sample key. To enable this, set `allow_experimental_parallel_reading_from_replicas = 1` and `max_parallel_replicas` to any number. This closes [#26748](https://github.com/ClickHouse/ClickHouse/issues/26748). [#29279](https://github.com/ClickHouse/ClickHouse/pull/29279) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). @@ -1364,7 +1513,6 @@ * Fix ACL with explicit digit hash in `clickhouse-keeper`: now the behavior consistent with ZooKeeper and generated digest is always accepted. [#33249](https://github.com/ClickHouse/ClickHouse/pull/33249) ([小路](https://github.com/nicelulu)). [#33246](https://github.com/ClickHouse/ClickHouse/pull/33246). * Fix unexpected projection removal when detaching parts. [#32067](https://github.com/ClickHouse/ClickHouse/pull/32067) ([Amos Bird](https://github.com/amosbird)). - #### Improvement * Now date time conversion functions that generates time before `1970-01-01 00:00:00` will be saturated to zero instead of overflow. [#29953](https://github.com/ClickHouse/ClickHouse/pull/29953) ([Amos Bird](https://github.com/amosbird)). It also fixes a bug in index analysis if date truncation function would yield result before the Unix epoch. @@ -1411,7 +1559,6 @@ * Updating `modification_time` for data part in `system.parts` after part movement [#32964](https://github.com/ClickHouse/ClickHouse/issues/32964). [#32965](https://github.com/ClickHouse/ClickHouse/pull/32965) ([save-my-heart](https://github.com/save-my-heart)). * Potential issue, cannot be exploited: integer overflow may happen in array resize. [#33024](https://github.com/ClickHouse/ClickHouse/pull/33024) ([varadarajkumar](https://github.com/varadarajkumar)). - #### Build/Testing/Packaging Improvement * Add packages, functional tests and Docker builds for AArch64 (ARM) version of ClickHouse. [#32911](https://github.com/ClickHouse/ClickHouse/pull/32911) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). [#32415](https://github.com/ClickHouse/ClickHouse/pull/32415) @@ -1426,7 +1573,6 @@ * Inject git information into clickhouse binary file. So we can get source code revision easily from clickhouse binary file. 
[#33124](https://github.com/ClickHouse/ClickHouse/pull/33124) ([taiyang-li](https://github.com/taiyang-li)). * Remove obsolete code from ConfigProcessor. Yandex specific code is not used anymore. The code contained one minor defect. This defect was reported by [Mallik Hassan](https://github.com/SadiHassan) in [#33032](https://github.com/ClickHouse/ClickHouse/issues/33032). This closes [#33032](https://github.com/ClickHouse/ClickHouse/issues/33032). [#33026](https://github.com/ClickHouse/ClickHouse/pull/33026) ([alexey-milovidov](https://github.com/alexey-milovidov)). - #### Bug Fix (user-visible misbehavior in official stable or prestable release) * Several fixes for format parsing. This is relevant if `clickhouse-server` is open for write access to adversary. Specifically crafted input data for `Native` format may lead to reading uninitialized memory or crash. This is relevant if `clickhouse-server` is open for write access to adversary. [#33050](https://github.com/ClickHouse/ClickHouse/pull/33050) ([Heena Bansal](https://github.com/HeenaBansal2009)). Fixed Apache Avro Union type index out of boundary issue in Apache Avro binary format. [#33022](https://github.com/ClickHouse/ClickHouse/pull/33022) ([Harry Lee](https://github.com/HarryLeeIBM)). Fix null pointer dereference in `LowCardinality` data when deserializing `LowCardinality` data in the Native format. [#33021](https://github.com/ClickHouse/ClickHouse/pull/33021) ([Harry Lee](https://github.com/HarryLeeIBM)). @@ -1485,5 +1631,4 @@ * Fix possible crash (or incorrect result) in case of `LowCardinality` arguments of window function. Fixes [#31114](https://github.com/ClickHouse/ClickHouse/issues/31114). [#31888](https://github.com/ClickHouse/ClickHouse/pull/31888) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). * Fix hang up with command `DROP TABLE system.query_log sync`. [#33293](https://github.com/ClickHouse/ClickHouse/pull/33293) ([zhanghuajie](https://github.com/zhanghuajieHIT)). - ## [Changelog for 2021](https://clickhouse.com/docs/en/whats-new/changelog/2021) diff --git a/README.md b/README.md index 9f4a39a2c97..f90df9686c2 100644 --- a/README.md +++ b/README.md @@ -5,16 +5,17 @@ ClickHouse® is an open-source column-oriented database management system that a ## Useful Links * [Official website](https://clickhouse.com/) has a quick high-level overview of ClickHouse on the main page. -* [ClickHouse Cloud](https://clickhouse.com/cloud) ClickHouse as a service, built by the creators and maintainers. +* [ClickHouse Cloud](https://clickhouse.cloud) ClickHouse as a service, built by the creators and maintainers. * [Tutorial](https://clickhouse.com/docs/en/getting_started/tutorial/) shows how to set up and query a small ClickHouse cluster. * [Documentation](https://clickhouse.com/docs/en/) provides more in-depth information. * [YouTube channel](https://www.youtube.com/c/ClickHouseDB) has a lot of content about ClickHouse in video format. * [Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-rxm3rdrk-lIUmhLC3V8WTaL0TGxsOmg) and [Telegram](https://telegram.me/clickhouse_en) allow chatting with ClickHouse users in real-time. -* [Blog](https://clickhouse.com/blog/en/) contains various ClickHouse-related articles, as well as announcements and reports about events. +* [Blog](https://clickhouse.com/blog/) contains various ClickHouse-related articles, as well as announcements and reports about events. * [Code Browser (Woboq)](https://clickhouse.com/codebrowser/ClickHouse/index.html) with syntax highlight and navigation. 
* [Code Browser (github.dev)](https://github.dev/ClickHouse/ClickHouse) with syntax highlight, powered by github.dev. * [Contacts](https://clickhouse.com/company/contact) can help to get your questions answered if there are any. ## Upcoming events -* [**v22.10 Release Webinar**](https://clickhouse.com/company/events/v22-10-release-webinar) Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release, provide live demos, and share vision into what is coming in the roadmap. -* [**Introducing ClickHouse Cloud**](https://clickhouse.com/company/events/cloud-beta) Introducing ClickHouse as a service, built by creators and maintainers of the fastest OLAP database on earth. Join Tanya Bragin for a detailed walkthrough of ClickHouse Cloud capabilities, as well as a peek behind the curtain to understand the unique architecture that makes our service tick. +* [**v22.11 Release Webinar**](https://clickhouse.com/company/events/v22-11-release-webinar) Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release, provide live demos, and share vision into what is coming in the roadmap. +* [**ClickHouse Meetup at the Deutsche Bank office in Berlin**](https://www.meetup.com/clickhouse-berlin-user-group/events/289311596/) Hear from Deutsche Bank on why they chose ClickHouse for big sensitive data in a regulated environment. The ClickHouse team will then present how ClickHouse is used for real time financial data analytics, including tick data, trade analytics and risk management. +* [**AWS re:Invent**](https://clickhouse.com/company/events/aws-reinvent) Core members of the ClickHouse team -- including 2 of our founders -- will be at re:Invent from November 29 to December 3. We are available on the show floor, but are also determining interest in holding an event during the time there. diff --git a/SECURITY.md b/SECURITY.md index fb6caa92cb8..0fb333c8ea3 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -10,9 +10,11 @@ The following versions of ClickHouse server are currently being supported with s | Version | Supported | |:-|:-| +| 22.10 | ✔️ | +| 22.9 | ✔️ | | 22.8 | ✔️ | -| 22.7 | ✔️ | -| 22.6 | ✔️ | +| 22.7 | ❌ | +| 22.6 | ❌ | | 22.5 | ❌ | | 22.4 | ❌ | | 22.3 | ✔️ | diff --git a/base/base/ReplxxLineReader.cpp b/base/base/ReplxxLineReader.cpp index a014fa4b8f2..e0dc81af5b0 100644 --- a/base/base/ReplxxLineReader.cpp +++ b/base/base/ReplxxLineReader.cpp @@ -151,7 +151,7 @@ public: { size_t dot_pos = path.rfind('.'); if (dot_pos != std::string::npos) - fd = ::mkstemps(path.data(), path.size() - dot_pos); + fd = ::mkstemps(path.data(), static_cast(path.size() - dot_pos)); else fd = ::mkstemp(path.data()); @@ -408,7 +408,7 @@ ReplxxLineReader::ReplxxLineReader( // In a simplest case use simple comment. 
commented_line = fmt::format("-- {}", state.text()); } - rx.set_state(replxx::Replxx::State(commented_line.c_str(), commented_line.size())); + rx.set_state(replxx::Replxx::State(commented_line.c_str(), static_cast(commented_line.size()))); return rx.invoke(Replxx::ACTION::COMMIT_LINE, code); }; @@ -480,7 +480,7 @@ void ReplxxLineReader::openEditor() if (executeCommand(argv) == 0) { const std::string & new_query = readFile(editor_file.getPath()); - rx.set_state(replxx::Replxx::State(new_query.c_str(), new_query.size())); + rx.set_state(replxx::Replxx::State(new_query.c_str(), static_cast(new_query.size()))); } } catch (const std::runtime_error & e) @@ -526,7 +526,7 @@ void ReplxxLineReader::openInteractiveHistorySearch() { std::string new_query = readFile(output_file.getPath()); rightTrim(new_query); - rx.set_state(replxx::Replxx::State(new_query.c_str(), new_query.size())); + rx.set_state(replxx::Replxx::State(new_query.c_str(), static_cast(new_query.size()))); } } catch (const std::runtime_error & e) diff --git a/base/base/StringRef.h b/base/base/StringRef.h index 5ee197021ca..a3e32ff5058 100644 --- a/base/base/StringRef.h +++ b/base/base/StringRef.h @@ -265,7 +265,7 @@ inline size_t hashLessThan16(const char * data, size_t size) struct CRC32Hash { - size_t operator() (StringRef x) const + unsigned operator() (StringRef x) const { const char * pos = x.data; size_t size = x.size; @@ -275,22 +275,22 @@ struct CRC32Hash if (size < 8) { - return hashLessThan8(x.data, x.size); + return static_cast(hashLessThan8(x.data, x.size)); } const char * end = pos + size; - size_t res = -1ULL; + unsigned res = -1U; do { UInt64 word = unalignedLoad(pos); - res = CRC_INT(res, word); + res = static_cast(CRC_INT(res, word)); pos += 8; } while (pos + 8 < end); UInt64 word = unalignedLoad(end - 8); /// I'm not sure if this is normal. 
- res = CRC_INT(res, word); + res = static_cast(CRC_INT(res, word)); return res; } @@ -302,7 +302,7 @@ struct StringRefHash : CRC32Hash {}; struct CRC32Hash { - size_t operator() (StringRef /* x */) const + unsigned operator() (StringRef /* x */) const { throw std::logic_error{"Not implemented CRC32Hash without SSE"}; } diff --git a/base/base/itoa.h b/base/base/itoa.h index 5e0b18d50c0..dd3e3cc96fe 100644 --- a/base/base/itoa.h +++ b/base/base/itoa.h @@ -122,7 +122,7 @@ QuotientAndRemainder static inline split(UnsignedOfSize value) constexpr DivisionBy10PowN division; UnsignedOfSize quotient = (division.multiplier * (UnsignedOfSize<2 * N>(value) + division.add)) >> division.shift; - UnsignedOfSize remainder = value - quotient * pow10>(N); + UnsignedOfSize remainder = static_cast>(value - quotient * pow10>(N)); return {quotient, remainder}; } diff --git a/base/base/safeExit.cpp b/base/base/safeExit.cpp index e4f9e80759e..ddb93dac65b 100644 --- a/base/base/safeExit.cpp +++ b/base/base/safeExit.cpp @@ -1,10 +1,8 @@ #if defined(OS_LINUX) # include #endif -#include #include #include -#include [[noreturn]] void safeExit(int code) { diff --git a/base/base/wide_integer_impl.h b/base/base/wide_integer_impl.h index eb2edcb98ff..1b5f502722c 100644 --- a/base/base/wide_integer_impl.h +++ b/base/base/wide_integer_impl.h @@ -227,6 +227,8 @@ struct integer::_impl template __attribute__((no_sanitize("undefined"))) constexpr static auto to_Integral(T f) noexcept { + /// NOTE: this can be called with DB::Decimal, and in this case, result + /// will be wrong if constexpr (std::is_signed_v) return static_cast(f); else diff --git a/base/glibc-compatibility/musl/getauxval.c b/base/glibc-compatibility/musl/getauxval.c index 22886013d07..eba12604b4d 100644 --- a/base/glibc-compatibility/musl/getauxval.c +++ b/base/glibc-compatibility/musl/getauxval.c @@ -8,6 +8,8 @@ #include // ElfW #include +#include "syscall.h" + #define ARRAY_SIZE(a) sizeof((a))/sizeof((a[0])) /// Suppress TSan since it is possible for this code to be called from multiple threads, @@ -39,7 +41,9 @@ ssize_t __retry_read(int fd, void * buf, size_t count) { for (;;) { - ssize_t ret = read(fd, buf, count); + // We cannot use the read syscall as it will be intercept by sanitizers, which aren't + // initialized yet. Emit syscall directly. + ssize_t ret = __syscall_ret(__syscall(SYS_read, fd, buf, count)); if (ret == -1) { if (errno == EINTR) @@ -90,6 +94,11 @@ static unsigned long NO_SANITIZE_THREAD __auxv_init_procfs(unsigned long type) _Static_assert(sizeof(aux) < 4096, "Unexpected sizeof(aux)"); while (__retry_read(fd, &aux, sizeof(aux)) == sizeof(aux)) { +#if defined(__has_feature) +#if __has_feature(memory_sanitizer) + __msan_unpoison(&aux, sizeof(aux)); +#endif +#endif if (aux.a_type == AT_NULL) { break; diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index 9728451f38a..11b37f5a7c8 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -2,11 +2,11 @@ # NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION, # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes. 
-SET(VERSION_REVISION 54467) +SET(VERSION_REVISION 54468) SET(VERSION_MAJOR 22) -SET(VERSION_MINOR 10) +SET(VERSION_MINOR 11) SET(VERSION_PATCH 1) -SET(VERSION_GITHASH 3030d4c7ff09ec44ab07d0a8069ea923227288a1) -SET(VERSION_DESCRIBE v22.10.1.1-testing) -SET(VERSION_STRING 22.10.1.1) +SET(VERSION_GITHASH 98ab5a3c189232ea2a3dddb9d2be7196ae8b3434) +SET(VERSION_DESCRIBE v22.11.1.1-testing) +SET(VERSION_STRING 22.11.1.1) # end of autochange diff --git a/cmake/clang_tidy.cmake b/cmake/clang_tidy.cmake index 200282234ca..57295682487 100644 --- a/cmake/clang_tidy.cmake +++ b/cmake/clang_tidy.cmake @@ -3,10 +3,20 @@ option (ENABLE_CLANG_TIDY "Use clang-tidy static analyzer" OFF) if (ENABLE_CLANG_TIDY) - find_program (CLANG_TIDY_PATH NAMES "clang-tidy" "clang-tidy-15" "clang-tidy-14" "clang-tidy-13" "clang-tidy-12") + find_program (CLANG_TIDY_CACHE_PATH NAMES "clang-tidy-cache") + if (CLANG_TIDY_CACHE_PATH) + find_program (_CLANG_TIDY_PATH NAMES "clang-tidy" "clang-tidy-15" "clang-tidy-14" "clang-tidy-13" "clang-tidy-12") + + # Why do we use ';' here? + # It's a cmake black magic: https://cmake.org/cmake/help/latest/prop_tgt/LANG_CLANG_TIDY.html#prop_tgt:%3CLANG%3E_CLANG_TIDY + # The CLANG_TIDY_PATH is passed to CMAKE_CXX_CLANG_TIDY, which follows CXX_CLANG_TIDY syntax. + set (CLANG_TIDY_PATH "${CLANG_TIDY_CACHE_PATH};${_CLANG_TIDY_PATH}" CACHE STRING "A combined command to run clang-tidy with caching wrapper") + else () + find_program (CLANG_TIDY_PATH NAMES "clang-tidy" "clang-tidy-15" "clang-tidy-14" "clang-tidy-13" "clang-tidy-12") + endif () if (CLANG_TIDY_PATH) - message(STATUS + message (STATUS "Using clang-tidy: ${CLANG_TIDY_PATH}. The checks will be run during build process. See the .clang-tidy file at the root directory to configure the checks.") @@ -15,11 +25,15 @@ if (ENABLE_CLANG_TIDY) # clang-tidy requires assertions to guide the analysis # Note that NDEBUG is set implicitly by CMake for non-debug builds - set(COMPILER_FLAGS "${COMPILER_FLAGS} -UNDEBUG") + set (COMPILER_FLAGS "${COMPILER_FLAGS} -UNDEBUG") - # The variable CMAKE_CXX_CLANG_TIDY will be set inside src and base directories with non third-party code. + # The variable CMAKE_CXX_CLANG_TIDY will be set inside the following directories with non third-party code. 
+ # - base + # - programs + # - src + # - utils # set (CMAKE_CXX_CLANG_TIDY "${CLANG_TIDY_PATH}") else () - message(${RECONFIGURE_MESSAGE_LEVEL} "clang-tidy is not found") + message (${RECONFIGURE_MESSAGE_LEVEL} "clang-tidy is not found") endif () endif () diff --git a/cmake/cpu_features.cmake b/cmake/cpu_features.cmake index 6707d703372..a554992caf3 100644 --- a/cmake/cpu_features.cmake +++ b/cmake/cpu_features.cmake @@ -61,8 +61,14 @@ elseif (ARCH_AARCH64) endif () elseif (ARCH_PPC64LE) + # By Default, build for power8 and up, allow building for power9 and up # Note that gcc and clang have support for x86 SSE2 intrinsics when building for PowerPC - set (COMPILER_FLAGS "${COMPILER_FLAGS} -maltivec -mcpu=power8 -D__SSE2__=1 -DNO_WARN_X86_INTRINSICS") + option (POWER9 "Build for Power 9 CPU and above" 0) + if(POWER9) + set (COMPILER_FLAGS "${COMPILER_FLAGS} -maltivec -mcpu=power9 -D__SSE2__=1 -DNO_WARN_X86_INTRINSICS") + else () + set (COMPILER_FLAGS "${COMPILER_FLAGS} -maltivec -mcpu=power8 -D__SSE2__=1 -DNO_WARN_X86_INTRINSICS") + endif () elseif (ARCH_AMD64) option (ENABLE_SSSE3 "Use SSSE3 instructions on x86_64" 1) @@ -75,6 +81,7 @@ elseif (ARCH_AMD64) option (ENABLE_AVX512 "Use AVX512 instructions on x86_64" 0) option (ENABLE_AVX512_VBMI "Use AVX512_VBMI instruction on x86_64 (depends on ENABLE_AVX512)" 0) option (ENABLE_BMI "Use BMI instructions on x86_64" 0) + option (ENABLE_BMI2 "Use BMI2 instructions on x86_64 (depends on ENABLE_AVX2)" 0) option (ENABLE_AVX2_FOR_SPEC_OP "Use avx2 instructions for specific operations on x86_64" 0) option (ENABLE_AVX512_FOR_SPEC_OP "Use avx512 instructions for specific operations on x86_64" 0) @@ -90,6 +97,7 @@ elseif (ARCH_AMD64) SET(ENABLE_AVX512 0) SET(ENABLE_AVX512_VBMI 0) SET(ENABLE_BMI 0) + SET(ENABLE_BMI2 0) SET(ENABLE_AVX2_FOR_SPEC_OP 0) SET(ENABLE_AVX512_FOR_SPEC_OP 0) endif() @@ -237,6 +245,20 @@ elseif (ARCH_AMD64) set (COMPILER_FLAGS "${COMPILER_FLAGS} ${TEST_FLAG}") endif () + set (TEST_FLAG "-mbmi2") + set (CMAKE_REQUIRED_FLAGS "${TEST_FLAG} -O0") + check_cxx_source_compiles(" + #include + int main() { + auto a = _pdep_u64(0, 0); + (void)a; + return 0; + } + " HAVE_BMI2) + if (HAVE_BMI2 AND HAVE_AVX2 AND ENABLE_AVX2 AND ENABLE_BMI2) + set (COMPILER_FLAGS "${COMPILER_FLAGS} ${TEST_FLAG}") + endif () + # Limit avx2/avx512 flag for specific source build set (X86_INTRINSICS_FLAGS "") if (ENABLE_AVX2_FOR_SPEC_OP) diff --git a/cmake/ld.lld.in b/cmake/ld.lld.in index 9736dab1bc3..78a264a0089 100755 --- a/cmake/ld.lld.in +++ b/cmake/ld.lld.in @@ -3,15 +3,15 @@ # This is a workaround for bug in llvm/clang, # that does not produce .debug_aranges with LTO # -# NOTE: this is a temporary solution, that should be removed once [1] will be -# resolved. +# NOTE: this is a temporary solution, that should be removed after upgrading to +# clang-16/llvm-16. # -# [1]: https://discourse.llvm.org/t/clang-does-not-produce-full-debug-aranges-section-with-thinlto/64898/8 +# Refs: https://reviews.llvm.org/D133092 # NOTE: only -flto=thin is supported. # NOTE: it is not possible to check was there -gdwarf-aranges initially or not. 
if [[ "$*" =~ -plugin-opt=thinlto ]]; then - exec "@LLD_PATH@" -mllvm -generate-arange-section "$@" + exec "@LLD_PATH@" -plugin-opt=-generate-arange-section "$@" else exec "@LLD_PATH@" "$@" fi diff --git a/cmake/sanitize.cmake b/cmake/sanitize.cmake index 73610545009..f0cef54b0b8 100644 --- a/cmake/sanitize.cmake +++ b/cmake/sanitize.cmake @@ -85,7 +85,7 @@ if (SANITIZE) # and they have a bunch of flags not halt the program if UIO happend and even to silence that warnings. # But for unknown reason that flags don't work with ClickHouse or we don't understand how to properly use them, # that's why we often receive reports about UIO. The simplest way to avoid this is just set this flag here. - set(UBSAN_FLAGS "${SAN_FLAGS} -fno-sanitize=unsigned-integer-overflow") + set(UBSAN_FLAGS "${UBSAN_FLAGS} -fno-sanitize=unsigned-integer-overflow") endif() if (COMPILER_CLANG) set (UBSAN_FLAGS "${UBSAN_FLAGS} -fsanitize-blacklist=${CMAKE_SOURCE_DIR}/tests/ubsan_suppressions.txt") diff --git a/cmake/tools.cmake b/cmake/tools.cmake index 57d39899a40..8a17d97cf13 100644 --- a/cmake/tools.cmake +++ b/cmake/tools.cmake @@ -117,7 +117,7 @@ endif() # Archiver if (COMPILER_GCC) - find_program (LLVM_AR_PATH NAMES "llvm-ar" "llvm-ar-14" "llvm-ar-13" "llvm-ar-12") + find_program (LLVM_AR_PATH NAMES "llvm-ar" "llvm-ar-15" "llvm-ar-14" "llvm-ar-13" "llvm-ar-12") else () find_program (LLVM_AR_PATH NAMES "llvm-ar-${COMPILER_VERSION_MAJOR}" "llvm-ar") endif () @@ -131,7 +131,7 @@ message(STATUS "Using archiver: ${CMAKE_AR}") # Ranlib if (COMPILER_GCC) - find_program (LLVM_RANLIB_PATH NAMES "llvm-ranlib" "llvm-ranlib-14" "llvm-ranlib-13" "llvm-ranlib-12") + find_program (LLVM_RANLIB_PATH NAMES "llvm-ranlib" "llvm-ranlib-15" "llvm-ranlib-14" "llvm-ranlib-13" "llvm-ranlib-12") else () find_program (LLVM_RANLIB_PATH NAMES "llvm-ranlib-${COMPILER_VERSION_MAJOR}" "llvm-ranlib") endif () @@ -145,7 +145,7 @@ message(STATUS "Using ranlib: ${CMAKE_RANLIB}") # Install Name Tool if (COMPILER_GCC) - find_program (LLVM_INSTALL_NAME_TOOL_PATH NAMES "llvm-install-name-tool" "llvm-install-name-tool-14" "llvm-install-name-tool-13" "llvm-install-name-tool-12") + find_program (LLVM_INSTALL_NAME_TOOL_PATH NAMES "llvm-install-name-tool" "llvm-install-name-tool-15" "llvm-install-name-tool-14" "llvm-install-name-tool-13" "llvm-install-name-tool-12") else () find_program (LLVM_INSTALL_NAME_TOOL_PATH NAMES "llvm-install-name-tool-${COMPILER_VERSION_MAJOR}" "llvm-install-name-tool") endif () @@ -159,7 +159,7 @@ message(STATUS "Using install-name-tool: ${CMAKE_INSTALL_NAME_TOOL}") # Objcopy if (COMPILER_GCC) - find_program (OBJCOPY_PATH NAMES "llvm-objcopy" "llvm-objcopy-14" "llvm-objcopy-13" "llvm-objcopy-12" "objcopy") + find_program (OBJCOPY_PATH NAMES "llvm-objcopy" "llvm-objcopy-15" "llvm-objcopy-14" "llvm-objcopy-13" "llvm-objcopy-12" "objcopy") else () find_program (OBJCOPY_PATH NAMES "llvm-objcopy-${COMPILER_VERSION_MAJOR}" "llvm-objcopy" "objcopy") endif () @@ -173,7 +173,7 @@ endif () # Strip if (COMPILER_GCC) - find_program (STRIP_PATH NAMES "llvm-strip" "llvm-strip-14" "llvm-strip-13" "llvm-strip-12" "strip") + find_program (STRIP_PATH NAMES "llvm-strip" "llvm-strip-15" "llvm-strip-14" "llvm-strip-13" "llvm-strip-12" "strip") else () find_program (STRIP_PATH NAMES "llvm-strip-${COMPILER_VERSION_MAJOR}" "llvm-strip" "strip") endif () diff --git a/cmake/warnings.cmake b/cmake/warnings.cmake index 89f3a62ba2e..8364b0c2c08 100644 --- a/cmake/warnings.cmake +++ b/cmake/warnings.cmake @@ -27,7 +27,6 @@ if (COMPILER_CLANG) 
no_warning(sign-conversion) no_warning(implicit-int-conversion) no_warning(implicit-int-float-conversion) - no_warning(shorten-64-to-32) no_warning(ctad-maybe-unsupported) # clang 9+, linux-only no_warning(disabled-macro-expansion) no_warning(documentation-unknown-command) diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index f914c0d2d3f..8ebd4ab55d3 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -114,6 +114,7 @@ if (ENABLE_TESTS) endif() add_contrib (llvm-project-cmake llvm-project) +add_contrib (libfuzzer-cmake llvm-project) add_contrib (libxml2-cmake libxml2) add_contrib (aws-s3-cmake aws @@ -164,6 +165,7 @@ add_contrib (sqlite-cmake sqlite-amalgamation) add_contrib (s2geometry-cmake s2geometry) add_contrib (c-ares-cmake c-ares) add_contrib (qpl-cmake qpl) +add_contrib (morton-nd-cmake morton-nd) add_contrib(annoy-cmake annoy) diff --git a/contrib/NuRaft b/contrib/NuRaft index 1be805e7cb2..e4e746a24eb 160000 --- a/contrib/NuRaft +++ b/contrib/NuRaft @@ -1 +1 @@ -Subproject commit 1be805e7cb2494aa8170015493474379b0362dfc +Subproject commit e4e746a24eb56861a86f3672771e3308d8c40722 diff --git a/contrib/cctz b/contrib/cctz index 05ec08ce61e..5c8528fb35e 160000 --- a/contrib/cctz +++ b/contrib/cctz @@ -1 +1 @@ -Subproject commit 05ec08ce61e4b5c44692cc2f1ce4b6d8596679bf +Subproject commit 5c8528fb35e89ee0b3a7157490423fba0d4dd7b5 diff --git a/contrib/libcxx b/contrib/libcxx index 172b2ae074f..4db7f838afd 160000 --- a/contrib/libcxx +++ b/contrib/libcxx @@ -1 +1 @@ -Subproject commit 172b2ae074f6755145b91c53a95c8540c1468239 +Subproject commit 4db7f838afd3139eb3761694b04d31275df45d2d diff --git a/contrib/libcxx-cmake/CMakeLists.txt b/contrib/libcxx-cmake/CMakeLists.txt index 6f42a479588..53c6ff58f83 100644 --- a/contrib/libcxx-cmake/CMakeLists.txt +++ b/contrib/libcxx-cmake/CMakeLists.txt @@ -25,6 +25,7 @@ set(SRCS "${LIBCXX_SOURCE_DIR}/src/ios.cpp" "${LIBCXX_SOURCE_DIR}/src/ios.instantiations.cpp" "${LIBCXX_SOURCE_DIR}/src/iostream.cpp" +"${LIBCXX_SOURCE_DIR}/src/legacy_debug_handler.cpp" "${LIBCXX_SOURCE_DIR}/src/legacy_pointer_safety.cpp" "${LIBCXX_SOURCE_DIR}/src/locale.cpp" "${LIBCXX_SOURCE_DIR}/src/memory.cpp" @@ -49,6 +50,7 @@ set(SRCS "${LIBCXX_SOURCE_DIR}/src/valarray.cpp" "${LIBCXX_SOURCE_DIR}/src/variant.cpp" "${LIBCXX_SOURCE_DIR}/src/vector.cpp" +"${LIBCXX_SOURCE_DIR}/src/verbose_abort.cpp" ) add_library(cxx ${SRCS}) diff --git a/contrib/libcxxabi b/contrib/libcxxabi index 6eb7cc7a7bd..a736a6b3c6a 160000 --- a/contrib/libcxxabi +++ b/contrib/libcxxabi @@ -1 +1 @@ -Subproject commit 6eb7cc7a7bdd779e6734d1b9fb451df2274462d7 +Subproject commit a736a6b3c6a7b8aae2ebad629ca21b2c55b4820e diff --git a/contrib/libcxxabi-cmake/CMakeLists.txt b/contrib/libcxxabi-cmake/CMakeLists.txt index bf1ede8a60e..a59452eee9a 100644 --- a/contrib/libcxxabi-cmake/CMakeLists.txt +++ b/contrib/libcxxabi-cmake/CMakeLists.txt @@ -9,6 +9,7 @@ set(SRCS "${LIBCXXABI_SOURCE_DIR}/src/cxa_exception_storage.cpp" "${LIBCXXABI_SOURCE_DIR}/src/cxa_guard.cpp" "${LIBCXXABI_SOURCE_DIR}/src/cxa_handlers.cpp" +# "${LIBCXXABI_SOURCE_DIR}/src/cxa_noexception.cpp" "${LIBCXXABI_SOURCE_DIR}/src/cxa_personality.cpp" "${LIBCXXABI_SOURCE_DIR}/src/cxa_thread_atexit.cpp" "${LIBCXXABI_SOURCE_DIR}/src/cxa_vector.cpp" diff --git a/contrib/libfuzzer-cmake/CMakeLists.txt b/contrib/libfuzzer-cmake/CMakeLists.txt new file mode 100644 index 00000000000..ff3a91d828e --- /dev/null +++ b/contrib/libfuzzer-cmake/CMakeLists.txt @@ -0,0 +1,35 @@ +set(COMPILER_RT_FUZZER_SRC_DIR 
"${ClickHouse_SOURCE_DIR}/contrib/llvm-project/compiler-rt/lib/fuzzer") + +set(FUZZER_SRCS + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerCrossOver.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerDataFlowTrace.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerDriver.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerExtFunctionsDlsym.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerExtFunctionsWeak.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerExtFunctionsWindows.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerExtraCounters.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerExtraCountersDarwin.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerExtraCountersWindows.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerFork.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerIO.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerIOPosix.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerIOWindows.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerLoop.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerMerge.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerMutate.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerSHA1.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerTracePC.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerUtil.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerUtilDarwin.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerUtilFuchsia.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerUtilLinux.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerUtilPosix.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerUtilWindows.cpp" +) + +add_library(_fuzzer_no_main STATIC ${FUZZER_SRCS}) +add_library(ch_contrib::fuzzer_no_main ALIAS _fuzzer_no_main) + +add_library(_fuzzer STATIC ${FUZZER_SRCS} "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerMain.cpp") +add_library(ch_contrib::fuzzer ALIAS _fuzzer) + diff --git a/contrib/llvm-project-cmake/CMakeLists.txt b/contrib/llvm-project-cmake/CMakeLists.txt index 6a73ae0f0c6..7af4a23bc9d 100644 --- a/contrib/llvm-project-cmake/CMakeLists.txt +++ b/contrib/llvm-project-cmake/CMakeLists.txt @@ -21,6 +21,9 @@ set (LLVM_INCLUDE_DIRS "${ClickHouse_BINARY_DIR}/contrib/llvm-project/llvm/include" ) set (LLVM_LIBRARY_DIRS "${ClickHouse_BINARY_DIR}/contrib/llvm-project/llvm") +# NOTE: You should not remove this line since otherwise it will use default 20, +# and llvm cannot be compiled with bundled libcxx and 20 standard. +set (CMAKE_CXX_STANDARD 14) # This list was generated by listing all LLVM libraries, compiling the binary and removing all libraries while it still compiles. 
set (REQUIRED_LLVM_LIBRARIES diff --git a/contrib/morton-nd b/contrib/morton-nd new file mode 160000 index 00000000000..3795491a4aa --- /dev/null +++ b/contrib/morton-nd @@ -0,0 +1 @@ +Subproject commit 3795491a4aa3cdc916c8583094683f0d68df5bc0 diff --git a/contrib/morton-nd-cmake/CMakeLists.txt b/contrib/morton-nd-cmake/CMakeLists.txt new file mode 100644 index 00000000000..4842781503f --- /dev/null +++ b/contrib/morton-nd-cmake/CMakeLists.txt @@ -0,0 +1,3 @@ +add_library(_morton_nd INTERFACE) +target_include_directories(_morton_nd SYSTEM BEFORE INTERFACE "${ClickHouse_SOURCE_DIR}/contrib/morton-nd/include/") +add_library(ch_contrib::morton_nd ALIAS _morton_nd) diff --git a/contrib/rocksdb b/contrib/rocksdb index e7c2b2f7bcf..2c8998e26c6 160000 --- a/contrib/rocksdb +++ b/contrib/rocksdb @@ -1 +1 @@ -Subproject commit e7c2b2f7bcf3b4b33892a1a6d25c32a93edfbdb9 +Subproject commit 2c8998e26c6d46b27c710d7829c3a15e34959f70 diff --git a/contrib/rocksdb-cmake/CMakeLists.txt b/contrib/rocksdb-cmake/CMakeLists.txt index b9dd2558348..466adf6aff0 100644 --- a/contrib/rocksdb-cmake/CMakeLists.txt +++ b/contrib/rocksdb-cmake/CMakeLists.txt @@ -78,23 +78,13 @@ endif() include(CheckCCompilerFlag) if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64") - CHECK_C_COMPILER_FLAG("-mcpu=power9" HAS_POWER9) - if(HAS_POWER9) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mcpu=power9 -mtune=power9") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mcpu=power9 -mtune=power9") + if(POWER9) + set(HAS_POWER9 1) + set(HAS_ALTIVEC 1) else() - CHECK_C_COMPILER_FLAG("-mcpu=power8" HAS_POWER8) - if(HAS_POWER8) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mcpu=power8 -mtune=power8") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mcpu=power8 -mtune=power8") - endif(HAS_POWER8) - endif(HAS_POWER9) - CHECK_C_COMPILER_FLAG("-maltivec" HAS_ALTIVEC) - if(HAS_ALTIVEC) - message(STATUS " HAS_ALTIVEC yes") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maltivec") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maltivec") - endif(HAS_ALTIVEC) + set(HAS_POWER8 1) + set(HAS_ALTIVEC 1) + endif(POWER9) endif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64") if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64|arm64|ARM64") diff --git a/contrib/zlib-ng b/contrib/zlib-ng index bffad6f6fe7..50f0eae1a41 160000 --- a/contrib/zlib-ng +++ b/contrib/zlib-ng @@ -1 +1 @@ -Subproject commit bffad6f6fe74d6a2f92e2668390664a926c68733 +Subproject commit 50f0eae1a411764cd6d1e85b3ce471438acd3c1c diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile index 2954cd574d0..06c3c0d80f0 100644 --- a/docker/packager/binary/Dockerfile +++ b/docker/packager/binary/Dockerfile @@ -73,7 +73,7 @@ RUN apt-get install binutils-riscv64-linux-gnu # Architecture of the image when BuildKit/buildx is used ARG TARGETARCH -ARG NFPM_VERSION=2.18.1 +ARG NFPM_VERSION=2.20.0 RUN arch=${TARGETARCH:-amd64} \ && curl -Lo /tmp/nfpm.deb "https://github.com/goreleaser/nfpm/releases/download/v${NFPM_VERSION}/nfpm_${arch}.deb" \ @@ -91,6 +91,9 @@ ENV PATH="$PATH:/usr/local/go/bin" ENV GOPATH=/workdir/go ENV GOCACHE=/workdir/ +RUN curl https://raw.githubusercontent.com/matus-chochlik/ctcache/7fd516e91c17779cbc6fc18bd119313d9532dd90/clang-tidy-cache -Lo /usr/bin/clang-tidy-cache \ + && chmod +x /usr/bin/clang-tidy-cache + RUN mkdir /workdir && chmod 777 /workdir WORKDIR /workdir diff --git a/docker/packager/packager b/docker/packager/packager index b4aa4ebdd91..7f6bd8818fb 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -208,6 +208,7 @@ def parse_env_variables( cxx = 
cc.replace("gcc", "g++").replace("clang", "clang++") if package_type == "deb": + # NOTE: This are the env for packages/build script result.append("MAKE_DEB=true") cmake_flags.append("-DENABLE_TESTS=0") cmake_flags.append("-DENABLE_UTILS=0") @@ -257,6 +258,10 @@ def parse_env_variables( if clang_tidy: # 15G is not enough for tidy build cache_maxsize = "25G" + + # `CTCACHE_DIR` has the same purpose as the `CCACHE_DIR` above. + # It's there to have the clang-tidy cache embedded into our standard `CCACHE_DIR` + result.append("CTCACHE_DIR=/ccache/clang-tidy-cache") result.append(f"CCACHE_MAXSIZE={cache_maxsize}") if distcc_hosts: @@ -268,6 +273,7 @@ def parse_env_variables( result.append('DISTCC_HOSTS="localhost/`nproc`"') if additional_pkgs: + # NOTE: This are the env for packages/build script result.append("MAKE_APK=true") result.append("MAKE_RPM=true") result.append("MAKE_TGZ=true") @@ -280,9 +286,7 @@ def parse_env_variables( cmake_flags.append("-DENABLE_TESTS=1") if shared_libraries: - cmake_flags.append( - "-DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1" - ) + cmake_flags.append("-DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1") # We can't always build utils because it requires too much space, but # we have to build them at least in some way in CI. The shared library # build is probably the least heavy disk-wise. diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index 5b597f927a2..8f1cf6ee98b 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -33,7 +33,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="22.9.3.18" +ARG VERSION="22.10.2.11" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # user/group precreated explicitly with fixed uid/gid on purpose. diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index c6254b898ed..d5fc5d8e0d3 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -21,7 +21,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="22.9.3.18" +ARG VERSION="22.10.2.11" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # set non-empty deb_location_url url to create a docker image diff --git a/docker/test/codebrowser/Dockerfile b/docker/test/codebrowser/Dockerfile index ceed93c3ac7..b76b8234c81 100644 --- a/docker/test/codebrowser/Dockerfile +++ b/docker/test/codebrowser/Dockerfile @@ -36,10 +36,7 @@ RUN arch=${TARGETARCH:-amd64} \ # repo versions doesn't work correctly with C++17 # also we push reports to s3, so we add index.html to subfolder urls # https://github.com/ClickHouse-Extras/woboq_codebrowser/commit/37e15eaf377b920acb0b48dbe82471be9203f76b -# TODO: remove branch in a few weeks after merge, e.g. in May or June 2022 -# -# FIXME: update location of a repo -RUN git clone https://github.com/azat/woboq_codebrowser --branch llvm-15 \ +RUN git clone https://github.com/ClickHouse/woboq_codebrowser \ && cd woboq_codebrowser \ && cmake . 
-G Ninja -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_COMPILER=clang\+\+-${LLVM_VERSION} -DCMAKE_C_COMPILER=clang-${LLVM_VERSION} \ && ninja \ diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index 9d6cf22c817..de9125d565b 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -136,6 +136,7 @@ function clone_submodules contrib/wyhash contrib/hashidsxx contrib/c-ares + contrib/morton-nd ) git submodule sync diff --git a/docker/test/integration/base/Dockerfile b/docker/test/integration/base/Dockerfile index 9b6318a5426..a2d86187a23 100644 --- a/docker/test/integration/base/Dockerfile +++ b/docker/test/integration/base/Dockerfile @@ -27,9 +27,14 @@ RUN apt-get update \ tar \ tzdata \ unixodbc \ + python3-pip \ + libcurl4-openssl-dev \ + libssl-dev \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* +RUN pip3 install pycurl + # Architecture of the image when BuildKit/buildx is used ARG TARGETARCH diff --git a/docker/test/sqlancer/Dockerfile b/docker/test/sqlancer/Dockerfile index 0821d516e23..2ebc61e35a9 100644 --- a/docker/test/sqlancer/Dockerfile +++ b/docker/test/sqlancer/Dockerfile @@ -1,5 +1,5 @@ # docker build -t clickhouse/sqlancer-test . -FROM ubuntu:20.04 +FROM ubuntu:22.04 # ARG for quick switch to a given ubuntu mirror ARG apt_archive="http://archive.ubuntu.com" diff --git a/docker/test/sqlancer/process_sqlancer_result.py b/docker/test/sqlancer/process_sqlancer_result.py index 37b8f465498..3bed4578565 100755 --- a/docker/test/sqlancer/process_sqlancer_result.py +++ b/docker/test/sqlancer/process_sqlancer_result.py @@ -11,13 +11,15 @@ def process_result(result_folder): summary = [] paths = [] tests = [ - "TLPWhere", + "TLPAggregate", + "TLPDistinct", "TLPGroupBy", "TLPHaving", + "TLPWhere", "TLPWhereGroupBy", - "TLPDistinct", - "TLPAggregate", + "NoREC", ] + failed_tests = [] for test in tests: err_path = "{}/{}.err".format(result_folder, test) @@ -33,15 +35,11 @@ def process_result(result_folder): with open(err_path, "r") as f: if "AssertionError" in f.read(): summary.append((test, "FAIL")) + failed_tests.append(test) status = "failure" else: summary.append((test, "OK")) - logs_path = "{}/logs.tar.gz".format(result_folder) - if not os.path.exists(logs_path): - logging.info("No logs tar on path %s", logs_path) - else: - paths.append(logs_path) stdout_path = "{}/stdout.log".format(result_folder) if not os.path.exists(stdout_path): logging.info("No stdout log on path %s", stdout_path) @@ -53,18 +51,23 @@ def process_result(result_folder): else: paths.append(stderr_path) - description = "SQLancer test run. 
See report" + description = "SQLancer run successfully" + if status == "failure": + description = f"Failed oracles: {failed_tests}" return status, description, summary, paths -def write_results(results_file, status_file, results, status): +def write_results( + results_file, status_file, description_file, results, status, description +): with open(results_file, "w") as f: out = csv.writer(f, delimiter="\t") out.writerows(results) with open(status_file, "w") as f: - out = csv.writer(f, delimiter="\t") - out.writerow(status) + f.write(status + "\n") + with open(description_file, "w") as f: + f.write(description + "\n") if __name__ == "__main__": @@ -72,13 +75,20 @@ if __name__ == "__main__": parser = argparse.ArgumentParser( description="ClickHouse script for parsing results of sqlancer test" ) - parser.add_argument("--in-results-dir", default="/test_output/") - parser.add_argument("--out-results-file", default="/test_output/test_results.tsv") - parser.add_argument("--out-status-file", default="/test_output/check_status.tsv") + parser.add_argument("--in-results-dir", default="/workspace/") + parser.add_argument("--out-results-file", default="/workspace/summary.tsv") + parser.add_argument("--out-description-file", default="/workspace/description.txt") + parser.add_argument("--out-status-file", default="/workspace/status.txt") args = parser.parse_args() - state, description, test_results, logs = process_result(args.in_results_dir) + status, description, summary, logs = process_result(args.in_results_dir) logging.info("Result parsed") - status = (state, description) - write_results(args.out_results_file, args.out_status_file, test_results, status) + write_results( + args.out_results_file, + args.out_status_file, + args.out_description_file, + summary, + status, + description, + ) logging.info("Result written") diff --git a/docker/test/sqlancer/run.sh b/docker/test/sqlancer/run.sh index a1891569d34..4a0f0f6a512 100755 --- a/docker/test/sqlancer/run.sh +++ b/docker/test/sqlancer/run.sh @@ -1,33 +1,62 @@ #!/bin/bash +set -exu +trap "exit" INT TERM -set -e -x +function wget_with_retry +{ + for _ in 1 2 3 4; do + if wget -nv -nd -c "$1";then + return 0 + else + sleep 0.5 + fi + done + return 1 +} -dpkg -i package_folder/clickhouse-common-static_*.deb -dpkg -i package_folder/clickhouse-common-static-dbg_*.deb -dpkg -i package_folder/clickhouse-server_*.deb -dpkg -i package_folder/clickhouse-client_*.deb +if [ -z ${BINARY_URL_TO_DOWNLOAD+x} ] +then + echo "No BINARY_URL_TO_DOWNLOAD provided." +else + wget_with_retry "$BINARY_URL_TO_DOWNLOAD" + chmod +x /clickhouse +fi -service clickhouse-server start && sleep 5 +if [[ -f "/clickhouse" ]]; then + echo "/clickhouse exists" +else + exit 1 +fi + +cd /workspace +/clickhouse server -P /workspace/clickhouse-server.pid -L /workspace/clickhouse-server.log -E /workspace/clickhouse-server.log.err --daemon + +for _ in $(seq 1 60); do if [[ $(wget -q 'localhost:8123' -O-) == 'Ok.' 
]]; then break ; else sleep 1; fi ; done cd /sqlancer/sqlancer-master -export TIMEOUT=300 -export NUM_QUERIES=1000 +TIMEOUT=300 +NUM_QUERIES=1000 +NUM_THREADS=10 +TESTS=( "TLPGroupBy" "TLPHaving" "TLPWhere" "TLPDistinct" "TLPAggregate" "NoREC" ) +echo "${TESTS[@]}" -( java -jar target/sqlancer-*.jar --num-threads 10 --timeout-seconds $TIMEOUT --num-queries $NUM_QUERIES --username default --password "" clickhouse --oracle TLPWhere | tee /test_output/TLPWhere.out ) 3>&1 1>&2 2>&3 | tee /test_output/TLPWhere.err -( java -jar target/sqlancer-*.jar --num-threads 10 --timeout-seconds $TIMEOUT --num-queries $NUM_QUERIES --username default --password "" clickhouse --oracle TLPGroupBy | tee /test_output/TLPGroupBy.out ) 3>&1 1>&2 2>&3 | tee /test_output/TLPGroupBy.err -( java -jar target/sqlancer-*.jar --num-threads 10 --timeout-seconds $TIMEOUT --num-queries $NUM_QUERIES --username default --password "" clickhouse --oracle TLPHaving | tee /test_output/TLPHaving.out ) 3>&1 1>&2 2>&3 | tee /test_output/TLPHaving.err -( java -jar target/sqlancer-*.jar --num-threads 10 --timeout-seconds $TIMEOUT --num-queries $NUM_QUERIES --username default --password "" clickhouse --oracle TLPWhere --oracle TLPGroupBy | tee /test_output/TLPWhereGroupBy.out ) 3>&1 1>&2 2>&3 | tee /test_output/TLPWhereGroupBy.err -( java -jar target/sqlancer-*.jar --num-threads 10 --timeout-seconds $TIMEOUT --num-queries $NUM_QUERIES --username default --password "" clickhouse --oracle TLPDistinct | tee /test_output/TLPDistinct.out ) 3>&1 1>&2 2>&3 | tee /test_output/TLPDistinct.err -( java -jar target/sqlancer-*.jar --num-threads 10 --timeout-seconds $TIMEOUT --num-queries $NUM_QUERIES --username default --password "" clickhouse --oracle TLPAggregate | tee /test_output/TLPAggregate.out ) 3>&1 1>&2 2>&3 | tee /test_output/TLPAggregate.err +for TEST in "${TESTS[@]}"; do + echo "$TEST" + if [[ $(wget -q 'localhost:8123' -O-) == 'Ok.' ]] + then + echo "Server is OK" + ( java -jar target/sqlancer-*.jar --log-each-select true --print-failed false --num-threads "$NUM_THREADS" --timeout-seconds "$TIMEOUT" --num-queries "$NUM_QUERIES" --username default --password "" clickhouse --oracle "$TEST" | tee "/workspace/$TEST.out" ) 3>&1 1>&2 2>&3 | tee "/workspace/$TEST.err" + else + touch "/workspace/$TEST.err" "/workspace/$TEST.out" + echo "Server is not responding" | tee /workspace/server_crashed.log + fi +done -service clickhouse stop +ls /workspace +pkill -F /workspace/clickhouse-server.pid || true -ls /var/log/clickhouse-server/ -tar czf /test_output/logs.tar.gz -C /var/log/clickhouse-server/ . -tail -n 1000 /var/log/clickhouse-server/stderr.log > /test_output/stderr.log -tail -n 1000 /var/log/clickhouse-server/stdout.log > /test_output/stdout.log -tail -n 1000 /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log +for _ in $(seq 1 60); do if [[ $(wget -q 'localhost:8123' -O-) == 'Ok.' 
]]; then sleep 1 ; else break; fi ; done -/process_sqlancer_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv -ls /test_output +/process_sqlancer_result.py || echo -e "failure\tCannot parse results" > /workspace/check_status.tsv +ls /workspace diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh old mode 100755 new mode 100644 index 6b9954c2431..7058853b43e --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -47,7 +47,6 @@ function install_packages() function configure() { - export ZOOKEEPER_FAULT_INJECTION=1 # install test configs export USE_DATABASE_ORDINARY=1 export EXPORT_S3_STORAGE_POLICIES=1 @@ -203,6 +202,7 @@ quit install_packages package_folder +export ZOOKEEPER_FAULT_INJECTION=1 configure azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log & @@ -243,6 +243,7 @@ stop # Let's enable S3 storage by default export USE_S3_STORAGE_FOR_MERGE_TREE=1 +export ZOOKEEPER_FAULT_INJECTION=1 configure # But we still need default disk because some tables loaded only into it @@ -270,10 +271,6 @@ clickhouse-client --query "SELECT 'Server successfully started', 'OK'" >> /test_ || (echo -e 'Server failed to start (see application_errors.txt and clickhouse-server.clean.log)\tFAIL' >> /test_output/test_results.tsv \ && grep -a ".*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt) -echo "Get previous release tag" -previous_release_tag=$(clickhouse-client --query="SELECT version()" | get_previous_release_tag) -echo $previous_release_tag - stop [ -f /var/log/clickhouse-server/clickhouse-server.log ] || echo -e "Server log does not exist\tFAIL" @@ -331,6 +328,10 @@ zgrep -Fa " received signal " /test_output/gdb.log > /dev/null \ echo -e "Backward compatibility check\n" +echo "Get previous release tag" +previous_release_tag=$(clickhouse-client --version | grep -o "[0-9]*\.[0-9]*\.[0-9]*\.[0-9]*" | get_previous_release_tag) +echo $previous_release_tag + echo "Clone previous release repository" git clone https://github.com/ClickHouse/ClickHouse.git --no-tags --progress --branch=$previous_release_tag --no-recurse-submodules --depth=1 previous_release_repository @@ -375,6 +376,8 @@ else install_packages previous_release_package_folder # Start server from previous release + # Previous version may not be ready for fault injections + export ZOOKEEPER_FAULT_INJECTION=0 configure # Avoid "Setting s3_check_objects_after_upload is neither a builtin setting..." @@ -389,12 +392,23 @@ else clickhouse-client --query="SELECT 'Server version: ', version()" - # Install new package before running stress test because we should use new clickhouse-client and new clickhouse-test - # But we should leave old binary in /usr/bin/ for gdb (so it will print sane stacktarces) + # Install new package before running stress test because we should use new + # clickhouse-client and new clickhouse-test. + # + # But we should leave old binary in /usr/bin/ and debug symbols in + # /usr/lib/debug/usr/bin (if any) for gdb and internal DWARF parser, so it + # will print sane stacktraces and also to avoid possible crashes. + # + # FIXME: those files can be extracted directly from debian package, but + # actually better solution will be to use different PATH instead of playing + # games with files from packages. 
mv /usr/bin/clickhouse previous_release_package_folder/ + mv /usr/lib/debug/usr/bin/clickhouse.debug previous_release_package_folder/ install_packages package_folder mv /usr/bin/clickhouse package_folder/ + mv /usr/lib/debug/usr/bin/clickhouse.debug package_folder/ mv previous_release_package_folder/clickhouse /usr/bin/ + mv previous_release_package_folder/clickhouse.debug /usr/lib/debug/usr/bin/clickhouse.debug mkdir tmp_stress_output @@ -410,6 +424,8 @@ else # Start new server mv package_folder/clickhouse /usr/bin/ + mv package_folder/clickhouse.debug /usr/lib/debug/usr/bin/clickhouse.debug + export ZOOKEEPER_FAULT_INJECTION=1 configure start 500 clickhouse-client --query "SELECT 'Backward compatibility check: Server successfully started', 'OK'" >> /test_output/test_results.tsv \ @@ -464,6 +480,7 @@ else -e "[Queue = DB::MergeMutateRuntimeQueue]: Code: 235. DB::Exception: Part" \ -e "The set of parts restored in place of" \ -e "(ReplicatedMergeTreeAttachThread): Initialization failed. Error" \ + -e "Code: 269. DB::Exception: Destination table is myself" \ /var/log/clickhouse-server/clickhouse-server.backward.clean.log | zgrep -Fa "" > /test_output/bc_check_error_messages.txt \ && echo -e 'Backward compatibility check: Error message in clickhouse-server.log (see bc_check_error_messages.txt)\tFAIL' >> /test_output/test_results.tsv \ || echo -e 'Backward compatibility check: No Error messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv diff --git a/docker/test/stress/stress b/docker/test/stress/stress index 7f3f38bd8f5..a0ec86f7fbe 100755 --- a/docker/test/stress/stress +++ b/docker/test/stress/stress @@ -286,9 +286,7 @@ if __name__ == "__main__": # But right now it should work, since neither hung check, nor 00001_select_1 has GROUP BY. "--client-option", "max_untracked_memory=1Gi", - "--client-option", "max_memory_usage_for_user=0", - "--client-option", "memory_profiler_step=1Gi", # Use system database to avoid CREATE/DROP DATABASE queries "--database=system", diff --git a/docker/test/style/Dockerfile b/docker/test/style/Dockerfile index 683124feaa0..cb8c914e53d 100644 --- a/docker/test/style/Dockerfile +++ b/docker/test/style/Dockerfile @@ -1,7 +1,7 @@ # docker build -t clickhouse/style-test . 
FROM ubuntu:20.04 -ARG ACT_VERSION=0.2.25 -ARG ACTIONLINT_VERSION=1.6.8 +ARG ACT_VERSION=0.2.33 +ARG ACTIONLINT_VERSION=1.6.22 # ARG for quick switch to a given ubuntu mirror ARG apt_archive="http://archive.ubuntu.com" diff --git a/docker/test/util/Dockerfile b/docker/test/util/Dockerfile index 57880bfc1d6..57544bdc090 100644 --- a/docker/test/util/Dockerfile +++ b/docker/test/util/Dockerfile @@ -5,6 +5,7 @@ FROM ubuntu:20.04 ARG apt_archive="http://archive.ubuntu.com" RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list +# 15.0.2 ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=15 RUN apt-get update \ @@ -58,6 +59,9 @@ RUN apt-get update \ RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld # for external_symbolizer_path RUN ln -s /usr/bin/llvm-symbolizer-${LLVM_VERSION} /usr/bin/llvm-symbolizer +# FIXME: workaround for "The imported target "merge-fdata" references the file" error +# https://salsa.debian.org/pkg-llvm-team/llvm-toolchain/-/commit/992e52c0b156a5ba9c6a8a54f8c4857ddd3d371d +RUN sed -i '/_IMPORT_CHECK_FILES_FOR_\(mlir-\|llvm-bolt\|merge-fdata\|MLIR\)/ {s|^|#|}' /usr/lib/llvm-${LLVM_VERSION}/lib/cmake/llvm/LLVMExports-*.cmake ARG CCACHE_VERSION=4.6.1 RUN mkdir /tmp/ccache \ diff --git a/docs/README.md b/docs/README.md index fa8b6bed85c..3ca87dc03c3 100644 --- a/docs/README.md +++ b/docs/README.md @@ -212,4 +212,4 @@ Templates: ## How to Build Documentation -You can build your documentation manually by following the instructions in [docs/tools/README.md](../docs/tools/README.md). Also, our CI runs the documentation build after the `documentation` label is added to PR. You can see the results of a build in the GitHub interface. If you have no permissions to add labels, a reviewer of your PR will add it. +You can build your documentation manually by following the instructions in the docs repo [contrib-writing-guide](https://github.com/ClickHouse/clickhouse-docs/blob/main/contrib-writing-guide.md). Also, our CI runs the documentation build after the `documentation` label is added to PR. You can see the results of a build in the GitHub interface. If you have no permissions to add labels, a reviewer of your PR will add it. diff --git a/docs/changelogs/v22.10.1.1877-stable.md b/docs/changelogs/v22.10.1.1877-stable.md new file mode 100644 index 00000000000..77e540ce928 --- /dev/null +++ b/docs/changelogs/v22.10.1.1877-stable.md @@ -0,0 +1,352 @@ +--- +sidebar_position: 1 +sidebar_label: 2022 +--- + +# 2022 Changelog + +### ClickHouse release v22.10.1.1877-stable (98ab5a3c189) FIXME as compared to v22.9.1.2603-stable (3030d4c7ff0) + +#### Backward Incompatible Change +* Rename cache commands: `show caches` -> `show filesystem caches`, `describe cache` -> `describe filesystem cache`. [#41508](https://github.com/ClickHouse/ClickHouse/pull/41508) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Remove support for the `WITH TIMEOUT` section for `LIVE VIEW`. This closes [#40557](https://github.com/ClickHouse/ClickHouse/issues/40557). [#42173](https://github.com/ClickHouse/ClickHouse/pull/42173) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### New Feature +* Add Rust code support into ClickHouse with BLAKE3 hash-function library as an example. [#33435](https://github.com/ClickHouse/ClickHouse/pull/33435) ([BoloniniD](https://github.com/BoloniniD)). +* This is the initial implement of Kusto Query Language. (MVP). [#37961](https://github.com/ClickHouse/ClickHouse/pull/37961) ([Yong Wang](https://github.com/kashwy)). 
+* Support limiting of temporary data stored on disk using settings `max_temporary_data_on_disk_size_for_user`/`max_temporary_data_on_disk_size_for_query`. [#40893](https://github.com/ClickHouse/ClickHouse/pull/40893) ([Vladimir C](https://github.com/vdimir)). +* Support Java integers hashing in `javaHash`. [#41131](https://github.com/ClickHouse/ClickHouse/pull/41131) ([JackyWoo](https://github.com/JackyWoo)). +* Support an OpenSSL in-house build like the BoringSSL submodule. The build flag ENABLE_CH_BUNDLE_BORINGSSL is used to choose between BoringSSL and OpenSSL. By default, the BoringSSL in-house build is used. [#41142](https://github.com/ClickHouse/ClickHouse/pull/41142) ([MeenaRenganathan22](https://github.com/MeenaRenganathan22)). +* Composable protocol configuration is added. [#41198](https://github.com/ClickHouse/ClickHouse/pull/41198) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Add OpenTelemetry support to ON CLUSTER DDL (requires `distributed_ddl_entry_format_version` to be set to 4). [#41484](https://github.com/ClickHouse/ClickHouse/pull/41484) ([Frank Chen](https://github.com/FrankChen021)). +* Add setting `format_json_object_each_row_column_for_object_name` to write/parse object name as column value in JSONObjectEachRow format. [#41703](https://github.com/ClickHouse/ClickHouse/pull/41703) ([Kruglov Pavel](https://github.com/Avogar)). +* Add Morton coding (Z-curve) encode/decode functions. [#41753](https://github.com/ClickHouse/ClickHouse/pull/41753) ([Constantine Peresypkin](https://github.com/pkit)). +* Implement support for different UUID binary formats with support for the two most prevalent ones: the default big-endian and Microsoft's mixed-endian as specified in [RFC 4122](https://datatracker.ietf.org/doc/html/rfc4122#section-4.1.1). [#42108](https://github.com/ClickHouse/ClickHouse/pull/42108) ([ltrk2](https://github.com/ltrk2)). +* Added an aggregate function `analysisOfVariance` (`anova`) to perform a statistical test over several groups of normally distributed observations to find out whether all groups have the same mean or not. Original PR [#37872](https://github.com/ClickHouse/ClickHouse/issues/37872). [#42131](https://github.com/ClickHouse/ClickHouse/pull/42131) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Add support for `SET setting_name = DEFAULT`. [#42187](https://github.com/ClickHouse/ClickHouse/pull/42187) ([Filatenkov Artur](https://github.com/FArthur-cmd)). +* Add `URL` functions which conform to the RFC. Functions include: `cutToFirstSignificantSubdomainCustomRFC`, `cutToFirstSignificantSubdomainCustomWithWWWRFC`, `cutToFirstSignificantSubdomainRFC`, `cutToFirstSignificantSubdomainWithWWWRFC`, `domainRFC`, `domainWithoutWWWRFC`, `firstSignificantSubdomainCustomRFC`, `firstSignificantSubdomainRFC`, `portRFC`, `topLevelDomainRFC`. [#42274](https://github.com/ClickHouse/ClickHouse/pull/42274) ([Quanfa Fu](https://github.com/dentiscalprum)). +* Added functions (`randUniform`, `randNormal`, `randLogNormal`, `randExponential`, `randChiSquared`, `randStudentT`, `randFisherF`, `randBernoulli`, `randBinomial`, `randNegativeBinomial`, `randPoisson`) to generate random values according to the specified distributions. This closes [#21834](https://github.com/ClickHouse/ClickHouse/issues/21834). [#42411](https://github.com/ClickHouse/ClickHouse/pull/42411) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
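As a minimal sketch of how the new distribution functions from the last entry above might be exercised (assuming a locally running server reachable via `clickhouse-client` as the default user; argument semantics are simplified here and should be taken from the documentation):

```bash
#!/usr/bin/env bash
# Smoke-test a few of the random-distribution functions listed in the entry above.
clickhouse-client --query "
    SELECT
        randUniform(0, 10) AS u,  -- uniformly distributed on [0, 10]
        randNormal(0, 1)   AS n,  -- normally distributed around 0
        randPoisson(5)     AS p   -- Poisson-distributed with parameter 5
    FROM numbers(3)"

# The new 'SET setting_name = DEFAULT' syntax (also listed above) can be checked the same way:
clickhouse-client --query "SET max_threads = DEFAULT"
```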
+ +#### Performance Improvement +* Implement operator precedence element parser to resolve stack overflow issues and make the required stack size smaller. [#34892](https://github.com/ClickHouse/ClickHouse/pull/34892) ([Nikolay Degterinsky](https://github.com/evillique)). +* DISTINCT in order optimization leverage sorting properties of data streams. This improvement will enable reading in order for DISTINCT if applicable (before it was necessary to provide ORDER BY for columns in DISTINCT). [#41014](https://github.com/ClickHouse/ClickHouse/pull/41014) ([Igor Nikonov](https://github.com/devcrafter)). +* ColumnVector: optimize UInt8 index with AVX512VBMI. [#41247](https://github.com/ClickHouse/ClickHouse/pull/41247) ([Guo Wangyang](https://github.com/guowangy)). +* The performance experiments of **SSB** (Star Schema Benchmark) on the ICX device (Intel Xeon Platinum 8380 CPU, 80 cores, 160 threads) shows that this change could bring a **2.95x** improvement of the geomean of all subcases' QPS. [#41675](https://github.com/ClickHouse/ClickHouse/pull/41675) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). +* Fixed slowness in JSONExtract with LowCardinality(String) tuples. [#41726](https://github.com/ClickHouse/ClickHouse/pull/41726) ([AlfVII](https://github.com/AlfVII)). +* Add ldapr capabilities to AArch64 builds. This is supported from Graviton 2+, Azure and GCP instances. Only appeared in clang-15 [not so long ago](https://github.com/llvm/llvm-project/commit/9609b5daffe9fd28d83d83da895abc5113f76c24). [#41778](https://github.com/ClickHouse/ClickHouse/pull/41778) ([Daniel Kutenin](https://github.com/danlark1)). +* Improve performance when comparing strings and one argument is empty constant string. [#41870](https://github.com/ClickHouse/ClickHouse/pull/41870) ([Jiebin Sun](https://github.com/jiebinn)). +* optimize insertFrom of ColumnAggregateFunction to share Aggregate State in some cases. [#41960](https://github.com/ClickHouse/ClickHouse/pull/41960) ([flynn](https://github.com/ucasfl)). +* Relax the "Too many parts" threshold. This closes [#6551](https://github.com/ClickHouse/ClickHouse/issues/6551). Now ClickHouse will allow more parts in a partition if the average part size is large enough (at least 10 GiB). This allows to have up to petabytes of data in a single partition of a single table on a single server, which is possible using disk shelves or object storage. [#42002](https://github.com/ClickHouse/ClickHouse/pull/42002) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Make writing to AzureBlobStorage more efficient (respect `max_single_part_upload_size` instead of writing a block per each buffer size). Inefficiency mentioned in [#41754](https://github.com/ClickHouse/ClickHouse/issues/41754). [#42041](https://github.com/ClickHouse/ClickHouse/pull/42041) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Make thread ids in the process list and query_log unique to avoid waste. [#42180](https://github.com/ClickHouse/ClickHouse/pull/42180) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Improvement +* Added new infrastructure for query analysis and planning under `allow_experimental_analyzer` setting. [#31796](https://github.com/ClickHouse/ClickHouse/pull/31796) ([Maksim Kita](https://github.com/kitaisreal)). +* * Support expression `(EXPLAIN SELECT ...)` in a subquery. Queries like `SELECT * FROM (EXPLAIN PIPELINE SELECT col FROM TABLE ORDER BY col)` became valid. 
[#40630](https://github.com/ClickHouse/ClickHouse/pull/40630) ([Vladimir C](https://github.com/vdimir)). +* Currently, changing `async_insert_max_data_size` or `async_insert_busy_timeout_ms` in the scope of a query makes no sense, and this leads to a bad user experience, e.g. when a user wants to insert data rarely and doesn't have access to the server config to tune the default settings. [#40668](https://github.com/ClickHouse/ClickHouse/pull/40668) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Embedded Keeper will always start in the background, allowing ClickHouse to start without achieving quorum. [#40991](https://github.com/ClickHouse/ClickHouse/pull/40991) ([Antonio Andelic](https://github.com/antonio2368)). +* Improvements for reading from remote filesystems, made threadpool size for reads/writes configurable. Closes [#41070](https://github.com/ClickHouse/ClickHouse/issues/41070). [#41011](https://github.com/ClickHouse/ClickHouse/pull/41011) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Made reestablishing a new connection more reactive in case of expiration of the previous one. Previously there was a task which spawns every minute by default, and thus a table could be in a readonly state for about this time. [#41092](https://github.com/ClickHouse/ClickHouse/pull/41092) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Support all combinator combinations in WindowTransform/arrayReduce*/initializeAggregation/aggregate function versioning. Previously combinators like `ForEach/Resample/Map` didn't work in these places; using them led to an exception like `State function ... inserts results into non-state column`. [#41107](https://github.com/ClickHouse/ClickHouse/pull/41107) ([Kruglov Pavel](https://github.com/Avogar)). +* Now projections can be used with zero copy replication. [#41147](https://github.com/ClickHouse/ClickHouse/pull/41147) ([alesapin](https://github.com/alesapin)). +* Add function tryDecrypt that returns NULL when decryption fails (e.g. decrypting with an incorrect key) instead of throwing an exception. [#41206](https://github.com/ClickHouse/ClickHouse/pull/41206) ([Duc Canh Le](https://github.com/canhld94)). +* Add the `unreserved_space` column to the `system.disks` table to check how much space is not taken by reservations per disk. [#41254](https://github.com/ClickHouse/ClickHouse/pull/41254) ([filimonov](https://github.com/filimonov)). +* Support S3 authorisation headers from AST arguments. [#41261](https://github.com/ClickHouse/ClickHouse/pull/41261) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add setting 'allow_implicit_no_password' that forbids creating a user with no password unless 'IDENTIFIED WITH no_password' is explicitly specified. [#41341](https://github.com/ClickHouse/ClickHouse/pull/41341) ([Nikolay Degterinsky](https://github.com/evillique)). +* Keeper improvement: add support for uploading snapshots to S3. S3 information can be defined inside `keeper_server.s3_snapshot`. [#41342](https://github.com/ClickHouse/ClickHouse/pull/41342) ([Antonio Andelic](https://github.com/antonio2368)). +* Add support for MultiRead in Keeper and internal ZooKeeper client. [#41410](https://github.com/ClickHouse/ClickHouse/pull/41410) ([Antonio Andelic](https://github.com/antonio2368)). +* Add support for comparing a Decimal type with a floating-point literal in the IN operator. [#41544](https://github.com/ClickHouse/ClickHouse/pull/41544) ([liang.huang](https://github.com/lhuang09287750)). +* Allow readable size values in cache config.
[#41688](https://github.com/ClickHouse/ClickHouse/pull/41688) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Check file path for path traversal attacks in errors logger for input formats. [#41694](https://github.com/ClickHouse/ClickHouse/pull/41694) ([Kruglov Pavel](https://github.com/Avogar)). +* ClickHouse could cache stale DNS entries for some period of time (15 seconds by default) until the cache won't be updated asynchronously. During these period ClickHouse can nevertheless try to establish a connection and produce errors. This behaviour is fixed. [#41707](https://github.com/ClickHouse/ClickHouse/pull/41707) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Add interactive history search with fzf-like utility (fzf/sk) for `clickhouse-client`/`clickhouse-local` (note you can use `FZF_DEFAULT_OPTS`/`SKIM_DEFAULT_OPTIONS` to additionally configure the behavior). [#41730](https://github.com/ClickHouse/ClickHouse/pull/41730) ([Azat Khuzhin](https://github.com/azat)). +* For client when connecting to a secure server with invalid certificate only allow to proceed with '--accept-certificate' flag. [#41743](https://github.com/ClickHouse/ClickHouse/pull/41743) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Add function "tryBase58Decode()", similar to the existing function "tryBase64Decode()". [#41824](https://github.com/ClickHouse/ClickHouse/pull/41824) ([Robert Schulze](https://github.com/rschu1ze)). +* Improve feedback when replacing partition with different primary key. Fixes [#34798](https://github.com/ClickHouse/ClickHouse/issues/34798). [#41838](https://github.com/ClickHouse/ClickHouse/pull/41838) ([Salvatore](https://github.com/tbsal)). +* Replace back `clickhouse su` command with `sudo -u` in start in order to respect limits in `/etc/security/limits.conf`. [#41847](https://github.com/ClickHouse/ClickHouse/pull/41847) ([Eugene Konkov](https://github.com/ekonkov)). +* Fix parallel parsing: segmentator now checks max_block_size. [#41852](https://github.com/ClickHouse/ClickHouse/pull/41852) ([Vitaly Baranov](https://github.com/vitlibar)). +* Don't report TABLE_IS_DROPPED exception in order to skip table in case is was just dropped. [#41908](https://github.com/ClickHouse/ClickHouse/pull/41908) ([AlfVII](https://github.com/AlfVII)). +* Improve option enable_extended_results_for_datetime_functions to return results of type DateTime64 for functions toStartOfDay, toStartOfHour, toStartOfFifteenMinutes, toStartOfTenMinutes, toStartOfFiveMinutes, toStartOfMinute and timeSlot. [#41910](https://github.com/ClickHouse/ClickHouse/pull/41910) ([Roman Vasin](https://github.com/rvasin)). +* Improve DateTime type inference for text formats. Now it respect setting `date_time_input_format` and doesn't try to infer datetimes from numbers as timestamps. Closes [#41389](https://github.com/ClickHouse/ClickHouse/issues/41389) Closes [#42206](https://github.com/ClickHouse/ClickHouse/issues/42206). [#41912](https://github.com/ClickHouse/ClickHouse/pull/41912) ([Kruglov Pavel](https://github.com/Avogar)). +* Remove confusing warning when inserting with `perform_ttl_move_on_insert`=false. [#41980](https://github.com/ClickHouse/ClickHouse/pull/41980) ([Vitaly Baranov](https://github.com/vitlibar)). +* Allow user to write `countState(*)` similar to `count(*)`. This closes [#9338](https://github.com/ClickHouse/ClickHouse/issues/9338). [#41983](https://github.com/ClickHouse/ClickHouse/pull/41983) ([Amos Bird](https://github.com/amosbird)). +* - Fix rankCorr size overflow. 
[#42020](https://github.com/ClickHouse/ClickHouse/pull/42020) ([Duc Canh Le](https://github.com/canhld94)). +* Added an option to specify an arbitrary string as an environment name in the Sentry's config for more handy reports. [#42037](https://github.com/ClickHouse/ClickHouse/pull/42037) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Added system table `asynchronous_insert_log `. It contains information about asynchronous inserts (including results of queries in fire-and-forget mode (with `wait_for_async_insert=0`)) for better introspection. [#42040](https://github.com/ClickHouse/ClickHouse/pull/42040) ([Anton Popov](https://github.com/CurtizJ)). +* Fix parsing out-of-range Date from CSV:. [#42044](https://github.com/ClickHouse/ClickHouse/pull/42044) ([Andrey Zvonov](https://github.com/zvonand)). +* parseDataTimeBestEffort support comma between date and time. Closes [#42038](https://github.com/ClickHouse/ClickHouse/issues/42038). [#42049](https://github.com/ClickHouse/ClickHouse/pull/42049) ([flynn](https://github.com/ucasfl)). +* Add support for methods lz4, bz2, snappy in 'Accept-Encoding'. [#42071](https://github.com/ClickHouse/ClickHouse/pull/42071) ([Nikolay Degterinsky](https://github.com/evillique)). +* Various minor fixes for BLAKE3 function. [#42073](https://github.com/ClickHouse/ClickHouse/pull/42073) ([BoloniniD](https://github.com/BoloniniD)). +* Improved stale replica recovery process for `ReplicatedMergeTree`. If lost replica have some parts which absent on a healthy replica, but these parts should appear in future according to replication queue of the healthy replica, then lost replica will keep such parts instead of detaching them. [#42134](https://github.com/ClickHouse/ClickHouse/pull/42134) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Support BACKUP to S3 with as-is path/data structure. [#42232](https://github.com/ClickHouse/ClickHouse/pull/42232) ([Azat Khuzhin](https://github.com/azat)). +* Add a possibility to use Date32 arguments for date_diff function. Fix issue in date_diff function when using DateTime64 arguments with start date before Unix epoch and end date after Unix epoch. [#42308](https://github.com/ClickHouse/ClickHouse/pull/42308) ([Roman Vasin](https://github.com/rvasin)). +* When uploading big parts to minio, 'Complete Multipart Upload' can take a long time. Minio sends heartbeats every 10 seconds (see https://github.com/minio/minio/pull/7198). But clickhouse times out earlier, because the default send/receive timeout is [set](https://github.com/ClickHouse/ClickHouse/blob/cc24fcd6d5dfb67f5f66f5483e986bd1010ad9cf/src/IO/S3/PocoHTTPClient.cpp#L123) to 5 seconds. [#42321](https://github.com/ClickHouse/ClickHouse/pull/42321) ([filimonov](https://github.com/filimonov)). +* Add `S3` as a new type of the destination of backups. [#42333](https://github.com/ClickHouse/ClickHouse/pull/42333) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix rarely invalid cast of aggregate state types with complex types such as Decimal. This fixes [#42408](https://github.com/ClickHouse/ClickHouse/issues/42408). [#42417](https://github.com/ClickHouse/ClickHouse/pull/42417) ([Amos Bird](https://github.com/amosbird)). +* Support skipping cache completely (both download to cache and reading cached data) in case the requested read range exceeds the threshold defined by cache setting `bypass_cache_threashold`, requires to be enabled with `enable_bypass_cache_with_threshold`). 
[#42418](https://github.com/ClickHouse/ClickHouse/pull/42418) ([Han Shukai](https://github.com/KinderRiven)). +* Merge parts if every part in the range is older than a certain threshold. The threshold can be set by using `min_age_to_force_merge_seconds`. This closes [#35836](https://github.com/ClickHouse/ClickHouse/issues/35836). [#42423](https://github.com/ClickHouse/ClickHouse/pull/42423) ([Antonio Andelic](https://github.com/antonio2368)). +* Enabled CompiledExpressionCache in clickhouse-local. [#42477](https://github.com/ClickHouse/ClickHouse/pull/42477) ([AlfVII](https://github.com/AlfVII)). +* Remove support for the `{database}` macro from the client's prompt. It was displayed incorrectly if the database was unspecified and it was not updated on `USE` statements. This closes [#25891](https://github.com/ClickHouse/ClickHouse/issues/25891). [#42508](https://github.com/ClickHouse/ClickHouse/pull/42508) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* - Improve the time to recover lost keeper connections. [#42541](https://github.com/ClickHouse/ClickHouse/pull/42541) ([Raúl Marín](https://github.com/Algunenano)). +* Allow to use Date32 arguments for dateName function. [#42554](https://github.com/ClickHouse/ClickHouse/pull/42554) ([Roman Vasin](https://github.com/rvasin)). + +#### Bug Fix +* Now filters with NULL literals will be used during index analysis. This closes https://github.com/ClickHouse/ClickHouse/pull/41814 [#34063](https://github.com/ClickHouse/ClickHouse/issues/34063). [#41842](https://github.com/ClickHouse/ClickHouse/pull/41842) ([Amos Bird](https://github.com/amosbird)). +* - Choose correct aggregation method for LowCardinality with BigInt. [#42342](https://github.com/ClickHouse/ClickHouse/pull/42342) ([Duc Canh Le](https://github.com/canhld94)). +* Fix using subqueries in row policy filters. This PR fixes [#32463](https://github.com/ClickHouse/ClickHouse/issues/32463). [#42562](https://github.com/ClickHouse/ClickHouse/pull/42562) ([Vitaly Baranov](https://github.com/vitlibar)). + +#### Build/Testing/Packaging Improvement +* Added support of WHERE clause generation to AST Fuzzer and possibility to add or remove ORDER BY and WHERE clause. [#38519](https://github.com/ClickHouse/ClickHouse/pull/38519) ([Ilya Yatsishin](https://github.com/qoega)). +* Aarch64 binaries now require at least ARMv8.2, released in 2016. Most notably, this enables use of ARM LSE, i.e. native atomic operations. Also, CMake build option "NO_ARMV81_OR_HIGHER" has been added to allow compilation of binaries for older ARMv8.0 hardware, e.g. Raspberry Pi 4. [#41610](https://github.com/ClickHouse/ClickHouse/pull/41610) ([Robert Schulze](https://github.com/rschu1ze)). +* After updating runners to 22.04 cgroups stopped to work in privileged mode, here's the issue https://github.com/moby/moby/issues/42275#issuecomment-1115055846. [#41857](https://github.com/ClickHouse/ClickHouse/pull/41857) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Allow building ClickHouse with Musl (small changes after it was already supported but broken). [#41987](https://github.com/ClickHouse/ClickHouse/pull/41987) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* - Add the `$CLICKHOUSE_CRONFILE` file checking to avoid running the `sed` command to get the file not found error. [#42081](https://github.com/ClickHouse/ClickHouse/pull/42081) ([Chun-Sheng, Li](https://github.com/peter279k)). +* Update cctz to the latest master, update tzdb to 2020e. 
[#42273](https://github.com/ClickHouse/ClickHouse/pull/42273) ([Dom Del Nano](https://github.com/ddelnano)). +* Update tzdata to 2022e to support the new timezone changes. Palestine transitions are now Saturdays at 02:00. Simplify three Ukraine zones into one. Jordan and Syria switch from +02/+03 with DST to year-round +03. (https://data.iana.org/time-zones/tzdb/NEWS). This closes [#42252](https://github.com/ClickHouse/ClickHouse/issues/42252). [#42327](https://github.com/ClickHouse/ClickHouse/pull/42327) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix power8 support. [#42462](https://github.com/ClickHouse/ClickHouse/pull/42462) ([Boris Kuschel](https://github.com/bkuschel)). + +#### Bug Fix (user-visible misbehavior in official stable or prestable release) + +* Several fixes for DiskWeb. [#41652](https://github.com/ClickHouse/ClickHouse/pull/41652) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fixes issue when docker run will fail if "https_port" is not present in config. [#41693](https://github.com/ClickHouse/ClickHouse/pull/41693) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Mutations were not cancelled properly on server shutdown or `SYSTEM STOP MERGES` query and cancellation might take long time, it's fixed. [#41699](https://github.com/ClickHouse/ClickHouse/pull/41699) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix wrong result of queries with `ORDER BY` or `GROUP BY` by columns from prefix of sorting key, wrapped into monotonic functions, with enable "read in order" optimization (settings `optimize_read_in_order` and `optimize_aggregation_in_order`). [#41701](https://github.com/ClickHouse/ClickHouse/pull/41701) ([Anton Popov](https://github.com/CurtizJ)). +* Fix possible crash in `SELECT` from `Merge` table with enabled `optimize_monotonous_functions_in_order_by` setting. Fixes [#41269](https://github.com/ClickHouse/ClickHouse/issues/41269). [#41740](https://github.com/ClickHouse/ClickHouse/pull/41740) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fixed "Part ... intersects part ..." error that might happen in extremely rare cases if replica was restarted just after detaching some part as broken. [#41741](https://github.com/ClickHouse/ClickHouse/pull/41741) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Don't allow to create or alter merge tree tables with virtual column name _row_exists, which is reserved for lightweight delete. Fixed [#41716](https://github.com/ClickHouse/ClickHouse/issues/41716). [#41763](https://github.com/ClickHouse/ClickHouse/pull/41763) ([Jianmei Zhang](https://github.com/zhangjmruc)). +* Fix a bug that CORS headers are missing in some HTTP responses. [#41792](https://github.com/ClickHouse/ClickHouse/pull/41792) ([Frank Chen](https://github.com/FrankChen021)). +* 22.9 might fail to startup `ReplicatedMergeTree` table if that table was created by 20.3 or older version and was never altered, it's fixed. Fixes [#41742](https://github.com/ClickHouse/ClickHouse/issues/41742). [#41796](https://github.com/ClickHouse/ClickHouse/pull/41796) ([Alexander Tokmakov](https://github.com/tavplubix)). +* When the batch sending fails for some reason, it cannot be automatically recovered, and if it is not processed in time, it will lead to accumulation, and the printed error message will become longer and longer, which will cause the http thread to block. [#41813](https://github.com/ClickHouse/ClickHouse/pull/41813) ([zhongyuankai](https://github.com/zhongyuankai)). 
+* Fix compact parts with compressed marks setting. Fixes [#41783](https://github.com/ClickHouse/ClickHouse/issues/41783) and [#41746](https://github.com/ClickHouse/ClickHouse/issues/41746). [#41823](https://github.com/ClickHouse/ClickHouse/pull/41823) ([alesapin](https://github.com/alesapin)). +* Old versions of the Replicated database engine don't have a special marker in [Zoo]Keeper. We need to check only whether the node contains some obscure data instead of the special marker. [#41875](https://github.com/ClickHouse/ClickHouse/pull/41875) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix possible exception in fs cache. [#41884](https://github.com/ClickHouse/ClickHouse/pull/41884) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix use_environment_credentials for the s3 table function. [#41970](https://github.com/ClickHouse/ClickHouse/pull/41970) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fixed "Directory already exists and is not empty" error on detaching a broken part that might prevent a `ReplicatedMergeTree` table from starting replication. Fixes [#40957](https://github.com/ClickHouse/ClickHouse/issues/40957). [#41981](https://github.com/ClickHouse/ClickHouse/pull/41981) ([Alexander Tokmakov](https://github.com/tavplubix)). +* toDateTime64() now returns the same output with negative integer and float arguments. [#42025](https://github.com/ClickHouse/ClickHouse/pull/42025) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix write into AzureBlobStorage. Partially closes [#41754](https://github.com/ClickHouse/ClickHouse/issues/41754). [#42034](https://github.com/ClickHouse/ClickHouse/pull/42034) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix the bzip2 decoding issue for specific bzip2 files. [#42046](https://github.com/ClickHouse/ClickHouse/pull/42046) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix SQL function "toLastDayOfMonth()" with setting "enable_extended_results_for_datetime_functions = 1" at the beginning of the extended range (January 1900). Fix SQL function "toRelativeWeekNum()" with setting "enable_extended_results_for_datetime_functions = 1" at the end of the extended range (December 2299). Improve the performance of SQL functions "toISOYear()", "toFirstDayNumOfISOYearIndex()" and "toYearWeekOfNewyearMode()" by avoiding unnecessary index arithmetic. [#42084](https://github.com/ClickHouse/ClickHouse/pull/42084) ([Roman Vasin](https://github.com/rvasin)). +* The maximum size of fetches for each table was accidentally set to 8 while the pool size could be bigger. Now the maximum size of fetches for each table is equal to the pool size. [#42090](https://github.com/ClickHouse/ClickHouse/pull/42090) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* A table might be shut down and a dictionary might be detached before checking whether it can be dropped without breaking dependencies between tables; it's fixed. Fixes [#41982](https://github.com/ClickHouse/ClickHouse/issues/41982). [#42106](https://github.com/ClickHouse/ClickHouse/pull/42106) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix bad inefficiency of `remote_filesystem_read_method=read` with filesystem cache. Closes [#42125](https://github.com/ClickHouse/ClickHouse/issues/42125). [#42129](https://github.com/ClickHouse/ClickHouse/pull/42129) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix possible timeout exception for distributed queries with use_hedged_requests=0.
[#42130](https://github.com/ClickHouse/ClickHouse/pull/42130) ([Azat Khuzhin](https://github.com/azat)). +* Fixed a minor bug inside function `runningDifference` in case of using it with `Date32` type. Previously `Date` was used and it may cause some logical errors like `Bad cast from type DB::ColumnVector to DB::ColumnVector'`. [#42143](https://github.com/ClickHouse/ClickHouse/pull/42143) ([Alfred Xu](https://github.com/sperlingxx)). +* Fix reusing of files > 4GB from base backup. [#42146](https://github.com/ClickHouse/ClickHouse/pull/42146) ([Azat Khuzhin](https://github.com/azat)). +* DISTINCT in order fails with LOGICAL_ERROR if first column in sorting key contains function. [#42186](https://github.com/ClickHouse/ClickHouse/pull/42186) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix a bug with projections and the `aggregate_functions_null_for_empty` setting. This bug is very rare and appears only if you enable the `aggregate_functions_null_for_empty` setting in the server's config. This closes [#41647](https://github.com/ClickHouse/ClickHouse/issues/41647). [#42198](https://github.com/ClickHouse/ClickHouse/pull/42198) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* - Fix read from buffer with read in order desc. [#42236](https://github.com/ClickHouse/ClickHouse/pull/42236) ([Duc Canh Le](https://github.com/canhld94)). +* Fix a bug which prevents ClickHouse to start when background_pool_size setting is set on default profile but background_merges_mutations_concurrency_ratio is not. [#42315](https://github.com/ClickHouse/ClickHouse/pull/42315) ([nvartolomei](https://github.com/nvartolomei)). +* `ALTER UPDATE` of attached part (with columns different from table schema) could create an invalid `columns.txt` metadata on disk. Reading from such part could fail with errors or return invalid data. Fixes [#42161](https://github.com/ClickHouse/ClickHouse/issues/42161). [#42319](https://github.com/ClickHouse/ClickHouse/pull/42319) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Setting `additional_table_filters` were not applied to `Distributed` storage. Fixes [#41692](https://github.com/ClickHouse/ClickHouse/issues/41692). [#42322](https://github.com/ClickHouse/ClickHouse/pull/42322) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix a data race in query finish/cancel. This closes [#42346](https://github.com/ClickHouse/ClickHouse/issues/42346). [#42362](https://github.com/ClickHouse/ClickHouse/pull/42362) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* This reverts [#40217](https://github.com/ClickHouse/ClickHouse/issues/40217) which introduced a regression in date/time functions. [#42367](https://github.com/ClickHouse/ClickHouse/pull/42367) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix assert cast in join on falsy condition, Close [#42380](https://github.com/ClickHouse/ClickHouse/issues/42380). [#42407](https://github.com/ClickHouse/ClickHouse/pull/42407) ([Vladimir C](https://github.com/vdimir)). +* Fix buffer overflow in the processing of Decimal data types. This closes [#42451](https://github.com/ClickHouse/ClickHouse/issues/42451). [#42465](https://github.com/ClickHouse/ClickHouse/pull/42465) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* `AggregateFunctionQuantile` now correctly works with UInt128 columns. Previously, the quantile state interpreted `UInt128` columns as `Int128` which could have led to incorrect results. 
[#42473](https://github.com/ClickHouse/ClickHouse/pull/42473) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix bad_assert during INSERT into Annoy indexes over non-Float32 columns. [#42485](https://github.com/ClickHouse/ClickHouse/pull/42485) ([Robert Schulze](https://github.com/rschu1ze)). +* This closes [#42453](https://github.com/ClickHouse/ClickHouse/issues/42453). [#42573](https://github.com/ClickHouse/ClickHouse/pull/42573) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix function `arrayElement` with type `Map` with `Nullable` values and `Nullable` index. [#42623](https://github.com/ClickHouse/ClickHouse/pull/42623) ([Anton Popov](https://github.com/CurtizJ)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Fix unexpected table loading error when partition key contains alias function names during server upgrade. [#36379](https://github.com/ClickHouse/ClickHouse/pull/36379) ([Amos Bird](https://github.com/amosbird)). + +#### Build Improvement + +* Fixed SipHash Endian issue for s390x platform. [#41372](https://github.com/ClickHouse/ClickHouse/pull/41372) ([Harry Lee](https://github.com/HarryLeeIBM)). +* Enable lib base64 for ppc64le platform. [#41974](https://github.com/ClickHouse/ClickHouse/pull/41974) ([Suzy Wang](https://github.com/SuzyWangIBMer)). +* Fixed Endian issue in T64 compression codec on s390x. [#42314](https://github.com/ClickHouse/ClickHouse/pull/42314) ([Harry Lee](https://github.com/HarryLeeIBM)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Revert "Disable parallel s3 multipart upload for part moves."'. [#41681](https://github.com/ClickHouse/ClickHouse/pull/41681) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Revert "Attempt to fix abort from parallel parsing"'. [#42545](https://github.com/ClickHouse/ClickHouse/pull/42545) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* NO CL ENTRY: 'Revert "Low cardinality cases moved to the function for its corresponding type"'. [#42633](https://github.com/ClickHouse/ClickHouse/pull/42633) ([Anton Popov](https://github.com/CurtizJ)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Test for ignore function in PARTITION KEY [#39875](https://github.com/ClickHouse/ClickHouse/pull/39875) ([UnamedRus](https://github.com/UnamedRus)). +* Add fuzzer for table definitions [#40096](https://github.com/ClickHouse/ClickHouse/pull/40096) ([Anton Popov](https://github.com/CurtizJ)). +* Add missing tests for legacy geobase [#40684](https://github.com/ClickHouse/ClickHouse/pull/40684) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove obsolete comment from the config.xml [#41518](https://github.com/ClickHouse/ClickHouse/pull/41518) ([filimonov](https://github.com/filimonov)). +* Resurrect parallel distributed insert select with s3Cluster [#41535](https://github.com/ClickHouse/ClickHouse/pull/41535) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Update runners to a recent version to install on 22.04 [#41556](https://github.com/ClickHouse/ClickHouse/pull/41556) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Refactor wiping sensitive information from logs. [#41562](https://github.com/ClickHouse/ClickHouse/pull/41562) ([Vitaly Baranov](https://github.com/vitlibar)). +* Better S3 logs [#41587](https://github.com/ClickHouse/ClickHouse/pull/41587) ([Nikolay Degterinsky](https://github.com/evillique)). 
+* Fix typos in JSON formats after [#40910](https://github.com/ClickHouse/ClickHouse/issues/40910) [#41614](https://github.com/ClickHouse/ClickHouse/pull/41614) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix drop for KeeperMap [#41616](https://github.com/ClickHouse/ClickHouse/pull/41616) ([Antonio Andelic](https://github.com/antonio2368)). +* increase default max_suspicious_broken_parts to 100 [#41619](https://github.com/ClickHouse/ClickHouse/pull/41619) ([Denny Crane](https://github.com/den-crane)). +* Release AWS SDK log level + replace one exception [#41649](https://github.com/ClickHouse/ClickHouse/pull/41649) ([alesapin](https://github.com/alesapin)). +* Fix a destruction order for views ThreadStatus [#41650](https://github.com/ClickHouse/ClickHouse/pull/41650) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Add very explicit logging on disk choice for fetch [#41653](https://github.com/ClickHouse/ClickHouse/pull/41653) ([alesapin](https://github.com/alesapin)). +* Fix race between ~BackgroundSchedulePool and ~DNSCacheUpdater [#41654](https://github.com/ClickHouse/ClickHouse/pull/41654) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Add changelog for 22.9 [#41668](https://github.com/ClickHouse/ClickHouse/pull/41668) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Update version after release [#41670](https://github.com/ClickHouse/ClickHouse/pull/41670) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix error message [#41680](https://github.com/ClickHouse/ClickHouse/pull/41680) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add test for setting output_format_json_validate_utf8 [#41691](https://github.com/ClickHouse/ClickHouse/pull/41691) ([Kruglov Pavel](https://github.com/Avogar)). +* Resolve findings from clang-tidy [#41702](https://github.com/ClickHouse/ClickHouse/pull/41702) ([ltrk2](https://github.com/ltrk2)). +* Ignore Keeper errors from ReplicatedMergeTreeAttachThread in stress tests [#41717](https://github.com/ClickHouse/ClickHouse/pull/41717) ([Antonio Andelic](https://github.com/antonio2368)). +* Collect logs in Stress test using clickhouse-local [#41721](https://github.com/ClickHouse/ClickHouse/pull/41721) ([Antonio Andelic](https://github.com/antonio2368)). +* Disable flaky `test_merge_tree_azure_blob_storage` [#41722](https://github.com/ClickHouse/ClickHouse/pull/41722) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Update version_date.tsv and changelogs after v22.9.2.7-stable [#41724](https://github.com/ClickHouse/ClickHouse/pull/41724) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Fix part removal retries [#41728](https://github.com/ClickHouse/ClickHouse/pull/41728) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Try fix azure tests [#41731](https://github.com/ClickHouse/ClickHouse/pull/41731) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix test build [#41732](https://github.com/ClickHouse/ClickHouse/pull/41732) ([Robert Schulze](https://github.com/rschu1ze)). +* Change logging levels in cache [#41733](https://github.com/ClickHouse/ClickHouse/pull/41733) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Revert of "Revert the revert of "ColumnVector: optimize filter with AVX512 VBMI2 compress store" [#40033](https://github.com/ClickHouse/ClickHouse/issues/40033)" [#41752](https://github.com/ClickHouse/ClickHouse/pull/41752) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Fix SET query parameters formatting [#41755](https://github.com/ClickHouse/ClickHouse/pull/41755) ([Nikolay Degterinsky](https://github.com/evillique)). +* Support to run testcases on macOS [#41760](https://github.com/ClickHouse/ClickHouse/pull/41760) ([Frank Chen](https://github.com/FrankChen021)). +* Bump LLVM from 12 to 13 [#41762](https://github.com/ClickHouse/ClickHouse/pull/41762) ([Robert Schulze](https://github.com/rschu1ze)). +* ColumnVector: re-enable AVX512_VBMI/AVX512_VBMI2 optimized filter and index [#41765](https://github.com/ClickHouse/ClickHouse/pull/41765) ([Guo Wangyang](https://github.com/guowangy)). +* Update 02354_annoy.sql [#41767](https://github.com/ClickHouse/ClickHouse/pull/41767) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix the typo preventing building latest images [#41769](https://github.com/ClickHouse/ClickHouse/pull/41769) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Make automatic download script choose between ARMv8.0 or ARMv8.2 builds [#41775](https://github.com/ClickHouse/ClickHouse/pull/41775) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix tests for docker-ci [#41777](https://github.com/ClickHouse/ClickHouse/pull/41777) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Possible fix for KeeperMap drop [#41784](https://github.com/ClickHouse/ClickHouse/pull/41784) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix drop of completely dropped table [#41789](https://github.com/ClickHouse/ClickHouse/pull/41789) ([alesapin](https://github.com/alesapin)). +* Log git hash during startup [#41790](https://github.com/ClickHouse/ClickHouse/pull/41790) ([Robert Schulze](https://github.com/rschu1ze)). +* Revert "ColumnVector: optimize UInt8 index with AVX512VBMI ([#41247](https://github.com/ClickHouse/ClickHouse/issues/41247))" [#41797](https://github.com/ClickHouse/ClickHouse/pull/41797) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Small fix in dashboard [#41798](https://github.com/ClickHouse/ClickHouse/pull/41798) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Keep the most important log in stress tests [#41821](https://github.com/ClickHouse/ClickHouse/pull/41821) ([alesapin](https://github.com/alesapin)). +* Use copy for some operations instead of hardlinks [#41832](https://github.com/ClickHouse/ClickHouse/pull/41832) ([alesapin](https://github.com/alesapin)). +* Remove unused variable in registerStorageMergeTree.cpp [#41839](https://github.com/ClickHouse/ClickHouse/pull/41839) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix Jepsen [#41845](https://github.com/ClickHouse/ClickHouse/pull/41845) ([Antonio Andelic](https://github.com/antonio2368)). +* Increase `request_timeout_ms` for s3 tests in CI [#41853](https://github.com/ClickHouse/ClickHouse/pull/41853) ([Kseniia Sumarokova](https://github.com/kssenii)). +* tests: fix debug symbols (and possible crashes) for backward compatiblity check [#41854](https://github.com/ClickHouse/ClickHouse/pull/41854) ([Azat Khuzhin](https://github.com/azat)). +* Remove two redundant lines [#41856](https://github.com/ClickHouse/ClickHouse/pull/41856) ([alesapin](https://github.com/alesapin)). +* Infer Object type only when allow_experimental_object_type is enabled [#41858](https://github.com/ClickHouse/ClickHouse/pull/41858) ([Kruglov Pavel](https://github.com/Avogar)). 
+* Add default UNION/EXCEPT/INTERSECT to the echo query text [#41862](https://github.com/ClickHouse/ClickHouse/pull/41862) ([Nikolay Degterinsky](https://github.com/evillique)). +* Consolidate CMake-generated config headers [#41873](https://github.com/ClickHouse/ClickHouse/pull/41873) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix 02267_file_globs_schema_inference.sql flakiness [#41877](https://github.com/ClickHouse/ClickHouse/pull/41877) ([Kruglov Pavel](https://github.com/Avogar)). +* Docs: Remove obsolete modelEvaluate() mention [#41878](https://github.com/ClickHouse/ClickHouse/pull/41878) ([Robert Schulze](https://github.com/rschu1ze)). +* Better exception message for duplicate column names in schema inference [#41885](https://github.com/ClickHouse/ClickHouse/pull/41885) ([Kruglov Pavel](https://github.com/Avogar)). +* Docs: Reference external papers as DOIs [#41886](https://github.com/ClickHouse/ClickHouse/pull/41886) ([Robert Schulze](https://github.com/rschu1ze)). +* Make LDAPR a prerequisite for downloading the ARMv8.2 build [#41897](https://github.com/ClickHouse/ClickHouse/pull/41897) ([Robert Schulze](https://github.com/rschu1ze)). +* Another sync replicas in test_recovery_replica [#41898](https://github.com/ClickHouse/ClickHouse/pull/41898) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* remove unused code [#41921](https://github.com/ClickHouse/ClickHouse/pull/41921) ([flynn](https://github.com/ucasfl)). +* Move all queries for MV creation to the end of queue during recovering [#41932](https://github.com/ClickHouse/ClickHouse/pull/41932) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix broken test_disks_app_func [#41933](https://github.com/ClickHouse/ClickHouse/pull/41933) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Temporarily disable ThreadFuzzer with TSan [#41943](https://github.com/ClickHouse/ClickHouse/pull/41943) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Enable some disabled S3 tests [#41945](https://github.com/ClickHouse/ClickHouse/pull/41945) ([alesapin](https://github.com/alesapin)). +* QOL log improvements [#41947](https://github.com/ClickHouse/ClickHouse/pull/41947) ([Raúl Marín](https://github.com/Algunenano)). +* Fix non-deterministic test results [#41948](https://github.com/ClickHouse/ClickHouse/pull/41948) ([Robert Schulze](https://github.com/rschu1ze)). +* Earlier throw exception in PullingAsyncPipelineExecutor. [#41949](https://github.com/ClickHouse/ClickHouse/pull/41949) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix linker error [#41950](https://github.com/ClickHouse/ClickHouse/pull/41950) ([ltrk2](https://github.com/ltrk2)). +* Bump LLVM from 13 to 14 [#41951](https://github.com/ClickHouse/ClickHouse/pull/41951) ([Robert Schulze](https://github.com/rschu1ze)). +* Update version_date.tsv and changelogs after v22.3.13.80-lts [#41953](https://github.com/ClickHouse/ClickHouse/pull/41953) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v22.7.6.74-stable [#41954](https://github.com/ClickHouse/ClickHouse/pull/41954) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v22.8.6.71-lts [#41955](https://github.com/ClickHouse/ClickHouse/pull/41955) ([robot-clickhouse](https://github.com/robot-clickhouse)). 
+* Update version_date.tsv and changelogs after v22.9.3.18-stable [#41956](https://github.com/ClickHouse/ClickHouse/pull/41956) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Add a warning message to release.py script, require release type [#41975](https://github.com/ClickHouse/ClickHouse/pull/41975) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Rename max_temp_data_on_disk -> max_temporary_data_on_disk [#41984](https://github.com/ClickHouse/ClickHouse/pull/41984) ([Vladimir C](https://github.com/vdimir)). +* Add more checkStackSize calls [#41991](https://github.com/ClickHouse/ClickHouse/pull/41991) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix test 02403_big_http_chunk_size [#41996](https://github.com/ClickHouse/ClickHouse/pull/41996) ([Vitaly Baranov](https://github.com/vitlibar)). +* More sane behavior of part number thresholds override in query level settings [#42001](https://github.com/ClickHouse/ClickHouse/pull/42001) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove useless code [#42004](https://github.com/ClickHouse/ClickHouse/pull/42004) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Refactoring: Uninline some error handling methods [#42010](https://github.com/ClickHouse/ClickHouse/pull/42010) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix warning that ENABLE_REPLXX is unused [#42013](https://github.com/ClickHouse/ClickHouse/pull/42013) ([Robert Schulze](https://github.com/rschu1ze)). +* Drop leftovers of libexecinfo [#42014](https://github.com/ClickHouse/ClickHouse/pull/42014) ([Robert Schulze](https://github.com/rschu1ze)). +* More detailed exception message [#42022](https://github.com/ClickHouse/ClickHouse/pull/42022) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Build against an LLVM version which has clang[-extra-tools], lldb and lld removed [#42023](https://github.com/ClickHouse/ClickHouse/pull/42023) ([Robert Schulze](https://github.com/rschu1ze)). +* Add log message and lower the retry timeout in MergeTreeRestartingThread [#42026](https://github.com/ClickHouse/ClickHouse/pull/42026) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Update amqp-cpp [#42031](https://github.com/ClickHouse/ClickHouse/pull/42031) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix No such key during table drop [#42036](https://github.com/ClickHouse/ClickHouse/pull/42036) ([alesapin](https://github.com/alesapin)). +* Temporarily disable too aggressive tests [#42050](https://github.com/ClickHouse/ClickHouse/pull/42050) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix style check [#42055](https://github.com/ClickHouse/ClickHouse/pull/42055) ([Anton Popov](https://github.com/CurtizJ)). +* Function name normalization fix functions header [#42063](https://github.com/ClickHouse/ClickHouse/pull/42063) ([Maksim Kita](https://github.com/kitaisreal)). +* remove unused virtual keyword [#42065](https://github.com/ClickHouse/ClickHouse/pull/42065) ([flynn](https://github.com/ucasfl)). +* Fix crash in `SummingMergeTree` with `LowCardinality` [#42066](https://github.com/ClickHouse/ClickHouse/pull/42066) ([Anton Popov](https://github.com/CurtizJ)). +* Fix drop of completely dropped table [#42067](https://github.com/ClickHouse/ClickHouse/pull/42067) ([alesapin](https://github.com/alesapin)). +* Fix assertion in bloom filter index [#42072](https://github.com/ClickHouse/ClickHouse/pull/42072) ([Anton Popov](https://github.com/CurtizJ)). 
+* Ignore core.autocrlf for tests references [#42076](https://github.com/ClickHouse/ClickHouse/pull/42076) ([Azat Khuzhin](https://github.com/azat)). +* Fix progress for INSERT SELECT [#42078](https://github.com/ClickHouse/ClickHouse/pull/42078) ([Azat Khuzhin](https://github.com/azat)). +* Avoid adding extra new line after using fuzzy history search [#42080](https://github.com/ClickHouse/ClickHouse/pull/42080) ([Azat Khuzhin](https://github.com/azat)). +* Add `at` to runner AMI, bump gh runner version [#42082](https://github.com/ClickHouse/ClickHouse/pull/42082) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Use send_metadata instead of send_object_metadata [#42085](https://github.com/ClickHouse/ClickHouse/pull/42085) ([Elena Torró](https://github.com/elenatorro)). +* Docs: Preparations to remove misc statements page [#42086](https://github.com/ClickHouse/ClickHouse/pull/42086) ([Robert Schulze](https://github.com/rschu1ze)). +* Followup for TemporaryDataOnDisk [#42103](https://github.com/ClickHouse/ClickHouse/pull/42103) ([Vladimir C](https://github.com/vdimir)). +* Disable 02122_join_group_by_timeout for debug [#42104](https://github.com/ClickHouse/ClickHouse/pull/42104) ([Vladimir C](https://github.com/vdimir)). +* Update version_date.tsv and changelogs after v22.6.9.11-stable [#42114](https://github.com/ClickHouse/ClickHouse/pull/42114) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* JIT compilation migration to LLVM 15 [#42123](https://github.com/ClickHouse/ClickHouse/pull/42123) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix build without TSA [#42128](https://github.com/ClickHouse/ClickHouse/pull/42128) ([Raúl Marín](https://github.com/Algunenano)). +* Update codespell-ignore-words.list [#42132](https://github.com/ClickHouse/ClickHouse/pull/42132) ([Dan Roscigno](https://github.com/DanRoscigno)). +* Add null pointer checks [#42135](https://github.com/ClickHouse/ClickHouse/pull/42135) ([ltrk2](https://github.com/ltrk2)). +* Revert [#27787](https://github.com/ClickHouse/ClickHouse/issues/27787) [#42136](https://github.com/ClickHouse/ClickHouse/pull/42136) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Follow up for [#42129](https://github.com/ClickHouse/ClickHouse/issues/42129) [#42144](https://github.com/ClickHouse/ClickHouse/pull/42144) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix checking parent for old-format parts [#42147](https://github.com/ClickHouse/ClickHouse/pull/42147) ([alesapin](https://github.com/alesapin)). +* Revert "Resurrect parallel distributed insert select with s3Cluster [#42150](https://github.com/ClickHouse/ClickHouse/pull/42150) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Docs: Add "TABLE" to CHECK/DESCRIBE statements in sidebar [#42152](https://github.com/ClickHouse/ClickHouse/pull/42152) ([Robert Schulze](https://github.com/rschu1ze)). +* Add logging during merge tree startup [#42163](https://github.com/ClickHouse/ClickHouse/pull/42163) ([alesapin](https://github.com/alesapin)). +* Abort instead of `__builtin_unreachable` in debug builds [#42168](https://github.com/ClickHouse/ClickHouse/pull/42168) ([Alexander Tokmakov](https://github.com/tavplubix)). +* [RFC] Enable -Wshorten-64-to-32 [#42190](https://github.com/ClickHouse/ClickHouse/pull/42190) ([Azat Khuzhin](https://github.com/azat)). +* Fix dialect setting description [#42196](https://github.com/ClickHouse/ClickHouse/pull/42196) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). 
+* Add a test for #658 [#42197](https://github.com/ClickHouse/ClickHouse/pull/42197) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* use alias for MergeMutateSelectedEntry share ptr [#42211](https://github.com/ClickHouse/ClickHouse/pull/42211) ([Tian Xinhui](https://github.com/xinhuitian)). +* Fix LLVM build [#42216](https://github.com/ClickHouse/ClickHouse/pull/42216) ([Raúl Marín](https://github.com/Algunenano)). +* Exclude comments from style-check defined extern [#42217](https://github.com/ClickHouse/ClickHouse/pull/42217) ([Vladimir C](https://github.com/vdimir)). +* Update safeExit.cpp [#42220](https://github.com/ClickHouse/ClickHouse/pull/42220) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Disable concurrent parts removal [#42222](https://github.com/ClickHouse/ClickHouse/pull/42222) ([alesapin](https://github.com/alesapin)). +* Fail fast on empty URL in HDFS [#42223](https://github.com/ClickHouse/ClickHouse/pull/42223) ([Ilya Yatsishin](https://github.com/qoega)). +* Add a test for [#2389](https://github.com/ClickHouse/ClickHouse/issues/2389) [#42235](https://github.com/ClickHouse/ClickHouse/pull/42235) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Use MultiRead where possible [#42243](https://github.com/ClickHouse/ClickHouse/pull/42243) ([Antonio Andelic](https://github.com/antonio2368)). +* Minor cleanups of LLVM integration [#42249](https://github.com/ClickHouse/ClickHouse/pull/42249) ([Robert Schulze](https://github.com/rschu1ze)). +* remove useless code [#42253](https://github.com/ClickHouse/ClickHouse/pull/42253) ([flynn](https://github.com/ucasfl)). +* Early return of corner cases in selectPartsToMutate function [#42254](https://github.com/ClickHouse/ClickHouse/pull/42254) ([Tian Xinhui](https://github.com/xinhuitian)). +* Refactor the implementation of user-defined functions [#42263](https://github.com/ClickHouse/ClickHouse/pull/42263) ([Vitaly Baranov](https://github.com/vitlibar)). +* assert unused value in test_replicated_merge_tree_compatibility [#42266](https://github.com/ClickHouse/ClickHouse/pull/42266) ([nvartolomei](https://github.com/nvartolomei)). +* Fix Date Interval add/minus over DataTypeDate32 [#42279](https://github.com/ClickHouse/ClickHouse/pull/42279) ([Alfred Xu](https://github.com/sperlingxx)). +* Fix log-level in `clickhouse-disks` [#42302](https://github.com/ClickHouse/ClickHouse/pull/42302) ([Nikolay Degterinsky](https://github.com/evillique)). +* Remove forgotten debug logging [#42313](https://github.com/ClickHouse/ClickHouse/pull/42313) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix another trash in zero-copy replication [#42317](https://github.com/ClickHouse/ClickHouse/pull/42317) ([alesapin](https://github.com/alesapin)). +* go update for diagnostics tool [#42325](https://github.com/ClickHouse/ClickHouse/pull/42325) ([Dale McDiarmid](https://github.com/gingerwizard)). +* Better logging for asynchronous inserts [#42345](https://github.com/ClickHouse/ClickHouse/pull/42345) ([Anton Popov](https://github.com/CurtizJ)). +* Use nfpm packager for archlinux packages [#42349](https://github.com/ClickHouse/ClickHouse/pull/42349) ([Azat Khuzhin](https://github.com/azat)). +* Bump llvm/clang to 15.0.2 [#42351](https://github.com/ClickHouse/ClickHouse/pull/42351) ([Azat Khuzhin](https://github.com/azat)). +* Make getResource() independent from the order of the sections [#42353](https://github.com/ClickHouse/ClickHouse/pull/42353) ([Azat Khuzhin](https://github.com/azat)). 
+* Smaller threshold for multipart upload part size increase [#42392](https://github.com/ClickHouse/ClickHouse/pull/42392) ([alesapin](https://github.com/alesapin)). +* Better error message for unsupported delimiters in custom formats [#42406](https://github.com/ClickHouse/ClickHouse/pull/42406) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix formatting of `ALTER FREEZE` [#42409](https://github.com/ClickHouse/ClickHouse/pull/42409) ([Anton Popov](https://github.com/CurtizJ)). +* Replace table name in ast fuzzer more often [#42413](https://github.com/ClickHouse/ClickHouse/pull/42413) ([Anton Popov](https://github.com/CurtizJ)). +* Add *-15 tools to cmake.tools for GCC build [#42430](https://github.com/ClickHouse/ClickHouse/pull/42430) ([Ilya Yatsishin](https://github.com/qoega)). +* Deactivate tasks in ReplicatedMergeTree until startup [#42441](https://github.com/ClickHouse/ClickHouse/pull/42441) ([alesapin](https://github.com/alesapin)). +* Revert "Revert [#27787](https://github.com/ClickHouse/ClickHouse/issues/27787)" [#42442](https://github.com/ClickHouse/ClickHouse/pull/42442) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Update woboq_codebrowser location [#42448](https://github.com/ClickHouse/ClickHouse/pull/42448) ([Azat Khuzhin](https://github.com/azat)). +* add mdx and jsx to list of doc files [#42454](https://github.com/ClickHouse/ClickHouse/pull/42454) ([Dan Roscigno](https://github.com/DanRoscigno)). +* Remove code browser docs [#42455](https://github.com/ClickHouse/ClickHouse/pull/42455) ([Dan Roscigno](https://github.com/DanRoscigno)). +* Better workaround for emitting .debug_aranges section [#42457](https://github.com/ClickHouse/ClickHouse/pull/42457) ([Azat Khuzhin](https://github.com/azat)). +* Fix flaky test [#42459](https://github.com/ClickHouse/ClickHouse/pull/42459) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix UBSan report in Julian Day functions [#42464](https://github.com/ClickHouse/ClickHouse/pull/42464) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* rename filesystem_query_cache [#42472](https://github.com/ClickHouse/ClickHouse/pull/42472) ([Han Shukai](https://github.com/KinderRiven)). +* Add convenience typedefs for Date/Date32/DateTime/DateTime64 columns [#42476](https://github.com/ClickHouse/ClickHouse/pull/42476) ([Robert Schulze](https://github.com/rschu1ze)). +* Add error "Destination table is myself" to exception list in BC check [#42479](https://github.com/ClickHouse/ClickHouse/pull/42479) ([Kruglov Pavel](https://github.com/Avogar)). +* Get current clickhouse version without sending query in BC check [#42483](https://github.com/ClickHouse/ClickHouse/pull/42483) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix logical error from welchTTest [#42487](https://github.com/ClickHouse/ClickHouse/pull/42487) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Attempt to fix abort from parallel parsing [#42496](https://github.com/ClickHouse/ClickHouse/pull/42496) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Increase threshold for using physical cores for `max_threads` [#42503](https://github.com/ClickHouse/ClickHouse/pull/42503) ([Nikita Taranov](https://github.com/nickitat)). +* Add a test for [#16827](https://github.com/ClickHouse/ClickHouse/issues/16827) [#42511](https://github.com/ClickHouse/ClickHouse/pull/42511) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Add a test for [#13653](https://github.com/ClickHouse/ClickHouse/issues/13653) [#42512](https://github.com/ClickHouse/ClickHouse/pull/42512) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix aliases [#42514](https://github.com/ClickHouse/ClickHouse/pull/42514) ([Nikolay Degterinsky](https://github.com/evillique)). +* tests: fix 00705_drop_create_merge_tree flakiness [#42522](https://github.com/ClickHouse/ClickHouse/pull/42522) ([Azat Khuzhin](https://github.com/azat)). +* Fix sanitizer reports in integration tests [#42529](https://github.com/ClickHouse/ClickHouse/pull/42529) ([Azat Khuzhin](https://github.com/azat)). +* Fix `KeeperTCPHandler` data race [#42532](https://github.com/ClickHouse/ClickHouse/pull/42532) ([Antonio Andelic](https://github.com/antonio2368)). +* Disable `test_storage_nats`, because it's permanently broken [#42535](https://github.com/ClickHouse/ClickHouse/pull/42535) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Better logs in clickhouse-disks [#42549](https://github.com/ClickHouse/ClickHouse/pull/42549) ([Nikolay Degterinsky](https://github.com/evillique)). +* add lib_fuzzer and lib_fuzzer_no_main to llvm-project build [#42550](https://github.com/ClickHouse/ClickHouse/pull/42550) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Some polishing: replicated merge tree [#42560](https://github.com/ClickHouse/ClickHouse/pull/42560) ([Igor Nikonov](https://github.com/devcrafter)). +* Temporarily disable flaky `test_replicated_merge_tree_hdfs_zero_copy` [#42563](https://github.com/ClickHouse/ClickHouse/pull/42563) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Adapt internal data structures to 512-bit era [#42564](https://github.com/ClickHouse/ClickHouse/pull/42564) ([Nikita Taranov](https://github.com/nickitat)). +* Fix strange code in date monotonicity [#42574](https://github.com/ClickHouse/ClickHouse/pull/42574) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Clear thread::id when ThreadFromGlobalPool exits. [#42577](https://github.com/ClickHouse/ClickHouse/pull/42577) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* ci/stress: fix memory limits overrides for hung check [#42585](https://github.com/ClickHouse/ClickHouse/pull/42585) ([Azat Khuzhin](https://github.com/azat)). +* tests: avoid model overlap for obfuscator [#42586](https://github.com/ClickHouse/ClickHouse/pull/42586) ([Azat Khuzhin](https://github.com/azat)). +* Fix possible segfault in expression parser [#42598](https://github.com/ClickHouse/ClickHouse/pull/42598) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix incorrect trace log line on dict reload [#42609](https://github.com/ClickHouse/ClickHouse/pull/42609) ([filimonov](https://github.com/filimonov)). +* Fix flaky 02458_datediff_date32 test [#42611](https://github.com/ClickHouse/ClickHouse/pull/42611) ([Roman Vasin](https://github.com/rvasin)). +* Revert revert 41268 disable s3 parallel write for part moves to disk s3 [#42617](https://github.com/ClickHouse/ClickHouse/pull/42617) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Try to fix data race on zookeeper vs DDLWorker at server shutdown. [#42620](https://github.com/ClickHouse/ClickHouse/pull/42620) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Add a template for installation issues [#42626](https://github.com/ClickHouse/ClickHouse/pull/42626) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
+* Fix typo in cmake code related to fuzzing [#42627](https://github.com/ClickHouse/ClickHouse/pull/42627) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix build [#42635](https://github.com/ClickHouse/ClickHouse/pull/42635) ([Anton Popov](https://github.com/CurtizJ)). +* Add .rgignore for test data [#42639](https://github.com/ClickHouse/ClickHouse/pull/42639) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix flaky 02457_datediff_via_unix_epoch test [#42655](https://github.com/ClickHouse/ClickHouse/pull/42655) ([Roman Vasin](https://github.com/rvasin)). + diff --git a/docs/changelogs/v22.10.2.11-stable.md b/docs/changelogs/v22.10.2.11-stable.md new file mode 100644 index 00000000000..e4507f4e745 --- /dev/null +++ b/docs/changelogs/v22.10.2.11-stable.md @@ -0,0 +1,18 @@ +--- +sidebar_position: 1 +sidebar_label: 2022 +--- + +# 2022 Changelog + +### ClickHouse release v22.10.2.11-stable (d2bfcaba002) FIXME as compared to v22.10.1.1877-stable (98ab5a3c189) + +#### Bug Fix (user-visible misbehavior in official stable or prestable release) + +* Backported in [#42750](https://github.com/ClickHouse/ClickHouse/issues/42750): A segmentation fault related to DNS & c-ares has been reported. The below error ocurred in multiple threads: ``` 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008088 [ 356 ] {} BaseDaemon: ######################################## 2022-09-28 15:41:19.008,"2022.09.28 15:41:19.008147 [ 356 ] {} BaseDaemon: (version 22.8.5.29 (official build), build id: 92504ACA0B8E2267) (from thread 353) (no query) Received signal Segmentation fault (11)" 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008196 [ 356 ] {} BaseDaemon: Address: 0xf Access: write. Address not mapped to object. 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008216 [ 356 ] {} BaseDaemon: Stack trace: 0x188f8212 0x1626851b 0x1626a69e 0x16269b3f 0x16267eab 0x13cf8284 0x13d24afc 0x13c5217e 0x14ec2495 0x15ba440f 0x15b9d13b 0x15bb2699 0x1891ccb3 0x1891e00d 0x18ae0769 0x18ade022 0x7f76aa985609 0x7f76aa8aa133 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008274 [ 356 ] {} BaseDaemon: 2. Poco::Net::IPAddress::family() const @ 0x188f8212 in /usr/bin/clickhouse 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008297 [ 356 ] {} BaseDaemon: 3. ? @ 0x1626851b in /usr/bin/clickhouse 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008309 [ 356 ] {} BaseDaemon: 4. ? @ 0x1626a69e in /usr/bin/clickhouse ```. [#42234](https://github.com/ClickHouse/ClickHouse/pull/42234) ([Arthur Passos](https://github.com/arthurpassos)). +* Backported in [#42793](https://github.com/ClickHouse/ClickHouse/issues/42793): Fix a bug in ParserFunction that could have led to a segmentation fault. [#42724](https://github.com/ClickHouse/ClickHouse/pull/42724) ([Nikolay Degterinsky](https://github.com/evillique)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Always run `BuilderReport` and `BuilderSpecialReport` in all CI types [#42684](https://github.com/ClickHouse/ClickHouse/pull/42684) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
+ diff --git a/docs/changelogs/v22.3.14.18-lts.md b/docs/changelogs/v22.3.14.18-lts.md new file mode 100644 index 00000000000..d0c67a2b241 --- /dev/null +++ b/docs/changelogs/v22.3.14.18-lts.md @@ -0,0 +1,26 @@ +--- +sidebar_position: 1 +sidebar_label: 2022 +--- + +# 2022 Changelog + +### ClickHouse release v22.3.14.18-lts (642946f61b2) FIXME as compared to v22.3.13.80-lts (e2708b01fba) + +#### Bug Fix +* Backported in [#42432](https://github.com/ClickHouse/ClickHouse/issues/42432): - Choose correct aggregation method for LowCardinality with BigInt. [#42342](https://github.com/ClickHouse/ClickHouse/pull/42342) ([Duc Canh Le](https://github.com/canhld94)). + +#### Build/Testing/Packaging Improvement +* Backported in [#42328](https://github.com/ClickHouse/ClickHouse/issues/42328): Update cctz to the latest master, update tzdb to 2020e. [#42273](https://github.com/ClickHouse/ClickHouse/pull/42273) ([Dom Del Nano](https://github.com/ddelnano)). +* Backported in [#42358](https://github.com/ClickHouse/ClickHouse/issues/42358): Update tzdata to 2022e to support the new timezone changes. Palestine transitions are now Saturdays at 02:00. Simplify three Ukraine zones into one. Jordan and Syria switch from +02/+03 with DST to year-round +03. (https://data.iana.org/time-zones/tzdb/NEWS). This closes [#42252](https://github.com/ClickHouse/ClickHouse/issues/42252). [#42327](https://github.com/ClickHouse/ClickHouse/pull/42327) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* Backported in [#42298](https://github.com/ClickHouse/ClickHouse/issues/42298): Fix a bug with projections and the `aggregate_functions_null_for_empty` setting. This bug is very rare and appears only if you enable the `aggregate_functions_null_for_empty` setting in the server's config. This closes [#41647](https://github.com/ClickHouse/ClickHouse/issues/41647). [#42198](https://github.com/ClickHouse/ClickHouse/pull/42198) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#42592](https://github.com/ClickHouse/ClickHouse/issues/42592): This closes [#42453](https://github.com/ClickHouse/ClickHouse/issues/42453). [#42573](https://github.com/ClickHouse/ClickHouse/pull/42573) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Add a warning message to release.py script, require release type [#41975](https://github.com/ClickHouse/ClickHouse/pull/41975) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Revert [#27787](https://github.com/ClickHouse/ClickHouse/issues/27787) [#42136](https://github.com/ClickHouse/ClickHouse/pull/42136) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + diff --git a/docs/changelogs/v22.3.14.23-lts.md b/docs/changelogs/v22.3.14.23-lts.md new file mode 100644 index 00000000000..663d8b43f6f --- /dev/null +++ b/docs/changelogs/v22.3.14.23-lts.md @@ -0,0 +1,29 @@ +--- +sidebar_position: 1 +sidebar_label: 2022 +--- + +# 2022 Changelog + +### ClickHouse release v22.3.14.23-lts (74956bfee4d) FIXME as compared to v22.3.13.80-lts (e2708b01fba) + +#### Improvement +* Backported in [#42527](https://github.com/ClickHouse/ClickHouse/issues/42527): Fix issue with passing MySQL timeouts for MySQL database engine and MySQL table function. Closes [#34168](https://github.com/ClickHouse/ClickHouse/issues/34168).
[#40751](https://github.com/ClickHouse/ClickHouse/pull/40751) ([Kseniia Sumarokova](https://github.com/kssenii)). + +#### Bug Fix +* Backported in [#42432](https://github.com/ClickHouse/ClickHouse/issues/42432): - Choose correct aggregation method for LowCardinality with BigInt. [#42342](https://github.com/ClickHouse/ClickHouse/pull/42342) ([Duc Canh Le](https://github.com/canhld94)). + +#### Build/Testing/Packaging Improvement +* Backported in [#42328](https://github.com/ClickHouse/ClickHouse/issues/42328): Update cctz to the latest master, update tzdb to 2020e. [#42273](https://github.com/ClickHouse/ClickHouse/pull/42273) ([Dom Del Nano](https://github.com/ddelnano)). +* Backported in [#42358](https://github.com/ClickHouse/ClickHouse/issues/42358): Update tzdata to 2022e to support the new timezone changes. Palestine transitions are now Saturdays at 02:00. Simplify three Ukraine zones into one. Jordan and Syria switch from +02/+03 with DST to year-round +03. (https://data.iana.org/time-zones/tzdb/NEWS). This closes [#42252](https://github.com/ClickHouse/ClickHouse/issues/42252). [#42327](https://github.com/ClickHouse/ClickHouse/pull/42327) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Bug Fix (user-visible misbehavior in official stable or prestable release) + +* Backported in [#42298](https://github.com/ClickHouse/ClickHouse/issues/42298): Fix a bug with projections and the `aggregate_functions_null_for_empty` setting. This bug is very rare and appears only if you enable the `aggregate_functions_null_for_empty` setting in the server's config. This closes [#41647](https://github.com/ClickHouse/ClickHouse/issues/41647). [#42198](https://github.com/ClickHouse/ClickHouse/pull/42198) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#42592](https://github.com/ClickHouse/ClickHouse/issues/42592): This closes [#42453](https://github.com/ClickHouse/ClickHouse/issues/42453). [#42573](https://github.com/ClickHouse/ClickHouse/pull/42573) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Add a warning message to release.py script, require release type [#41975](https://github.com/ClickHouse/ClickHouse/pull/41975) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Revert [#27787](https://github.com/ClickHouse/ClickHouse/issues/27787) [#42136](https://github.com/ClickHouse/ClickHouse/pull/42136) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + diff --git a/docs/changelogs/v22.7.7.24-stable.md b/docs/changelogs/v22.7.7.24-stable.md new file mode 100644 index 00000000000..d7b83775502 --- /dev/null +++ b/docs/changelogs/v22.7.7.24-stable.md @@ -0,0 +1,29 @@ +--- +sidebar_position: 1 +sidebar_label: 2022 +--- + +# 2022 Changelog + +### ClickHouse release v22.7.7.24-stable (02ad1f979a8) FIXME as compared to v22.7.6.74-stable (c00ffb3c11a) + +#### Bug Fix +* Backported in [#42433](https://github.com/ClickHouse/ClickHouse/issues/42433): - Choose correct aggregation method for LowCardinality with BigInt. [#42342](https://github.com/ClickHouse/ClickHouse/pull/42342) ([Duc Canh Le](https://github.com/canhld94)). + +#### Build/Testing/Packaging Improvement +* Backported in [#42329](https://github.com/ClickHouse/ClickHouse/issues/42329): Update cctz to the latest master, update tzdb to 2020e. [#42273](https://github.com/ClickHouse/ClickHouse/pull/42273) ([Dom Del Nano](https://github.com/ddelnano)). 
+* Backported in [#42359](https://github.com/ClickHouse/ClickHouse/issues/42359): Update tzdata to 2022e to support the new timezone changes. Palestine transitions are now Saturdays at 02:00. Simplify three Ukraine zones into one. Jordan and Syria switch from +02/+03 with DST to year-round +03. (https://data.iana.org/time-zones/tzdb/NEWS). This closes [#42252](https://github.com/ClickHouse/ClickHouse/issues/42252). [#42327](https://github.com/ClickHouse/ClickHouse/pull/42327) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Bug Fix (user-visible misbehavior in official stable or prestable release) + +* Backported in [#42268](https://github.com/ClickHouse/ClickHouse/issues/42268): Fix reusing of files > 4GB from base backup. [#42146](https://github.com/ClickHouse/ClickHouse/pull/42146) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#42299](https://github.com/ClickHouse/ClickHouse/issues/42299): Fix a bug with projections and the `aggregate_functions_null_for_empty` setting. This bug is very rare and appears only if you enable the `aggregate_functions_null_for_empty` setting in the server's config. This closes [#41647](https://github.com/ClickHouse/ClickHouse/issues/41647). [#42198](https://github.com/ClickHouse/ClickHouse/pull/42198) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#42386](https://github.com/ClickHouse/ClickHouse/issues/42386): `ALTER UPDATE` of attached part (with columns different from table schema) could create an invalid `columns.txt` metadata on disk. Reading from such part could fail with errors or return invalid data. Fixes [#42161](https://github.com/ClickHouse/ClickHouse/issues/42161). [#42319](https://github.com/ClickHouse/ClickHouse/pull/42319) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#42498](https://github.com/ClickHouse/ClickHouse/issues/42498): Setting `additional_table_filters` were not applied to `Distributed` storage. Fixes [#41692](https://github.com/ClickHouse/ClickHouse/issues/41692). [#42322](https://github.com/ClickHouse/ClickHouse/pull/42322) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#42593](https://github.com/ClickHouse/ClickHouse/issues/42593): This closes [#42453](https://github.com/ClickHouse/ClickHouse/issues/42453). [#42573](https://github.com/ClickHouse/ClickHouse/pull/42573) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Add a warning message to release.py script, require release type [#41975](https://github.com/ClickHouse/ClickHouse/pull/41975) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Revert [#27787](https://github.com/ClickHouse/ClickHouse/issues/27787) [#42136](https://github.com/ClickHouse/ClickHouse/pull/42136) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + diff --git a/docs/changelogs/v22.8.7.34-lts.md b/docs/changelogs/v22.8.7.34-lts.md new file mode 100644 index 00000000000..0dc899f4717 --- /dev/null +++ b/docs/changelogs/v22.8.7.34-lts.md @@ -0,0 +1,37 @@ +--- +sidebar_position: 1 +sidebar_label: 2022 +--- + +# 2022 Changelog + +### ClickHouse release v22.8.7.34-lts (3c38e5e8ab9) FIXME as compared to v22.8.6.71-lts (7bf38a43e30) + +#### Improvement +* Backported in [#42096](https://github.com/ClickHouse/ClickHouse/issues/42096): Replace back `clickhouse su` command with `sudo -u` in start in order to respect limits in `/etc/security/limits.conf`. 
[#41847](https://github.com/ClickHouse/ClickHouse/pull/41847) ([Eugene Konkov](https://github.com/ekonkov)). + +#### Bug Fix +* Backported in [#42434](https://github.com/ClickHouse/ClickHouse/issues/42434): - Choose correct aggregation method for LowCardinality with BigInt. [#42342](https://github.com/ClickHouse/ClickHouse/pull/42342) ([Duc Canh Le](https://github.com/canhld94)). + +#### Build/Testing/Packaging Improvement +* Backported in [#42296](https://github.com/ClickHouse/ClickHouse/issues/42296): Update cctz to the latest master, update tzdb to 2020e. [#42273](https://github.com/ClickHouse/ClickHouse/pull/42273) ([Dom Del Nano](https://github.com/ddelnano)). +* Backported in [#42360](https://github.com/ClickHouse/ClickHouse/issues/42360): Update tzdata to 2022e to support the new timezone changes. Palestine transitions are now Saturdays at 02:00. Simplify three Ukraine zones into one. Jordan and Syria switch from +02/+03 with DST to year-round +03. (https://data.iana.org/time-zones/tzdb/NEWS). This closes [#42252](https://github.com/ClickHouse/ClickHouse/issues/42252). [#42327](https://github.com/ClickHouse/ClickHouse/pull/42327) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Bug Fix (user-visible misbehavior in official stable or prestable release) + +* Backported in [#42489](https://github.com/ClickHouse/ClickHouse/issues/42489): Removed skipping of mutations in unaffected partitions of `MergeTree` tables, because this feature never worked correctly and might cause resurrection of finished mutations. [#40589](https://github.com/ClickHouse/ClickHouse/pull/40589) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#42121](https://github.com/ClickHouse/ClickHouse/issues/42121): Fixed "Part ... intersects part ..." error that might happen in extremely rare cases if replica was restarted just after detaching some part as broken. [#41741](https://github.com/ClickHouse/ClickHouse/pull/41741) ([Alexander Tokmakov](https://github.com/tavplubix)). +* - Prevent crash when passing wrong aggregation states to groupBitmap*. [#41972](https://github.com/ClickHouse/ClickHouse/pull/41972) ([Raúl Marín](https://github.com/Algunenano)). +* - Fix read bytes/rows in X-ClickHouse-Summary with materialized views. [#41973](https://github.com/ClickHouse/ClickHouse/pull/41973) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#42269](https://github.com/ClickHouse/ClickHouse/issues/42269): Fix reusing of files > 4GB from base backup. [#42146](https://github.com/ClickHouse/ClickHouse/pull/42146) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#42300](https://github.com/ClickHouse/ClickHouse/issues/42300): Fix a bug with projections and the `aggregate_functions_null_for_empty` setting. This bug is very rare and appears only if you enable the `aggregate_functions_null_for_empty` setting in the server's config. This closes [#41647](https://github.com/ClickHouse/ClickHouse/issues/41647). [#42198](https://github.com/ClickHouse/ClickHouse/pull/42198) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#42387](https://github.com/ClickHouse/ClickHouse/issues/42387): `ALTER UPDATE` of attached part (with columns different from table schema) could create an invalid `columns.txt` metadata on disk. Reading from such part could fail with errors or return invalid data. Fixes [#42161](https://github.com/ClickHouse/ClickHouse/issues/42161). 
[#42319](https://github.com/ClickHouse/ClickHouse/pull/42319) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#42499](https://github.com/ClickHouse/ClickHouse/issues/42499): Setting `additional_table_filters` were not applied to `Distributed` storage. Fixes [#41692](https://github.com/ClickHouse/ClickHouse/issues/41692). [#42322](https://github.com/ClickHouse/ClickHouse/pull/42322) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#42571](https://github.com/ClickHouse/ClickHouse/issues/42571): Fix buffer overflow in the processing of Decimal data types. This closes [#42451](https://github.com/ClickHouse/ClickHouse/issues/42451). [#42465](https://github.com/ClickHouse/ClickHouse/pull/42465) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#42594](https://github.com/ClickHouse/ClickHouse/issues/42594): This closes [#42453](https://github.com/ClickHouse/ClickHouse/issues/42453). [#42573](https://github.com/ClickHouse/ClickHouse/pull/42573) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Add a warning message to release.py script, require release type [#41975](https://github.com/ClickHouse/ClickHouse/pull/41975) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Revert [#27787](https://github.com/ClickHouse/ClickHouse/issues/27787) [#42136](https://github.com/ClickHouse/ClickHouse/pull/42136) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + diff --git a/docs/changelogs/v22.8.8.3-lts.md b/docs/changelogs/v22.8.8.3-lts.md new file mode 100644 index 00000000000..deaab51fce9 --- /dev/null +++ b/docs/changelogs/v22.8.8.3-lts.md @@ -0,0 +1,13 @@ +--- +sidebar_position: 1 +sidebar_label: 2022 +--- + +# 2022 Changelog + +### ClickHouse release v22.8.8.3-lts (ac5a6cababc) FIXME as compared to v22.8.7.34-lts (3c38e5e8ab9) + +#### Bug Fix (user-visible misbehavior in official stable or prestable release) + +* Backported in [#42677](https://github.com/ClickHouse/ClickHouse/issues/42677): keeper-fix: fix race in accessing logs while snapshot is being installed. [#40627](https://github.com/ClickHouse/ClickHouse/pull/40627) ([Antonio Andelic](https://github.com/antonio2368)). + diff --git a/docs/changelogs/v22.9.4.32-stable.md b/docs/changelogs/v22.9.4.32-stable.md new file mode 100644 index 00000000000..d6c3f4ba498 --- /dev/null +++ b/docs/changelogs/v22.9.4.32-stable.md @@ -0,0 +1,33 @@ +--- +sidebar_position: 1 +sidebar_label: 2022 +--- + +# 2022 Changelog + +### ClickHouse release v22.9.4.32-stable (3db8bcf1a70) FIXME as compared to v22.9.3.18-stable (0cb4b15d2fa) + +#### Bug Fix +* Backported in [#42435](https://github.com/ClickHouse/ClickHouse/issues/42435): - Choose correct aggregation method for LowCardinality with BigInt. [#42342](https://github.com/ClickHouse/ClickHouse/pull/42342) ([Duc Canh Le](https://github.com/canhld94)). + +#### Build/Testing/Packaging Improvement +* Backported in [#42297](https://github.com/ClickHouse/ClickHouse/issues/42297): Update cctz to the latest master, update tzdb to 2020e. [#42273](https://github.com/ClickHouse/ClickHouse/pull/42273) ([Dom Del Nano](https://github.com/ddelnano)). +* Backported in [#42361](https://github.com/ClickHouse/ClickHouse/issues/42361): Update tzdata to 2022e to support the new timezone changes. Palestine transitions are now Saturdays at 02:00. Simplify three Ukraine zones into one. Jordan and Syria switch from +02/+03 with DST to year-round +03. 
(https://data.iana.org/time-zones/tzdb/NEWS). This closes [#42252](https://github.com/ClickHouse/ClickHouse/issues/42252). [#42327](https://github.com/ClickHouse/ClickHouse/pull/42327) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Bug Fix (user-visible misbehavior in official stable or prestable release) + +* Backported in [#42122](https://github.com/ClickHouse/ClickHouse/issues/42122): Fixed "Part ... intersects part ..." error that might happen in extremely rare cases if replica was restarted just after detaching some part as broken. [#41741](https://github.com/ClickHouse/ClickHouse/pull/41741) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#41938](https://github.com/ClickHouse/ClickHouse/issues/41938): Don't allow to create or alter merge tree tables with virtual column name _row_exists, which is reserved for lightweight delete. Fixed [#41716](https://github.com/ClickHouse/ClickHouse/issues/41716). [#41763](https://github.com/ClickHouse/ClickHouse/pull/41763) ([Jianmei Zhang](https://github.com/zhangjmruc)). +* Backported in [#42179](https://github.com/ClickHouse/ClickHouse/issues/42179): Fix reusing of files > 4GB from base backup. [#42146](https://github.com/ClickHouse/ClickHouse/pull/42146) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#42301](https://github.com/ClickHouse/ClickHouse/issues/42301): Fix a bug with projections and the `aggregate_functions_null_for_empty` setting. This bug is very rare and appears only if you enable the `aggregate_functions_null_for_empty` setting in the server's config. This closes [#41647](https://github.com/ClickHouse/ClickHouse/issues/41647). [#42198](https://github.com/ClickHouse/ClickHouse/pull/42198) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#42388](https://github.com/ClickHouse/ClickHouse/issues/42388): `ALTER UPDATE` of attached part (with columns different from table schema) could create an invalid `columns.txt` metadata on disk. Reading from such part could fail with errors or return invalid data. Fixes [#42161](https://github.com/ClickHouse/ClickHouse/issues/42161). [#42319](https://github.com/ClickHouse/ClickHouse/pull/42319) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#42500](https://github.com/ClickHouse/ClickHouse/issues/42500): Setting `additional_table_filters` were not applied to `Distributed` storage. Fixes [#41692](https://github.com/ClickHouse/ClickHouse/issues/41692). [#42322](https://github.com/ClickHouse/ClickHouse/pull/42322) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#42581](https://github.com/ClickHouse/ClickHouse/issues/42581): This reverts [#40217](https://github.com/ClickHouse/ClickHouse/issues/40217) which introduced a regression in date/time functions. [#42367](https://github.com/ClickHouse/ClickHouse/pull/42367) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#42572](https://github.com/ClickHouse/ClickHouse/issues/42572): Fix buffer overflow in the processing of Decimal data types. This closes [#42451](https://github.com/ClickHouse/ClickHouse/issues/42451). [#42465](https://github.com/ClickHouse/ClickHouse/pull/42465) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#42595](https://github.com/ClickHouse/ClickHouse/issues/42595): This closes [#42453](https://github.com/ClickHouse/ClickHouse/issues/42453). 
[#42573](https://github.com/ClickHouse/ClickHouse/pull/42573) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Add a warning message to release.py script, require release type [#41975](https://github.com/ClickHouse/ClickHouse/pull/41975) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Revert [#27787](https://github.com/ClickHouse/ClickHouse/issues/27787) [#42136](https://github.com/ClickHouse/ClickHouse/pull/42136) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + diff --git a/docs/en/development/architecture.md b/docs/en/development/architecture.md index c13b2519b84..fe644c43889 100644 --- a/docs/en/development/architecture.md +++ b/docs/en/development/architecture.md @@ -49,27 +49,13 @@ When we calculate some function over columns in a block, we add another column w Blocks are created for every processed chunk of data. Note that for the same type of calculation, the column names and types remain the same for different blocks, and only column data changes. It is better to split block data from the block header because small block sizes have a high overhead of temporary strings for copying shared_ptrs and column names. -## Block Streams {#block-streams} +## Processors -Block streams are for processing data. We use streams of blocks to read data from somewhere, perform data transformations, or write data to somewhere. `IBlockInputStream` has the `read` method to fetch the next block while available. `IBlockOutputStream` has the `write` method to push the block somewhere. - -Streams are responsible for: - -1. Reading or writing to a table. The table just returns a stream for reading or writing blocks. -2. Implementing data formats. For example, if you want to output data to a terminal in `Pretty` format, you create a block output stream where you push blocks, and it formats them. -3. Performing data transformations. Let’s say you have `IBlockInputStream` and want to create a filtered stream. You create `FilterBlockInputStream` and initialize it with your stream. Then when you pull a block from `FilterBlockInputStream`, it pulls a block from your stream, filters it, and returns the filtered block to you. Query execution pipelines are represented this way. - -There are more sophisticated transformations. For example, when you pull from `AggregatingBlockInputStream`, it reads all data from its source, aggregates it, and then returns a stream of aggregated data for you. Another example: `UnionBlockInputStream` accepts many input sources in the constructor and also a number of threads. It launches multiple threads and reads from multiple sources in parallel. - -> Block streams use the “pull” approach to control flow: when you pull a block from the first stream, it consequently pulls the required blocks from nested streams, and the entire execution pipeline will work. Neither “pull” nor “push” is the best solution, because control flow is implicit, and that limits the implementation of various features like simultaneous execution of multiple queries (merging many pipelines together). This limitation could be overcome with coroutines or just running extra threads that wait for each other. We may have more possibilities if we make control flow explicit: if we locate the logic for passing data from one calculation unit to another outside of those calculation units. Read this [article](http://journal.stuffwithstuff.com/2013/01/13/iteration-inside-and-out/) for more thoughts. 
- -We should note that the query execution pipeline creates temporary data at each step. We try to keep block size small enough so that temporary data fits in the CPU cache. With that assumption, writing and reading temporary data is almost free in comparison with other calculations. We could consider an alternative, which is to fuse many operations in the pipeline together. It could make the pipeline as short as possible and remove much of the temporary data, which could be an advantage, but it also has drawbacks. For example, a split pipeline makes it easy to implement caching intermediate data, stealing intermediate data from similar queries running at the same time, and merging pipelines for similar queries. +See the description at [https://github.com/ClickHouse/ClickHouse/blob/master/src/Processors/IProcessor.h](https://github.com/ClickHouse/ClickHouse/blob/master/src/Processors/IProcessor.h). ## Formats {#formats} -Data formats are implemented with block streams. There are “presentational” formats only suitable for the output of data to the client, such as `Pretty` format, which provides only `IBlockOutputStream`. And there are input/output formats, such as `TabSeparated` or `JSONEachRow`. - -There are also row streams: `IRowInputStream` and `IRowOutputStream`. They allow you to pull/push data by individual rows, not by blocks. And they are only needed to simplify the implementation of row-oriented formats. The wrappers `BlockInputStreamFromRowInputStream` and `BlockOutputStreamFromRowOutputStream` allow you to convert row-oriented streams to regular block-oriented streams. +Data formats are implemented with processors. ## I/O {#io} diff --git a/docs/en/development/browse-code.md b/docs/en/development/browse-code.md deleted file mode 100644 index 0d064cc9b0c..00000000000 --- a/docs/en/development/browse-code.md +++ /dev/null @@ -1,14 +0,0 @@ ---- -slug: /en/development/browse-code -sidebar_label: Source Code Browser -sidebar_position: 72 -description: Various ways to browse and edit the source code ---- - -# Browse ClickHouse Source Code - -You can use the **Woboq** online code browser available [here](https://clickhouse.com/codebrowser/ClickHouse/src/index.html). It provides code navigation and semantic highlighting, search and indexing. The code snapshot is updated daily. - -Also, you can browse sources on [GitHub](https://github.com/ClickHouse/ClickHouse) as usual. - -If you’re interested what IDE to use, we recommend CLion, QT Creator, VS Code and KDevelop (with caveats). You can use any favorite IDE. Vim and Emacs also count. 
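The architecture documentation change above replaces the detailed block-streams description with a pointer to `IProcessor.h`. For orientation while reading this patch, here is a minimal, hypothetical sketch of the general idea behind a processor-style execution unit: a cheap, non-blocking `prepare()` scheduling step kept separate from the expensive `work()` step, so that an executor can drive many units with explicit control flow. All names below (`SimpleProcessor`, `FilterNonNegative`, `Chunk`, `push`/`pull`, the status values) are simplified illustrations and not ClickHouse's actual `IProcessor` API; see the linked header for the real interface.

``` cpp
// A deliberately simplified, self-contained sketch of a processor-style
// execution unit. Everything here is hypothetical and for illustration only;
// the real interface is src/Processors/IProcessor.h in the ClickHouse repo.
#include <cstdio>
#include <optional>
#include <utility>
#include <vector>

struct Chunk
{
    std::vector<int> values; // stand-in for a block of column data
};

class SimpleProcessor
{
public:
    enum class Status { NeedData, Ready, PortFull };

    virtual ~SimpleProcessor() = default;

    /// Cheap, non-blocking scheduling decision: tells the executor whether the
    /// processor is waiting for input, can run, or holds unconsumed output.
    virtual Status prepare() = 0;

    /// The actual (possibly expensive) transformation of the current chunk.
    virtual void work() = 0;
};

/// A trivial transform: keeps only non-negative values from each input chunk.
class FilterNonNegative : public SimpleProcessor
{
public:
    void push(Chunk chunk) { input = std::move(chunk); }
    std::optional<Chunk> pull() { return std::exchange(output, std::nullopt); }

    Status prepare() override
    {
        if (output)
            return Status::PortFull; // downstream has not pulled the result yet
        if (input)
            return Status::Ready;    // work() can run now
        return Status::NeedData;     // waiting for upstream to push a chunk
    }

    void work() override
    {
        Chunk filtered;
        for (int v : input->values)
            if (v >= 0)
                filtered.values.push_back(v);
        output = std::move(filtered);
        input.reset();
    }

private:
    std::optional<Chunk> input;
    std::optional<Chunk> output;
};

int main()
{
    FilterNonNegative filter;
    filter.push(Chunk{{3, -1, 7, -5, 0}});

    if (filter.prepare() == SimpleProcessor::Status::Ready)
        filter.work();

    if (auto chunk = filter.pull())
        for (int v : chunk->values)
            std::printf("%d\n", v); // prints 3, 7, 0
}
```

The point of the split is the one made in the text being removed from `architecture.md`: once the logic for moving data between calculation units lives outside those units, control flow becomes explicit, which makes features such as running several pipelines concurrently easier than under the implicit pull model of block streams.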
diff --git a/docs/en/development/build.md b/docs/en/development/build.md index f397dc0d037..8982a3bc0a4 100644 --- a/docs/en/development/build.md +++ b/docs/en/development/build.md @@ -105,7 +105,7 @@ ninja Example for Fedora Rawhide: ``` bash sudo yum update -yum --nogpg install git cmake make clang-c++ python3 +sudo yum --nogpg install git cmake make clang python3 ccache git clone --recursive https://github.com/ClickHouse/ClickHouse.git mkdir build && cd build cmake ../ClickHouse diff --git a/docs/en/engines/table-engines/integrations/s3.md b/docs/en/engines/table-engines/integrations/s3.md index 986a29b8307..db983ab9c68 100644 --- a/docs/en/engines/table-engines/integrations/s3.md +++ b/docs/en/engines/table-engines/integrations/s3.md @@ -139,7 +139,7 @@ The following settings can be specified in configuration file for given endpoint - `use_environment_credentials` — If set to `true`, S3 client will try to obtain credentials from environment variables and [Amazon EC2](https://en.wikipedia.org/wiki/Amazon_Elastic_Compute_Cloud) metadata for given endpoint. Optional, default value is `false`. - `region` — Specifies S3 region name. Optional. - `use_insecure_imds_request` — If set to `true`, S3 client will use insecure IMDS request while obtaining credentials from Amazon EC2 metadata. Optional, default value is `false`. -- `header` — Adds specified HTTP header to a request to given endpoint. Optional, can be speficied multiple times. +- `header` — Adds specified HTTP header to a request to given endpoint. Optional, can be specified multiple times. - `server_side_encryption_customer_key_base64` — If specified, required headers for accessing S3 objects with SSE-C encryption will be set. Optional. - `max_single_read_retries` — The maximum number of attempts during single read. Default value is `4`. Optional. diff --git a/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md b/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md index ba518f51657..267e5c81dda 100644 --- a/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md @@ -68,36 +68,57 @@ In the results of `SELECT` query, the values of `AggregateFunction` type have im ## Example of an Aggregated Materialized View {#example-of-an-aggregated-materialized-view} -`AggregatingMergeTree` materialized view that watches the `test.visits` table: +We will create the table `test.visits` that contain the raw data: ``` sql -CREATE MATERIALIZED VIEW test.basic -ENGINE = AggregatingMergeTree() PARTITION BY toYYYYMM(StartDate) ORDER BY (CounterID, StartDate) +CREATE TABLE test.visits + ( + StartDate DateTime64 NOT NULL, + CounterID UInt64, + Sign Nullable(Int32), + UserID Nullable(Int32) +) ENGINE = MergeTree ORDER BY (StartDate, CounterID); +``` + +`AggregatingMergeTree` materialized view that watches the `test.visits` table, and use the `AggregateFunction` type: + +``` sql +CREATE MATERIALIZED VIEW test.mv_visits +( + StartDate DateTime64 NOT NULL, + CounterID UInt64, + Visits AggregateFunction(sum, Nullable(Int32)), + Users AggregateFunction(uniq, Nullable(Int32)) +) +ENGINE = AggregatingMergeTree() ORDER BY (StartDate, CounterID) AS SELECT - CounterID, StartDate, - sumState(Sign) AS Visits, + CounterID, + sumState(Sign) AS Visits, uniqState(UserID) AS Users FROM test.visits -GROUP BY CounterID, StartDate; +GROUP BY StartDate, CounterID; ``` Inserting data into the `test.visits` table. ``` sql -INSERT INTO test.visits ... 
+INSERT INTO test.visits (StartDate, CounterID, Sign, UserID) + VALUES (1667446031, 1, 3, 4) +INSERT INTO test.visits (StartDate, CounterID, Sign, UserID) + VALUES (1667446031, 1, 6, 3) ``` -The data are inserted in both the table and view `test.basic` that will perform the aggregation. +The data are inserted in both the table and the materialized view `test.mv_visits`. -To get the aggregated data, we need to execute a query such as `SELECT ... GROUP BY ...` from the view `test.basic`: +To get the aggregated data, we need to execute a query such as `SELECT ... GROUP BY ...` from the materialized view `test.mv_visits`: ``` sql SELECT StartDate, sumMerge(Visits) AS Visits, uniqMerge(Users) AS Users -FROM test.basic +FROM test.mv_visits GROUP BY StartDate ORDER BY StartDate; ``` diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 9dc7e300d45..486baac2310 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -419,6 +419,8 @@ Supported data types: `Int*`, `UInt*`, `Float*`, `Enum`, `Date`, `DateTime`, `St For `Map` data type client can specify if index should be created for keys or values using [mapKeys](../../../sql-reference/functions/tuple-map-functions.md#mapkeys) or [mapValues](../../../sql-reference/functions/tuple-map-functions.md#mapvalues) function. +There are also special-purpose and experimental indexes to support approximate nearest neighbor (ANN) queries. See [here](annindexes.md) for details. + The following functions can use the filter: [equals](../../../sql-reference/functions/comparison-functions.md), [notEquals](../../../sql-reference/functions/comparison-functions.md), [in](../../../sql-reference/functions/in-functions), [notIn](../../../sql-reference/functions/in-functions), [has](../../../sql-reference/functions/array-functions#hasarr-elem), [hasAny](../../../sql-reference/functions/array-functions#hasany), [hasAll](../../../sql-reference/functions/array-functions#hasall). Example of index creation for `Map` data type diff --git a/docs/en/getting-started/example-datasets/cell-towers.md b/docs/en/getting-started/example-datasets/cell-towers.md index 3d993c3e224..67ee8cdb7e2 100644 --- a/docs/en/getting-started/example-datasets/cell-towers.md +++ b/docs/en/getting-started/example-datasets/cell-towers.md @@ -4,25 +4,39 @@ sidebar_label: Cell Towers sidebar_position: 3 title: "Cell Towers" --- +import ConnectionDetails from '@site/docs/en/_snippets/_gather_your_details_http.mdx'; import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; import CodeBlock from '@theme/CodeBlock'; import ActionsMenu from '@site/docs/en/_snippets/_service_actions_menu.md'; import SQLConsoleDetail from '@site/docs/en/_snippets/_launch_sql_console.md'; +import SupersetDocker from '@site/docs/en/_snippets/_add_superset_detail.md'; -This dataset is from [OpenCellid](https://www.opencellid.org/) - The world's largest Open Database of Cell Towers. 
+## Goal + +In this guide you will learn how to: +- Load the OpenCelliD data in Clickhouse +- Connect Apache Superset to ClickHouse +- Build a dashboard based on data available in the dataset + +Here is a preview of the dashboard created in this guide: + +![Dashboard of cell towers by radio type in mcc 204](@site/docs/en/getting-started/example-datasets/images/superset-cell-tower-dashboard.png) + +## Get the Dataset {#get-the-dataset} + +This dataset is from [OpenCelliD](https://www.opencellid.org/) - The world's largest Open Database of Cell Towers. As of 2021, it contains more than 40 million records about cell towers (GSM, LTE, UMTS, etc.) around the world with their geographical coordinates and metadata (country code, network, etc). OpenCelliD Project is licensed under a Creative Commons Attribution-ShareAlike 4.0 International License, and we redistribute a snapshot of this dataset under the terms of the same license. The up-to-date version of the dataset is available to download after sign in. - -## Get the Dataset {#get-the-dataset} - +### Load the sample data + ClickHouse Cloud provides an easy-button for uploading this dataset from S3. Log in to your ClickHouse Cloud organization, or create a free trial at [ClickHouse.cloud](https://clickhouse.cloud). @@ -30,13 +44,33 @@ Choose the **Cell Towers** dataset from the **Sample data** tab, and **Load data ![Load cell towers dataset](@site/docs/en/_snippets/images/cloud-load-data-sample.png) -Examine the schema of the cell_towers table: +### Examine the schema of the cell_towers table ```sql DESCRIBE TABLE cell_towers ``` +This is the output of `DESCRIBE`. Down further in this guide the field type choices will be described. +```response +┌─name──────────┬─type──────────────────────────────────────────────────────────────────┬ +│ radio │ Enum8('' = 0, 'CDMA' = 1, 'GSM' = 2, 'LTE' = 3, 'NR' = 4, 'UMTS' = 5) │ +│ mcc │ UInt16 │ +│ net │ UInt16 │ +│ area │ UInt16 │ +│ cell │ UInt64 │ +│ unit │ Int16 │ +│ lon │ Float64 │ +│ lat │ Float64 │ +│ range │ UInt32 │ +│ samples │ UInt32 │ +│ changeable │ UInt8 │ +│ created │ DateTime │ +│ updated │ DateTime │ +│ averageSignal │ UInt8 │ +└───────────────┴───────────────────────────────────────────────────────────────────────┴ +``` + @@ -86,7 +120,7 @@ clickhouse-client --query "INSERT INTO cell_towers FORMAT CSVWithNames" < cell_t -## Example queries {#examples} +## Run some example queries {#examples} 1. A number of cell towers by type: @@ -127,13 +161,13 @@ SELECT mcc, count() FROM cell_towers GROUP BY mcc ORDER BY count() DESC LIMIT 10 10 rows in set. Elapsed: 0.019 sec. Processed 43.28 million rows, 86.55 MB (2.33 billion rows/s., 4.65 GB/s.) ``` -So, the top countries are: the USA, Germany, and Russia. +Based on the above query and the [MCC list](https://en.wikipedia.org/wiki/Mobile_country_code), the countries with the most cell towers are: the USA, Germany, and Russia. You may want to create an [External Dictionary](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) in ClickHouse to decode these values. ## Use case: Incorporate geo data {#use-case} -Using `pointInPolygon` function. +Using the [`pointInPolygon`](/docs/en/sql-reference/functions/geo/coordinates.md/#pointinpolygon) function. 1. Create a table where we will store polygons: @@ -224,6 +258,110 @@ WHERE pointInPolygon((lon, lat), (SELECT * FROM moscow)) 1 rows in set. Elapsed: 0.067 sec. Processed 43.28 million rows, 692.42 MB (645.83 million rows/s., 10.33 GB/s.) 
``` -The data is also available for interactive queries in the [Playground](https://play.clickhouse.com/play?user=play), [example](https://play.clickhouse.com/play?user=play#U0VMRUNUIG1jYywgY291bnQoKSBGUk9NIGNlbGxfdG93ZXJzIEdST1VQIEJZIG1jYyBPUkRFUiBCWSBjb3VudCgpIERFU0M=). +## Review of the schema -Although you cannot create temporary tables there. +Before building visualizations in Superset have a look at the columns that you will use. This dataset primarily provides the location (Longitude and Latitude) and radio types at mobile cellular towers worldwide. The column descriptions can be found in the [community forum](https://community.opencellid.org/t/documenting-the-columns-in-the-downloadable-cells-database-csv/186). The columns used in the visualizations that will be built are described below + +Here is a description of the columns taken from the OpenCelliD forum: + +| Column | Description | +|--------------|--------------------------------------------------------| +| radio | Technology generation: CDMA, GSM, UMTS, 5G NR | +| mcc | Mobile Country Code: `204` is The Netherlands | +| lon | Longitude: With Latitude, approximate tower location | +| lat | Latitude: With Longitude, approximate tower location | + +:::tip mcc +To find your MCC check [Mobile network codes](https://en.wikipedia.org/wiki/Mobile_country_code), and use the three digits in the **Mobile country code** column. +::: + +The schema for this table was designed for compact storage on disk and query speed. +- The `radio` data is stored as an `Enum8` (`UInt8`) rather than a string. +- `mcc` or Mobile country code, is stored as a `UInt16` as we know the range is 1 - 999. +- `lon` and `lat` are `Float64`. + +None of the other fields are used in the queries or visualizations in this guide, but they are described in the forum linked above if you are interested. + +## Build visualizations with Apache Superset + +Superset is easy to run from Docker. If you already have Superset running, all you need to do is add ClickHouse Connect with `pip install clickhouse-connect`. If you need to install Superset open the **Launch Apache Superset in Docker** directly below. + + + +To build a Superset dashboard using the OpenCelliD dataset you should: +- Add your ClickHouse service as a Superset **database** +- Add the table **cell_towers** as a Superset **dataset** +- Create some **charts** +- Add the charts to a **dashboard** + +### Add your ClickHouse service as a Superset database + + + + In Superset a database can be added by choosing the database type, and then providing the connection details. Open Superset and look for the **+**, it has a menu with **Data** and then **Connect database** options. + + ![Add a database](@site/docs/en/getting-started/example-datasets/images/superset-add.png) + + Choose **ClickHouse Connect** from the list: + + ![Choose clickhouse connect as database type](@site/docs/en/getting-started/example-datasets/images/superset-choose-a-database.png) + +:::note + If **ClickHouse Connect** is not one of your options, then you will need to install it. The comand is `pip install clickhouse-connect`, and more info is [available here](https://pypi.org/project/clickhouse-connect/). +::: + +#### Add your connection details: + +:::tip + Make sure that you set **SSL** on when connecting to ClickHouse Cloud or other ClickHouse systems that enforce the use of SSL. 
+::: + + ![Add ClickHouse as a Superset datasource](@site/docs/en/getting-started/example-datasets/images/superset-connect-a-database.png) + +### Add the table **cell_towers** as a Superset **dataset** + + In Superset a **dataset** maps to a table within a database. Click on add a dataset and choose your ClickHouse service, the database containing your table (`default`), and choose the `cell_towers` table: + +![Add cell_towers table as a dataset](@site/docs/en/getting-started/example-datasets/images/superset-add-dataset.png) + +### Create some **charts** + +When you choose to add a chart in Superset you have to specify the dataset (`cell_towers`) and the chart type. Since the OpenCelliD dataset provides longitude and latitude coordinates for cell towers we will create a **Map** chart. The **deck.gL Scatterplot** type is suited to this dataset as it works well with dense data points on a map. + +![Create a map in Superset](@site/docs/en/getting-started/example-datasets/images/superset-create-map.png) + +#### Specify the query used for the map + +A deck.gl Scatterplot requires a longitude and latitude, and one or more filters can also be applied to the query. In this example two filters are applied, one for cell towers with UMTS radios, and one for the Mobile country code assigned to The Netherlands. + +The fields `lon` and `lat` contain the longitude and latitude: + +![Specify longitude and latitude fields](@site/docs/en/getting-started/example-datasets/images/superset-lon-lat.png) + +Add a filter with `mcc` = `204` (or substitute any other `mcc` value): + +![Filter on MCC 204](@site/docs/en/getting-started/example-datasets/images/superset-mcc-204.png) + +Add a filter with `radio` = `'UMTS'` (or substitute any other `radio` value, you can see the choices in the output of `DESCRIBE TABLE cell_towers`): + +![Filter on radio = UMTS](@site/docs/en/getting-started/example-datasets/images/superset-radio-umts.png) + +This is the full configuration for the chart that filters on `radio = 'UMTS'` and `mcc = 204`: + +![Chart for UMTS radios in MCC 204](@site/docs/en/getting-started/example-datasets/images/superset-umts-netherlands.png) + +Click on **UPDATE CHART** to render the visualization. + +### Add the charts to a **dashboard** + +This screenshot shows cell tower locations with LTE, UMTS, and GSM radios. The charts are all created in the same way and they are added to a dashboard. + + ![Dashboard of cell towers by radio type in mcc 204](@site/docs/en/getting-started/example-datasets/images/superset-cell-tower-dashboard.png) + +:::tip +The data is also available for interactive queries in the [Playground](https://play.clickhouse.com/play?user=play). + +This [example](https://play.clickhouse.com/play?user=play#U0VMRUNUIG1jYywgY291bnQoKSBGUk9NIGNlbGxfdG93ZXJzIEdST1VQIEJZIG1jYyBPUkRFUiBCWSBjb3VudCgpIERFU0M=) will populate the username and even the query for you. + +Although you cannot create tables in the Playground, you can run all of the queries and even use Superset (adjust the hostname and port number). 
+::: diff --git a/docs/en/getting-started/example-datasets/images/superset-add-dataset.png b/docs/en/getting-started/example-datasets/images/superset-add-dataset.png new file mode 100644 index 00000000000..aaa976d76ce Binary files /dev/null and b/docs/en/getting-started/example-datasets/images/superset-add-dataset.png differ diff --git a/docs/en/getting-started/example-datasets/images/superset-add.png b/docs/en/getting-started/example-datasets/images/superset-add.png new file mode 100644 index 00000000000..54bbf11a014 Binary files /dev/null and b/docs/en/getting-started/example-datasets/images/superset-add.png differ diff --git a/docs/en/getting-started/example-datasets/images/superset-cell-tower-dashboard.png b/docs/en/getting-started/example-datasets/images/superset-cell-tower-dashboard.png new file mode 100644 index 00000000000..8197ea223c2 Binary files /dev/null and b/docs/en/getting-started/example-datasets/images/superset-cell-tower-dashboard.png differ diff --git a/docs/en/getting-started/example-datasets/images/superset-choose-a-database.png b/docs/en/getting-started/example-datasets/images/superset-choose-a-database.png new file mode 100644 index 00000000000..40c71e0a053 Binary files /dev/null and b/docs/en/getting-started/example-datasets/images/superset-choose-a-database.png differ diff --git a/docs/en/getting-started/example-datasets/images/superset-connect-a-database.png b/docs/en/getting-started/example-datasets/images/superset-connect-a-database.png new file mode 100644 index 00000000000..f67d0663063 Binary files /dev/null and b/docs/en/getting-started/example-datasets/images/superset-connect-a-database.png differ diff --git a/docs/en/getting-started/example-datasets/images/superset-create-map.png b/docs/en/getting-started/example-datasets/images/superset-create-map.png new file mode 100644 index 00000000000..5ad4395eb13 Binary files /dev/null and b/docs/en/getting-started/example-datasets/images/superset-create-map.png differ diff --git a/docs/en/getting-started/example-datasets/images/superset-lon-lat.png b/docs/en/getting-started/example-datasets/images/superset-lon-lat.png new file mode 100644 index 00000000000..f07fb899e72 Binary files /dev/null and b/docs/en/getting-started/example-datasets/images/superset-lon-lat.png differ diff --git a/docs/en/getting-started/example-datasets/images/superset-mcc-204.png b/docs/en/getting-started/example-datasets/images/superset-mcc-204.png new file mode 100644 index 00000000000..a561c539b58 Binary files /dev/null and b/docs/en/getting-started/example-datasets/images/superset-mcc-204.png differ diff --git a/docs/en/getting-started/example-datasets/images/superset-radio-umts.png b/docs/en/getting-started/example-datasets/images/superset-radio-umts.png new file mode 100644 index 00000000000..b0b31b6dbc0 Binary files /dev/null and b/docs/en/getting-started/example-datasets/images/superset-radio-umts.png differ diff --git a/docs/en/getting-started/example-datasets/images/superset-umts-netherlands.png b/docs/en/getting-started/example-datasets/images/superset-umts-netherlands.png new file mode 100644 index 00000000000..5cb887cb5c1 Binary files /dev/null and b/docs/en/getting-started/example-datasets/images/superset-umts-netherlands.png differ diff --git a/docs/en/getting-started/example-datasets/nyc-taxi.md b/docs/en/getting-started/example-datasets/nyc-taxi.md index e24fb4b01a7..69098f63037 100644 --- a/docs/en/getting-started/example-datasets/nyc-taxi.md +++ b/docs/en/getting-started/example-datasets/nyc-taxi.md @@ -33,7 +33,7 @@ CREATE 
TABLE trips ( tip_amount Float32, tolls_amount Float32, total_amount Float32, - payment_type Enum('CSH' = 1, 'CRE' = 2, 'NOC' = 3, 'DIS' = 4), + payment_type Enum('CSH' = 1, 'CRE' = 2, 'NOC' = 3, 'DIS' = 4, 'UNK' = 5), pickup_ntaname LowCardinality(String), dropoff_ntaname LowCardinality(String) ) @@ -63,7 +63,7 @@ SELECT payment_type, pickup_ntaname, dropoff_ntaname -FROM url( +FROM s3( 'https://datasets-documentation.s3.eu-west-3.amazonaws.com/nyc-taxi/trips_{0..2}.gz', 'TabSeparatedWithNames' ) diff --git a/docs/en/getting-started/example-datasets/recipes.md b/docs/en/getting-started/example-datasets/recipes.md index cc059f6bd26..6a003571f6e 100644 --- a/docs/en/getting-started/example-datasets/recipes.md +++ b/docs/en/getting-started/example-datasets/recipes.md @@ -4,7 +4,7 @@ sidebar_label: Recipes Dataset title: "Recipes Dataset" --- -RecipeNLG dataset is available for download [here](https://recipenlg.cs.put.poznan.pl/dataset). It contains 2.2 million recipes. The size is slightly less than 1 GB. +The RecipeNLG dataset is available for download [here](https://recipenlg.cs.put.poznan.pl/dataset). It contains 2.2 million recipes. The size is slightly less than 1 GB. ## Download and Unpack the Dataset diff --git a/docs/en/getting-started/example-datasets/uk-price-paid.md b/docs/en/getting-started/example-datasets/uk-price-paid.md index ef20c03883f..2a89bfda2e7 100644 --- a/docs/en/getting-started/example-datasets/uk-price-paid.md +++ b/docs/en/getting-started/example-datasets/uk-price-paid.md @@ -101,7 +101,7 @@ SELECT count() FROM uk_price_paid ``` -At the time this query was executed, the dataset had 27,450,499 rows. Let's see what the storage size is of the table in ClickHouse: +At the time this query was run, the dataset had 27,450,499 rows. Let's see what the storage size is of the table in ClickHouse: ```sql SELECT formatReadableSize(total_bytes) @@ -342,7 +342,7 @@ The result looks like: ## Let's Speed Up Queries Using Projections {#speedup-with-projections} -[Projections](../../sql-reference/statements/alter/projection.md) allow you to improve query speeds by storing pre-aggregated data in whatever format you want. In this example, we create a projection that keeps track of the average price, total price, and count of properties grouped by the year, district and town. At execution time, ClickHouse will use your projection if it thinks the projection can improve the performance fo the query (you don't have to do anything special to use the projection - ClickHouse decides for you when the projection will be useful). +[Projections](../../sql-reference/statements/alter/projection.md) allow you to improve query speeds by storing pre-aggregated data in whatever format you want. In this example, we create a projection that keeps track of the average price, total price, and count of properties grouped by the year, district and town. At query time, ClickHouse will use your projection if it thinks the projection can improve the performance of the query (you don't have to do anything special to use the projection - ClickHouse decides for you when the projection will be useful). ### Build a Projection {#build-projection} diff --git a/docs/en/getting-started/install.md b/docs/en/getting-started/install.md index 61303eddab9..e88e9e06a68 100644 --- a/docs/en/getting-started/install.md +++ b/docs/en/getting-started/install.md @@ -128,6 +128,24 @@ clickhouse-client # or "clickhouse-client --password" if you set up a password. +
+Migration Method for installing the deb-packages + +```bash +sudo apt-key del E0C56BD4 +sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 8919F6BD2B48D754 +echo "deb https://packages.clickhouse.com/deb stable main" | sudo tee \ + /etc/apt/sources.list.d/clickhouse.list +sudo apt-get update + +sudo apt-get install -y clickhouse-server clickhouse-client + +sudo service clickhouse-server start +clickhouse-client # or "clickhouse-client --password" if you set up a password. +``` + +
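+Once `clickhouse-client` connects, a quick way to confirm the installation is to ask the server for its version. This is an illustrative extra step, not part of the package instructions above; any recent version string means the server is up:
+
+```sql
+-- Returns the version of the freshly installed and started server
+SELECT version();
+```
+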
+ You can replace `stable` with `lts` to use different [release kinds](/docs/en/faq/operations/production.md) based on your needs. You can also download and install packages manually from [here](https://packages.clickhouse.com/deb/pool/main/c/). diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index 9b8354f23a2..58e986cc2f3 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -1020,6 +1020,62 @@ Example: } ``` +To use object name as column value you can use special setting [format_json_object_each_row_column_for_object_name](../operations/settings/settings.md#format_json_object_each_row_column_for_object_name). Value of this setting is set to the name of a column, that is used as JSON key for a row in resulting object. +Examples: + +For output: + +Let's say we have table `test` with two columns: +``` +┌─object_name─┬─number─┐ +│ first_obj │ 1 │ +│ second_obj │ 2 │ +│ third_obj │ 3 │ +└─────────────┴────────┘ +``` +Let's output it in `JSONObjectEachRow` format and use `format_json_object_each_row_column_for_object_name` setting: + +```sql +select * from test settings format_json_object_each_row_column_for_object_name='object_name' +``` + +The output: +```json +{ + "first_obj": {"number": 1}, + "second_obj": {"number": 2}, + "third_obj": {"number": 3} +} +``` + +For input: + +Let's say we stored output from previous example in a file with name `data.json`: +```sql +select * from file('data.json', JSONObjectEachRow, 'object_name String, number UInt64') settings format_json_object_each_row_column_for_object_name='object_name' +``` + +``` +┌─object_name─┬─number─┐ +│ first_obj │ 1 │ +│ second_obj │ 2 │ +│ third_obj │ 3 │ +└─────────────┴────────┘ +``` + +It also works in schema inference: + +```sql +desc file('data.json', JSONObjectEachRow) settings format_json_object_each_row_column_for_object_name='object_name' +``` + +``` +┌─name────────┬─type────────────┐ +│ object_name │ String │ +│ number │ Nullable(Int64) │ +└─────────────┴─────────────────┘ +``` + ### Inserting Data {#json-inserting-data} diff --git a/docs/en/interfaces/third-party/client-libraries.md b/docs/en/interfaces/third-party/client-libraries.md index e085566aa7e..c26532c98cb 100644 --- a/docs/en/interfaces/third-party/client-libraries.md +++ b/docs/en/interfaces/third-party/client-libraries.md @@ -41,6 +41,7 @@ ClickHouse Inc does **not** maintain the libraries listed below and hasn’t don - [node-clickhouse](https://github.com/apla/node-clickhouse) - [nestjs-clickhouse](https://github.com/depyronick/nestjs-clickhouse) - [clickhouse-client](https://github.com/depyronick/clickhouse-client) + - [node-clickhouse-orm](https://github.com/zimv/node-clickhouse-orm) - Perl - [perl-DBD-ClickHouse](https://github.com/elcamlost/perl-DBD-ClickHouse) - [HTTP-ClickHouse](https://metacpan.org/release/HTTP-ClickHouse) diff --git a/docs/en/operations/backup.md b/docs/en/operations/_backup.md similarity index 61% rename from docs/en/operations/backup.md rename to docs/en/operations/_backup.md index d26d8f27820..d694c51cee6 100644 --- a/docs/en/operations/backup.md +++ b/docs/en/operations/_backup.md @@ -1,9 +1,12 @@ ---- -slug: /en/operations/backup -sidebar_position: 49 -sidebar_label: Data backup and restore -title: Data backup and restore ---- + +[//]: # (This file is included in Manage > Backups) + +- [Backup to a local disk](#backup-to-a-local-disk) +- [Configuring backup/restore to use an S3 endpoint](#configuring-backuprestore-to-use-an-s3-endpoint) +- [Backup/restore using an S3 
disk](#backuprestore-using-an-s3-disk) +- [Alternatives](#alternatives) + +## Background While [replication](../engines/table-engines/mergetree-family/replication.md) provides protection from hardware failures, it does not protect against human errors: accidental deletion of data, deletion of the wrong table or a table on the wrong cluster, and software bugs that result in incorrect data processing or data corruption. In many cases mistakes like these will affect all replicas. ClickHouse has built-in safeguards to prevent some types of mistakes — for example, by default [you can’t just drop tables with a MergeTree-like engine containing more than 50 Gb of data](server-configuration-parameters/settings.md#max-table-size-to-drop). However, these safeguards do not cover all possible cases and can be circumvented. @@ -15,7 +18,9 @@ Each company has different resources available and business requirements, so the Keep in mind that if you backed something up and never tried to restore it, chances are that restore will not work properly when you actually need it (or at least it will take longer than business can tolerate). So whatever backup approach you choose, make sure to automate the restore process as well, and practice it on a spare ClickHouse cluster regularly. ::: -## Configure a backup destination +## Backup to a local disk + +### Configure a backup destination In the examples below you will see the backup destination specified like `Disk('backups', '1.zip')`. To prepare the destination add a file to `/etc/clickhouse-server/config.d/backup_disk.xml` specifying the backup destination. For example, this file defines disk named `backups` and then adds that disk to the **backups > allowed_disk** list: @@ -39,7 +44,7 @@ In the examples below you will see the backup destination specified like `Disk(' ``` -## Parameters +### Parameters Backups can be either full or incremental, and can include tables (including materialized views, projections, and dictionaries), and databases. Backups can be synchronous (default) or asynchronous. They can be compressed. Backups can be password protected. @@ -52,7 +57,7 @@ The BACKUP and RESTORE statements take a list of DATABASE and TABLE names, a des - `password` for the file on disk - `base_backup`: the destination of the previous backup of this source. For example, `Disk('backups', '1.zip')` -## Usage examples +### Usage examples Backup and then restore a table: ``` @@ -81,7 +86,7 @@ RESTORE TABLE test.table AS test.table2 FROM Disk('backups', '1.zip') BACKUP TABLE test.table3 AS test.table4 TO Disk('backups', '2.zip') ``` -## Incremental backups +### Incremental backups Incremental backups can be taken by specifying the `base_backup`. :::note @@ -100,7 +105,7 @@ RESTORE TABLE test.table AS test.table2 FROM Disk('backups', 'incremental-a.zip'); ``` -## Assign a password to the backup +### Assign a password to the backup Backups written to disk can have a password applied to the file: ``` @@ -116,7 +121,7 @@ RESTORE TABLE test.table SETTINGS password='qwerty' ``` -## Compression settings +### Compression settings If you would like to specify the compression method or level: ``` @@ -125,14 +130,14 @@ BACKUP TABLE test.table SETTINGS compression_method='lzma', compression_level=3 ``` -## Restore specific partitions +### Restore specific partitions If specific partitions associated with a table need to be restored these can be specified. 
To restore partitions 1 and 4 from backup: ``` RESTORE TABLE test.table PARTITIONS '2', '3' FROM Disk('backups', 'filename.zip') ``` -## Check the status of backups +### Check the status of backups The backup command returns an `id` and `status`, and that `id` can be used to get the status of the backup. This is very useful to check the progress of long ASYNC backups. The example below shows a failure that happened when trying to overwrite an existing backup file: ```sql @@ -171,6 +176,160 @@ end_time: 2022-08-30 09:21:46 1 row in set. Elapsed: 0.002 sec. ``` +## Configuring BACKUP/RESTORE to use an S3 Endpoint + +To write backups to an S3 bucket you need three pieces of information: +- S3 endpoint, + for example `https://mars-doc-test.s3.amazonaws.com/backup-S3/` +- Access key ID, + for example `ABC123` +- Secret access key, + for example `Abc+123` + +:::note +Creating an S3 bucket is covered in [Use S3 Object Storage as a ClickHouse disk](/docs/en/integrations/data-ingestion/s3/configuring-s3-for-clickhouse-use.md), just come back to this doc after saving the policy, there is no need to configure ClickHouse to use the S3 bucket. +::: + +The destination for a backup will be specified like this: +``` +S3('/', '', ') +``` + +```sql +CREATE TABLE data +( + `key` Int, + `value` String, + `array` Array(String) +) +ENGINE = MergeTree +ORDER BY tuple() +``` + +```sql +INSERT INTO data SELECT * +FROM generateRandom('key Int, value String, array Array(String)') +LIMIT 1000 +``` + +### Create a base (initial) backup + +Incremental backups require a _base_ backup to start from, this example will be used +later as the base backup. The first parameter of the S3 destination is the S3 endpoint followed by the directory within the bucket to use for this backup. In this example the directory is named `my_backup`. + +```sql +BACKUP TABLE data TO S3('https://mars-doc-test.s3.amazonaws.com/backup-S3/my_backup', 'ABC123', 'Abc+123') +``` + +```response +┌─id───────────────────────────────────┬─status─────────┐ +│ de442b75-a66c-4a3c-a193-f76f278c70f3 │ BACKUP_CREATED │ +└──────────────────────────────────────┴────────────────┘ +``` + +### Add more data + +Incremental backups are populated with the difference between the base backup and the current content of the table being backed up. Add more data before taking the incremental backup: + +```sql +INSERT INTO data SELECT * +FROM generateRandom('key Int, value String, array Array(String)') +LIMIT 100 +``` +### Take an incremental backup + +This backup command is similar to the base backup, but adds `SETTINGS base_backup` and the location of the base backup. Note that the destination for the incremental backup is not the same directory as the base, it is the same endpoint with a different target directory within the bucket. The base backup is in `my_backup`, and the incremental will be written to `my_incremental`: +```sql +BACKUP TABLE data TO S3('https://mars-doc-test.s3.amazonaws.com/backup-S3/my_incremental', 'ABC123', 'Abc+123') SETTINGS base_backup = S3('https://mars-doc-test.s3.amazonaws.com/backup-S3/my_backup', 'ABC123', 'Abc+123') +``` + +```response +┌─id───────────────────────────────────┬─status─────────┐ +│ f6cd3900-850f-41c9-94f1-0c4df33ea528 │ BACKUP_CREATED │ +└──────────────────────────────────────┴────────────────┘ +``` +### Restore from the incremental backup + +This command restores the incremental backup into a new table, `data3`. Note that when an incremental backup is restored, the base backup is also included. 
Specify only the incremental backup when restoring: +```sql +RESTORE TABLE data AS data3 FROM S3('https://mars-doc-test.s3.amazonaws.com/backup-S3/my_incremental', 'ABC123', 'Abc+123') +``` + +```response +┌─id───────────────────────────────────┬─status───┐ +│ ff0c8c39-7dff-4324-a241-000796de11ca │ RESTORED │ +└──────────────────────────────────────┴──────────┘ +``` + +### Verify the count + +There were two inserts into the original table `data`, one with 1,000 rows and one with 100 rows, for a total of 1,100. Verify that the restored table has 1,100 rows: +```sql +SELECT count() +FROM data3 +``` +```response +┌─count()─┐ +│ 1100 │ +└─────────┘ +``` + +### Verify the content +This compares the content of the original table, `data` with the restored table `data3`: +```sql +SELECT throwIf(( + SELECT groupArray(tuple(*)) + FROM data + ) != ( + SELECT groupArray(tuple(*)) + FROM data3 + ), 'Data does not match after BACKUP/RESTORE') +``` +## BACKUP/RESTORE Using an S3 Disk + +It is also possible to `BACKUP`/`RESTORE` to S3 by configuring an S3 disk in the ClickHouse storage configuration. Configure the disk like this by adding a file to `/etc/clickhouse-server/config.d`: + +```xml + + + + + s3_plain + + + + + + + + +
+    <!-- Note: the XML tag names of this example were lost when the page was extracted to text.
+         The original block defines a disk of type `s3_plain` (with endpoint, access_key_id and
+         secret_access_key settings), a storage policy that uses that disk, and a
+         <backups><allowed_disk> entry naming the `s3_plain` disk so that BACKUP and RESTORE
+         can write to it. -->
+``` + +And then `BACKUP`/`RESTORE` as usual: + +```sql +BACKUP TABLE data TO Disk('s3_plain', 'cloud_backup'); +RESTORE TABLE data AS data_restored FROM Disk('s3_plain', 'cloud_backup'); +``` + +:::note +But keep in mind that: +- This disk should not be used for `MergeTree` itself, only for `BACKUP`/`RESTORE` +- It has excessive API calls +::: + ## Alternatives ClickHouse stores data on disk, and there are many ways to backup disks. These are some alternatives that have been used in the past, and that may fit in well in your environment. diff --git a/docs/en/operations/update.md b/docs/en/operations/_update.md similarity index 88% rename from docs/en/operations/update.md rename to docs/en/operations/_update.md index 24f7efecc7b..86981da2be6 100644 --- a/docs/en/operations/update.md +++ b/docs/en/operations/_update.md @@ -1,10 +1,7 @@ ---- -slug: /en/operations/update -sidebar_position: 47 -sidebar_label: ClickHouse Upgrade ---- -# ClickHouse Upgrade +[//]: # (This file is included in Manage > Updates) + +## Self-managed ClickHouse Upgrade If ClickHouse was installed from `deb` packages, execute the following commands on the server: diff --git a/docs/en/operations/clickhouse-keeper.md b/docs/en/operations/clickhouse-keeper.md index 82fa5c114ea..0324f742988 100644 --- a/docs/en/operations/clickhouse-keeper.md +++ b/docs/en/operations/clickhouse-keeper.md @@ -126,7 +126,7 @@ clickhouse keeper --config /etc/your_path_to_config/config.xml ClickHouse Keeper also provides 4lw commands which are almost the same with Zookeeper. Each command is composed of four letters such as `mntr`, `stat` etc. There are some more interesting commands: `stat` gives some general information about the server and connected clients, while `srvr` and `cons` give extended details on server and connections respectively. -The 4lw commands has a white list configuration `four_letter_word_white_list` which has default value `conf,cons,crst,envi,ruok,srst,srvr,stat,wchc,wchs,dirs,mntr,isro`. +The 4lw commands has a white list configuration `four_letter_word_white_list` which has default value `conf,cons,crst,envi,ruok,srst,srvr,stat,wchs,dirs,mntr,isro,rcvr,apiv,csnp,lgif`. You can issue the commands to ClickHouse Keeper via telnet or nc, at the client port. @@ -309,7 +309,26 @@ Sessions with Ephemerals (1): /clickhouse/task_queue/ddl ``` -## [experimental] Migration from ZooKeeper {#migration-from-zookeeper} +- `csnp`: Schedule a snapshot creation task. Return the last committed log index of the scheduled snapshot if success or `Failed to schedule snapshot creation task.` if failed. Note that `lgif` command can help you determine whether the snapshot is done. + +``` +100 +``` + +- `lgif`: Keeper log information. `first_log_idx` : my first log index in log store; `first_log_term` : my first log term; `last_log_idx` : my last log index in log store; `last_log_term` : my last log term; `last_committed_log_idx` : my last committed log index in state machine; `leader_committed_log_idx` : leader's committed log index from my perspective; `target_committed_log_idx` : target log index should be committed to; `last_snapshot_idx` : the largest committed log index in last snapshot. 
+ +``` +first_log_idx 1 +first_log_term 1 +last_log_idx 101 +last_log_term 1 +last_committed_log_idx 100 +leader_committed_log_idx 101 +target_committed_log_idx 101 +last_snapshot_idx 50 +``` + +## Migration from ZooKeeper {#migration-from-zookeeper} Seamlessly migration from ZooKeeper to ClickHouse Keeper is impossible you have to stop your ZooKeeper cluster, convert data and start ClickHouse Keeper. `clickhouse-keeper-converter` tool allows converting ZooKeeper logs and snapshots to ClickHouse Keeper snapshot. It works only with ZooKeeper > 3.4. Steps for migration: diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index efdce2d4a88..7f7c14817ba 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -2939,7 +2939,7 @@ Possible values: - 0 — Projection optimization disabled. - 1 — Projection optimization enabled. -Default value: `0`. +Default value: `1`. ## force_optimize_projection {#force-optimize-projection} @@ -3902,6 +3902,13 @@ Controls validation of UTF-8 sequences in JSON output formats, doesn't impact fo Disabled by default. +### format_json_object_each_row_column_for_object_name {#format_json_object_each_row_column_for_object_name} + +The name of column that will be used for storing/writing object names in [JSONObjectEachRow](../../interfaces/formats.md#jsonobjecteachrow) format. +Column type should be String. If value is empty, default names `row_{i}`will be used for object names. + +Default value: ''. + ## TSV format settings {#tsv-format-settings} ### input_format_tsv_empty_as_default {#input_format_tsv_empty_as_default} diff --git a/docs/en/operations/system-tables/information_schema.md b/docs/en/operations/system-tables/information_schema.md index a573491282a..a8e516f02a3 100644 --- a/docs/en/operations/system-tables/information_schema.md +++ b/docs/en/operations/system-tables/information_schema.md @@ -178,7 +178,7 @@ Columns: - `view_definition` ([String](../../sql-reference/data-types/string.md)) — `SELECT` query for view. - `check_option` ([String](../../sql-reference/data-types/string.md)) — `NONE`, no checking. - `is_updatable` ([Enum8](../../sql-reference/data-types/enum.md)) — `NO`, the view is not updated. -- `is_insertable_into` ([Enum8](../../sql-reference/data-types/enum.md)) — Shows whether the created view is [materialized](../../sql-reference/statements/create/view/#materialized). Possible values: +- `is_insertable_into` ([Enum8](../../sql-reference/data-types/enum.md)) — Shows whether the created view is [materialized](../../sql-reference/statements/create/view.md/#materialized-view). Possible values: - `NO` — The created view is not materialized. - `YES` — The created view is materialized. - `is_trigger_updatable` ([Enum8](../../sql-reference/data-types/enum.md)) — `NO`, the trigger is not updated. 
diff --git a/docs/en/operations/system-tables/replicated_fetches.md b/docs/en/operations/system-tables/replicated_fetches.md index 3536bbaff4d..74888fd2f13 100644 --- a/docs/en/operations/system-tables/replicated_fetches.md +++ b/docs/en/operations/system-tables/replicated_fetches.md @@ -68,6 +68,5 @@ thread_id: 54 **See Also** -- [Managing ReplicatedMergeTree Tables](../../sql-reference/statements/system/#query-language-system-replicated) +- [Managing ReplicatedMergeTree Tables](../../sql-reference/statements/system.md/#managing-replicatedmergetree-tables) -[Original article](https://clickhouse.com/docs/en/operations/system_tables/replicated_fetches) diff --git a/docs/en/operations/system-tables/session_log.md b/docs/en/operations/system-tables/session_log.md index 79c8ea184ce..cdf86b57ef6 100644 --- a/docs/en/operations/system-tables/session_log.md +++ b/docs/en/operations/system-tables/session_log.md @@ -24,6 +24,7 @@ Columns: - `DOUBLE_SHA1_PASSWORD` - `LDAP` - `KERBEROS` + - `SSL_CERTIFICATE` - `profiles` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — The list of profiles set for all roles and/or users. - `roles` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — The list of roles to which the profile is applied. - `settings` ([Array](../../sql-reference/data-types/array.md)([Tuple](../../sql-reference/data-types/tuple.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md), [String](../../sql-reference/data-types/string.md)))) — Settings that were changed when the client logged in/out. diff --git a/docs/en/operations/system-tables/users.md b/docs/en/operations/system-tables/users.md index eaeabab131b..6ef9b7b18a4 100644 --- a/docs/en/operations/system-tables/users.md +++ b/docs/en/operations/system-tables/users.md @@ -12,7 +12,7 @@ Columns: - `storage` ([String](../../sql-reference/data-types/string.md)) — Path to the storage of users. Configured in the `access_control_path` parameter. -- `auth_type` ([Enum8](../../sql-reference/data-types/enum.md)('no_password' = 0,'plaintext_password' = 1, 'sha256_password' = 2, 'double_sha1_password' = 3)) — Shows the authentication type. There are multiple ways of user identification: with no password, with plain text password, with [SHA256](https://ru.wikipedia.org/wiki/SHA-2)-encoded password or with [double SHA-1](https://ru.wikipedia.org/wiki/SHA-1)-encoded password. +- `auth_type` ([Enum8](../../sql-reference/data-types/enum.md)('no_password' = 0,'plaintext_password' = 1, 'sha256_password' = 2, 'double_sha1_password' = 3, 'ldap' = 4, 'kerberos' = 5, 'ssl_certificate' = 6)) — Shows the authentication type. There are multiple ways of user identification: with no password, with plain text password, with [SHA256](https://ru.wikipedia.org/wiki/SHA-2)-encoded password or with [double SHA-1](https://ru.wikipedia.org/wiki/SHA-1)-encoded password. - `auth_params` ([String](../../sql-reference/data-types/string.md)) — Authentication parameters in the JSON format depending on the `auth_type`. diff --git a/docs/en/operations/troubleshooting.md b/docs/en/operations/troubleshooting.md index 93bd56087a2..ad92e773ea3 100644 --- a/docs/en/operations/troubleshooting.md +++ b/docs/en/operations/troubleshooting.md @@ -17,6 +17,49 @@ title: Troubleshooting - Check firewall settings. 
- If you cannot access the repository for any reason, download packages as described in the [install guide](../getting-started/install.md) article and install them manually using the `sudo dpkg -i ` command. You will also need the `tzdata` package. +### You Cannot Update Deb Packages from ClickHouse Repository with Apt-get {#you-cannot-update-deb-packages-from-clickhouse-repository-with-apt-get} + +- The issue may be happened when the GPG key is changed. + +Please use the following scripts to resolve the issue: + +```bash +sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 8919F6BD2B48D754 +sudo apt-get update +``` + +### You Get Different Warnings with `apt-get update` {#you-get-different-warnings-with-apt-get-update} + +- The completed warning messages are as one of following: + +``` +N: Skipping acquire of configured file 'main/binary-i386/Packages' as repository 'https://packages.clickhouse.com/deb stable InRelease' doesn't support architecture 'i386' +``` + +``` +E: Failed to fetch https://packages.clickhouse.com/deb/dists/stable/main/binary-amd64/Packages.gz File has unexpected size (30451 != 28154). Mirror sync in progress? +``` + +``` +E: Repository 'https://packages.clickhouse.com/deb stable InRelease' changed its 'Origin' value from 'Artifactory' to 'ClickHouse' +E: Repository 'https://packages.clickhouse.com/deb stable InRelease' changed its 'Label' value from 'Artifactory' to 'ClickHouse' +N: Repository 'https://packages.clickhouse.com/deb stable InRelease' changed its 'Suite' value from 'stable' to '' +N: This must be accepted explicitly before updates for this repository can be applied. See apt-secure(8) manpage for details. +``` + +``` +Err:11 https://packages.clickhouse.com/deb stable InRelease + 400 Bad Request [IP: 172.66.40.249 443] +``` + +To resolve the above issue, please use the following script: + +```bash +sudo rm /var/lib/apt/lists/packages.clickhouse.com_* /var/lib/dpkg/arch /var/lib/apt/lists/partial/packages.clickhouse.com_* +sudo apt-get clean +sudo apt-get autoclean +``` + ## Connecting to the Server {#troubleshooting-accepts-no-connections} Possible issues: diff --git a/docs/en/operations/utilities/clickhouse-benchmark.md b/docs/en/operations/utilities/clickhouse-benchmark.md index 1a250ea5481..faa7ac75c74 100644 --- a/docs/en/operations/utilities/clickhouse-benchmark.md +++ b/docs/en/operations/utilities/clickhouse-benchmark.md @@ -109,56 +109,38 @@ In the report you can find: `clickhouse-benchmark` can compare performances for two running ClickHouse servers. -To use the comparison mode, specify endpoints of both servers by two pairs of `--host`, `--port` keys. Keys matched together by position in arguments list, the first `--host` is matched with the first `--port` and so on. `clickhouse-benchmark` establishes connections to both servers, then sends queries. Each query addressed to a randomly selected server. The results are shown for each server separately. +To use the comparison mode, specify endpoints of both servers by two pairs of `--host`, `--port` keys. Keys matched together by position in arguments list, the first `--host` is matched with the first `--port` and so on. `clickhouse-benchmark` establishes connections to both servers, then sends queries. Each query addressed to a randomly selected server. The results are shown in a table. 
## Example {#clickhouse-benchmark-example} ``` bash -$ echo "SELECT * FROM system.numbers LIMIT 10000000 OFFSET 10000000" | clickhouse-benchmark -i 10 +$ echo "SELECT * FROM system.numbers LIMIT 10000000 OFFSET 10000000" | clickhouse-benchmark --host=localhost --port=9001 --host=localhost --port=9000 -i 10 ``` ``` text Loaded 1 queries. -Queries executed: 6. +Queries executed: 5. -localhost:9000, queries 6, QPS: 6.153, RPS: 123398340.957, MiB/s: 941.455, result RPS: 61532982.200, result MiB/s: 469.459. +localhost:9001, queries 2, QPS: 3.764, RPS: 75446929.370, MiB/s: 575.614, result RPS: 37639659.982, result MiB/s: 287.168. +localhost:9000, queries 3, QPS: 3.815, RPS: 76466659.385, MiB/s: 583.394, result RPS: 38148392.297, result MiB/s: 291.049. -0.000% 0.159 sec. -10.000% 0.159 sec. -20.000% 0.159 sec. -30.000% 0.160 sec. -40.000% 0.160 sec. -50.000% 0.162 sec. -60.000% 0.164 sec. -70.000% 0.165 sec. -80.000% 0.166 sec. -90.000% 0.166 sec. -95.000% 0.167 sec. -99.000% 0.167 sec. -99.900% 0.167 sec. -99.990% 0.167 sec. +0.000% 0.258 sec. 0.250 sec. +10.000% 0.258 sec. 0.250 sec. +20.000% 0.258 sec. 0.250 sec. +30.000% 0.258 sec. 0.267 sec. +40.000% 0.258 sec. 0.267 sec. +50.000% 0.273 sec. 0.267 sec. +60.000% 0.273 sec. 0.267 sec. +70.000% 0.273 sec. 0.267 sec. +80.000% 0.273 sec. 0.269 sec. +90.000% 0.273 sec. 0.269 sec. +95.000% 0.273 sec. 0.269 sec. +99.000% 0.273 sec. 0.269 sec. +99.900% 0.273 sec. 0.269 sec. +99.990% 0.273 sec. 0.269 sec. - - -Queries executed: 10. - -localhost:9000, queries 10, QPS: 6.082, RPS: 121959604.568, MiB/s: 930.478, result RPS: 60815551.642, result MiB/s: 463.986. - -0.000% 0.159 sec. -10.000% 0.159 sec. -20.000% 0.160 sec. -30.000% 0.163 sec. -40.000% 0.164 sec. -50.000% 0.165 sec. -60.000% 0.166 sec. -70.000% 0.166 sec. -80.000% 0.167 sec. -90.000% 0.167 sec. -95.000% 0.170 sec. -99.000% 0.172 sec. -99.900% 0.172 sec. -99.990% 0.172 sec. +No difference proven at 99.5% confidence ``` [Original article](https://clickhouse.com/docs/en/operations/utilities/clickhouse-benchmark.md) diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md index 198ff12f1d6..02a4ad57a3b 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md @@ -303,17 +303,25 @@ or CREATE DICTIONARY somedict ( id UInt64, first Date, - last Date + last Date, + advertiser_id UInt64 ) PRIMARY KEY id +SOURCE(CLICKHOUSE(TABLE 'date_table')) +LIFETIME(MIN 1 MAX 1000) LAYOUT(RANGE_HASHED()) RANGE(MIN first MAX last) ``` -To work with these dictionaries, you need to pass an additional argument to the `dictGetT` function, for which a range is selected: +To work with these dictionaries, you need to pass an additional argument to the `dictGet` function, for which a range is selected: ``` sql -dictGetT('dict_name', 'attr_name', id, date) +dictGet('dict_name', 'attr_name', id, date) +``` +Query example: + +``` sql +SELECT dictGet('somedict', 'advertiser_id', 1, '2022-10-20 23:20:10.000'::DateTime64::UInt64); ``` This function returns the value for the specified `id`s and the date range that includes the passed date. 
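+For illustration, here is a sketch of a source table that such a dictionary could be built over. The table name `date_table` is taken from the `SOURCE(CLICKHOUSE(TABLE 'date_table'))` clause above; the column set and the sample row are assumptions added for this example:
+
+``` sql
+-- Hypothetical source table whose columns mirror the dictionary attributes above
+CREATE TABLE date_table
+(
+    id UInt64,
+    first Date,
+    last Date,
+    advertiser_id UInt64
+)
+ENGINE = MergeTree
+ORDER BY id;
+
+-- One illustrative range: advertiser 71 is valid for id = 1 throughout October 2022
+INSERT INTO date_table VALUES (1, '2022-10-01', '2022-10-31', 71);
+```
+
+With this row loaded and the dictionary created as shown, the `dictGet` query above should return `71` for `id = 1` and the date `2022-10-20`.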
diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md index 912af5b5bce..e5ee48c9166 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md @@ -14,8 +14,10 @@ Example of a polygon dictionary configuration: - key - Array(Array(Array(Array(Float64)))) + + key + Array(Array(Array(Array(Float64)))) + diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 76f66db924f..f7ea2690b21 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -271,11 +271,7 @@ Result: The return type of `toStartOf*`, `toLastDayOfMonth`, `toMonday`, `timeSlot` functions described below is determined by the configuration parameter [enable_extended_results_for_datetime_functions](../../operations/settings/settings#enable-extended-results-for-datetime-functions) which is `0` by default. Behavior for -* `enable_extended_results_for_datetime_functions = 0`: Functions `toStartOfYear`, `toStartOfISOYear`, `toStartOfQuarter`, `toStartOfMonth`, `toStartOfWeek`, `toLastDayOfMonth`, `toMonday` return `Date` or `DateTime`. Functions `toStartOfDay`, `toStartOfHour`, `toStartOfFifteenMinutes`, `toStartOfTenMinutes`, `toStartOfFiveMinutes`, `toStartOfMinute`, `timeSlot` return `DateTime`. Though these functions can take values of the extended types `Date32` and `DateTime64` as an argument, passing them a time outside the normal range (year 1970 to 2149 for `Date` / 2106 for `DateTime`) will produce wrong results. In case argument is out of normal range: - * If the argument is smaller than 1970, the result will be calculated from the argument `1970-01-01 (00:00:00)` instead. - * If the return type is `DateTime` and the argument is larger than `2106-02-07 08:28:15`, the result will be calculated from the argument `2106-02-07 08:28:15` instead. - * If the return type is `Date` and the argument is larger than `2149-06-06`, the result will be calculated from the argument `2149-06-06` instead. - * If `toLastDayOfMonth` is called with an argument greater then `2149-05-31`, the result will be calculated from the argument `2149-05-31` instead. +* `enable_extended_results_for_datetime_functions = 0`: Functions `toStartOfYear`, `toStartOfISOYear`, `toStartOfQuarter`, `toStartOfMonth`, `toStartOfWeek`, `toLastDayOfMonth`, `toMonday` return `Date` or `DateTime`. Functions `toStartOfDay`, `toStartOfHour`, `toStartOfFifteenMinutes`, `toStartOfTenMinutes`, `toStartOfFiveMinutes`, `toStartOfMinute`, `timeSlot` return `DateTime`. Though these functions can take values of the extended types `Date32` and `DateTime64` as an argument, passing them a time outside the normal range (year 1970 to 2149 for `Date` / 2106 for `DateTime`) will produce wrong results. * `enable_extended_results_for_datetime_functions = 1`: * Functions `toStartOfYear`, `toStartOfISOYear`, `toStartOfQuarter`, `toStartOfMonth`, `toStartOfWeek`, `toLastDayOfMonth`, `toMonday` return `Date` or `DateTime` if their argument is a `Date` or `DateTime`, and they return `Date32` or `DateTime64` if their argument is a `Date32` or `DateTime64`. 
* Functions `toStartOfDay`, `toStartOfHour`, `toStartOfFifteenMinutes`, `toStartOfTenMinutes`, `toStartOfFiveMinutes`, `toStartOfMinute`, `timeSlot` return `DateTime` if their argument is a `Date` or `DateTime`, and they return `DateTime64` if their argument is a `Date32` or `DateTime64`. @@ -302,25 +298,22 @@ Returns the date. Rounds down a date or date with time to the first day of the month. Returns the date. -## toLastDayOfMonth - -Rounds up a date or date with time to the last day of the month. -Returns the date. +:::note +The behavior of parsing incorrect dates is implementation specific. ClickHouse may return zero date, throw an exception or do “natural” overflow. +::: If `toLastDayOfMonth` is called with an argument of type `Date` greater then 2149-05-31, the result will be calculated from the argument 2149-05-31 instead. ## toMonday Rounds down a date or date with time to the nearest Monday. -As a special case, date arguments `1970-01-01`, `1970-01-02`, `1970-01-03` and `1970-01-04` return date `1970-01-01`. Returns the date. ## toStartOfWeek(t\[,mode\]) Rounds down a date or date with time to the nearest Sunday or Monday by mode. Returns the date. -As a special case, date arguments `1970-01-01`, `1970-01-02`, `1970-01-03` and `1970-01-04` (and `1970-01-05` if `mode` is `1`) return date `1970-01-01`. -The `mode` argument works exactly like the mode argument to toWeek(). For the single-argument syntax, a mode value of 0 is used. +The mode argument works exactly like the mode argument to toWeek(). For the single-argument syntax, a mode value of 0 is used. ## toStartOfDay @@ -671,9 +664,9 @@ Aliases: `dateDiff`, `DATE_DIFF`. - `quarter` - `year` -- `startdate` — The first time value to subtract (the subtrahend). [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). +- `startdate` — The first time value to subtract (the subtrahend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). -- `enddate` — The second time value to subtract from (the minuend). [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). +- `enddate` — The second time value to subtract from (the minuend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). - `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). If specified, it is applied to both `startdate` and `enddate`. If not specified, timezones of `startdate` and `enddate` are used. If they are not the same, the result is unspecified. [String](../../sql-reference/data-types/string.md). @@ -1163,7 +1156,7 @@ dateName(date_part, date) **Arguments** - `date_part` — Date part. Possible values: 'year', 'quarter', 'month', 'week', 'dayofyear', 'day', 'weekday', 'hour', 'minute', 'second'. [String](../../sql-reference/data-types/string.md). -- `date` — Date. [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `date` — Date. 
[Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). - `timezone` — Timezone. Optional. [String](../../sql-reference/data-types/string.md). **Returned value** @@ -1251,7 +1244,7 @@ Result: └──────────────────────────┘ ``` -When there are two arguments: first is an [Integer](../../sql-reference/data-types/int-uint.md) or [DateTime](../../sql-reference/data-types/datetime.md), second is a constant format string — it acts in the same way as [formatDateTime](#formatdatetime) and return [String](../../sql-reference/data-types/string.md#string) type. +When there are two or three arguments, the first an [Integer](../../sql-reference/data-types/int-uint.md), [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md), the second a constant format string and the third an optional constant time zone string — it acts in the same way as [formatDateTime](#formatdatetime) and return [String](../../sql-reference/data-types/string.md#string) type. For example: diff --git a/docs/en/sql-reference/functions/encoding-functions.md b/docs/en/sql-reference/functions/encoding-functions.md index eb357df19db..4a6e46e1759 100644 --- a/docs/en/sql-reference/functions/encoding-functions.md +++ b/docs/en/sql-reference/functions/encoding-functions.md @@ -376,14 +376,6 @@ Result: └─────┘ ``` -## UUIDStringToNum(str) - -Accepts a string containing 36 characters in the format `123e4567-e89b-12d3-a456-426655440000`, and returns it as a set of bytes in a FixedString(16). - -## UUIDNumToString(str) - -Accepts a FixedString(16) value. Returns a string containing 36 characters in text format. - ## bitmaskToList(num) Accepts an integer. Returns a string containing the list of powers of two that total the source number when summed. They are comma-separated without spaces in text format, in ascending order. 
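+A small worked example of the description above (added for illustration): since 50 = 2 + 16 + 32, the function should return the string `'2,16,32'`.
+
+``` sql
+-- Expected to return '2,16,32': the ascending powers of two that sum to 50
+SELECT bitmaskToList(50);
+```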
diff --git a/docs/en/sql-reference/functions/geo/index.md b/docs/en/sql-reference/functions/geo/index.md index 64e23094105..8d659236d4c 100644 --- a/docs/en/sql-reference/functions/geo/index.md +++ b/docs/en/sql-reference/functions/geo/index.md @@ -8,70 +8,69 @@ title: "Geo Functions" ## Geographical Coordinates Functions -- [greatCircleDistance](./coordinates.md#greatCircleDistance) -- [geoDistance](./coordinates.md#geoDistance) -- [greatCircleAngle](./coordinates.md#greatCircleAngle) -- [pointInEllipses](./coordinates.md#pointInEllipses) -- [pointInPolygon](./coordinates.md#pointInPolygon) +- [greatCircleDistance](./coordinates.md#greatcircledistance) +- [geoDistance](./coordinates.md#geodistance) +- [greatCircleAngle](./coordinates.md#greatcircleangle) +- [pointInEllipses](./coordinates.md#pointinellipses) +- [pointInPolygon](./coordinates.md#pointinpolygon) ## Geohash Functions -- [geohashEncode](./geohash.md#geohashEncode) -- [geohashDecode](./geohash.md#geohashDecode) -- [geohashesInBox](./geohash.md#geohashesInBox) +- [geohashEncode](./geohash.md#geohashencode) +- [geohashDecode](./geohash.md#geohashdecode) +- [geohashesInBox](./geohash.md#geohashesinbox) ## H3 Indexes Functions -- [h3IsValid](./h3.md#h3IsValid) -- [h3GetResolution](./h3.md#h3GetResolution) -- [h3EdgeAngle](./h3.md#h3EdgeAngle) -- [h3EdgeLengthM​](./h3.md#h3EdgeLengthM​) -- [h3EdgeLengthKm](./h3.md#h3EdgeLengthKm) -- [geoToH3](./h3.md#geoToH3) -- [h3ToGeo](./h3.md#h3ToGeo) -- [h3ToGeoBoundary](./h3.md#h3ToGeoBoundary) -- [h3kRing](./h3.md#h3kRing) -- [h3GetBaseCell](./h3.md#h3GetBaseCell) -- [h3HexAreaM2](./h3.md#h3HexAreaM2) -- [h3HexAreaKm2](./h3.md#h3HexAreaKm2) -- [h3IndexesAreNeighbors](./h3.md#h3IndexesAreNeighbors) -- [h3ToChildren](./h3.md#h3ToChildren) -- [h3ToParent](./h3.md#h3ToParent) -- [h3ToString](./h3.md#h3ToString) -- [stringToH3](./h3.md#stringToH3) -- [h3GetResolution](./h3.md#h3GetResolution) -- [h3IsResClassIII](./h3.md#h3IsResClassIII) -- [h3IsPentagon](./h3.md#h3IsPentagon) -- [h3GetFaces](./h3.md#h3GetFaces) -- [h3CellAreaM2](./h3.md#h3CellAreaM2) -- [h3CellAreaRads2](./h3.md#h3CellAreaRads2) -- [h3ToCenterChild](./h3.md#h3ToCenterChild) -- [h3ExactEdgeLengthM](./h3.md#h3ExactEdgeLengthM) -- [h3ExactEdgeLengthKm](./h3.md#h3ExactEdgeLengthKm) -- [h3ExactEdgeLengthRads](./h3.md#h3ExactEdgeLengthRads) -- [h3NumHexagons](./h3.md#h3NumHexagons) -- [h3Line](./h3.md#h3Line) -- [h3Distance](./h3.md#h3Distance) -- [h3HexRing](./h3.md#h3HexRing) -- [h3GetUnidirectionalEdge](./h3.md#h3GetUnidirectionalEdge) -- [h3UnidirectionalEdgeIsValid](./h3.md#h3UnidirectionalEdgeIsValid) -- [h3GetOriginIndexFromUnidirectionalEdge](./h3.md#h3GetOriginIndexFromUnidirectionalEdge) -- [h3GetDestinationIndexFromUnidirectionalEdge](./h3.md#h3GetDestinationIndexFromUnidirectionalEdge) -- [h3GetIndexesFromUnidirectionalEdge](./h3.md#h3GetIndexesFromUnidirectionalEdge) -- [h3GetUnidirectionalEdgesFromHexagon](./h3.md#h3GetUnidirectionalEdgesFromHexagon) -- [h3GetUnidirectionalEdgeBoundary](./h3.md#h3GetUnidirectionalEdgeBoundary) +- [h3IsValid](./h3.md#h3isvalid) +- [h3GetResolution](./h3.md#h3getresolution) +- [h3EdgeAngle](./h3.md#h3edgeangle) +- [h3EdgeLengthM](./h3.md#h3edgelengthm) +- [h3EdgeLengthKm](./h3.md#h3edgelengthkm) +- [geoToH3](./h3.md#geotoh3) +- [h3ToGeo](./h3.md#h3togeo) +- [h3ToGeoBoundary](./h3.md#h3togeoboundary) +- [h3kRing](./h3.md#h3kring) +- [h3GetBaseCell](./h3.md#h3getbasecell) +- [h3HexAreaM2](./h3.md#h3hexaream2) +- [h3HexAreaKm2](./h3.md#h3hexareakm2) +- 
[h3IndexesAreNeighbors](./h3.md#h3indexesareneighbors) +- [h3ToChildren](./h3.md#h3tochildren) +- [h3ToParent](./h3.md#h3toparent) +- [h3ToString](./h3.md#h3tostring) +- [stringToH3](./h3.md#stringtoh3) +- [h3GetResolution](./h3.md#h3getresolution) +- [h3IsResClassIII](./h3.md#h3isresclassiii) +- [h3IsPentagon](./h3.md#h3ispentagon) +- [h3GetFaces](./h3.md#h3getfaces) +- [h3CellAreaM2](./h3.md#h3cellaream2) +- [h3CellAreaRads2](./h3.md#h3cellarearads2) +- [h3ToCenterChild](./h3.md#h3tocenterchild) +- [h3ExactEdgeLengthM](./h3.md#h3exactedgelengthm) +- [h3ExactEdgeLengthKm](./h3.md#h3exactedgelengthkm) +- [h3ExactEdgeLengthRads](./h3.md#h3exactedgelengthrads) +- [h3NumHexagons](./h3.md#h3numhexagons) +- [h3Line](./h3.md#h3line) +- [h3Distance](./h3.md#h3distance) +- [h3HexRing](./h3.md#h3hexring) +- [h3GetUnidirectionalEdge](./h3.md#h3getunidirectionaledge) +- [h3UnidirectionalEdgeIsValid](./h3.md#h3unidirectionaledgeisvalid) +- [h3GetOriginIndexFromUnidirectionalEdge](./h3.md#h3getoriginindexfromunidirectionaledge) +- [h3GetDestinationIndexFromUnidirectionalEdge](./h3.md#h3getdestinationindexfromunidirectionaledge) +- [h3GetIndexesFromUnidirectionalEdge](./h3.md#h3getindexesfromunidirectionaledge) +- [h3GetUnidirectionalEdgesFromHexagon](./h3.md#h3getunidirectionaledgesfromhexagon) +- [h3GetUnidirectionalEdgeBoundary](./h3.md#h3getunidirectionaledgeboundary) ## S2 Index Functions -- [geoToS2](./s2.md#geoToS2) -- [s2ToGeo](./s2.md#s2ToGeo) -- [s2GetNeighbors](./s2.md#s2GetNeighbors) -- [s2CellsIntersect](./s2.md#s2CellsIntersect) -- [s2CapContains](./s2.md#s2CapContains) -- [s2CapUnion](./s2.md#s2CapUnion) -- [s2RectAdd](./s2.md#s2RectAdd) -- [s2RectContains](./s2.md#s2RectContains) -- [s2RectUinion](./s2.md#s2RectUinion) -- [s2RectIntersection](./s2.md#s2RectIntersection) +- [geoToS2](./s2.md#geotos2) +- [s2ToGeo](./s2.md#s2togeo) +- [s2GetNeighbors](./s2.md#s2getneighbors) +- [s2CellsIntersect](./s2.md#s2cellsintersect) +- [s2CapContains](./s2.md#s2capcontains) +- [s2CapUnion](./s2.md#s2capunion) +- [s2RectAdd](./s2.md#s2rectadd) +- [s2RectContains](./s2.md#s2rectcontains) +- [s2RectUnion](./s2.md#s2rectunion) +- [s2RectIntersection](./s2.md#s2rectintersection) -[Original article](https://clickhouse.com/docs/en/sql-reference/functions/geo/) diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index b80d75e3611..b9ec21bb59d 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -571,7 +571,7 @@ Example: ``` sql SELECT - transform(domain(Referer), ['yandex.ru', 'google.ru', 'vk.com'], ['www.yandex', 'example.com']) AS s, + transform(domain(Referer), ['yandex.ru', 'google.ru', 'vkontakte.ru'], ['www.yandex', 'example.com', 'vk.com']) AS s, count() AS c FROM test.hits GROUP BY domain(Referer) @@ -593,6 +593,27 @@ LIMIT 10 └────────────────┴─────────┘ ``` +## formatReadableDecimalSize(x) + +Accepts the size (number of bytes). Returns a rounded size with a suffix (KB, MB, etc.) as a string. + +Example: + +``` sql +SELECT + arrayJoin([1, 1024, 1024*1024, 192851925]) AS filesize_bytes, + formatReadableDecimalSize(filesize_bytes) AS filesize +``` + +``` text +┌─filesize_bytes─┬─filesize───┐ +│ 1 │ 1.00 B │ +│ 1024 │ 1.02 KB │ +│ 1048576 │ 1.05 MB │ +│ 192851925 │ 192.85 MB │ +└────────────────┴────────────┘ +``` + ## formatReadableSize(x) Accepts the size (number of bytes). Returns a rounded size with a suffix (KiB, MiB, etc.) as a string. 
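For comparison with `formatReadableDecimalSize`, a sketch of the analogous `formatReadableSize` call with binary (KiB/MiB) suffixes; the output shown is what powers-of-1024 arithmetic suggests and may differ slightly between versions:

``` sql
SELECT
    arrayJoin([1, 1024, 1024*1024, 192851925]) AS filesize_bytes,
    formatReadableSize(filesize_bytes) AS filesize
```

``` text
┌─filesize_bytes─┬─filesize───┐
│              1 │ 1.00 B     │
│           1024 │ 1.00 KiB   │
│        1048576 │ 1.00 MiB   │
│      192851925 │ 183.92 MiB │
└────────────────┴────────────┘
```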
diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index a8ba4843279..cdbf29f3e6d 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -571,13 +571,13 @@ Similar to base58Decode, but returns an empty string in case of error. ## base64Encode(s) -Encodes ‘s’ string into base64 +Encodes ‘s’ FixedString or String into base64. Alias: `TO_BASE64`. ## base64Decode(s) -Decode base64-encoded string ‘s’ into original string. In case of failure raises an exception. +Decode base64-encoded FixedString or String ‘s’ into original string. In case of failure raises an exception. Alias: `FROM_BASE64`. @@ -1150,3 +1150,13 @@ A text with tags . The content within CDATA Do Nothing for 2 Minutes 2:00   ``` + +## ascii(s) {#ascii} + +Returns the ASCII code point of the first character of str. The result type is Int32. + +If s is empty, the result is 0. If the first character is not an ASCII character or not part of the Latin-1 Supplement range of UTF-16, the result is undefined. + + + + diff --git a/docs/en/sql-reference/functions/string-replace-functions.md b/docs/en/sql-reference/functions/string-replace-functions.md index adf2a07b732..d1f0e44f6b4 100644 --- a/docs/en/sql-reference/functions/string-replace-functions.md +++ b/docs/en/sql-reference/functions/string-replace-functions.md @@ -6,28 +6,29 @@ sidebar_label: For Replacing in Strings # Functions for Searching and Replacing in Strings -:::note +:::note Functions for [searching](../../sql-reference/functions/string-search-functions.md) and [other manipulations with strings](../../sql-reference/functions/string-functions.md) are described separately. ::: ## replaceOne(haystack, pattern, replacement) -Replaces the first occurrence, if it exists, of the ‘pattern’ substring in ‘haystack’ with the ‘replacement’ substring. -Hereafter, ‘pattern’ and ‘replacement’ must be constants. +Replaces the first occurrence of the substring ‘pattern’ (if it exists) in ‘haystack’ by the ‘replacement’ string. +‘pattern’ and ‘replacement’ must be constants. ## replaceAll(haystack, pattern, replacement), replace(haystack, pattern, replacement) -Replaces all occurrences of the ‘pattern’ substring in ‘haystack’ with the ‘replacement’ substring. +Replaces all occurrences of the substring ‘pattern’ in ‘haystack’ by the ‘replacement’ string. ## replaceRegexpOne(haystack, pattern, replacement) -Replacement using the ‘pattern’ regular expression. A re2 regular expression. -Replaces only the first occurrence, if it exists. -A pattern can be specified as ‘replacement’. This pattern can include substitutions `\0-\9`. -The substitution `\0` includes the entire regular expression. Substitutions `\1-\9` correspond to the subpattern numbers.To use the `\` character in a template, escape it using `\`. -Also keep in mind that a string literal requires an extra escape. +Replaces the first occurrence of the substring matching the regular expression ‘pattern’ in ‘haystack‘ by the ‘replacement‘ string. +‘pattern‘ must be a constant [re2 regular expression](https://github.com/google/re2/wiki/Syntax). +‘replacement’ must be a plain constant string or a constant string containing substitutions `\0-\9`. +Substitutions `\1-\9` correspond to the 1st to 9th capturing group (submatch), substitution `\0` corresponds to the entire match. +To use a verbatim `\` character in the ‘pattern‘ or ‘replacement‘ string, escape it using `\`. 
+Also keep in mind that string literals require an extra escaping. -Example 1. Converting the date to American format: +Example 1. Converting ISO dates to American format: ``` sql SELECT DISTINCT @@ -62,7 +63,7 @@ SELECT replaceRegexpOne('Hello, World!', '.*', '\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0') ## replaceRegexpAll(haystack, pattern, replacement) -This does the same thing, but replaces all the occurrences. Example: +Like ‘replaceRegexpOne‘, but replaces all occurrences of the pattern. Example: ``` sql SELECT replaceRegexpAll('Hello, World!', '.', '\\0\\0') AS res diff --git a/docs/en/sql-reference/functions/uuid-functions.md b/docs/en/sql-reference/functions/uuid-functions.md index b8f222c2e4e..43542367cd5 100644 --- a/docs/en/sql-reference/functions/uuid-functions.md +++ b/docs/en/sql-reference/functions/uuid-functions.md @@ -211,12 +211,19 @@ SELECT toUUIDOrZero('61f0c404-5cb3-11e7-907b-a6006ad3dba0T') AS uuid ## UUIDStringToNum -Accepts a string containing 36 characters in the format `xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`, and returns it as a set of bytes in a [FixedString(16)](../../sql-reference/data-types/fixedstring.md). +Accepts `string` containing 36 characters in the format `xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`, and returns a [FixedString(16)](../../sql-reference/data-types/fixedstring.md) as its binary representation, with its format optionally specified by `variant` (`Big-endian` by default). + +**Syntax** ``` sql -UUIDStringToNum(String) +UUIDStringToNum(string[, variant = 1]) ``` +**Arguments** + +- `string` — String of 36 characters or FixedString(36). [String](../../sql-reference/syntax.md#syntax-string-literal). +- `variant` — Integer, representing a variant as specified by [RFC4122](https://datatracker.ietf.org/doc/html/rfc4122#section-4.1.1). 1 = `Big-endian` (default), 2 = `Microsoft`. + **Returned value** FixedString(16) @@ -235,14 +242,33 @@ SELECT └──────────────────────────────────────┴──────────────────┘ ``` +``` sql +SELECT + '612f3c40-5d3b-217e-707b-6a546a3d7b29' AS uuid, + UUIDStringToNum(uuid, 2) AS bytes +``` + +``` text +┌─uuid─────────────────────────────────┬─bytes────────────┐ +│ 612f3c40-5d3b-217e-707b-6a546a3d7b29 │ @ [GROUP BY] [ORDER BY] )` - Adds projection description to tables metadata. +## ADD PROJECTION -- `ALTER TABLE [db].name DROP PROJECTION name` - Removes projection description from tables metadata and deletes projection files from disk. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations). +`ALTER TABLE [db].name ADD PROJECTION name ( SELECT [GROUP BY] [ORDER BY] )` - Adds projection description to tables metadata. -- `ALTER TABLE [db.]table MATERIALIZE PROJECTION name IN PARTITION partition_name` - The query rebuilds the projection `name` in the partition `partition_name`. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations). +## DROP PROJECTION -- `ALTER TABLE [db.]table CLEAR PROJECTION name IN PARTITION partition_name` - Deletes projection files from disk without removing description. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations). +`ALTER TABLE [db].name DROP PROJECTION name` - Removes projection description from tables metadata and deletes projection files from disk. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations). 
+ +## MATERIALIZE PROJECTION + +`ALTER TABLE [db.]table MATERIALIZE PROJECTION name IN PARTITION partition_name` - The query rebuilds the projection `name` in the partition `partition_name`. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations). + +## CLEAR PROJECTION + +`ALTER TABLE [db.]table CLEAR PROJECTION name IN PARTITION partition_name` - Deletes projection files from disk without removing description. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations). The commands `ADD`, `DROP` and `CLEAR` are lightweight in a sense that they only change metadata or remove files. -Also, they are replicated, syncing projections metadata via ZooKeeper. +Also, they are replicated, syncing projections metadata via ClickHouse Keeper or ZooKeeper. :::note Projection manipulation is supported only for tables with [`*MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md) engine (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) variants). diff --git a/docs/en/sql-reference/statements/alter/user.md b/docs/en/sql-reference/statements/alter/user.md index 0a68885842a..31db89164d7 100644 --- a/docs/en/sql-reference/statements/alter/user.md +++ b/docs/en/sql-reference/statements/alter/user.md @@ -12,7 +12,7 @@ Syntax: ``` sql ALTER USER [IF EXISTS] name1 [ON CLUSTER cluster_name1] [RENAME TO new_name1] [, name2 [ON CLUSTER cluster_name2] [RENAME TO new_name2] ...] - [NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']}] + [NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']} | {WITH ssl_certificate CN 'common_name'}] [[ADD | DROP] HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE] [DEFAULT ROLE role [,...] | ALL | ALL EXCEPT role [,...] ] [GRANTEES {user | role | ANY | NONE} [,...] [EXCEPT {user | role} [,...]]] diff --git a/docs/en/sql-reference/statements/create/database.md b/docs/en/sql-reference/statements/create/database.md index 432f5975cc8..7954d1362f1 100644 --- a/docs/en/sql-reference/statements/create/database.md +++ b/docs/en/sql-reference/statements/create/database.md @@ -31,7 +31,7 @@ By default, ClickHouse uses its own [Atomic](../../../engines/database-engines/a ### COMMENT -You can add a comment to the database when you creating it. +You can add a comment to the database when you are creating it. The comment is supported for all database engines. diff --git a/docs/en/sql-reference/statements/create/function.md b/docs/en/sql-reference/statements/create/function.md index 63c006b1e3e..90be007bf43 100644 --- a/docs/en/sql-reference/statements/create/function.md +++ b/docs/en/sql-reference/statements/create/function.md @@ -4,7 +4,7 @@ sidebar_position: 38 sidebar_label: FUNCTION --- -# CREATE FUNCTION +# CREATE FUNCTION — user defined function (UDF) Creates a user defined function from a lambda expression. The expression must consist of function parameters, constants, operators, or other function calls. 
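A minimal sketch of `CREATE FUNCTION` with a lambda expression; the function name `linear_equation` and its parameters are illustrative:

``` sql
-- Define a UDF as a lambda over its parameters
CREATE FUNCTION linear_equation AS (x, k, b) -> k*x + b;

-- 2 * 3 + 1 = 7
SELECT linear_equation(3, 2, 1);
```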
diff --git a/docs/en/sql-reference/statements/create/user.md b/docs/en/sql-reference/statements/create/user.md index 56a0560e57e..a756b3d4a0d 100644 --- a/docs/en/sql-reference/statements/create/user.md +++ b/docs/en/sql-reference/statements/create/user.md @@ -12,7 +12,7 @@ Syntax: ``` sql CREATE USER [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1] [, name2 [ON CLUSTER cluster_name2] ...] - [NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']}] + [NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']} | {WITH ssl_certificate CN 'common_name'}] [HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE] [DEFAULT ROLE role [,...]] [DEFAULT DATABASE database | NONE] @@ -34,6 +34,7 @@ There are multiple ways of user identification: - `IDENTIFIED WITH double_sha1_hash BY 'hash'` - `IDENTIFIED WITH ldap SERVER 'server_name'` - `IDENTIFIED WITH kerberos` or `IDENTIFIED WITH kerberos REALM 'realm'` +- `IDENTIFIED WITH ssl_certificate CN 'mysite.com:user'` For identification with sha256_hash using `SALT` - hash must be calculated from concatination of 'password' and 'salt'. @@ -54,7 +55,7 @@ Another way of specifying host is to use `@` syntax following the username. Exam - `CREATE USER mira@'localhost'` — Equivalent to the `HOST LOCAL` syntax. - `CREATE USER mira@'192.168.%.%'` — Equivalent to the `HOST LIKE` syntax. -:::warning +:::warning ClickHouse treats `user_name@'address'` as a username as a whole. Thus, technically you can create multiple users with the same `user_name` and different constructions after `@`. However, we do not recommend to do so. 
::: diff --git a/docs/en/sql-reference/statements/misc.md b/docs/en/sql-reference/statements/misc.md deleted file mode 100644 index d812dd2008a..00000000000 --- a/docs/en/sql-reference/statements/misc.md +++ /dev/null @@ -1,21 +0,0 @@ ---- -slug: /en/sql-reference/statements/misc -toc_hidden: true -sidebar_position: 70 ---- - -# Miscellaneous Statements - -- [ATTACH](../../sql-reference/statements/attach.md) -- [CHECK TABLE](../../sql-reference/statements/check-table.md) -- [DESCRIBE TABLE](../../sql-reference/statements/describe-table.md) -- [DETACH](../../sql-reference/statements/detach.md) -- [DROP](../../sql-reference/statements/drop.md) -- [EXISTS](../../sql-reference/statements/exists.md) -- [KILL](../../sql-reference/statements/kill.md) -- [OPTIMIZE](../../sql-reference/statements/optimize.md) -- [RENAME](../../sql-reference/statements/rename.md) -- [SET](../../sql-reference/statements/set.md) -- [SET ROLE](../../sql-reference/statements/set-role.md) -- [TRUNCATE](../../sql-reference/statements/truncate.md) -- [USE](../../sql-reference/statements/use.md) diff --git a/docs/en/sql-reference/statements/optimize.md b/docs/en/sql-reference/statements/optimize.md index 680ff773992..036d3f0599a 100644 --- a/docs/en/sql-reference/statements/optimize.md +++ b/docs/en/sql-reference/statements/optimize.md @@ -22,7 +22,7 @@ The `OPTIMIZE` query is supported for [MergeTree](../../engines/table-engines/me When `OPTIMIZE` is used with the [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/replication.md) family of table engines, ClickHouse creates a task for merging and waits for execution on all replicas (if the [replication_alter_partitions_sync](../../operations/settings/settings.md#replication-alter-partitions-sync) setting is set to `2`) or on current replica (if the [replication_alter_partitions_sync](../../operations/settings/settings.md#replication-alter-partitions-sync) setting is set to `1`). - If `OPTIMIZE` does not perform a merge for any reason, it does not notify the client. To enable notifications, use the [optimize_throw_if_noop](../../operations/settings/settings.md#setting-optimize_throw_if_noop) setting. -- If you specify a `PARTITION`, only the specified partition is optimized. [How to set partition expression](../../sql-reference/statements/alter/index.md#alter-how-to-specify-part-expr). +- If you specify a `PARTITION`, only the specified partition is optimized. [How to set partition expression](alter/partition.md#how-to-set-partition-expression). - If you specify `FINAL`, optimization is performed even when all the data is already in one part. Also merge is forced even if concurrent merges are performed. - If you specify `DEDUPLICATE`, then completely identical rows (unless by-clause is specified) will be deduplicated (all columns are compared), it makes sense only for the MergeTree engine. diff --git a/docs/en/sql-reference/statements/select/intersect.md b/docs/en/sql-reference/statements/select/intersect.md index d3b2b51b6be..f1eb4738543 100644 --- a/docs/en/sql-reference/statements/select/intersect.md +++ b/docs/en/sql-reference/statements/select/intersect.md @@ -7,7 +7,7 @@ sidebar_label: INTERSECT The `INTERSECT` clause returns only those rows that result from both the first and the second queries. The queries must match the number of columns, order, and type. The result of `INTERSECT` can contain duplicate rows. -Multiple `INTERSECT` statements are executes left to right if parenthesis are not specified. 
The `INTERSECT` operator has a higher priority than the `UNION` and `EXCEPT` clause. +Multiple `INTERSECT` statements are executed left to right if parentheses are not specified. The `INTERSECT` operator has a higher priority than the `UNION` and `EXCEPT` clauses. ``` sql diff --git a/docs/en/sql-reference/table-functions/file.md b/docs/en/sql-reference/table-functions/file.md index f40107aaaca..fc81e7cf649 100644 --- a/docs/en/sql-reference/table-functions/file.md +++ b/docs/en/sql-reference/table-functions/file.md @@ -81,6 +81,7 @@ Multiple path components can have globs. For being processed file must exist and - `?` — Substitutes any single character. - `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`. - `{N..M}` — Substitutes any number in range from N to M including both borders. +- `**` - Fetches all files inside the folder recursively. Constructions with `{}` are similar to the [remote](remote.md) table function. @@ -119,6 +120,22 @@ Query the data from files named `file000`, `file001`, … , `file999`: SELECT count(*) FROM file('big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name String, value UInt32'); ``` +**Example** + +Query the data from all files inside `big_dir` directory recursively: + +``` sql +SELECT count(*) FROM file('big_dir/**', 'CSV', 'name String, value UInt32'); +``` + +**Example** + +Query the data from all `file002` files from any folder inside `big_dir` directory recursively: + +``` sql +SELECT count(*) FROM file('big_dir/**/file002', 'CSV', 'name String, value UInt32'); +``` + ## Virtual Columns - `_path` — Path to the file. diff --git a/docs/en/sql-reference/table-functions/s3.md b/docs/en/sql-reference/table-functions/s3.md index 545037665bb..545a89223bf 100644 --- a/docs/en/sql-reference/table-functions/s3.md +++ b/docs/en/sql-reference/table-functions/s3.md @@ -127,6 +127,18 @@ INSERT INTO FUNCTION s3('https://clickhouse-public-datasets.s3.amazonaws.com/my- SELECT name, value FROM existing_table; ``` +Glob ** can be used for recursive directory traversal. Consider the below example, it will fetch all files from `my-test-bucket-768` directory recursively: + +``` sql +SELECT * FROM s3('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/**', 'CSV', 'name String, value UInt32', 'gzip'); +``` + +The below get data from all `test-data.csv.gz` files from any folder inside `my-test-bucket` directory recursively: + +``` sql +SELECT * FROM s3('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/**/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip'); +``` + ## Partitioned Write If you specify `PARTITION BY` expression when inserting data into `S3` table, a separate file is created for each partition value. Splitting the data into separate files helps to improve reading operations efficiency. diff --git a/docs/ru/development/browse-code.md b/docs/ru/development/browse-code.md deleted file mode 100644 index 640b1ac3693..00000000000 --- a/docs/ru/development/browse-code.md +++ /dev/null @@ -1,14 +0,0 @@ ---- -slug: /ru/development/browse-code -sidebar_position: 72 -sidebar_label: "Навигация по коду ClickHouse" ---- - - -# Навигация по коду ClickHouse {#navigatsiia-po-kodu-clickhouse} - -Для навигации по коду онлайн доступен **Woboq**, он расположен [здесь](https://clickhouse.com/codebrowser/ClickHouse/src/index.html). В нём реализовано удобное перемещение между исходными файлами, семантическая подсветка, подсказки, индексация и поиск. 
Слепок кода обновляется ежедневно. - -Также вы можете просматривать исходники на [GitHub](https://github.com/ClickHouse/ClickHouse). - -Если вы интересуетесь, какую среду разработки выбрать для работы с ClickHouse, мы рекомендуем CLion, QT Creator, VSCode или KDevelop (с некоторыми предостережениями). Вы можете использовать свою любимую среду разработки, Vim и Emacs тоже считаются. diff --git a/docs/ru/engines/table-engines/integrations/kafka.md b/docs/ru/engines/table-engines/integrations/kafka.md index 37fc902e777..a5f091e1b23 100644 --- a/docs/ru/engines/table-engines/integrations/kafka.md +++ b/docs/ru/engines/table-engines/integrations/kafka.md @@ -87,14 +87,15 @@ SETTINGS Устаревший способ создания таблицы - :::note "Attention" - Не используйте этот метод в новых проектах. По возможности переключите старые проекты на метод, описанный выше. +:::note "Attention" +Не используйте этот метод в новых проектах. По возможности переключите старые проекты на метод, описанный выше. +::: ``` sql Kafka(kafka_broker_list, kafka_topic_list, kafka_group_name, kafka_format [, kafka_row_delimiter, kafka_schema, kafka_num_consumers, kafka_skip_broken_messages]) ``` - ::: + ## Описание {#opisanie} diff --git a/docs/ru/engines/table-engines/mergetree-family/aggregatingmergetree.md b/docs/ru/engines/table-engines/mergetree-family/aggregatingmergetree.md index aa16113192e..86a275767a0 100644 --- a/docs/ru/engines/table-engines/mergetree-family/aggregatingmergetree.md +++ b/docs/ru/engines/table-engines/mergetree-family/aggregatingmergetree.md @@ -39,9 +39,10 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] Устаревший способ создания таблицы - :::note "Attention" - Не используйте этот способ в новых проектах и по возможности переведите старые проекты на способ описанный выше. - ::: +:::note "Attention" +Не используйте этот способ в новых проектах и по возможности переведите старые проекты на способ описанный выше. +::: + ``` sql CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] ( diff --git a/docs/ru/engines/table-engines/mergetree-family/collapsingmergetree.md b/docs/ru/engines/table-engines/mergetree-family/collapsingmergetree.md index ecaaa6b8417..72b4725c6ed 100644 --- a/docs/ru/engines/table-engines/mergetree-family/collapsingmergetree.md +++ b/docs/ru/engines/table-engines/mergetree-family/collapsingmergetree.md @@ -43,9 +43,10 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] Устаревший способ создания таблицы - :::note "Attention" - Не используйте этот способ в новых проектах и по возможности переведите старые проекты на способ описанный выше. - ::: +:::note "Attention" +Не используйте этот способ в новых проектах и по возможности переведите старые проекты на способ описанный выше. +::: + ``` sql CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] ( @@ -59,7 +60,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] - `sign` — Имя столбца с типом строки: `1` — строка состояния, `-1` — строка отмены состояния. - Тип данных столбца — `Int8`. + Тип данных столбца — `Int8`. 
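For reference, a minimal sketch of the current (non-deprecated) `CollapsingMergeTree` syntax with a `Sign Int8` column; the table and column names are illustrative:

``` sql
CREATE TABLE UAct
(
    UserID UInt64,
    PageViews UInt8,
    Duration UInt8,
    Sign Int8    -- 1 is a "state" row, -1 is a "cancel" row
)
ENGINE = CollapsingMergeTree(Sign)
ORDER BY UserID;
```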
diff --git a/docs/ru/engines/table-engines/mergetree-family/graphitemergetree.md b/docs/ru/engines/table-engines/mergetree-family/graphitemergetree.md index 818f85f7e37..324a3fd1633 100644 --- a/docs/ru/engines/table-engines/mergetree-family/graphitemergetree.md +++ b/docs/ru/engines/table-engines/mergetree-family/graphitemergetree.md @@ -55,9 +55,10 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] Устаревший способ создания таблицы - :::note "Attention" - Не используйте этот способ в новых проектах и по возможности переведите старые проекты на способ описанный выше. - ::: +:::note "Attention" +Не используйте этот способ в новых проектах и по возможности переведите старые проекты на способ описанный выше. +::: + ``` sql CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] ( diff --git a/docs/ru/engines/table-engines/mergetree-family/mergetree.md b/docs/ru/engines/table-engines/mergetree-family/mergetree.md index e01e0006b87..f024d5f1985 100644 --- a/docs/ru/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/ru/engines/table-engines/mergetree-family/mergetree.md @@ -115,9 +115,10 @@ ENGINE MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDa Устаревший способ создания таблицы - :::note "Attention" - Не используйте этот способ в новых проектах и по возможности переведите старые проекты на способ, описанный выше. - ::: +:::note "Attention" +Не используйте этот способ в новых проектах и по возможности переведите старые проекты на способ, описанный выше. +::: + ``` sql CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] ( diff --git a/docs/ru/engines/table-engines/mergetree-family/summingmergetree.md b/docs/ru/engines/table-engines/mergetree-family/summingmergetree.md index 0d9d268fa46..7b69927e161 100644 --- a/docs/ru/engines/table-engines/mergetree-family/summingmergetree.md +++ b/docs/ru/engines/table-engines/mergetree-family/summingmergetree.md @@ -42,9 +42,10 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] Устаревший способ создания таблицы - :::note "Attention" - Не используйте этот способ в новых проектах и по возможности переведите старые проекты на способ описанный выше. - ::: +:::note "Attention" +Не используйте этот способ в новых проектах и по возможности переведите старые проекты на способ описанный выше. +::: + ``` sql CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] ( diff --git a/docs/ru/interfaces/third-party/client-libraries.md b/docs/ru/interfaces/third-party/client-libraries.md index ce9f94d5d74..b000208b53b 100644 --- a/docs/ru/interfaces/third-party/client-libraries.md +++ b/docs/ru/interfaces/third-party/client-libraries.md @@ -34,6 +34,7 @@ sidebar_label: "Клиентские библиотеки от сторонни - [node-clickhouse](https://github.com/apla/node-clickhouse) - [nestjs-clickhouse](https://github.com/depyronick/nestjs-clickhouse) - [clickhouse-client](https://github.com/depyronick/clickhouse-client) + - [node-clickhouse-orm](https://github.com/zimv/node-clickhouse-orm) - Perl - [perl-DBD-ClickHouse](https://github.com/elcamlost/perl-DBD-ClickHouse) - [HTTP-ClickHouse](https://metacpan.org/release/HTTP-ClickHouse) diff --git a/docs/ru/sql-reference/data-types/date.md b/docs/ru/sql-reference/data-types/date.md index 7254b82f461..185fe28d567 100644 --- a/docs/ru/sql-reference/data-types/date.md +++ b/docs/ru/sql-reference/data-types/date.md @@ -6,7 +6,7 @@ sidebar_label: Date # Date {#data-type-date} -Дата. 
Хранится в двух байтах в виде (беззнакового) числа дней, прошедших от 1970-01-01. Позволяет хранить значения от чуть больше, чем начала unix-эпохи до верхнего порога, определяющегося константой на этапе компиляции (сейчас - до 2149 года, последний полностью поддерживаемый год - 2148). +Дата. Хранится в двух байтах в виде (беззнакового) числа дней, прошедших от 1970-01-01. Позволяет хранить значения от чуть больше, чем начала unix-эпохи до верхнего порога, определяющегося константой на этапе компиляции (сейчас - до 2106 года, последний полностью поддерживаемый год - 2105). Диапазон значений: \[1970-01-01, 2149-06-06\]. diff --git a/docs/ru/sql-reference/functions/date-time-functions.md b/docs/ru/sql-reference/functions/date-time-functions.md index a7d2ce49fae..f18c2ea258a 100644 --- a/docs/ru/sql-reference/functions/date-time-functions.md +++ b/docs/ru/sql-reference/functions/date-time-functions.md @@ -272,15 +272,9 @@ SELECT toUnixTimestamp('2017-11-05 08:07:47', 'Asia/Tokyo') AS unix_timestamp; Поведение для * `enable_extended_results_for_datetime_functions = 0`: Функции `toStartOf*`, `toLastDayOfMonth`, `toMonday` возвращают `Date` или `DateTime`. Функции `toStartOfDay`, `toStartOfHour`, `toStartOfFifteenMinutes`, `toStartOfTenMinutes`, `toStartOfFiveMinutes`, `toStartOfMinute`, `timeSlot` возвращают `DateTime`. Хотя эти функции могут принимать значения типа `Date32` или `DateTime64` в качестве аргумента, при обработке аргумента вне нормального диапазона значений (`1970` - `2148` для `Date` и `1970-01-01 00:00:00`-`2106-02-07 08:28:15` для `DateTime`) будет получен некорректный результат. -В случае если значение аргумента вне нормального диапазона: - * `1970-01-01 (00:00:00)` будет возвращён для моментов времени до 1970 года, - * `2106-02-07 08:28:15` будет взят в качестве аргумента, если полученный аргумент превосходит данное значение и возвращаемый тип - `DateTime`, - * `2149-06-06` будет взят в качестве аргумента, если полученный аргумент превосходит данное значение и возвращаемый тип - `Date`, - * `2149-05-31` будет результатом функции `toLastDayOfMonth` при обработке аргумента больше `2149-05-31`. * `enable_extended_results_for_datetime_functions = 1`: * Функции `toStartOfYear`, `toStartOfISOYear`, `toStartOfQuarter`, `toStartOfMonth`, `toStartOfWeek`, `toLastDayOfMonth`, `toMonday` возвращают `Date` или `DateTime` если их аргумент `Date` или `DateTime` и они возвращают `Date32` или `DateTime64` если их аргумент `Date32` или `DateTime64`. * Функции `toStartOfDay`, `toStartOfHour`, `toStartOfFifteenMinutes`, `toStartOfTenMinutes`, `toStartOfFiveMinutes`, `toStartOfMinute`, `timeSlot` возвращают `DateTime` если их аргумент `Date` или `DateTime` и они возвращают `DateTime64` если их аргумент `Date32` или `DateTime64`. - ::: ## toStartOfYear {#tostartofyear} @@ -321,20 +315,20 @@ SELECT toStartOfISOYear(toDate('2017-01-01')) AS ISOYear20170101; Округляет дату или дату-с-временем до последнего числа месяца. Возвращается дата. -Если `toLastDayOfMonth` вызывается с аргументом типа `Date` большим чем 2149-05-31, то результат будет вычислен от аргумента 2149-05-31. +:::note "Attention" +Возвращаемое значение для некорректных дат зависит от реализации. ClickHouse может вернуть нулевую дату, выбросить исключение, или выполнить «естественное» перетекание дат между месяцами. +::: ## toMonday {#tomonday} Округляет дату или дату-с-временем вниз до ближайшего понедельника. -Частный случай: для дат `1970-01-01`, `1970-01-02`, `1970-01-03` и `1970-01-04` результатом будет `1970-01-01`. 
Возвращается дата. ## toStartOfWeek(t[,mode]) {#tostartofweek} Округляет дату или дату со временем до ближайшего воскресенья или понедельника в соответствии с mode. Возвращается дата. -Частный случай: для дат `1970-01-01`, `1970-01-02`, `1970-01-03` и `1970-01-04` (и `1970-01-05`, если `mode` равен `1`) результатом будет `1970-01-01`. -Аргумент `mode` работает точно так же, как аргумент mode [toWeek()](#toweek). Если аргумент mode опущен, то используется режим 0. +Аргумент mode работает точно так же, как аргумент mode [toWeek()](#toweek). Если аргумент mode опущен, то используется режим 0. ## toStartOfDay {#tostartofday} @@ -721,9 +715,9 @@ date_diff('unit', startdate, enddate, [timezone]) - `quarter` - `year` -- `startdate` — первая дата или дата со временем, которая вычитается из `enddate`. [Date](../../sql-reference/data-types/date.md) или [DateTime](../../sql-reference/data-types/datetime.md). +- `startdate` — первая дата или дата со временем, которая вычитается из `enddate`. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) или [DateTime64](../../sql-reference/data-types/datetime64.md). -- `enddate` — вторая дата или дата со временем, из которой вычитается `startdate`. [Date](../../sql-reference/data-types/date.md) или [DateTime](../../sql-reference/data-types/datetime.md). +- `enddate` — вторая дата или дата со временем, из которой вычитается `startdate`. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) или [DateTime64](../../sql-reference/data-types/datetime64.md). - `timezone` — [часовой пояс](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (необязательно). Если этот аргумент указан, то он применяется как для `startdate`, так и для `enddate`. Если этот аргумент не указан, то используются часовые пояса аргументов `startdate` и `enddate`. Если часовые пояса аргументов `startdate` и `enddate` не совпадают, то результат не определен. [String](../../sql-reference/data-types/string.md). @@ -975,8 +969,7 @@ SELECT now('Europe/Moscow'); ## timeSlots(StartTime, Duration,\[, Size\]) {#timeslotsstarttime-duration-size} Для интервала, начинающегося в `StartTime` и длящегося `Duration` секунд, возвращает массив моментов времени, кратных `Size`. Параметр `Size` указывать необязательно, по умолчанию он равен 1800 секундам (30 минутам) - необязательный параметр. -Данная функция может использоваться, например, для анализа количества просмотров страницы за соответствующую сессию. -Аргумент `StartTime` может иметь тип `DateTime` или `DateTime64`. В случае, если используется `DateTime`, аргументы `Duration` и `Size` должны иметь тип `UInt32`; Для DateTime64 они должны быть типа `Decimal64`. + Возвращает массив DateTime/DateTime64 (тип будет совпадать с типом параметра ’StartTime’). Для DateTime64 масштаб(scale) возвращаемой величины может отличаться от масштаба фргумента ’StartTime’ --- результат будет иметь наибольший масштаб среди всех данных аргументов. Пример использования: @@ -1085,7 +1078,7 @@ dateName(date_part, date) **Аргументы** - `date_part` — часть даты. Возможные значения: 'year', 'quarter', 'month', 'week', 'dayofyear', 'day', 'weekday', 'hour', 'minute', 'second'. [String](../../sql-reference/data-types/string.md). -- `date` — дата. 
[Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md) или [DateTime64](../../sql-reference/data-types/datetime64.md). +- `date` — дата. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) или [DateTime64](../../sql-reference/data-types/datetime64.md). - `timezone` — часовой пояс. Необязательный аргумент. [String](../../sql-reference/data-types/string.md). **Возвращаемое значение** @@ -1133,8 +1126,7 @@ SELECT FROM_UNIXTIME(423543535); └──────────────────────────┘ ``` -В случае, когда есть два аргумента: первый типа [Integer](../../sql-reference/data-types/int-uint.md) или [DateTime](../../sql-reference/data-types/datetime.md), а второй является строкой постоянного формата — функция работает также, как [formatDateTime](#formatdatetime), и возвращает значение типа [String](../../sql-reference/data-types/string.md#string). - +В случае, когда есть два или три аргумента: первый типа [Integer](../../sql-reference/data-types/int-uint.md), [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) или [DateTime64](../../sql-reference/data-types/datetime64.md), а второй является строкой постоянного формата и третий является строкой постоянной временной зоны — функция работает также, как [formatDateTime](#formatdatetime), и возвращает значение типа [String](../../sql-reference/data-types/string.md#string). Запрос: diff --git a/docs/ru/sql-reference/functions/other-functions.md b/docs/ru/sql-reference/functions/other-functions.md index 5c8584cd2a0..af21ccd6bed 100644 --- a/docs/ru/sql-reference/functions/other-functions.md +++ b/docs/ru/sql-reference/functions/other-functions.md @@ -568,7 +568,7 @@ ORDER BY c DESC ``` sql SELECT - transform(domain(Referer), ['yandex.ru', 'google.ru', 'vk.com'], ['www.yandex', 'example.com']) AS s, + transform(domain(Referer), ['yandex.ru', 'google.ru', 'vkontakte.ru'], ['www.yandex', 'example.com', 'vk.com']) AS s, count() AS c FROM test.hits GROUP BY domain(Referer) diff --git a/docs/ru/sql-reference/operators/in.md b/docs/ru/sql-reference/operators/in.md index 2b3d87a877f..fa679b890a7 100644 --- a/docs/ru/sql-reference/operators/in.md +++ b/docs/ru/sql-reference/operators/in.md @@ -122,9 +122,9 @@ FROM t_null Существует два варианта IN-ов с подзапросами (аналогично для JOIN-ов): обычный `IN` / `JOIN` и `GLOBAL IN` / `GLOBAL JOIN`. Они отличаются способом выполнения при распределённой обработке запроса. - :::note "Attention" - Помните, что алгоритмы, описанные ниже, могут работать иначе в зависимости от [настройки](../../operations/settings/settings.md) `distributed_product_mode`. - ::: +:::note "Attention" +Помните, что алгоритмы, описанные ниже, могут работать иначе в зависимости от [настройки](../../operations/settings/settings.md) `distributed_product_mode`. +::: При использовании обычного IN-а, запрос отправляется на удалённые серверы, и на каждом из них выполняются подзапросы в секциях `IN` / `JOIN`. При использовании `GLOBAL IN` / `GLOBAL JOIN-а`, сначала выполняются все подзапросы для `GLOBAL IN` / `GLOBAL JOIN-ов`, и результаты складываются во временные таблицы. Затем эти временные таблицы передаются на каждый удалённый сервер, и на них выполняются запросы, с использованием этих переданных временных данных. 
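To make the difference concrete, a hedged sketch (the table names `distributed_table` and `local_table` are placeholders): with plain `IN` the subquery is executed on every remote server against its local data, while with `GLOBAL IN` the subquery is executed once and its result is shipped to the remote servers as a temporary table.

``` sql
-- Subquery runs on each remote server over its local data
SELECT uniq(UserID) FROM distributed_table
WHERE UserID IN (SELECT UserID FROM local_table WHERE CounterID = 34);

-- Subquery runs once; the result is sent to every remote server
SELECT uniq(UserID) FROM distributed_table
WHERE UserID GLOBAL IN (SELECT UserID FROM distributed_table WHERE CounterID = 34);
```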
diff --git a/docs/ru/sql-reference/statements/misc.md b/docs/ru/sql-reference/statements/misc.md deleted file mode 100644 index 437215f20ce..00000000000 --- a/docs/ru/sql-reference/statements/misc.md +++ /dev/null @@ -1,21 +0,0 @@ ---- -slug: /ru/sql-reference/statements/misc -sidebar_position: 41 ---- - -# Прочие виды запросов {#prochie-vidy-zaprosov} - -- [ATTACH](../../sql-reference/statements/attach.md) -- [CHECK TABLE](../../sql-reference/statements/check-table.md) -- [DESCRIBE TABLE](../../sql-reference/statements/describe-table.md) -- [DETACH](../../sql-reference/statements/detach.md) -- [DROP](../../sql-reference/statements/drop.md) -- [EXISTS](../../sql-reference/statements/exists.md) -- [KILL](../../sql-reference/statements/kill.md) -- [OPTIMIZE](../../sql-reference/statements/optimize.md) -- [RENAME](../../sql-reference/statements/rename.md) -- [SET](../../sql-reference/statements/set.md) -- [SET ROLE](../../sql-reference/statements/set-role.md) -- [TRUNCATE](../../sql-reference/statements/truncate.md) -- [USE](../../sql-reference/statements/use.md) - diff --git a/docs/zh/development/browse-code.md b/docs/zh/development/browse-code.md deleted file mode 100644 index 16382a94ed5..00000000000 --- a/docs/zh/development/browse-code.md +++ /dev/null @@ -1,13 +0,0 @@ ---- -slug: /zh/development/browse-code -sidebar_position: 63 -sidebar_label: "\u6D4F\u89C8\u6E90\u4EE3\u7801" ---- - -# 浏览ClickHouse源代码 {#browse-clickhouse-source-code} - -您可以使用 **Woboq** 在线代码浏览器 [点击这里](https://clickhouse.com/codebrowser/ClickHouse/src/index.html). 它提供了代码导航和语义突出显示、搜索和索引。 代码快照每天更新。 - -此外,您还可以像往常一样浏览源代码 [GitHub](https://github.com/ClickHouse/ClickHouse) - -如果你希望了解哪种IDE较好,我们推荐使用CLion,QT Creator,VS Code和KDevelop(有注意事项)。 您可以使用任何您喜欢的IDE。 Vim和Emacs也可以。 diff --git a/docs/zh/getting-started/example-datasets/brown-benchmark.mdx b/docs/zh/getting-started/example-datasets/brown-benchmark.mdx index c35e96718b1..6db4982f50f 100644 --- a/docs/zh/getting-started/example-datasets/brown-benchmark.mdx +++ b/docs/zh/getting-started/example-datasets/brown-benchmark.mdx @@ -1,10 +1,460 @@ --- slug: /zh/getting-started/example-datasets/brown-benchmark -sidebar_label: Brown University Benchmark -description: A new analytical benchmark for machine-generated log data -title: "Brown University Benchmark" +sidebar_label: 布朗大学基准 +description: 机器生成日志数据的新分析基准 +title: "布朗大学基准" --- -import Content from '@site/docs/en/getting-started/example-datasets/brown-benchmark.md'; +`MgBench` 是机器生成的日志数据的新分析基准,[Andrew Crotty](http://cs.brown.edu/people/acrotty/)。 - +下载数据: + +```bash +wget https://datasets.clickhouse.com/mgbench{1..3}.csv.xz +``` + +解压数据: + +```bash +xz -v -d mgbench{1..3}.csv.xz +``` + +创建数据库和表: + +```sql +CREATE DATABASE mgbench; +``` + +```sql +USE mgbench; +``` + +```sql +CREATE TABLE mgbench.logs1 ( + log_time DateTime, + machine_name LowCardinality(String), + machine_group LowCardinality(String), + cpu_idle Nullable(Float32), + cpu_nice Nullable(Float32), + cpu_system Nullable(Float32), + cpu_user Nullable(Float32), + cpu_wio Nullable(Float32), + disk_free Nullable(Float32), + disk_total Nullable(Float32), + part_max_used Nullable(Float32), + load_fifteen Nullable(Float32), + load_five Nullable(Float32), + load_one Nullable(Float32), + mem_buffers Nullable(Float32), + mem_cached Nullable(Float32), + mem_free Nullable(Float32), + mem_shared Nullable(Float32), + swap_free Nullable(Float32), + bytes_in Nullable(Float32), + bytes_out Nullable(Float32) +) +ENGINE = MergeTree() +ORDER BY (machine_group, machine_name, log_time); +``` + 
+ +```sql +CREATE TABLE mgbench.logs2 ( + log_time DateTime, + client_ip IPv4, + request String, + status_code UInt16, + object_size UInt64 +) +ENGINE = MergeTree() +ORDER BY log_time; +``` + + +```sql +CREATE TABLE mgbench.logs3 ( + log_time DateTime64, + device_id FixedString(15), + device_name LowCardinality(String), + device_type LowCardinality(String), + device_floor UInt8, + event_type LowCardinality(String), + event_unit FixedString(1), + event_value Nullable(Float32) +) +ENGINE = MergeTree() +ORDER BY (event_type, log_time); +``` + +插入数据: + +``` +clickhouse-client --query "INSERT INTO mgbench.logs1 FORMAT CSVWithNames" < mgbench1.csv +clickhouse-client --query "INSERT INTO mgbench.logs2 FORMAT CSVWithNames" < mgbench2.csv +clickhouse-client --query "INSERT INTO mgbench.logs3 FORMAT CSVWithNames" < mgbench3.csv +``` + +## 运行基准查询: + +```sql +USE mgbench; +``` + +```sql +-- Q1.1: 自午夜以来每个 Web 服务器的 CPU/网络利用率是多少? + +SELECT machine_name, + MIN(cpu) AS cpu_min, + MAX(cpu) AS cpu_max, + AVG(cpu) AS cpu_avg, + MIN(net_in) AS net_in_min, + MAX(net_in) AS net_in_max, + AVG(net_in) AS net_in_avg, + MIN(net_out) AS net_out_min, + MAX(net_out) AS net_out_max, + AVG(net_out) AS net_out_avg +FROM ( + SELECT machine_name, + COALESCE(cpu_user, 0.0) AS cpu, + COALESCE(bytes_in, 0.0) AS net_in, + COALESCE(bytes_out, 0.0) AS net_out + FROM logs1 + WHERE machine_name IN ('anansi','aragog','urd') + AND log_time >= TIMESTAMP '2017-01-11 00:00:00' +) AS r +GROUP BY machine_name; +``` + + +```sql +-- Q1.2:最近一天有哪些机房的机器离线? + +SELECT machine_name, + log_time +FROM logs1 +WHERE (machine_name LIKE 'cslab%' OR + machine_name LIKE 'mslab%') + AND load_one IS NULL + AND log_time >= TIMESTAMP '2017-01-10 00:00:00' +ORDER BY machine_name, + log_time; +``` + +```sql +-- Q1.3:特定工作站过去 10 天的每小时的平均指标是多少? + +SELECT dt, + hr, + AVG(load_fifteen) AS load_fifteen_avg, + AVG(load_five) AS load_five_avg, + AVG(load_one) AS load_one_avg, + AVG(mem_free) AS mem_free_avg, + AVG(swap_free) AS swap_free_avg +FROM ( + SELECT CAST(log_time AS DATE) AS dt, + EXTRACT(HOUR FROM log_time) AS hr, + load_fifteen, + load_five, + load_one, + mem_free, + swap_free + FROM logs1 + WHERE machine_name = 'babbage' + AND load_fifteen IS NOT NULL + AND load_five IS NOT NULL + AND load_one IS NOT NULL + AND mem_free IS NOT NULL + AND swap_free IS NOT NULL + AND log_time >= TIMESTAMP '2017-01-01 00:00:00' +) AS r +GROUP BY dt, + hr +ORDER BY dt, + hr; +``` + +```sql +-- Q1.4: 1 个月内,每台服务器的磁盘 I/O 阻塞的频率是多少? + +SELECT machine_name, + COUNT(*) AS spikes +FROM logs1 +WHERE machine_group = 'Servers' + AND cpu_wio > 0.99 + AND log_time >= TIMESTAMP '2016-12-01 00:00:00' + AND log_time < TIMESTAMP '2017-01-01 00:00:00' +GROUP BY machine_name +ORDER BY spikes DESC +LIMIT 10; +``` + +```sql +-- Q1.5:哪些外部可访问的虚拟机的运行内存不足? + +SELECT machine_name, + dt, + MIN(mem_free) AS mem_free_min +FROM ( + SELECT machine_name, + CAST(log_time AS DATE) AS dt, + mem_free + FROM logs1 + WHERE machine_group = 'DMZ' + AND mem_free IS NOT NULL +) AS r +GROUP BY machine_name, + dt +HAVING MIN(mem_free) < 10000 +ORDER BY machine_name, + dt; +``` + +```sql +-- Q1.6: 每小时所有文件服务器的总网络流量是多少? 
+ +SELECT dt, + hr, + SUM(net_in) AS net_in_sum, + SUM(net_out) AS net_out_sum, + SUM(net_in) + SUM(net_out) AS both_sum +FROM ( + SELECT CAST(log_time AS DATE) AS dt, + EXTRACT(HOUR FROM log_time) AS hr, + COALESCE(bytes_in, 0.0) / 1000000000.0 AS net_in, + COALESCE(bytes_out, 0.0) / 1000000000.0 AS net_out + FROM logs1 + WHERE machine_name IN ('allsorts','andes','bigred','blackjack','bonbon', + 'cadbury','chiclets','cotton','crows','dove','fireball','hearts','huey', + 'lindt','milkduds','milkyway','mnm','necco','nerds','orbit','peeps', + 'poprocks','razzles','runts','smarties','smuggler','spree','stride', + 'tootsie','trident','wrigley','york') +) AS r +GROUP BY dt, + hr +ORDER BY both_sum DESC +LIMIT 10; +``` + +```sql +-- Q2.1:过去 2 周内哪些请求导致了服务器错误? + +SELECT * +FROM logs2 +WHERE status_code >= 500 + AND log_time >= TIMESTAMP '2012-12-18 00:00:00' +ORDER BY log_time; +``` + +```sql +-- Q2.2:在特定的某 2 周内,用户密码文件是否被泄露了? + +SELECT * +FROM logs2 +WHERE status_code >= 200 + AND status_code < 300 + AND request LIKE '%/etc/passwd%' + AND log_time >= TIMESTAMP '2012-05-06 00:00:00' + AND log_time < TIMESTAMP '2012-05-20 00:00:00'; +``` + + +```sql +-- Q2.3:过去一个月顶级请求的平均路径深度是多少? + +SELECT top_level, + AVG(LENGTH(request) - LENGTH(REPLACE(request, '/', ''))) AS depth_avg +FROM ( + SELECT SUBSTRING(request FROM 1 FOR len) AS top_level, + request + FROM ( + SELECT POSITION(SUBSTRING(request FROM 2), '/') AS len, + request + FROM logs2 + WHERE status_code >= 200 + AND status_code < 300 + AND log_time >= TIMESTAMP '2012-12-01 00:00:00' + ) AS r + WHERE len > 0 +) AS s +WHERE top_level IN ('/about','/courses','/degrees','/events', + '/grad','/industry','/news','/people', + '/publications','/research','/teaching','/ugrad') +GROUP BY top_level +ORDER BY top_level; +``` + + +```sql +-- Q2.4:在过去的 3 个月里,哪些客户端发出了过多的请求? + +SELECT client_ip, + COUNT(*) AS num_requests +FROM logs2 +WHERE log_time >= TIMESTAMP '2012-10-01 00:00:00' +GROUP BY client_ip +HAVING COUNT(*) >= 100000 +ORDER BY num_requests DESC; +``` + + +```sql +-- Q2.5:每天的独立访问者数量是多少? + +SELECT dt, + COUNT(DISTINCT client_ip) +FROM ( + SELECT CAST(log_time AS DATE) AS dt, + client_ip + FROM logs2 +) AS r +GROUP BY dt +ORDER BY dt; +``` + + +```sql +-- Q2.6:平均和最大数据传输速率(Gbps)是多少? + +SELECT AVG(transfer) / 125000000.0 AS transfer_avg, + MAX(transfer) / 125000000.0 AS transfer_max +FROM ( + SELECT log_time, + SUM(object_size) AS transfer + FROM logs2 + GROUP BY log_time +) AS r; +``` + + +```sql +-- Q3.1:自 2019/11/29 17:00 以来,室温是否达到过冰点? + +SELECT * +FROM logs3 +WHERE event_type = 'temperature' + AND event_value <= 32.0 + AND log_time >= '2019-11-29 17:00:00.000'; +``` + + +```sql +-- Q3.4:在过去的 6 个月里,每扇门打开的频率是多少? + +SELECT device_name, + device_floor, + COUNT(*) AS ct +FROM logs3 +WHERE event_type = 'door_open' + AND log_time >= '2019-06-01 00:00:00.000' +GROUP BY device_name, + device_floor +ORDER BY ct DESC; +``` + +下面的查询 3.5 使用了 UNION 关键词。设置该模式以便组合 SELECT 的查询结果。该设置仅在未明确指定 UNION ALL 或 UNION DISTINCT 但使用了 UNION 进行共享时使用。 + +```sql +SET union_default_mode = 'DISTINCT' +``` + +```sql +-- Q3.5: 在冬季和夏季,建筑物内哪些地方会出现较大的温度变化? 
+ +WITH temperature AS ( + SELECT dt, + device_name, + device_type, + device_floor + FROM ( + SELECT dt, + hr, + device_name, + device_type, + device_floor, + AVG(event_value) AS temperature_hourly_avg + FROM ( + SELECT CAST(log_time AS DATE) AS dt, + EXTRACT(HOUR FROM log_time) AS hr, + device_name, + device_type, + device_floor, + event_value + FROM logs3 + WHERE event_type = 'temperature' + ) AS r + GROUP BY dt, + hr, + device_name, + device_type, + device_floor + ) AS s + GROUP BY dt, + device_name, + device_type, + device_floor + HAVING MAX(temperature_hourly_avg) - MIN(temperature_hourly_avg) >= 25.0 +) +SELECT DISTINCT device_name, + device_type, + device_floor, + 'WINTER' +FROM temperature +WHERE dt >= DATE '2018-12-01' + AND dt < DATE '2019-03-01' +UNION +SELECT DISTINCT device_name, + device_type, + device_floor, + 'SUMMER' +FROM temperature +WHERE dt >= DATE '2019-06-01' + AND dt < DATE '2019-09-01'; +``` + + +```sql +-- Q3.6:对于每种类别的设备,每月的功耗指标是什么? + +SELECT yr, + mo, + SUM(coffee_hourly_avg) AS coffee_monthly_sum, + AVG(coffee_hourly_avg) AS coffee_monthly_avg, + SUM(printer_hourly_avg) AS printer_monthly_sum, + AVG(printer_hourly_avg) AS printer_monthly_avg, + SUM(projector_hourly_avg) AS projector_monthly_sum, + AVG(projector_hourly_avg) AS projector_monthly_avg, + SUM(vending_hourly_avg) AS vending_monthly_sum, + AVG(vending_hourly_avg) AS vending_monthly_avg +FROM ( + SELECT dt, + yr, + mo, + hr, + AVG(coffee) AS coffee_hourly_avg, + AVG(printer) AS printer_hourly_avg, + AVG(projector) AS projector_hourly_avg, + AVG(vending) AS vending_hourly_avg + FROM ( + SELECT CAST(log_time AS DATE) AS dt, + EXTRACT(YEAR FROM log_time) AS yr, + EXTRACT(MONTH FROM log_time) AS mo, + EXTRACT(HOUR FROM log_time) AS hr, + CASE WHEN device_name LIKE 'coffee%' THEN event_value END AS coffee, + CASE WHEN device_name LIKE 'printer%' THEN event_value END AS printer, + CASE WHEN device_name LIKE 'projector%' THEN event_value END AS projector, + CASE WHEN device_name LIKE 'vending%' THEN event_value END AS vending + FROM logs3 + WHERE device_type = 'meter' + ) AS r + GROUP BY dt, + yr, + mo, + hr +) AS s +GROUP BY yr, + mo +ORDER BY yr, + mo; +``` + +此数据集可在 [Playground](https://play.clickhouse.com/play?user=play) 中进行交互式的请求, [example](https://play.clickhouse.com/play?user=play#U0VMRUNUIG1hY2hpbmVfbmFtZSwKICAgICAgIE1JTihjcHUpIEFTIGNwdV9taW4sCiAgICAgICBNQVgoY3B1KSBBUyBjcHVfbWF4LAogICAgICAgQVZHKGNwdSkgQVMgY3B1X2F2ZywKICAgICAgIE1JTihuZXRfaW4pIEFTIG5ldF9pbl9taW4sCiAgICAgICBNQVgobmV0X2luKSBBUyBuZXRfaW5fbWF4LAogICAgICAgQVZHKG5ldF9pbikgQVMgbmV0X2luX2F2ZywKICAgICAgIE1JTihuZXRfb3V0KSBBUyBuZXRfb3V0X21pbiwKICAgICAgIE1BWChuZXRfb3V0KSBBUyBuZXRfb3V0X21heCwKICAgICAgIEFWRyhuZXRfb3V0KSBBUyBuZXRfb3V0X2F2ZwpGUk9NICgKICBTRUxFQ1QgbWFjaGluZV9uYW1lLAogICAgICAgICBDT0FMRVNDRShjcHVfdXNlciwgMC4wKSBBUyBjcHUsCiAgICAgICAgIENPQUxFU0NFKGJ5dGVzX2luLCAwLjApIEFTIG5ldF9pbiwKICAgICAgICAgQ09BTEVTQ0UoYnl0ZXNfb3V0LCAwLjApIEFTIG5ldF9vdXQKICBGUk9NIG1nYmVuY2gubG9nczEKICBXSEVSRSBtYWNoaW5lX25hbWUgSU4gKCdhbmFuc2knLCdhcmFnb2cnLCd1cmQnKQogICAgQU5EIGxvZ190aW1lID49IFRJTUVTVEFNUCAnMjAxNy0wMS0xMSAwMDowMDowMCcKKSBBUyByCkdST1VQIEJZIG1hY2hpbmVfbmFtZQ==). 
diff --git a/docs/zh/getting-started/example-datasets/cell-towers.mdx b/docs/zh/getting-started/example-datasets/cell-towers.mdx index ece13445210..9738680519a 100644 --- a/docs/zh/getting-started/example-datasets/cell-towers.mdx +++ b/docs/zh/getting-started/example-datasets/cell-towers.mdx @@ -1,9 +1,232 @@ --- slug: /zh/getting-started/example-datasets/cell-towers -sidebar_label: Cell Towers -title: "Cell Towers" +sidebar_label: 蜂窝信号塔 +sidebar_position: 3 +title: "蜂窝信号塔" --- -import Content from '@site/docs/en/getting-started/example-datasets/cell-towers.md'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import ActionsMenu from '@site/docs/en/_snippets/_service_actions_menu.md'; +import SQLConsoleDetail from '@site/docs/en/_snippets/_launch_sql_console.md'; + +该数据集来自 [OpenCellid](https://www.opencellid.org/) - 世界上最大的蜂窝信号塔的开放数据库。 + +截至 2021 年,它拥有超过 4000 万条关于全球蜂窝信号塔(GSM、LTE、UMTS 等)的记录及其地理坐标和元数据(国家代码、网络等)。 + +OpenCelliD 项目在 `Creative Commons Attribution-ShareAlike 4.0 International License` 协议下许可使用,我们根据相同许可条款重新分发此数据集的快照。登录后即可下载最新版本的数据集。 + + +## 获取数据集 {#get-the-dataset} + + + + +在 ClickHouse Cloud 上可以通过一个按钮实现通过 S3 上传此数据集。登录你的 ClickHouse Cloud 组织,或通过 [ClickHouse.cloud](https://clickhouse.cloud) 创建免费试用版。 + +从 **Sample data** 选项卡中选择 **Cell Towers** 数据集,然后选择 **Load data**: + +![加载数据集](@site/docs/en/_snippets/images/cloud-load-data-sample.png) + +检查 cell_towers 的表结构: + +```sql +DESCRIBE TABLE cell_towers +``` + + + + + + +1. 下载 2021 年 2 月以来的数据集快照:[cell_towers.csv.xz](https://datasets.clickhouse.com/cell_towers.csv.xz) (729 MB)。 + +2. 验证完整性(可选步骤): + +```bash +md5sum cell_towers.csv.xz +``` + +```response +8cf986f4a0d9f12c6f384a0e9192c908 cell_towers.csv.xz +``` + +3. 使用以下命令解压: + +```bash +xz -d cell_towers.csv.xz +``` + +4. 创建表: + +```sql +CREATE TABLE cell_towers +( + radio Enum8('' = 0, 'CDMA' = 1, 'GSM' = 2, 'LTE' = 3, 'NR' = 4, 'UMTS' = 5), + mcc UInt16, + net UInt16, + area UInt16, + cell UInt64, + unit Int16, + lon Float64, + lat Float64, + range UInt32, + samples UInt32, + changeable UInt8, + created DateTime, + updated DateTime, + averageSignal UInt8 +) +ENGINE = MergeTree ORDER BY (radio, mcc, net, created); +``` + +5. 插入数据集: + +```bash +clickhouse-client --query "INSERT INTO cell_towers FORMAT CSVWithNames" < cell_towers.csv +``` + + + + +## 查询示例 {#examples} + +1. 按类型划分的基站数量: + +```sql +SELECT radio, count() AS c FROM cell_towers GROUP BY radio ORDER BY c DESC +``` +```response +┌─radio─┬────────c─┐ +│ UMTS │ 20686487 │ +│ LTE │ 12101148 │ +│ GSM │ 9931312 │ +│ CDMA │ 556344 │ +│ NR │ 867 │ +└───────┴──────────┘ + +5 rows in set. Elapsed: 0.011 sec. Processed 43.28 million rows, 43.28 MB (3.83 billion rows/s., 3.83 GB/s.) +``` + +2. 各个[移动国家代码(MCC)](https://en.wikipedia.org/wiki/Mobile_country_code)对应的蜂窝信号塔数量: + +```sql +SELECT mcc, count() FROM cell_towers GROUP BY mcc ORDER BY count() DESC LIMIT 10 +``` +```response +┌─mcc─┬─count()─┐ +│ 310 │ 5024650 │ +│ 262 │ 2622423 │ +│ 250 │ 1953176 │ +│ 208 │ 1891187 │ +│ 724 │ 1836150 │ +│ 404 │ 1729151 │ +│ 234 │ 1618924 │ +│ 510 │ 1353998 │ +│ 440 │ 1343355 │ +│ 311 │ 1332798 │ +└─────┴─────────┘ + +10 rows in set. Elapsed: 0.019 sec. Processed 43.28 million rows, 86.55 MB (2.33 billion rows/s., 4.65 GB/s.) +``` + +排名靠前的国家是:美国、德国和俄罗斯。 + +你可以通过在 ClickHouse 中创建一个 [External Dictionary](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) 来解码这些值。 + +## 用例:合并地理数据 {#use-case} + +使用 `pointInPolygon` 函数。 + +1. 
创建一个用于存储多边形的表: + + + + +```sql +CREATE TABLE moscow (polygon Array(Tuple(Float64, Float64))) +ORDER BY polygon; +``` + + + + +```sql +CREATE TEMPORARY TABLE +moscow (polygon Array(Tuple(Float64, Float64))); +``` + + + + +2. 以下点大致上构造了莫斯科的地理围栏(除“新莫斯科”外): + +```sql +INSERT INTO moscow VALUES ([(37.84172564285271, 55.78000432402266), +(37.8381207618713, 55.775874525970494), (37.83979446823122, 55.775626746008065), (37.84243326983639, 55.77446586811748), (37.84262672750849, 55.771974101091104), (37.84153238623039, 55.77114545193181), (37.841124690460184, 55.76722010265554), +(37.84239076983644, 55.76654891107098), (37.842283558197025, 55.76258709833121), (37.8421759312134, 55.758073999993734), (37.84198330422974, 55.75381499999371), (37.8416827275085, 55.749277102484484), (37.84157576190186, 55.74794544108413), +(37.83897929098507, 55.74525257875241), (37.83739676451868, 55.74404373042019), (37.838732481460525, 55.74298009816793), (37.841183997352545, 55.743060321833575), (37.84097476190185, 55.73938799999373), (37.84048155819702, 55.73570799999372), +(37.840095812164286, 55.73228210777237), (37.83983814285274, 55.73080491981639), (37.83846476321406, 55.729799917464675), (37.83835745269769, 55.72919751082619), (37.838636380279524, 55.72859509486539), (37.8395161005249, 55.727705075632784), +(37.83897964285276, 55.722727886185154), (37.83862557539366, 55.72034817326636), (37.83559735744853, 55.71944437307499), (37.835370708803126, 55.71831419154461), (37.83738169402022, 55.71765218986692), (37.83823396494291, 55.71691750159089), +(37.838056931213345, 55.71547311301385), (37.836812846557606, 55.71221445615604), (37.83522525396725, 55.709331054395555), (37.83269301586908, 55.70953687463627), (37.829667367706236, 55.70903403789297), (37.83311126588435, 55.70552351822608), +(37.83058993121339, 55.70041317726053), (37.82983872750851, 55.69883771404813), (37.82934501586913, 55.69718947487017), (37.828926414016685, 55.69504441658371), (37.82876530422971, 55.69287499999378), (37.82894754100031, 55.690759754047335), +(37.827697554878185, 55.68951421135665), (37.82447346292115, 55.68965045405069), (37.83136543914793, 55.68322046195302), (37.833554015869154, 55.67814012759211), (37.83544184655761, 55.67295011628339), (37.837480388885474, 55.6672498719639), +(37.838960677246064, 55.66316274139358), (37.83926093121332, 55.66046999999383), (37.839025050262435, 55.65869897264431), (37.83670784390257, 55.65794084879904), (37.835656529083245, 55.65694309303843), (37.83704060449217, 55.65689306460552), +(37.83696819873806, 55.65550363526252), (37.83760389616388, 55.65487847246661), (37.83687972750851, 55.65356745541324), (37.83515216004943, 55.65155951234079), (37.83312418518067, 55.64979413590619), (37.82801726983639, 55.64640836412121), +(37.820614174591, 55.64164525405531), (37.818908190475426, 55.6421883258084), (37.81717543386075, 55.64112490388471), (37.81690987037274, 55.63916106913107), (37.815099354492155, 55.637925371757085), (37.808769150787356, 55.633798276884455), +(37.80100123544311, 55.62873670012244), (37.79598013491824, 55.62554336109055), (37.78634567724606, 55.62033499605651), (37.78334147619623, 55.618768681480326), (37.77746201055901, 55.619855533402706), (37.77527329626457, 55.61909966711279), +(37.77801986242668, 55.618770300976294), (37.778212973541216, 55.617257701952106), (37.77784818518065, 55.61574504433011), (37.77016867724609, 55.61148576294007), (37.760191219573976, 55.60599579539028), (37.75338926983641, 55.60227892751446), +(37.746329965606634, 55.59920577639331), 
(37.73939925396728, 55.59631430313617), (37.73273665739439, 55.5935318803559), (37.7299954450912, 55.59350760316188), (37.7268679946899, 55.59469840523759), (37.72626726983634, 55.59229549697373), +(37.7262673598022, 55.59081598950582), (37.71897193121335, 55.5877595845419), (37.70871550793456, 55.58393177431724), (37.700497489410374, 55.580917323756644), (37.69204305026244, 55.57778089778455), (37.68544477378839, 55.57815154690915), +(37.68391050793454, 55.57472945079756), (37.678803592590306, 55.57328235936491), (37.6743402539673, 55.57255251445782), (37.66813862698363, 55.57216388774464), (37.617927457672096, 55.57505691895805), (37.60443099999999, 55.5757737568051), +(37.599683515869145, 55.57749105910326), (37.59754177842709, 55.57796291823627), (37.59625834786988, 55.57906686095235), (37.59501783265684, 55.57746616444403), (37.593090671936025, 55.57671634534502), (37.587018007904, 55.577944600233785), +(37.578692203704804, 55.57982895000019), (37.57327546607398, 55.58116294118248), (37.57385012109279, 55.581550362779), (37.57399562266922, 55.5820107079112), (37.5735356072979, 55.58226289171689), (37.57290393054962, 55.582393529795155), +(37.57037722355653, 55.581919415056234), (37.5592298306885, 55.584471614867844), (37.54189249206543, 55.58867650795186), (37.5297256269836, 55.59158133551745), (37.517837865081766, 55.59443656218868), (37.51200186508174, 55.59635625174229), +(37.506808949737554, 55.59907823904434), (37.49820432275389, 55.6062944994944), (37.494406071441674, 55.60967103463367), (37.494760001358024, 55.61066689753365), (37.49397137107085, 55.61220931698269), (37.49016528606031, 55.613417718449064), +(37.48773249206542, 55.61530616333343), (37.47921386508177, 55.622640129112334), (37.470652153442394, 55.62993723476164), (37.46273446298218, 55.6368075123157), (37.46350692265317, 55.64068225239439), (37.46050283203121, 55.640794546982576), +(37.457627470916734, 55.64118904154646), (37.450718034393326, 55.64690488145138), (37.44239252645875, 55.65397824729769), (37.434587576721185, 55.66053543155961), (37.43582144975277, 55.661693766520735), (37.43576786245721, 55.662755031737014), +(37.430982915344174, 55.664610641628116), (37.428547447097685, 55.66778515273695), (37.42945134592044, 55.668633314343566), (37.42859571562949, 55.66948145750025), (37.4262836402282, 55.670813882451405), (37.418709037048295, 55.6811141674414), +(37.41922139651101, 55.68235377885389), (37.419218771842885, 55.68359335082235), (37.417196501327446, 55.684375235224735), (37.41607020370478, 55.68540557585352), (37.415640857147146, 55.68686637150793), (37.414632153442334, 55.68903015131686), +(37.413344899475064, 55.690896881757396), (37.41171432275391, 55.69264232162232), (37.40948282275393, 55.69455101638112), (37.40703674603271, 55.69638690385348), (37.39607169577025, 55.70451821283731), (37.38952706878662, 55.70942491932811), +(37.387778313491815, 55.71149057784176), (37.39049275399779, 55.71419814298992), (37.385557272491454, 55.7155489617061), (37.38388335714726, 55.71849856042102), (37.378368238098155, 55.7292763261685), (37.37763597123337, 55.730845879211614), +(37.37890062088197, 55.73167906388319), (37.37750451918789, 55.734703664681774), (37.375610832015965, 55.734851959522246), (37.3723813571472, 55.74105626086403), (37.37014935714723, 55.746115620904355), (37.36944173016362, 55.750883999993725), +(37.36975304365541, 55.76335905525834), (37.37244070571134, 55.76432079697595), (37.3724259757175, 55.76636979670426), (37.369922155757884, 55.76735417953104), (37.369892695770275, 
55.76823419316575), (37.370214730163575, 55.782312184391266), +(37.370493611114505, 55.78436801120489), (37.37120164550783, 55.78596427165359), (37.37284851456452, 55.7874378183096), (37.37608325135799, 55.7886695054807), (37.3764587460632, 55.78947647305964), (37.37530000265506, 55.79146512926804), +(37.38235915344241, 55.79899647809345), (37.384344043655396, 55.80113596939471), (37.38594269577028, 55.80322699999366), (37.38711208598329, 55.804919036911976), (37.3880239841309, 55.806610999993666), (37.38928977249147, 55.81001864976979), +(37.39038389947512, 55.81348641242801), (37.39235781481933, 55.81983538336746), (37.393709457672124, 55.82417822811877), (37.394685720901464, 55.82792275755836), (37.39557615344238, 55.830447148154136), (37.39844478226658, 55.83167107969975), +(37.40019761214057, 55.83151823557964), (37.400398790382326, 55.83264967594742), (37.39659544313046, 55.83322180909622), (37.39667059524539, 55.83402792148566), (37.39682089947515, 55.83638877400216), (37.39643489154053, 55.83861656112751), +(37.3955338994751, 55.84072348043264), (37.392680272491454, 55.84502158126453), (37.39241188227847, 55.84659117913199), (37.392529730163616, 55.84816071336481), (37.39486835714723, 55.85288092980303), (37.39873052645878, 55.859893456073635), +(37.40272161111449, 55.86441833633205), (37.40697072750854, 55.867579567544375), (37.410007082016016, 55.868369880337), (37.4120992989502, 55.86920843741314), (37.412668021163924, 55.87055369615854), (37.41482461111453, 55.87170587948249), +(37.41862266137694, 55.873183961039565), (37.42413732540892, 55.874879126654704), (37.4312182698669, 55.875614937236705), (37.43111093783558, 55.8762723478417), (37.43332105622856, 55.87706546369396), (37.43385747619623, 55.87790681284802), +(37.441303050262405, 55.88027084462084), (37.44747234260555, 55.87942070143253), (37.44716141796871, 55.88072960917233), (37.44769797085568, 55.88121221323979), (37.45204320500181, 55.882080694420715), (37.45673176190186, 55.882346110794586), +(37.463383999999984, 55.88252729504517), (37.46682797486874, 55.88294937719063), (37.470014457672086, 55.88361266759345), (37.47751410450743, 55.88546991372396), (37.47860317658232, 55.88534929207307), (37.48165826025772, 55.882563306475106), +(37.48316434442331, 55.8815803226785), (37.483831555817645, 55.882427612793315), (37.483182967125686, 55.88372791409729), (37.483092277908824, 55.88495581062434), (37.4855716508179, 55.8875561994203), (37.486440636245746, 55.887827444039566), +(37.49014203439328, 55.88897899871799), (37.493210285705544, 55.890208937135604), (37.497512451065035, 55.891342397444696), (37.49780744510645, 55.89174030252967), (37.49940333499519, 55.89239745507079), (37.50018383334346, 55.89339220941865), +(37.52421672750851, 55.903869074155224), (37.52977457672118, 55.90564076517974), (37.53503220370484, 55.90661661218259), (37.54042858064267, 55.90714113744566), (37.54320461007303, 55.905645048442985), (37.545686966066306, 55.906608607018505), +(37.54743976120755, 55.90788552162358), (37.55796999999999, 55.90901557907218), (37.572711542327866, 55.91059395704873), (37.57942799999998, 55.91073854155573), (37.58502865872187, 55.91009969268444), (37.58739968913264, 55.90794809960554), +(37.59131567193598, 55.908713267595054), (37.612687423278814, 55.902866854295375), (37.62348079629517, 55.90041967242986), (37.635797880950896, 55.898141151686396), (37.649487626983664, 55.89639275532968), (37.65619302513125, 55.89572360207488), +(37.66294133862307, 55.895295577183965), (37.66874564418033, 55.89505457604897), 
(37.67375601586915, 55.89254677027454), (37.67744661901856, 55.8947775867987), (37.688347, 55.89450045676125), (37.69480554232789, 55.89422926332761), +(37.70107096560668, 55.89322256101114), (37.705962965606716, 55.891763491662616), (37.711885134918205, 55.889110234998974), (37.71682005026245, 55.886577568759876), (37.7199315476074, 55.88458159806678), (37.72234560316464, 55.882281005794134), +(37.72364385977171, 55.8809452036196), (37.725371142837474, 55.8809722706006), (37.727870902099546, 55.88037213862385), (37.73394330422971, 55.877941504088696), (37.745339592590376, 55.87208120378722), (37.75525267724611, 55.86703807949492), +(37.76919976190188, 55.859821640197474), (37.827835219574, 55.82962968399116), (37.83341438888553, 55.82575289922351), (37.83652584655761, 55.82188784027888), (37.83809213491821, 55.81612575504693), (37.83605359521481, 55.81460347077685), +(37.83632178569025, 55.81276696067908), (37.838623105812026, 55.811486181656385), (37.83912198147584, 55.807329380532785), (37.839079078033414, 55.80510270463816), (37.83965844708251, 55.79940712529036), (37.840581150787344, 55.79131399999368), +(37.84172564285271, 55.78000432402266)]); +``` + +3. 检查莫斯科有多少个蜂窝信号塔: + +```sql +SELECT count() FROM cell_towers +WHERE pointInPolygon((lon, lat), (SELECT * FROM moscow)) +``` +```response +┌─count()─┐ +│ 310463 │ +└─────────┘ + +1 rows in set. Elapsed: 0.067 sec. Processed 43.28 million rows, 692.42 MB (645.83 million rows/s., 10.33 GB/s.) +``` + +虽然不能创建临时表,但此数据集仍可在 [Playground](https://play.clickhouse.com/play?user=play) 中进行交互式的请求, [example](https://play.clickhouse.com/play?user=play#U0VMRUNUIG1jYywgY291bnQoKSBGUk9NIGNlbGxfdG93ZXJzIEdST1VQIEJZIG1jYyBPUkRFUiBCWSBjb3VudCgpIERFU0M=). - diff --git a/docs/zh/getting-started/example-datasets/menus.mdx b/docs/zh/getting-started/example-datasets/menus.mdx index 250b8a4cd37..10e9f2bd318 100644 --- a/docs/zh/getting-started/example-datasets/menus.mdx +++ b/docs/zh/getting-started/example-datasets/menus.mdx @@ -1,9 +1,352 @@ ---- -slug: /zh/getting-started/example-datasets/menus -sidebar_label: New York Public Library "What's on the Menu?" 
Dataset -title: "New York Public Library \"What's on the Menu?\" Dataset" +--- +slug: /zh/getting-started/example-datasets/menus +sidebar_label: '纽约公共图书馆“菜单上有什么?”数据集' +title: '纽约公共图书馆“菜单上有什么?”数据集' --- -import Content from '@site/docs/en/getting-started/example-datasets/menus.md'; +该数据集由纽约公共图书馆创建。其中含有有关酒店、餐馆和咖啡馆的菜单上的菜肴及其价格的历史数据。 - +来源:http://menus.nypl.org/data +数据为开放数据。 + +数据来自于图书馆中的档案,因此可能不完整,以至于难以进行统计分析。尽管如此,该数据集也是非常有意思的。数据集中只有 130 万条关于菜单中的菜肴的记录 - 这对于 ClickHouse 来说是一个非常小的数据量,但这仍是一个很好的例子。 + +## 下载数据集 {#download-dataset} + +运行命令: + +```bash +wget https://s3.amazonaws.com/menusdata.nypl.org/gzips/2021_08_01_07_01_17_data.tgz +``` + +如果有需要可以使用 http://menus.nypl.org/data 中的最新链接。下载的大小约为 35 MB。 + +## 解压数据集 {#unpack-dataset} + +```bash +tar xvf 2021_08_01_07_01_17_data.tgz +``` + +解压后的的大小约为 150 MB。 + +数据集由四个表组成: + +- `Menu` - 有关菜单的信息,其中包含:餐厅名称,看到菜单的日期等 +- `Dish` - 有关菜肴的信息,其中包含:菜肴名称以及一些特征。 +- `MenuPage` - 有关菜单中页面的信息,每个页面都属于某个 `Menu`。 +- `MenuItem` - 菜单项。某个菜单页面上的菜肴及其价格:指向 `Dish` 和 `MenuPage`的链接。 + +## 创建表 {#create-tables} + +使用 [Decimal](/docs/zh/sql-reference/data-types/decimal.md) 数据类型来存储价格。 + +```sql +CREATE TABLE dish +( + id UInt32, + name String, + description String, + menus_appeared UInt32, + times_appeared Int32, + first_appeared UInt16, + last_appeared UInt16, + lowest_price Decimal64(3), + highest_price Decimal64(3) +) ENGINE = MergeTree ORDER BY id; + +CREATE TABLE menu +( + id UInt32, + name String, + sponsor String, + event String, + venue String, + place String, + physical_description String, + occasion String, + notes String, + call_number String, + keywords String, + language String, + date String, + location String, + location_type String, + currency String, + currency_symbol String, + status String, + page_count UInt16, + dish_count UInt16 +) ENGINE = MergeTree ORDER BY id; + +CREATE TABLE menu_page +( + id UInt32, + menu_id UInt32, + page_number UInt16, + image_id String, + full_height UInt16, + full_width UInt16, + uuid UUID +) ENGINE = MergeTree ORDER BY id; + +CREATE TABLE menu_item +( + id UInt32, + menu_page_id UInt32, + price Decimal64(3), + high_price Decimal64(3), + dish_id UInt32, + created_at DateTime, + updated_at DateTime, + xpos Float64, + ypos Float64 +) ENGINE = MergeTree ORDER BY id; +``` + +## 导入数据 {#import-data} + +执行以下命令将数据导入 ClickHouse: + +```bash +clickhouse-client --format_csv_allow_single_quotes 0 --input_format_null_as_default 0 --query "INSERT INTO dish FORMAT CSVWithNames" < Dish.csv +clickhouse-client --format_csv_allow_single_quotes 0 --input_format_null_as_default 0 --query "INSERT INTO menu FORMAT CSVWithNames" < Menu.csv +clickhouse-client --format_csv_allow_single_quotes 0 --input_format_null_as_default 0 --query "INSERT INTO menu_page FORMAT CSVWithNames" < MenuPage.csv +clickhouse-client --format_csv_allow_single_quotes 0 --input_format_null_as_default 0 --date_time_input_format best_effort --query "INSERT INTO menu_item FORMAT CSVWithNames" < MenuItem.csv +``` + +因为数据由带有标题的 CSV 表示,所以使用 [CSVWithNames](/docs/zh/interfaces/formats.md#csvwithnames) 格式。 + +因为只有双引号用于数据字段,单引号可以在值内,所以禁用了 `format_csv_allow_single_quotes` 以避免混淆 CSV 解析器。 + +因为数据中没有 [NULL](/docs/zh/sql-reference/syntax.md#null-literal) 值,所以禁用 [input_format_null_as_default](/docs/zh/operations/settings/settings.md#settings-input-format-null-as-default)。不然 ClickHouse 将会尝试解析 `\N` 序列,并可能与数据中的 `\` 混淆。 + +设置 [date_time_input_format best_effort](/docs/zh/operations/settings/settings.md#settings-date_time_input_format) 以便解析各种格式的 
[DateTime](/docs/zh/sql-reference/data-types/datetime.md)字段。例如,识别像“2000-01-01 01:02”这样没有秒数的 ISO-8601 时间字符串。如果没有此设置,则仅允许使用固定的 DateTime 格式。 + +## 非规范化数据 {#denormalize-data} + +数据以 [规范化形式] (https://en.wikipedia.org/wiki/Database_normalization#Normal_forms) 在多个表格中呈现。这意味着如果你想进行如查询菜单项中的菜名这类的查询,则必须执行 [JOIN](/docs/zh/sql-reference/statements/select/join.md#select-join)。在典型的分析任务中,预先处理联接的数据以避免每次都执行“联接”会更有效率。这中操作被称为“非规范化”数据。 + +我们将创建一个表“menu_item_denorm”,其中将包含所有联接在一起的数据: + +```sql +CREATE TABLE menu_item_denorm +ENGINE = MergeTree ORDER BY (dish_name, created_at) +AS SELECT + price, + high_price, + created_at, + updated_at, + xpos, + ypos, + dish.id AS dish_id, + dish.name AS dish_name, + dish.description AS dish_description, + dish.menus_appeared AS dish_menus_appeared, + dish.times_appeared AS dish_times_appeared, + dish.first_appeared AS dish_first_appeared, + dish.last_appeared AS dish_last_appeared, + dish.lowest_price AS dish_lowest_price, + dish.highest_price AS dish_highest_price, + menu.id AS menu_id, + menu.name AS menu_name, + menu.sponsor AS menu_sponsor, + menu.event AS menu_event, + menu.venue AS menu_venue, + menu.place AS menu_place, + menu.physical_description AS menu_physical_description, + menu.occasion AS menu_occasion, + menu.notes AS menu_notes, + menu.call_number AS menu_call_number, + menu.keywords AS menu_keywords, + menu.language AS menu_language, + menu.date AS menu_date, + menu.location AS menu_location, + menu.location_type AS menu_location_type, + menu.currency AS menu_currency, + menu.currency_symbol AS menu_currency_symbol, + menu.status AS menu_status, + menu.page_count AS menu_page_count, + menu.dish_count AS menu_dish_count +FROM menu_item + JOIN dish ON menu_item.dish_id = dish.id + JOIN menu_page ON menu_item.menu_page_id = menu_page.id + JOIN menu ON menu_page.menu_id = menu.id; +``` + +## 验证数据 {#validate-data} + +请求: + +```sql +SELECT count() FROM menu_item_denorm; +``` + +结果: + +```text +┌─count()─┐ +│ 1329175 │ +└─────────┘ +``` + +## 运行一些查询 {#run-queries} + +### 菜品的平均历史价格 {#query-averaged-historical-prices} + +请求: + +```sql +SELECT + round(toUInt32OrZero(extract(menu_date, '^\\d{4}')), -1) AS d, + count(), + round(avg(price), 2), + bar(avg(price), 0, 100, 100) +FROM menu_item_denorm +WHERE (menu_currency = 'Dollars') AND (d > 0) AND (d < 2022) +GROUP BY d +ORDER BY d ASC; +``` + +结果: + +```text +┌────d─┬─count()─┬─round(avg(price), 2)─┬─bar(avg(price), 0, 100, 100)─┐ +│ 1850 │ 618 │ 1.5 │ █▍ │ +│ 1860 │ 1634 │ 1.29 │ █▎ │ +│ 1870 │ 2215 │ 1.36 │ █▎ │ +│ 1880 │ 3909 │ 1.01 │ █ │ +│ 1890 │ 8837 │ 1.4 │ █▍ │ +│ 1900 │ 176292 │ 0.68 │ ▋ │ +│ 1910 │ 212196 │ 0.88 │ ▊ │ +│ 1920 │ 179590 │ 0.74 │ ▋ │ +│ 1930 │ 73707 │ 0.6 │ ▌ │ +│ 1940 │ 58795 │ 0.57 │ ▌ │ +│ 1950 │ 41407 │ 0.95 │ ▊ │ +│ 1960 │ 51179 │ 1.32 │ █▎ │ +│ 1970 │ 12914 │ 1.86 │ █▋ │ +│ 1980 │ 7268 │ 4.35 │ ████▎ │ +│ 1990 │ 11055 │ 6.03 │ ██████ │ +│ 2000 │ 2467 │ 11.85 │ ███████████▋ │ +│ 2010 │ 597 │ 25.66 │ █████████████████████████▋ │ +└──────┴─────────┴──────────────────────┴──────────────────────────────┘ +``` + +带上一粒盐。 + +### 汉堡价格 {#query-burger-prices} + +请求: + +```sql +SELECT + round(toUInt32OrZero(extract(menu_date, '^\\d{4}')), -1) AS d, + count(), + round(avg(price), 2), + bar(avg(price), 0, 50, 100) +FROM menu_item_denorm +WHERE (menu_currency = 'Dollars') AND (d > 0) AND (d < 2022) AND (dish_name ILIKE '%burger%') +GROUP BY d +ORDER BY d ASC; +``` + +结果: + +```text +┌────d─┬─count()─┬─round(avg(price), 2)─┬─bar(avg(price), 0, 50, 100)───────────┐ +│ 1880 │ 2 │ 0.42 │ ▋ │ +│ 1890 │ 7 │ 0.85 │ 
█▋ │ +│ 1900 │ 399 │ 0.49 │ ▊ │ +│ 1910 │ 589 │ 0.68 │ █▎ │ +│ 1920 │ 280 │ 0.56 │ █ │ +│ 1930 │ 74 │ 0.42 │ ▋ │ +│ 1940 │ 119 │ 0.59 │ █▏ │ +│ 1950 │ 134 │ 1.09 │ ██▏ │ +│ 1960 │ 272 │ 0.92 │ █▋ │ +│ 1970 │ 108 │ 1.18 │ ██▎ │ +│ 1980 │ 88 │ 2.82 │ █████▋ │ +│ 1990 │ 184 │ 3.68 │ ███████▎ │ +│ 2000 │ 21 │ 7.14 │ ██████████████▎ │ +│ 2010 │ 6 │ 18.42 │ ████████████████████████████████████▋ │ +└──────┴─────────┴──────────────────────┴───────────────────────────────────────┘ +``` + +###伏特加{#query-vodka} + +请求: + +```sql +SELECT + round(toUInt32OrZero(extract(menu_date, '^\\d{4}')), -1) AS d, + count(), + round(avg(price), 2), + bar(avg(price), 0, 50, 100) +FROM menu_item_denorm +WHERE (menu_currency IN ('Dollars', '')) AND (d > 0) AND (d < 2022) AND (dish_name ILIKE '%vodka%') +GROUP BY d +ORDER BY d ASC; +``` + +结果: + +```text +┌────d─┬─count()─┬─round(avg(price), 2)─┬─bar(avg(price), 0, 50, 100)─┐ +│ 1910 │ 2 │ 0 │ │ +│ 1920 │ 1 │ 0.3 │ ▌ │ +│ 1940 │ 21 │ 0.42 │ ▋ │ +│ 1950 │ 14 │ 0.59 │ █▏ │ +│ 1960 │ 113 │ 2.17 │ ████▎ │ +│ 1970 │ 37 │ 0.68 │ █▎ │ +│ 1980 │ 19 │ 2.55 │ █████ │ +│ 1990 │ 86 │ 3.6 │ ███████▏ │ +│ 2000 │ 2 │ 3.98 │ ███████▊ │ +└──────┴─────────┴──────────────────────┴─────────────────────────────┘ +``` + +要查询 `Vodka`,必须声明通过 `ILIKE '%vodka%'` 进行查询。 + +### 鱼子酱 {#query-caviar} + +列出鱼子酱的价格。另外,列出任何带有鱼子酱的菜肴的名称。 + +请求: + +```sql +SELECT + round(toUInt32OrZero(extract(menu_date, '^\\d{4}')), -1) AS d, + count(), + round(avg(price), 2), + bar(avg(price), 0, 50, 100), + any(dish_name) +FROM menu_item_denorm +WHERE (menu_currency IN ('Dollars', '')) AND (d > 0) AND (d < 2022) AND (dish_name ILIKE '%caviar%') +GROUP BY d +ORDER BY d ASC; +``` + +结果: + +```text +┌────d─┬─count()─┬─round(avg(price), 2)─┬─bar(avg(price), 0, 50, 100)──────┬─any(dish_name)──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ 1090 │ 1 │ 0 │ │ Caviar │ +│ 1880 │ 3 │ 0 │ │ Caviar │ +│ 1890 │ 39 │ 0.59 │ █▏ │ Butter and caviar │ +│ 1900 │ 1014 │ 0.34 │ ▋ │ Anchovy Caviar on Toast │ +│ 1910 │ 1588 │ 1.35 │ ██▋ │ 1/1 Brötchen Caviar │ +│ 1920 │ 927 │ 1.37 │ ██▋ │ ASTRAKAN CAVIAR │ +│ 1930 │ 289 │ 1.91 │ ███▋ │ Astrachan caviar │ +│ 1940 │ 201 │ 0.83 │ █▋ │ (SPECIAL) Domestic Caviar Sandwich │ +│ 1950 │ 81 │ 2.27 │ ████▌ │ Beluga Caviar │ +│ 1960 │ 126 │ 2.21 │ ████▍ │ Beluga Caviar │ +│ 1970 │ 105 │ 0.95 │ █▊ │ BELUGA MALOSSOL CAVIAR AMERICAN DRESSING │ +│ 1980 │ 12 │ 7.22 │ ██████████████▍ │ Authentic Iranian Beluga Caviar the world's finest black caviar presented in ice garni and a sampling of chilled 100° Russian vodka │ +│ 1990 │ 74 │ 14.42 │ ████████████████████████████▋ │ Avocado Salad, Fresh cut avocado with caviare │ +│ 2000 │ 3 │ 7.82 │ ███████████████▋ │ Aufgeschlagenes Kartoffelsueppchen mit Forellencaviar │ +│ 2010 │ 6 │ 15.58 │ ███████████████████████████████▏ │ "OYSTERS AND PEARLS" "Sabayon" of Pearl Tapioca with Island Creek Oysters and Russian Sevruga Caviar │ +└──────┴─────────┴──────────────────────┴──────────────────────────────────┴─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +``` + +至少他们有伏特加配鱼子酱。真棒。 + +## 在线 Playground{#playground} + +此数据集已经上传到了 ClickHouse Playground 
中,[example](https://play.clickhouse.com/play?user=play#U0VMRUNUCiAgICByb3VuZCh0b1VJbnQzMk9yWmVybyhleHRyYWN0KG1lbnVfZGF0ZSwgJ15cXGR7NH0nKSksIC0xKSBBUyBkLAogICAgY291bnQoKSwKICAgIHJvdW5kKGF2ZyhwcmljZSksIDIpLAogICAgYmFyKGF2ZyhwcmljZSksIDAsIDUwLCAxMDApLAogICAgYW55KGRpc2hfbmFtZSkKRlJPTSBtZW51X2l0ZW1fZGVub3JtCldIRVJFIChtZW51X2N1cnJlbmN5IElOICgnRG9sbGFycycsICcnKSkgQU5EIChkID4gMCkgQU5EIChkIDwgMjAyMikgQU5EIChkaXNoX25hbWUgSUxJS0UgJyVjYXZpYXIlJykKR1JPVVAgQlkgZApPUkRFUiBCWSBkIEFTQw==)。 diff --git a/docs/zh/getting-started/example-datasets/opensky.mdx b/docs/zh/getting-started/example-datasets/opensky.mdx index e8d5367e970..92cd104e06e 100644 --- a/docs/zh/getting-started/example-datasets/opensky.mdx +++ b/docs/zh/getting-started/example-datasets/opensky.mdx @@ -1,9 +1,416 @@ ---- +--- slug: /zh/getting-started/example-datasets/opensky -sidebar_label: Air Traffic Data -title: "Crowdsourced air traffic data from The OpenSky Network 2020" +sidebar_label: 空中交通数据 +description: 该数据集中的数据是从完整的 OpenSky 数据集中衍生而来的,对其中的数据进行了必要的清理,用以展示在 COVID-19 期间空中交通的发展。 +title: "来自 The OpenSky Network 2020 的众包空中交通数据" --- -import Content from '@site/docs/en/getting-started/example-datasets/opensky.md'; +该数据集中的数据是从完整的 OpenSky 数据集中派生和清理的,以说明 COVID-19 大流行期间空中交通的发展。它涵盖了自 2019 年 1 月 1 日以来该网络中 2500 多名成员观测到的所有航班。直到 COVID-19 大流行结束,更多数据将定期的更新到数据集中。 - +来源:https://zenodo.org/record/5092942#.YRBCyTpRXYd + +Martin Strohmeier、Xavier Olive、Jannis Lübbe、Matthias Schäfer 和 Vincent Lenders “来自 OpenSky 网络 2019-2020 的众包空中交通数据”地球系统科学数据 13(2),2021 https://doi.org/10.5194/essd- 13-357-2021 + +## 下载数据集 {#download-dataset} + +运行命令: + +```bash +wget -O- https://zenodo.org/record/5092942 | grep -oP 'https://zenodo.org/record/5092942/files/flightlist_\d+_\d+\.csv\.gz' | xargs wget +``` + +Download will take about 2 minutes with good internet connection. There are 30 files with total size of 4.3 GB. 
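+
+下载完成后,可以先检查文件数量和总大小是否符合预期(以下命令仅作示意,依赖常见的 GNU 工具):
+
+```bash
+# 预期共有 30 个 flightlist_*.csv.gz 文件
+ls -1 flightlist_*.csv.gz | wc -l
+
+# 预期总大小约 4.3 GB
+du -ch flightlist_*.csv.gz | tail -n 1
+```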
+ +## 创建表 {#create-table} + +```sql +CREATE TABLE opensky +( + callsign String, + number String, + icao24 String, + registration String, + typecode String, + origin String, + destination String, + firstseen DateTime, + lastseen DateTime, + day DateTime, + latitude_1 Float64, + longitude_1 Float64, + altitude_1 Float64, + latitude_2 Float64, + longitude_2 Float64, + altitude_2 Float64 +) ENGINE = MergeTree ORDER BY (origin, destination, callsign); +``` + +## 导入数据 {#import-data} + +将数据并行导入到 ClickHouse: + +```bash +ls -1 flightlist_*.csv.gz | xargs -P100 -I{} bash -c 'gzip -c -d "{}" | clickhouse-client --date_time_input_format best_effort --query "INSERT INTO opensky FORMAT CSVWithNames"' +``` + +- 这里我们将文件列表(`ls -1 flightlist_*.csv.gz`)传递给`xargs`以进行并行处理。 `xargs -P100` 指定最多使用 100 个并行工作程序,但由于我们只有 30 个文件,工作程序的数量将只有 30 个。 +- 对于每个文件,`xargs` 将通过 `bash -c` 为每个文件运行一个脚本文件。该脚本通过使用 `{}` 表示文件名占位符,然后 `xargs` 由命令进行填充(使用 `-I{}`)。 +- 该脚本会将文件 (`gzip -c -d "{}"`) 解压缩到标准输出(`-c` 参数),并将输出重定向到 `clickhouse-client`。 +- 我们还要求使用扩展解析器解析 [DateTime](../../sql-reference/data-types/datetime.md) 字段 ([--date_time_input_format best_effort](../../operations/settings/ settings.md#settings-date_time_input_format)) 以识别具有时区偏移的 ISO-8601 格式。 + +最后,`clickhouse-client` 会以 [CSVWithNames](../../interfaces/formats.md#csvwithnames) 格式读取输入数据然后执行插入。 + +并行导入需要 24 秒。 + +如果您不想使用并行导入,以下是顺序导入的方式: + +```bash +for file in flightlist_*.csv.gz; do gzip -c -d "$file" | clickhouse-client --date_time_input_format best_effort --query "INSERT INTO opensky FORMAT CSVWithNames"; done +``` + +## 验证数据 {#validate-data} + +请求: + +```sql +SELECT count() FROM opensky; +``` + +结果: + +```text +┌──count()─┐ +│ 66010819 │ +└──────────┘ +``` + +ClickHouse 中的数据集大小只有 2.66 GiB,检查一下。 + +请求: + +```sql +SELECT formatReadableSize(total_bytes) FROM system.tables WHERE name = 'opensky'; +``` + +结果: + +```text +┌─formatReadableSize(total_bytes)─┐ +│ 2.66 GiB │ +└─────────────────────────────────┘ +``` + +## 运行一些查询 {#run-queries} + +总行驶距离为 680 亿公里。 + +请求: + +```sql +SELECT formatReadableQuantity(sum(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2)) / 1000) FROM opensky; +``` + +结果: + +```text +┌─formatReadableQuantity(divide(sum(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2)), 1000))─┐ +│ 68.72 billion │ +└──────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +``` + +平均飞行距离约为 1000 公里。 + +请求: + +```sql +SELECT avg(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2)) FROM opensky; +``` + +结果: + +```text +┌─avg(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2))─┐ +│ 1041090.6465708319 │ +└────────────────────────────────────────────────────────────────────┘ +``` + +### 最繁忙的始发机场和观测到的平均距离{#busy-airports-average-distance} + +请求: + +```sql +SELECT + origin, + count(), + round(avg(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2))) AS distance, + bar(distance, 0, 10000000, 100) AS bar +FROM opensky +WHERE origin != '' +GROUP BY origin +ORDER BY count() DESC +LIMIT 100; +``` + +结果: + +```text + ┌─origin─┬─count()─┬─distance─┬─bar────────────────────────────────────┐ + 1. │ KORD │ 745007 │ 1546108 │ ███████████████▍ │ + 2. │ KDFW │ 696702 │ 1358721 │ █████████████▌ │ + 3. │ KATL │ 667286 │ 1169661 │ ███████████▋ │ + 4. │ KDEN │ 582709 │ 1287742 │ ████████████▊ │ + 5. │ KLAX │ 581952 │ 2628393 │ ██████████████████████████▎ │ + 6. │ KLAS │ 447789 │ 1336967 │ █████████████▎ │ + 7. │ KPHX │ 428558 │ 1345635 │ █████████████▍ │ + 8. 
│ KSEA │ 412592 │ 1757317 │ █████████████████▌ │ + 9. │ KCLT │ 404612 │ 880355 │ ████████▋ │ + 10. │ VIDP │ 363074 │ 1445052 │ ██████████████▍ │ + 11. │ EDDF │ 362643 │ 2263960 │ ██████████████████████▋ │ + 12. │ KSFO │ 361869 │ 2445732 │ ████████████████████████▍ │ + 13. │ KJFK │ 349232 │ 2996550 │ █████████████████████████████▊ │ + 14. │ KMSP │ 346010 │ 1287328 │ ████████████▋ │ + 15. │ LFPG │ 344748 │ 2206203 │ ██████████████████████ │ + 16. │ EGLL │ 341370 │ 3216593 │ ████████████████████████████████▏ │ + 17. │ EHAM │ 340272 │ 2116425 │ █████████████████████▏ │ + 18. │ KEWR │ 337696 │ 1826545 │ ██████████████████▎ │ + 19. │ KPHL │ 320762 │ 1291761 │ ████████████▊ │ + 20. │ OMDB │ 308855 │ 2855706 │ ████████████████████████████▌ │ + 21. │ UUEE │ 307098 │ 1555122 │ ███████████████▌ │ + 22. │ KBOS │ 304416 │ 1621675 │ ████████████████▏ │ + 23. │ LEMD │ 291787 │ 1695097 │ ████████████████▊ │ + 24. │ YSSY │ 272979 │ 1875298 │ ██████████████████▋ │ + 25. │ KMIA │ 265121 │ 1923542 │ ███████████████████▏ │ + 26. │ ZGSZ │ 263497 │ 745086 │ ███████▍ │ + 27. │ EDDM │ 256691 │ 1361453 │ █████████████▌ │ + 28. │ WMKK │ 254264 │ 1626688 │ ████████████████▎ │ + 29. │ CYYZ │ 251192 │ 2175026 │ █████████████████████▋ │ + 30. │ KLGA │ 248699 │ 1106935 │ ███████████ │ + 31. │ VHHH │ 248473 │ 3457658 │ ██████████████████████████████████▌ │ + 32. │ RJTT │ 243477 │ 1272744 │ ████████████▋ │ + 33. │ KBWI │ 241440 │ 1187060 │ ███████████▋ │ + 34. │ KIAD │ 239558 │ 1683485 │ ████████████████▋ │ + 35. │ KIAH │ 234202 │ 1538335 │ ███████████████▍ │ + 36. │ KFLL │ 223447 │ 1464410 │ ██████████████▋ │ + 37. │ KDAL │ 212055 │ 1082339 │ ██████████▋ │ + 38. │ KDCA │ 207883 │ 1013359 │ ██████████▏ │ + 39. │ LIRF │ 207047 │ 1427965 │ ██████████████▎ │ + 40. │ PANC │ 206007 │ 2525359 │ █████████████████████████▎ │ + 41. │ LTFJ │ 205415 │ 860470 │ ████████▌ │ + 42. │ KDTW │ 204020 │ 1106716 │ ███████████ │ + 43. │ VABB │ 201679 │ 1300865 │ █████████████ │ + 44. │ OTHH │ 200797 │ 3759544 │ █████████████████████████████████████▌ │ + 45. │ KMDW │ 200796 │ 1232551 │ ████████████▎ │ + 46. │ KSAN │ 198003 │ 1495195 │ ██████████████▊ │ + 47. │ KPDX │ 197760 │ 1269230 │ ████████████▋ │ + 48. │ SBGR │ 197624 │ 2041697 │ ████████████████████▍ │ + 49. │ VOBL │ 189011 │ 1040180 │ ██████████▍ │ + 50. │ LEBL │ 188956 │ 1283190 │ ████████████▋ │ + 51. │ YBBN │ 188011 │ 1253405 │ ████████████▌ │ + 52. │ LSZH │ 187934 │ 1572029 │ ███████████████▋ │ + 53. │ YMML │ 187643 │ 1870076 │ ██████████████████▋ │ + 54. │ RCTP │ 184466 │ 2773976 │ ███████████████████████████▋ │ + 55. │ KSNA │ 180045 │ 778484 │ ███████▋ │ + 56. │ EGKK │ 176420 │ 1694770 │ ████████████████▊ │ + 57. │ LOWW │ 176191 │ 1274833 │ ████████████▋ │ + 58. │ UUDD │ 176099 │ 1368226 │ █████████████▋ │ + 59. │ RKSI │ 173466 │ 3079026 │ ██████████████████████████████▋ │ + 60. │ EKCH │ 172128 │ 1229895 │ ████████████▎ │ + 61. │ KOAK │ 171119 │ 1114447 │ ███████████▏ │ + 62. │ RPLL │ 170122 │ 1440735 │ ██████████████▍ │ + 63. │ KRDU │ 167001 │ 830521 │ ████████▎ │ + 64. │ KAUS │ 164524 │ 1256198 │ ████████████▌ │ + 65. │ KBNA │ 163242 │ 1022726 │ ██████████▏ │ + 66. │ KSDF │ 162655 │ 1380867 │ █████████████▋ │ + 67. │ ENGM │ 160732 │ 910108 │ █████████ │ + 68. │ LIMC │ 160696 │ 1564620 │ ███████████████▋ │ + 69. │ KSJC │ 159278 │ 1081125 │ ██████████▋ │ + 70. │ KSTL │ 157984 │ 1026699 │ ██████████▎ │ + 71. │ UUWW │ 156811 │ 1261155 │ ████████████▌ │ + 72. │ KIND │ 153929 │ 987944 │ █████████▊ │ + 73. │ ESSA │ 153390 │ 1203439 │ ████████████ │ + 74. 
│ KMCO │ 153351 │ 1508657 │ ███████████████ │ + 75. │ KDVT │ 152895 │ 74048 │ ▋ │ + 76. │ VTBS │ 152645 │ 2255591 │ ██████████████████████▌ │ + 77. │ CYVR │ 149574 │ 2027413 │ ████████████████████▎ │ + 78. │ EIDW │ 148723 │ 1503985 │ ███████████████ │ + 79. │ LFPO │ 143277 │ 1152964 │ ███████████▌ │ + 80. │ EGSS │ 140830 │ 1348183 │ █████████████▍ │ + 81. │ KAPA │ 140776 │ 420441 │ ████▏ │ + 82. │ KHOU │ 138985 │ 1068806 │ ██████████▋ │ + 83. │ KTPA │ 138033 │ 1338223 │ █████████████▍ │ + 84. │ KFFZ │ 137333 │ 55397 │ ▌ │ + 85. │ NZAA │ 136092 │ 1581264 │ ███████████████▋ │ + 86. │ YPPH │ 133916 │ 1271550 │ ████████████▋ │ + 87. │ RJBB │ 133522 │ 1805623 │ ██████████████████ │ + 88. │ EDDL │ 133018 │ 1265919 │ ████████████▋ │ + 89. │ ULLI │ 130501 │ 1197108 │ ███████████▊ │ + 90. │ KIWA │ 127195 │ 250876 │ ██▌ │ + 91. │ KTEB │ 126969 │ 1189414 │ ███████████▊ │ + 92. │ VOMM │ 125616 │ 1127757 │ ███████████▎ │ + 93. │ LSGG │ 123998 │ 1049101 │ ██████████▍ │ + 94. │ LPPT │ 122733 │ 1779187 │ █████████████████▋ │ + 95. │ WSSS │ 120493 │ 3264122 │ ████████████████████████████████▋ │ + 96. │ EBBR │ 118539 │ 1579939 │ ███████████████▋ │ + 97. │ VTBD │ 118107 │ 661627 │ ██████▌ │ + 98. │ KVNY │ 116326 │ 692960 │ ██████▊ │ + 99. │ EDDT │ 115122 │ 941740 │ █████████▍ │ +100. │ EFHK │ 114860 │ 1629143 │ ████████████████▎ │ + └────────┴─────────┴──────────┴────────────────────────────────────────┘ +``` + +### 每周来自莫斯科三个主要机场的航班数量 {#flights-from-moscow} + +请求: + +```sql +SELECT + toMonday(day) AS k, + count() AS c, + bar(c, 0, 10000, 100) AS bar +FROM opensky +WHERE origin IN ('UUEE', 'UUDD', 'UUWW') +GROUP BY k +ORDER BY k ASC; +``` + +结果: + +```text + ┌──────────k─┬────c─┬─bar──────────────────────────────────────────────────────────────────────────┐ + 1. │ 2018-12-31 │ 5248 │ ████████████████████████████████████████████████████▍ │ + 2. │ 2019-01-07 │ 6302 │ ███████████████████████████████████████████████████████████████ │ + 3. │ 2019-01-14 │ 5701 │ █████████████████████████████████████████████████████████ │ + 4. │ 2019-01-21 │ 5638 │ ████████████████████████████████████████████████████████▍ │ + 5. │ 2019-01-28 │ 5731 │ █████████████████████████████████████████████████████████▎ │ + 6. │ 2019-02-04 │ 5683 │ ████████████████████████████████████████████████████████▋ │ + 7. │ 2019-02-11 │ 5759 │ █████████████████████████████████████████████████████████▌ │ + 8. │ 2019-02-18 │ 5736 │ █████████████████████████████████████████████████████████▎ │ + 9. │ 2019-02-25 │ 5873 │ ██████████████████████████████████████████████████████████▋ │ + 10. │ 2019-03-04 │ 5965 │ ███████████████████████████████████████████████████████████▋ │ + 11. │ 2019-03-11 │ 5900 │ ███████████████████████████████████████████████████████████ │ + 12. │ 2019-03-18 │ 5823 │ ██████████████████████████████████████████████████████████▏ │ + 13. │ 2019-03-25 │ 5899 │ ██████████████████████████████████████████████████████████▊ │ + 14. │ 2019-04-01 │ 6043 │ ████████████████████████████████████████████████████████████▍ │ + 15. │ 2019-04-08 │ 6098 │ ████████████████████████████████████████████████████████████▊ │ + 16. │ 2019-04-15 │ 6196 │ █████████████████████████████████████████████████████████████▊ │ + 17. │ 2019-04-22 │ 6486 │ ████████████████████████████████████████████████████████████████▋ │ + 18. │ 2019-04-29 │ 6682 │ ██████████████████████████████████████████████████████████████████▋ │ + 19. │ 2019-05-06 │ 6739 │ ███████████████████████████████████████████████████████████████████▍ │ + 20. 
│ 2019-05-13 │ 6600 │ ██████████████████████████████████████████████████████████████████ │ + 21. │ 2019-05-20 │ 6575 │ █████████████████████████████████████████████████████████████████▋ │ + 22. │ 2019-05-27 │ 6786 │ ███████████████████████████████████████████████████████████████████▋ │ + 23. │ 2019-06-03 │ 6872 │ ████████████████████████████████████████████████████████████████████▋ │ + 24. │ 2019-06-10 │ 7045 │ ██████████████████████████████████████████████████████████████████████▍ │ + 25. │ 2019-06-17 │ 7045 │ ██████████████████████████████████████████████████████████████████████▍ │ + 26. │ 2019-06-24 │ 6852 │ ████████████████████████████████████████████████████████████████████▌ │ + 27. │ 2019-07-01 │ 7248 │ ████████████████████████████████████████████████████████████████████████▍ │ + 28. │ 2019-07-08 │ 7284 │ ████████████████████████████████████████████████████████████████████████▋ │ + 29. │ 2019-07-15 │ 7142 │ ███████████████████████████████████████████████████████████████████████▍ │ + 30. │ 2019-07-22 │ 7108 │ ███████████████████████████████████████████████████████████████████████ │ + 31. │ 2019-07-29 │ 7251 │ ████████████████████████████████████████████████████████████████████████▌ │ + 32. │ 2019-08-05 │ 7403 │ ██████████████████████████████████████████████████████████████████████████ │ + 33. │ 2019-08-12 │ 7457 │ ██████████████████████████████████████████████████████████████████████████▌ │ + 34. │ 2019-08-19 │ 7502 │ ███████████████████████████████████████████████████████████████████████████ │ + 35. │ 2019-08-26 │ 7540 │ ███████████████████████████████████████████████████████████████████████████▍ │ + 36. │ 2019-09-02 │ 7237 │ ████████████████████████████████████████████████████████████████████████▎ │ + 37. │ 2019-09-09 │ 7328 │ █████████████████████████████████████████████████████████████████████████▎ │ + 38. │ 2019-09-16 │ 5566 │ ███████████████████████████████████████████████████████▋ │ + 39. │ 2019-09-23 │ 7049 │ ██████████████████████████████████████████████████████████████████████▍ │ + 40. │ 2019-09-30 │ 6880 │ ████████████████████████████████████████████████████████████████████▋ │ + 41. │ 2019-10-07 │ 6518 │ █████████████████████████████████████████████████████████████████▏ │ + 42. │ 2019-10-14 │ 6688 │ ██████████████████████████████████████████████████████████████████▊ │ + 43. │ 2019-10-21 │ 6667 │ ██████████████████████████████████████████████████████████████████▋ │ + 44. │ 2019-10-28 │ 6303 │ ███████████████████████████████████████████████████████████████ │ + 45. │ 2019-11-04 │ 6298 │ ██████████████████████████████████████████████████████████████▊ │ + 46. │ 2019-11-11 │ 6137 │ █████████████████████████████████████████████████████████████▎ │ + 47. │ 2019-11-18 │ 6051 │ ████████████████████████████████████████████████████████████▌ │ + 48. │ 2019-11-25 │ 5820 │ ██████████████████████████████████████████████████████████▏ │ + 49. │ 2019-12-02 │ 5942 │ ███████████████████████████████████████████████████████████▍ │ + 50. │ 2019-12-09 │ 4891 │ ████████████████████████████████████████████████▊ │ + 51. │ 2019-12-16 │ 5682 │ ████████████████████████████████████████████████████████▋ │ + 52. │ 2019-12-23 │ 6111 │ █████████████████████████████████████████████████████████████ │ + 53. │ 2019-12-30 │ 5870 │ ██████████████████████████████████████████████████████████▋ │ + 54. │ 2020-01-06 │ 5953 │ ███████████████████████████████████████████████████████████▌ │ + 55. │ 2020-01-13 │ 5698 │ ████████████████████████████████████████████████████████▊ │ + 56. 
│ 2020-01-20 │ 5339 │ █████████████████████████████████████████████████████▍ │ + 57. │ 2020-01-27 │ 5566 │ ███████████████████████████████████████████████████████▋ │ + 58. │ 2020-02-03 │ 5801 │ ██████████████████████████████████████████████████████████ │ + 59. │ 2020-02-10 │ 5692 │ ████████████████████████████████████████████████████████▊ │ + 60. │ 2020-02-17 │ 5912 │ ███████████████████████████████████████████████████████████ │ + 61. │ 2020-02-24 │ 6031 │ ████████████████████████████████████████████████████████████▎ │ + 62. │ 2020-03-02 │ 6105 │ █████████████████████████████████████████████████████████████ │ + 63. │ 2020-03-09 │ 5823 │ ██████████████████████████████████████████████████████████▏ │ + 64. │ 2020-03-16 │ 4659 │ ██████████████████████████████████████████████▌ │ + 65. │ 2020-03-23 │ 3720 │ █████████████████████████████████████▏ │ + 66. │ 2020-03-30 │ 1720 │ █████████████████▏ │ + 67. │ 2020-04-06 │ 849 │ ████████▍ │ + 68. │ 2020-04-13 │ 710 │ ███████ │ + 69. │ 2020-04-20 │ 725 │ ███████▏ │ + 70. │ 2020-04-27 │ 920 │ █████████▏ │ + 71. │ 2020-05-04 │ 859 │ ████████▌ │ + 72. │ 2020-05-11 │ 1047 │ ██████████▍ │ + 73. │ 2020-05-18 │ 1135 │ ███████████▎ │ + 74. │ 2020-05-25 │ 1266 │ ████████████▋ │ + 75. │ 2020-06-01 │ 1793 │ █████████████████▊ │ + 76. │ 2020-06-08 │ 1979 │ ███████████████████▋ │ + 77. │ 2020-06-15 │ 2297 │ ██████████████████████▊ │ + 78. │ 2020-06-22 │ 2788 │ ███████████████████████████▊ │ + 79. │ 2020-06-29 │ 3389 │ █████████████████████████████████▊ │ + 80. │ 2020-07-06 │ 3545 │ ███████████████████████████████████▍ │ + 81. │ 2020-07-13 │ 3569 │ ███████████████████████████████████▋ │ + 82. │ 2020-07-20 │ 3784 │ █████████████████████████████████████▋ │ + 83. │ 2020-07-27 │ 3960 │ ███████████████████████████████████████▌ │ + 84. │ 2020-08-03 │ 4323 │ ███████████████████████████████████████████▏ │ + 85. │ 2020-08-10 │ 4581 │ █████████████████████████████████████████████▋ │ + 86. │ 2020-08-17 │ 4791 │ ███████████████████████████████████████████████▊ │ + 87. │ 2020-08-24 │ 4928 │ █████████████████████████████████████████████████▎ │ + 88. │ 2020-08-31 │ 4687 │ ██████████████████████████████████████████████▋ │ + 89. │ 2020-09-07 │ 4643 │ ██████████████████████████████████████████████▍ │ + 90. │ 2020-09-14 │ 4594 │ █████████████████████████████████████████████▊ │ + 91. │ 2020-09-21 │ 4478 │ ████████████████████████████████████████████▋ │ + 92. │ 2020-09-28 │ 4382 │ ███████████████████████████████████████████▋ │ + 93. │ 2020-10-05 │ 4261 │ ██████████████████████████████████████████▌ │ + 94. │ 2020-10-12 │ 4243 │ ██████████████████████████████████████████▍ │ + 95. │ 2020-10-19 │ 3941 │ ███████████████████████████████████████▍ │ + 96. │ 2020-10-26 │ 3616 │ ████████████████████████████████████▏ │ + 97. │ 2020-11-02 │ 3586 │ ███████████████████████████████████▋ │ + 98. │ 2020-11-09 │ 3403 │ ██████████████████████████████████ │ + 99. │ 2020-11-16 │ 3336 │ █████████████████████████████████▎ │ +100. │ 2020-11-23 │ 3230 │ ████████████████████████████████▎ │ +101. │ 2020-11-30 │ 3183 │ ███████████████████████████████▋ │ +102. │ 2020-12-07 │ 3285 │ ████████████████████████████████▋ │ +103. │ 2020-12-14 │ 3367 │ █████████████████████████████████▋ │ +104. │ 2020-12-21 │ 3748 │ █████████████████████████████████████▍ │ +105. │ 2020-12-28 │ 3986 │ ███████████████████████████████████████▋ │ +106. │ 2021-01-04 │ 3906 │ ███████████████████████████████████████ │ +107. │ 2021-01-11 │ 3425 │ ██████████████████████████████████▎ │ +108. 
│ 2021-01-18 │ 3144 │ ███████████████████████████████▍ │ +109. │ 2021-01-25 │ 3115 │ ███████████████████████████████▏ │ +110. │ 2021-02-01 │ 3285 │ ████████████████████████████████▋ │ +111. │ 2021-02-08 │ 3321 │ █████████████████████████████████▏ │ +112. │ 2021-02-15 │ 3475 │ ██████████████████████████████████▋ │ +113. │ 2021-02-22 │ 3549 │ ███████████████████████████████████▍ │ +114. │ 2021-03-01 │ 3755 │ █████████████████████████████████████▌ │ +115. │ 2021-03-08 │ 3080 │ ██████████████████████████████▋ │ +116. │ 2021-03-15 │ 3789 │ █████████████████████████████████████▊ │ +117. │ 2021-03-22 │ 3804 │ ██████████████████████████████████████ │ +118. │ 2021-03-29 │ 4238 │ ██████████████████████████████████████████▍ │ +119. │ 2021-04-05 │ 4307 │ ███████████████████████████████████████████ │ +120. │ 2021-04-12 │ 4225 │ ██████████████████████████████████████████▎ │ +121. │ 2021-04-19 │ 4391 │ ███████████████████████████████████████████▊ │ +122. │ 2021-04-26 │ 4868 │ ████████████████████████████████████████████████▋ │ +123. │ 2021-05-03 │ 4977 │ █████████████████████████████████████████████████▋ │ +124. │ 2021-05-10 │ 5164 │ ███████████████████████████████████████████████████▋ │ +125. │ 2021-05-17 │ 4986 │ █████████████████████████████████████████████████▋ │ +126. │ 2021-05-24 │ 5024 │ ██████████████████████████████████████████████████▏ │ +127. │ 2021-05-31 │ 4824 │ ████████████████████████████████████████████████▏ │ +128. │ 2021-06-07 │ 5652 │ ████████████████████████████████████████████████████████▌ │ +129. │ 2021-06-14 │ 5613 │ ████████████████████████████████████████████████████████▏ │ +130. │ 2021-06-21 │ 6061 │ ████████████████████████████████████████████████████████████▌ │ +131. │ 2021-06-28 │ 2554 │ █████████████████████████▌ │ + └────────────┴──────┴──────────────────────────────────────────────────────────────────────────────┘ +``` + +### 在线 Playground {#playground} + +你可以使用交互式资源 [Online Playground](https://play.clickhouse.com/play?user=play) 来尝试对此数据集的其他查询。 例如, [执行这个查询](https://play.clickhouse.com/play?user=play#U0VMRUNUCiAgICBvcmlnaW4sCiAgICBjb3VudCgpLAogICAgcm91bmQoYXZnKGdlb0Rpc3RhbmNlKGxvbmdpdHVkZV8xLCBsYXRpdHVkZV8xLCBsb25naXR1ZGVfMiwgbGF0aXR1ZGVfMikpKSBBUyBkaXN0YW5jZSwKICAgIGJhcihkaXN0YW5jZSwgMCwgMTAwMDAwMDAsIDEwMCkgQVMgYmFyCkZST00gb3BlbnNreQpXSEVSRSBvcmlnaW4gIT0gJycKR1JPVVAgQlkgb3JpZ2luCk9SREVSIEJZIGNvdW50KCkgREVTQwpMSU1JVCAxMDA=). 但是,请注意无法在 Playground 中创建临时表。 diff --git a/docs/zh/getting-started/example-datasets/recipes.mdx b/docs/zh/getting-started/example-datasets/recipes.mdx index da3a2ac541b..b7ed92962c5 100644 --- a/docs/zh/getting-started/example-datasets/recipes.mdx +++ b/docs/zh/getting-started/example-datasets/recipes.mdx @@ -1,9 +1,339 @@ ---- -slug: /zh/getting-started/example-datasets/recipes -sidebar_label: Recipes Dataset -title: "Recipes Dataset" +--- +slug: /zh/getting-started/example-datasets/recipes +sidebar_label: 食谱数据集 +title: "食谱数据集" --- -import Content from '@site/docs/en/getting-started/example-datasets/recipes.md'; +RecipeNLG 数据集可在 [此处](https://recipenlg.cs.put.poznan.pl/dataset) 下载。其中包含 220 万份食谱。大小略小于 1 GB。 - +## 下载并解压数据集 + +1. 进入下载页面[https://recipenlg.cs.put.poznan.pl/dataset](https://recipenlg.cs.put.poznan.pl/dataset)。 +2. 接受条款和条件并下载 zip 文件。 +3. 
使用 `unzip` 解压 zip 文件,得到 `full_dataset.csv` 文件。 + +## 创建表 + +运行 clickhouse-client 并执行以下 CREATE 请求: + +``` sql +CREATE TABLE recipes +( + title String, + ingredients Array(String), + directions Array(String), + link String, + source LowCardinality(String), + NER Array(String) +) ENGINE = MergeTree ORDER BY title; +``` + +## 插入数据 + +运行以下命令: + +``` bash +clickhouse-client --query " + INSERT INTO recipes + SELECT + title, + JSONExtract(ingredients, 'Array(String)'), + JSONExtract(directions, 'Array(String)'), + link, + source, + JSONExtract(NER, 'Array(String)') + FROM input('num UInt32, title String, ingredients String, directions String, link String, source LowCardinality(String), NER String') + FORMAT CSVWithNames +" --input_format_with_names_use_header 0 --format_csv_allow_single_quote 0 --input_format_allow_errors_num 10 < full_dataset.csv +``` + +这是一个展示如何解析自定义 CSV,这其中涉及了许多调整。 + +说明: +- 数据集为 CSV 格式,但在插入时需要一些预处理;使用表函数 [input](../../sql-reference/table-functions/input.md) 进行预处理; +- CSV 文件的结构在表函数 `input` 的参数中指定; +- 字段 `num`(行号)是不需要的 - 可以忽略并从文件中进行解析; +- 使用 `FORMAT CSVWithNames`,因为标题不包含第一个字段的名称,因此 CSV 中的标题将被忽略(通过命令行参数 `--input_format_with_names_use_header 0`); +- 文件仅使用双引号将 CSV 字符串括起来;一些字符串没有用双引号括起来,单引号也不能被解析为括起来的字符串 - 所以添加`--format_csv_allow_single_quote 0`参数接受文件中的单引号; +- 由于某些 CSV 的字符串的开头包含 `\M/` 因此无法被解析; CSV 中唯一可能以反斜杠开头的值是 `\N`,这个值被解析为 SQL NULL。通过添加`--input_format_allow_errors_num 10`参数,允许在导入过程中跳过 10 个格式错误; +- 在数据集中的 Ingredients、directions 和 NER 字段为数组;但这些数组并没有以一般形式表示:这些字段作为 JSON 序列化为字符串,然后放入 CSV 中 - 在导入是将它们解析为字符串,然后使用 [JSONExtract](../../sql-reference/functions/json-functions.md ) 函数将其转换为数组。 + +## 验证插入的数据 + +通过检查行数: + +请求: + +``` sql +SELECT count() FROM recipes; +``` + +结果: + +``` text +┌─count()─┐ +│ 2231141 │ +└─────────┘ +``` + +## 示例查询 + +### 按配方数量排列的顶级组件: + +在此示例中,我们学习如何使用 [arrayJoin](../../sql-reference/functions/array-join/) 函数将数组扩展为行的集合。 + +请求: + +``` sql +SELECT + arrayJoin(NER) AS k, + count() AS c +FROM recipes +GROUP BY k +ORDER BY c DESC +LIMIT 50 +``` + +结果: + +``` text +┌─k────────────────────┬──────c─┐ +│ salt │ 890741 │ +│ sugar │ 620027 │ +│ butter │ 493823 │ +│ flour │ 466110 │ +│ eggs │ 401276 │ +│ onion │ 372469 │ +│ garlic │ 358364 │ +│ milk │ 346769 │ +│ water │ 326092 │ +│ vanilla │ 270381 │ +│ olive oil │ 197877 │ +│ pepper │ 179305 │ +│ brown sugar │ 174447 │ +│ tomatoes │ 163933 │ +│ egg │ 160507 │ +│ baking powder │ 148277 │ +│ lemon juice │ 146414 │ +│ Salt │ 122557 │ +│ cinnamon │ 117927 │ +│ sour cream │ 116682 │ +│ cream cheese │ 114423 │ +│ margarine │ 112742 │ +│ celery │ 112676 │ +│ baking soda │ 110690 │ +│ parsley │ 102151 │ +│ chicken │ 101505 │ +│ onions │ 98903 │ +│ vegetable oil │ 91395 │ +│ oil │ 85600 │ +│ mayonnaise │ 84822 │ +│ pecans │ 79741 │ +│ nuts │ 78471 │ +│ potatoes │ 75820 │ +│ carrots │ 75458 │ +│ pineapple │ 74345 │ +│ soy sauce │ 70355 │ +│ black pepper │ 69064 │ +│ thyme │ 68429 │ +│ mustard │ 65948 │ +│ chicken broth │ 65112 │ +│ bacon │ 64956 │ +│ honey │ 64626 │ +│ oregano │ 64077 │ +│ ground beef │ 64068 │ +│ unsalted butter │ 63848 │ +│ mushrooms │ 61465 │ +│ Worcestershire sauce │ 59328 │ +│ cornstarch │ 58476 │ +│ green pepper │ 58388 │ +│ Cheddar cheese │ 58354 │ +└──────────────────────┴────────┘ + +50 rows in set. Elapsed: 0.112 sec. Processed 2.23 million rows, 361.57 MB (19.99 million rows/s., 3.24 GB/s.) 
+``` + +### 最复杂的草莓食谱 + +``` sql +SELECT + title, + length(NER), + length(directions) +FROM recipes +WHERE has(NER, 'strawberry') +ORDER BY length(directions) DESC +LIMIT 10 +``` + +结果: + +``` text +┌─title────────────────────────────────────────────────────────────┬─length(NER)─┬─length(directions)─┐ +│ Chocolate-Strawberry-Orange Wedding Cake │ 24 │ 126 │ +│ Strawberry Cream Cheese Crumble Tart │ 19 │ 47 │ +│ Charlotte-Style Ice Cream │ 11 │ 45 │ +│ Sinfully Good a Million Layers Chocolate Layer Cake, With Strawb │ 31 │ 45 │ +│ Sweetened Berries With Elderflower Sherbet │ 24 │ 44 │ +│ Chocolate-Strawberry Mousse Cake │ 15 │ 42 │ +│ Rhubarb Charlotte with Strawberries and Rum │ 20 │ 42 │ +│ Chef Joey's Strawberry Vanilla Tart │ 7 │ 37 │ +│ Old-Fashioned Ice Cream Sundae Cake │ 17 │ 37 │ +│ Watermelon Cake │ 16 │ 36 │ +└──────────────────────────────────────────────────────────────────┴─────────────┴────────────────────┘ + +10 rows in set. Elapsed: 0.215 sec. Processed 2.23 million rows, 1.48 GB (10.35 million rows/s., 6.86 GB/s.) +``` + +在此示例中,我们使用 [has](../../sql-reference/functions/array-functions/#hasarr-elem) 函数来按过滤数组类型元素并按 directions 的数量进行排序。 + +有一个婚礼蛋糕需要整个126个步骤来制作!显示 directions: + +请求: + +``` sql +SELECT arrayJoin(directions) +FROM recipes +WHERE title = 'Chocolate-Strawberry-Orange Wedding Cake' +``` + +结果: + +``` text +┌─arrayJoin(directions)───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ Position 1 rack in center and 1 rack in bottom third of oven and preheat to 350F. │ +│ Butter one 5-inch-diameter cake pan with 2-inch-high sides, one 8-inch-diameter cake pan with 2-inch-high sides and one 12-inch-diameter cake pan with 2-inch-high sides. │ +│ Dust pans with flour; line bottoms with parchment. │ +│ Combine 1/3 cup orange juice and 2 ounces unsweetened chocolate in heavy small saucepan. │ +│ Stir mixture over medium-low heat until chocolate melts. │ +│ Remove from heat. │ +│ Gradually mix in 1 2/3 cups orange juice. │ +│ Sift 3 cups flour, 2/3 cup cocoa, 2 teaspoons baking soda, 1 teaspoon salt and 1/2 teaspoon baking powder into medium bowl. │ +│ using electric mixer, beat 1 cup (2 sticks) butter and 3 cups sugar in large bowl until blended (mixture will look grainy). │ +│ Add 4 eggs, 1 at a time, beating to blend after each. │ +│ Beat in 1 tablespoon orange peel and 1 tablespoon vanilla extract. │ +│ Add dry ingredients alternately with orange juice mixture in 3 additions each, beating well after each addition. │ +│ Mix in 1 cup chocolate chips. │ +│ Transfer 1 cup plus 2 tablespoons batter to prepared 5-inch pan, 3 cups batter to prepared 8-inch pan and remaining batter (about 6 cups) to 12-inch pan. │ +│ Place 5-inch and 8-inch pans on center rack of oven. │ +│ Place 12-inch pan on lower rack of oven. │ +│ Bake cakes until tester inserted into center comes out clean, about 35 minutes. │ +│ Transfer cakes in pans to racks and cool completely. │ +│ Mark 4-inch diameter circle on one 6-inch-diameter cardboard cake round. │ +│ Cut out marked circle. │ +│ Mark 7-inch-diameter circle on one 8-inch-diameter cardboard cake round. │ +│ Cut out marked circle. │ +│ Mark 11-inch-diameter circle on one 12-inch-diameter cardboard cake round. │ +│ Cut out marked circle. │ +│ Cut around sides of 5-inch-cake to loosen. │ +│ Place 4-inch cardboard over pan. │ +│ Hold cardboard and pan together; turn cake out onto cardboard. │ +│ Peel off parchment.Wrap cakes on its cardboard in foil. 
│ +│ Repeat turning out, peeling off parchment and wrapping cakes in foil, using 7-inch cardboard for 8-inch cake and 11-inch cardboard for 12-inch cake. │ +│ Using remaining ingredients, make 1 more batch of cake batter and bake 3 more cake layers as described above. │ +│ Cool cakes in pans. │ +│ Cover cakes in pans tightly with foil. │ +│ (Can be prepared ahead. │ +│ Let stand at room temperature up to 1 day or double-wrap all cake layers and freeze up to 1 week. │ +│ Bring cake layers to room temperature before using.) │ +│ Place first 12-inch cake on its cardboard on work surface. │ +│ Spread 2 3/4 cups ganache over top of cake and all the way to edge. │ +│ Spread 2/3 cup jam over ganache, leaving 1/2-inch chocolate border at edge. │ +│ Drop 1 3/4 cups white chocolate frosting by spoonfuls over jam. │ +│ Gently spread frosting over jam, leaving 1/2-inch chocolate border at edge. │ +│ Rub some cocoa powder over second 12-inch cardboard. │ +│ Cut around sides of second 12-inch cake to loosen. │ +│ Place cardboard, cocoa side down, over pan. │ +│ Turn cake out onto cardboard. │ +│ Peel off parchment. │ +│ Carefully slide cake off cardboard and onto filling on first 12-inch cake. │ +│ Refrigerate. │ +│ Place first 8-inch cake on its cardboard on work surface. │ +│ Spread 1 cup ganache over top all the way to edge. │ +│ Spread 1/4 cup jam over, leaving 1/2-inch chocolate border at edge. │ +│ Drop 1 cup white chocolate frosting by spoonfuls over jam. │ +│ Gently spread frosting over jam, leaving 1/2-inch chocolate border at edge. │ +│ Rub some cocoa over second 8-inch cardboard. │ +│ Cut around sides of second 8-inch cake to loosen. │ +│ Place cardboard, cocoa side down, over pan. │ +│ Turn cake out onto cardboard. │ +│ Peel off parchment. │ +│ Slide cake off cardboard and onto filling on first 8-inch cake. │ +│ Refrigerate. │ +│ Place first 5-inch cake on its cardboard on work surface. │ +│ Spread 1/2 cup ganache over top of cake and all the way to edge. │ +│ Spread 2 tablespoons jam over, leaving 1/2-inch chocolate border at edge. │ +│ Drop 1/3 cup white chocolate frosting by spoonfuls over jam. │ +│ Gently spread frosting over jam, leaving 1/2-inch chocolate border at edge. │ +│ Rub cocoa over second 6-inch cardboard. │ +│ Cut around sides of second 5-inch cake to loosen. │ +│ Place cardboard, cocoa side down, over pan. │ +│ Turn cake out onto cardboard. │ +│ Peel off parchment. │ +│ Slide cake off cardboard and onto filling on first 5-inch cake. │ +│ Chill all cakes 1 hour to set filling. │ +│ Place 12-inch tiered cake on its cardboard on revolving cake stand. │ +│ Spread 2 2/3 cups frosting over top and sides of cake as a first coat. │ +│ Refrigerate cake. │ +│ Place 8-inch tiered cake on its cardboard on cake stand. │ +│ Spread 1 1/4 cups frosting over top and sides of cake as a first coat. │ +│ Refrigerate cake. │ +│ Place 5-inch tiered cake on its cardboard on cake stand. │ +│ Spread 3/4 cup frosting over top and sides of cake as a first coat. │ +│ Refrigerate all cakes until first coats of frosting set, about 1 hour. │ +│ (Cakes can be made to this point up to 1 day ahead; cover and keep refrigerate.) │ +│ Prepare second batch of frosting, using remaining frosting ingredients and following directions for first batch. │ +│ Spoon 2 cups frosting into pastry bag fitted with small star tip. │ +│ Place 12-inch cake on its cardboard on large flat platter. │ +│ Place platter on cake stand. │ +│ Using icing spatula, spread 2 1/2 cups frosting over top and sides of cake; smooth top. 
│ +│ Using filled pastry bag, pipe decorative border around top edge of cake. │ +│ Refrigerate cake on platter. │ +│ Place 8-inch cake on its cardboard on cake stand. │ +│ Using icing spatula, spread 1 1/2 cups frosting over top and sides of cake; smooth top. │ +│ Using pastry bag, pipe decorative border around top edge of cake. │ +│ Refrigerate cake on its cardboard. │ +│ Place 5-inch cake on its cardboard on cake stand. │ +│ Using icing spatula, spread 3/4 cup frosting over top and sides of cake; smooth top. │ +│ Using pastry bag, pipe decorative border around top edge of cake, spooning more frosting into bag if necessary. │ +│ Refrigerate cake on its cardboard. │ +│ Keep all cakes refrigerated until frosting sets, about 2 hours. │ +│ (Can be prepared 2 days ahead. │ +│ Cover loosely; keep refrigerated.) │ +│ Place 12-inch cake on platter on work surface. │ +│ Press 1 wooden dowel straight down into and completely through center of cake. │ +│ Mark dowel 1/4 inch above top of frosting. │ +│ Remove dowel and cut with serrated knife at marked point. │ +│ Cut 4 more dowels to same length. │ +│ Press 1 cut dowel back into center of cake. │ +│ Press remaining 4 cut dowels into cake, positioning 3 1/2 inches inward from cake edges and spacing evenly. │ +│ Place 8-inch cake on its cardboard on work surface. │ +│ Press 1 dowel straight down into and completely through center of cake. │ +│ Mark dowel 1/4 inch above top of frosting. │ +│ Remove dowel and cut with serrated knife at marked point. │ +│ Cut 3 more dowels to same length. │ +│ Press 1 cut dowel back into center of cake. │ +│ Press remaining 3 cut dowels into cake, positioning 2 1/2 inches inward from edges and spacing evenly. │ +│ Using large metal spatula as aid, place 8-inch cake on its cardboard atop dowels in 12-inch cake, centering carefully. │ +│ Gently place 5-inch cake on its cardboard atop dowels in 8-inch cake, centering carefully. │ +│ Using citrus stripper, cut long strips of orange peel from oranges. │ +│ Cut strips into long segments. │ +│ To make orange peel coils, wrap peel segment around handle of wooden spoon; gently slide peel off handle so that peel keeps coiled shape. │ +│ Garnish cake with orange peel coils, ivy or mint sprigs, and some berries. │ +│ (Assembled cake can be made up to 8 hours ahead. │ +│ Let stand at cool room temperature.) │ +│ Remove top and middle cake tiers. │ +│ Remove dowels from cakes. │ +│ Cut top and middle cakes into slices. │ +│ To cut 12-inch cake: Starting 3 inches inward from edge and inserting knife straight down, cut through from top to bottom to make 6-inch-diameter circle in center of cake. │ +│ Cut outer portion of cake into slices; cut inner portion into slices and serve with strawberries. │ +└─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ + +126 rows in set. Elapsed: 0.011 sec. Processed 8.19 thousand rows, 5.34 MB (737.75 thousand rows/s., 480.59 MB/s.) 
+``` + +### 在线 Playground + +此数据集也可在 [在线 Playground](https://play.clickhouse.com/play?user=play#U0VMRUNUCiAgICBhcnJheUpvaW4oTkVSKSBBUyBrLAogICAgY291bnQoKSBBUyBjCkZST00gcmVjaXBlcwpHUk9VUCBCWSBrCk9SREVSIEJZIGMgREVTQwpMSU1JVCA1MA==) 中体验。 + +[原文链接](https://clickhouse.com/docs/en/getting-started/example-datasets/recipes/) diff --git a/docs/zh/getting-started/example-datasets/uk-price-paid.mdx b/docs/zh/getting-started/example-datasets/uk-price-paid.mdx index 3a14a3ce55d..058f0ae421a 100644 --- a/docs/zh/getting-started/example-datasets/uk-price-paid.mdx +++ b/docs/zh/getting-started/example-datasets/uk-price-paid.mdx @@ -42,9 +42,9 @@ ORDER BY (postcode1, postcode2, addr1, addr2); - 将`postcode` 拆分为两个不同的列 - `postcode1` 和 `postcode2`,因为这更适合存储和查询 - 将`time` 字段转换为日期为它只包含 00:00 时间 -- 忽略 [UUid](../../sql-reference/data-types/uuid.md) 字段,因为我们不需要它进行分析 -- 使用 [transform](../../sql-reference/functions/other-functions.md#transform) 函数将 `Enum` 字段 `type` 和 `duration` 转换为更易读的 `Enum` 字段 -- 将 `is_new` 字段从单字符串(` Y`/`N`) 到 [UInt8](../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-uint256-int8-int16-int32-int64 -int128-int256) 字段为 0 或 1 +- 忽略 [UUid](/docs/zh/sql-reference/data-types/uuid.md) 字段,因为我们不需要它进行分析 +- 使用 [transform](/docs/zh/sql-reference/functions/other-functions.md#transform) 函数将 `Enum` 字段 `type` 和 `duration` 转换为更易读的 `Enum` 字段 +- 将 `is_new` 字段从单字符串(` Y`/`N`) 到 [UInt8](/docs/zh/sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-uint256-int8-int16-int32-int64 -int128-int256) 字段为 0 或 1 - 删除最后两列,因为它们都具有相同的值(即 0) `url` 函数将来自网络服务器的数据流式传输到 ClickHouse 表中。以下命令将 500 万行插入到 `uk_price_paid` 表中: @@ -342,7 +342,7 @@ LIMIT 100 ## 使用 Projection 加速查询 {#speedup-with-projections} -[Projections](../../sql-reference/statements/alter/projection.md) 允许我们通过存储任意格式的预先聚合的数据来提高查询速度。在此示例中,我们创建了一个按年份、地区和城镇分组的房产的平均价格、总价格和数量的 Projection。在执行时,如果 ClickHouse 认为 Projection 可以提高查询的性能,它将使用 Projection(何时使用由 ClickHouse 决定)。 +[Projections](/docs/zh/sql-reference/statements/alter/projection.mdx) 允许我们通过存储任意格式的预先聚合的数据来提高查询速度。在此示例中,我们创建了一个按年份、地区和城镇分组的房产的平均价格、总价格和数量的 Projection。在执行时,如果 ClickHouse 认为 Projection 可以提高查询的性能,它将使用 Projection(何时使用由 ClickHouse 决定)。 ### 构建投影{#build-projection} diff --git a/docs/zh/interfaces/third-party/client-libraries.md b/docs/zh/interfaces/third-party/client-libraries.md index d4959e37668..1b7bff02b1a 100644 --- a/docs/zh/interfaces/third-party/client-libraries.md +++ b/docs/zh/interfaces/third-party/client-libraries.md @@ -35,6 +35,9 @@ Yandex**没有**维护下面列出的库,也没有做过任何广泛的测试 - NodeJs - [clickhouse (NodeJs)](https://github.com/TimonKK/clickhouse) - [node-clickhouse](https://github.com/apla/node-clickhouse) + - [nestjs-clickhouse](https://github.com/depyronick/nestjs-clickhouse) + - [clickhouse-client](https://github.com/depyronick/clickhouse-client) + - [node-clickhouse-orm](https://github.com/zimv/node-clickhouse-orm) - Perl - [perl-DBD-ClickHouse](https://github.com/elcamlost/perl-DBD-ClickHouse) - [HTTP-ClickHouse](https://metacpan.org/release/HTTP-ClickHouse) diff --git a/docs/zh/sql-reference/data-types/date.md b/docs/zh/sql-reference/data-types/date.md index 9b1acdbe939..a8874151e75 100644 --- a/docs/zh/sql-reference/data-types/date.md +++ b/docs/zh/sql-reference/data-types/date.md @@ -3,7 +3,7 @@ slug: /zh/sql-reference/data-types/date --- # 日期 {#date} -日期类型,用两个字节存储,表示从 1970-01-01 (无符号) 到当前的日期值。允许存储从 Unix 纪元开始到编译阶段定义的上限阈值常量(目前上限是2149年,但最终完全支持的年份为2148)。最小值输出为1970-01-01。 +日期类型,用两个字节存储,表示从 1970-01-01 (无符号) 到当前的日期值。允许存储从 Unix 纪元开始到编译阶段定义的上限阈值常量(目前上限是2106年,但最终完全支持的年份为2105)。最小值输出为1970-01-01。 
值的范围: \[1970-01-01, 2149-06-06\]。 diff --git a/docs/zh/sql-reference/functions/other-functions.md b/docs/zh/sql-reference/functions/other-functions.md index a475420ba64..62d2a377ff1 100644 --- a/docs/zh/sql-reference/functions/other-functions.md +++ b/docs/zh/sql-reference/functions/other-functions.md @@ -237,7 +237,7 @@ ORDER BY c DESC ``` sql SELECT - transform(domain(Referer), ['yandex.ru', 'google.ru', 'vk.com'], ['www.yandex', 'example.com']) AS s, + transform(domain(Referer), ['yandex.ru', 'google.ru', 'vkontakte.ru'], ['www.yandex', 'example.com', 'vk.com']) AS s, count() AS c FROM test.hits GROUP BY domain(Referer) diff --git a/packages/build b/packages/build index c5ebf8641a3..531e068338d 100755 --- a/packages/build +++ b/packages/build @@ -26,8 +26,10 @@ SOURCE=${SOURCE:-$PKG_ROOT} HELP="${0} [--test] [--rpm] [-h|--help] --test - adds '+test' prefix to version --apk - build APK packages + --archlinux - build archlinux packages --rpm - build RPM packages --tgz - build tarball package + --deb - build deb package --help - show this help and exit Used envs: @@ -47,16 +49,21 @@ fi export CLICKHOUSE_VERSION_STRING - while [[ $1 == --* ]] do case "$1" in --test ) VERSION_POSTFIX+='+test' shift ;; + --deb ) + MAKE_DEB=1 + shift ;; --apk ) MAKE_APK=1 shift ;; + --archlinux ) + MAKE_ARCHLINUX=1 + shift ;; --rpm ) MAKE_RPM=1 shift ;; @@ -131,18 +138,24 @@ CLICKHOUSE_VERSION_STRING+=$VERSION_POSTFIX echo -e "\nCurrent version is $CLICKHOUSE_VERSION_STRING" for config in clickhouse*.yaml; do - echo "Building deb package for $config" + if [ -n "$MAKE_DEB" ] || [ -n "$MAKE_TGZ" ]; then + echo "Building deb package for $config" - # Preserve package path - exec 9>&1 - PKG_PATH=$(nfpm package --target "$OUTPUT_DIR" --config "$config" --packager deb | tee /dev/fd/9) - PKG_PATH=${PKG_PATH##*created package: } - exec 9>&- + # Preserve package path + exec 9>&1 + PKG_PATH=$(nfpm package --target "$OUTPUT_DIR" --config "$config" --packager deb | tee /dev/fd/9) + PKG_PATH=${PKG_PATH##*created package: } + exec 9>&- + fi if [ -n "$MAKE_APK" ]; then echo "Building apk package for $config" nfpm package --target "$OUTPUT_DIR" --config "$config" --packager apk fi + if [ -n "$MAKE_ARCHLINUX" ]; then + echo "Building archlinux package for $config" + nfpm package --target "$OUTPUT_DIR" --config "$config" --packager archlinux + fi if [ -n "$MAKE_RPM" ]; then echo "Building rpm package for $config" nfpm package --target "$OUTPUT_DIR" --config "$config" --packager rpm diff --git a/packages/clickhouse-client.yaml b/packages/clickhouse-client.yaml index 459a09ee0b8..d4fd9300208 100644 --- a/packages/clickhouse-client.yaml +++ b/packages/clickhouse-client.yaml @@ -37,7 +37,7 @@ deb: contents: - src: root/etc/clickhouse-client/config.xml dst: /etc/clickhouse-client/config.xml - type: config + type: config|noreplace - src: root/usr/bin/clickhouse-benchmark dst: /usr/bin/clickhouse-benchmark - src: root/usr/bin/clickhouse-compressor diff --git a/packages/clickhouse-keeper.yaml b/packages/clickhouse-keeper.yaml index 7803729c469..f2095dda02a 100644 --- a/packages/clickhouse-keeper.yaml +++ b/packages/clickhouse-keeper.yaml @@ -27,9 +27,9 @@ deb: Source: clickhouse contents: -- src: root/etc/clickhouse-keeper - dst: /etc/clickhouse-keeper - type: config +- src: root/etc/clickhouse-keeper/keeper_config.xml + dst: /etc/clickhouse-keeper/keeper_config.xml + type: config|noreplace - src: root/usr/bin/clickhouse-keeper dst: /usr/bin/clickhouse-keeper # docs diff --git a/packages/clickhouse-server.yaml 
b/packages/clickhouse-server.yaml index a94ad1e9169..fe59828ca43 100644 --- a/packages/clickhouse-server.yaml +++ b/packages/clickhouse-server.yaml @@ -42,9 +42,12 @@ deb: Source: clickhouse contents: -- src: root/etc/clickhouse-server - dst: /etc/clickhouse-server - type: config +- src: root/etc/clickhouse-server/config.xml + dst: /etc/clickhouse-server/config.xml + type: config|noreplace +- src: root/etc/clickhouse-server/users.xml + dst: /etc/clickhouse-server/users.xml + type: config|noreplace - src: clickhouse-server.init dst: /etc/init.d/clickhouse-server - src: clickhouse-server.service diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index e616cb8cf72..93136df2a5b 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -1088,7 +1088,8 @@ void Client::processConfig() } else { - need_render_progress = config().getBool("progress", false); + std::string progress = config().getString("progress", "tty"); + need_render_progress = (Poco::icompare(progress, "off") && Poco::icompare(progress, "no") && Poco::icompare(progress, "false") && Poco::icompare(progress, "0")); echo_queries = config().getBool("echo", false); ignore_error = config().getBool("ignore-error", false); @@ -1108,15 +1109,21 @@ void Client::processConfig() else format = config().getString("format", is_interactive ? "PrettyCompact" : "TabSeparated"); - format_max_block_size = config().getInt("format_max_block_size", global_context->getSettingsRef().max_block_size); + format_max_block_size = config().getUInt64("format_max_block_size", + global_context->getSettingsRef().max_block_size); insert_format = "Values"; /// Setting value from cmd arg overrides one from config if (global_context->getSettingsRef().max_insert_block_size.changed) + { insert_format_max_block_size = global_context->getSettingsRef().max_insert_block_size; + } else - insert_format_max_block_size = config().getInt("insert_format_max_block_size", global_context->getSettingsRef().max_insert_block_size); + { + insert_format_max_block_size = config().getUInt64("insert_format_max_block_size", + global_context->getSettingsRef().max_insert_block_size); + } ClientInfo & client_info = global_context->getClientInfo(); client_info.setInitialQuery(); diff --git a/programs/client/clickhouse-client.xml b/programs/client/clickhouse-client.xml index 66e7afd8f8c..00f5b26eddf 100644 --- a/programs/client/clickhouse-client.xml +++ b/programs/client/clickhouse-client.xml @@ -19,7 +19,6 @@ {host} {port} {user} - {database} {display_name} Terminal colors: https://misc.flogisoft.com/bash/tip_colors_and_formatting See also: https://wiki.hackzine.org/development/misc/readline-color-prompt.html diff --git a/programs/copier/Aliases.h b/programs/copier/Aliases.h index c4d9c40d9f1..02be3441acd 100644 --- a/programs/copier/Aliases.h +++ b/programs/copier/Aliases.h @@ -1,6 +1,10 @@ #pragma once -#include +#include + +#include + +#include namespace DB { @@ -8,21 +12,4 @@ namespace DB using DatabaseAndTableName = std::pair; using ListOfDatabasesAndTableNames = std::vector; - - /// Hierarchical description of the tasks - struct ShardPartitionPiece; - struct ShardPartition; - struct TaskShard; - struct TaskTable; - struct TaskCluster; - struct ClusterPartition; - - using PartitionPieces = std::vector; - using TasksPartition = std::map>; - using ShardInfo = Cluster::ShardInfo; - using TaskShardPtr = std::shared_ptr; - using TasksShard = std::vector; - using TasksTable = std::list; - using ClusterPartitions = std::map>; } - diff --git 
a/programs/copier/CMakeLists.txt b/programs/copier/CMakeLists.txt index 57e0996ed78..2c17e70bc5e 100644 --- a/programs/copier/CMakeLists.txt +++ b/programs/copier/CMakeLists.txt @@ -1,7 +1,13 @@ set(CLICKHOUSE_COPIER_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/ClusterCopierApp.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/ClusterCopier.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/Internals.cpp") + "${CMAKE_CURRENT_SOURCE_DIR}/Internals.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/ShardPartition.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/ShardPartitionPiece.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/StatusAccumulator.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/TaskCluster.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/TaskShard.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/TaskTable.cpp") set (CLICKHOUSE_COPIER_LINK PRIVATE diff --git a/programs/copier/ClusterCopier.h b/programs/copier/ClusterCopier.h index b354fc59eee..063b13e9078 100644 --- a/programs/copier/ClusterCopier.h +++ b/programs/copier/ClusterCopier.h @@ -3,7 +3,8 @@ #include "Aliases.h" #include "Internals.h" #include "TaskCluster.h" -#include "TaskTableAndShard.h" +#include "TaskShard.h" +#include "TaskTable.h" #include "ShardPartition.h" #include "ShardPartitionPiece.h" #include "ZooKeeperStaff.h" diff --git a/programs/copier/ClusterPartition.h b/programs/copier/ClusterPartition.h index ed69bfa8c26..22063989e22 100644 --- a/programs/copier/ClusterPartition.h +++ b/programs/copier/ClusterPartition.h @@ -1,17 +1,22 @@ #pragma once -#include "Aliases.h" +#include +#include namespace DB { - /// Contains info about all shards that contain a partition - struct ClusterPartition - { - double elapsed_time_seconds = 0; - UInt64 bytes_copied = 0; - UInt64 rows_copied = 0; - UInt64 blocks_copied = 0; - UInt64 total_tries = 0; - }; +/// Contains info about all shards that contain a partition +struct ClusterPartition +{ + double elapsed_time_seconds = 0; + UInt64 bytes_copied = 0; + UInt64 rows_copied = 0; + UInt64 blocks_copied = 0; + + UInt64 total_tries = 0; +}; + +using ClusterPartitions = std::map>; + } diff --git a/programs/copier/ShardPartition.cpp b/programs/copier/ShardPartition.cpp new file mode 100644 index 00000000000..4c962fc807d --- /dev/null +++ b/programs/copier/ShardPartition.cpp @@ -0,0 +1,70 @@ +#include "ShardPartition.h" + +#include "TaskShard.h" +#include "TaskTable.h" + +namespace DB +{ + +ShardPartition::ShardPartition(TaskShard & parent, String name_quoted_, size_t number_of_splits) + : task_shard(parent) + , name(std::move(name_quoted_)) +{ + pieces.reserve(number_of_splits); +} + +String ShardPartition::getPartitionCleanStartPath() const +{ + return getPartitionPath() + "/clean_start"; +} + +String ShardPartition::getPartitionPieceCleanStartPath(size_t current_piece_number) const +{ + assert(current_piece_number < task_shard.task_table.number_of_splits); + return getPartitionPiecePath(current_piece_number) + "/clean_start"; +} + +String ShardPartition::getPartitionPath() const +{ + return task_shard.task_table.getPartitionPath(name); +} + +String ShardPartition::getPartitionPiecePath(size_t current_piece_number) const +{ + assert(current_piece_number < task_shard.task_table.number_of_splits); + return task_shard.task_table.getPartitionPiecePath(name, current_piece_number); +} + +String ShardPartition::getShardStatusPath() const +{ + // schema: //tables///shards/ + // e.g. 
/root/table_test.hits/201701/shards/1 + return getPartitionShardsPath() + "/" + toString(task_shard.numberInCluster()); +} + +String ShardPartition::getPartitionShardsPath() const +{ + return getPartitionPath() + "/shards"; +} + +String ShardPartition::getPartitionActiveWorkersPath() const +{ + return getPartitionPath() + "/partition_active_workers"; +} + +String ShardPartition::getActiveWorkerPath() const +{ + return getPartitionActiveWorkersPath() + "/" + toString(task_shard.numberInCluster()); +} + +String ShardPartition::getCommonPartitionIsDirtyPath() const +{ + return getPartitionPath() + "/is_dirty"; +} + +String ShardPartition::getCommonPartitionIsCleanedPath() const +{ + return getCommonPartitionIsDirtyPath() + "/cleaned"; +} + +} diff --git a/programs/copier/ShardPartition.h b/programs/copier/ShardPartition.h index 7de381977f9..2457213733c 100644 --- a/programs/copier/ShardPartition.h +++ b/programs/copier/ShardPartition.h @@ -1,19 +1,23 @@ #pragma once -#include "Aliases.h" -#include "TaskTableAndShard.h" +#include "ShardPartitionPiece.h" + +#include + +#include namespace DB { +struct TaskShard; + /// Just destination partition of a shard /// I don't know what this comment means. /// In short, when we discovered what shards contain currently processing partition, /// This class describes a partition (name) that is stored on the shard (parent). struct ShardPartition { - ShardPartition(TaskShard &parent, String name_quoted_, size_t number_of_splits = 10) - : task_shard(parent), name(std::move(name_quoted_)) { pieces.reserve(number_of_splits); } + ShardPartition(TaskShard &parent, String name_quoted_, size_t number_of_splits = 10); String getPartitionPath() const; @@ -45,58 +49,6 @@ struct ShardPartition String name; }; -inline String ShardPartition::getPartitionCleanStartPath() const -{ - return getPartitionPath() + "/clean_start"; -} - -inline String ShardPartition::getPartitionPieceCleanStartPath(size_t current_piece_number) const -{ - assert(current_piece_number < task_shard.task_table.number_of_splits); - return getPartitionPiecePath(current_piece_number) + "/clean_start"; -} - -inline String ShardPartition::getPartitionPath() const -{ - return task_shard.task_table.getPartitionPath(name); -} - -inline String ShardPartition::getPartitionPiecePath(size_t current_piece_number) const -{ - assert(current_piece_number < task_shard.task_table.number_of_splits); - return task_shard.task_table.getPartitionPiecePath(name, current_piece_number); -} - -inline String ShardPartition::getShardStatusPath() const -{ - // schema: //tables/
//shards/ - // e.g. /root/table_test.hits/201701/shards/1 - return getPartitionShardsPath() + "/" + toString(task_shard.numberInCluster()); -} - -inline String ShardPartition::getPartitionShardsPath() const -{ - return getPartitionPath() + "/shards"; -} - -inline String ShardPartition::getPartitionActiveWorkersPath() const -{ - return getPartitionPath() + "/partition_active_workers"; -} - -inline String ShardPartition::getActiveWorkerPath() const -{ - return getPartitionActiveWorkersPath() + "/" + toString(task_shard.numberInCluster()); -} - -inline String ShardPartition::getCommonPartitionIsDirtyPath() const -{ - return getPartitionPath() + "/is_dirty"; -} - -inline String ShardPartition::getCommonPartitionIsCleanedPath() const -{ - return getCommonPartitionIsDirtyPath() + "/cleaned"; -} +using TasksPartition = std::map>; } diff --git a/programs/copier/ShardPartitionPiece.cpp b/programs/copier/ShardPartitionPiece.cpp new file mode 100644 index 00000000000..36d1621e012 --- /dev/null +++ b/programs/copier/ShardPartitionPiece.cpp @@ -0,0 +1,64 @@ +#include "ShardPartitionPiece.h" + +#include "ShardPartition.h" +#include "TaskShard.h" + +#include + +namespace DB +{ + +ShardPartitionPiece::ShardPartitionPiece(ShardPartition & parent, size_t current_piece_number_, bool is_present_piece_) + : is_absent_piece(!is_present_piece_) + , current_piece_number(current_piece_number_) + , shard_partition(parent) +{ +} + +String ShardPartitionPiece::getPartitionPiecePath() const +{ + return shard_partition.getPartitionPath() + "/piece_" + toString(current_piece_number); +} + +String ShardPartitionPiece::getPartitionPieceCleanStartPath() const +{ + return getPartitionPiecePath() + "/clean_start"; +} + +String ShardPartitionPiece::getPartitionPieceIsDirtyPath() const +{ + return getPartitionPiecePath() + "/is_dirty"; +} + +String ShardPartitionPiece::getPartitionPieceIsCleanedPath() const +{ + return getPartitionPieceIsDirtyPath() + "/cleaned"; +} + +String ShardPartitionPiece::getPartitionPieceActiveWorkersPath() const +{ + return getPartitionPiecePath() + "/partition_piece_active_workers"; +} + +String ShardPartitionPiece::getActiveWorkerPath() const +{ + return getPartitionPieceActiveWorkersPath() + "/" + toString(shard_partition.task_shard.numberInCluster()); +} + +/// On what shards do we have current partition. 
+String ShardPartitionPiece::getPartitionPieceShardsPath() const +{ + return getPartitionPiecePath() + "/shards"; +} + +String ShardPartitionPiece::getShardStatusPath() const +{ + return getPartitionPieceShardsPath() + "/" + toString(shard_partition.task_shard.numberInCluster()); +} + +String ShardPartitionPiece::getPartitionPieceCleanerPath() const +{ + return getPartitionPieceIsDirtyPath() + "/cleaner"; +} + +} diff --git a/programs/copier/ShardPartitionPiece.h b/programs/copier/ShardPartitionPiece.h index a21fd531da4..aba378d466d 100644 --- a/programs/copier/ShardPartitionPiece.h +++ b/programs/copier/ShardPartitionPiece.h @@ -1,16 +1,15 @@ #pragma once -#include "Internals.h" +#include namespace DB { +struct ShardPartition; + struct ShardPartitionPiece { - - ShardPartitionPiece(ShardPartition &parent, size_t current_piece_number_, bool is_present_piece_) - : is_absent_piece(!is_present_piece_), current_piece_number(current_piece_number_), - shard_partition(parent) {} + ShardPartitionPiece(ShardPartition & parent, size_t current_piece_number_, bool is_present_piece_); String getPartitionPiecePath() const; @@ -37,52 +36,6 @@ struct ShardPartitionPiece ShardPartition & shard_partition; }; - -inline String ShardPartitionPiece::getPartitionPiecePath() const -{ - return shard_partition.getPartitionPath() + "/piece_" + toString(current_piece_number); -} - -inline String ShardPartitionPiece::getPartitionPieceCleanStartPath() const -{ - return getPartitionPiecePath() + "/clean_start"; -} - -inline String ShardPartitionPiece::getPartitionPieceIsDirtyPath() const -{ - return getPartitionPiecePath() + "/is_dirty"; -} - -inline String ShardPartitionPiece::getPartitionPieceIsCleanedPath() const -{ - return getPartitionPieceIsDirtyPath() + "/cleaned"; -} - -inline String ShardPartitionPiece::getPartitionPieceActiveWorkersPath() const -{ - return getPartitionPiecePath() + "/partition_piece_active_workers"; -} - -inline String ShardPartitionPiece::getActiveWorkerPath() const -{ - return getPartitionPieceActiveWorkersPath() + "/" + toString(shard_partition.task_shard.numberInCluster()); -} - -/// On what shards do we have current partition. 
-inline String ShardPartitionPiece::getPartitionPieceShardsPath() const -{ - return getPartitionPiecePath() + "/shards"; -} - -inline String ShardPartitionPiece::getShardStatusPath() const -{ - return getPartitionPieceShardsPath() + "/" + toString(shard_partition.task_shard.numberInCluster()); -} - -inline String ShardPartitionPiece::getPartitionPieceCleanerPath() const -{ - return getPartitionPieceIsDirtyPath() + "/cleaner"; -} - +using PartitionPieces = std::vector; } diff --git a/programs/copier/StatusAccumulator.cpp b/programs/copier/StatusAccumulator.cpp new file mode 100644 index 00000000000..77adeac708c --- /dev/null +++ b/programs/copier/StatusAccumulator.cpp @@ -0,0 +1,48 @@ +#include "StatusAccumulator.h" + +#include +#include +#include +#include + +#include + +namespace DB +{ + +StatusAccumulator::MapPtr StatusAccumulator::fromJSON(String state_json) +{ + Poco::JSON::Parser parser; + auto state = parser.parse(state_json).extract(); + MapPtr result_ptr = std::make_shared(); + for (const auto & table_name : state->getNames()) + { + auto table_status_json = state->getValue(table_name); + auto table_status = parser.parse(table_status_json).extract(); + /// Map entry will be created if it is absent + auto & map_table_status = (*result_ptr)[table_name]; + map_table_status.all_partitions_count += table_status->getValue("all_partitions_count"); + map_table_status.processed_partitions_count += table_status->getValue("processed_partitions_count"); + } + return result_ptr; +} + +String StatusAccumulator::serializeToJSON(MapPtr statuses) +{ + Poco::JSON::Object result_json; + for (const auto & [table_name, table_status] : *statuses) + { + Poco::JSON::Object status_json; + status_json.set("all_partitions_count", table_status.all_partitions_count); + status_json.set("processed_partitions_count", table_status.processed_partitions_count); + + result_json.set(table_name, status_json); + } + std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM + oss.exceptions(std::ios::failbit); + Poco::JSON::Stringifier::stringify(result_json, oss); + auto result = oss.str(); + return result; +} + +} diff --git a/programs/copier/StatusAccumulator.h b/programs/copier/StatusAccumulator.h index 6e20e3dc95d..d420b611602 100644 --- a/programs/copier/StatusAccumulator.h +++ b/programs/copier/StatusAccumulator.h @@ -1,65 +1,27 @@ #pragma once +#include -#include -#include -#include -#include - -#include #include -#include -#include +#include namespace DB { class StatusAccumulator { - public: - struct TableStatus - { - size_t all_partitions_count; - size_t processed_partitions_count; - }; +public: + struct TableStatus + { + size_t all_partitions_count; + size_t processed_partitions_count; + }; - using Map = std::unordered_map; - using MapPtr = std::shared_ptr; + using Map = std::unordered_map; + using MapPtr = std::shared_ptr; - static MapPtr fromJSON(std::string state_json) - { - Poco::JSON::Parser parser; - auto state = parser.parse(state_json).extract(); - MapPtr result_ptr = std::make_shared(); - for (const auto & table_name : state->getNames()) - { - auto table_status_json = state->getValue(table_name); - auto table_status = parser.parse(table_status_json).extract(); - /// Map entry will be created if it is absent - auto & map_table_status = (*result_ptr)[table_name]; - map_table_status.all_partitions_count += table_status->getValue("all_partitions_count"); - map_table_status.processed_partitions_count += table_status->getValue("processed_partitions_count"); - } - return result_ptr; - } - - static 
std::string serializeToJSON(MapPtr statuses) - { - Poco::JSON::Object result_json; - for (const auto & [table_name, table_status] : *statuses) - { - Poco::JSON::Object status_json; - status_json.set("all_partitions_count", table_status.all_partitions_count); - status_json.set("processed_partitions_count", table_status.processed_partitions_count); - - result_json.set(table_name, status_json); - } - std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM - oss.exceptions(std::ios::failbit); - Poco::JSON::Stringifier::stringify(result_json, oss); - auto result = oss.str(); - return result; - } + static MapPtr fromJSON(String state_json); + static String serializeToJSON(MapPtr statuses); }; } diff --git a/programs/copier/TaskCluster.cpp b/programs/copier/TaskCluster.cpp new file mode 100644 index 00000000000..957c7d2120d --- /dev/null +++ b/programs/copier/TaskCluster.cpp @@ -0,0 +1,74 @@ +#include "TaskCluster.h" + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + +TaskCluster::TaskCluster(const String & task_zookeeper_path_, const String & default_local_database_) + : task_zookeeper_path(task_zookeeper_path_) + , default_local_database(default_local_database_) +{} + +void DB::TaskCluster::loadTasks(const Poco::Util::AbstractConfiguration & config, const String & base_key) +{ + String prefix = base_key.empty() ? "" : base_key + "."; + + clusters_prefix = prefix + "remote_servers"; + if (!config.has(clusters_prefix)) + throw Exception("You should specify list of clusters in " + clusters_prefix, ErrorCodes::BAD_ARGUMENTS); + + Poco::Util::AbstractConfiguration::Keys tables_keys; + config.keys(prefix + "tables", tables_keys); + + for (const auto & table_key : tables_keys) + { + table_tasks.emplace_back(*this, config, prefix + "tables", table_key); + } +} + +void DB::TaskCluster::reloadSettings(const Poco::Util::AbstractConfiguration & config, const String & base_key) +{ + String prefix = base_key.empty() ? "" : base_key + "."; + + max_workers = config.getUInt64(prefix + "max_workers"); + + settings_common = Settings(); + if (config.has(prefix + "settings")) + settings_common.loadSettingsFromConfig(prefix + "settings", config); + + settings_common.prefer_localhost_replica = false; + + settings_pull = settings_common; + if (config.has(prefix + "settings_pull")) + settings_pull.loadSettingsFromConfig(prefix + "settings_pull", config); + + settings_push = settings_common; + if (config.has(prefix + "settings_push")) + settings_push.loadSettingsFromConfig(prefix + "settings_push", config); + + auto set_default_value = [] (auto && setting, auto && default_value) + { + setting = setting.changed ? 
setting.value : default_value; + }; + + /// Override important settings + settings_pull.readonly = 1; + settings_pull.prefer_localhost_replica = false; + settings_push.insert_distributed_sync = true; + settings_push.prefer_localhost_replica = false; + + set_default_value(settings_pull.load_balancing, LoadBalancing::NEAREST_HOSTNAME); + set_default_value(settings_pull.max_threads, 1); + set_default_value(settings_pull.max_block_size, 8192UL); + set_default_value(settings_pull.preferred_block_size_bytes, 0); + + set_default_value(settings_push.insert_distributed_timeout, 0); + set_default_value(settings_push.replication_alter_partitions_sync, 2); +} + +} + diff --git a/programs/copier/TaskCluster.h b/programs/copier/TaskCluster.h index 7d8f01ba15f..fc1c8a663ec 100644 --- a/programs/copier/TaskCluster.h +++ b/programs/copier/TaskCluster.h @@ -1,21 +1,20 @@ #pragma once -#include "Aliases.h" +#include "TaskTable.h" + +#include +#include + #include +#include + namespace DB { -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} struct TaskCluster { - TaskCluster(const String & task_zookeeper_path_, const String & default_local_database_) - : task_zookeeper_path(task_zookeeper_path_) - , default_local_database(default_local_database_) - {} + TaskCluster(const String & task_zookeeper_path_, const String & default_local_database_); void loadTasks(const Poco::Util::AbstractConfiguration & config, const String & base_key = ""); @@ -50,61 +49,4 @@ struct TaskCluster pcg64 random_engine; }; -inline void DB::TaskCluster::loadTasks(const Poco::Util::AbstractConfiguration & config, const String & base_key) -{ - String prefix = base_key.empty() ? "" : base_key + "."; - - clusters_prefix = prefix + "remote_servers"; - if (!config.has(clusters_prefix)) - throw Exception("You should specify list of clusters in " + clusters_prefix, ErrorCodes::BAD_ARGUMENTS); - - Poco::Util::AbstractConfiguration::Keys tables_keys; - config.keys(prefix + "tables", tables_keys); - - for (const auto & table_key : tables_keys) - { - table_tasks.emplace_back(*this, config, prefix + "tables", table_key); - } -} - -inline void DB::TaskCluster::reloadSettings(const Poco::Util::AbstractConfiguration & config, const String & base_key) -{ - String prefix = base_key.empty() ? "" : base_key + "."; - - max_workers = config.getUInt64(prefix + "max_workers"); - - settings_common = Settings(); - if (config.has(prefix + "settings")) - settings_common.loadSettingsFromConfig(prefix + "settings", config); - - settings_common.prefer_localhost_replica = 0; - - settings_pull = settings_common; - if (config.has(prefix + "settings_pull")) - settings_pull.loadSettingsFromConfig(prefix + "settings_pull", config); - - settings_push = settings_common; - if (config.has(prefix + "settings_push")) - settings_push.loadSettingsFromConfig(prefix + "settings_push", config); - - auto set_default_value = [] (auto && setting, auto && default_value) - { - setting = setting.changed ? 
setting.value : default_value; - }; - - /// Override important settings - settings_pull.readonly = 1; - settings_pull.prefer_localhost_replica = false; - settings_push.insert_distributed_sync = true; - settings_push.prefer_localhost_replica = false; - - set_default_value(settings_pull.load_balancing, LoadBalancing::NEAREST_HOSTNAME); - set_default_value(settings_pull.max_threads, 1); - set_default_value(settings_pull.max_block_size, 8192UL); - set_default_value(settings_pull.preferred_block_size_bytes, 0); - - set_default_value(settings_push.insert_distributed_timeout, 0); - set_default_value(settings_push.replication_alter_partitions_sync, 2); -} - } diff --git a/programs/copier/TaskShard.cpp b/programs/copier/TaskShard.cpp new file mode 100644 index 00000000000..d156f451a84 --- /dev/null +++ b/programs/copier/TaskShard.cpp @@ -0,0 +1,37 @@ +#include "TaskShard.h" + +#include "TaskTable.h" + +namespace DB +{ + +TaskShard::TaskShard(TaskTable & parent, const Cluster::ShardInfo & info_) + : task_table(parent) + , info(info_) +{ + list_of_split_tables_on_shard.assign(task_table.number_of_splits, DatabaseAndTableName()); +} + +UInt32 TaskShard::numberInCluster() const +{ + return info.shard_num; +} + +UInt32 TaskShard::indexInCluster() const +{ + return info.shard_num - 1; +} + +String DB::TaskShard::getDescription() const +{ + return fmt::format("N{} (having a replica {}, pull table {} of cluster {}", + numberInCluster(), getHostNameExample(), getQuotedTable(task_table.table_pull), task_table.cluster_pull_name); +} + +String DB::TaskShard::getHostNameExample() const +{ + const auto & replicas = task_table.cluster_pull->getShardsAddresses().at(indexInCluster()); + return replicas.at(0).readableString(); +} + +} diff --git a/programs/copier/TaskShard.h b/programs/copier/TaskShard.h new file mode 100644 index 00000000000..05d652077ea --- /dev/null +++ b/programs/copier/TaskShard.h @@ -0,0 +1,56 @@ +#pragma once + +#include "Aliases.h" +#include "Internals.h" +#include "ClusterPartition.h" +#include "ShardPartition.h" + + +namespace DB +{ + +struct TaskTable; + +struct TaskShard +{ + TaskShard(TaskTable & parent, const Cluster::ShardInfo & info_); + + TaskTable & task_table; + + Cluster::ShardInfo info; + + UInt32 numberInCluster() const; + + UInt32 indexInCluster() const; + + String getDescription() const; + + String getHostNameExample() const; + + /// Used to sort clusters by their proximity + ShardPriority priority; + + /// Column with unique destination partitions (computed from engine_push_partition_key expr.) 
in the shard + ColumnWithTypeAndName partition_key_column; + + /// There is a task for each destination partition + TasksPartition partition_tasks; + + /// Which partitions have been checked for existence + /// If some partition from this lists is exists, it is in partition_tasks + std::set checked_partitions; + + /// Last CREATE TABLE query of the table of the shard + ASTPtr current_pull_table_create_query; + ASTPtr current_push_table_create_query; + + /// Internal distributed tables + DatabaseAndTableName table_read_shard; + DatabaseAndTableName main_table_split_shard; + ListOfDatabasesAndTableNames list_of_split_tables_on_shard; +}; + +using TaskShardPtr = std::shared_ptr; +using TasksShard = std::vector; + +} diff --git a/programs/copier/TaskTable.cpp b/programs/copier/TaskTable.cpp new file mode 100644 index 00000000000..5b09a9c99a7 --- /dev/null +++ b/programs/copier/TaskTable.cpp @@ -0,0 +1,221 @@ +#include "TaskTable.h" + +#include "ClusterPartition.h" +#include "TaskCluster.h" + +#include + +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int UNKNOWN_ELEMENT_IN_CONFIG; + extern const int LOGICAL_ERROR; +} + +TaskTable::TaskTable(TaskCluster & parent, const Poco::Util::AbstractConfiguration & config, + const String & prefix_, const String & table_key) + : task_cluster(parent) +{ + String table_prefix = prefix_ + "." + table_key + "."; + + name_in_config = table_key; + + number_of_splits = config.getUInt64(table_prefix + "number_of_splits", 3); + + allow_to_copy_alias_and_materialized_columns = config.getBool(table_prefix + "allow_to_copy_alias_and_materialized_columns", false); + allow_to_drop_target_partitions = config.getBool(table_prefix + "allow_to_drop_target_partitions", false); + + cluster_pull_name = config.getString(table_prefix + "cluster_pull"); + cluster_push_name = config.getString(table_prefix + "cluster_push"); + + table_pull.first = config.getString(table_prefix + "database_pull"); + table_pull.second = config.getString(table_prefix + "table_pull"); + + table_push.first = config.getString(table_prefix + "database_push"); + table_push.second = config.getString(table_prefix + "table_push"); + + /// Used as node name in ZooKeeper + table_id = escapeForFileName(cluster_push_name) + + "." + escapeForFileName(table_push.first) + + "." 
+ escapeForFileName(table_push.second); + + engine_push_str = config.getString(table_prefix + "engine", "rand()"); + + { + ParserStorage parser_storage; + engine_push_ast = parseQuery(parser_storage, engine_push_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + engine_push_partition_key_ast = extractPartitionKey(engine_push_ast); + primary_key_comma_separated = boost::algorithm::join(extractPrimaryKeyColumnNames(engine_push_ast), ", "); + is_replicated_table = isReplicatedTableEngine(engine_push_ast); + } + + sharding_key_str = config.getString(table_prefix + "sharding_key"); + + auxiliary_engine_split_asts.reserve(number_of_splits); + { + ParserExpressionWithOptionalAlias parser_expression(false); + sharding_key_ast = parseQuery(parser_expression, sharding_key_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + main_engine_split_ast = createASTStorageDistributed(cluster_push_name, table_push.first, table_push.second, + sharding_key_ast); + + for (const auto piece_number : collections::range(0, number_of_splits)) + { + auxiliary_engine_split_asts.emplace_back + ( + createASTStorageDistributed(cluster_push_name, table_push.first, + table_push.second + "_piece_" + toString(piece_number), sharding_key_ast) + ); + } + } + + where_condition_str = config.getString(table_prefix + "where_condition", ""); + if (!where_condition_str.empty()) + { + ParserExpressionWithOptionalAlias parser_expression(false); + where_condition_ast = parseQuery(parser_expression, where_condition_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + + // Will use canonical expression form + where_condition_str = queryToString(where_condition_ast); + } + + String enabled_partitions_prefix = table_prefix + "enabled_partitions"; + has_enabled_partitions = config.has(enabled_partitions_prefix); + + if (has_enabled_partitions) + { + Strings keys; + config.keys(enabled_partitions_prefix, keys); + + if (keys.empty()) + { + /// Parse list of partition from space-separated string + String partitions_str = config.getString(table_prefix + "enabled_partitions"); + boost::trim_if(partitions_str, isWhitespaceASCII); + boost::split(enabled_partitions, partitions_str, isWhitespaceASCII, boost::token_compress_on); + } + else + { + /// Parse sequence of ... + for (const String &key : keys) + { + if (!startsWith(key, "partition")) + throw Exception("Unknown key " + key + " in " + enabled_partitions_prefix, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); + + enabled_partitions.emplace_back(config.getString(enabled_partitions_prefix + "." 
+ key)); + } + } + + std::copy(enabled_partitions.begin(), enabled_partitions.end(), std::inserter(enabled_partitions_set, enabled_partitions_set.begin())); + } +} + + +String TaskTable::getPartitionPath(const String & partition_name) const +{ + return task_cluster.task_zookeeper_path // root + + "/tables/" + table_id // tables/dst_cluster.merge.hits + + "/" + escapeForFileName(partition_name); // 201701 +} + +String TaskTable::getPartitionAttachIsActivePath(const String & partition_name) const +{ + return getPartitionPath(partition_name) + "/attach_active"; +} + +String TaskTable::getPartitionAttachIsDonePath(const String & partition_name) const +{ + return getPartitionPath(partition_name) + "/attach_is_done"; +} + +String TaskTable::getPartitionPiecePath(const String & partition_name, size_t piece_number) const +{ + assert(piece_number < number_of_splits); + return getPartitionPath(partition_name) + "/piece_" + toString(piece_number); // 1...number_of_splits +} + +String TaskTable::getCertainPartitionIsDirtyPath(const String &partition_name) const +{ + return getPartitionPath(partition_name) + "/is_dirty"; +} + +String TaskTable::getCertainPartitionPieceIsDirtyPath(const String & partition_name, const size_t piece_number) const +{ + return getPartitionPiecePath(partition_name, piece_number) + "/is_dirty"; +} + +String TaskTable::getCertainPartitionIsCleanedPath(const String & partition_name) const +{ + return getCertainPartitionIsDirtyPath(partition_name) + "/cleaned"; +} + +String TaskTable::getCertainPartitionPieceIsCleanedPath(const String & partition_name, const size_t piece_number) const +{ + return getCertainPartitionPieceIsDirtyPath(partition_name, piece_number) + "/cleaned"; +} + +String TaskTable::getCertainPartitionTaskStatusPath(const String & partition_name) const +{ + return getPartitionPath(partition_name) + "/shards"; +} + +String TaskTable::getCertainPartitionPieceTaskStatusPath(const String & partition_name, const size_t piece_number) const +{ + return getPartitionPiecePath(partition_name, piece_number) + "/shards"; +} + +bool TaskTable::isReplicatedTable() const +{ + return is_replicated_table; +} + +String TaskTable::getStatusAllPartitionCount() const +{ + return task_cluster.task_zookeeper_path + "/status/all_partitions_count"; +} + +String TaskTable::getStatusProcessedPartitionsCount() const +{ + return task_cluster.task_zookeeper_path + "/status/processed_partitions_count"; +} + +ASTPtr TaskTable::rewriteReplicatedCreateQueryToPlain() const +{ + ASTPtr prev_engine_push_ast = engine_push_ast->clone(); + + auto & new_storage_ast = prev_engine_push_ast->as(); + auto & new_engine_ast = new_storage_ast.engine->as(); + + /// Remove "Replicated" from name + new_engine_ast.name = new_engine_ast.name.substr(10); + + if (new_engine_ast.arguments) + { + auto & replicated_table_arguments = new_engine_ast.arguments->children; + + + /// In some cases of Atomic database engine usage ReplicatedMergeTree tables + /// could be created without arguments. + if (!replicated_table_arguments.empty()) + { + /// Delete first two arguments of Replicated...MergeTree() table. 
+ replicated_table_arguments.erase(replicated_table_arguments.begin()); + replicated_table_arguments.erase(replicated_table_arguments.begin()); + } + } + + return new_storage_ast.clone(); +} + +ClusterPartition & TaskTable::getClusterPartition(const String & partition_name) +{ + auto it = cluster_partitions.find(partition_name); + if (it == cluster_partitions.end()) + throw Exception("There are no cluster partition " + partition_name + " in " + table_id, + ErrorCodes::LOGICAL_ERROR); + return it->second; +} + +} diff --git a/programs/copier/TaskTable.h b/programs/copier/TaskTable.h new file mode 100644 index 00000000000..2bb7f078bc6 --- /dev/null +++ b/programs/copier/TaskTable.h @@ -0,0 +1,173 @@ +#pragma once + +#include "Aliases.h" +#include "TaskShard.h" + + +namespace DB +{ + +struct ClusterPartition; +struct TaskCluster; + +struct TaskTable +{ + TaskTable(TaskCluster & parent, const Poco::Util::AbstractConfiguration & config, const String & prefix, const String & table_key); + + TaskCluster & task_cluster; + + /// These functions used in checkPartitionIsDone() or checkPartitionPieceIsDone() + /// They are implemented here not to call task_table.tasks_shard[partition_name].second.pieces[current_piece_number] etc. + + String getPartitionPath(const String & partition_name) const; + + String getPartitionAttachIsActivePath(const String & partition_name) const; + + String getPartitionAttachIsDonePath(const String & partition_name) const; + + String getPartitionPiecePath(const String & partition_name, size_t piece_number) const; + + String getCertainPartitionIsDirtyPath(const String & partition_name) const; + + String getCertainPartitionPieceIsDirtyPath(const String & partition_name, size_t piece_number) const; + + String getCertainPartitionIsCleanedPath(const String & partition_name) const; + + String getCertainPartitionPieceIsCleanedPath(const String & partition_name, size_t piece_number) const; + + String getCertainPartitionTaskStatusPath(const String & partition_name) const; + + String getCertainPartitionPieceTaskStatusPath(const String & partition_name, size_t piece_number) const; + + bool isReplicatedTable() const; + + /// These nodes are used for check-status option + String getStatusAllPartitionCount() const; + String getStatusProcessedPartitionsCount() const; + + /// Partitions will be split into number-of-splits pieces. + /// Each piece will be copied independently. 
(10 by default) + size_t number_of_splits; + + bool allow_to_copy_alias_and_materialized_columns{false}; + bool allow_to_drop_target_partitions{false}; + + String name_in_config; + + /// Used as task ID + String table_id; + + /// Column names in primary key + String primary_key_comma_separated; + + /// Source cluster and table + String cluster_pull_name; + DatabaseAndTableName table_pull; + + /// Destination cluster and table + String cluster_push_name; + DatabaseAndTableName table_push; + + /// Storage of destination table + /// (tables that are stored on each shard of target cluster) + String engine_push_str; + ASTPtr engine_push_ast; + ASTPtr engine_push_partition_key_ast; + + /// First argument of Replicated...MergeTree() + String engine_push_zk_path; + bool is_replicated_table; + + ASTPtr rewriteReplicatedCreateQueryToPlain() const; + + /* + * A Distributed table definition used to split data + * Distributed table will be created on each shard of default + * cluster to perform data copying and resharding + * */ + String sharding_key_str; + ASTPtr sharding_key_ast; + ASTPtr main_engine_split_ast; + + /* + * To copy partition piece form one cluster to another we have to use Distributed table. + * In case of usage separate table (engine_push) for each partition piece, + * we have to use many Distributed tables. + * */ + ASTs auxiliary_engine_split_asts; + + /// Additional WHERE expression to filter input data + String where_condition_str; + ASTPtr where_condition_ast; + + /// Resolved clusters + ClusterPtr cluster_pull; + ClusterPtr cluster_push; + + /// Filter partitions that should be copied + bool has_enabled_partitions = false; + Strings enabled_partitions; + NameSet enabled_partitions_set; + + /** + * Prioritized list of shards + * all_shards contains information about all shards in the table. + * So we have to check whether particular shard have current partition or not while processing. + */ + TasksShard all_shards; + TasksShard local_shards; + + /// All partitions of the current table. 
+ ClusterPartitions cluster_partitions; + NameSet finished_cluster_partitions; + + /// Partition names to process in user-specified order + Strings ordered_partition_names; + + ClusterPartition & getClusterPartition(const String & partition_name); + + Stopwatch watch; + UInt64 bytes_copied = 0; + UInt64 rows_copied = 0; + + template + void initShards(RandomEngine &&random_engine); +}; + +using TasksTable = std::list; + + +template +inline void TaskTable::initShards(RandomEngine && random_engine) +{ + const String & fqdn_name = getFQDNOrHostName(); + std::uniform_int_distribution get_urand(0, std::numeric_limits::max()); + + // Compute the priority + for (const auto & shard_info : cluster_pull->getShardsInfo()) + { + TaskShardPtr task_shard = std::make_shared(*this, shard_info); + const auto & replicas = cluster_pull->getShardsAddresses().at(task_shard->indexInCluster()); + task_shard->priority = getReplicasPriority(replicas, fqdn_name, get_urand(random_engine)); + + all_shards.emplace_back(task_shard); + } + + // Sort by priority + std::sort(all_shards.begin(), all_shards.end(), + [](const TaskShardPtr & lhs, const TaskShardPtr & rhs) + { + return ShardPriority::greaterPriority(lhs->priority, rhs->priority); + }); + + // Cut local shards + auto it_first_remote = std::lower_bound(all_shards.begin(), all_shards.end(), 1, + [](const TaskShardPtr & lhs, UInt8 is_remote) + { + return lhs->priority.is_remote < is_remote; + }); + + local_shards.assign(all_shards.begin(), it_first_remote); +} + +} diff --git a/programs/copier/TaskTableAndShard.h b/programs/copier/TaskTableAndShard.h deleted file mode 100644 index cef9b669971..00000000000 --- a/programs/copier/TaskTableAndShard.h +++ /dev/null @@ -1,434 +0,0 @@ -#pragma once - -#include "Aliases.h" -#include "Internals.h" -#include "ClusterPartition.h" - -#include -#include - -#include -#include - - -namespace DB -{ -namespace ErrorCodes -{ - extern const int UNKNOWN_ELEMENT_IN_CONFIG; - extern const int LOGICAL_ERROR; -} - -struct TaskShard; - -struct TaskTable -{ - TaskTable(TaskCluster & parent, const Poco::Util::AbstractConfiguration & config, const String & prefix, - const String & table_key); - - TaskCluster & task_cluster; - - /// These functions used in checkPartitionIsDone() or checkPartitionPieceIsDone() - /// They are implemented here not to call task_table.tasks_shard[partition_name].second.pieces[current_piece_number] etc. 
- - String getPartitionPath(const String & partition_name) const; - - String getPartitionAttachIsActivePath(const String & partition_name) const; - - String getPartitionAttachIsDonePath(const String & partition_name) const; - - String getPartitionPiecePath(const String & partition_name, size_t piece_number) const; - - String getCertainPartitionIsDirtyPath(const String & partition_name) const; - - String getCertainPartitionPieceIsDirtyPath(const String & partition_name, size_t piece_number) const; - - String getCertainPartitionIsCleanedPath(const String & partition_name) const; - - String getCertainPartitionPieceIsCleanedPath(const String & partition_name, size_t piece_number) const; - - String getCertainPartitionTaskStatusPath(const String & partition_name) const; - - String getCertainPartitionPieceTaskStatusPath(const String & partition_name, size_t piece_number) const; - - bool isReplicatedTable() const { return is_replicated_table; } - - /// These nodes are used for check-status option - String getStatusAllPartitionCount() const; - String getStatusProcessedPartitionsCount() const; - - /// Partitions will be split into number-of-splits pieces. - /// Each piece will be copied independently. (10 by default) - size_t number_of_splits; - - bool allow_to_copy_alias_and_materialized_columns{false}; - bool allow_to_drop_target_partitions{false}; - - String name_in_config; - - /// Used as task ID - String table_id; - - /// Column names in primary key - String primary_key_comma_separated; - - /// Source cluster and table - String cluster_pull_name; - DatabaseAndTableName table_pull; - - /// Destination cluster and table - String cluster_push_name; - DatabaseAndTableName table_push; - - /// Storage of destination table - /// (tables that are stored on each shard of target cluster) - String engine_push_str; - ASTPtr engine_push_ast; - ASTPtr engine_push_partition_key_ast; - - /// First argument of Replicated...MergeTree() - String engine_push_zk_path; - bool is_replicated_table; - - ASTPtr rewriteReplicatedCreateQueryToPlain() const; - - /* - * A Distributed table definition used to split data - * Distributed table will be created on each shard of default - * cluster to perform data copying and resharding - * */ - String sharding_key_str; - ASTPtr sharding_key_ast; - ASTPtr main_engine_split_ast; - - /* - * To copy partition piece form one cluster to another we have to use Distributed table. - * In case of usage separate table (engine_push) for each partition piece, - * we have to use many Distributed tables. - * */ - ASTs auxiliary_engine_split_asts; - - /// Additional WHERE expression to filter input data - String where_condition_str; - ASTPtr where_condition_ast; - - /// Resolved clusters - ClusterPtr cluster_pull; - ClusterPtr cluster_push; - - /// Filter partitions that should be copied - bool has_enabled_partitions = false; - Strings enabled_partitions; - NameSet enabled_partitions_set; - - /** - * Prioritized list of shards - * all_shards contains information about all shards in the table. - * So we have to check whether particular shard have current partition or not while processing. - */ - TasksShard all_shards; - TasksShard local_shards; - - /// All partitions of the current table. 
- ClusterPartitions cluster_partitions; - NameSet finished_cluster_partitions; - - /// Partition names to process in user-specified order - Strings ordered_partition_names; - - ClusterPartition & getClusterPartition(const String & partition_name) - { - auto it = cluster_partitions.find(partition_name); - if (it == cluster_partitions.end()) - throw Exception("There are no cluster partition " + partition_name + " in " + table_id, - ErrorCodes::LOGICAL_ERROR); - return it->second; - } - - Stopwatch watch; - UInt64 bytes_copied = 0; - UInt64 rows_copied = 0; - - template - void initShards(RandomEngine &&random_engine); -}; - - -struct TaskShard -{ - TaskShard(TaskTable & parent, const ShardInfo & info_) : task_table(parent), info(info_) - { - list_of_split_tables_on_shard.assign(task_table.number_of_splits, DatabaseAndTableName()); - } - - TaskTable & task_table; - - ShardInfo info; - - UInt32 numberInCluster() const { return info.shard_num; } - - UInt32 indexInCluster() const { return info.shard_num - 1; } - - String getDescription() const; - - String getHostNameExample() const; - - /// Used to sort clusters by their proximity - ShardPriority priority; - - /// Column with unique destination partitions (computed from engine_push_partition_key expr.) in the shard - ColumnWithTypeAndName partition_key_column; - - /// There is a task for each destination partition - TasksPartition partition_tasks; - - /// Which partitions have been checked for existence - /// If some partition from this lists is exists, it is in partition_tasks - std::set checked_partitions; - - /// Last CREATE TABLE query of the table of the shard - ASTPtr current_pull_table_create_query; - ASTPtr current_push_table_create_query; - - /// Internal distributed tables - DatabaseAndTableName table_read_shard; - DatabaseAndTableName main_table_split_shard; - ListOfDatabasesAndTableNames list_of_split_tables_on_shard; -}; - - -inline String TaskTable::getPartitionPath(const String & partition_name) const -{ - return task_cluster.task_zookeeper_path // root - + "/tables/" + table_id // tables/dst_cluster.merge.hits - + "/" + escapeForFileName(partition_name); // 201701 -} - -inline String TaskTable::getPartitionAttachIsActivePath(const String & partition_name) const -{ - return getPartitionPath(partition_name) + "/attach_active"; -} - -inline String TaskTable::getPartitionAttachIsDonePath(const String & partition_name) const -{ - return getPartitionPath(partition_name) + "/attach_is_done"; -} - -inline String TaskTable::getPartitionPiecePath(const String & partition_name, size_t piece_number) const -{ - assert(piece_number < number_of_splits); - return getPartitionPath(partition_name) + "/piece_" + toString(piece_number); // 1...number_of_splits -} - -inline String TaskTable::getCertainPartitionIsDirtyPath(const String &partition_name) const -{ - return getPartitionPath(partition_name) + "/is_dirty"; -} - -inline String TaskTable::getCertainPartitionPieceIsDirtyPath(const String & partition_name, const size_t piece_number) const -{ - return getPartitionPiecePath(partition_name, piece_number) + "/is_dirty"; -} - -inline String TaskTable::getCertainPartitionIsCleanedPath(const String & partition_name) const -{ - return getCertainPartitionIsDirtyPath(partition_name) + "/cleaned"; -} - -inline String TaskTable::getCertainPartitionPieceIsCleanedPath(const String & partition_name, const size_t piece_number) const -{ - return getCertainPartitionPieceIsDirtyPath(partition_name, piece_number) + "/cleaned"; -} - -inline String 
TaskTable::getCertainPartitionTaskStatusPath(const String & partition_name) const -{ - return getPartitionPath(partition_name) + "/shards"; -} - -inline String TaskTable::getCertainPartitionPieceTaskStatusPath(const String & partition_name, const size_t piece_number) const -{ - return getPartitionPiecePath(partition_name, piece_number) + "/shards"; -} - -inline String TaskTable::getStatusAllPartitionCount() const -{ - return task_cluster.task_zookeeper_path + "/status/all_partitions_count"; -} - -inline String TaskTable::getStatusProcessedPartitionsCount() const -{ - return task_cluster.task_zookeeper_path + "/status/processed_partitions_count"; -} - -inline TaskTable::TaskTable(TaskCluster & parent, const Poco::Util::AbstractConfiguration & config, - const String & prefix_, const String & table_key) - : task_cluster(parent) -{ - String table_prefix = prefix_ + "." + table_key + "."; - - name_in_config = table_key; - - number_of_splits = config.getUInt64(table_prefix + "number_of_splits", 3); - - allow_to_copy_alias_and_materialized_columns = config.getBool(table_prefix + "allow_to_copy_alias_and_materialized_columns", false); - allow_to_drop_target_partitions = config.getBool(table_prefix + "allow_to_drop_target_partitions", false); - - cluster_pull_name = config.getString(table_prefix + "cluster_pull"); - cluster_push_name = config.getString(table_prefix + "cluster_push"); - - table_pull.first = config.getString(table_prefix + "database_pull"); - table_pull.second = config.getString(table_prefix + "table_pull"); - - table_push.first = config.getString(table_prefix + "database_push"); - table_push.second = config.getString(table_prefix + "table_push"); - - /// Used as node name in ZooKeeper - table_id = escapeForFileName(cluster_push_name) - + "." + escapeForFileName(table_push.first) - + "." 
+ escapeForFileName(table_push.second); - - engine_push_str = config.getString(table_prefix + "engine", "rand()"); - - { - ParserStorage parser_storage; - engine_push_ast = parseQuery(parser_storage, engine_push_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); - engine_push_partition_key_ast = extractPartitionKey(engine_push_ast); - primary_key_comma_separated = boost::algorithm::join(extractPrimaryKeyColumnNames(engine_push_ast), ", "); - is_replicated_table = isReplicatedTableEngine(engine_push_ast); - } - - sharding_key_str = config.getString(table_prefix + "sharding_key"); - - auxiliary_engine_split_asts.reserve(number_of_splits); - { - ParserExpressionWithOptionalAlias parser_expression(false); - sharding_key_ast = parseQuery(parser_expression, sharding_key_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); - main_engine_split_ast = createASTStorageDistributed(cluster_push_name, table_push.first, table_push.second, - sharding_key_ast); - - for (const auto piece_number : collections::range(0, number_of_splits)) - { - auxiliary_engine_split_asts.emplace_back - ( - createASTStorageDistributed(cluster_push_name, table_push.first, - table_push.second + "_piece_" + toString(piece_number), sharding_key_ast) - ); - } - } - - where_condition_str = config.getString(table_prefix + "where_condition", ""); - if (!where_condition_str.empty()) - { - ParserExpressionWithOptionalAlias parser_expression(false); - where_condition_ast = parseQuery(parser_expression, where_condition_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); - - // Will use canonical expression form - where_condition_str = queryToString(where_condition_ast); - } - - String enabled_partitions_prefix = table_prefix + "enabled_partitions"; - has_enabled_partitions = config.has(enabled_partitions_prefix); - - if (has_enabled_partitions) - { - Strings keys; - config.keys(enabled_partitions_prefix, keys); - - if (keys.empty()) - { - /// Parse list of partition from space-separated string - String partitions_str = config.getString(table_prefix + "enabled_partitions"); - boost::trim_if(partitions_str, isWhitespaceASCII); - boost::split(enabled_partitions, partitions_str, isWhitespaceASCII, boost::token_compress_on); - } - else - { - /// Parse sequence of ... - for (const String &key : keys) - { - if (!startsWith(key, "partition")) - throw Exception("Unknown key " + key + " in " + enabled_partitions_prefix, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); - - enabled_partitions.emplace_back(config.getString(enabled_partitions_prefix + "." 
+ key)); - } - } - - std::copy(enabled_partitions.begin(), enabled_partitions.end(), std::inserter(enabled_partitions_set, enabled_partitions_set.begin())); - } -} - -template -inline void TaskTable::initShards(RandomEngine && random_engine) -{ - const String & fqdn_name = getFQDNOrHostName(); - std::uniform_int_distribution get_urand(0, std::numeric_limits::max()); - - // Compute the priority - for (const auto & shard_info : cluster_pull->getShardsInfo()) - { - TaskShardPtr task_shard = std::make_shared(*this, shard_info); - const auto & replicas = cluster_pull->getShardsAddresses().at(task_shard->indexInCluster()); - task_shard->priority = getReplicasPriority(replicas, fqdn_name, get_urand(random_engine)); - - all_shards.emplace_back(task_shard); - } - - // Sort by priority - std::sort(all_shards.begin(), all_shards.end(), - [](const TaskShardPtr & lhs, const TaskShardPtr & rhs) - { - return ShardPriority::greaterPriority(lhs->priority, rhs->priority); - }); - - // Cut local shards - auto it_first_remote = std::lower_bound(all_shards.begin(), all_shards.end(), 1, - [](const TaskShardPtr & lhs, UInt8 is_remote) - { - return lhs->priority.is_remote < is_remote; - }); - - local_shards.assign(all_shards.begin(), it_first_remote); -} - -inline ASTPtr TaskTable::rewriteReplicatedCreateQueryToPlain() const -{ - ASTPtr prev_engine_push_ast = engine_push_ast->clone(); - - auto & new_storage_ast = prev_engine_push_ast->as(); - auto & new_engine_ast = new_storage_ast.engine->as(); - - /// Remove "Replicated" from name - new_engine_ast.name = new_engine_ast.name.substr(10); - - if (new_engine_ast.arguments) - { - auto & replicated_table_arguments = new_engine_ast.arguments->children; - - - /// In some cases of Atomic database engine usage ReplicatedMergeTree tables - /// could be created without arguments. - if (!replicated_table_arguments.empty()) - { - /// Delete first two arguments of Replicated...MergeTree() table. 
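The removed `initShards` above sorts shards by priority and then uses `std::lower_bound` over the `is_remote` flag to split the sorted list into local and remote shards. A minimal standalone sketch of that split, using a stripped-down shard type rather than the copier's actual `TaskShard`/`ShardPriority`:

```cpp
#include <algorithm>
#include <iostream>
#include <vector>

// Stripped-down stand-ins for TaskShard / ShardPriority from the removed code.
struct Shard
{
    int priority_value = 0;   // higher is better in this sketch
    bool is_remote = true;
};

int main()
{
    std::vector<Shard> shards = {{5, true}, {9, false}, {1, true}, {7, false}};

    // Sort so that better shards (local ones, then higher priority) come first.
    std::sort(shards.begin(), shards.end(), [](const Shard & lhs, const Shard & rhs)
    {
        if (lhs.is_remote != rhs.is_remote)
            return !lhs.is_remote;                 // locals first
        return lhs.priority_value > rhs.priority_value;
    });

    // Locals now form a prefix, so lower_bound over is_remote finds the first
    // remote shard: the same "cut local shards" step as in initShards().
    auto it_first_remote = std::lower_bound(shards.begin(), shards.end(), true,
        [](const Shard & lhs, bool is_remote) { return lhs.is_remote < is_remote; });

    std::cout << "local shards: " << (it_first_remote - shards.begin()) << '\n';   // 2
}
```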
- replicated_table_arguments.erase(replicated_table_arguments.begin()); - replicated_table_arguments.erase(replicated_table_arguments.begin()); - } - } - - return new_storage_ast.clone(); -} - - -inline String DB::TaskShard::getDescription() const -{ - return fmt::format("N{} (having a replica {}, pull table {} of cluster {}", - numberInCluster(), getHostNameExample(), getQuotedTable(task_table.table_pull), task_table.cluster_pull_name); -} - -inline String DB::TaskShard::getHostNameExample() const -{ - const auto & replicas = task_table.cluster_pull->getShardsAddresses().at(indexInCluster()); - return replicas.at(0).readableString(); -} - -} diff --git a/programs/copier/ZooKeeperStaff.h b/programs/copier/ZooKeeperStaff.h index a9e04578607..3d4a11186e3 100644 --- a/programs/copier/ZooKeeperStaff.h +++ b/programs/copier/ZooKeeperStaff.h @@ -47,8 +47,8 @@ public: WrappingUInt32 epoch; WrappingUInt32 counter; explicit Zxid(UInt64 _zxid) - : epoch(_zxid >> 32) - , counter(_zxid) + : epoch(static_cast(_zxid >> 32)) + , counter(static_cast(_zxid)) {} bool operator<=(const Zxid & other) const diff --git a/programs/diagnostics/go.mod b/programs/diagnostics/go.mod index 19fc2ec8202..fb1568ea491 100644 --- a/programs/diagnostics/go.mod +++ b/programs/diagnostics/go.mod @@ -1,6 +1,6 @@ module github.com/ClickHouse/ClickHouse/programs/diagnostics -go 1.17 +go 1.19 require ( github.com/ClickHouse/clickhouse-go/v2 v2.0.12 diff --git a/programs/diagnostics/go.sum b/programs/diagnostics/go.sum index dd1b18ce0c7..aa69472e9c2 100644 --- a/programs/diagnostics/go.sum +++ b/programs/diagnostics/go.sum @@ -65,7 +65,6 @@ github.com/Azure/go-autorest/logger v0.2.0/go.mod h1:T9E3cAhj2VqvPOtCYAvby9aBXkZ github.com/Azure/go-autorest/tracing v0.6.0/go.mod h1:+vhtPC754Xsa23ID7GlGsrdKBpUA79WCAKPPZVC2DeU= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= -github.com/ClickHouse/clickhouse-go v1.5.3 h1:Vok8zUb/wlqc9u8oEqQzBMBRDoFd8NxPRqgYEqMnV88= github.com/ClickHouse/clickhouse-go v1.5.3/go.mod h1:EaI/sW7Azgz9UATzd5ZdZHRUhHgv5+JMS9NSr2smCJI= github.com/ClickHouse/clickhouse-go/v2 v2.0.12 h1:Nbl/NZwoM6LGJm7smNBgvtdr/rxjlIssSW3eG/Nmb9E= github.com/ClickHouse/clickhouse-go/v2 v2.0.12/go.mod h1:u4RoNQLLM2W6hNSPYrIESLJqaWSInZVmfM+MlaAhXcg= @@ -457,7 +456,6 @@ github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgf github.com/grpc-ecosystem/grpc-gateway v1.9.5/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= github.com/hashicorp/consul/api v1.11.0/go.mod h1:XjsvQN+RJGWI2TWy1/kqaE16HrR2J/FWgkYjdZQsX9M= -github.com/hashicorp/consul/api v1.12.0/go.mod h1:6pVBMo0ebnYdt2S3H87XhekM/HHrUoTD2XXb/VrZVy0= github.com/hashicorp/consul/sdk v0.8.0/go.mod h1:GBvyrGALthsZObzUGsfgHZQDXjg4lOjagTIwIR1vPms= github.com/hashicorp/errwrap v0.0.0-20141028054710-7554cd9344ce/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= @@ -663,9 +661,7 @@ github.com/paulmach/protoscan v0.2.1-0.20210522164731-4e53c6875432/go.mod h1:2sV github.com/pelletier/go-toml v1.9.4 h1:tjENF6MfZAg8e4ZmZTeWaWiT2vXtsoO6+iuOjFhECwM= github.com/pelletier/go-toml v1.9.4/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c= github.com/peterbourgon/diskv v2.0.1+incompatible/go.mod 
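`rewriteReplicatedCreateQueryToPlain` above strips the `Replicated` prefix (10 characters) from the engine name and drops the first two engine arguments, which for `ReplicatedMergeTree` are the ZooKeeper path and the replica name. A string-level sketch of the same transformation on a flat argument list instead of an AST:

```cpp
#include <iostream>
#include <string>
#include <string_view>
#include <vector>

// Sketch only: the real code rewrites an AST; here the engine is modelled as
// a name plus a flat list of argument strings.
struct Engine
{
    std::string name;
    std::vector<std::string> arguments;
};

Engine rewriteReplicatedToPlain(Engine engine)
{
    constexpr std::string_view replicated_prefix = "Replicated";   // 10 characters, as in substr(10)
    if (engine.name.starts_with(replicated_prefix))
        engine.name = engine.name.substr(replicated_prefix.size());

    // Drop the first two arguments (ZooKeeper path and replica name); tables
    // created under the Atomic database engine may have no arguments at all.
    if (engine.arguments.size() >= 2)
        engine.arguments.erase(engine.arguments.begin(), engine.arguments.begin() + 2);

    return engine;
}

int main()
{
    Engine e{"ReplicatedMergeTree", {"'/clickhouse/tables/{shard}/t'", "'{replica}'", "ver"}};
    Engine plain = rewriteReplicatedToPlain(e);
    std::cout << plain.name << " with " << plain.arguments.size() << " argument(s)\n";   // MergeTree with 1 argument(s)
}
```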
h1:uqqh8zWWbv1HBMNONnaR/tNboyR3/BZd58JJSHlUSCU= -github.com/pierrec/lz4 v2.0.5+incompatible h1:2xWsjqPFWcplujydGg4WmhC/6fZqK42wMM8aXeqhl0I= github.com/pierrec/lz4 v2.0.5+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY= -github.com/pierrec/lz4/v4 v4.1.12/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= github.com/pierrec/lz4/v4 v4.1.14 h1:+fL8AQEZtz/ijeNnpduH0bROTu0O3NZAlPjQxGn8LwE= github.com/pierrec/lz4/v4 v4.1.14/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= @@ -717,7 +713,6 @@ github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQD github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= github.com/safchain/ethtool v0.0.0-20190326074333-42ed695e3de8/go.mod h1:Z0q5wiBQGYcxhMZ6gUqHn6pYNLypFAvaL3UvgZLR0U4= github.com/sagikazarmark/crypt v0.3.0/go.mod h1:uD/D+6UF4SrIR1uGEv7bBNkNqLGqUr43MRiaGWX1Nig= -github.com/sagikazarmark/crypt v0.4.0/go.mod h1:ALv2SRj7GxYV4HO9elxH9nS6M9gW+xDNxqmyJ6RfDFM= github.com/satori/go.uuid v1.2.0/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdhQKdks0= github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc= github.com/seccomp/libseccomp-golang v0.9.1/go.mod h1:GbW5+tmTXfcxTToHLXlScSlAvWlF4P2Ca7zGrPiEpWo= @@ -1083,7 +1078,6 @@ golang.org/x/sys v0.0.0-20211109184856-51b60fd695b3/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20211110154304-99a53858aa08/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211124211545-fe61309f8881/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211205182925-97ca703d548d/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20211210111614-af8b64212486/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220114195835-da31bd327af9 h1:XfKQ4OlFl8okEOr5UvAqFRVj8pY/4yfcXrddB8qAbU0= golang.org/x/sys v0.0.0-20220114195835-da31bd327af9/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= @@ -1202,7 +1196,6 @@ google.golang.org/api v0.57.0/go.mod h1:dVPlbZyBo2/OjBpmvNdpn2GRm6rPy75jyU7bmhdr google.golang.org/api v0.59.0/go.mod h1:sT2boj7M9YJxZzgeZqXogmhfmRWDtPzT31xkieUbuZU= google.golang.org/api v0.61.0/go.mod h1:xQRti5UdCmoCEqFxcz93fTl338AVqDgyaDRuOZ3hg9I= google.golang.org/api v0.62.0/go.mod h1:dKmwPCydfsad4qCH08MSdgWjfHOyfpd4VtDGgRFdavw= -google.golang.org/api v0.63.0/go.mod h1:gs4ij2ffTRXwuzzgJl/56BdwJaA194ijkfn++9tDuPo= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= diff --git a/programs/disks/DisksApp.cpp b/programs/disks/DisksApp.cpp index b662921a3b1..91472a8df33 100644 --- a/programs/disks/DisksApp.cpp +++ b/programs/disks/DisksApp.cpp @@ -57,8 +57,8 @@ void DisksApp::addOptions( ("config-file,C", po::value(), "Set config file") ("disk", po::value(), "Set disk name") ("command_name", po::value(), "Name for command to do") - ("send-logs", "Send logs") - ("log-level", "Logging level") + ("save-logs", "Save logs to a file") + ("log-level", po::value(), "Logging level") ; positional_options_description.add("command_name", 1); @@ -82,10 
+82,10 @@ void DisksApp::processOptions() config().setString("config-file", options["config-file"].as()); if (options.count("disk")) config().setString("disk", options["disk"].as()); - if (options.count("send-logs")) - config().setBool("send-logs", true); + if (options.count("save-logs")) + config().setBool("save-logs", true); if (options.count("log-level")) - Poco::Logger::root().setLevel(options["log-level"].as()); + config().setString("log-level", options["log-level"].as()); } void DisksApp::init(std::vector & common_arguments) @@ -149,15 +149,6 @@ void DisksApp::parseAndCheckOptions( int DisksApp::main(const std::vector & /*args*/) { - if (config().has("send-logs")) - { - auto log_level = config().getString("log-level", "trace"); - Poco::Logger::root().setLevel(Poco::Logger::parseLevel(log_level)); - - auto log_path = config().getString("logger.clickhouse-disks", "/var/log/clickhouse-server/clickhouse-disks.log"); - Poco::Logger::root().setChannel(Poco::AutoPtr(new Poco::FileChannel(log_path))); - } - if (config().has("config-file") || fs::exists(getDefaultConfigFileName())) { String config_path = config().getString("config-file", getDefaultConfigFileName()); @@ -171,6 +162,20 @@ int DisksApp::main(const std::vector & /*args*/) throw Exception(ErrorCodes::BAD_ARGUMENTS, "No config-file specifiged"); } + if (config().has("save-logs")) + { + auto log_level = config().getString("log-level", "trace"); + Poco::Logger::root().setLevel(Poco::Logger::parseLevel(log_level)); + + auto log_path = config().getString("logger.clickhouse-disks", "/var/log/clickhouse-server/clickhouse-disks.log"); + Poco::Logger::root().setChannel(Poco::AutoPtr(new Poco::FileChannel(log_path))); + } + else + { + auto log_level = config().getString("log-level", "none"); + Poco::Logger::root().setLevel(Poco::Logger::parseLevel(log_level)); + } + registerDisks(); registerFormats(); diff --git a/programs/install/Install.cpp b/programs/install/Install.cpp index 00c86571265..8028ccde72d 100644 --- a/programs/install/Install.cpp +++ b/programs/install/Install.cpp @@ -893,7 +893,7 @@ namespace if (fs::exists(pid_file)) { ReadBufferFromFile in(pid_file.string()); - UInt64 pid; + Int32 pid; if (tryReadIntText(pid, in)) { fmt::print("{} file exists and contains pid = {}.\n", pid_file.string(), pid); @@ -982,9 +982,9 @@ namespace return 0; } - UInt64 isRunning(const fs::path & pid_file) + int isRunning(const fs::path & pid_file) { - UInt64 pid = 0; + int pid = 0; if (fs::exists(pid_file)) { @@ -1057,7 +1057,7 @@ namespace if (force && do_not_kill) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Specified flags are incompatible"); - UInt64 pid = isRunning(pid_file); + int pid = isRunning(pid_file); if (!pid) return 0; diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt index ce176ccade5..9266a4ca419 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ -45,6 +45,7 @@ if (BUILD_STANDALONE_KEEPER) ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperLogStore.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperServer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperSnapshotManager.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperSnapshotManagerS3.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperStateMachine.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperStateManager.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperStorage.cpp diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index 
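The `clickhouse-disks` change above renames `--send-logs` to `--save-logs` and defers logger setup until after the configuration is loaded: with the flag, the root logger writes to a `Poco::FileChannel`; without it, the level defaults to `none`. A minimal sketch of that Poco setup (the flag value, level, and log path below are hard-coded placeholders):

```cpp
#include <Poco/AutoPtr.h>
#include <Poco/FileChannel.h>
#include <Poco/Logger.h>
#include <string>

int main()
{
    const bool save_logs = true;                                  // stands in for config().has("save-logs")
    const std::string log_level = "trace";                        // stands in for the "log-level" option
    const std::string log_path = "/tmp/clickhouse-disks.log";     // placeholder path

    if (save_logs)
    {
        Poco::Logger::root().setLevel(Poco::Logger::parseLevel(log_level));
        Poco::Logger::root().setChannel(Poco::AutoPtr(new Poco::FileChannel(log_path)));
    }
    else
    {
        // Without --save-logs the tool stays quiet by default.
        Poco::Logger::root().setLevel(Poco::Logger::parseLevel("none"));
    }

    Poco::Logger::root().information("clickhouse-disks logging configured");
    return 0;
}
```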
5077f59b7dd..e1d03b40b66 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -68,12 +68,12 @@ namespace ErrorCodes namespace { -int waitServersToFinish(std::vector & servers, size_t seconds_to_wait) +size_t waitServersToFinish(std::vector & servers, size_t seconds_to_wait) { - const int sleep_max_ms = 1000 * seconds_to_wait; - const int sleep_one_ms = 100; - int sleep_current_ms = 0; - int current_connections = 0; + const size_t sleep_max_ms = 1000 * seconds_to_wait; + const size_t sleep_one_ms = 100; + size_t sleep_current_ms = 0; + size_t current_connections = 0; for (;;) { current_connections = 0; @@ -441,7 +441,7 @@ int Keeper::main(const std::vector & /*args*/) main_config_reloader.reset(); LOG_DEBUG(log, "Waiting for current connections to Keeper to finish."); - int current_connections = 0; + size_t current_connections = 0; for (auto & server : *servers) { server.stop(); diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index ffec435239e..3ac9c1e7c37 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -8,12 +8,12 @@ #include #include #include +#include +#include #include #include -#include #include #include -#include #include #include #include @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -488,7 +489,8 @@ void LocalServer::processConfig() } else { - need_render_progress = config().getBool("progress", false); + std::string progress = config().getString("progress", "tty"); + need_render_progress = (Poco::icompare(progress, "off") && Poco::icompare(progress, "no") && Poco::icompare(progress, "false") && Poco::icompare(progress, "0")); echo_queries = config().hasOption("echo") || config().hasOption("verbose"); ignore_error = config().getBool("ignore-error", false); is_multiquery = true; @@ -546,9 +548,14 @@ void LocalServer::processConfig() /// Setting value from cmd arg overrides one from config if (global_context->getSettingsRef().max_insert_block_size.changed) + { insert_format_max_block_size = global_context->getSettingsRef().max_insert_block_size; + } else - insert_format_max_block_size = config().getInt("insert_format_max_block_size", global_context->getSettingsRef().max_insert_block_size); + { + insert_format_max_block_size = config().getUInt64("insert_format_max_block_size", + global_context->getSettingsRef().max_insert_block_size); + } /// Sets external authenticators config (LDAP, Kerberos). global_context->setExternalAuthenticatorsConfig(config()); @@ -586,6 +593,18 @@ void LocalServer::processConfig() if (mmap_cache_size) global_context->setMMappedFileCache(mmap_cache_size); +#if USE_EMBEDDED_COMPILER + /// 128 MB + constexpr size_t compiled_expression_cache_size_default = 1024 * 1024 * 128; + size_t compiled_expression_cache_size = config().getUInt64("compiled_expression_cache_size", compiled_expression_cache_size_default); + + constexpr size_t compiled_expression_cache_elements_size_default = 10000; + size_t compiled_expression_cache_elements_size + = config().getUInt64("compiled_expression_cache_elements_size", compiled_expression_cache_elements_size_default); + + CompiledExpressionCacheFactory::instance().init(compiled_expression_cache_size, compiled_expression_cache_elements_size); +#endif + /// Load global settings from default_profile and system_profile. 
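In `clickhouse-local`, `--progress` becomes a string option defaulting to `tty`; only the case-insensitive values `off`, `no`, `false`, and `0` disable rendering. A small sketch of that check with `Poco::icompare`, mirroring the expression in the hunk above:

```cpp
#include <Poco/String.h>
#include <iostream>
#include <string>

// Mirrors the check in LocalServer::processConfig(): any value other than the
// explicit "off"-style spellings enables progress rendering.
bool progressEnabled(const std::string & progress)
{
    return Poco::icompare(progress, "off") && Poco::icompare(progress, "no")
        && Poco::icompare(progress, "false") && Poco::icompare(progress, "0");
}

int main()
{
    std::cout << progressEnabled("tty") << ' '   // 1
              << progressEnabled("OFF") << ' '   // 0
              << progressEnabled("1") << '\n';   // 1
}
```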
global_context->setDefaultProfiles(config()); @@ -602,8 +621,6 @@ void LocalServer::processConfig() global_context->setCurrentDatabase(default_database); applyCmdOptions(global_context); - bool enable_objects_loader = false; - if (config().has("path")) { String path = global_context->getPath(); @@ -611,12 +628,6 @@ void LocalServer::processConfig() /// Lock path directory before read status.emplace(fs::path(path) / "status", StatusFile::write_full_info); - LOG_DEBUG(log, "Loading user defined objects from {}", path); - Poco::File(path + "user_defined/").createDirectories(); - UserDefinedSQLObjectsLoader::instance().loadObjects(global_context); - enable_objects_loader = true; - LOG_DEBUG(log, "Loaded user defined objects."); - LOG_DEBUG(log, "Loading metadata from {}", path); loadMetadataSystem(global_context); attachSystemTablesLocal(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::SYSTEM_DATABASE)); @@ -630,6 +641,9 @@ void LocalServer::processConfig() DatabaseCatalog::instance().loadDatabases(); } + /// For ClickHouse local if path is not set the loader will be disabled. + global_context->getUserDefinedSQLObjectsLoader().loadObjects(); + LOG_DEBUG(log, "Loaded metadata."); } else if (!config().has("no-system-tables")) @@ -639,9 +653,6 @@ void LocalServer::processConfig() attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE)); } - /// Persist SQL user defined objects only if user_defined folder was created - UserDefinedSQLObjectsLoader::instance().enable(enable_objects_loader); - server_display_name = config().getString("display_name", getFQDNOrHostName()); prompt_by_server_display_name = config().getRawString("prompt_by_server_display_name.default", "{display_name} :) "); std::map prompt_substitutions{{"display_name", server_display_name}}; diff --git a/programs/obfuscator/Obfuscator.cpp b/programs/obfuscator/Obfuscator.cpp index bdf26c9e730..7fdc5a54d8a 100644 --- a/programs/obfuscator/Obfuscator.cpp +++ b/programs/obfuscator/Obfuscator.cpp @@ -279,7 +279,7 @@ Float transformFloatMantissa(Float x, UInt64 seed) constexpr size_t mantissa_num_bits = std::is_same_v ? 
23 : 52; UInt x_uint = bit_cast(x); - x_uint = feistelNetwork(x_uint, mantissa_num_bits, seed); + x_uint = static_cast(feistelNetwork(x_uint, mantissa_num_bits, seed)); return bit_cast(x_uint); } @@ -511,13 +511,13 @@ public: for (size_t i = 0; i < size; ++i) { UInt32 src_datetime = src_data[i]; - UInt32 src_date = date_lut.toDate(src_datetime); + UInt32 src_date = static_cast(date_lut.toDate(src_datetime)); Int32 src_diff = src_datetime - src_prev_value; - Int32 res_diff = transformSigned(src_diff, seed); + Int32 res_diff = static_cast(transformSigned(src_diff, seed)); UInt32 new_datetime = res_prev_value + res_diff; - UInt32 new_time = new_datetime - date_lut.toDate(new_datetime); + UInt32 new_time = new_datetime - static_cast(date_lut.toDate(new_datetime)); res_data[i] = src_date + new_time; src_prev_value = src_datetime; diff --git a/programs/odbc-bridge/ColumnInfoHandler.cpp b/programs/odbc-bridge/ColumnInfoHandler.cpp index 0ea2495af78..bf11947d436 100644 --- a/programs/odbc-bridge/ColumnInfoHandler.cpp +++ b/programs/odbc-bridge/ColumnInfoHandler.cpp @@ -183,7 +183,10 @@ void ODBCColumnsInfoHandler::handleRequest(HTTPServerRequest & request, HTTPServ if (columns.empty()) throw Exception("Columns definition was not returned", ErrorCodes::LOGICAL_ERROR); - WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout); + WriteBufferFromHTTPServerResponse out( + response, + request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, + keep_alive_timeout); try { writeStringBinary(columns.toString(), out); diff --git a/programs/odbc-bridge/ODBCBlockInputStream.cpp b/programs/odbc-bridge/ODBCBlockInputStream.cpp index dec4c249b4b..5bbc39dc559 100644 --- a/programs/odbc-bridge/ODBCBlockInputStream.cpp +++ b/programs/odbc-bridge/ODBCBlockInputStream.cpp @@ -139,7 +139,7 @@ void ODBCSource::insertValue( readDateTimeText(time, in, assert_cast(data_type.get())->getTimeZone()); if (time < 0) time = 0; - assert_cast(column).insertValue(time); + assert_cast(column).insertValue(static_cast(time)); break; } case ValueType::vtDateTime64: diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index aed586a86f6..b412b579539 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -53,7 +53,6 @@ #include #include #include -#include #include #include #include @@ -62,6 +61,7 @@ #include #include #include +#include #include #include #include @@ -82,13 +82,17 @@ #if USE_BORINGSSL #include #endif +#include #include #include +#include +#include #include #include #include #include #include +#include #include "config.h" #include "config_version.h" @@ -224,12 +228,12 @@ catch (...) 
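The obfuscator keeps the sign and exponent of a float and scrambles only the mantissa bits, so transformed values stay within the same order of magnitude. A self-contained sketch of the idea; the keyed XOR below is only a stand-in for the `feistelNetwork` permutation used by `Obfuscator.cpp`:

```cpp
#include <bit>
#include <cstdint>
#include <iostream>

// Stand-in scrambler: NOT feistelNetwork() from Obfuscator.cpp, just a keyed
// permutation of the low bits for illustration.
uint32_t scrambleLowBits(uint32_t x, unsigned bits, uint64_t seed)
{
    const uint32_t mask = (bits >= 32) ? ~0u : ((1u << bits) - 1);
    uint32_t scrambled = (x ^ static_cast<uint32_t>(seed * 0x9E3779B97F4A7C15ull)) & mask;
    return (x & ~mask) | scrambled;
}

float transformFloatMantissa(float x, uint64_t seed)
{
    constexpr unsigned mantissa_num_bits = 23;     // float32 mantissa width
    uint32_t x_uint = std::bit_cast<uint32_t>(x);
    x_uint = scrambleLowBits(x_uint, mantissa_num_bits, seed);
    return std::bit_cast<float>(x_uint);
}

int main()
{
    float v = 123.456f;
    float t = transformFloatMantissa(v, 42);
    // Sign and exponent are untouched, so the result stays in [64, 128).
    std::cout << v << " -> " << t << '\n';
}
```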
path)); } -int waitServersToFinish(std::vector & servers, size_t seconds_to_wait) +size_t waitServersToFinish(std::vector & servers, size_t seconds_to_wait) { - const int sleep_max_ms = 1000 * seconds_to_wait; - const int sleep_one_ms = 100; - int sleep_current_ms = 0; - int current_connections = 0; + const size_t sleep_max_ms = 1000 * seconds_to_wait; + const size_t sleep_one_ms = 100; + size_t sleep_current_ms = 0; + size_t current_connections = 0; for (;;) { current_connections = 0; @@ -387,7 +391,16 @@ bool getListenTry(const Poco::Util::AbstractConfiguration & config) { bool listen_try = config.getBool("listen_try", false); if (!listen_try) - listen_try = DB::getMultipleValuesFromConfig(config, "", "listen_host").empty(); + { + Poco::Util::AbstractConfiguration::Keys protocols; + config.keys("protocols", protocols); + listen_try = + DB::getMultipleValuesFromConfig(config, "", "listen_host").empty() && + std::none_of(protocols.begin(), protocols.end(), [&](const auto & protocol) + { + return config.has("protocols." + protocol + ".host") && config.has("protocols." + protocol + ".port"); + }); + } return listen_try; } @@ -920,7 +933,7 @@ int Server::main(const std::vector & /*args*/) else { rlim_t old = rlim.rlim_cur; - rlim.rlim_cur = config().getUInt("max_open_files", rlim.rlim_max); + rlim.rlim_cur = config().getUInt("max_open_files", static_cast(rlim.rlim_max)); int rc = setrlimit(RLIMIT_NOFILE, &rlim); if (rc != 0) LOG_WARNING(log, "Cannot set max number of file descriptors to {}. Try to specify max_open_files according to your system limits. error: {}", rlim.rlim_cur, errnoToString()); @@ -1010,12 +1023,6 @@ int Server::main(const std::vector & /*args*/) fs::create_directories(user_scripts_path); } - { - std::string user_defined_path = config().getString("user_defined_path", path / "user_defined/"); - global_context->setUserDefinedPath(user_defined_path); - fs::create_directories(user_defined_path); - } - /// top_level_domains_lists { const std::string & top_level_domains_path = config().getString("top_level_domains_path", path / "top_level_domains/"); @@ -1500,7 +1507,7 @@ int Server::main(const std::vector & /*args*/) if (!servers_to_start_before_tables.empty()) { LOG_DEBUG(log, "Waiting for current connections to servers for tables to finish."); - int current_connections = 0; + size_t current_connections = 0; for (auto & server : servers_to_start_before_tables) { server.stop(); @@ -1559,18 +1566,6 @@ int Server::main(const std::vector & /*args*/) /// system logs may copy global context. global_context->setCurrentDatabaseNameInGlobalContext(default_database); - LOG_INFO(log, "Loading user defined objects from {}", path_str); - try - { - UserDefinedSQLObjectsLoader::instance().loadObjects(global_context); - } - catch (...) - { - tryLogCurrentException(log, "Caught exception while loading user defined objects"); - throw; - } - LOG_DEBUG(log, "Loaded user defined objects"); - LOG_INFO(log, "Loading metadata from {}", path_str); try @@ -1598,6 +1593,8 @@ int Server::main(const std::vector & /*args*/) database_catalog.loadDatabases(); /// After loading validate that default database exists database_catalog.assertDatabaseExists(default_database); + /// Load user-defined SQL functions. + global_context->getUserDefinedSQLObjectsLoader().loadObjects(); } catch (...) 
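The `max_open_files` hunk above adds an explicit cast of `rlim_max` because `rlim_t` is wider than the `unsigned` expected by `getUInt`. For reference, a minimal sketch of querying and raising `RLIMIT_NOFILE` with the same POSIX calls:

```cpp
#include <sys/resource.h>
#include <cstdio>

int main()
{
    rlimit rlim{};
    if (getrlimit(RLIMIT_NOFILE, &rlim) != 0)
    {
        perror("getrlimit");
        return 1;
    }

    std::printf("soft=%llu hard=%llu\n",
                static_cast<unsigned long long>(rlim.rlim_cur),
                static_cast<unsigned long long>(rlim.rlim_max));

    // Raise the soft limit up to the hard limit, as Server::main() does when
    // max_open_files is not configured explicitly.
    rlim.rlim_cur = rlim.rlim_max;
    if (setrlimit(RLIMIT_NOFILE, &rlim) != 0)
        perror("setrlimit");

    return 0;
}
```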
{ @@ -1796,7 +1793,7 @@ int Server::main(const std::vector & /*args*/) is_cancelled = true; - int current_connections = 0; + size_t current_connections = 0; { std::lock_guard lock(servers_lock); for (auto & server : servers) @@ -1853,6 +1850,82 @@ int Server::main(const std::vector & /*args*/) return Application::EXIT_OK; } +std::unique_ptr Server::buildProtocolStackFromConfig( + const Poco::Util::AbstractConfiguration & config, + const std::string & protocol, + Poco::Net::HTTPServerParams::Ptr http_params, + AsynchronousMetrics & async_metrics, + bool & is_secure) +{ + auto create_factory = [&](const std::string & type, const std::string & conf_name) -> TCPServerConnectionFactory::Ptr + { + if (type == "tcp") + return TCPServerConnectionFactory::Ptr(new TCPHandlerFactory(*this, false, false)); + + if (type == "tls") +#if USE_SSL + return TCPServerConnectionFactory::Ptr(new TLSHandlerFactory(*this, conf_name)); +#else + throw Exception{"SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.", + ErrorCodes::SUPPORT_IS_DISABLED}; +#endif + + if (type == "proxy1") + return TCPServerConnectionFactory::Ptr(new ProxyV1HandlerFactory(*this, conf_name)); + if (type == "mysql") + return TCPServerConnectionFactory::Ptr(new MySQLHandlerFactory(*this)); + if (type == "postgres") + return TCPServerConnectionFactory::Ptr(new PostgreSQLHandlerFactory(*this)); + if (type == "http") + return TCPServerConnectionFactory::Ptr( + new HTTPServerConnectionFactory(context(), http_params, createHandlerFactory(*this, config, async_metrics, "HTTPHandler-factory")) + ); + if (type == "prometheus") + return TCPServerConnectionFactory::Ptr( + new HTTPServerConnectionFactory(context(), http_params, createHandlerFactory(*this, config, async_metrics, "PrometheusHandler-factory")) + ); + if (type == "interserver") + return TCPServerConnectionFactory::Ptr( + new HTTPServerConnectionFactory(context(), http_params, createHandlerFactory(*this, config, async_metrics, "InterserverIOHTTPHandler-factory")) + ); + + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol configuration error, unknown protocol name '{}'", type); + }; + + std::string conf_name = "protocols." + protocol; + std::string prefix = conf_name + "."; + std::unordered_set pset {conf_name}; + + auto stack = std::make_unique(*this, conf_name); + + while (true) + { + // if there is no "type" - it's a reference to another protocol and this is just an endpoint + if (config.has(prefix + "type")) + { + std::string type = config.getString(prefix + "type"); + if (type == "tls") + { + if (is_secure) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' contains more than one TLS layer", protocol); + is_secure = true; + } + + stack->append(create_factory(type, conf_name)); + } + + if (!config.has(prefix + "impl")) + break; + + conf_name = "protocols." 
+ config.getString(prefix + "impl"); + prefix = conf_name + "."; + + if (!pset.insert(conf_name).second) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' configuration contains a loop on '{}'", protocol, conf_name); + } + + return stack; +} void Server::createServers( Poco::Util::AbstractConfiguration & config, @@ -1871,6 +1944,55 @@ void Server::createServers( http_params->setTimeout(settings.http_receive_timeout); http_params->setKeepAliveTimeout(keep_alive_timeout); + Poco::Util::AbstractConfiguration::Keys protocols; + config.keys("protocols", protocols); + + for (const auto & protocol : protocols) + { + std::vector hosts; + if (config.has("protocols." + protocol + ".host")) + hosts.push_back(config.getString("protocols." + protocol + ".host")); + else + hosts = listen_hosts; + + for (const auto & host : hosts) + { + std::string conf_name = "protocols." + protocol; + std::string prefix = conf_name + "."; + + if (!config.has(prefix + "port")) + continue; + + std::string description {" protocol"}; + if (config.has(prefix + "description")) + description = config.getString(prefix + "description"); + std::string port_name = prefix + "port"; + bool is_secure = false; + auto stack = buildProtocolStackFromConfig(config, protocol, http_params, async_metrics, is_secure); + + if (stack->empty()) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' stack empty", protocol); + + createServer(config, host, port_name.c_str(), listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, host, port, is_secure); + socket.setReceiveTimeout(settings.receive_timeout); + socket.setSendTimeout(settings.send_timeout); + + return ProtocolServerAdapter( + host, + port_name.c_str(), + description + ": " + address.toString(), + std::make_unique( + stack.release(), + server_pool, + socket, + new Poco::Net::TCPServerParams)); + }); + } + } + for (const auto & listen_host : listen_hosts) { /// HTTP @@ -2118,13 +2240,50 @@ void Server::updateServers( { if (!server.isStopping()) { - bool has_host = std::find(listen_hosts.begin(), listen_hosts.end(), server.getListenHost()) != listen_hosts.end(); - bool has_port = !config.getString(server.getPortName(), "").empty(); + std::string port_name = server.getPortName(); + bool has_host = false; + bool is_http = false; + if (port_name.starts_with("protocols.")) + { + std::string protocol = port_name.substr(0, port_name.find_last_of('.')); + has_host = config.has(protocol + ".host"); - /// NOTE: better to compare using getPortName() over using - /// dynamic_cast<> since HTTPServer is also used for prometheus and - /// internal replication communications. - bool is_http = server.getPortName() == "http_port" || server.getPortName() == "https_port"; + std::string conf_name = protocol; + std::string prefix = protocol + "."; + std::unordered_set pset {conf_name}; + while (true) + { + if (config.has(prefix + "type")) + { + std::string type = config.getString(prefix + "type"); + if (type == "http") + { + is_http = true; + break; + } + } + + if (!config.has(prefix + "impl")) + break; + + conf_name = "protocols." 
+ config.getString(prefix + "impl"); + prefix = conf_name + "."; + + if (!pset.insert(conf_name).second) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' configuration contains a loop on '{}'", protocol, conf_name); + } + } + else + { + /// NOTE: better to compare using getPortName() over using + /// dynamic_cast<> since HTTPServer is also used for prometheus and + /// internal replication communications. + is_http = server.getPortName() == "http_port" || server.getPortName() == "https_port"; + } + + if (!has_host) + has_host = std::find(listen_hosts.begin(), listen_hosts.end(), server.getListenHost()) != listen_hosts.end(); + bool has_port = !config.getString(port_name, "").empty(); bool force_restart = is_http && !isSameConfiguration(previous_config, config, "http_handlers"); if (force_restart) LOG_TRACE(log, " had been changed, will reload {}", server.getDescription()); diff --git a/programs/server/Server.h b/programs/server/Server.h index 44a5a441e43..53841b1fcd4 100644 --- a/programs/server/Server.h +++ b/programs/server/Server.h @@ -3,6 +3,8 @@ #include #include +#include +#include /** Server provides three interfaces: * 1. HTTP - simple interface for any applications. @@ -77,6 +79,13 @@ private: UInt16 port, [[maybe_unused]] bool secure = false) const; + std::unique_ptr buildProtocolStackFromConfig( + const Poco::Util::AbstractConfiguration & config, + const std::string & protocol, + Poco::Net::HTTPServerParams::Ptr http_params, + AsynchronousMetrics & async_metrics, + bool & is_secure); + using CreateServerFunc = std::function; void createServer( Poco::Util::AbstractConfiguration & config, diff --git a/programs/server/config.xml b/programs/server/config.xml index 7f3a749b629..deebb434120 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -1336,17 +1336,13 @@ name - name for the rule (optional) regexp - RE2 compatible regular expression (mandatory) replace - substitution string for sensitive data (optional, by default - six asterisks) - --> hide encrypt/decrypt arguments ((?:aes_)?(?:encrypt|decrypt)(?:_mysql)?)\s*\(\s*(?:'(?:\\'|.)+'|.*?)\s*\) - \1(???) 
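Both `buildProtocolStackFromConfig` and the new branch in `updateServers` walk a chain of `protocols.<name>.impl` references, collecting a layer for each entry that defines a `type` and aborting on configuration loops via an `unordered_set` of visited names. A self-contained sketch of that traversal over a plain map instead of `Poco::Util::AbstractConfiguration` (the protocol names below are made up):

```cpp
#include <iostream>
#include <map>
#include <stdexcept>
#include <string>
#include <unordered_set>
#include <vector>

// Each protocol may reference another one via "impl"; "type" (if present)
// contributes a layer to the resulting stack. This mirrors the loop in
// Server::buildProtocolStackFromConfig() on top of a simple map.
struct ProtocolConf
{
    std::string type;   // empty: just an endpoint referencing impl
    std::string impl;   // empty: end of the chain
};

std::vector<std::string> buildStack(const std::map<std::string, ProtocolConf> & protocols, std::string name)
{
    std::vector<std::string> stack;
    std::unordered_set<std::string> visited{name};

    while (true)
    {
        const ProtocolConf & conf = protocols.at(name);
        if (!conf.type.empty())
            stack.push_back(conf.type);

        if (conf.impl.empty())
            break;

        name = conf.impl;
        if (!visited.insert(name).second)
            throw std::runtime_error("protocol configuration contains a loop on '" + name + "'");
    }
    return stack;
}

int main()
{
    // Made-up example: a TLS endpoint layered over the native TCP protocol.
    std::map<std::string, ProtocolConf> protocols = {
        {"secure_native", {"tls", "native"}},
        {"native", {"tcp", ""}},
    };

    for (const auto & layer : buildStack(protocols, "secure_native"))
        std::cout << layer << '\n';   // tls, tcp
}
```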
- + --> inst("abc"), inst(1), inst("de"), inst(2), inst("fg"), inst(1), inst(2) using Instructions = std::vector; - static const size_t max_captures = 10; + static constexpr int max_captures = 10; - - static Instructions createInstructions(const std::string & s, int num_captures) + static Instructions createInstructions(std::string_view replacement, int num_captures) { Instructions instructions; - String now; - for (size_t i = 0; i < s.size(); ++i) + String literals; + for (size_t i = 0; i < replacement.size(); ++i) { - if (s[i] == '\\' && i + 1 < s.size()) + if (replacement[i] == '\\' && i + 1 < replacement.size()) { - if (isNumericASCII(s[i + 1])) /// Substitution + if (isNumericASCII(replacement[i + 1])) /// Substitution { - if (!now.empty()) + if (!literals.empty()) { - instructions.emplace_back(now); - now = ""; + instructions.emplace_back(literals); + literals = ""; } - instructions.emplace_back(s[i + 1] - '0'); + instructions.emplace_back(replacement[i + 1] - '0'); } else - now += s[i + 1]; /// Escaping + literals += replacement[i + 1]; /// Escaping ++i; } else - now += s[i]; /// Plain character + literals += replacement[i]; /// Plain character } - if (!now.empty()) - { - instructions.emplace_back(now); - now = ""; - } + if (!literals.empty()) + instructions.emplace_back(literals); - for (const auto & it : instructions) - if (it.substitution_num >= num_captures) - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Invalid replace instruction in replacement string. Id: {}, but regexp has only {} subpatterns", - it.substitution_num, num_captures - 1); + for (const auto & instr : instructions) + if (instr.substitution_num >= num_captures) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Id {} in replacement string is an invalid substitution, regexp has only {} capturing groups", + instr.substitution_num, num_captures - 1); return instructions; } - static void processString( - const re2_st::StringPiece & input, + const char * haystack_data, + size_t haystack_length, ColumnString::Chars & res_data, ColumnString::Offset & res_offset, - re2_st::RE2 & searcher, + const re2_st::RE2 & searcher, int num_captures, const Instructions & instructions) { + re2_st::StringPiece haystack(haystack_data, haystack_length); re2_st::StringPiece matches[max_captures]; size_t copy_pos = 0; size_t match_pos = 0; - while (match_pos < static_cast(input.length())) + while (match_pos < haystack_length) { /// If no more replacements possible for current string bool can_finish_current_string = false; - if (searcher.Match(input, match_pos, input.length(), re2_st::RE2::Anchor::UNANCHORED, matches, num_captures)) + if (searcher.Match(haystack, match_pos, haystack_length, re2_st::RE2::Anchor::UNANCHORED, matches, num_captures)) { - const auto & match = matches[0]; - size_t bytes_to_copy = (match.data() - input.data()) - copy_pos; + const auto & match = matches[0]; /// Complete match (\0) + size_t bytes_to_copy = (match.data() - haystack.data()) - copy_pos; - /// Copy prefix before matched regexp without modification + /// Copy prefix before current match without modification res_data.resize(res_data.size() + bytes_to_copy); - memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input.data() + copy_pos, bytes_to_copy); + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], haystack.data() + copy_pos, bytes_to_copy); res_offset += bytes_to_copy; copy_pos += bytes_to_copy + match.length(); match_pos = copy_pos; - /// Do substitution instructions - for (const auto & it : instructions) + /// Substitute inside 
current match using instructions + for (const auto & instr : instructions) { - if (it.substitution_num >= 0) - { - const auto & substitution = matches[it.substitution_num]; - - res_data.resize(res_data.size() + substitution.length()); - memcpy(&res_data[res_offset], substitution.data(), substitution.length()); - res_offset += substitution.length(); - } + std::string_view replacement; + if (instr.substitution_num >= 0) + replacement = std::string_view(matches[instr.substitution_num].data(), matches[instr.substitution_num].size()); else - { - const auto & literal = it.literal; - - res_data.resize(res_data.size() + literal.size()); - memcpy(&res_data[res_offset], literal.data(), literal.size()); - res_offset += literal.size(); - } + replacement = instr.literal; + res_data.resize(res_data.size() + replacement.size()); + memcpy(&res_data[res_offset], replacement.data(), replacement.size()); + res_offset += replacement.size(); } - if (replace_one) + if constexpr (replace == ReplaceRegexpTraits::Replace::First) can_finish_current_string = true; - if (match.length() == 0) + if (match.empty()) { /// Step one character to avoid infinite loop ++match_pos; - if (match_pos >= static_cast(input.length())) + if (match_pos >= haystack_length) can_finish_current_string = true; } } @@ -151,10 +149,10 @@ struct ReplaceRegexpImpl /// If ready, append suffix after match to end of string. if (can_finish_current_string) { - res_data.resize(res_data.size() + input.length() - copy_pos); - memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input.data() + copy_pos, input.length() - copy_pos); - res_offset += input.length() - copy_pos; - copy_pos = input.length(); + res_data.resize(res_data.size() + haystack_length - copy_pos); + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], haystack.data() + copy_pos, haystack_length - copy_pos); + res_offset += haystack_length - copy_pos; + copy_pos = haystack_length; match_pos = copy_pos; } } @@ -164,12 +162,11 @@ struct ReplaceRegexpImpl ++res_offset; } - static void vector( const ColumnString::Chars & data, const ColumnString::Offsets & offsets, - const std::string & needle, - const std::string & replacement, + const String & needle, + const String & replacement, ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) { @@ -178,21 +175,30 @@ struct ReplaceRegexpImpl size_t size = offsets.size(); res_offsets.resize(size); - typename re2_st::RE2::Options regexp_options; - /// Never write error messages to stderr. It's ignorant to do it from library code. + re2_st::RE2::Options regexp_options; + /// Don't write error messages to stderr. regexp_options.set_log_errors(false); + re2_st::RE2 searcher(needle, regexp_options); - int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, static_cast(max_captures)); + + if (!searcher.ok()) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "The pattern argument is not a valid re2 pattern: {}", + searcher.error()); + + int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, max_captures); Instructions instructions = createInstructions(replacement, num_captures); /// Cannot perform search for whole columns. Will process each string separately. for (size_t i = 0; i < size; ++i) { - int from = i > 0 ? offsets[i - 1] : 0; - re2_st::StringPiece input(reinterpret_cast(data.data() + from), offsets[i] - from - 1); + size_t from = i > 0 ? 
offsets[i - 1] : 0; + const char * haystack_data = reinterpret_cast(data.data() + from); + const size_t haystack_length = static_cast(offsets[i] - from - 1); - processString(input, res_data, res_offset, searcher, num_captures, instructions); + processString(haystack_data, haystack_length, res_data, res_offset, searcher, num_captures, instructions); res_offsets[i] = res_offset; } } @@ -200,8 +206,8 @@ struct ReplaceRegexpImpl static void vectorFixed( const ColumnString::Chars & data, size_t n, - const std::string & needle, - const std::string & replacement, + const String & needle, + const String & replacement, ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) { @@ -210,20 +216,29 @@ struct ReplaceRegexpImpl res_data.reserve(data.size()); res_offsets.resize(size); - typename re2_st::RE2::Options regexp_options; - /// Never write error messages to stderr. It's ignorant to do it from library code. + re2_st::RE2::Options regexp_options; + /// Don't write error messages to stderr. regexp_options.set_log_errors(false); + re2_st::RE2 searcher(needle, regexp_options); - int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, static_cast(max_captures)); + + if (!searcher.ok()) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "The pattern argument is not a valid re2 pattern: {}", + searcher.error()); + + int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, max_captures); Instructions instructions = createInstructions(replacement, num_captures); for (size_t i = 0; i < size; ++i) { - int from = i * n; - re2_st::StringPiece input(reinterpret_cast(data.data() + from), n); + size_t from = i * n; + const char * haystack_data = reinterpret_cast(data.data() + from); + const size_t haystack_length = n; - processString(input, res_data, res_offset, searcher, num_captures, instructions); + processString(haystack_data, haystack_length, res_data, res_offset, searcher, num_captures, instructions); res_offsets[i] = res_offset; } } diff --git a/src/Functions/ReplaceStringImpl.h b/src/Functions/ReplaceStringImpl.h index ab0e53d3c45..1a9ec49c58c 100644 --- a/src/Functions/ReplaceStringImpl.h +++ b/src/Functions/ReplaceStringImpl.h @@ -8,9 +8,17 @@ namespace DB { +struct ReplaceStringTraits +{ + enum class Replace + { + First, + All + }; +}; /** Replace one or all occurencies of substring 'needle' to 'replacement'. 'needle' and 'replacement' are constants. */ -template +template struct ReplaceStringImpl { static void vector( @@ -66,7 +74,7 @@ struct ReplaceStringImpl memcpy(&res_data[res_offset], replacement.data(), replacement.size()); res_offset += replacement.size(); pos = match + needle.size(); - if (replace_one) + if constexpr (replace == ReplaceStringTraits::Replace::First) can_finish_current_string = true; } else @@ -155,7 +163,7 @@ struct ReplaceStringImpl memcpy(&res_data[res_offset], replacement.data(), replacement.size()); res_offset += replacement.size(); pos = match + needle.size(); - if (replace_one || pos == begin + n * (i + 1)) + if (replace == ReplaceStringTraits::Replace::First || pos == begin + n * (i + 1)) can_finish_current_string = true; } else diff --git a/src/Functions/URL/CMakeLists.txt b/src/Functions/URL/CMakeLists.txt index 6328476543d..0e148e87604 100644 --- a/src/Functions/URL/CMakeLists.txt +++ b/src/Functions/URL/CMakeLists.txt @@ -2,6 +2,7 @@ include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake") add_headers_and_sources(clickhouse_functions_url .) 
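Both `vector` overloads rely on the `ColumnString` layout: a flat character buffer in which every value is null-terminated and `offsets[i]` points one past that terminator, so string `i` starts at `offsets[i - 1]` and spans `offsets[i] - offsets[i - 1] - 1` bytes. A tiny sketch of iterating such a layout with hard-coded sample data:

```cpp
#include <cstdint>
#include <iostream>
#include <string_view>
#include <vector>

int main()
{
    // Flat buffer with null terminators, as in ColumnString::Chars,
    // and offsets pointing one past each terminator.
    std::vector<char> chars = {'f', 'o', 'o', '\0', 'b', 'a', 'r', 'b', 'a', 'z', '\0'};
    std::vector<uint64_t> offsets = {4, 11};

    for (size_t i = 0; i < offsets.size(); ++i)
    {
        size_t from = i > 0 ? offsets[i - 1] : 0;
        size_t length = offsets[i] - from - 1;               // exclude the trailing '\0'
        std::string_view value(chars.data() + from, length);
        std::cout << i << ": " << value << '\n';             // foo, barbaz
    }
}
```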
add_library(clickhouse_functions_url OBJECT ${clickhouse_functions_url_sources} ${clickhouse_functions_url_headers}) target_link_libraries(clickhouse_functions_url PRIVATE dbms) +set_source_files_properties(tldLookup.generated.cpp PROPERTIES COMPILE_FLAGS -Wno-shorten-64-to-32) if (OMIT_HEAVY_DEBUG_SYMBOLS) target_compile_options(clickhouse_functions_url PRIVATE "-g0") diff --git a/src/Functions/URL/ExtractFirstSignificantSubdomain.h b/src/Functions/URL/ExtractFirstSignificantSubdomain.h index 73137da474f..0d1b1cac8ef 100644 --- a/src/Functions/URL/ExtractFirstSignificantSubdomain.h +++ b/src/Functions/URL/ExtractFirstSignificantSubdomain.h @@ -16,7 +16,7 @@ struct FirstSignificantSubdomainDefaultLookup } }; -template +template struct ExtractFirstSignificantSubdomain { static size_t getReserveLengthForElement() { return 10; } @@ -35,7 +35,7 @@ struct ExtractFirstSignificantSubdomain Pos tmp; size_t domain_length; - ExtractDomain::execute(data, size, tmp, domain_length); + ExtractDomain::execute(data, size, tmp, domain_length); if (domain_length == 0) return; @@ -105,7 +105,7 @@ struct ExtractFirstSignificantSubdomain Pos tmp; size_t domain_length; - ExtractDomain::execute(data, size, tmp, domain_length); + ExtractDomain::execute(data, size, tmp, domain_length); if (domain_length == 0) return; diff --git a/src/Functions/URL/cutToFirstSignificantSubdomain.cpp b/src/Functions/URL/cutToFirstSignificantSubdomain.cpp index dddfbe4f4dd..7bf09d1eb00 100644 --- a/src/Functions/URL/cutToFirstSignificantSubdomain.cpp +++ b/src/Functions/URL/cutToFirstSignificantSubdomain.cpp @@ -6,7 +6,7 @@ namespace DB { -template +template struct CutToFirstSignificantSubdomain { static size_t getReserveLengthForElement() { return 15; } @@ -19,7 +19,7 @@ struct CutToFirstSignificantSubdomain Pos tmp_data; size_t tmp_length; Pos domain_end; - ExtractFirstSignificantSubdomain::execute(data, size, tmp_data, tmp_length, &domain_end); + ExtractFirstSignificantSubdomain::execute(data, size, tmp_data, tmp_length, &domain_end); if (tmp_length == 0) return; @@ -30,15 +30,47 @@ struct CutToFirstSignificantSubdomain }; struct NameCutToFirstSignificantSubdomain { static constexpr auto name = "cutToFirstSignificantSubdomain"; }; -using FunctionCutToFirstSignificantSubdomain = FunctionStringToString>, NameCutToFirstSignificantSubdomain>; +using FunctionCutToFirstSignificantSubdomain = FunctionStringToString>, NameCutToFirstSignificantSubdomain>; struct NameCutToFirstSignificantSubdomainWithWWW { static constexpr auto name = "cutToFirstSignificantSubdomainWithWWW"; }; -using FunctionCutToFirstSignificantSubdomainWithWWW = FunctionStringToString>, NameCutToFirstSignificantSubdomainWithWWW>; +using FunctionCutToFirstSignificantSubdomainWithWWW = FunctionStringToString>, NameCutToFirstSignificantSubdomainWithWWW>; + +struct NameCutToFirstSignificantSubdomainRFC { static constexpr auto name = "cutToFirstSignificantSubdomainRFC"; }; +using FunctionCutToFirstSignificantSubdomainRFC = FunctionStringToString>, NameCutToFirstSignificantSubdomainRFC>; + +struct NameCutToFirstSignificantSubdomainWithWWWRFC { static constexpr auto name = "cutToFirstSignificantSubdomainWithWWWRFC"; }; +using FunctionCutToFirstSignificantSubdomainWithWWWRFC = FunctionStringToString>, NameCutToFirstSignificantSubdomainWithWWWRFC>; REGISTER_FUNCTION(CutToFirstSignificantSubdomain) { - factory.registerFunction(); - factory.registerFunction(); + factory.registerFunction( + { + R"(Returns the part of the domain that includes top-level subdomains up to the "first 
significant subdomain" (see documentation of the `firstSignificantSubdomain`).)", + Documentation::Examples{ + {"cutToFirstSignificantSubdomain1", "SELECT cutToFirstSignificantSubdomain('https://news.clickhouse.com.tr/')"}, + {"cutToFirstSignificantSubdomain2", "SELECT cutToFirstSignificantSubdomain('www.tr')"}, + {"cutToFirstSignificantSubdomain3", "SELECT cutToFirstSignificantSubdomain('tr')"}, + }, + Documentation::Categories{"URL"} + }); + factory.registerFunction( + { + R"(Returns the part of the domain that includes top-level subdomains up to the "first significant subdomain", without stripping "www".)", + Documentation::Examples{}, + Documentation::Categories{"URL"} + }); + factory.registerFunction( + { + R"(Similar to `cutToFirstSignificantSubdomain` but follows stricter rules to be compatible with RFC 3986 and less performant.)", + Documentation::Examples{}, + Documentation::Categories{"URL"} + }); + factory.registerFunction( + { + R"(Similar to `cutToFirstSignificantSubdomainWithWWW` but follows stricter rules to be compatible with RFC 3986 and less performant.)", + Documentation::Examples{}, + Documentation::Categories{"URL"} + }); } } diff --git a/src/Functions/URL/cutToFirstSignificantSubdomainCustom.cpp b/src/Functions/URL/cutToFirstSignificantSubdomainCustom.cpp index a2e51200910..e81921d69ff 100644 --- a/src/Functions/URL/cutToFirstSignificantSubdomainCustom.cpp +++ b/src/Functions/URL/cutToFirstSignificantSubdomainCustom.cpp @@ -5,7 +5,7 @@ namespace DB { -template +template struct CutToFirstSignificantSubdomainCustom { static size_t getReserveLengthForElement() { return 15; } @@ -18,7 +18,7 @@ struct CutToFirstSignificantSubdomainCustom Pos tmp_data; size_t tmp_length; Pos domain_end; - ExtractFirstSignificantSubdomain::executeCustom(tld_lookup, data, size, tmp_data, tmp_length, &domain_end); + ExtractFirstSignificantSubdomain::executeCustom(tld_lookup, data, size, tmp_data, tmp_length, &domain_end); if (tmp_length == 0) return; @@ -29,15 +29,54 @@ struct CutToFirstSignificantSubdomainCustom }; struct NameCutToFirstSignificantSubdomainCustom { static constexpr auto name = "cutToFirstSignificantSubdomainCustom"; }; -using FunctionCutToFirstSignificantSubdomainCustom = FunctionCutToFirstSignificantSubdomainCustomImpl, NameCutToFirstSignificantSubdomainCustom>; +using FunctionCutToFirstSignificantSubdomainCustom = FunctionCutToFirstSignificantSubdomainCustomImpl, NameCutToFirstSignificantSubdomainCustom>; struct NameCutToFirstSignificantSubdomainCustomWithWWW { static constexpr auto name = "cutToFirstSignificantSubdomainCustomWithWWW"; }; -using FunctionCutToFirstSignificantSubdomainCustomWithWWW = FunctionCutToFirstSignificantSubdomainCustomImpl, NameCutToFirstSignificantSubdomainCustomWithWWW>; +using FunctionCutToFirstSignificantSubdomainCustomWithWWW = FunctionCutToFirstSignificantSubdomainCustomImpl, NameCutToFirstSignificantSubdomainCustomWithWWW>; + +struct NameCutToFirstSignificantSubdomainCustomRFC { static constexpr auto name = "cutToFirstSignificantSubdomainCustomRFC"; }; +using FunctionCutToFirstSignificantSubdomainCustomRFC = FunctionCutToFirstSignificantSubdomainCustomImpl, NameCutToFirstSignificantSubdomainCustomRFC>; + +struct NameCutToFirstSignificantSubdomainCustomWithWWWRFC { static constexpr auto name = "cutToFirstSignificantSubdomainCustomWithWWWRFC"; }; +using FunctionCutToFirstSignificantSubdomainCustomWithWWWRFC = FunctionCutToFirstSignificantSubdomainCustomImpl, NameCutToFirstSignificantSubdomainCustomWithWWWRFC>; 
REGISTER_FUNCTION(CutToFirstSignificantSubdomainCustom) { - factory.registerFunction(); - factory.registerFunction(); + factory.registerFunction( + { + R"( +Returns the part of the domain that includes top-level subdomains up to the first significant subdomain. Accepts custom TLD list name. + +Can be useful if you need fresh TLD list or you have custom. + )", + Documentation::Examples{ + {"cutToFirstSignificantSubdomainCustom", "SELECT cutToFirstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list');"}, + }, + Documentation::Categories{"URL"} + }); + factory.registerFunction( + { + R"( +Returns the part of the domain that includes top-level subdomains up to the first significant subdomain without stripping `www`. +Accepts custom TLD list name from config. + +Can be useful if you need fresh TLD list or you have custom. + )", + Documentation::Examples{{"cutToFirstSignificantSubdomainCustomWithWWW", "SELECT cutToFirstSignificantSubdomainCustomWithWWW('www.foo', 'public_suffix_list')"}}, + Documentation::Categories{"URL"} + }); + factory.registerFunction( + { + R"(Similar to `cutToFirstSignificantSubdomainCustom` but follows stricter rules according to RFC 3986.)", + Documentation::Examples{}, + Documentation::Categories{"URL"} + }); + factory.registerFunction( + { + R"(Similar to `cutToFirstSignificantSubdomainCustomWithWWW` but follows stricter rules according to RFC 3986.)", + Documentation::Examples{}, + Documentation::Categories{"URL"} + }); } } diff --git a/src/Functions/URL/domain.cpp b/src/Functions/URL/domain.cpp index 1d781b37943..fce7cea4693 100644 --- a/src/Functions/URL/domain.cpp +++ b/src/Functions/URL/domain.cpp @@ -7,12 +7,31 @@ namespace DB { struct NameDomain { static constexpr auto name = "domain"; }; -using FunctionDomain = FunctionStringToString>, NameDomain>; +using FunctionDomain = FunctionStringToString>, NameDomain>; +struct NameDomainRFC { static constexpr auto name = "domainRFC"; }; +using FunctionDomainRFC = FunctionStringToString>, NameDomainRFC>; REGISTER_FUNCTION(Domain) { - factory.registerFunction(); + factory.registerFunction( + { + R"( +Extracts the hostname from a URL. + +The URL can be specified with or without a scheme. +If the argument can't be parsed as URL, the function returns an empty string. + )", + Documentation::Examples{{"domain", "SELECT domain('svn+ssh://some.svn-hosting.com:80/repo/trunk')"}}, + Documentation::Categories{"URL"} + }); + + factory.registerFunction( + { + R"(Similar to `domain` but follows stricter rules to be compatible with RFC 3986 and less performant.)", + Documentation::Examples{}, + Documentation::Categories{"URL"} + }); } } diff --git a/src/Functions/URL/domain.h b/src/Functions/URL/domain.h index 1245bb20182..64362edf2c3 100644 --- a/src/Functions/URL/domain.h +++ b/src/Functions/URL/domain.h @@ -20,6 +20,115 @@ inline std::string_view checkAndReturnHost(const Pos & pos, const Pos & dot_pos, return std::string_view(start_of_host, pos - start_of_host); } +/// Extracts host from given url (RPC). +/// +/// @return empty string view if the host is not valid (i.e. it does not have dot, or there no symbol after dot). 
+inline std::string_view getURLHostRFC(const char * data, size_t size) +{ + Pos pos = data; + Pos end = data + size; + + if (*pos == '/' && *(pos + 1) == '/') + { + pos += 2; + } + else + { + Pos scheme_end = data + std::min(size, 16UL); + for (++pos; pos < scheme_end; ++pos) + { + if (!isAlphaNumericASCII(*pos)) + { + switch (*pos) + { + case '.': + case '-': + case '+': + break; + case ' ': /// restricted symbols + case '\t': + case '<': + case '>': + case '%': + case '{': + case '}': + case '|': + case '\\': + case '^': + case '~': + case '[': + case ']': + case ';': + case '=': + case '&': + return std::string_view{}; + default: + goto exloop; + } + } + } +exloop: if ((scheme_end - pos) > 2 && *pos == ':' && *(pos + 1) == '/' && *(pos + 2) == '/') + pos += 3; + else + pos = data; + } + + Pos dot_pos = nullptr; + Pos colon_pos = nullptr; + bool has_at_symbol = false; + bool has_terminator_after_colon = false; + const auto * start_of_host = pos; + for (; pos < end; ++pos) + { + switch (*pos) + { + case '.': + if (has_at_symbol || colon_pos == nullptr) + dot_pos = pos; + break; + case ':': + if (has_at_symbol || colon_pos) goto done; + colon_pos = pos; + break; + case '/': /// end symbols + case '?': + case '#': + goto done; + case '@': /// myemail@gmail.com + if (has_terminator_after_colon) return std::string_view{}; + if (has_at_symbol) goto done; + has_at_symbol = true; + start_of_host = pos + 1; + break; + case ' ': /// restricted symbols in whole URL + case '\t': + case '<': + case '>': + case '%': + case '{': + case '}': + case '|': + case '\\': + case '^': + case '~': + case '[': + case ']': + case ';': + case '=': + case '&': + if (colon_pos == nullptr) + return std::string_view{}; + else + has_terminator_after_colon = true; + } + } + +done: + if (!has_at_symbol) + pos = colon_pos ? colon_pos : pos; + return checkAndReturnHost(pos, dot_pos, start_of_host); +} + /// Extracts host from given url. /// /// @return empty string view if the host is not valid (i.e. it does not have dot, or there no symbol after dot). @@ -113,14 +222,18 @@ exloop: if ((scheme_end - pos) > 2 && *pos == ':' && *(pos + 1) == '/' && *(pos return checkAndReturnHost(pos, dot_pos, start_of_host); } -template +template struct ExtractDomain { static size_t getReserveLengthForElement() { return 15; } static void execute(Pos data, size_t size, Pos & res_data, size_t & res_size) { - std::string_view host = getURLHost(data, size); + std::string_view host; + if constexpr (conform_rfc) + host = getURLHostRFC(data, size); + else + host = getURLHost(data, size); if (host.empty()) { diff --git a/src/Functions/URL/domainWithoutWWW.cpp b/src/Functions/URL/domainWithoutWWW.cpp index 53ff5bc919e..48401e5e6e5 100644 --- a/src/Functions/URL/domainWithoutWWW.cpp +++ b/src/Functions/URL/domainWithoutWWW.cpp @@ -6,12 +6,31 @@ namespace DB { struct NameDomainWithoutWWW { static constexpr auto name = "domainWithoutWWW"; }; -using FunctionDomainWithoutWWW = FunctionStringToString>, NameDomainWithoutWWW>; +using FunctionDomainWithoutWWW = FunctionStringToString>, NameDomainWithoutWWW>; + +struct NameDomainWithoutWWWRFC { static constexpr auto name = "domainWithoutWWWRFC"; }; +using FunctionDomainWithoutWWWRFC = FunctionStringToString>, NameDomainWithoutWWWRFC>; REGISTER_FUNCTION(DomainWithoutWWW) { - factory.registerFunction(); + factory.registerFunction( + { + R"( +Extracts the hostname from a URL, removing the leading "www." if present. + +The URL can be specified with or without a scheme. 
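`getURLHostRFC` first tries to skip a `scheme://` prefix (scheme characters are alphanumerics plus `.`, `-`, `+`, with a set of restricted characters aborting the parse) and then scans for the host boundary, handling `user@host` and `host:port`. The much-simplified sketch below covers only the scheme-skipping step; it illustrates the control flow above and is not the actual parser:

```cpp
#include <cctype>
#include <iostream>
#include <string_view>

// Very rough approximation of the first stage of getURLHostRFC(): return the
// substring after "//" or "scheme://", or the input unchanged otherwise.
std::string_view skipSchemePrefix(std::string_view url)
{
    if (url.substr(0, 2) == "//")
        return url.substr(2);

    size_t i = 0;
    while (i < url.size()
           && (std::isalnum(static_cast<unsigned char>(url[i])) || url[i] == '.' || url[i] == '-' || url[i] == '+'))
        ++i;

    if (i + 2 < url.size() && url[i] == ':' && url[i + 1] == '/' && url[i + 2] == '/')
        return url.substr(i + 3);

    return url;   // no scheme found: parse from the beginning
}

int main()
{
    std::cout << skipSchemePrefix("svn+ssh://some.svn-hosting.com:80/repo/trunk") << '\n';
    std::cout << skipSchemePrefix("//example.com/path") << '\n';
    std::cout << skipSchemePrefix("example.com/path") << '\n';
}
```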
+If the argument can't be parsed as URL, the function returns an empty string. + )", + Documentation::Examples{{"domainWithoutWWW", "SELECT domainWithoutWWW('https://www.clickhouse.com')"}}, + Documentation::Categories{"URL"} + }); + factory.registerFunction( + { + R"(Similar to `domainWithoutWWW` but follows stricter rules to be compatible with RFC 3986 and less performant.)", + Documentation::Examples{}, + Documentation::Categories{"URL"} + }); } } diff --git a/src/Functions/URL/firstSignificantSubdomain.cpp b/src/Functions/URL/firstSignificantSubdomain.cpp index d3aeb90771f..62307ef816c 100644 --- a/src/Functions/URL/firstSignificantSubdomain.cpp +++ b/src/Functions/URL/firstSignificantSubdomain.cpp @@ -7,12 +7,35 @@ namespace DB { struct NameFirstSignificantSubdomain { static constexpr auto name = "firstSignificantSubdomain"; }; +using FunctionFirstSignificantSubdomain = FunctionStringToString>, NameFirstSignificantSubdomain>; -using FunctionFirstSignificantSubdomain = FunctionStringToString>, NameFirstSignificantSubdomain>; +struct NameFirstSignificantSubdomainRFC { static constexpr auto name = "firstSignificantSubdomainRFC"; }; +using FunctionFirstSignificantSubdomainRFC = FunctionStringToString>, NameFirstSignificantSubdomainRFC>; REGISTER_FUNCTION(FirstSignificantSubdomain) { - factory.registerFunction(); + factory.registerFunction( + { + R"( +Returns the "first significant subdomain". + +The first significant subdomain is a second-level domain if it is 'com', 'net', 'org', or 'co'. +Otherwise, it is a third-level domain. + +For example, firstSignificantSubdomain('https://news.clickhouse.com/') = 'clickhouse', firstSignificantSubdomain ('https://news.clickhouse.com.tr/') = 'clickhouse'. + +The list of "insignificant" second-level domains and other implementation details may change in the future. 
+ )", + Documentation::Examples{{"firstSignificantSubdomain", "SELECT firstSignificantSubdomain('https://news.clickhouse.com/')"}}, + Documentation::Categories{"URL"} + }); + + factory.registerFunction( + { + R"(Returns the "first significant subdomain" according to RFC 1034.)", + Documentation::Examples{}, + Documentation::Categories{"URL"} + }); } } diff --git a/src/Functions/URL/firstSignificantSubdomainCustom.cpp b/src/Functions/URL/firstSignificantSubdomainCustom.cpp index f43b42d0309..c07aa2b3ac8 100644 --- a/src/Functions/URL/firstSignificantSubdomainCustom.cpp +++ b/src/Functions/URL/firstSignificantSubdomainCustom.cpp @@ -7,12 +7,15 @@ namespace DB { struct NameFirstSignificantSubdomainCustom { static constexpr auto name = "firstSignificantSubdomainCustom"; }; +using FunctionFirstSignificantSubdomainCustom = FunctionCutToFirstSignificantSubdomainCustomImpl, NameFirstSignificantSubdomainCustom>; -using FunctionFirstSignificantSubdomainCustom = FunctionCutToFirstSignificantSubdomainCustomImpl, NameFirstSignificantSubdomainCustom>; +struct NameFirstSignificantSubdomainCustomRFC { static constexpr auto name = "firstSignificantSubdomainCustomRFC"; }; +using FunctionFirstSignificantSubdomainCustomRFC = FunctionCutToFirstSignificantSubdomainCustomImpl, NameFirstSignificantSubdomainCustomRFC>; REGISTER_FUNCTION(FirstSignificantSubdomainCustom) { factory.registerFunction(); + factory.registerFunction(); } } diff --git a/src/Functions/URL/port.cpp b/src/Functions/URL/port.cpp index 85b060ca987..52fa4077c18 100644 --- a/src/Functions/URL/port.cpp +++ b/src/Functions/URL/port.cpp @@ -18,12 +18,9 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } -struct FunctionPort : public IFunction +template +struct FunctionPortImpl : public IFunction { - static constexpr auto name = "port"; - static FunctionPtr create(ContextPtr) { return std::make_shared(); } - - String getName() const override { return name; } bool isVariadic() const override { return true; } size_t getNumberOfArguments() const override { return 0; } bool useDefaultImplementationForConstants() const override { return true; } @@ -94,7 +91,12 @@ private: const char * p = reinterpret_cast(buf.data()) + offset; const char * end = p + size; - std::string_view host = getURLHost(p, size); + std::string_view host; + if constexpr (conform_rfc) + host = getURLHostRFC(p, size); + else + host = getURLHost(p, size); + if (host.empty()) return default_port; if (host.size() == size) @@ -121,9 +123,34 @@ private: } }; +struct FunctionPort : public FunctionPortImpl +{ + static constexpr auto name = "port"; + String getName() const override { return name; } + static FunctionPtr create(ContextPtr) { return std::make_shared(); } +}; + +struct FunctionPortRFC : public FunctionPortImpl +{ + static constexpr auto name = "portRFC"; + String getName() const override { return name; } + static FunctionPtr create(ContextPtr) { return std::make_shared(); } +}; + REGISTER_FUNCTION(Port) { - factory.registerFunction(); + factory.registerFunction( + { + R"(Returns the port or `default_port` if there is no port in the URL (or in case of validation error).)", + Documentation::Examples{}, + Documentation::Categories{"URL"} + }); + factory.registerFunction( + { + R"(Similar to `port`, but conforms to RFC 3986.)", + Documentation::Examples{}, + Documentation::Categories{"URL"} + }); } } diff --git a/src/Functions/URL/topLevelDomain.cpp b/src/Functions/URL/topLevelDomain.cpp index 9937618cae9..ed9b40d4b73 100644 --- 
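As a rough illustration of the RFC-conforming variants registered above (domainWithoutWWWRFC, firstSignificantSubdomainRFC, portRFC), the queries below sketch how they can be called alongside the existing functions. For well-formed URLs the two parsers should agree; the RFC versions mainly differ in stricter handling of restricted characters, so the results shown are indicative only:

    SELECT domainWithoutWWW('https://www.clickhouse.com/docs');             -- 'clickhouse.com'
    SELECT domainWithoutWWWRFC('https://www.clickhouse.com/docs');          -- 'clickhouse.com' (assumed to match for a well-formed URL)
    SELECT firstSignificantSubdomainRFC('https://news.clickhouse.com.tr/'); -- 'clickhouse'
    SELECT port('https://clickhouse.com:8443/path'), portRFC('https://clickhouse.com:8443/path');  -- 8443, 8443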
a/src/Functions/URL/topLevelDomain.cpp +++ b/src/Functions/URL/topLevelDomain.cpp @@ -5,13 +5,18 @@ namespace DB { +template struct ExtractTopLevelDomain { static size_t getReserveLengthForElement() { return 5; } static void execute(Pos data, size_t size, Pos & res_data, size_t & res_size) { - std::string_view host = getURLHost(data, size); + std::string_view host; + if constexpr (conform_rfc) + host = getURLHostRFC(data, size); + else + host = getURLHost(data, size); res_data = data; res_size = 0; @@ -41,11 +46,30 @@ struct ExtractTopLevelDomain }; struct NameTopLevelDomain { static constexpr auto name = "topLevelDomain"; }; -using FunctionTopLevelDomain = FunctionStringToString, NameTopLevelDomain>; +using FunctionTopLevelDomain = FunctionStringToString>, NameTopLevelDomain>; + +struct NameTopLevelDomainRFC { static constexpr auto name = "topLevelDomainRFC"; }; +using FunctionTopLevelDomainRFC = FunctionStringToString>, NameTopLevelDomainRFC>; REGISTER_FUNCTION(TopLevelDomain) { - factory.registerFunction(); + factory.registerFunction( + { + R"( +Extracts the top-level domain from a URL. + +Returns an empty string if the argument cannot be parsed as a URL or does not contain a top-level domain. + )", + Documentation::Examples{{"topLevelDomain", "SELECT topLevelDomain('svn+ssh://www.some.svn-hosting.com:80/repo/trunk')"}}, + Documentation::Categories{"URL"} + }); + + factory.registerFunction( + { + R"(Similar to topLevelDomain, but conforms to RFC 3986.)", + Documentation::Examples{}, + Documentation::Categories{"URL"} + }); } } diff --git a/src/Interpreters/ExternalUserDefinedExecutableFunctionsLoader.cpp b/src/Functions/UserDefined/ExternalUserDefinedExecutableFunctionsLoader.cpp similarity index 98% rename from src/Interpreters/ExternalUserDefinedExecutableFunctionsLoader.cpp rename to src/Functions/UserDefined/ExternalUserDefinedExecutableFunctionsLoader.cpp index 8c7220a85da..d4ecbf66987 100644 --- a/src/Interpreters/ExternalUserDefinedExecutableFunctionsLoader.cpp +++ b/src/Functions/UserDefined/ExternalUserDefinedExecutableFunctionsLoader.cpp @@ -5,8 +5,8 @@ #include -#include -#include +#include +#include #include #include diff --git a/src/Interpreters/ExternalUserDefinedExecutableFunctionsLoader.h b/src/Functions/UserDefined/ExternalUserDefinedExecutableFunctionsLoader.h similarity index 94% rename from src/Interpreters/ExternalUserDefinedExecutableFunctionsLoader.h rename to src/Functions/UserDefined/ExternalUserDefinedExecutableFunctionsLoader.h index 4d4843e8677..1a62175eb0c 100644 --- a/src/Interpreters/ExternalUserDefinedExecutableFunctionsLoader.h +++ b/src/Functions/UserDefined/ExternalUserDefinedExecutableFunctionsLoader.h @@ -4,7 +4,7 @@ #include #include -#include +#include namespace DB { diff --git a/src/Functions/UserDefined/IUserDefinedSQLObjectsLoader.h b/src/Functions/UserDefined/IUserDefinedSQLObjectsLoader.h new file mode 100644 index 00000000000..4c7850951b5 --- /dev/null +++ b/src/Functions/UserDefined/IUserDefinedSQLObjectsLoader.h @@ -0,0 +1,47 @@ +#pragma once + +#include + + +namespace DB +{ +class IAST; +struct Settings; +enum class UserDefinedSQLObjectType; + +/// Interface for a loader of user-defined SQL objects. +/// Implementations: UserDefinedSQLObjectsLoaderFromDisk, UserDefinedSQLObjectsLoaderFromZooKeeper +class IUserDefinedSQLObjectsLoader +{ +public: + virtual ~IUserDefinedSQLObjectsLoader() = default; + + /// Whether this loader can replicate SQL objects to another node.
+ virtual bool isReplicated() const { return false; } + virtual String getReplicationID() const { return ""; } + + /// Loads all objects. Can be called once - if objects are already loaded the function does nothing. + virtual void loadObjects() = 0; + + /// Stops watching. + virtual void stopWatching() {} + + /// Immediately reloads all objects, throws an exception if failed. + virtual void reloadObjects() = 0; + + /// Immediately reloads a specified object only. + virtual void reloadObject(UserDefinedSQLObjectType object_type, const String & object_name) = 0; + + /// Stores an object (must be called only by UserDefinedSQLFunctionFactory::registerFunction). + virtual bool storeObject( + UserDefinedSQLObjectType object_type, + const String & object_name, + const IAST & create_object_query, + bool throw_if_exists, + bool replace_if_exists, + const Settings & settings) = 0; + + /// Removes an object (must be called only by UserDefinedSQLFunctionFactory::unregisterFunction). + virtual bool removeObject(UserDefinedSQLObjectType object_type, const String & object_name, bool throw_if_not_exists) = 0; +}; +} diff --git a/src/Interpreters/UserDefinedExecutableFunction.cpp b/src/Functions/UserDefined/UserDefinedExecutableFunction.cpp similarity index 100% rename from src/Interpreters/UserDefinedExecutableFunction.cpp rename to src/Functions/UserDefined/UserDefinedExecutableFunction.cpp diff --git a/src/Interpreters/UserDefinedExecutableFunction.h b/src/Functions/UserDefined/UserDefinedExecutableFunction.h similarity index 100% rename from src/Interpreters/UserDefinedExecutableFunction.h rename to src/Functions/UserDefined/UserDefinedExecutableFunction.h diff --git a/src/Interpreters/UserDefinedExecutableFunctionFactory.cpp b/src/Functions/UserDefined/UserDefinedExecutableFunctionFactory.cpp similarity index 99% rename from src/Interpreters/UserDefinedExecutableFunctionFactory.cpp rename to src/Functions/UserDefined/UserDefinedExecutableFunctionFactory.cpp index 18784609397..3f3cfc4c8e3 100644 --- a/src/Interpreters/UserDefinedExecutableFunctionFactory.cpp +++ b/src/Functions/UserDefined/UserDefinedExecutableFunctionFactory.cpp @@ -12,9 +12,9 @@ #include #include +#include #include #include -#include #include #include diff --git a/src/Interpreters/UserDefinedExecutableFunctionFactory.h b/src/Functions/UserDefined/UserDefinedExecutableFunctionFactory.h similarity index 100% rename from src/Interpreters/UserDefinedExecutableFunctionFactory.h rename to src/Functions/UserDefined/UserDefinedExecutableFunctionFactory.h diff --git a/src/Functions/UserDefined/UserDefinedSQLFunctionFactory.cpp b/src/Functions/UserDefined/UserDefinedSQLFunctionFactory.cpp new file mode 100644 index 00000000000..622854b3508 --- /dev/null +++ b/src/Functions/UserDefined/UserDefinedSQLFunctionFactory.cpp @@ -0,0 +1,301 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int FUNCTION_ALREADY_EXISTS; + extern const int UNKNOWN_FUNCTION; + extern const int CANNOT_DROP_FUNCTION; + extern const int CANNOT_CREATE_RECURSIVE_FUNCTION; + extern const int UNSUPPORTED_METHOD; +} + + +namespace +{ + void validateFunctionRecursiveness(const IAST & node, const String & function_to_create) + { + for (const auto & child : node.children) + { + auto function_name_opt = tryGetFunctionName(child); + if (function_name_opt && function_name_opt.value() == function_to_create) + throw 
Exception(ErrorCodes::CANNOT_CREATE_RECURSIVE_FUNCTION, "You cannot create recursive function"); + + validateFunctionRecursiveness(*child, function_to_create); + } + } + + void validateFunction(ASTPtr function, const String & name) + { + ASTFunction * lambda_function = function->as(); + + if (!lambda_function) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Expected function, got: {}", function->formatForErrorMessage()); + + auto & lambda_function_expression_list = lambda_function->arguments->children; + + if (lambda_function_expression_list.size() != 2) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Lambda must have arguments and body"); + + const ASTFunction * tuple_function_arguments = lambda_function_expression_list[0]->as(); + + if (!tuple_function_arguments || !tuple_function_arguments->arguments) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Lambda must have valid arguments"); + + std::unordered_set arguments; + + for (const auto & argument : tuple_function_arguments->arguments->children) + { + const auto * argument_identifier = argument->as(); + + if (!argument_identifier) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Lambda argument must be identifier"); + + const auto & argument_name = argument_identifier->name(); + auto [_, inserted] = arguments.insert(argument_name); + if (!inserted) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Identifier {} already used as function parameter", argument_name); + } + + ASTPtr function_body = lambda_function_expression_list[1]; + if (!function_body) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Lambda must have valid function body"); + + validateFunctionRecursiveness(*function_body, name); + } + + ASTPtr normalizeCreateFunctionQuery(const IAST & create_function_query) + { + auto ptr = create_function_query.clone(); + auto & res = typeid_cast(*ptr); + res.if_not_exists = false; + res.or_replace = false; + FunctionNameNormalizer().visit(res.function_core.get()); + return ptr; + } +} + + +UserDefinedSQLFunctionFactory & UserDefinedSQLFunctionFactory::instance() +{ + static UserDefinedSQLFunctionFactory result; + return result; +} + +void UserDefinedSQLFunctionFactory::checkCanBeRegistered(const ContextPtr & context, const String & function_name, const IAST & create_function_query) +{ + if (FunctionFactory::instance().hasNameOrAlias(function_name)) + throw Exception(ErrorCodes::FUNCTION_ALREADY_EXISTS, "The function '{}' already exists", function_name); + + if (AggregateFunctionFactory::instance().hasNameOrAlias(function_name)) + throw Exception(ErrorCodes::FUNCTION_ALREADY_EXISTS, "The aggregate function '{}' already exists", function_name); + + if (UserDefinedExecutableFunctionFactory::instance().has(function_name, context)) + throw Exception(ErrorCodes::FUNCTION_ALREADY_EXISTS, "User defined executable function '{}' already exists", function_name); + + validateFunction(assert_cast(create_function_query).function_core, function_name); +} + +void UserDefinedSQLFunctionFactory::checkCanBeUnregistered(const ContextPtr & context, const String & function_name) +{ + if (FunctionFactory::instance().hasNameOrAlias(function_name) || + AggregateFunctionFactory::instance().hasNameOrAlias(function_name)) + throw Exception(ErrorCodes::CANNOT_DROP_FUNCTION, "Cannot drop system function '{}'", function_name); + + if (UserDefinedExecutableFunctionFactory::instance().has(function_name, context)) + throw Exception(ErrorCodes::CANNOT_DROP_FUNCTION, "Cannot drop user defined executable function '{}'", function_name); +} + +bool 
UserDefinedSQLFunctionFactory::registerFunction(const ContextMutablePtr & context, const String & function_name, ASTPtr create_function_query, bool throw_if_exists, bool replace_if_exists) +{ + checkCanBeRegistered(context, function_name, *create_function_query); + create_function_query = normalizeCreateFunctionQuery(*create_function_query); + + std::lock_guard lock{mutex}; + auto it = function_name_to_create_query_map.find(function_name); + if (it != function_name_to_create_query_map.end()) + { + if (throw_if_exists) + throw Exception(ErrorCodes::FUNCTION_ALREADY_EXISTS, "User-defined function '{}' already exists", function_name); + else if (!replace_if_exists) + return false; + } + + try + { + auto & loader = context->getUserDefinedSQLObjectsLoader(); + bool stored = loader.storeObject(UserDefinedSQLObjectType::Function, function_name, *create_function_query, throw_if_exists, replace_if_exists, context->getSettingsRef()); + if (!stored) + return false; + } + catch (Exception & exception) + { + exception.addMessage(fmt::format("while storing user defined function {}", backQuote(function_name))); + throw; + } + + function_name_to_create_query_map[function_name] = create_function_query; + return true; +} + +bool UserDefinedSQLFunctionFactory::unregisterFunction(const ContextMutablePtr & context, const String & function_name, bool throw_if_not_exists) +{ + checkCanBeUnregistered(context, function_name); + + std::lock_guard lock(mutex); + auto it = function_name_to_create_query_map.find(function_name); + if (it == function_name_to_create_query_map.end()) + { + if (throw_if_not_exists) + throw Exception(ErrorCodes::UNKNOWN_FUNCTION, "User-defined function '{}' doesn't exist", function_name); + else + return false; + } + + try + { + auto & loader = context->getUserDefinedSQLObjectsLoader(); + bool removed = loader.removeObject(UserDefinedSQLObjectType::Function, function_name, throw_if_not_exists); + if (!removed) + return false; + } + catch (Exception & exception) + { + exception.addMessage(fmt::format("while removing user defined function {}", backQuote(function_name))); + throw; + } + + function_name_to_create_query_map.erase(function_name); + return true; +} + +ASTPtr UserDefinedSQLFunctionFactory::get(const String & function_name) const +{ + std::lock_guard lock(mutex); + + auto it = function_name_to_create_query_map.find(function_name); + if (it == function_name_to_create_query_map.end()) + throw Exception(ErrorCodes::UNKNOWN_FUNCTION, + "The function name '{}' is not registered", + function_name); + + return it->second; +} + +ASTPtr UserDefinedSQLFunctionFactory::tryGet(const std::string & function_name) const +{ + std::lock_guard lock(mutex); + + auto it = function_name_to_create_query_map.find(function_name); + if (it == function_name_to_create_query_map.end()) + return nullptr; + + return it->second; +} + +bool UserDefinedSQLFunctionFactory::has(const String & function_name) const +{ + return tryGet(function_name) != nullptr; +} + +std::vector UserDefinedSQLFunctionFactory::getAllRegisteredNames() const +{ + std::vector registered_names; + + std::lock_guard lock(mutex); + registered_names.reserve(function_name_to_create_query_map.size()); + + for (const auto & [name, _] : function_name_to_create_query_map) + registered_names.emplace_back(name); + + return registered_names; +} + +bool UserDefinedSQLFunctionFactory::empty() const +{ + std::lock_guard lock(mutex); + return function_name_to_create_query_map.empty(); +} + +void UserDefinedSQLFunctionFactory::backup(BackupEntriesCollector & 
backup_entries_collector, const String & data_path_in_backup) const +{ + backupUserDefinedSQLObjects(backup_entries_collector, data_path_in_backup, UserDefinedSQLObjectType::Function, getAllFunctions()); +} + +void UserDefinedSQLFunctionFactory::restore(RestorerFromBackup & restorer, const String & data_path_in_backup) +{ + auto restored_functions = restoreUserDefinedSQLObjects(restorer, data_path_in_backup, UserDefinedSQLObjectType::Function); + const auto & restore_settings = restorer.getRestoreSettings(); + bool throw_if_exists = (restore_settings.create_function == RestoreUDFCreationMode::kCreate); + bool replace_if_exists = (restore_settings.create_function == RestoreUDFCreationMode::kReplace); + auto context = restorer.getContext(); + for (const auto & [function_name, create_function_query] : restored_functions) + registerFunction(context, function_name, create_function_query, throw_if_exists, replace_if_exists); +} + +void UserDefinedSQLFunctionFactory::setAllFunctions(const std::vector> & new_functions) +{ + std::unordered_map normalized_functions; + for (const auto & [function_name, create_query] : new_functions) + normalized_functions[function_name] = normalizeCreateFunctionQuery(*create_query); + + std::lock_guard lock(mutex); + function_name_to_create_query_map = std::move(normalized_functions); +} + +std::vector> UserDefinedSQLFunctionFactory::getAllFunctions() const +{ + std::lock_guard lock{mutex}; + std::vector> all_functions; + all_functions.reserve(function_name_to_create_query_map.size()); + std::copy(function_name_to_create_query_map.begin(), function_name_to_create_query_map.end(), std::back_inserter(all_functions)); + return all_functions; +} + +void UserDefinedSQLFunctionFactory::setFunction(const String & function_name, const IAST & create_function_query) +{ + std::lock_guard lock(mutex); + function_name_to_create_query_map[function_name] = normalizeCreateFunctionQuery(create_function_query); +} + +void UserDefinedSQLFunctionFactory::removeFunction(const String & function_name) +{ + std::lock_guard lock(mutex); + function_name_to_create_query_map.erase(function_name); +} + +void UserDefinedSQLFunctionFactory::removeAllFunctionsExcept(const Strings & function_names_to_keep) +{ + boost::container::flat_set names_set_to_keep{function_names_to_keep.begin(), function_names_to_keep.end()}; + std::lock_guard lock(mutex); + for (auto it = function_name_to_create_query_map.begin(); it != function_name_to_create_query_map.end();) + { + auto current = it++; + if (!names_set_to_keep.contains(current->first)) + function_name_to_create_query_map.erase(current); + } +} + +std::unique_lock UserDefinedSQLFunctionFactory::getLock() const +{ + return std::unique_lock{mutex}; +} + +} diff --git a/src/Functions/UserDefined/UserDefinedSQLFunctionFactory.h b/src/Functions/UserDefined/UserDefinedSQLFunctionFactory.h new file mode 100644 index 00000000000..45196759d3b --- /dev/null +++ b/src/Functions/UserDefined/UserDefinedSQLFunctionFactory.h @@ -0,0 +1,70 @@ +#pragma once + +#include +#include + +#include + +#include +#include + + +namespace DB +{ +class BackupEntriesCollector; +class RestorerFromBackup; + +/// Factory for SQLUserDefinedFunctions +class UserDefinedSQLFunctionFactory : public IHints<1, UserDefinedSQLFunctionFactory> +{ +public: + static UserDefinedSQLFunctionFactory & instance(); + + /// Register function for function_name in factory for specified create_function_query. 
+ bool registerFunction(const ContextMutablePtr & context, const String & function_name, ASTPtr create_function_query, bool throw_if_exists, bool replace_if_exists); + + /// Unregister function for function_name. + bool unregisterFunction(const ContextMutablePtr & context, const String & function_name, bool throw_if_not_exists); + + /// Get function create query for function_name. If no function registered with function_name throws exception. + ASTPtr get(const String & function_name) const; + + /// Get function create query for function_name. If no function registered with function_name return nullptr. + ASTPtr tryGet(const String & function_name) const; + + /// Check if function with function_name registered. + bool has(const String & function_name) const; + + /// Get all user defined functions registered names. + std::vector getAllRegisteredNames() const override; + + /// Check whether any UDFs have been registered + bool empty() const; + + /// Makes backup entries for all user-defined SQL functions. + void backup(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup) const; + + /// Restores user-defined SQL functions from the backup. + void restore(RestorerFromBackup & restorer, const String & data_path_in_backup); + +private: + friend class UserDefinedSQLObjectsLoaderFromDisk; + friend class UserDefinedSQLObjectsLoaderFromZooKeeper; + + /// Checks that a specified function can be registered, throws an exception if not. + static void checkCanBeRegistered(const ContextPtr & context, const String & function_name, const IAST & create_function_query); + static void checkCanBeUnregistered(const ContextPtr & context, const String & function_name); + + /// The following functions must be called only by the loader. + void setAllFunctions(const std::vector> & new_functions); + std::vector> getAllFunctions() const; + void setFunction(const String & function_name, const IAST & create_function_query); + void removeFunction(const String & function_name); + void removeAllFunctionsExcept(const Strings & function_names_to_keep); + std::unique_lock getLock() const; + + std::unordered_map function_name_to_create_query_map; + mutable std::recursive_mutex mutex; +}; + +} diff --git a/src/Interpreters/UserDefinedSQLFunctionVisitor.cpp b/src/Functions/UserDefined/UserDefinedSQLFunctionVisitor.cpp similarity index 98% rename from src/Interpreters/UserDefinedSQLFunctionVisitor.cpp rename to src/Functions/UserDefined/UserDefinedSQLFunctionVisitor.cpp index 1adb3d5819a..9bb0abc6369 100644 --- a/src/Interpreters/UserDefinedSQLFunctionVisitor.cpp +++ b/src/Functions/UserDefined/UserDefinedSQLFunctionVisitor.cpp @@ -8,7 +8,7 @@ #include #include #include -#include +#include namespace DB diff --git a/src/Interpreters/UserDefinedSQLFunctionVisitor.h b/src/Functions/UserDefined/UserDefinedSQLFunctionVisitor.h similarity index 100% rename from src/Interpreters/UserDefinedSQLFunctionVisitor.h rename to src/Functions/UserDefined/UserDefinedSQLFunctionVisitor.h diff --git a/src/Functions/UserDefined/UserDefinedSQLObjectType.h b/src/Functions/UserDefined/UserDefinedSQLObjectType.h new file mode 100644 index 00000000000..f7e6fff5cad --- /dev/null +++ b/src/Functions/UserDefined/UserDefinedSQLObjectType.h @@ -0,0 +1,12 @@ +#pragma once + + +namespace DB +{ + +enum class UserDefinedSQLObjectType +{ + Function +}; + +} diff --git a/src/Functions/UserDefined/UserDefinedSQLObjectsBackup.cpp b/src/Functions/UserDefined/UserDefinedSQLObjectsBackup.cpp new file mode 100644 index 
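The factory above, together with the loader interface, backs the SQL-level DDL for user-defined functions; a minimal sketch of a session (the function name is purely illustrative) whose effects registerFunction/unregisterFunction persist through the configured IUserDefinedSQLObjectsLoader:

    CREATE FUNCTION linear_eq AS (x, k, b) -> k * x + b;  -- stored by the loader, e.g. as function_linear_eq.sql on disk
    SELECT linear_eq(3, 2, 1);                             -- returns 7
    DROP FUNCTION linear_eq;                               -- removes the stored object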
00000000000..6962c21280d --- /dev/null +++ b/src/Functions/UserDefined/UserDefinedSQLObjectsBackup.cpp @@ -0,0 +1,103 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int CANNOT_RESTORE_TABLE; +} + +void backupUserDefinedSQLObjects( + BackupEntriesCollector & backup_entries_collector, + const String & data_path_in_backup, + UserDefinedSQLObjectType /* object_type */, + const std::vector> & objects) +{ + std::vector> backup_entries; + backup_entries.reserve(objects.size()); + for (const auto & [function_name, create_function_query] : objects) + backup_entries.emplace_back( + escapeForFileName(function_name) + ".sql", std::make_shared(queryToString(create_function_query))); + + fs::path data_path_in_backup_fs{data_path_in_backup}; + for (const auto & entry : backup_entries) + backup_entries_collector.addBackupEntry(data_path_in_backup_fs / entry.first, entry.second); +} + + +std::vector> +restoreUserDefinedSQLObjects(RestorerFromBackup & restorer, const String & data_path_in_backup, UserDefinedSQLObjectType object_type) +{ + auto context = restorer.getContext(); + auto backup = restorer.getBackup(); + fs::path data_path_in_backup_fs{data_path_in_backup}; + + Strings filenames = backup->listFiles(data_path_in_backup); + if (filenames.empty()) + return {}; /// Nothing to restore. + + for (const auto & filename : filenames) + { + if (!filename.ends_with(".sql")) + { + throw Exception( + ErrorCodes::CANNOT_RESTORE_TABLE, + "Cannot restore user-defined SQL objects: File name {} doesn't have the extension .sql", + String{data_path_in_backup_fs / filename}); + } + } + + std::vector> res; + + for (const auto & filename : filenames) + { + String escaped_function_name = filename.substr(0, filename.length() - strlen(".sql")); + String function_name = unescapeForFileName(escaped_function_name); + + String filepath = data_path_in_backup_fs / filename; + auto backup_entry = backup->readFile(filepath); + auto in = backup_entry->getReadBuffer(); + String statement_def; + readStringUntilEOF(statement_def, *in); + + ASTPtr ast; + + switch (object_type) + { + case UserDefinedSQLObjectType::Function: + { + ParserCreateFunctionQuery parser; + ast = parseQuery( + parser, + statement_def.data(), + statement_def.data() + statement_def.size(), + "in file " + filepath + " from backup " + backup->getNameForLogging(), + 0, + context->getSettingsRef().max_parser_depth); + break; + } + } + + res.emplace_back(std::move(function_name), ast); + } + + return res; +} + +} diff --git a/src/Functions/UserDefined/UserDefinedSQLObjectsBackup.h b/src/Functions/UserDefined/UserDefinedSQLObjectsBackup.h new file mode 100644 index 00000000000..a1e970d8af5 --- /dev/null +++ b/src/Functions/UserDefined/UserDefinedSQLObjectsBackup.h @@ -0,0 +1,25 @@ +#pragma once + +#include +#include + + +namespace DB +{ +class BackupEntriesCollector; +class RestorerFromBackup; +enum class UserDefinedSQLObjectType; +class IBackupEntry; +using BackupEntryPtr = std::shared_ptr; + +/// Makes backup entries to backup user-defined SQL objects. +void backupUserDefinedSQLObjects( + BackupEntriesCollector & backup_entries_collector, + const String & data_path_in_backup, + UserDefinedSQLObjectType object_type, + const std::vector> & objects); + +/// Restores user-defined SQL objects from the backup. 
+std::vector> +restoreUserDefinedSQLObjects(RestorerFromBackup & restorer, const String & data_path_in_backup, UserDefinedSQLObjectType object_type); +} diff --git a/src/Functions/UserDefined/UserDefinedSQLObjectsLoaderFromDisk.cpp b/src/Functions/UserDefined/UserDefinedSQLObjectsLoaderFromDisk.cpp new file mode 100644 index 00000000000..93466be54fb --- /dev/null +++ b/src/Functions/UserDefined/UserDefinedSQLObjectsLoaderFromDisk.cpp @@ -0,0 +1,265 @@ +#include "Functions/UserDefined/UserDefinedSQLObjectsLoaderFromDisk.h" + +#include "Functions/UserDefined/UserDefinedSQLFunctionFactory.h" +#include "Functions/UserDefined/UserDefinedSQLObjectType.h" + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include +#include +#include + +#include +#include + +#include + +namespace fs = std::filesystem; + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int DIRECTORY_DOESNT_EXIST; + extern const int FUNCTION_ALREADY_EXISTS; + extern const int UNKNOWN_FUNCTION; +} + + +namespace +{ + /// Converts a path to an absolute path and append it with a separator. + String makeDirectoryPathCanonical(const String & directory_path) + { + auto canonical_directory_path = std::filesystem::weakly_canonical(directory_path); + if (canonical_directory_path.has_filename()) + canonical_directory_path += std::filesystem::path::preferred_separator; + return canonical_directory_path; + } +} + +UserDefinedSQLObjectsLoaderFromDisk::UserDefinedSQLObjectsLoaderFromDisk(const ContextPtr & global_context_, const String & dir_path_) + : global_context(global_context_) + , dir_path{makeDirectoryPathCanonical(dir_path_)} + , log{&Poco::Logger::get("UserDefinedSQLObjectsLoaderFromDisk")} +{ + createDirectory(); +} + + +ASTPtr UserDefinedSQLObjectsLoaderFromDisk::tryLoadObject(UserDefinedSQLObjectType object_type, const String & object_name) +{ + return tryLoadObject(object_type, object_name, getFilePath(object_type, object_name), /* check_file_exists= */ true); +} + + +ASTPtr UserDefinedSQLObjectsLoaderFromDisk::tryLoadObject(UserDefinedSQLObjectType object_type, const String & object_name, const String & path, bool check_file_exists) +{ + LOG_DEBUG(log, "Loading user defined object {} from file {}", backQuote(object_name), path); + + try + { + if (check_file_exists && !fs::exists(path)) + return nullptr; + + /// There is .sql file with user defined object creation statement. + ReadBufferFromFile in(path); + + String object_create_query; + readStringUntilEOF(object_create_query, in); + + switch (object_type) + { + case UserDefinedSQLObjectType::Function: + { + ParserCreateFunctionQuery parser; + ASTPtr ast = parseQuery( + parser, + object_create_query.data(), + object_create_query.data() + object_create_query.size(), + "", + 0, + global_context->getSettingsRef().max_parser_depth); + UserDefinedSQLFunctionFactory::checkCanBeRegistered(global_context, object_name, *ast); + return ast; + } + } + } + catch (...) 
+ { + tryLogCurrentException(log, fmt::format("while loading user defined SQL object {} from path {}", backQuote(object_name), path)); + return nullptr; /// Failed to load this sql object, will ignore it + } +} + + +void UserDefinedSQLObjectsLoaderFromDisk::loadObjects() +{ + if (!objects_loaded) + loadObjectsImpl(); +} + + +void UserDefinedSQLObjectsLoaderFromDisk::reloadObjects() +{ + loadObjectsImpl(); +} + + +void UserDefinedSQLObjectsLoaderFromDisk::loadObjectsImpl() +{ + LOG_INFO(log, "Loading user defined objects from {}", dir_path); + createDirectory(); + + std::vector> function_names_and_queries; + + Poco::DirectoryIterator dir_end; + for (Poco::DirectoryIterator it(dir_path); it != dir_end; ++it) + { + if (it->isDirectory()) + continue; + + const String & file_name = it.name(); + if (!startsWith(file_name, "function_") || !endsWith(file_name, ".sql")) + continue; + + size_t prefix_length = strlen("function_"); + size_t suffix_length = strlen(".sql"); + String function_name = unescapeForFileName(file_name.substr(prefix_length, file_name.length() - prefix_length - suffix_length)); + + if (function_name.empty()) + continue; + + ASTPtr ast = tryLoadObject(UserDefinedSQLObjectType::Function, function_name, dir_path + it.name(), /* check_file_exists= */ false); + if (ast) + function_names_and_queries.emplace_back(function_name, ast); + } + + UserDefinedSQLFunctionFactory::instance().setAllFunctions(function_names_and_queries); + objects_loaded = true; + + LOG_DEBUG(log, "User defined objects loaded"); +} + + +void UserDefinedSQLObjectsLoaderFromDisk::reloadObject(UserDefinedSQLObjectType object_type, const String & object_name) +{ + createDirectory(); + auto ast = tryLoadObject(object_type, object_name); + auto & factory = UserDefinedSQLFunctionFactory::instance(); + if (ast) + factory.setFunction(object_name, *ast); + else + factory.removeFunction(object_name); +} + + +void UserDefinedSQLObjectsLoaderFromDisk::createDirectory() +{ + std::error_code create_dir_error_code; + fs::create_directories(dir_path, create_dir_error_code); + if (!fs::exists(dir_path) || !fs::is_directory(dir_path) || create_dir_error_code) + throw Exception("Couldn't create directory " + dir_path + " reason: '" + create_dir_error_code.message() + "'", ErrorCodes::DIRECTORY_DOESNT_EXIST); +} + + +bool UserDefinedSQLObjectsLoaderFromDisk::storeObject( + UserDefinedSQLObjectType object_type, + const String & object_name, + const IAST & create_object_query, + bool throw_if_exists, + bool replace_if_exists, + const Settings & settings) +{ + String file_path = getFilePath(object_type, object_name); + LOG_DEBUG(log, "Storing user-defined object {} to file {}", backQuote(object_name), file_path); + + if (fs::exists(file_path)) + { + if (throw_if_exists) + throw Exception(ErrorCodes::FUNCTION_ALREADY_EXISTS, "User-defined function '{}' already exists", object_name); + else if (!replace_if_exists) + return false; + } + + WriteBufferFromOwnString create_statement_buf; + formatAST(create_object_query, create_statement_buf, false); + writeChar('\n', create_statement_buf); + String create_statement = create_statement_buf.str(); + + String temp_file_path = file_path + ".tmp"; + + try + { + WriteBufferFromFile out(temp_file_path, create_statement.size()); + writeString(create_statement, out); + out.next(); + if (settings.fsync_metadata) + out.sync(); + out.close(); + + if (replace_if_exists) + fs::rename(temp_file_path, file_path); + else + renameNoReplace(temp_file_path, file_path); + } + catch (...) 
+ { + fs::remove(temp_file_path); + throw; + } + + LOG_TRACE(log, "Object {} stored", backQuote(object_name)); + return true; +} + + +bool UserDefinedSQLObjectsLoaderFromDisk::removeObject( + UserDefinedSQLObjectType object_type, const String & object_name, bool throw_if_not_exists) +{ + String file_path = getFilePath(object_type, object_name); + LOG_DEBUG(log, "Removing user defined object {} stored in file {}", backQuote(object_name), file_path); + + bool existed = fs::remove(file_path); + + if (!existed) + { + if (throw_if_not_exists) + throw Exception(ErrorCodes::UNKNOWN_FUNCTION, "User-defined function '{}' doesn't exist", object_name); + else + return false; + } + + LOG_TRACE(log, "Object {} removed", backQuote(object_name)); + return true; +} + + +String UserDefinedSQLObjectsLoaderFromDisk::getFilePath(UserDefinedSQLObjectType object_type, const String & object_name) const +{ + String file_path; + switch (object_type) + { + case UserDefinedSQLObjectType::Function: + { + file_path = dir_path + "function_" + escapeForFileName(object_name) + ".sql"; + break; + } + } + return file_path; +} + +} diff --git a/src/Functions/UserDefined/UserDefinedSQLObjectsLoaderFromDisk.h b/src/Functions/UserDefined/UserDefinedSQLObjectsLoaderFromDisk.h new file mode 100644 index 00000000000..7b0bb291f42 --- /dev/null +++ b/src/Functions/UserDefined/UserDefinedSQLObjectsLoaderFromDisk.h @@ -0,0 +1,46 @@ +#pragma once + +#include +#include +#include + + +namespace DB +{ + +/// Loads user-defined sql objects from a specified folder. +class UserDefinedSQLObjectsLoaderFromDisk : public IUserDefinedSQLObjectsLoader +{ +public: + UserDefinedSQLObjectsLoaderFromDisk(const ContextPtr & global_context_, const String & dir_path_); + + void loadObjects() override; + + void reloadObjects() override; + + void reloadObject(UserDefinedSQLObjectType object_type, const String & object_name) override; + + bool storeObject( + UserDefinedSQLObjectType object_type, + const String & object_name, + const IAST & create_object_query, + bool throw_if_exists, + bool replace_if_exists, + const Settings & settings) override; + + bool removeObject(UserDefinedSQLObjectType object_type, const String & object_name, bool throw_if_not_exists) override; + +private: + void createDirectory(); + void loadObjectsImpl(); + ASTPtr tryLoadObject(UserDefinedSQLObjectType object_type, const String & object_name); + ASTPtr tryLoadObject(UserDefinedSQLObjectType object_type, const String & object_name, const String & file_path, bool check_file_exists); + String getFilePath(UserDefinedSQLObjectType object_type, const String & object_name) const; + + ContextPtr global_context; + String dir_path; + Poco::Logger * log; + std::atomic objects_loaded = false; +}; + +} diff --git a/src/Functions/UserDefined/createUserDefinedSQLObjectsLoader.cpp b/src/Functions/UserDefined/createUserDefinedSQLObjectsLoader.cpp new file mode 100644 index 00000000000..9d0137328d1 --- /dev/null +++ b/src/Functions/UserDefined/createUserDefinedSQLObjectsLoader.cpp @@ -0,0 +1,21 @@ +#include +#include +#include +#include +#include + +namespace fs = std::filesystem; + + +namespace DB +{ + +std::unique_ptr createUserDefinedSQLObjectsLoader(const ContextMutablePtr & global_context) +{ + const auto & config = global_context->getConfigRef(); + String default_path = fs::path{global_context->getPath()} / "user_defined/"; + String path = config.getString("user_defined_path", default_path); + return std::make_unique(global_context, path); +} + +} diff --git 
a/src/Functions/UserDefined/createUserDefinedSQLObjectsLoader.h b/src/Functions/UserDefined/createUserDefinedSQLObjectsLoader.h new file mode 100644 index 00000000000..b3a4623dba3 --- /dev/null +++ b/src/Functions/UserDefined/createUserDefinedSQLObjectsLoader.h @@ -0,0 +1,12 @@ +#pragma once + +#include + + +namespace DB +{ +class IUserDefinedSQLObjectsLoader; + +std::unique_ptr createUserDefinedSQLObjectsLoader(const ContextMutablePtr & global_context); + +} diff --git a/src/Functions/array/FunctionArrayMapped.h b/src/Functions/array/FunctionArrayMapped.h index 6d500cc15c4..dfed7cedcf0 100644 --- a/src/Functions/array/FunctionArrayMapped.h +++ b/src/Functions/array/FunctionArrayMapped.h @@ -185,8 +185,10 @@ public: const auto * data_type_function = checkAndGetDataType(arguments[0].type.get()); if (!data_type_function) - throw Exception("First argument for function " + getName() + " must be a function", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "First argument for function {} must be a function. Actual {}", + getName(), + arguments[0].type->getName()); /// The types of the remaining arguments are already checked in getLambdaArgumentTypes. diff --git a/src/Functions/array/arrayAggregation.cpp b/src/Functions/array/arrayAggregation.cpp index 7b72060f0c0..c8eae78dfaa 100644 --- a/src/Functions/array/arrayAggregation.cpp +++ b/src/Functions/array/arrayAggregation.cpp @@ -223,7 +223,7 @@ struct ArrayAggregateImpl if (unlikely(result_scale > DecimalUtils::max_precision)) throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Scale {} is out of bounds", result_scale); - res[i] = DecimalUtils::convertTo(product, result_scale); + res[i] = DecimalUtils::convertTo(product, static_cast(result_scale)); } else { @@ -332,7 +332,7 @@ struct ArrayAggregateImpl if (unlikely(result_scale > DecimalUtils::max_precision)) throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Scale {} is out of bounds", result_scale); - res[i] = DecimalUtils::convertTo(aggregate_value, result_scale); + res[i] = DecimalUtils::convertTo(aggregate_value, static_cast(result_scale)); } else { diff --git a/src/Functions/array/arrayCount.cpp b/src/Functions/array/arrayCount.cpp index cb902206e8b..f7ded051e5e 100644 --- a/src/Functions/array/arrayCount.cpp +++ b/src/Functions/array/arrayCount.cpp @@ -49,7 +49,7 @@ struct ArrayCountImpl size_t pos = 0; for (size_t i = 0; i < offsets.size(); ++i) { - out_counts[i] = offsets[i] - pos; + out_counts[i] = static_cast(offsets[i] - pos); pos = offsets[i]; } @@ -73,7 +73,7 @@ struct ArrayCountImpl if (filter[pos]) ++count; } - out_counts[i] = count; + out_counts[i] = static_cast(count); } return out_column; diff --git a/src/Functions/array/arrayElement.cpp b/src/Functions/array/arrayElement.cpp index acd8f89ffe5..59224096d3c 100644 --- a/src/Functions/array/arrayElement.cpp +++ b/src/Functions/array/arrayElement.cpp @@ -1025,12 +1025,14 @@ ColumnPtr FunctionArrayElement::executeMap( if (col_const_map) values_array = ColumnConst::create(values_array, input_rows_count); + const auto & type_map = assert_cast(*arguments[0].type); + /// Prepare arguments to call arrayElement for array with values and calculated indices at previous step. 
ColumnsWithTypeAndName new_arguments = { { values_array, - std::make_shared(result_type), + std::make_shared(type_map.getValueType()), "" }, { @@ -1086,7 +1088,9 @@ ColumnPtr FunctionArrayElement::executeImpl(const ColumnsWithTypeAndName & argum col_array = checkAndGetColumn(arguments[0].column.get()); if (col_array) + { is_array_of_nullable = isColumnNullable(col_array->getData()); + } else { col_const_array = checkAndGetColumnConstData(arguments[0].column.get()); diff --git a/src/Functions/array/arrayEnumerate.cpp b/src/Functions/array/arrayEnumerate.cpp index b20f91fe2dd..666e01899bd 100644 --- a/src/Functions/array/arrayEnumerate.cpp +++ b/src/Functions/array/arrayEnumerate.cpp @@ -60,7 +60,7 @@ public: for (auto off : offsets) { for (ColumnArray::Offset j = prev_off; j < off; ++j) - res_values[j] = j - prev_off + 1; + res_values[j] = static_cast(j - prev_off + 1); prev_off = off; } diff --git a/src/Functions/array/arrayEnumerateRanked.cpp b/src/Functions/array/arrayEnumerateRanked.cpp index 7c4b755e020..d19781f97c3 100644 --- a/src/Functions/array/arrayEnumerateRanked.cpp +++ b/src/Functions/array/arrayEnumerateRanked.cpp @@ -38,7 +38,7 @@ ArraysDepths getArraysDepths(const ColumnsWithTypeAndName & arguments) if (depths.size() < array_num && prev_array_depth) depths.emplace_back(prev_array_depth); - prev_array_depth = type_array->getNumberOfDimensions(); + prev_array_depth = static_cast(type_array->getNumberOfDimensions()); ++array_num; } else @@ -55,7 +55,7 @@ ArraysDepths getArraysDepths(const ColumnsWithTypeAndName & arguments) if (i == 0) { - clear_depth = value; + clear_depth = static_cast(value); } else { diff --git a/src/Functions/array/arrayFirstLastIndex.cpp b/src/Functions/array/arrayFirstLastIndex.cpp index f7355eb2b38..effcb04ab48 100644 --- a/src/Functions/array/arrayFirstLastIndex.cpp +++ b/src/Functions/array/arrayFirstLastIndex.cpp @@ -61,7 +61,7 @@ struct ArrayFirstLastIndexImpl if constexpr (strategy == ArrayFirstLastIndexStrategy::First) out_index[offset_index] = 1; else - out_index[offset_index] = end_offset - start_offset; + out_index[offset_index] = static_cast(end_offset - start_offset); } else { @@ -113,7 +113,7 @@ struct ArrayFirstLastIndexImpl } } - out_index[offset_index] = result_index; + out_index[offset_index] = static_cast(result_index); } return out_column; diff --git a/src/Functions/array/arrayReduce.cpp b/src/Functions/array/arrayReduce.cpp index fd16f1fc986..c93e67d4b1c 100644 --- a/src/Functions/array/arrayReduce.cpp +++ b/src/Functions/array/arrayReduce.cpp @@ -152,13 +152,6 @@ ColumnPtr FunctionArrayReduce::executeImpl(const ColumnsWithTypeAndName & argume MutableColumnPtr result_holder = result_type->createColumn(); IColumn & res_col = *result_holder; - /// AggregateFunction's states should be inserted into column using specific way - auto * res_col_aggregate_function = typeid_cast(&res_col); - - if (!res_col_aggregate_function && agg_func.isState()) - throw Exception("State function " + agg_func.getName() + " inserts results into non-state column " - + result_type->getName(), ErrorCodes::ILLEGAL_COLUMN); - PODArray places(input_rows_count); for (size_t i = 0; i < input_rows_count; ++i) { @@ -190,10 +183,9 @@ ColumnPtr FunctionArrayReduce::executeImpl(const ColumnsWithTypeAndName & argume } for (size_t i = 0; i < input_rows_count; ++i) - if (!res_col_aggregate_function) - agg_func.insertResultInto(places[i], res_col, arena.get()); - else - res_col_aggregate_function->insertFrom(places[i]); + /// We should use insertMergeResultInto to insert 
result into ColumnAggregateFunction + /// correctly if result contains AggregateFunction's states + agg_func.insertMergeResultInto(places[i], res_col, arena.get()); return result_holder; } diff --git a/src/Functions/array/arrayReduceInRanges.cpp b/src/Functions/array/arrayReduceInRanges.cpp index d2a382e86ba..11d5e03eb3d 100644 --- a/src/Functions/array/arrayReduceInRanges.cpp +++ b/src/Functions/array/arrayReduceInRanges.cpp @@ -202,13 +202,6 @@ ColumnPtr FunctionArrayReduceInRanges::executeImpl( result_arr->getOffsets().insert(ranges_offsets->begin(), ranges_offsets->end()); - /// AggregateFunction's states should be inserted into column using specific way - auto * res_col_aggregate_function = typeid_cast(&result_data); - - if (!res_col_aggregate_function && agg_func.isState()) - throw Exception("State function " + agg_func.getName() + " inserts results into non-state column " - + result_type->getName(), ErrorCodes::ILLEGAL_COLUMN); - /// Perform the aggregation size_t begin = 0; @@ -379,11 +372,9 @@ ColumnPtr FunctionArrayReduceInRanges::executeImpl( for (size_t k = local_begin; k < local_end; ++k) true_func->add(place, aggregate_arguments, begin + k, arena.get()); } - - if (!res_col_aggregate_function) - agg_func.insertResultInto(place, result_data, arena.get()); - else - res_col_aggregate_function->insertFrom(place); + /// We should use insertMergeResultInto to insert result into ColumnAggregateFunction + /// correctly if result contains AggregateFunction's states + agg_func.insertMergeResultInto(place, result_data, arena.get()); } } diff --git a/src/Functions/array/arrayUniq.cpp b/src/Functions/array/arrayUniq.cpp index ff75efaae71..a43c21508d9 100644 --- a/src/Functions/array/arrayUniq.cpp +++ b/src/Functions/array/arrayUniq.cpp @@ -233,7 +233,7 @@ void FunctionArrayUniq::executeMethodImpl( method.emplaceKey(set, j, pool); } - res_values[i] = set.size() + found_null; + res_values[i] = static_cast(set.size() + found_null); prev_off = off; } } diff --git a/src/Functions/array/range.cpp b/src/Functions/array/range.cpp index 6b3d8ad1139..3b5bb686e60 100644 --- a/src/Functions/array/range.cpp +++ b/src/Functions/array/range.cpp @@ -97,7 +97,7 @@ private: for (size_t row_idx = 0, rows = in->size(); row_idx < rows; ++row_idx) { for (size_t elem_idx = 0, elems = in_data[row_idx]; elem_idx < elems; ++elem_idx) - out_data[offset + elem_idx] = elem_idx; + out_data[offset + elem_idx] = static_cast(elem_idx); offset += in_data[row_idx]; out_offsets[row_idx] = offset; @@ -153,7 +153,7 @@ private: { for (size_t st = start, ed = end_data[row_idx]; st < ed; st += step) { - out_data[offset++] = st; + out_data[offset++] = static_cast(st); if (st > st + step) throw Exception{"A call to function " + getName() + " overflows, investigate the values of arguments you are passing", @@ -212,7 +212,7 @@ private: { for (size_t st = start_data[row_idx], ed = end_data[row_idx]; st < ed; st += step) { - out_data[offset++] = st; + out_data[offset++] = static_cast(st); if (st > st + step) throw Exception{"A call to function " + getName() + " overflows, investigate the values of arguments you are passing", @@ -271,7 +271,7 @@ private: { for (size_t st = start, ed = end_data[row_idx]; st < ed; st += step_data[row_idx]) { - out_data[offset++] = st; + out_data[offset++] = static_cast(st); if (st > st + step_data[row_idx]) throw Exception{"A call to function " + getName() + " overflows, investigate the values of arguments you are passing", @@ -333,7 +333,7 @@ private: { for (size_t st = start_data[row_idx], ed = 
end_start[row_idx]; st < ed; st += step_data[row_idx]) { - out_data[offset++] = st; + out_data[offset++] = static_cast(st); if (st > st + step_data[row_idx]) throw Exception{"A call to function " + getName() + " overflows, investigate the values of arguments you are passing", @@ -407,7 +407,7 @@ private: if ((res = executeConstStartStep(column_ptrs[1], start, step, input_rows_count)) || (res = executeConstStartStep(column_ptrs[1], start, step, input_rows_count)) || - (res = executeConstStartStep(column_ptrs[1], start, step, input_rows_count)) || + (res = executeConstStartStep(column_ptrs[1], static_cast(start), static_cast(step), input_rows_count)) || (res = executeConstStartStep(column_ptrs[1], start, step, input_rows_count))) { } @@ -418,7 +418,7 @@ private: if ((res = executeConstStart(column_ptrs[1], column_ptrs[2], start, input_rows_count)) || (res = executeConstStart(column_ptrs[1], column_ptrs[2], start, input_rows_count)) || - (res = executeConstStart(column_ptrs[1], column_ptrs[2], start, input_rows_count)) || + (res = executeConstStart(column_ptrs[1], column_ptrs[2], static_cast(start), input_rows_count)) || (res = executeConstStart(column_ptrs[1], column_ptrs[2], start, input_rows_count))) { } @@ -429,7 +429,7 @@ private: if ((res = executeConstStep(column_ptrs[0], column_ptrs[1], step, input_rows_count)) || (res = executeConstStep(column_ptrs[0], column_ptrs[1], step, input_rows_count)) || - (res = executeConstStep(column_ptrs[0], column_ptrs[1], step, input_rows_count)) || + (res = executeConstStep(column_ptrs[0], column_ptrs[1], static_cast(step), input_rows_count)) || (res = executeConstStep(column_ptrs[0], column_ptrs[1], step, input_rows_count))) { } diff --git a/src/Functions/ascii.cpp b/src/Functions/ascii.cpp new file mode 100644 index 00000000000..cb59be55cc1 --- /dev/null +++ b/src/Functions/ascii.cpp @@ -0,0 +1,86 @@ +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int NOT_IMPLEMENTED; +} + +struct AsciiName +{ + static constexpr auto name = "ascii"; +}; + + +struct AsciiImpl +{ + static constexpr auto is_fixed_to_constant = false; + using ReturnType = Int32; + + + static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, PaddedPODArray & res) + { + size_t size = offsets.size(); + + ColumnString::Offset prev_offset = 0; + for (size_t i = 0; i < size; ++i) + { + res[i] = doAscii(data, prev_offset, offsets[i] - prev_offset - 1); + prev_offset = offsets[i]; + } + } + + [[noreturn]] static void vectorFixedToConstant(const ColumnString::Chars & /*data*/, size_t /*n*/, Int32 & /*res*/) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "vectorFixedToConstant not implemented for function {}", AsciiName::name); + } + + static void vectorFixedToVector(const ColumnString::Chars & data, size_t n, PaddedPODArray & res) + { + size_t size = data.size() / n; + + for (size_t i = 0; i < size; ++i) + { + res[i] = doAscii(data, i * n, n); + } + } + + [[noreturn]] static void array(const ColumnString::Offsets & /*offsets*/, PaddedPODArray & /*res*/) + { + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function {} to Array argument", AsciiName::name); + } + + [[noreturn]] static void uuid(const ColumnUUID::Container & /*offsets*/, size_t /*n*/, PaddedPODArray & /*res*/) + { + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function {} to UUID argument", AsciiName::name); + } + +private: + static Int32 doAscii(const 
ColumnString::Chars & buf, size_t offset, size_t size) + { + return size ? static_cast(buf[offset]) : 0; + } +}; + +using FunctionAscii = FunctionStringOrArrayToT; + +REGISTER_FUNCTION(Ascii) +{ + factory.registerFunction( + { + R"( +Returns the ASCII code point of the first character of str. The result type is Int32. + +If str is empty, the result is 0. If the first character is not an ASCII character or not part of the Latin-1 Supplement range of UTF-16, the result is undefined. + )", + Documentation::Examples{{"ascii", "SELECT ascii('234')"}}, + Documentation::Categories{"String"} + }, FunctionFactory::CaseInsensitive); +} + +} diff --git a/src/Functions/base64Decode.cpp b/src/Functions/base64Decode.cpp index f6943233d44..4060aafe1a3 100644 --- a/src/Functions/base64Decode.cpp +++ b/src/Functions/base64Decode.cpp @@ -1,8 +1,7 @@ #include + #if USE_BASE64 #include -#include - namespace DB { @@ -15,4 +14,5 @@ REGISTER_FUNCTION(Base64Decode) factory.registerAlias("FROM_BASE64", "base64Decode", FunctionFactory::CaseInsensitive); } } + #endif diff --git a/src/Functions/base64Encode.cpp b/src/Functions/base64Encode.cpp index fc06935e0a1..773db7e09d9 100644 --- a/src/Functions/base64Encode.cpp +++ b/src/Functions/base64Encode.cpp @@ -1,10 +1,7 @@ -#include #include -#include "config.h" - #if USE_BASE64 -# include +#include namespace DB { @@ -17,4 +14,5 @@ REGISTER_FUNCTION(Base64Encode) factory.registerAlias("TO_BASE64", "base64Encode", FunctionFactory::CaseInsensitive); } } + #endif diff --git a/src/Functions/blockSerializedSize.cpp b/src/Functions/blockSerializedSize.cpp index d406984c51c..35be65f3fed 100644 --- a/src/Functions/blockSerializedSize.cpp +++ b/src/Functions/blockSerializedSize.cpp @@ -54,7 +54,7 @@ public: auto serialization = elem.type->getDefaultSerialization(); - serialization->serializeBinaryBulkStatePrefix(settings, state); + serialization->serializeBinaryBulkStatePrefix(*full_column, settings, state); serialization->serializeBinaryBulkWithMultipleStreams(*full_column, 0 /** offset */, 0 /** limit */, settings, state); diff --git a/src/Functions/dateDiff.cpp b/src/Functions/dateDiff.cpp index b8bf3c11698..f5a4b50fb54 100644 --- a/src/Functions/dateDiff.cpp +++ b/src/Functions/dateDiff.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include @@ -44,7 +45,6 @@ namespace */ class FunctionDateDiff : public IFunction { - using ColumnDateTime64 = ColumnDecimal; public: static constexpr auto name = "dateDiff"; static FunctionPtr create(ContextPtr) { return std::make_shared(); } @@ -61,25 +61,30 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { if (arguments.size() != 3 && arguments.size() != 4) - throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " - + toString(arguments.size()) + ", should be 3 or 4", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be 3 or 4", + getName(), arguments.size()); if (!isString(arguments[0])) - throw Exception("First argument for function " + getName() + " (unit) must be String", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "First argument for function {} (unit) must be String", + getName()); - if (!isDate(arguments[1]) && !isDateTime(arguments[1]) && !isDateTime64(arguments[1])) - throw Exception("Second argument for function " + getName() + " must be Date or DateTime", -
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + if (!isDate(arguments[1]) && !isDate32(arguments[1]) && !isDateTime(arguments[1]) && !isDateTime64(arguments[1])) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Second argument for function {} must be Date, Date32, DateTime or DateTime64", + getName()); - if (!isDate(arguments[2]) && !isDateTime(arguments[2]) && !isDateTime64(arguments[2])) - throw Exception("Third argument for function " + getName() + " must be Date or DateTime", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + if (!isDate(arguments[2]) && !isDate32(arguments[2]) && !isDateTime(arguments[2]) && !isDateTime64(arguments[2])) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Third argument for function {} must be Date, Date32, DateTime or DateTime64", + getName() + ); if (arguments.size() == 4 && !isString(arguments[3])) - throw Exception("Fourth argument for function " + getName() + " (timezone) must be String", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Fourth argument for function {} (timezone) must be String", + getName()); return std::make_shared(); } @@ -91,7 +96,9 @@ public: { const auto * unit_column = checkAndGetColumnConst(arguments[0].column.get()); if (!unit_column) - throw Exception("First argument for function " + getName() + " must be constant String", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "First argument for function {} must be constant String", + getName()); String unit = Poco::toLower(unit_column->getValue()); @@ -105,23 +112,24 @@ public: const auto & timezone_y = extractTimeZoneFromFunctionArguments(arguments, 3, 2); if (unit == "year" || unit == "yy" || unit == "yyyy") - dispatchForColumns(x, y, timezone_x, timezone_y, res->getData()); + dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); else if (unit == "quarter" || unit == "qq" || unit == "q") - dispatchForColumns(x, y, timezone_x, timezone_y, res->getData()); + dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); else if (unit == "month" || unit == "mm" || unit == "m") - dispatchForColumns(x, y, timezone_x, timezone_y, res->getData()); + dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); else if (unit == "week" || unit == "wk" || unit == "ww") - dispatchForColumns(x, y, timezone_x, timezone_y, res->getData()); + dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); else if (unit == "day" || unit == "dd" || unit == "d") - dispatchForColumns(x, y, timezone_x, timezone_y, res->getData()); + dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); else if (unit == "hour" || unit == "hh" || unit == "h") - dispatchForColumns(x, y, timezone_x, timezone_y, res->getData()); + dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); else if (unit == "minute" || unit == "mi" || unit == "n") - dispatchForColumns(x, y, timezone_x, timezone_y, res->getData()); + dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); else if (unit == "second" || unit == "ss" || unit == "s") - dispatchForColumns(x, y, timezone_x, timezone_y, res->getData()); + dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); else - throw Exception("Function " + getName() + " does not support '" + unit + "' unit", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Function {} does not support '{}' unit", getName(), unit); return res; } @@ -133,20 +141,26 @@ private: const DateLUTImpl & timezone_x, const DateLUTImpl & 
timezone_y, ColumnInt64::Container & result) const { - if (const auto * x_vec_16 = checkAndGetColumn(&x)) + if (const auto * x_vec_16 = checkAndGetColumn(&x)) dispatchForSecondColumn(*x_vec_16, y, timezone_x, timezone_y, result); - else if (const auto * x_vec_32 = checkAndGetColumn(&x)) + else if (const auto * x_vec_32 = checkAndGetColumn(&x)) dispatchForSecondColumn(*x_vec_32, y, timezone_x, timezone_y, result); + else if (const auto * x_vec_32_s = checkAndGetColumn(&x)) + dispatchForSecondColumn(*x_vec_32_s, y, timezone_x, timezone_y, result); else if (const auto * x_vec_64 = checkAndGetColumn(&x)) dispatchForSecondColumn(*x_vec_64, y, timezone_x, timezone_y, result); - else if (const auto * x_const_16 = checkAndGetColumnConst(&x)) + else if (const auto * x_const_16 = checkAndGetColumnConst(&x)) dispatchConstForSecondColumn(x_const_16->getValue(), y, timezone_x, timezone_y, result); - else if (const auto * x_const_32 = checkAndGetColumnConst(&x)) + else if (const auto * x_const_32 = checkAndGetColumnConst(&x)) dispatchConstForSecondColumn(x_const_32->getValue(), y, timezone_x, timezone_y, result); + else if (const auto * x_const_32_s = checkAndGetColumnConst(&x)) + dispatchConstForSecondColumn(x_const_32_s->getValue(), y, timezone_x, timezone_y, result); else if (const auto * x_const_64 = checkAndGetColumnConst(&x)) dispatchConstForSecondColumn(x_const_64->getValue>(), y, timezone_x, timezone_y, result); else - throw Exception("Illegal column for first argument of function " + getName() + ", must be Date, DateTime or DateTime64", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Illegal column for first argument of function {}, must be Date, Date32, DateTime or DateTime64", + getName()); } template @@ -155,20 +169,26 @@ private: const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y, ColumnInt64::Container & result) const { - if (const auto * y_vec_16 = checkAndGetColumn(&y)) + if (const auto * y_vec_16 = checkAndGetColumn(&y)) vectorVector(x, *y_vec_16, timezone_x, timezone_y, result); - else if (const auto * y_vec_32 = checkAndGetColumn(&y)) + else if (const auto * y_vec_32 = checkAndGetColumn(&y)) vectorVector(x, *y_vec_32, timezone_x, timezone_y, result); + else if (const auto * y_vec_32_s = checkAndGetColumn(&y)) + vectorVector(x, *y_vec_32_s, timezone_x, timezone_y, result); else if (const auto * y_vec_64 = checkAndGetColumn(&y)) vectorVector(x, *y_vec_64, timezone_x, timezone_y, result); - else if (const auto * y_const_16 = checkAndGetColumnConst(&y)) + else if (const auto * y_const_16 = checkAndGetColumnConst(&y)) vectorConstant(x, y_const_16->getValue(), timezone_x, timezone_y, result); - else if (const auto * y_const_32 = checkAndGetColumnConst(&y)) + else if (const auto * y_const_32 = checkAndGetColumnConst(&y)) vectorConstant(x, y_const_32->getValue(), timezone_x, timezone_y, result); + else if (const auto * y_const_32_s = checkAndGetColumnConst(&y)) + vectorConstant(x, y_const_32_s->getValue(), timezone_x, timezone_y, result); else if (const auto * y_const_64 = checkAndGetColumnConst(&y)) vectorConstant(x, y_const_64->getValue>(), timezone_x, timezone_y, result); else - throw Exception("Illegal column for second argument of function " + getName() + ", must be Date or DateTime", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Illegal column for second argument of function {}, must be Date, Date32, DateTime or DateTime64", + getName()); } template @@ -177,14 +197,18 @@ private: const DateLUTImpl & timezone_x, 
const DateLUTImpl & timezone_y, ColumnInt64::Container & result) const { - if (const auto * y_vec_16 = checkAndGetColumn(&y)) + if (const auto * y_vec_16 = checkAndGetColumn(&y)) constantVector(x, *y_vec_16, timezone_x, timezone_y, result); - else if (const auto * y_vec_32 = checkAndGetColumn(&y)) + else if (const auto * y_vec_32 = checkAndGetColumn(&y)) constantVector(x, *y_vec_32, timezone_x, timezone_y, result); + else if (const auto * y_vec_32_s = checkAndGetColumn(&y)) + constantVector(x, *y_vec_32_s, timezone_x, timezone_y, result); else if (const auto * y_vec_64 = checkAndGetColumn(&y)) constantVector(x, *y_vec_64, timezone_x, timezone_y, result); else - throw Exception("Illegal column for second argument of function " + getName() + ", must be Date or DateTime", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Illegal column for second argument of function {}, must be Date, Date32, DateTime or DateTime64", + getName()); } template diff --git a/src/Functions/dateName.cpp b/src/Functions/dateName.cpp index 3911b1cf838..36c0be49190 100644 --- a/src/Functions/dateName.cpp +++ b/src/Functions/dateName.cpp @@ -4,6 +4,7 @@ #include #include +#include #include #include #include @@ -34,6 +35,11 @@ template <> struct DataTypeToTimeTypeMap using TimeType = UInt16; }; +template <> struct DataTypeToTimeTypeMap +{ + using TimeType = Int32; +}; + template <> struct DataTypeToTimeTypeMap { using TimeType = UInt32; @@ -72,7 +78,7 @@ public: ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Number of arguments for function {} doesn't match: passed {}", getName(), - toString(arguments.size())); + arguments.size()); if (!WhichDataType(arguments[0].type).isString()) throw Exception( @@ -83,7 +89,7 @@ public: WhichDataType first_argument_type(arguments[1].type); - if (!(first_argument_type.isDate() || first_argument_type.isDateTime() || first_argument_type.isDateTime64())) + if (!(first_argument_type.isDate() || first_argument_type.isDateTime() || first_argument_type.isDate32() || first_argument_type.isDateTime64())) throw Exception( ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of 2 argument of function {}. 
Must be a date or a date with time", @@ -108,6 +114,7 @@ public: ColumnPtr res; if (!((res = executeType(arguments, result_type)) + || (res = executeType(arguments, result_type)) || (res = executeType(arguments, result_type)) || (res = executeType(arguments, result_type)))) throw Exception( diff --git a/src/Functions/divide/divideImpl.cpp b/src/Functions/divide/divideImpl.cpp index 6c151dfefb5..940f4b35df9 100644 --- a/src/Functions/divide/divideImpl.cpp +++ b/src/Functions/divide/divideImpl.cpp @@ -18,7 +18,7 @@ namespace NAMESPACE template void divideImpl(const A * __restrict a_pos, B b, ResultType * __restrict c_pos, size_t size) { - libdivide::divider divider(b); + libdivide::divider divider(static_cast(b)); const A * a_end = a_pos + size; #if defined(__SSE2__) diff --git a/src/Functions/errorCodeToName.cpp b/src/Functions/errorCodeToName.cpp index 1736311c6cc..0025d38c8f2 100644 --- a/src/Functions/errorCodeToName.cpp +++ b/src/Functions/errorCodeToName.cpp @@ -45,7 +45,8 @@ public: for (size_t i = 0; i < input_rows_count; ++i) { const Int64 error_code = input_column.getInt(i); - std::string_view error_name = ErrorCodes::getName(error_code); + std::string_view error_name = + ErrorCodes::getName(static_cast(error_code)); col_res->insertData(error_name.data(), error_name.size()); } diff --git a/src/Functions/extractAllGroups.h b/src/Functions/extractAllGroups.h index 06b16181c94..e077086a359 100644 --- a/src/Functions/extractAllGroups.h +++ b/src/Functions/extractAllGroups.h @@ -136,7 +136,8 @@ public: const auto * end = pos + current_row.size(); while (pos < end && regexp->Match({pos, static_cast(end - pos)}, - 0, end - pos, regexp->UNANCHORED, matched_groups.data(), matched_groups.size())) + 0, end - pos, regexp->UNANCHORED, + matched_groups.data(), static_cast(matched_groups.size()))) { // 1 is to exclude group #0 which is whole re match. for (size_t group = 1; group <= groups_count; ++group) @@ -179,7 +180,8 @@ public: const auto * end = pos + current_row.size; while (pos < end && regexp->Match({pos, static_cast(end - pos)}, - 0, end - pos, regexp->UNANCHORED, matched_groups.data(), matched_groups.size())) + 0, end - pos, regexp->UNANCHORED, matched_groups.data(), + static_cast(matched_groups.size()))) { // 1 is to exclude group #0 which is whole re match. for (size_t group = 1; group <= groups_count; ++group) diff --git a/src/Functions/extractGroups.cpp b/src/Functions/extractGroups.cpp index eb6e609a4be..8ec389827db 100644 --- a/src/Functions/extractGroups.cpp +++ b/src/Functions/extractGroups.cpp @@ -90,7 +90,8 @@ public: std::string_view current_row = column_haystack->getDataAt(i).toView(); if (re2->Match(re2_st::StringPiece(current_row.data(), current_row.size()), - 0, current_row.size(), re2_st::RE2::UNANCHORED, matched_groups.data(), matched_groups.size())) + 0, current_row.size(), re2_st::RE2::UNANCHORED, matched_groups.data(), + static_cast(matched_groups.size()))) { // 1 is to exclude group #0 which is whole re match. 
for (size_t group = 1; group <= groups_count; ++group) diff --git a/src/Functions/formatDateTime.cpp b/src/Functions/formatDateTime.cpp index 09071c5c1a0..4db04d61d84 100644 --- a/src/Functions/formatDateTime.cpp +++ b/src/Functions/formatDateTime.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -45,6 +46,7 @@ template <> struct ActionValueTypeMap { using ActionValueTyp template <> struct ActionValueTypeMap { using ActionValueType = UInt32; }; template <> struct ActionValueTypeMap { using ActionValueType = UInt32; }; template <> struct ActionValueTypeMap { using ActionValueType = UInt16; }; +template <> struct ActionValueTypeMap { using ActionValueType = Int32; }; template <> struct ActionValueTypeMap { using ActionValueType = UInt32; }; // TODO(vnemkov): to add sub-second format instruction, make that DateTime64 and do some math in Action. template <> struct ActionValueTypeMap { using ActionValueType = Int64; }; @@ -315,44 +317,39 @@ public: if constexpr (support_integer) { if (arguments.size() != 1 && arguments.size() != 2 && arguments.size() != 3) - throw Exception( - "Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size()) - + ", should be 1, 2 or 3", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be 1, 2 or 3", + getName(), arguments.size()); if (arguments.size() == 1 && !isInteger(arguments[0].type)) - throw Exception( - "Illegal type " + arguments[0].type->getName() + " of 1 argument of function " + getName() - + " when arguments size is 1. Should be integer", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - if (arguments.size() > 1 && !(isInteger(arguments[0].type) || isDate(arguments[0].type) || isDateTime(arguments[0].type) || isDateTime64(arguments[0].type))) - throw Exception( - "Illegal type " + arguments[0].type->getName() + " of 1 argument of function " + getName() - + " when arguments size is 2 or 3. Should be a integer or a date with time", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of first argument of function {} when arguments size is 1. Should be integer", + arguments[0].type->getName(), getName()); + if (arguments.size() > 1 && !(isInteger(arguments[0].type) || isDate(arguments[0].type) || isDateTime(arguments[0].type) || isDate32(arguments[0].type) || isDateTime64(arguments[0].type))) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of first argument of function {} when arguments size is 2 or 3. Should be a integer or a date with time", + arguments[0].type->getName(), getName()); } else { if (arguments.size() != 2 && arguments.size() != 3) - throw Exception( - "Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size()) - + ", should be 2 or 3", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - if (!isDate(arguments[0].type) && !isDateTime(arguments[0].type) && !isDateTime64(arguments[0].type)) - throw Exception( - "Illegal type " + arguments[0].type->getName() + " of 1 argument of function " + getName() - + ". 
Should be a date or a date with time", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be 2 or 3", + getName(), arguments.size()); + if (!isDate(arguments[0].type) && !isDateTime(arguments[0].type) && !isDate32(arguments[0].type) && !isDateTime64(arguments[0].type)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of first argument of function {}. Should be a date or a date with time", + arguments[0].type->getName(), getName()); } if (arguments.size() == 2 && !WhichDataType(arguments[1].type).isString()) - throw Exception( - "Illegal type " + arguments[1].type->getName() + " of 2 argument of function " + getName() + ". Must be String.", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of second argument of function {}. Must be String.", + arguments[1].type->getName(), getName()); if (arguments.size() == 3 && !WhichDataType(arguments[2].type).isString()) - throw Exception( - "Illegal type " + arguments[2].type->getName() + " of 3 argument of function " + getName() + ". Must be String.", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of third argument of function {}. Must be String.", + arguments[2].type->getName(), getName()); if (arguments.size() == 1) return std::make_shared(); @@ -373,10 +370,9 @@ public: return true; })) { - throw Exception( - "Illegal column " + arguments[0].column->getName() + " of function " + getName() - + ", must be Integer or DateTime when arguments size is 1.", - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Illegal column {} of function {}, must be Integer, Date, Date32, DateTime or DateTime64 when arguments size is 1.", + arguments[0].column->getName(), getName()); } } else @@ -385,32 +381,31 @@ public: { using FromDataType = std::decay_t; if (!(res = executeType(arguments, result_type))) - throw Exception( - "Illegal column " + arguments[0].column->getName() + " of function " + getName() - + ", must be Integer or DateTime.", - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Illegal column {} of function {}, must be Integer, Date, Date32, DateTime or DateTime64.", + arguments[0].column->getName(), getName()); return true; })) { if (!((res = executeType(arguments, result_type)) + || (res = executeType(arguments, result_type)) || (res = executeType(arguments, result_type)) || (res = executeType(arguments, result_type)))) - throw Exception( - "Illegal column " + arguments[0].column->getName() + " of function " + getName() - + ", must be Integer or DateTime.", - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Illegal column {} of function {}, must be Integer or DateTime.", + arguments[0].column->getName(), getName()); } } } else { if (!((res = executeType(arguments, result_type)) + || (res = executeType(arguments, result_type)) || (res = executeType(arguments, result_type)) || (res = executeType(arguments, result_type)))) - throw Exception( - "Illegal column " + arguments[0].column->getName() + " of function " + getName() - + ", must be Date or DateTime.", - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Illegal column {} of function {}, must be Date or DateTime.", + arguments[0].column->getName(), getName()); } return res; @@ -425,10 +420,9 @@ public: const 
ColumnConst * pattern_column = checkAndGetColumnConst(arguments[1].column.get()); if (!pattern_column) - throw Exception("Illegal column " + arguments[1].column->getName() - + " of second ('format') argument of function " + getName() - + ". Must be constant string.", - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Illegal column {} of second ('format') argument of function {}. Must be constant string.", + arguments[1].column->getName(), getName()); String pattern = pattern_column->getValue(); @@ -499,7 +493,7 @@ public: else { for (auto & instruction : instructions) - instruction.perform(pos, vec[i], time_zone); + instruction.perform(pos, static_cast(vec[i]), time_zone); } dst_offsets[i] = pos - begin; @@ -712,12 +706,14 @@ public: // Unimplemented case 'U': [[fallthrough]]; case 'W': - throw Exception("Wrong pattern '" + pattern + "', symbol '" + *pos + " is not implemented ' for function " + getName(), - ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "Wrong pattern '{}', symbol '{}' is not implemented for function {}", + pattern, *pos, getName()); default: - throw Exception( - "Wrong pattern '" + pattern + "', unexpected symbol '" + *pos + "' for function " + getName(), ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Wrong pattern '{}', unexpected symbol '{}' for function {}", + pattern, *pos, getName()); } ++pos; diff --git a/src/Functions/formatReadableDecimalSize.cpp b/src/Functions/formatReadableDecimalSize.cpp new file mode 100644 index 00000000000..184b574abdf --- /dev/null +++ b/src/Functions/formatReadableDecimalSize.cpp @@ -0,0 +1,35 @@ +#include +#include + + +namespace DB +{ + +namespace +{ + struct Impl + { + static constexpr auto name = "formatReadableDecimalSize"; + + static void format(double value, DB::WriteBuffer & out) + { + formatReadableSizeWithDecimalSuffix(value, out); + } + }; +} + +REGISTER_FUNCTION(FormatReadableDecimalSize) +{ + factory.registerFunction>( + { + R"( +Accepts the size (number of bytes). Returns a rounded size with a suffix (KB, MB, etc.) as a string. 
+)", + Documentation::Examples{ + {"formatReadableDecimalSize", "SELECT formatReadableDecimalSize(1000)"}}, + Documentation::Categories{"OtherFunctions"} + }, + FunctionFactory::CaseSensitive); +} + +} diff --git a/src/Functions/grouping.h b/src/Functions/grouping.h index b9ef6ffc107..830c509f1f5 100644 --- a/src/Functions/grouping.h +++ b/src/Functions/grouping.h @@ -13,6 +13,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int UNSUPPORTED_METHOD; +} + class FunctionGroupingBase : public IFunction { protected: @@ -71,6 +76,22 @@ public: } }; +class FunctionGrouping : public FunctionGroupingBase +{ +public: + explicit FunctionGrouping(bool force_compatibility_) + : FunctionGroupingBase(ColumnNumbers(), force_compatibility_) + {} + + String getName() const override { return "grouping"; } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr &, size_t) const override + { + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, + "Method executeImpl is not supported for 'grouping' function"); + } +}; + class FunctionGroupingOrdinary : public FunctionGroupingBase { public: diff --git a/src/Functions/initializeAggregation.cpp b/src/Functions/initializeAggregation.cpp index b7dcce9c188..08352553b9c 100644 --- a/src/Functions/initializeAggregation.cpp +++ b/src/Functions/initializeAggregation.cpp @@ -17,7 +17,6 @@ namespace DB namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int BAD_ARGUMENTS; } @@ -114,13 +113,6 @@ ColumnPtr FunctionInitializeAggregation::executeImpl(const ColumnsWithTypeAndNam MutableColumnPtr result_holder = result_type->createColumn(); IColumn & res_col = *result_holder; - /// AggregateFunction's states should be inserted into column using specific way - auto * res_col_aggregate_function = typeid_cast(&res_col); - - if (!res_col_aggregate_function && agg_func.isState()) - throw Exception("State function " + agg_func.getName() + " inserts results into non-state column " - + result_type->getName(), ErrorCodes::ILLEGAL_COLUMN); - PODArray places(input_rows_count); for (size_t i = 0; i < input_rows_count; ++i) { @@ -151,10 +143,9 @@ ColumnPtr FunctionInitializeAggregation::executeImpl(const ColumnsWithTypeAndNam } for (size_t i = 0; i < input_rows_count; ++i) - if (!res_col_aggregate_function) - agg_func.insertResultInto(places[i], res_col, arena.get()); - else - res_col_aggregate_function->insertFrom(places[i]); + /// We should use insertMergeResultInto to insert result into ColumnAggregateFunction + /// correctly if result contains AggregateFunction's states + agg_func.insertMergeResultInto(places[i], res_col, arena.get()); return result_holder; } diff --git a/src/Functions/makeDate.cpp b/src/Functions/makeDate.cpp index e2d93c0fdc9..7ebca71af13 100644 --- a/src/Functions/makeDate.cpp +++ b/src/Functions/makeDate.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -149,7 +150,7 @@ struct MakeDateTraits { static constexpr auto name = "makeDate"; using ReturnDataType = DataTypeDate; - using ReturnColumnType = ColumnUInt16; + using ReturnColumnType = ColumnDate; static constexpr auto MIN_YEAR = 1970; static constexpr auto MAX_YEAR = 2149; @@ -162,7 +163,7 @@ struct MakeDate32Traits { static constexpr auto name = "makeDate32"; using ReturnDataType = DataTypeDate32; - using ReturnColumnType = ColumnInt32; + using ReturnColumnType = ColumnDate32; static constexpr auto MIN_YEAR = 1900; static constexpr auto MAX_YEAR = 
2299; @@ -267,7 +268,7 @@ public: Columns converted_arguments; convertRequiredArguments(arguments, converted_arguments); - auto res_column = ColumnUInt32::create(input_rows_count); + auto res_column = ColumnDateTime::create(input_rows_count); auto & result_data = res_column->getData(); const auto & year_data = typeid_cast(*converted_arguments[0]).getData(); @@ -294,7 +295,7 @@ public: else if (unlikely(date_time > 0x0ffffffffll)) date_time = 0x0ffffffffll; - result_data[i] = date_time; + result_data[i] = static_cast(date_time); } return res_column; @@ -365,7 +366,7 @@ public: fraction_data = &typeid_cast(*converted_arguments[6]).getData(); } - auto res_column = ColumnDecimal::create(input_rows_count, precision); + auto res_column = ColumnDateTime64::create(input_rows_count, static_cast(precision)); auto & result_data = res_column->getData(); const auto & year_data = typeid_cast(*converted_arguments[0]).getData(); @@ -411,7 +412,10 @@ public: fraction = max_fraction; } - result_data[i] = DecimalUtils::decimalFromComponents(date_time, static_cast(fraction), precision); + result_data[i] = DecimalUtils::decimalFromComponents( + date_time, + static_cast(fraction), + static_cast(precision)); } return res_column; diff --git a/src/Functions/minus.cpp b/src/Functions/minus.cpp index 3668e4afc18..04877a42b18 100644 --- a/src/Functions/minus.cpp +++ b/src/Functions/minus.cpp @@ -23,7 +23,7 @@ struct MinusImpl return static_cast(static_cast(a)) - static_cast(static_cast(b)); } else - return static_cast(a) - b; + return static_cast(a) - static_cast(b); } /// Apply operation and check overflow. It's used for Deciamal operations. @returns true if overflowed, false otherwise. diff --git a/src/Functions/modulo.cpp b/src/Functions/modulo.cpp index 9a3aa12037f..b2411899160 100644 --- a/src/Functions/modulo.cpp +++ b/src/Functions/modulo.cpp @@ -80,7 +80,7 @@ struct ModuloByConstantImpl || (std::is_signed_v && std::is_signed_v && b < std::numeric_limits::lowest()))) { for (size_t i = 0; i < size; ++i) - dst[i] = src[i]; + dst[i] = static_cast(src[i]); return; } @@ -101,16 +101,19 @@ struct ModuloByConstantImpl if (b & (b - 1)) { - libdivide::divider divider(b); + libdivide::divider divider(static_cast(b)); for (size_t i = 0; i < size; ++i) - dst[i] = src[i] - (src[i] / divider) * b; /// NOTE: perhaps, the division semantics with the remainder of negative numbers is not preserved. + { + /// NOTE: perhaps, the division semantics with the remainder of negative numbers is not preserved. + dst[i] = static_cast(src[i] - (src[i] / divider) * b); + } } else { // gcc libdivide doesn't work well for pow2 division auto mask = b - 1; for (size_t i = 0; i < size; ++i) - dst[i] = src[i] & mask; + dst[i] = static_cast(src[i] & mask); } } diff --git a/src/Functions/mortonDecode.cpp b/src/Functions/mortonDecode.cpp new file mode 100644 index 00000000000..337fd5e3a38 --- /dev/null +++ b/src/Functions/mortonDecode.cpp @@ -0,0 +1,433 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#if USE_MULTITARGET_CODE && defined(__BMI2__) +#include +#endif + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int ILLEGAL_COLUMN; + extern const int ARGUMENT_OUT_OF_BOUND; +} + +#define EXTRACT_VECTOR(INDEX) \ + auto col##INDEX = ColumnUInt64::create(); \ + auto & vec##INDEX = col##INDEX->getData(); \ + vec##INDEX.resize(input_rows_count); + +#define DECODE(ND, ...) 
\ + if (nd == (ND)) \ + { \ + for (size_t i = 0; i < input_rows_count; i++) \ + { \ + auto res = MortonND_##ND##D_Dec.Decode(col_code->getUInt(i)); \ + __VA_ARGS__ \ + } \ + } + +#define MASK(IDX, ...) \ + ((mask) ? shrink(mask->getColumn((IDX)).getUInt(0), std::get(__VA_ARGS__)) : std::get(__VA_ARGS__)) + +#define EXECUTE() \ + size_t nd; \ + const auto * col_const = typeid_cast(arguments[0].column.get()); \ + const auto * mask = typeid_cast(col_const->getDataColumnPtr().get()); \ + if (mask) \ + nd = mask->tupleSize(); \ + else \ + nd = col_const->getUInt(0); \ + auto non_const_arguments = arguments; \ + non_const_arguments[1].column = non_const_arguments[1].column->convertToFullColumnIfConst(); \ + const ColumnPtr & col_code = non_const_arguments[1].column; \ + Columns tuple_columns(nd); \ + EXTRACT_VECTOR(0) \ + if (nd == 1) \ + { \ + if (mask) \ + { \ + for (size_t i = 0; i < input_rows_count; i++) \ + { \ + vec0[i] = shrink(mask->getColumn(0).getUInt(0), col_code->getUInt(i)); \ + } \ + tuple_columns[0] = std::move(col0); \ + } \ + else \ + { \ + for (size_t i = 0; i < input_rows_count; i++) \ + { \ + vec0[i] = col_code->getUInt(i); \ + } \ + tuple_columns[0] = std::move(col0); \ + } \ + return ColumnTuple::create(tuple_columns); \ + } \ + EXTRACT_VECTOR(1) \ + DECODE(2, \ + vec0[i] = MASK(0, res); \ + vec1[i] = MASK(1, res);) \ + EXTRACT_VECTOR(2) \ + DECODE(3, \ + vec0[i] = MASK(0, res); \ + vec1[i] = MASK(1, res); \ + vec2[i] = MASK(2, res);) \ + EXTRACT_VECTOR(3) \ + DECODE(4, \ + vec0[i] = MASK(0, res); \ + vec1[i] = MASK(1, res); \ + vec2[i] = MASK(2, res); \ + vec3[i] = MASK(3, res);) \ + EXTRACT_VECTOR(4) \ + DECODE(5, \ + vec0[i] = MASK(0, res); \ + vec1[i] = MASK(1, res); \ + vec2[i] = MASK(2, res); \ + vec3[i] = MASK(3, res); \ + vec4[i] = MASK(4, res);) \ + EXTRACT_VECTOR(5) \ + DECODE(6, \ + vec0[i] = MASK(0, res); \ + vec1[i] = MASK(1, res); \ + vec2[i] = MASK(2, res); \ + vec3[i] = MASK(3, res); \ + vec4[i] = MASK(4, res); \ + vec5[i] = MASK(5, res);) \ + EXTRACT_VECTOR(6) \ + DECODE(7, \ + vec0[i] = MASK(0, res); \ + vec1[i] = MASK(1, res); \ + vec2[i] = MASK(2, res); \ + vec3[i] = MASK(3, res); \ + vec4[i] = MASK(4, res); \ + vec5[i] = MASK(5, res); \ + vec6[i] = MASK(6, res);) \ + EXTRACT_VECTOR(7) \ + DECODE(8, \ + vec0[i] = MASK(0, res); \ + vec1[i] = MASK(1, res); \ + vec2[i] = MASK(2, res); \ + vec3[i] = MASK(3, res); \ + vec4[i] = MASK(4, res); \ + vec5[i] = MASK(5, res); \ + vec6[i] = MASK(6, res); \ + vec7[i] = MASK(7, res);) \ + switch (nd) \ + { \ + case 2: \ + tuple_columns[0] = std::move(col0); \ + tuple_columns[1] = std::move(col1); \ + break; \ + case 3: \ + tuple_columns[0] = std::move(col0); \ + tuple_columns[1] = std::move(col1); \ + tuple_columns[2] = std::move(col2); \ + return ColumnTuple::create(tuple_columns); \ + case 4: \ + tuple_columns[0] = std::move(col0); \ + tuple_columns[1] = std::move(col1); \ + tuple_columns[2] = std::move(col2); \ + tuple_columns[3] = std::move(col3); \ + return ColumnTuple::create(tuple_columns); \ + case 5: \ + tuple_columns[0] = std::move(col0); \ + tuple_columns[1] = std::move(col1); \ + tuple_columns[2] = std::move(col2); \ + tuple_columns[3] = std::move(col3); \ + tuple_columns[4] = std::move(col4); \ + return ColumnTuple::create(tuple_columns); \ + case 6: \ + tuple_columns[0] = std::move(col0); \ + tuple_columns[1] = std::move(col1); \ + tuple_columns[2] = std::move(col2); \ + tuple_columns[3] = std::move(col3); \ + tuple_columns[4] = std::move(col4); \ + tuple_columns[5] = std::move(col5); \ + return 
ColumnTuple::create(tuple_columns); \ + case 7: \ + tuple_columns[0] = std::move(col0); \ + tuple_columns[1] = std::move(col1); \ + tuple_columns[2] = std::move(col2); \ + tuple_columns[3] = std::move(col3); \ + tuple_columns[4] = std::move(col4); \ + tuple_columns[5] = std::move(col5); \ + tuple_columns[6] = std::move(col6); \ + return ColumnTuple::create(tuple_columns); \ + case 8: \ + tuple_columns[0] = std::move(col0); \ + tuple_columns[1] = std::move(col1); \ + tuple_columns[2] = std::move(col2); \ + tuple_columns[3] = std::move(col3); \ + tuple_columns[4] = std::move(col4); \ + tuple_columns[5] = std::move(col5); \ + tuple_columns[6] = std::move(col6); \ + tuple_columns[7] = std::move(col7); \ + return ColumnTuple::create(tuple_columns); \ + } \ + return ColumnTuple::create(tuple_columns); + +DECLARE_DEFAULT_CODE( +constexpr auto MortonND_2D_Dec = mortonnd::MortonNDLutDecoder<2, 32, 8>(); +constexpr auto MortonND_3D_Dec = mortonnd::MortonNDLutDecoder<3, 21, 8>(); +constexpr auto MortonND_4D_Dec = mortonnd::MortonNDLutDecoder<4, 16, 8>(); +constexpr auto MortonND_5D_Dec = mortonnd::MortonNDLutDecoder<5, 12, 8>(); +constexpr auto MortonND_6D_Dec = mortonnd::MortonNDLutDecoder<6, 10, 8>(); +constexpr auto MortonND_7D_Dec = mortonnd::MortonNDLutDecoder<7, 9, 8>(); +constexpr auto MortonND_8D_Dec = mortonnd::MortonNDLutDecoder<8, 8, 8>(); +class FunctionMortonDecode : public IFunction +{ +public: + static constexpr auto name = "mortonDecode"; + static FunctionPtr create(ContextPtr) + { + return std::make_shared(); + } + + String getName() const override + { + return name; + } + + size_t getNumberOfArguments() const override + { + return 2; + } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {0}; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + UInt64 tuple_size = 0; + const auto * col_const = typeid_cast(arguments[0].column.get()); + if (!col_const) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Illegal column type {} of function {}, should be a constant (UInt or Tuple)", + arguments[0].type->getName(), getName()); + if (!WhichDataType(arguments[1].type).isNativeUInt()) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Illegal column type {} of function {}, should be a native UInt", + arguments[1].type->getName(), getName()); + const auto * mask = typeid_cast(col_const->getDataColumnPtr().get()); + if (mask) + { + tuple_size = mask->tupleSize(); + } + else if (WhichDataType(arguments[0].type).isNativeUInt()) + { + tuple_size = col_const->getUInt(0); + } + else + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Illegal column type {} of function {}, should be UInt or Tuple", + arguments[0].type->getName(), getName()); + if (tuple_size > 8 || tuple_size < 1) + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "Illegal first argument for function {}, should be a number in range 1-8 or a Tuple of such size", + getName()); + if (mask) + { + const auto * type_tuple = typeid_cast(arguments[0].type.get()); + for (size_t i = 0; i < tuple_size; i++) + { + if (!WhichDataType(type_tuple->getElement(i)).isNativeUInt()) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument in tuple for function {}, should be a native UInt", + type_tuple->getElement(i)->getName(), getName()); + auto ratio = mask->getColumn(i).getUInt(0); + if (ratio > 8 || ratio < 1) + throw 
Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "Illegal argument {} in tuple for function {}, should be a number in range 1-8", + ratio, getName()); + } + } + DataTypes types(tuple_size); + for (size_t i = 0; i < tuple_size; i++) + { + types[i] = std::make_shared(); + } + return std::make_shared(types); + } + + static UInt64 shrink(UInt64 ratio, UInt64 value) + { + switch (ratio) + { + case 1: + return value; + case 2: + return std::get<1>(MortonND_2D_Dec.Decode(value)); + case 3: + return std::get<2>(MortonND_3D_Dec.Decode(value)); + case 4: + return std::get<3>(MortonND_4D_Dec.Decode(value)); + case 5: + return std::get<4>(MortonND_5D_Dec.Decode(value)); + case 6: + return std::get<5>(MortonND_6D_Dec.Decode(value)); + case 7: + return std::get<6>(MortonND_7D_Dec.Decode(value)); + case 8: + return std::get<7>(MortonND_8D_Dec.Decode(value)); + } + return value; + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + EXECUTE() + } +}; +) // DECLARE_DEFAULT_CODE + +#if defined(MORTON_ND_BMI2_ENABLED) +#undef DECODE +#define DECODE(ND, ...) \ + if (nd == (ND)) \ + { \ + for (size_t i = 0; i < input_rows_count; i++) \ + { \ + auto res = MortonND_##ND##D::Decode(col_code->getUInt(i)); \ + __VA_ARGS__ \ + } \ + } + +DECLARE_AVX2_SPECIFIC_CODE( +using MortonND_2D = mortonnd::MortonNDBmi<2, uint64_t>; +using MortonND_3D = mortonnd::MortonNDBmi<3, uint64_t>; +using MortonND_4D = mortonnd::MortonNDBmi<4, uint64_t>; +using MortonND_5D = mortonnd::MortonNDBmi<5, uint64_t>; +using MortonND_6D = mortonnd::MortonNDBmi<6, uint64_t>; +using MortonND_7D = mortonnd::MortonNDBmi<7, uint64_t>; +using MortonND_8D = mortonnd::MortonNDBmi<8, uint64_t>; +class FunctionMortonDecode: public TargetSpecific::Default::FunctionMortonDecode +{ + static UInt64 shrink(UInt64 ratio, UInt64 value) + { + switch (ratio) + { + case 1: + return value; + case 2: + return std::get<1>(MortonND_2D::Decode(value)); + case 3: + return std::get<2>(MortonND_3D::Decode(value)); + case 4: + return std::get<3>(MortonND_4D::Decode(value)); + case 5: + return std::get<4>(MortonND_5D::Decode(value)); + case 6: + return std::get<5>(MortonND_6D::Decode(value)); + case 7: + return std::get<6>(MortonND_7D::Decode(value)); + case 8: + return std::get<7>(MortonND_8D::Decode(value)); + } + return value; + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + EXECUTE() + } +}; +) +#endif // MORTON_ND_BMI2_ENABLED + +#undef DECODE +#undef MASK +#undef EXTRACT_VECTOR +#undef EXECUTE + +class FunctionMortonDecode: public TargetSpecific::Default::FunctionMortonDecode +{ +public: + explicit FunctionMortonDecode(ContextPtr context) : selector(context) + { + selector.registerImplementation(); + +#if USE_MULTITARGET_CODE && defined(MORTON_ND_BMI2_ENABLED) + selector.registerImplementation(); +#endif + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + { + return selector.selectAndExecute(arguments, result_type, input_rows_count); + } + + static FunctionPtr create(ContextPtr context) + { + return std::make_shared(context); + } + +private: + ImplementationSelector selector; +}; + +REGISTER_FUNCTION(MortonDecode) +{ + factory.registerFunction({ + R"( +Decodes a Morton encoding (ZCurve) into the corresponding unsigned integer tuple + +The function has two modes of operation: +- Simple +- Expanded + +Simple: 
accepts a resulting tuple size as a first argument and the code as a second argument. +[example:simple] +Will decode into: `(1,2,3,4)` +The resulting tuple size cannot be more than 8 + +Expanded: accepts a range mask (tuple) as a first argument and the code as a second argument. +Each number in mask configures the amount of range shrink +1 - no shrink +2 - 2x shrink +3 - 3x shrink +.... +Up to 8x shrink. +[example:range_shrank] +Note: see mortonEncode() docs on why range change might be beneficial. +Still limited to 8 numbers at most. + +Morton code for one argument is always the argument itself (as a tuple). +[example:identity] +Produces: `(1)` + +You can shrink one argument too: +[example:identity_shrank] +Produces: `(128)` + +The function accepts a column of codes as a second argument: +[example:from_table] + +The range tuple must be a constant: +[example:from_table_range] +)", + Documentation::Examples{ + {"simple", "SELECT mortonDecode(4, 2149)"}, + {"range_shrank", "SELECT mortonDecode((1,2), 1572864)"}, + {"identity", "SELECT mortonDecode(1, 1)"}, + {"identity_shrank", "SELECT mortonDecode(tuple(2), 32768)"}, + {"from_table", "SELECT mortonDecode(2, code) FROM table"}, + {"from_table_range", "SELECT mortonDecode((1,2), code) FROM table"}, + }, + Documentation::Categories {"ZCurve", "Morton coding"} + }); +} + +} diff --git a/src/Functions/mortonEncode.cpp b/src/Functions/mortonEncode.cpp new file mode 100644 index 00000000000..4bdd237fa9c --- /dev/null +++ b/src/Functions/mortonEncode.cpp @@ -0,0 +1,393 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#if USE_MULTITARGET_CODE && defined(__BMI2__) +#include +#endif + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int ARGUMENT_OUT_OF_BOUND; + extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION; +} + +#define EXTRACT_VECTOR(INDEX) \ + const ColumnPtr & col##INDEX = non_const_arguments[(INDEX) + vectorStartIndex].column; + +#define ENCODE(ND, ...) \ + if (nd == (ND)) \ + { \ + for (size_t i = 0; i < input_rows_count; i++) \ + { \ + vec_res[i] = MortonND_##ND##D_Enc.Encode(__VA_ARGS__); \ + } \ + return col_res; \ + } + +#define EXPAND(IDX, ...) \ + (mask) ? expand(mask->getColumn(IDX).getUInt(0), __VA_ARGS__) : __VA_ARGS__ + +#define MASK(ND, IDX, ...) 
\ + (EXPAND(IDX, __VA_ARGS__) & MortonND_##ND##D_Enc.InputMask()) + +#define EXECUTE() \ + size_t nd = arguments.size(); \ + size_t vectorStartIndex = 0; \ + const auto * const_col = typeid_cast(arguments[0].column.get()); \ + const ColumnTuple * mask; \ + if (const_col) \ + mask = typeid_cast(const_col->getDataColumnPtr().get()); \ + else \ + mask = typeid_cast(arguments[0].column.get()); \ + if (mask) \ + { \ + nd = mask->tupleSize(); \ + vectorStartIndex = 1; \ + for (size_t i = 0; i < nd; i++) \ + { \ + auto ratio = mask->getColumn(i).getUInt(0); \ + if (ratio > 8 || ratio < 1) \ + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, \ + "Illegal argument {} of function {}, should be a number in range 1-8", \ + arguments[0].column->getName(), getName()); \ + } \ + } \ + \ + auto non_const_arguments = arguments; \ + for (auto & argument : non_const_arguments) \ + argument.column = argument.column->convertToFullColumnIfConst(); \ + \ + auto col_res = ColumnUInt64::create(); \ + ColumnUInt64::Container & vec_res = col_res->getData(); \ + vec_res.resize(input_rows_count); \ + \ + EXTRACT_VECTOR(0) \ + if (nd == 1) \ + { \ + for (size_t i = 0; i < input_rows_count; i++) \ + { \ + vec_res[i] = EXPAND(0, col0->getUInt(i)); \ + } \ + return col_res; \ + } \ + \ + EXTRACT_VECTOR(1) \ + ENCODE(2, \ + MASK(2, 0, col0->getUInt(i)), \ + MASK(2, 1, col1->getUInt(i))) \ + EXTRACT_VECTOR(2) \ + ENCODE(3, \ + MASK(3, 0, col0->getUInt(i)), \ + MASK(3, 1, col1->getUInt(i)), \ + MASK(3, 2, col2->getUInt(i))) \ + EXTRACT_VECTOR(3) \ + ENCODE(4, \ + MASK(4, 0, col0->getUInt(i)), \ + MASK(4, 1, col1->getUInt(i)), \ + MASK(4, 2, col2->getUInt(i)), \ + MASK(4, 3, col3->getUInt(i))) \ + EXTRACT_VECTOR(4) \ + ENCODE(5, \ + MASK(5, 0, col0->getUInt(i)), \ + MASK(5, 1, col1->getUInt(i)), \ + MASK(5, 2, col2->getUInt(i)), \ + MASK(5, 3, col3->getUInt(i)), \ + MASK(5, 4, col4->getUInt(i))) \ + EXTRACT_VECTOR(5) \ + ENCODE(6, \ + MASK(6, 0, col0->getUInt(i)), \ + MASK(6, 1, col1->getUInt(i)), \ + MASK(6, 2, col2->getUInt(i)), \ + MASK(6, 3, col3->getUInt(i)), \ + MASK(6, 4, col4->getUInt(i)), \ + MASK(6, 5, col5->getUInt(i))) \ + EXTRACT_VECTOR(6) \ + ENCODE(7, \ + MASK(7, 0, col0->getUInt(i)), \ + MASK(7, 1, col1->getUInt(i)), \ + MASK(7, 2, col2->getUInt(i)), \ + MASK(7, 3, col3->getUInt(i)), \ + MASK(7, 4, col4->getUInt(i)), \ + MASK(7, 5, col5->getUInt(i)), \ + MASK(7, 6, col6->getUInt(i))) \ + EXTRACT_VECTOR(7) \ + ENCODE(8, \ + MASK(8, 0, col0->getUInt(i)), \ + MASK(8, 1, col1->getUInt(i)), \ + MASK(8, 2, col2->getUInt(i)), \ + MASK(8, 3, col3->getUInt(i)), \ + MASK(8, 4, col4->getUInt(i)), \ + MASK(8, 5, col5->getUInt(i)), \ + MASK(8, 6, col6->getUInt(i)), \ + MASK(8, 7, col7->getUInt(i))) \ + \ + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, \ + "Illegal number of UInt arguments of function {}, max: 8", \ + getName()); \ + +DECLARE_DEFAULT_CODE( +constexpr auto MortonND_2D_Enc = mortonnd::MortonNDLutEncoder<2, 32, 8>(); +constexpr auto MortonND_3D_Enc = mortonnd::MortonNDLutEncoder<3, 21, 8>(); +constexpr auto MortonND_4D_Enc = mortonnd::MortonNDLutEncoder<4, 16, 8>(); +constexpr auto MortonND_5D_Enc = mortonnd::MortonNDLutEncoder<5, 12, 8>(); +constexpr auto MortonND_6D_Enc = mortonnd::MortonNDLutEncoder<6, 10, 8>(); +constexpr auto MortonND_7D_Enc = mortonnd::MortonNDLutEncoder<7, 9, 8>(); +constexpr auto MortonND_8D_Enc = mortonnd::MortonNDLutEncoder<8, 8, 8>(); +class FunctionMortonEncode : public IFunction +{ +public: + static constexpr auto name = "mortonEncode"; + static FunctionPtr 
create(ContextPtr) + { + return std::make_shared(); + } + + String getName() const override + { + return name; + } + + bool isVariadic() const override + { + return true; + } + + size_t getNumberOfArguments() const override + { + return 0; + } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + + bool useDefaultImplementationForConstants() const override { return true; } + + DataTypePtr getReturnTypeImpl(const DB::DataTypes & arguments) const override + { + size_t vectorStartIndex = 0; + if (arguments.empty()) + throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, + "At least one UInt argument is required for function {}", + getName()); + if (WhichDataType(arguments[0]).isTuple()) + { + vectorStartIndex = 1; + const auto * type_tuple = typeid_cast(arguments[0].get()); + auto tuple_size = type_tuple->getElements().size(); + if (tuple_size != (arguments.size() - 1)) + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "Illegal argument {} for function {}, tuple size should be equal to number of UInt arguments", + arguments[0]->getName(), getName()); + for (size_t i = 0; i < tuple_size; i++) + { + if (!WhichDataType(type_tuple->getElement(i)).isNativeUInt()) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument in tuple for function {}, should be a native UInt", + type_tuple->getElement(i)->getName(), getName()); + } + } + + for (size_t i = vectorStartIndex; i < arguments.size(); i++) + { + const auto & arg = arguments[i]; + if (!WhichDataType(arg).isNativeUInt()) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument of function {}, should be a native UInt", + arg->getName(), getName()); + } + return std::make_shared(); + } + + static UInt64 expand(UInt64 ratio, UInt64 value) + { + switch (ratio) + { + case 1: + return value; + case 2: + return MortonND_2D_Enc.Encode(0, value & MortonND_2D_Enc.InputMask()); + case 3: + return MortonND_3D_Enc.Encode(0, 0, value & MortonND_3D_Enc.InputMask()); + case 4: + return MortonND_4D_Enc.Encode(0, 0, 0, value & MortonND_4D_Enc.InputMask()); + case 5: + return MortonND_5D_Enc.Encode(0, 0, 0, 0, value & MortonND_5D_Enc.InputMask()); + case 6: + return MortonND_6D_Enc.Encode(0, 0, 0, 0, 0, value & MortonND_6D_Enc.InputMask()); + case 7: + return MortonND_7D_Enc.Encode(0, 0, 0, 0, 0, 0, value & MortonND_7D_Enc.InputMask()); + case 8: + return MortonND_8D_Enc.Encode(0, 0, 0, 0, 0, 0, 0, value & MortonND_8D_Enc.InputMask()); + } + return value; + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + EXECUTE() + } +}; +) // DECLARE_DEFAULT_CODE + +#if defined(MORTON_ND_BMI2_ENABLED) +#undef ENCODE +#define ENCODE(ND, ...) \ + if (nd == (ND)) \ + { \ + for (size_t i = 0; i < input_rows_count; i++) \ + { \ + vec_res[i] = MortonND_##ND##D::Encode(__VA_ARGS__); \ + } \ + return col_res; \ + } + +#undef MASK +#define MASK(ND, IDX, ...) 
\ + (EXPAND(IDX, __VA_ARGS__)) + +DECLARE_AVX2_SPECIFIC_CODE( +using MortonND_2D = mortonnd::MortonNDBmi<2, uint64_t>; +using MortonND_3D = mortonnd::MortonNDBmi<3, uint64_t>; +using MortonND_4D = mortonnd::MortonNDBmi<4, uint64_t>; +using MortonND_5D = mortonnd::MortonNDBmi<5, uint64_t>; +using MortonND_6D = mortonnd::MortonNDBmi<6, uint64_t>; +using MortonND_7D = mortonnd::MortonNDBmi<7, uint64_t>; +using MortonND_8D = mortonnd::MortonNDBmi<8, uint64_t>; + +class FunctionMortonEncode : public TargetSpecific::Default::FunctionMortonEncode +{ +public: + static UInt64 expand(UInt64 ratio, UInt64 value) + { + switch (ratio) + { + case 1: + return value; + case 2: + return MortonND_2D::Encode(0, value); + case 3: + return MortonND_3D::Encode(0, 0, value); + case 4: + return MortonND_4D::Encode(0, 0, 0, value); + case 5: + return MortonND_5D::Encode(0, 0, 0, 0, value); + case 6: + return MortonND_6D::Encode(0, 0, 0, 0, 0, value); + case 7: + return MortonND_7D::Encode(0, 0, 0, 0, 0, 0, value); + case 8: + return MortonND_8D::Encode(0, 0, 0, 0, 0, 0, 0, value); + } + return value; + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + EXECUTE() + } +}; +) // DECLARE_AVX2_SPECIFIC_CODE +#endif // MORTON_ND_BMI2_ENABLED + +#undef ENCODE +#undef MASK +#undef EXTRACT_VECTOR +#undef EXPAND +#undef EXECUTE + +class FunctionMortonEncode: public TargetSpecific::Default::FunctionMortonEncode +{ +public: + explicit FunctionMortonEncode(ContextPtr context) : selector(context) + { + selector.registerImplementation(); + +#if USE_MULTITARGET_CODE && defined(MORTON_ND_BMI2_ENABLED) + selector.registerImplementation(); +#endif + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + { + return selector.selectAndExecute(arguments, result_type, input_rows_count); + } + + static FunctionPtr create(ContextPtr context) + { + return std::make_shared(context); + } + +private: + ImplementationSelector selector; +}; + +REGISTER_FUNCTION(MortonEncode) +{ + factory.registerFunction({ + R"( +Calculates Morton encoding (ZCurve) for a list of unsigned integers + +The function has two modes of operation: +- Simple +- Expanded + +Simple: accepts up to 8 unsigned integers as arguments and produces a UInt64 code. +[example:simple] + +Expanded: accepts a range mask (tuple) as a first argument and up to 8 unsigned integers as other arguments. +Each number in mask configures the amount of range expansion +1 - no expansion +2 - 2x expansion +3 - 3x expansion +.... +Up to 8x expansion. +[example:range_expanded] +Note: tuple size must be equal to the number of the other arguments + +Range expansion can be beneficial when you need a similar distribution for arguments with wildly different ranges (or cardinality) +For example: 'IP Address' (0...FFFFFFFF) and 'Country code' (0...FF) + +Morton encoding for one argument is always the argument itself. +[example:identity] +Produces: `1` + +You can expand one argument too: +[example:identity_expanded] +Produces: `32768` + +The function also accepts columns as arguments: +[example:from_table] + +But the range tuple must still be a constant: +[example:from_table_range] + +Please note that you can fit only so much bits of information into Morton code as UInt64 has. 
+Two arguments will have a range of maximum 2^32 (64/2) each +Three arguments: range of max 2^21 (64/3) each +And so on, all overflow will be clamped to zero +)", + Documentation::Examples{ + {"simple", "SELECT mortonEncode(1, 2, 3)"}, + {"range_expanded", "SELECT mortonEncode((1,2), 1024, 16)"}, + {"identity", "SELECT mortonEncode(1)"}, + {"identity_expanded", "SELECT mortonEncode(tuple(2), 128)"}, + {"from_table", "SELECT mortonEncode(n1, n2) FROM table"}, + {"from_table_range", "SELECT mortonEncode((1,2), n1, n2) FROM table"}, + }, + Documentation::Categories {"ZCurve", "Morton coding"} + }); +} + +} diff --git a/src/Functions/now64.cpp b/src/Functions/now64.cpp index c5225d3317f..a6df4235d60 100644 --- a/src/Functions/now64.cpp +++ b/src/Functions/now64.cpp @@ -130,7 +130,7 @@ public: ". Expected const integer.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - scale = argument.column->get64(0); + scale = static_cast(argument.column->get64(0)); } if (arguments.size() == 2) { diff --git a/src/Functions/nowInBlock.cpp b/src/Functions/nowInBlock.cpp index db72e791587..b1764590fda 100644 --- a/src/Functions/nowInBlock.cpp +++ b/src/Functions/nowInBlock.cpp @@ -2,7 +2,7 @@ #include #include #include -#include +#include namespace DB @@ -74,7 +74,7 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr &, size_t input_rows_count) const override { - return ColumnUInt32::create(input_rows_count, time(nullptr)); + return ColumnDateTime::create(input_rows_count, static_cast(time(nullptr))); } }; diff --git a/src/Functions/plus.cpp b/src/Functions/plus.cpp index 4b81c23584c..cd9cf6cec5c 100644 --- a/src/Functions/plus.cpp +++ b/src/Functions/plus.cpp @@ -25,7 +25,7 @@ struct PlusImpl return static_cast(static_cast(a)) + static_cast(static_cast(b)); } else - return static_cast(a) + b; + return static_cast(a) + static_cast(b); } /// Apply operation and check overflow. It's used for Deciamal operations. @returns true if overflowed, false otherwise. 
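The arithmetic hunks above (minus.cpp, modulo.cpp, plus.cpp) share one pattern: each operand is cast explicitly to the result type before the operator is applied, so the conversion is visible at the call site rather than left to the usual arithmetic conversions. A minimal self-contained sketch of that pattern follows; it is illustrative only, and the name plus_like and the example types are assumptions, not part of the patch.

#include <cstdint>
#include <iostream>

/// Illustrative sketch (not the ClickHouse sources): cast both operands to the
/// caller-chosen result type before applying the operator, mirroring the
/// static_cast changes in the PlusImpl / MinusImpl hunks above.
template <typename A, typename B, typename Result>
Result plus_like(A a, B b)
{
    return static_cast<Result>(a) + static_cast<Result>(b);
}

int main()
{
    std::int8_t a = -1;
    std::uint8_t b = 2;
    /// Without the explicit casts, int8_t and uint8_t would be combined through
    /// implicit promotion only; here the intended result type is spelled out.
    std::cout << plus_like<std::int8_t, std::uint8_t, std::int16_t>(a, b) << '\n'; /// prints 1
}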
diff --git a/src/Functions/pointInEllipses.cpp b/src/Functions/pointInEllipses.cpp index f69886ad71f..07b7f013cac 100644 --- a/src/Functions/pointInEllipses.cpp +++ b/src/Functions/pointInEllipses.cpp @@ -102,7 +102,7 @@ private: Float64 ellipse_data[4]; for (const auto idx : collections::range(0, 4)) { - int arg_idx = 2 + 4 * ellipse_idx + idx; + size_t arg_idx = 2 + 4 * ellipse_idx + idx; const auto * column = arguments[arg_idx].column.get(); if (const auto * col = checkAndGetColumnConst>(column)) { diff --git a/src/Functions/randDistribution.cpp b/src/Functions/randDistribution.cpp new file mode 100644 index 00000000000..94dad4fdc89 --- /dev/null +++ b/src/Functions/randDistribution.cpp @@ -0,0 +1,472 @@ +#include +#include +#include +#include "Common/Exception.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int ILLEGAL_COLUMN; + extern const int BAD_ARGUMENTS; + extern const int LOGICAL_ERROR; +} + +namespace +{ +struct UniformDistribution +{ + using ReturnType = DataTypeFloat64; + static constexpr const char * getName() { return "randUniform"; } + static constexpr size_t getNumberOfArguments() { return 2; } + + static void generate(Float64 min, Float64 max, ColumnFloat64::Container & container) + { + auto distribution = std::uniform_real_distribution<>(min, max); + for (auto & elem : container) + elem = distribution(thread_local_rng); + } +}; + +struct NormalDistribution +{ + using ReturnType = DataTypeFloat64; + static constexpr const char * getName() { return "randNormal"; } + static constexpr size_t getNumberOfArguments() { return 2; } + + static void generate(Float64 mean, Float64 variance, ColumnFloat64::Container & container) + { + auto distribution = std::normal_distribution<>(mean, variance); + for (auto & elem : container) + elem = distribution(thread_local_rng); + } +}; + +struct LogNormalDistribution +{ + using ReturnType = DataTypeFloat64; + static constexpr const char * getName() { return "randLogNormal"; } + static constexpr size_t getNumberOfArguments() { return 2; } + + static void generate(Float64 mean, Float64 variance, ColumnFloat64::Container & container) + { + auto distribution = std::lognormal_distribution<>(mean, variance); + for (auto & elem : container) + elem = distribution(thread_local_rng); + } +}; + +struct ExponentialDistribution +{ + using ReturnType = DataTypeFloat64; + static constexpr const char * getName() { return "randExponential"; } + static constexpr size_t getNumberOfArguments() { return 1; } + + static void generate(Float64 lambda, ColumnFloat64::Container & container) + { + auto distribution = std::exponential_distribution<>(lambda); + for (auto & elem : container) + elem = distribution(thread_local_rng); + } +}; + +struct ChiSquaredDistribution +{ + using ReturnType = DataTypeFloat64; + static constexpr const char * getName() { return "randChiSquared"; } + static constexpr size_t getNumberOfArguments() { return 1; } + + static void generate(Float64 degree_of_freedom, ColumnFloat64::Container & container) + { + auto distribution = std::chi_squared_distribution<>(degree_of_freedom); + for (auto & elem : container) + elem = distribution(thread_local_rng); + } +}; + +struct StudentTDistribution +{ + using ReturnType = DataTypeFloat64; + static constexpr const char * getName() { return "randStudentT"; } + static constexpr size_t getNumberOfArguments() { return 1; } + + static void 
generate(Float64 degree_of_freedom, ColumnFloat64::Container & container) + { + auto distribution = std::student_t_distribution<>(degree_of_freedom); + for (auto & elem : container) + elem = distribution(thread_local_rng); + } +}; + +struct FisherFDistribution +{ + using ReturnType = DataTypeFloat64; + static constexpr const char * getName() { return "randFisherF"; } + static constexpr size_t getNumberOfArguments() { return 2; } + + static void generate(Float64 d1, Float64 d2, ColumnFloat64::Container & container) + { + auto distribution = std::fisher_f_distribution<>(d1, d2); + for (auto & elem : container) + elem = distribution(thread_local_rng); + } +}; + +struct BernoulliDistribution +{ + using ReturnType = DataTypeUInt8; + static constexpr const char * getName() { return "randBernoulli"; } + static constexpr size_t getNumberOfArguments() { return 1; } + + static void generate(Float64 p, ColumnUInt8::Container & container) + { + if (p < 0.0f || p > 1.0f) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Argument of function {} should be inside [0, 1] because it is a probability", getName()); + + auto distribution = std::bernoulli_distribution(p); + for (auto & elem : container) + elem = static_cast(distribution(thread_local_rng)); + } +}; + +struct BinomialDistribution +{ + using ReturnType = DataTypeUInt64; + static constexpr const char * getName() { return "randBinomial"; } + static constexpr size_t getNumberOfArguments() { return 2; } + + static void generate(UInt64 t, Float64 p, ColumnUInt64::Container & container) + { + if (p < 0.0f || p > 1.0f) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Argument of function {} should be inside [0, 1] because it is a probability", getName()); + + auto distribution = std::binomial_distribution(t, p); + for (auto & elem : container) + elem = static_cast(distribution(thread_local_rng)); + } +}; + +struct NegativeBinomialDistribution +{ + using ReturnType = DataTypeUInt64; + static constexpr const char * getName() { return "randNegativeBinomial"; } + static constexpr size_t getNumberOfArguments() { return 2; } + + static void generate(UInt64 t, Float64 p, ColumnUInt64::Container & container) + { + if (p < 0.0f || p > 1.0f) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Argument of function {} should be inside [0, 1] because it is a probability", getName()); + + auto distribution = std::negative_binomial_distribution(t, p); + for (auto & elem : container) + elem = static_cast(distribution(thread_local_rng)); + } +}; + +struct PoissonDistribution +{ + using ReturnType = DataTypeUInt64; + static constexpr const char * getName() { return "randPoisson"; } + static constexpr size_t getNumberOfArguments() { return 1; } + + static void generate(UInt64 n, ColumnUInt64::Container & container) + { + auto distribution = std::poisson_distribution(n); + for (auto & elem : container) + elem = static_cast(distribution(thread_local_rng)); + } +}; + +} + +/** Function which will generate values according to the specified distribution + * Accepts only constant arguments + * Similar to the functions rand and rand64, an additional 'tag' argument could be added to the + * end of the arguments list (this argument will be ignored) which will guarantee that functions are not stuck together + * during optimisations.
+ * Example: SELECT randNormal(0, 1, 1), randNormal(0, 1, 2) FROM numbers(10) + * This query will return two different columns + */ +template +class FunctionRandomDistribution : public IFunction +{ +private: + + template + ResultType getParameterFromConstColumn(size_t parameter_number, const ColumnsWithTypeAndName & arguments) const + { + if (parameter_number >= arguments.size()) + throw Exception( + ErrorCodes::LOGICAL_ERROR, "Parameter number ({}) is greater than the size of arguments ({}). This is a bug", parameter_number, arguments.size()); + + const IColumn * col = arguments[parameter_number].column.get(); + + if (!isColumnConst(*col)) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Parameter number {} of function {} must be constant.", parameter_number, getName()); + + auto parameter = applyVisitor(FieldVisitorConvertToNumber(), assert_cast(*col).getField()); + + if (isNaN(parameter) || !std::isfinite(parameter)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter number {} of function {} cannot be NaN or infinite", parameter_number, getName()); + + return parameter; + } + +public: + static FunctionPtr create(ContextPtr) + { + return std::make_shared>(); + } + + static constexpr auto name = Distribution::getName(); + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return Distribution::getNumberOfArguments(); } + bool isVariadic() const override { return true; } + bool isDeterministic() const override { return false; } + bool isDeterministicInScopeOfQuery() const override { return false; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + auto desired = Distribution::getNumberOfArguments(); + if (arguments.size() != desired && arguments.size() != desired + 1) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong number of arguments for function {}.
Should be {} or {}", getName(), desired, desired + 1); + + for (size_t i = 0; i < Distribution::getNumberOfArguments(); ++i) + { + const auto & type = arguments[i]; + WhichDataType which(type); + if (!which.isFloat() && !which.isNativeUInt()) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument of function {}, expected Float64 or integer", type->getName(), getName()); + } + + return std::make_shared(); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t input_rows_count) const override + { + if constexpr (std::is_same_v) + { + auto res_column = ColumnUInt8::create(input_rows_count); + auto & res_data = res_column->getData(); + Distribution::generate(getParameterFromConstColumn(0, arguments), res_data); + return res_column; + } + else if constexpr (std::is_same_v || std::is_same_v) + { + auto res_column = ColumnUInt64::create(input_rows_count); + auto & res_data = res_column->getData(); + Distribution::generate(getParameterFromConstColumn(0, arguments), getParameterFromConstColumn(1, arguments), res_data); + return res_column; + } + else if constexpr (std::is_same_v) + { + auto res_column = ColumnUInt64::create(input_rows_count); + auto & res_data = res_column->getData(); + Distribution::generate(getParameterFromConstColumn(0, arguments), res_data); + return res_column; + } + else + { + auto res_column = ColumnFloat64::create(input_rows_count); + auto & res_data = res_column->getData(); + if constexpr (Distribution::getNumberOfArguments() == 1) + { + Distribution::generate(getParameterFromConstColumn(0, arguments), res_data); + } + else if constexpr (Distribution::getNumberOfArguments() == 2) + { + Distribution::generate(getParameterFromConstColumn(0, arguments), getParameterFromConstColumn(1, arguments), res_data); + } + else + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "More than two argument specified for function {}", getName()); + } + + return res_column; + } + } +}; + + +REGISTER_FUNCTION(Distribution) +{ + factory.registerFunction>( + { + R"( +Returns a random number from the uniform distribution in the specified range. +Accepts two parameters - minimum bound and maximum bound. + +Typical usage: +[example:typical] +)", + Documentation::Examples{ + {"typical", "SELECT randUniform(0, 1) FROM numbers(100000);"}}, + Documentation::Categories{"Distribution"} + }); + + factory.registerFunction>( + { + R"( +Returns a random number from the normal distribution. +Accepts two parameters - mean and variance. + +Typical usage: +[example:typical] +)", + Documentation::Examples{ + {"typical", "SELECT randNormal(0, 5) FROM numbers(100000);"}}, + Documentation::Categories{"Distribution"} + }); + + + factory.registerFunction>( + { + R"( +Returns a random number from the lognormal distribution (a distribution of a random variable whose logarithm is normally distributed). +Accepts two parameters - mean and variance. + +Typical usage: +[example:typical] +)", + Documentation::Examples{ + {"typical", "SELECT randLogNormal(0, 5) FROM numbers(100000);"}}, + Documentation::Categories{"Distribution"} + }); + + + factory.registerFunction>( + { + R"( +Returns a random number from the exponential distribution. +Accepts one parameter. 
+
+Typical usage:
+[example:typical]
+)",
+        Documentation::Examples{
+            {"typical", "SELECT randExponential(5) FROM numbers(100000);"}},
+        Documentation::Categories{"Distribution"}
+    });
+
+
+    factory.registerFunction>(
+    {
+        R"(
+Returns a random number from the chi-squared distribution (a distribution of a sum of the squares of k independent standard normal random variables).
+Accepts one parameter - degree of freedom.
+
+Typical usage:
+[example:typical]
+)",
+        Documentation::Examples{
+            {"typical", "SELECT randChiSquared(5) FROM numbers(100000);"}},
+        Documentation::Categories{"Distribution"}
+    });
+
+    factory.registerFunction>(
+    {
+        R"(
+Returns a random number from the t-distribution.
+Accepts one parameter - degree of freedom.
+
+Typical usage:
+[example:typical]
+)",
+        Documentation::Examples{
+            {"typical", "SELECT randStudentT(5) FROM numbers(100000);"}},
+        Documentation::Categories{"Distribution"}
+    });
+
+
+    factory.registerFunction>(
+    {
+        R"(
+Returns a random number from the F-distribution.
+The F-distribution is the distribution of X = (S1 / d1) / (S2 / d2) where d1 and d2 are degrees of freedom.
+Accepts two parameters - degrees of freedom.
+
+Typical usage:
+[example:typical]
+)",
+        Documentation::Examples{
+            {"typical", "SELECT randFisherF(5, 5) FROM numbers(100000);"}},
+        Documentation::Categories{"Distribution"}
+    });
+
+
+    factory.registerFunction>(
+    {
+        R"(
+Returns a random number from the Bernoulli distribution.
+Accepts one parameter - probability of success.
+
+Typical usage:
+[example:typical]
+)",
+        Documentation::Examples{
+            {"typical", "SELECT randBernoulli(0.1) FROM numbers(100000);"}},
+        Documentation::Categories{"Distribution"}
+    });
+
+
+    factory.registerFunction>(
+    {
+        R"(
+Returns a random number from the binomial distribution.
+Accepts two parameters - number of experiments and probability of success in each experiment.
+
+Typical usage:
+[example:typical]
+)",
+        Documentation::Examples{
+            {"typical", "SELECT randBinomial(10, 0.1) FROM numbers(100000);"}},
+        Documentation::Categories{"Distribution"}
+    });
+
+
+    factory.registerFunction>(
+    {
+        R"(
+Returns a random number from the negative binomial distribution.
+Accepts two parameters - number of experiments and probability of success in each experiment.
+
+Typical usage:
+[example:typical]
+)",
+        Documentation::Examples{
+            {"typical", "SELECT randNegativeBinomial(10, 0.1) FROM numbers(100000);"}},
+        Documentation::Categories{"Distribution"}
+    });
+
+
+    factory.registerFunction>(
+    {
+        R"(
+Returns a random number from the Poisson distribution.
+Accepts one parameter - the mean number of occurrences.
+
+Typical usage:
+[example:typical]
+)",
+        Documentation::Examples{
+            {"typical", "SELECT randPoisson(3) FROM numbers(100000);"}},
+        Documentation::Categories{"Distribution"}
+    });
+}
+
+}
diff --git a/src/Functions/randomStringUTF8.cpp b/src/Functions/randomStringUTF8.cpp
index 043db179d71..bcaa603b85d 100644
--- a/src/Functions/randomStringUTF8.cpp
+++ b/src/Functions/randomStringUTF8.cpp
@@ -116,8 +116,8 @@ public:
     {
         UInt64 rand = rng();
-        UInt32 code_point1 = generate_code_point(rand);
-        UInt32 code_point2 = generate_code_point(rand >> 32);
+        UInt32 code_point1 = generate_code_point(static_cast(rand));
+        UInt32 code_point2 = generate_code_point(static_cast(rand >> 32u));
        /// We have padding in column buffers that we can overwrite.
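// Illustrative aside, not part of the patch above: the randomStringUTF8 hunk only makes the
// narrowing of the 64-bit generator output explicit. A minimal standalone sketch of the same
// split, assuming std::mt19937_64 in place of the member rng:
#include <cstdint>
#include <random>

int main()
{
    std::mt19937_64 rng{42};
    uint64_t value = rng();
    uint32_t low_half = static_cast<uint32_t>(value);         // lower 32 bits feed the first code point
    uint32_t high_half = static_cast<uint32_t>(value >> 32u); // upper 32 bits feed the second code point
    return (low_half ^ high_half) ? 0 : 0;                    // use the values; always returns 0
}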
size_t length1 = UTF8::convertCodePointToUTF8(code_point1, pos, sizeof(int)); diff --git a/src/Functions/replaceAll.cpp b/src/Functions/replaceAll.cpp index 7c5cd82ca5d..d85d192d199 100644 --- a/src/Functions/replaceAll.cpp +++ b/src/Functions/replaceAll.cpp @@ -13,7 +13,7 @@ struct NameReplaceAll static constexpr auto name = "replaceAll"; }; -using FunctionReplaceAll = FunctionStringReplace, NameReplaceAll>; +using FunctionReplaceAll = FunctionStringReplace, NameReplaceAll>; } diff --git a/src/Functions/replaceOne.cpp b/src/Functions/replaceOne.cpp index c0c21dbf51f..6557339537e 100644 --- a/src/Functions/replaceOne.cpp +++ b/src/Functions/replaceOne.cpp @@ -13,7 +13,7 @@ struct NameReplaceOne static constexpr auto name = "replaceOne"; }; -using FunctionReplaceOne = FunctionStringReplace, NameReplaceOne>; +using FunctionReplaceOne = FunctionStringReplace, NameReplaceOne>; } diff --git a/src/Functions/replaceRegexpAll.cpp b/src/Functions/replaceRegexpAll.cpp index 0250b4a5ba6..4eaf46c05d4 100644 --- a/src/Functions/replaceRegexpAll.cpp +++ b/src/Functions/replaceRegexpAll.cpp @@ -13,7 +13,7 @@ struct NameReplaceRegexpAll static constexpr auto name = "replaceRegexpAll"; }; -using FunctionReplaceRegexpAll = FunctionStringReplace, NameReplaceRegexpAll>; +using FunctionReplaceRegexpAll = FunctionStringReplace, NameReplaceRegexpAll>; } diff --git a/src/Functions/replaceRegexpOne.cpp b/src/Functions/replaceRegexpOne.cpp index b40992b73fc..60e29213a9a 100644 --- a/src/Functions/replaceRegexpOne.cpp +++ b/src/Functions/replaceRegexpOne.cpp @@ -13,7 +13,7 @@ struct NameReplaceRegexpOne static constexpr auto name = "replaceRegexpOne"; }; -using FunctionReplaceRegexpOne = FunctionStringReplace, NameReplaceRegexpOne>; +using FunctionReplaceRegexpOne = FunctionStringReplace, NameReplaceRegexpOne>; } diff --git a/src/Functions/runningConcurrency.cpp b/src/Functions/runningConcurrency.cpp index 37fa11bce8f..c759476006f 100644 --- a/src/Functions/runningConcurrency.cpp +++ b/src/Functions/runningConcurrency.cpp @@ -43,6 +43,7 @@ namespace DB const typename ColVecArg::Container & vec_end = col_end->getData(); using ColVecConc = typename ConcurrencyDataType::ColumnType; + using FieldType = typename ConcurrencyDataType::FieldType; typename ColVecConc::MutablePtr col_concurrency = ColVecConc::create(input_rows_count); typename ColVecConc::Container & vec_concurrency = col_concurrency->getData(); @@ -74,7 +75,7 @@ namespace DB ongoing_until.erase( ongoing_until.begin(), ongoing_until.upper_bound(begin)); - vec_concurrency[i] = ongoing_until.size(); + vec_concurrency[i] = static_cast(ongoing_until.size()); } return col_concurrency; diff --git a/src/Functions/runningDifference.h b/src/Functions/runningDifference.h index f3caf245d08..053d7cb9736 100644 --- a/src/Functions/runningDifference.h +++ b/src/Functions/runningDifference.h @@ -117,7 +117,7 @@ private: else if (which.isDate()) f(DataTypeDate::FieldType()); else if (which.isDate32()) - f(DataTypeDate::FieldType()); + f(DataTypeDate32::FieldType()); else if (which.isDateTime()) f(DataTypeDateTime::FieldType()); else diff --git a/src/Functions/stem.cpp b/src/Functions/stem.cpp index 9c7ce895fce..91c98ec9b82 100644 --- a/src/Functions/stem.cpp +++ b/src/Functions/stem.cpp @@ -51,8 +51,8 @@ struct StemImpl /// Note that accessing -1th element is valid for PaddedPODArray. 
size_t original_size = offsets[i] - offsets[i - 1]; const sb_symbol * result = sb_stemmer_stem(stemmer, - reinterpret_cast(data.data() + offsets[i - 1]), - original_size - 1); + reinterpret_cast(data.data() + offsets[i - 1]), + static_cast(original_size - 1)); size_t new_size = sb_stemmer_length(stemmer) + 1; memcpy(res_data.data() + data_size, result, new_size); diff --git a/src/Functions/tests/gtest_has_all.cpp b/src/Functions/tests/gtest_has_all.cpp index ca7bc80b4aa..1776a461580 100644 --- a/src/Functions/tests/gtest_has_all.cpp +++ b/src/Functions/tests/gtest_has_all.cpp @@ -18,9 +18,9 @@ void arrayInit(T* elements_to_have, size_t nb_elements_to_have, T* array_element { for (size_t i = 0; i < array_size; ++i) { - array_elements[i] = i; + array_elements[i] = static_cast(i); } - auto [dist, gen] = uni_int_dist(0, array_size - 1); + auto [dist, gen] = uni_int_dist(0, static_cast(array_size - 1)); for (size_t i = 0; i < nb_elements_to_have; ++i) { elements_to_have[i] = array_elements[dist(gen)]; @@ -28,14 +28,14 @@ void arrayInit(T* elements_to_have, size_t nb_elements_to_have, T* array_element if (!all_elements_present) { /// make one element to be searched for missing from the target array - elements_to_have[nb_elements_to_have - 1] = array_size + 1; + elements_to_have[nb_elements_to_have - 1] = static_cast(array_size + 1); } } void nullMapInit(UInt8 * null_map, size_t null_map_size, size_t nb_null_elements) { /// -2 to keep the last element of the array non-null - auto [dist, gen] = uni_int_dist(0, null_map_size - 2); + auto [dist, gen] = uni_int_dist(0, static_cast(null_map_size - 2)); for (size_t i = 0; i < null_map_size; ++i) { null_map[i] = 0; diff --git a/src/Functions/throwIf.cpp b/src/Functions/throwIf.cpp index 692faf1883c..357c5e0651a 100644 --- a/src/Functions/throwIf.cpp +++ b/src/Functions/throwIf.cpp @@ -22,11 +22,6 @@ namespace ErrorCodes namespace { -/// The regex-based code style check script in CI complains when it sees "ErrorCodes:: ErrorCode" (space added to avoid another match). -/// Because this expression is only used in this file, don't add some suppression mechanism to the already complex style checker, instead -/// work around by creating a namespace alias. -namespace ErrorCodeAlias = ErrorCodes; - /// Throw an exception if the argument is non zero. 
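// Illustrative aside, not part of the patch above: throwIf optionally takes a custom message and,
// when the custom error code argument is allowed, a custom error code; the hunk above narrows that
// code to a 32-bit optional. A self-contained sketch of the same value_or pattern, with hypothetical
// names standing in for DB::Exception and the default error code:
#include <optional>
#include <stdexcept>
#include <string>

constexpr int DEFAULT_THROW_IF_ERROR_CODE = 1000; // hypothetical placeholder value

void throwIfNonZero(unsigned char value, const std::optional<std::string> & message, const std::optional<int> & error_code)
{
    if (value)
        throw std::runtime_error(
            "code " + std::to_string(error_code.value_or(DEFAULT_THROW_IF_ERROR_CODE)) + ": "
            + message.value_or("Value passed to 'throwIf' function is non-zero"));
}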
class FunctionThrowIf : public IFunction { @@ -93,7 +88,7 @@ public: custom_message = message_column->getValue(); } - std::optional custom_error_code; + std::optional custom_error_code; if (allow_custom_error_code_argument && arguments.size() == 3) { if (!isColumnConst(*(arguments[2].column))) @@ -125,7 +120,7 @@ public: private: template - ColumnPtr execute(const IColumn * in_untyped, const std::optional & message, const std::optional & error_code) const + ColumnPtr execute(const IColumn * in_untyped, const std::optional & message, const std::optional & error_code) const { const auto * in = checkAndGetColumn>(in_untyped); diff --git a/src/Functions/timeSlots.cpp b/src/Functions/timeSlots.cpp index 949ca7bc0e4..72d6059e0a1 100644 --- a/src/Functions/timeSlots.cpp +++ b/src/Functions/timeSlots.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -19,6 +20,7 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int ILLEGAL_COLUMN; + extern const int BAD_ARGUMENTS; } namespace @@ -41,6 +43,9 @@ struct TimeSlotsImpl const PaddedPODArray & starts, const PaddedPODArray & durations, UInt32 time_slot_size, PaddedPODArray & result_values, ColumnArray::Offsets & result_offsets) { + if (time_slot_size == 0) + throw Exception("Time slot size cannot be zero", ErrorCodes::BAD_ARGUMENTS); + size_t size = starts.size(); result_offsets.resize(size); @@ -63,6 +68,9 @@ struct TimeSlotsImpl const PaddedPODArray & starts, UInt32 duration, UInt32 time_slot_size, PaddedPODArray & result_values, ColumnArray::Offsets & result_offsets) { + if (time_slot_size == 0) + throw Exception("Time slot size cannot be zero", ErrorCodes::BAD_ARGUMENTS); + size_t size = starts.size(); result_offsets.resize(size); @@ -85,6 +93,9 @@ struct TimeSlotsImpl UInt32 start, const PaddedPODArray & durations, UInt32 time_slot_size, PaddedPODArray & result_values, ColumnArray::Offsets & result_offsets) { + if (time_slot_size == 0) + throw Exception("Time slot size cannot be zero", ErrorCodes::BAD_ARGUMENTS); + size_t size = durations.size(); result_offsets.resize(size); @@ -125,6 +136,9 @@ struct TimeSlotsImpl ColumnArray::Offset current_offset = 0; time_slot_size = time_slot_size.value * ts_multiplier; + if (time_slot_size == 0) + throw Exception("Time slot size cannot be zero", ErrorCodes::BAD_ARGUMENTS); + for (size_t i = 0; i < size; ++i) { for (DateTime64 value = (starts[i] * dt_multiplier) / time_slot_size, end = (starts[i] * dt_multiplier + durations[i] * dur_multiplier) / time_slot_size; value <= end; value += 1) @@ -155,6 +169,9 @@ struct TimeSlotsImpl ColumnArray::Offset current_offset = 0; duration = duration * dur_multiplier; time_slot_size = time_slot_size.value * ts_multiplier; + if (time_slot_size == 0) + throw Exception("Time slot size cannot be zero", ErrorCodes::BAD_ARGUMENTS); + for (size_t i = 0; i < size; ++i) { for (DateTime64 value = (starts[i] * dt_multiplier) / time_slot_size, end = (starts[i] * dt_multiplier + duration) / time_slot_size; value <= end; value += 1) @@ -185,6 +202,9 @@ struct TimeSlotsImpl ColumnArray::Offset current_offset = 0; start = dt_multiplier * start; time_slot_size = time_slot_size.value * ts_multiplier; + if (time_slot_size == 0) + throw Exception("Time slot size cannot be zero", ErrorCodes::BAD_ARGUMENTS); + for (size_t i = 0; i < size; ++i) { for (DateTime64 value = start / time_slot_size, end = (start + durations[i] * dur_multiplier) / time_slot_size; value <= end; value += 1) @@ -281,11 
+301,11 @@ public: throw Exception("Third argument for function " + getName() + " must be greater than zero", ErrorCodes::ILLEGAL_COLUMN); } - const auto * dt_starts = checkAndGetColumn(arguments[0].column.get()); - const auto * dt_const_starts = checkAndGetColumnConst(arguments[0].column.get()); + const auto * dt_starts = checkAndGetColumn(arguments[0].column.get()); + const auto * dt_const_starts = checkAndGetColumnConst(arguments[0].column.get()); - const auto * durations = checkAndGetColumn(arguments[1].column.get()); - const auto * const_durations = checkAndGetColumnConst(arguments[1].column.get()); + const auto * durations = checkAndGetColumn(arguments[1].column.get()); + const auto * const_durations = checkAndGetColumnConst(arguments[1].column.get()); auto res = ColumnArray::create(ColumnUInt32::create()); ColumnUInt32::Container & res_values = typeid_cast(res->getData()).getData(); @@ -322,8 +342,8 @@ public: time_slot_scale = assert_cast(arguments[2].type.get())->getScale(); } - const auto * starts = checkAndGetColumn(arguments[0].column.get()); - const auto * const_starts = checkAndGetColumnConst(arguments[0].column.get()); + const auto * starts = checkAndGetColumn(arguments[0].column.get()); + const auto * const_starts = checkAndGetColumnConst(arguments[0].column.get()); const auto * durations = checkAndGetColumn>(arguments[1].column.get()); const auto * const_durations = checkAndGetColumnConst>(arguments[1].column.get()); diff --git a/src/Functions/toRelativeDayNum.cpp b/src/Functions/toRelativeDayNum.cpp index 241104493cd..db3eb119dcf 100644 --- a/src/Functions/toRelativeDayNum.cpp +++ b/src/Functions/toRelativeDayNum.cpp @@ -7,7 +7,7 @@ namespace DB { -using FunctionToRelativeDayNum = FunctionDateOrDateTimeToSomething; +using FunctionToRelativeDayNum = FunctionDateOrDateTimeToSomething>; REGISTER_FUNCTION(ToRelativeDayNum) { diff --git a/src/Functions/toRelativeHourNum.cpp b/src/Functions/toRelativeHourNum.cpp index 2404d73c450..838b1bb1ca1 100644 --- a/src/Functions/toRelativeHourNum.cpp +++ b/src/Functions/toRelativeHourNum.cpp @@ -7,7 +7,7 @@ namespace DB { -using FunctionToRelativeHourNum = FunctionDateOrDateTimeToSomething; +using FunctionToRelativeHourNum = FunctionDateOrDateTimeToSomething>; REGISTER_FUNCTION(ToRelativeHourNum) { diff --git a/src/Functions/toRelativeMinuteNum.cpp b/src/Functions/toRelativeMinuteNum.cpp index a5ecada1e92..e9318517119 100644 --- a/src/Functions/toRelativeMinuteNum.cpp +++ b/src/Functions/toRelativeMinuteNum.cpp @@ -7,7 +7,7 @@ namespace DB { -using FunctionToRelativeMinuteNum = FunctionDateOrDateTimeToSomething; +using FunctionToRelativeMinuteNum = FunctionDateOrDateTimeToSomething>; REGISTER_FUNCTION(ToRelativeMinuteNum) { diff --git a/src/Functions/toRelativeMonthNum.cpp b/src/Functions/toRelativeMonthNum.cpp index 8f46e04e483..7b058c3ba12 100644 --- a/src/Functions/toRelativeMonthNum.cpp +++ b/src/Functions/toRelativeMonthNum.cpp @@ -7,7 +7,7 @@ namespace DB { -using FunctionToRelativeMonthNum = FunctionDateOrDateTimeToSomething; +using FunctionToRelativeMonthNum = FunctionDateOrDateTimeToSomething>; REGISTER_FUNCTION(ToRelativeMonthNum) { diff --git a/src/Functions/toRelativeQuarterNum.cpp b/src/Functions/toRelativeQuarterNum.cpp index 8ea0c42ef09..c7702d47f42 100644 --- a/src/Functions/toRelativeQuarterNum.cpp +++ b/src/Functions/toRelativeQuarterNum.cpp @@ -7,7 +7,7 @@ namespace DB { -using FunctionToRelativeQuarterNum = FunctionDateOrDateTimeToSomething; +using FunctionToRelativeQuarterNum = FunctionDateOrDateTimeToSomething>; 
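// Illustrative aside, not part of the patch above: the timeSlots guards exist because the slot loop
// divides the start and end timestamps by time_slot_size, so a zero slot size would otherwise be a
// division by zero. A minimal standalone sketch of the slot enumeration with the same guard:
#include <cstdint>
#include <stdexcept>
#include <vector>

// Returns the start of every slot of `slot_size` seconds overlapped by [start, start + duration].
std::vector<uint32_t> timeSlotsSketch(uint32_t start, uint32_t duration, uint32_t slot_size)
{
    if (slot_size == 0)
        throw std::invalid_argument("Time slot size cannot be zero");

    std::vector<uint32_t> slots;
    for (uint32_t value = start / slot_size, end = (start + duration) / slot_size; value <= end; ++value)
        slots.push_back(value * slot_size);
    return slots;
}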
REGISTER_FUNCTION(ToRelativeQuarterNum) { diff --git a/src/Functions/toRelativeSecondNum.cpp b/src/Functions/toRelativeSecondNum.cpp index 7af41ab8334..db80f721fbd 100644 --- a/src/Functions/toRelativeSecondNum.cpp +++ b/src/Functions/toRelativeSecondNum.cpp @@ -7,7 +7,7 @@ namespace DB { -using FunctionToRelativeSecondNum = FunctionDateOrDateTimeToSomething; +using FunctionToRelativeSecondNum = FunctionDateOrDateTimeToSomething>; REGISTER_FUNCTION(ToRelativeSecondNum) { diff --git a/src/Functions/toRelativeWeekNum.cpp b/src/Functions/toRelativeWeekNum.cpp index fe7aec3fd9a..beca00d8cc4 100644 --- a/src/Functions/toRelativeWeekNum.cpp +++ b/src/Functions/toRelativeWeekNum.cpp @@ -7,7 +7,7 @@ namespace DB { -using FunctionToRelativeWeekNum = FunctionDateOrDateTimeToSomething; +using FunctionToRelativeWeekNum = FunctionDateOrDateTimeToSomething>; REGISTER_FUNCTION(ToRelativeWeekNum) { diff --git a/src/Functions/toRelativeYearNum.cpp b/src/Functions/toRelativeYearNum.cpp index 4574d8513e0..b4fe3318129 100644 --- a/src/Functions/toRelativeYearNum.cpp +++ b/src/Functions/toRelativeYearNum.cpp @@ -7,7 +7,7 @@ namespace DB { -using FunctionToRelativeYearNum = FunctionDateOrDateTimeToSomething; +using FunctionToRelativeYearNum = FunctionDateOrDateTimeToSomething>; REGISTER_FUNCTION(ToRelativeYearNum) { diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index ac648b87448..3054cf280d9 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -133,17 +134,17 @@ namespace { static UInt32 execute(UInt16 d, Int64 days, const DateLUTImpl & time_zone, Int64) { - return time_zone.toStartOfDayInterval(ExtendedDayNum(d), days); + return static_cast(time_zone.toStartOfDayInterval(ExtendedDayNum(d), days)); } static UInt32 execute(Int32 d, Int64 days, const DateLUTImpl & time_zone, Int64) { - return time_zone.toStartOfDayInterval(ExtendedDayNum(d), days); + return static_cast(time_zone.toStartOfDayInterval(ExtendedDayNum(d), days)); } static UInt32 execute(UInt32 t, Int64 days, const DateLUTImpl & time_zone, Int64) { - return time_zone.toStartOfDayInterval(time_zone.toDayNum(t), days); + return static_cast(time_zone.toStartOfDayInterval(time_zone.toDayNum(t), days)); } static Int64 execute(Int64 t, Int64 days, const DateLUTImpl & time_zone, Int64 scale_multiplier) @@ -437,7 +438,7 @@ private: if (which_type.isDateTime64()) { - const auto * time_column_vec = checkAndGetColumn(time_column.column.get()); + const auto * time_column_vec = checkAndGetColumn(time_column.column.get()); auto scale = assert_cast(from_datatype).getScale(); if (time_column_vec) @@ -445,19 +446,19 @@ private: } if (which_type.isDateTime()) { - const auto * time_column_vec = checkAndGetColumn(time_column.column.get()); + const auto * time_column_vec = checkAndGetColumn(time_column.column.get()); if (time_column_vec) return dispatchForIntervalColumn(assert_cast(from_datatype), *time_column_vec, interval_column, result_type, time_zone); } if (which_type.isDate()) { - const auto * time_column_vec = checkAndGetColumn(time_column.column.get()); + const auto * time_column_vec = checkAndGetColumn(time_column.column.get()); if (time_column_vec) return dispatchForIntervalColumn(assert_cast(from_datatype), *time_column_vec, interval_column, result_type, time_zone); } if (which_type.isDate32()) { - const auto * time_column_vec = checkAndGetColumn(time_column.column.get()); + const auto * time_column_vec = 
checkAndGetColumn(time_column.column.get()); if (time_column_vec) return dispatchForIntervalColumn(assert_cast(from_datatype), *time_column_vec, interval_column, result_type, time_zone); } @@ -517,6 +518,7 @@ private: ColumnPtr execute(const FromDataType &, const ColumnType & time_column_type, Int64 num_units, const DataTypePtr & result_type, const DateLUTImpl & time_zone, const UInt16 scale) const { using ToColumnType = typename ToDataType::ColumnType; + using ToFieldType = typename ToDataType::FieldType; const auto & time_data = time_column_type.getData(); size_t size = time_data.size(); @@ -529,7 +531,8 @@ private: Int64 scale_multiplier = DecimalUtils::scaleMultiplier(scale); for (size_t i = 0; i != size; ++i) - result_data[i] = Transform::execute(time_data[i], num_units, time_zone, scale_multiplier); + result_data[i] = static_cast( + Transform::execute(time_data[i], num_units, time_zone, scale_multiplier)); return result_col; } diff --git a/src/Functions/toValidUTF8.cpp b/src/Functions/toValidUTF8.cpp index 9874e39baa4..4b79bc0bbda 100644 --- a/src/Functions/toValidUTF8.cpp +++ b/src/Functions/toValidUTF8.cpp @@ -106,7 +106,7 @@ struct ToValidUTF8Impl /// Sequence was not fully written to this buffer. break; } - else if (Poco::UTF8Encoding::isLegal(reinterpret_cast(p), len)) + else if (Poco::UTF8Encoding::isLegal(reinterpret_cast(p), static_cast(len))) { /// Valid sequence. p += len; diff --git a/src/Functions/tryBase64Decode.cpp b/src/Functions/tryBase64Decode.cpp index 1102c7a3418..bd452b8357b 100644 --- a/src/Functions/tryBase64Decode.cpp +++ b/src/Functions/tryBase64Decode.cpp @@ -1,7 +1,7 @@ #include + #if USE_BASE64 #include -#include namespace DB { @@ -10,4 +10,5 @@ REGISTER_FUNCTION(TryBase64Decode) factory.registerFunction>(); } } + #endif diff --git a/src/Functions/tupleElement.cpp b/src/Functions/tupleElement.cpp index 4f7ddda6b0b..6ac36dc80ed 100644 --- a/src/Functions/tupleElement.cpp +++ b/src/Functions/tupleElement.cpp @@ -82,7 +82,10 @@ public: const DataTypeTuple * tuple = checkAndGetDataType(tuple_col); if (!tuple) - throw Exception("First argument for function " + getName() + " must be tuple or array of tuple.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "First argument for function {} must be tuple or array of tuple. Actual {}", + getName(), + arguments[0].type->getName()); auto index = getElementNum(arguments[1].column, *tuple, number_of_arguments); if (index.has_value()) @@ -137,7 +140,10 @@ public: const DataTypeTuple * tuple_type_concrete = checkAndGetDataType(tuple_type); const ColumnTuple * tuple_col_concrete = checkAndGetColumn(tuple_col); if (!tuple_type_concrete || !tuple_col_concrete) - throw Exception("First argument for function " + getName() + " must be tuple or array of tuple.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "First argument for function {} must be tuple or array of tuple. 
Actual {}", + getName(), + first_arg.type->getName()); auto index = getElementNum(arguments[1].column, *tuple_type_concrete, arguments.size()); @@ -221,20 +227,18 @@ private: std::optional getElementNum(const ColumnPtr & index_column, const DataTypeTuple & tuple, const size_t argument_size) const { - if ( - checkAndGetColumnConst(index_column.get()) - || checkAndGetColumnConst(index_column.get()) - || checkAndGetColumnConst(index_column.get()) - || checkAndGetColumnConst(index_column.get()) - ) + if (checkAndGetColumnConst(index_column.get()) + || checkAndGetColumnConst(index_column.get()) + || checkAndGetColumnConst(index_column.get()) + || checkAndGetColumnConst(index_column.get())) { size_t index = index_column->getUInt(0); if (index == 0) - throw Exception("Indices in tuples are 1-based.", ErrorCodes::ILLEGAL_INDEX); + throw Exception(ErrorCodes::ILLEGAL_INDEX, "Indices in tuples are 1-based."); if (index > tuple.getElements().size()) - throw Exception("Index for tuple element is out of range.", ErrorCodes::ILLEGAL_INDEX); + throw Exception(ErrorCodes::ILLEGAL_INDEX, "Index for tuple element is out of range."); return std::optional(index - 1); } @@ -253,7 +257,9 @@ private: return std::nullopt; } else - throw Exception("Second argument to " + getName() + " must be a constant UInt or String", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Second argument to {} must be a constant UInt or String", + getName()); } }; diff --git a/src/IO/AIO.cpp b/src/IO/AIO.cpp index fb762271e4d..494ed3bae38 100644 --- a/src/IO/AIO.cpp +++ b/src/IO/AIO.cpp @@ -23,22 +23,22 @@ namespace DB int io_setup(unsigned nr, aio_context_t * ctxp) { - return syscall(__NR_io_setup, nr, ctxp); + return static_cast(syscall(__NR_io_setup, nr, ctxp)); } int io_destroy(aio_context_t ctx) { - return syscall(__NR_io_destroy, ctx); + return static_cast(syscall(__NR_io_destroy, ctx)); } int io_submit(aio_context_t ctx, long nr, struct iocb * iocbpp[]) // NOLINT { - return syscall(__NR_io_submit, ctx, nr, iocbpp); + return static_cast(syscall(__NR_io_submit, ctx, nr, iocbpp)); } int io_getevents(aio_context_t ctx, long min_nr, long max_nr, io_event * events, struct timespec * timeout) // NOLINT { - return syscall(__NR_io_getevents, ctx, min_nr, max_nr, events, timeout); + return static_cast(syscall(__NR_io_getevents, ctx, min_nr, max_nr, events, timeout)); } diff --git a/src/IO/Archives/ZipArchiveReader.cpp b/src/IO/Archives/ZipArchiveReader.cpp index 3127f299f5c..a7c72c7b575 100644 --- a/src/IO/Archives/ZipArchiveReader.cpp +++ b/src/IO/Archives/ZipArchiveReader.cpp @@ -281,7 +281,7 @@ private: bool nextImpl() override { RawHandle raw_handle = handle.getRawHandle(); - auto bytes_read = unzReadCurrentFile(raw_handle, internal_buffer.begin(), internal_buffer.size()); + auto bytes_read = unzReadCurrentFile(raw_handle, internal_buffer.begin(), static_cast(internal_buffer.size())); if (bytes_read < 0) checkResult(bytes_read); diff --git a/src/IO/Archives/ZipArchiveWriter.cpp b/src/IO/Archives/ZipArchiveWriter.cpp index 817e8132b64..d413783356d 100644 --- a/src/IO/Archives/ZipArchiveWriter.cpp +++ b/src/IO/Archives/ZipArchiveWriter.cpp @@ -134,7 +134,8 @@ private: if (!offset()) return; RawHandle raw_handle = handle.getRawHandle(); - checkResult(zipWriteInFileInZip(raw_handle, working_buffer.begin(), offset())); + int code = zipWriteInFileInZip(raw_handle, working_buffer.begin(), static_cast(offset())); + checkResult(code); } void checkResult(int code) const { 
handle.checkResult(code); } diff --git a/src/IO/BufferWithOwnMemory.h b/src/IO/BufferWithOwnMemory.h index 2121747500b..2e451e0032e 100644 --- a/src/IO/BufferWithOwnMemory.h +++ b/src/IO/BufferWithOwnMemory.h @@ -34,8 +34,7 @@ namespace ErrorCodes template > struct Memory : boost::noncopyable, Allocator { - /// Padding is needed to allow usage of 'memcpySmallAllowReadWriteOverflow15' function with this buffer. - static constexpr size_t pad_right = 15; + static constexpr size_t pad_right = PADDING_FOR_SIMD - 1; size_t m_capacity = 0; /// With padding. size_t m_size = 0; diff --git a/src/IO/Bzip2ReadBuffer.cpp b/src/IO/Bzip2ReadBuffer.cpp index 9d183393159..9970edcbcf3 100644 --- a/src/IO/Bzip2ReadBuffer.cpp +++ b/src/IO/Bzip2ReadBuffer.cpp @@ -85,11 +85,11 @@ bool Bzip2ReadBuffer::nextImpl() if (!bz->stream.avail_in) { in->nextIfAtEnd(); - bz->stream.avail_in = in->buffer().end() - in->position(); + bz->stream.avail_in = static_cast(in->buffer().end() - in->position()); bz->stream.next_in = in->position(); } - bz->stream.avail_out = internal_buffer.size(); + bz->stream.avail_out = static_cast(internal_buffer.size()); bz->stream.next_out = internal_buffer.begin(); ret = BZ2_bzDecompress(&bz->stream); @@ -99,7 +99,7 @@ bool Bzip2ReadBuffer::nextImpl() if (ret == BZ_STREAM_END && !in->eof()) { bz->reinitialize(); - bz->stream.avail_in = in->buffer().end() - in->position(); + bz->stream.avail_in = static_cast(in->buffer().end() - in->position()); bz->stream.next_in = in->position(); ret = BZ_OK; diff --git a/src/IO/Bzip2WriteBuffer.cpp b/src/IO/Bzip2WriteBuffer.cpp index 10a1803fec8..4b6bed70d35 100644 --- a/src/IO/Bzip2WriteBuffer.cpp +++ b/src/IO/Bzip2WriteBuffer.cpp @@ -58,7 +58,7 @@ void Bzip2WriteBuffer::nextImpl() } bz->stream.next_in = working_buffer.begin(); - bz->stream.avail_in = offset(); + bz->stream.avail_in = static_cast(offset()); try { @@ -66,7 +66,7 @@ void Bzip2WriteBuffer::nextImpl() { out->nextIfAtEnd(); bz->stream.next_out = out->position(); - bz->stream.avail_out = out->buffer().end() - out->position(); + bz->stream.avail_out = static_cast(out->buffer().end() - out->position()); int ret = BZ2_bzCompress(&bz->stream, BZ_RUN); @@ -95,7 +95,7 @@ void Bzip2WriteBuffer::finalizeBefore() out->nextIfAtEnd(); bz->stream.next_out = out->position(); - bz->stream.avail_out = out->buffer().end() - out->position(); + bz->stream.avail_out = static_cast(out->buffer().end() - out->position()); int ret = BZ2_bzCompress(&bz->stream, BZ_FINISH); diff --git a/src/IO/FileEncryptionCommon.cpp b/src/IO/FileEncryptionCommon.cpp index 13d8acb8c7b..5592da8721c 100644 --- a/src/IO/FileEncryptionCommon.cpp +++ b/src/IO/FileEncryptionCommon.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -84,10 +85,13 @@ namespace while (in_size < size) { out.nextIfAtEnd(); + size_t part_size = std::min(size - in_size, out.available()); + part_size = std::min(part_size, INT_MAX); + uint8_t * ciphertext = reinterpret_cast(out.position()); int ciphertext_size = 0; - if (!EVP_EncryptUpdate(evp_ctx, ciphertext, &ciphertext_size, &in[in_size], part_size)) + if (!EVP_EncryptUpdate(evp_ctx, ciphertext, &ciphertext_size, &in[in_size], static_cast(part_size))) throw Exception("Failed to encrypt", ErrorCodes::DATA_ENCRYPTION_ERROR); in_size += part_size; @@ -110,7 +114,7 @@ namespace uint8_t ciphertext[kBlockSize]; int ciphertext_size = 0; - if (!EVP_EncryptUpdate(evp_ctx, ciphertext, &ciphertext_size, padded_data, padded_data_size)) + if (!EVP_EncryptUpdate(evp_ctx, ciphertext, &ciphertext_size, 
padded_data, safe_cast(padded_data_size))) throw Exception("Failed to encrypt", ErrorCodes::DATA_ENCRYPTION_ERROR); if (!ciphertext_size) @@ -142,7 +146,7 @@ namespace const uint8_t * in = reinterpret_cast(data); uint8_t * plaintext = reinterpret_cast(out); int plaintext_size = 0; - if (!EVP_DecryptUpdate(evp_ctx, plaintext, &plaintext_size, in, size)) + if (!EVP_DecryptUpdate(evp_ctx, plaintext, &plaintext_size, in, safe_cast(size))) throw Exception("Failed to decrypt", ErrorCodes::DATA_ENCRYPTION_ERROR); return plaintext_size; } @@ -153,10 +157,9 @@ namespace uint8_t padded_data[kBlockSize] = {}; memcpy(&padded_data[pad_left], data, size); size_t padded_data_size = pad_left + size; - uint8_t plaintext[kBlockSize]; int plaintext_size = 0; - if (!EVP_DecryptUpdate(evp_ctx, plaintext, &plaintext_size, padded_data, padded_data_size)) + if (!EVP_DecryptUpdate(evp_ctx, plaintext, &plaintext_size, padded_data, safe_cast(padded_data_size))) throw Exception("Failed to decrypt", ErrorCodes::DATA_ENCRYPTION_ERROR); if (!plaintext_size) diff --git a/src/IO/HTTPCommon.cpp b/src/IO/HTTPCommon.cpp index 9fd48914f64..f33b2399492 100644 --- a/src/IO/HTTPCommon.cpp +++ b/src/IO/HTTPCommon.cpp @@ -142,7 +142,7 @@ namespace bool proxy_https_, size_t max_pool_size_, bool resolve_host_ = true) - : Base(max_pool_size_, &Poco::Logger::get("HTTPSessionPool")) + : Base(static_cast(max_pool_size_), &Poco::Logger::get("HTTPSessionPool")) , host(host_) , port(port_) , https(https_) @@ -271,7 +271,7 @@ namespace }; } -void setResponseDefaultHeaders(HTTPServerResponse & response, unsigned keep_alive_timeout) +void setResponseDefaultHeaders(HTTPServerResponse & response, size_t keep_alive_timeout) { if (!response.getKeepAlive()) return; diff --git a/src/IO/HTTPCommon.h b/src/IO/HTTPCommon.h index 18e83abb83b..51da17d4ca7 100644 --- a/src/IO/HTTPCommon.h +++ b/src/IO/HTTPCommon.h @@ -38,7 +38,7 @@ public: using PooledHTTPSessionPtr = SingleEndpointHTTPSessionPool::Entry; using HTTPSessionPtr = std::shared_ptr; -void setResponseDefaultHeaders(HTTPServerResponse & response, unsigned keep_alive_timeout); +void setResponseDefaultHeaders(HTTPServerResponse & response, size_t keep_alive_timeout); /// Create session object to perform requests and set required parameters. 
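// Illustrative aside, not part of the patch above: the FileEncryptionCommon hunk caps each part at
// INT_MAX because EVP_EncryptUpdate takes an int length. A standalone sketch of the same chunking,
// with a hypothetical process_part callback standing in for the OpenSSL call:
#include <algorithm>
#include <climits>
#include <cstddef>
#include <functional>

void processInIntSizedParts(const char * data, size_t size, const std::function<void(const char *, int)> & process_part)
{
    size_t done = 0;
    while (done < size)
    {
        size_t part_size = std::min<size_t>(size - done, INT_MAX); // never hand more than INT_MAX to an int-sized API
        process_part(data + done, static_cast<int>(part_size));
        done += part_size;
    }
}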
HTTPSessionPtr makeHTTPSession(const Poco::URI & uri, const ConnectionTimeouts & timeouts, bool resolve_host = true); diff --git a/src/IO/MMapReadBufferFromFileDescriptor.cpp b/src/IO/MMapReadBufferFromFileDescriptor.cpp index 5a636971fa0..c0eb73f8638 100644 --- a/src/IO/MMapReadBufferFromFileDescriptor.cpp +++ b/src/IO/MMapReadBufferFromFileDescriptor.cpp @@ -28,7 +28,7 @@ void MMapReadBufferFromFileDescriptor::init() BufferBase::set(mapped.getData(), length, 0); size_t page_size = static_cast(::getPageSize()); - ReadBuffer::padded = (length % page_size) > 0 && (length % page_size) <= (page_size - 15); + ReadBuffer::padded = (length % page_size) > 0 && (length % page_size) <= (page_size - (PADDING_FOR_SIMD - 1)); } diff --git a/src/IO/MMapReadBufferFromFileWithCache.cpp b/src/IO/MMapReadBufferFromFileWithCache.cpp index 0d31c29bdaa..503a58b65b9 100644 --- a/src/IO/MMapReadBufferFromFileWithCache.cpp +++ b/src/IO/MMapReadBufferFromFileWithCache.cpp @@ -17,7 +17,7 @@ void MMapReadBufferFromFileWithCache::init() BufferBase::set(mapped->getData(), length, 0); size_t page_size = static_cast(::getPageSize()); - ReadBuffer::padded = (length % page_size) > 0 && (length % page_size) <= (page_size - 15); + ReadBuffer::padded = (length % page_size) > 0 && (length % page_size) <= (page_size - (PADDING_FOR_SIMD - 1)); } diff --git a/src/IO/PeekableReadBuffer.h b/src/IO/PeekableReadBuffer.h index 15283793755..45763863437 100644 --- a/src/IO/PeekableReadBuffer.h +++ b/src/IO/PeekableReadBuffer.h @@ -99,7 +99,7 @@ private: /// creation (for example if PeekableReadBuffer is often created or if we need to remember small amount of /// data after checkpoint), at the beginning we will use small amount of memory on stack and allocate /// larger buffer only if reserved memory is not enough. 
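// Illustrative aside, not part of the patch above: the two MMap hunks replace the literal 15 with
// PADDING_FOR_SIMD - 1. The mapping is treated as padded only when the padding bytes past its end
// still fall inside the last mapped page, so readers may safely overread them. A tiny sketch of that
// check, assuming PADDING_FOR_SIMD is 16 (the tests further below equate it with the old literal 16):
#include <cstddef>

constexpr size_t PADDING_FOR_SIMD = 16; // assumed value for this sketch

bool isPaddedMapping(size_t length, size_t page_size)
{
    return (length % page_size) > 0 && (length % page_size) <= (page_size - (PADDING_FOR_SIMD - 1));
}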
- char stack_memory[16]; + char stack_memory[PADDING_FOR_SIMD]; bool use_stack_memory = true; }; diff --git a/src/IO/ReadBufferFromMemory.h b/src/IO/ReadBufferFromMemory.h index dc5c464604b..ad96e4bfa28 100644 --- a/src/IO/ReadBufferFromMemory.h +++ b/src/IO/ReadBufferFromMemory.h @@ -16,6 +16,8 @@ public: requires (sizeof(CharT) == 1) ReadBufferFromMemory(const CharT * buf, size_t size) : SeekableReadBuffer(const_cast(reinterpret_cast(buf)), size, 0) {} + explicit ReadBufferFromMemory(const std::string_view&& str) + : SeekableReadBuffer(const_cast(str.data()), str.size(), 0) {} off_t seek(off_t off, int whence) override; diff --git a/src/IO/ReadBufferFromPocoSocket.cpp b/src/IO/ReadBufferFromPocoSocket.cpp index 988ad75cdf4..7ba23dd1588 100644 --- a/src/IO/ReadBufferFromPocoSocket.cpp +++ b/src/IO/ReadBufferFromPocoSocket.cpp @@ -29,6 +29,7 @@ namespace ErrorCodes extern const int NETWORK_ERROR; extern const int SOCKET_TIMEOUT; extern const int CANNOT_READ_FROM_SOCKET; + extern const int LOGICAL_ERROR; } @@ -54,7 +55,10 @@ bool ReadBufferFromPocoSocket::nextImpl() while (async_callback && !socket.poll(0, Poco::Net::Socket::SELECT_READ)) async_callback(socket.impl()->sockfd(), socket.getReceiveTimeout(), socket_description); - bytes_read = socket.impl()->receiveBytes(internal_buffer.begin(), internal_buffer.size()); + if (internal_buffer.size() > INT_MAX) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Buffer overflow"); + + bytes_read = socket.impl()->receiveBytes(internal_buffer.begin(), static_cast(internal_buffer.size())); } catch (const Poco::Net::NetException & e) { diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index d5b0ce4bebe..27a24eef804 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -1095,6 +1095,7 @@ inline void readText(is_floating_point auto & x, ReadBuffer & buf) { readFloatTe inline void readText(String & x, ReadBuffer & buf) { readEscapedString(x, buf); } inline void readText(LocalDate & x, ReadBuffer & buf) { readDateText(x, buf); } +inline void readText(DayNum & x, ReadBuffer & buf) { readDateText(x, buf); } inline void readText(LocalDateTime & x, ReadBuffer & buf) { readDateTimeText(x, buf); } inline void readText(UUID & x, ReadBuffer & buf) { readUUIDText(x, buf); } @@ -1176,6 +1177,7 @@ inline void readCSV(T & x, ReadBuffer & buf) inline void readCSV(String & x, ReadBuffer & buf, const FormatSettings::CSV & settings) { readCSVString(x, buf, settings); } inline void readCSV(LocalDate & x, ReadBuffer & buf) { readCSVSimple(x, buf); } +inline void readCSV(DayNum & x, ReadBuffer & buf) { readCSVSimple(x, buf); } inline void readCSV(LocalDateTime & x, ReadBuffer & buf) { readCSVSimple(x, buf); } inline void readCSV(UUID & x, ReadBuffer & buf) { readCSVSimple(x, buf); } inline void readCSV(UInt128 & x, ReadBuffer & buf) { readCSVSimple(x, buf); } diff --git a/src/IO/ReadWriteBufferFromHTTP.h b/src/IO/ReadWriteBufferFromHTTP.h index de2b5654ae5..b60fdee1184 100644 --- a/src/IO/ReadWriteBufferFromHTTP.h +++ b/src/IO/ReadWriteBufferFromHTTP.h @@ -528,16 +528,17 @@ namespace detail auto on_retriable_error = [&]() { - retry_with_range_header = true; - impl.reset(); - auto http_session = session->getSession(); - http_session->reset(); - sleepForMilliseconds(milliseconds_to_wait); + retry_with_range_header = true; + impl.reset(); + auto http_session = session->getSession(); + http_session->reset(); + sleepForMilliseconds(milliseconds_to_wait); }; for (size_t i = 0; i < settings.http_max_tries; ++i) { exception = nullptr; + initialization_error = 
InitializeError::NONE; try { diff --git a/src/IO/S3/PocoHTTPClient.h b/src/IO/S3/PocoHTTPClient.h index 57e4369e565..5649638285d 100644 --- a/src/IO/S3/PocoHTTPClient.h +++ b/src/IO/S3/PocoHTTPClient.h @@ -2,20 +2,22 @@ #include "config.h" +#include +#include + #if USE_AWS_S3 #include #include #include #include -#include +#include #include #include #include #include - namespace Aws::Http::Standard { class StandardHttpResponse; @@ -23,6 +25,7 @@ class StandardHttpResponse; namespace DB { + class Context; } diff --git a/src/IO/S3Common.cpp b/src/IO/S3Common.cpp index df19748b493..859f5ce796b 100644 --- a/src/IO/S3Common.cpp +++ b/src/IO/S3Common.cpp @@ -1,9 +1,11 @@ +#include + +#include +#include #include "config.h" #if USE_AWS_S3 -# include - # include # include @@ -780,25 +782,16 @@ namespace S3 boost::to_upper(name); if (name != S3 && name != COS && name != OBS && name != OSS) - { throw Exception(ErrorCodes::BAD_ARGUMENTS, "Object storage system name is unrecognized in virtual hosted style S3 URI: {}", quoteString(name)); - } + if (name == S3) - { storage_name = name; - } else if (name == OBS) - { storage_name = OBS; - } else if (name == OSS) - { storage_name = OSS; - } else - { storage_name = COSN; - } } else if (re2::RE2::PartialMatch(uri.getPath(), path_style_pattern, &bucket, &key)) { @@ -851,8 +844,82 @@ namespace S3 { return getObjectInfo(client_ptr, bucket, key, version_id, throw_on_error, for_disk_s3).size; } + } } #endif + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int INVALID_CONFIG_PARAMETER; +} + +namespace S3 +{ + +AuthSettings AuthSettings::loadFromConfig(const std::string & config_elem, const Poco::Util::AbstractConfiguration & config) +{ + auto access_key_id = config.getString(config_elem + ".access_key_id", ""); + auto secret_access_key = config.getString(config_elem + ".secret_access_key", ""); + auto region = config.getString(config_elem + ".region", ""); + auto server_side_encryption_customer_key_base64 = config.getString(config_elem + ".server_side_encryption_customer_key_base64", ""); + + std::optional use_environment_credentials; + if (config.has(config_elem + ".use_environment_credentials")) + use_environment_credentials = config.getBool(config_elem + ".use_environment_credentials"); + + std::optional use_insecure_imds_request; + if (config.has(config_elem + ".use_insecure_imds_request")) + use_insecure_imds_request = config.getBool(config_elem + ".use_insecure_imds_request"); + + HeaderCollection headers; + Poco::Util::AbstractConfiguration::Keys subconfig_keys; + config.keys(config_elem, subconfig_keys); + for (const std::string & subkey : subconfig_keys) + { + if (subkey.starts_with("header")) + { + auto header_str = config.getString(config_elem + "." + subkey); + auto delimiter = header_str.find(':'); + if (delimiter == std::string::npos) + throw Exception("Malformed s3 header value", ErrorCodes::INVALID_CONFIG_PARAMETER); + headers.emplace_back(HttpHeader{header_str.substr(0, delimiter), header_str.substr(delimiter + 1, String::npos)}); + } + } + + return AuthSettings + { + std::move(access_key_id), std::move(secret_access_key), + std::move(region), + std::move(server_side_encryption_customer_key_base64), + std::move(headers), + use_environment_credentials, + use_insecure_imds_request + }; +} + + +void AuthSettings::updateFrom(const AuthSettings & from) +{ + /// Update with check for emptyness only parameters which + /// can be passed not only from config, but via ast. 
+ + if (!from.access_key_id.empty()) + access_key_id = from.access_key_id; + if (!from.secret_access_key.empty()) + secret_access_key = from.secret_access_key; + + headers = from.headers; + region = from.region; + server_side_encryption_customer_key_base64 = from.server_side_encryption_customer_key_base64; + use_environment_credentials = from.use_environment_credentials; + use_insecure_imds_request = from.use_insecure_imds_request; +} + +} +} diff --git a/src/IO/S3Common.h b/src/IO/S3Common.h index 5c27b32985f..93e5eb78c7f 100644 --- a/src/IO/S3Common.h +++ b/src/IO/S3Common.h @@ -1,5 +1,11 @@ #pragma once +#include +#include + +#include +#include + #include "config.h" #if USE_AWS_S3 @@ -8,7 +14,6 @@ #include #include #include -#include #include #include @@ -27,8 +32,6 @@ namespace ErrorCodes } class RemoteHostFilter; -struct HttpHeader; -using HeaderCollection = std::vector; class S3Exception : public Exception { @@ -130,5 +133,33 @@ S3::ObjectInfo getObjectInfo(std::shared_ptr client_ptr size_t getObjectSize(std::shared_ptr client_ptr, const String & bucket, const String & key, const String & version_id, bool throw_on_error, bool for_disk_s3); } - #endif + +namespace Poco::Util +{ +class AbstractConfiguration; +}; + +namespace DB::S3 +{ + +struct AuthSettings +{ + static AuthSettings loadFromConfig(const std::string & config_elem, const Poco::Util::AbstractConfiguration & config); + + std::string access_key_id; + std::string secret_access_key; + std::string region; + std::string server_side_encryption_customer_key_base64; + + HeaderCollection headers; + + std::optional use_environment_credentials; + std::optional use_insecure_imds_request; + + bool operator==(const AuthSettings & other) const = default; + + void updateFrom(const AuthSettings & from); +}; + +} diff --git a/src/IO/VarInt.h b/src/IO/VarInt.h index 3161ca6d8a8..816aa8fd057 100644 --- a/src/IO/VarInt.h +++ b/src/IO/VarInt.h @@ -83,14 +83,14 @@ inline void readVarUInt(UInt32 & x, ReadBuffer & istr) { UInt64 tmp; readVarUInt(tmp, istr); - x = tmp; + x = static_cast(tmp); } inline void readVarInt(Int32 & x, ReadBuffer & istr) { Int64 tmp; readVarInt(tmp, istr); - x = tmp; + x = static_cast(tmp); } inline void readVarUInt(UInt16 & x, ReadBuffer & istr) diff --git a/src/IO/WriteBufferFromPocoSocket.cpp b/src/IO/WriteBufferFromPocoSocket.cpp index fb4e5df9b59..95d532e9bd4 100644 --- a/src/IO/WriteBufferFromPocoSocket.cpp +++ b/src/IO/WriteBufferFromPocoSocket.cpp @@ -31,6 +31,7 @@ namespace ErrorCodes extern const int NETWORK_ERROR; extern const int SOCKET_TIMEOUT; extern const int CANNOT_WRITE_TO_SOCKET; + extern const int LOGICAL_ERROR; } @@ -55,7 +56,11 @@ void WriteBufferFromPocoSocket::nextImpl() try { CurrentMetrics::Increment metric_increment(CurrentMetrics::NetworkSend); - res = socket.impl()->sendBytes(working_buffer.begin() + bytes_written, offset() - bytes_written); + char * pos = working_buffer.begin() + bytes_written; + size_t size = offset() - bytes_written; + if (size > INT_MAX) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Buffer overflow"); + res = socket.impl()->sendBytes(pos, static_cast(size)); } catch (const Poco::Net::NetException & e) { diff --git a/src/IO/WriteBufferFromS3.cpp b/src/IO/WriteBufferFromS3.cpp index 14118c3c04e..9ed2c41fd01 100644 --- a/src/IO/WriteBufferFromS3.cpp +++ b/src/IO/WriteBufferFromS3.cpp @@ -123,7 +123,10 @@ void WriteBufferFromS3::nextImpl() void WriteBufferFromS3::allocateBuffer() { if (total_parts_uploaded != 0 && total_parts_uploaded % 
s3_settings.upload_part_size_multiply_parts_count_threshold == 0) + { upload_part_size *= s3_settings.upload_part_size_multiply_factor; + upload_part_size = std::min(upload_part_size, s3_settings.max_upload_part_size); + } temporary_buffer = Aws::MakeShared("temporary buffer"); temporary_buffer->exceptions(std::ios::badbit); @@ -305,7 +308,7 @@ void WriteBufferFromS3::writePart() UploadPartTask task; auto & tags = TSA_SUPPRESS_WARNING_FOR_WRITE(part_tags); /// Suppress warning because schedule == false. - fillUploadRequest(task.req, tags.size() + 1); + fillUploadRequest(task.req, static_cast(tags.size() + 1)); processUploadRequest(task); tags.push_back(task.tag); } @@ -362,7 +365,7 @@ void WriteBufferFromS3::completeMultipartUpload() for (size_t i = 0; i < tags.size(); ++i) { Aws::S3::Model::CompletedPart part; - multipart_upload.AddParts(part.WithETag(tags[i]).WithPartNumber(i + 1)); + multipart_upload.AddParts(part.WithETag(tags[i]).WithPartNumber(static_cast(i + 1))); } req.SetMultipartUpload(multipart_upload); diff --git a/src/IO/WriteBufferFromS3.h b/src/IO/WriteBufferFromS3.h index 782e580d8be..28f831856d7 100644 --- a/src/IO/WriteBufferFromS3.h +++ b/src/IO/WriteBufferFromS3.h @@ -110,8 +110,8 @@ private: std::unique_ptr put_object_task; /// Does not need protection by mutex because of the logic around is_finished field. std::list TSA_GUARDED_BY(bg_tasks_mutex) upload_object_tasks; - size_t num_added_bg_tasks TSA_GUARDED_BY(bg_tasks_mutex) = 0; - size_t num_finished_bg_tasks TSA_GUARDED_BY(bg_tasks_mutex) = 0; + int num_added_bg_tasks TSA_GUARDED_BY(bg_tasks_mutex) = 0; + int num_finished_bg_tasks TSA_GUARDED_BY(bg_tasks_mutex) = 0; std::mutex bg_tasks_mutex; std::condition_variable bg_tasks_condvar; diff --git a/src/IO/WriteBufferValidUTF8.cpp b/src/IO/WriteBufferValidUTF8.cpp index 10e86f01343..4c8e172f43c 100644 --- a/src/IO/WriteBufferValidUTF8.cpp +++ b/src/IO/WriteBufferValidUTF8.cpp @@ -102,7 +102,7 @@ void WriteBufferValidUTF8::nextImpl() break; #endif - size_t len = length_of_utf8_sequence[static_cast(*p)]; + UInt8 len = length_of_utf8_sequence[static_cast(*p)]; if (len > 4) { // NOLINT diff --git a/src/IO/WriteHelpers.cpp b/src/IO/WriteHelpers.cpp index cb341e60a8b..a9788505995 100644 --- a/src/IO/WriteHelpers.cpp +++ b/src/IO/WriteHelpers.cpp @@ -18,19 +18,6 @@ void formatHex(IteratorSrc src, IteratorDst dst, size_t num_bytes) } } -void formatUUID(const UInt8 * src16, UInt8 * dst36) -{ - formatHex(&src16[0], &dst36[0], 4); - dst36[8] = '-'; - formatHex(&src16[4], &dst36[9], 2); - dst36[13] = '-'; - formatHex(&src16[6], &dst36[14], 2); - dst36[18] = '-'; - formatHex(&src16[8], &dst36[19], 2); - dst36[23] = '-'; - formatHex(&src16[10], &dst36[24], 6); -} - /** Function used when byte ordering is important when parsing uuid * ex: When we create an UUID type */ diff --git a/src/IO/WriteHelpers.h b/src/IO/WriteHelpers.h index c3e1e59218f..39024b33eb1 100644 --- a/src/IO/WriteHelpers.h +++ b/src/IO/WriteHelpers.h @@ -139,7 +139,7 @@ inline void writeBoolText(bool x, WriteBuffer & buf) template inline size_t writeFloatTextFastPath(T x, char * buffer) { - int result = 0; + Int64 result = 0; if constexpr (std::is_same_v) { @@ -624,9 +624,6 @@ inline void writeXMLStringForTextElement(std::string_view s, WriteBuffer & buf) writeXMLStringForTextElement(s.data(), s.data() + s.size(), buf); } -template -void formatHex(IteratorSrc src, IteratorDst dst, size_t num_bytes); -void formatUUID(const UInt8 * src16, UInt8 * dst36); void formatUUID(std::reverse_iterator src16, UInt8 * dst36); 
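// Illustrative aside, not part of the patch above: the WriteBufferFromS3 hunk clamps the geometric
// growth of the multipart upload part size. A minimal sketch of that sizing rule, with hypothetical
// parameter names standing in for the s3_settings fields:
#include <algorithm>
#include <cstddef>

size_t nextUploadPartSize(size_t current_part_size, size_t parts_uploaded,
                          size_t multiply_parts_count_threshold, size_t multiply_factor, size_t max_part_size)
{
    if (parts_uploaded != 0 && parts_uploaded % multiply_parts_count_threshold == 0)
        current_part_size = std::min(current_part_size * multiply_factor, max_part_size); // grow, but never past the cap
    return current_part_size;
}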
inline void writeUUIDText(const UUID & uuid, WriteBuffer & buf) diff --git a/src/IO/WriteSettings.h b/src/IO/WriteSettings.h index 38a706997cf..a1f5b23fb97 100644 --- a/src/IO/WriteSettings.h +++ b/src/IO/WriteSettings.h @@ -15,6 +15,7 @@ struct WriteSettings bool enable_filesystem_cache_on_write_operations = false; bool enable_filesystem_cache_log = false; bool is_file_cache_persistent = false; + bool s3_allow_parallel_part_upload = true; /// Monitoring bool for_object_storage = false; // to choose which profile events should be incremented diff --git a/src/IO/ZlibDeflatingWriteBuffer.cpp b/src/IO/ZlibDeflatingWriteBuffer.cpp index c265791e38a..43014096e2a 100644 --- a/src/IO/ZlibDeflatingWriteBuffer.cpp +++ b/src/IO/ZlibDeflatingWriteBuffer.cpp @@ -49,7 +49,7 @@ void ZlibDeflatingWriteBuffer::nextImpl() return; zstr.next_in = reinterpret_cast(working_buffer.begin()); - zstr.avail_in = offset(); + zstr.avail_in = static_cast(offset()); try { @@ -57,7 +57,7 @@ void ZlibDeflatingWriteBuffer::nextImpl() { out->nextIfAtEnd(); zstr.next_out = reinterpret_cast(out->position()); - zstr.avail_out = out->buffer().end() - out->position(); + zstr.avail_out = static_cast(out->buffer().end() - out->position()); int rc = deflate(&zstr, Z_NO_FLUSH); out->position() = out->buffer().end() - zstr.avail_out; @@ -96,7 +96,7 @@ void ZlibDeflatingWriteBuffer::finalizeBefore() { out->nextIfAtEnd(); zstr.next_out = reinterpret_cast(out->position()); - zstr.avail_out = out->buffer().end() - out->position(); + zstr.avail_out = static_cast(out->buffer().end() - out->position()); int rc = deflate(&zstr, Z_FULL_FLUSH); out->position() = out->buffer().end() - zstr.avail_out; @@ -110,7 +110,7 @@ void ZlibDeflatingWriteBuffer::finalizeBefore() { out->nextIfAtEnd(); zstr.next_out = reinterpret_cast(out->position()); - zstr.avail_out = out->buffer().end() - out->position(); + zstr.avail_out = static_cast(out->buffer().end() - out->position()); int rc = deflate(&zstr, Z_FINISH); out->position() = out->buffer().end() - zstr.avail_out; diff --git a/src/IO/ZlibInflatingReadBuffer.cpp b/src/IO/ZlibInflatingReadBuffer.cpp index 4cb56bef6b1..9c2ee640cbe 100644 --- a/src/IO/ZlibInflatingReadBuffer.cpp +++ b/src/IO/ZlibInflatingReadBuffer.cpp @@ -61,11 +61,11 @@ bool ZlibInflatingReadBuffer::nextImpl() { in->nextIfAtEnd(); zstr.next_in = reinterpret_cast(in->position()); - zstr.avail_in = in->buffer().end() - in->position(); + zstr.avail_in = static_cast(in->buffer().end() - in->position()); } /// init output bytes (place, where decompressed data will be) zstr.next_out = reinterpret_cast(internal_buffer.begin()); - zstr.avail_out = internal_buffer.size(); + zstr.avail_out = static_cast(internal_buffer.size()); int rc = inflate(&zstr, Z_NO_FLUSH); diff --git a/src/IO/ZstdDeflatingAppendableWriteBuffer.cpp b/src/IO/ZstdDeflatingAppendableWriteBuffer.cpp index 459f486af18..79fb4ccead5 100644 --- a/src/IO/ZstdDeflatingAppendableWriteBuffer.cpp +++ b/src/IO/ZstdDeflatingAppendableWriteBuffer.cpp @@ -149,7 +149,7 @@ void ZstdDeflatingAppendableWriteBuffer::finalizeZstd() { try { - int err = ZSTD_freeCCtx(cctx); + size_t err = ZSTD_freeCCtx(cctx); /// This is just in case, since it is impossible to get an error by using this wrapper. 
if (unlikely(err)) throw Exception(ErrorCodes::ZSTD_ENCODER_FAILED, "ZSTD_freeCCtx failed: error: '{}'; zstd version: {}", ZSTD_getErrorName(err), ZSTD_VERSION_STRING); diff --git a/src/IO/ZstdDeflatingWriteBuffer.cpp b/src/IO/ZstdDeflatingWriteBuffer.cpp index 238645b16df..c7f9b0d718b 100644 --- a/src/IO/ZstdDeflatingWriteBuffer.cpp +++ b/src/IO/ZstdDeflatingWriteBuffer.cpp @@ -100,7 +100,7 @@ void ZstdDeflatingWriteBuffer::finalizeAfter() { try { - int err = ZSTD_freeCCtx(cctx); + size_t err = ZSTD_freeCCtx(cctx); /// This is just in case, since it is impossible to get an error by using this wrapper. if (unlikely(err)) throw Exception(ErrorCodes::ZSTD_ENCODER_FAILED, "ZSTD_freeCCtx failed: error: '{}'; zstd version: {}", ZSTD_getErrorName(err), ZSTD_VERSION_STRING); diff --git a/src/IO/examples/valid_utf8_perf.cpp b/src/IO/examples/valid_utf8_perf.cpp index b95cdb2c27c..f42251188d9 100644 --- a/src/IO/examples/valid_utf8_perf.cpp +++ b/src/IO/examples/valid_utf8_perf.cpp @@ -10,7 +10,7 @@ int main(int argc, char ** argv) { int repeats = 1; if (argc >= 2) - repeats = std::stol(argv[1]); + repeats = static_cast(std::stol(argv[1])); std::string text((std::istreambuf_iterator(std::cin)), std::istreambuf_iterator()); diff --git a/src/IO/examples/zlib_ng_bug.cpp b/src/IO/examples/zlib_ng_bug.cpp index 9fe3c961913..f7c3d1eeefe 100644 --- a/src/IO/examples/zlib_ng_bug.cpp +++ b/src/IO/examples/zlib_ng_bug.cpp @@ -23,9 +23,9 @@ int main(int, char **) throw std::runtime_error("Cannot deflateInit2"); zstr.next_in = in.data(); - zstr.avail_in = in.size(); + zstr.avail_in = static_cast(in.size()); zstr.next_out = out.data(); - zstr.avail_out = out.size(); + zstr.avail_out = static_cast(out.size()); while (zstr.avail_in > 0) if (Z_OK != deflate(&zstr, Z_NO_FLUSH)) diff --git a/src/IO/readDecimalText.h b/src/IO/readDecimalText.h index 2e06acb2f3e..9d7f8137136 100644 --- a/src/IO/readDecimalText.h +++ b/src/IO/readDecimalText.h @@ -106,7 +106,7 @@ inline bool readDigits(ReadBuffer & buf, T & x, uint32_t & digits, int32_t & exp exponent -= places; // TODO: accurate shift10 for big integers - x *= intExp10OfSize(places); + x *= intExp10OfSize(places); places = 0; x += (byte - '0'); @@ -147,23 +147,32 @@ inline bool readDigits(ReadBuffer & buf, T & x, uint32_t & digits, int32_t & exp return true; } -template -inline void readDecimalText(ReadBuffer & buf, T & x, uint32_t precision, uint32_t & scale, bool digits_only = false) +template +inline ReturnType readDecimalText(ReadBuffer & buf, T & x, uint32_t precision, uint32_t & scale, bool digits_only = false) { + static constexpr bool throw_exception = std::is_same_v; + uint32_t digits = precision; int32_t exponent; - readDigits(buf, x, digits, exponent, digits_only); + auto ok = readDigits(buf, x, digits, exponent, digits_only); + + if (!throw_exception && !ok) + return ReturnType(false); if (static_cast(digits) + exponent > static_cast(precision - scale)) { - static constexpr const char * pattern = - "Decimal value is too big: {} digits were read: {}e{}." - " Expected to read decimal with scale {} and precision {}"; + if constexpr (throw_exception) + { + static constexpr const char * pattern = "Decimal value is too big: {} digits were read: {}e{}." 
+ " Expected to read decimal with scale {} and precision {}"; - if constexpr (is_big_int_v) - throw Exception(fmt::format(pattern, digits, x.value, exponent, scale, precision), ErrorCodes::ARGUMENT_OUT_OF_BOUND); + if constexpr (is_big_int_v) + throw Exception(fmt::format(pattern, digits, x.value, exponent, scale, precision), ErrorCodes::ARGUMENT_OUT_OF_BOUND); + else + throw Exception(fmt::format(pattern, digits, x, exponent, scale, precision), ErrorCodes::ARGUMENT_OUT_OF_BOUND); + } else - throw Exception(fmt::format(pattern, digits, x, exponent, scale, precision), ErrorCodes::ARGUMENT_OUT_OF_BOUND); + return ReturnType(false); } if (static_cast(scale) + exponent < 0) @@ -175,7 +184,7 @@ inline void readDecimalText(ReadBuffer & buf, T & x, uint32_t precision, uint32_ /// Too big negative exponent x.value = 0; scale = 0; - return; + return ReturnType(true); } else { @@ -184,26 +193,18 @@ inline void readDecimalText(ReadBuffer & buf, T & x, uint32_t precision, uint32_ assert(divisor > 0); /// This is for Clang Static Analyzer. It is not smart enough to infer it automatically. x.value /= divisor; scale = 0; - return; + return ReturnType(true); } } scale += exponent; + return ReturnType(true); } template inline bool tryReadDecimalText(ReadBuffer & buf, T & x, uint32_t precision, uint32_t & scale) { - uint32_t digits = precision; - int32_t exponent; - - if (!readDigits(buf, x, digits, exponent, true) || - static_cast(digits) + exponent > static_cast(precision - scale) || - static_cast(scale) + exponent < 0) - return false; - - scale += exponent; - return true; + return readDecimalText(buf, x, precision, scale, true); } template diff --git a/src/IO/readFloatText.h b/src/IO/readFloatText.h index b8d0c1ba3c0..a72ff82008e 100644 --- a/src/IO/readFloatText.h +++ b/src/IO/readFloatText.h @@ -349,11 +349,11 @@ ReturnType readFloatTextFastImpl(T & x, ReadBuffer & in) constexpr int significant_digits = std::numeric_limits::digits10; readUIntTextUpToNSignificantDigits(before_point, in); - int read_digits = in.count() - count_after_sign; + size_t read_digits = in.count() - count_after_sign; if (unlikely(read_digits > significant_digits)) { - int before_point_additional_exponent = read_digits - significant_digits; + int before_point_additional_exponent = static_cast(read_digits) - significant_digits; x = static_cast(shift10(before_point, before_point_additional_exponent)); } else @@ -377,11 +377,11 @@ ReturnType readFloatTextFastImpl(T & x, ReadBuffer & in) ++in.position(); auto after_leading_zeros_count = in.count(); - auto after_point_num_leading_zeros = after_leading_zeros_count - after_point_count; + int after_point_num_leading_zeros = static_cast(after_leading_zeros_count - after_point_count); readUIntTextUpToNSignificantDigits(after_point, in); read_digits = in.count() - after_leading_zeros_count; - after_point_exponent = (read_digits > significant_digits ? -significant_digits : -read_digits) - after_point_num_leading_zeros; + after_point_exponent = (read_digits > significant_digits ? 
-significant_digits : static_cast(-read_digits)) - after_point_num_leading_zeros; } if (checkChar('e', in) || checkChar('E', in)) diff --git a/src/IO/tests/gtest_memory_resize.cpp b/src/IO/tests/gtest_memory_resize.cpp index 8619419a47a..d760a948075 100644 --- a/src/IO/tests/gtest_memory_resize.cpp +++ b/src/IO/tests/gtest_memory_resize.cpp @@ -79,24 +79,24 @@ TEST(MemoryResizeTest, SmallInitAndSmallResize) memory.resize(1); ASSERT_TRUE(memory.m_data); - ASSERT_EQ(memory.m_capacity, 16); + ASSERT_EQ(memory.m_capacity, PADDING_FOR_SIMD); ASSERT_EQ(memory.m_size, 1); } { auto memory = Memory(1); ASSERT_TRUE(memory.m_data); - ASSERT_EQ(memory.m_capacity, 16); + ASSERT_EQ(memory.m_capacity, PADDING_FOR_SIMD); ASSERT_EQ(memory.m_size, 1); memory.resize(0); ASSERT_TRUE(memory.m_data); - ASSERT_EQ(memory.m_capacity, 16); + ASSERT_EQ(memory.m_capacity, PADDING_FOR_SIMD); ASSERT_EQ(memory.m_size, 0); memory.resize(1); ASSERT_TRUE(memory.m_data); - ASSERT_EQ(memory.m_capacity, 16); + ASSERT_EQ(memory.m_capacity, PADDING_FOR_SIMD); ASSERT_EQ(memory.m_size, 1); } } @@ -116,52 +116,52 @@ TEST(MemoryResizeTest, SmallInitAndBigResizeOverflowWhenPadding) memory.resize(1); ASSERT_TRUE(memory.m_data); - ASSERT_EQ(memory.m_capacity, 16); + ASSERT_EQ(memory.m_capacity, PADDING_FOR_SIMD); ASSERT_EQ(memory.m_size, 1); memory.resize(2); ASSERT_TRUE(memory.m_data); - ASSERT_EQ(memory.m_capacity, 17); + ASSERT_EQ(memory.m_capacity, PADDING_FOR_SIMD + 1); ASSERT_EQ(memory.m_size, 2); EXPECT_THROW_ERROR_CODE(memory.resize(std::numeric_limits::max()), Exception, ErrorCodes::ARGUMENT_OUT_OF_BOUND); ASSERT_TRUE(memory.m_data); // state is intact after exception - ASSERT_EQ(memory.m_capacity, 17); + ASSERT_EQ(memory.m_capacity, PADDING_FOR_SIMD + 1); ASSERT_EQ(memory.m_size, 2); - memory.resize(0x8000000000000000ULL-16); + memory.resize(0x8000000000000000ULL - PADDING_FOR_SIMD); ASSERT_TRUE(memory.m_data); ASSERT_EQ(memory.m_capacity, 0x8000000000000000ULL - 1); - ASSERT_EQ(memory.m_size, 0x8000000000000000ULL - 16); + ASSERT_EQ(memory.m_size, 0x8000000000000000ULL - PADDING_FOR_SIMD); #ifndef ABORT_ON_LOGICAL_ERROR - EXPECT_THROW_ERROR_CODE(memory.resize(0x8000000000000000ULL-15), Exception, ErrorCodes::LOGICAL_ERROR); + EXPECT_THROW_ERROR_CODE(memory.resize(0x8000000000000000ULL - (PADDING_FOR_SIMD - 1)), Exception, ErrorCodes::LOGICAL_ERROR); ASSERT_TRUE(memory.m_data); // state is intact after exception ASSERT_EQ(memory.m_capacity, 0x8000000000000000ULL - 1); - ASSERT_EQ(memory.m_size, 0x8000000000000000ULL - 16); + ASSERT_EQ(memory.m_size, 0x8000000000000000ULL - PADDING_FOR_SIMD); #endif } { auto memory = Memory(1); ASSERT_TRUE(memory.m_data); - ASSERT_EQ(memory.m_capacity, 16); + ASSERT_EQ(memory.m_capacity, PADDING_FOR_SIMD); ASSERT_EQ(memory.m_size, 1); EXPECT_THROW_ERROR_CODE(memory.resize(std::numeric_limits::max()), Exception, ErrorCodes::ARGUMENT_OUT_OF_BOUND); ASSERT_TRUE(memory.m_data); // state is intact after exception - ASSERT_EQ(memory.m_capacity, 16); + ASSERT_EQ(memory.m_capacity, PADDING_FOR_SIMD); ASSERT_EQ(memory.m_size, 1); memory.resize(1); ASSERT_TRUE(memory.m_data); - ASSERT_EQ(memory.m_capacity, 16); + ASSERT_EQ(memory.m_capacity, PADDING_FOR_SIMD); ASSERT_EQ(memory.m_size, 1); #ifndef ABORT_ON_LOGICAL_ERROR - EXPECT_THROW_ERROR_CODE(memory.resize(0x8000000000000000ULL-15), Exception, ErrorCodes::LOGICAL_ERROR); + EXPECT_THROW_ERROR_CODE(memory.resize(0x8000000000000000ULL - (PADDING_FOR_SIMD - 1)), Exception, ErrorCodes::LOGICAL_ERROR); ASSERT_TRUE(memory.m_data); // state is intact after 
exception - ASSERT_EQ(memory.m_capacity, 16); + ASSERT_EQ(memory.m_capacity, PADDING_FOR_SIMD); ASSERT_EQ(memory.m_size, 1); #endif } @@ -201,7 +201,7 @@ TEST(MemoryResizeTest, BigInitAndSmallResizeOverflowWhenPadding) { EXPECT_THROW_ERROR_CODE( { - auto memory = Memory(std::numeric_limits::max() - 15); + auto memory = Memory(std::numeric_limits::max() - (PADDING_FOR_SIMD - 1)); } , Exception , ErrorCodes::LOGICAL_ERROR); @@ -210,7 +210,7 @@ TEST(MemoryResizeTest, BigInitAndSmallResizeOverflowWhenPadding) { EXPECT_THROW_ERROR_CODE( { - auto memory = Memory(0x8000000000000000ULL - 15); + auto memory = Memory(0x8000000000000000ULL - (PADDING_FOR_SIMD - 1)); } , Exception , ErrorCodes::LOGICAL_ERROR); @@ -218,10 +218,10 @@ TEST(MemoryResizeTest, BigInitAndSmallResizeOverflowWhenPadding) #endif { - auto memory = Memory(0x8000000000000000ULL - 16); - ASSERT_TRUE(memory.m_data); - ASSERT_EQ(memory.m_capacity, 0x8000000000000000ULL - 1); - ASSERT_EQ(memory.m_size, 0x8000000000000000ULL - 16); + auto memory = Memory(0x8000000000000000ULL - PADDING_FOR_SIMD); + ASSERT_TRUE(memory.m_data); + ASSERT_EQ(memory.m_capacity, 0x8000000000000000ULL - 1); + ASSERT_EQ(memory.m_size, 0x8000000000000000ULL - PADDING_FOR_SIMD); memory.resize(1); ASSERT_TRUE(memory.m_data); @@ -240,32 +240,32 @@ TEST(MemoryResizeTest, AlignmentWithRealAllocator) memory.resize(1); ASSERT_TRUE(memory.m_data); - ASSERT_EQ(memory.m_capacity, 16); + ASSERT_EQ(memory.m_capacity, PADDING_FOR_SIMD); ASSERT_EQ(memory.m_size, 1); memory.resize(2); ASSERT_TRUE(memory.m_data); - ASSERT_EQ(memory.m_capacity, 17); + ASSERT_EQ(memory.m_capacity, PADDING_FOR_SIMD + 1); ASSERT_EQ(memory.m_size, 2); memory.resize(3); ASSERT_TRUE(memory.m_data); - ASSERT_EQ(memory.m_capacity, 18); + ASSERT_EQ(memory.m_capacity, PADDING_FOR_SIMD + 2); ASSERT_EQ(memory.m_size, 3); memory.resize(4); ASSERT_TRUE(memory.m_data); - ASSERT_EQ(memory.m_capacity, 19); + ASSERT_EQ(memory.m_capacity, PADDING_FOR_SIMD + 3); ASSERT_EQ(memory.m_size, 4); memory.resize(0); ASSERT_TRUE(memory.m_data); - ASSERT_EQ(memory.m_capacity, 19); + ASSERT_EQ(memory.m_capacity, PADDING_FOR_SIMD + 3); ASSERT_EQ(memory.m_size, 0); memory.resize(1); ASSERT_TRUE(memory.m_data); - ASSERT_EQ(memory.m_capacity, 19); + ASSERT_EQ(memory.m_capacity, PADDING_FOR_SIMD + 3); ASSERT_EQ(memory.m_size, 1); } @@ -291,12 +291,12 @@ TEST(MemoryResizeTest, AlignmentWithRealAllocator) memory.resize(1); ASSERT_TRUE(memory.m_data); - ASSERT_EQ(memory.m_capacity, 16); + ASSERT_EQ(memory.m_capacity, PADDING_FOR_SIMD); ASSERT_EQ(memory.m_size, 1); memory.resize(32); ASSERT_TRUE(memory.m_data); - ASSERT_EQ(memory.m_capacity, 47); + ASSERT_EQ(memory.m_capacity, PADDING_FOR_SIMD + 31); ASSERT_EQ(memory.m_size, 32); } } @@ -316,13 +316,12 @@ TEST(MemoryResizeTest, SomeAlignmentOverflowWhenAlignment) memory.resize(1); ASSERT_TRUE(memory.m_data); - ASSERT_EQ(memory.m_capacity, 16); + ASSERT_EQ(memory.m_capacity, PADDING_FOR_SIMD); ASSERT_EQ(memory.m_size, 1); EXPECT_THROW_ERROR_CODE(memory.resize(std::numeric_limits::max()), Exception, ErrorCodes::ARGUMENT_OUT_OF_BOUND); ASSERT_TRUE(memory.m_data); // state is intact after exception - ASSERT_EQ(memory.m_capacity, 16); + ASSERT_EQ(memory.m_capacity, PADDING_FOR_SIMD); ASSERT_EQ(memory.m_size, 1); } - } diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 980207c7f9c..9a0d33b19fc 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -52,7 +52,7 @@ #include #include #include -#include +#include 
namespace DB diff --git a/src/Interpreters/ActionsVisitor.h b/src/Interpreters/ActionsVisitor.h index a27745d2cfa..fea013fd075 100644 --- a/src/Interpreters/ActionsVisitor.h +++ b/src/Interpreters/ActionsVisitor.h @@ -140,7 +140,7 @@ public: * when we add lots of column with same prefix. One counter for all * prefixes is good enough. */ - int next_unique_suffix; + size_t next_unique_suffix; Data( ContextPtr context_, diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index b5d15b0927b..c38006af975 100644 --- a/src/Interpreters/Aggregator.cpp +++ b/src/Interpreters/Aggregator.cpp @@ -811,6 +811,11 @@ AggregatedDataVariants::Type Aggregator::chooseAggregationMethod() return AggregatedDataVariants::Type::low_cardinality_key32; if (size_of_field == 8) return AggregatedDataVariants::Type::low_cardinality_key64; + if (size_of_field == 16) + return AggregatedDataVariants::Type::low_cardinality_keys128; + if (size_of_field == 32) + return AggregatedDataVariants::Type::low_cardinality_keys256; + throw Exception("Logical error: low cardinality numeric column has sizeOfField not in 1, 2, 4, 8, 16, 32.", ErrorCodes::LOGICAL_ERROR); } if (size_of_field == 1) @@ -1633,14 +1638,14 @@ Block Aggregator::convertOneBucketToBlock( Method & method, Arena * arena, bool final, - size_t bucket) const + Int32 bucket) const { // Used in ConvertingAggregatedToChunksSource -> ConvertingAggregatedToChunksTransform (expects single chunk for each bucket_id). constexpr bool return_single_block = true; Block block = convertToBlockImpl( method, method.data.impls[bucket], arena, data_variants.aggregates_pools, final, method.data.impls[bucket].size()); - block.info.bucket_num = bucket; + block.info.bucket_num = static_cast(bucket); return block; } @@ -1648,7 +1653,7 @@ Block Aggregator::mergeAndConvertOneBucketToBlock( ManyAggregatedDataVariants & variants, Arena * arena, bool final, - size_t bucket, + Int32 bucket, std::atomic * is_cancelled) const { auto & merged_data = *variants[0]; @@ -1692,7 +1697,7 @@ void Aggregator::writeToTemporaryFileImpl( max_temporary_block_size_bytes = block_size_bytes; }; - for (size_t bucket = 0; bucket < Method::Data::NUM_BUCKETS; ++bucket) + for (UInt32 bucket = 0; bucket < Method::Data::NUM_BUCKETS; ++bucket) { Block block = convertOneBucketToBlock(data_variants, method, data_variants.aggregates_pool, false, bucket); out.write(block); @@ -2632,7 +2637,7 @@ void NO_INLINE Aggregator::mergeBucketImpl( ManyAggregatedDataVariants Aggregator::prepareVariantsToMerge(ManyAggregatedDataVariants & data_variants) const { if (data_variants.empty()) - throw Exception("Empty data passed to Aggregator::mergeAndConvertToBlocks.", ErrorCodes::EMPTY_DATA_PASSED); + throw Exception("Empty data passed to Aggregator::prepareVariantsToMerge.", ErrorCodes::EMPTY_DATA_PASSED); LOG_TRACE(log, "Merging aggregated data"); @@ -3154,19 +3159,19 @@ void NO_INLINE Aggregator::convertBlockToTwoLevelImpl( selector[i] = bucket; } - size_t num_buckets = destinations.size(); + UInt32 num_buckets = static_cast(destinations.size()); for (size_t column_idx = 0; column_idx < columns; ++column_idx) { const ColumnWithTypeAndName & src_col = source.getByPosition(column_idx); MutableColumns scattered_columns = src_col.column->scatter(num_buckets, selector); - for (size_t bucket = 0, size = num_buckets; bucket < size; ++bucket) + for (UInt32 bucket = 0, size = num_buckets; bucket < size; ++bucket) { if (!scattered_columns[bucket]->empty()) { Block & dst = destinations[bucket]; - dst.info.bucket_num = 
bucket; + dst.info.bucket_num = static_cast(bucket); dst.insert({std::move(scattered_columns[bucket]), src_col.type, src_col.name}); } diff --git a/src/Interpreters/Aggregator.h b/src/Interpreters/Aggregator.h index b8aab7a3343..c81cfa2c0a2 100644 --- a/src/Interpreters/Aggregator.h +++ b/src/Interpreters/Aggregator.h @@ -1301,13 +1301,13 @@ private: Method & method, Arena * arena, bool final, - size_t bucket) const; + Int32 bucket) const; Block mergeAndConvertOneBucketToBlock( ManyAggregatedDataVariants & variants, Arena * arena, bool final, - size_t bucket, + Int32 bucket, std::atomic * is_cancelled = nullptr) const; Block prepareBlockAndFillWithoutKey(AggregatedDataVariants & data_variants, bool final, bool is_overflows) const; diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index 3aadea918fb..bf85affcb90 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -37,6 +37,7 @@ namespace ProfileEvents { extern const Event AsyncInsertQuery; extern const Event AsyncInsertBytes; + extern const Event FailedAsyncInsertQuery; } namespace DB @@ -101,6 +102,8 @@ void AsynchronousInsertQueue::InsertData::Entry::finish(std::exception_ptr excep { std::lock_guard lock(mutex); finished = true; + if (exception_) + ProfileEvents::increment(ProfileEvents::FailedAsyncInsertQuery, 1); exception = exception_; cv.notify_all(); } diff --git a/src/Interpreters/AsynchronousMetrics.cpp b/src/Interpreters/AsynchronousMetrics.cpp index 338ae1bbbfd..488ac77e956 100644 --- a/src/Interpreters/AsynchronousMetrics.cpp +++ b/src/Interpreters/AsynchronousMetrics.cpp @@ -703,19 +703,26 @@ void AsynchronousMetrics::update(TimePoint update_time) Int64 free_memory_in_allocator_arenas = 0; #if USE_JEMALLOC - /// This is a memory which is kept by allocator. - /// Will subsract it from RSS to decrease memory drift. + /// According to jemalloc man, pdirty is: + /// + /// Number of pages within unused extents that are potentially + /// dirty, and for which madvise() or similar has not been called. + /// + /// So they will be subtracted from RSS to make accounting more + /// accurate, since those pages are not really RSS but memory + /// that can be used at any time via jemalloc. free_memory_in_allocator_arenas = je_malloc_pdirty * getPageSize(); #endif - Int64 difference = rss - free_memory_in_allocator_arenas - amount; + Int64 difference = rss - amount; /// Log only if difference is high. This is for convenience. The threshold is arbitrary.
if (difference >= 1048576 || difference <= -1048576) LOG_TRACE(log, - "MemoryTracking: was {}, peak {}, will set to {} (RSS), difference: {}", + "MemoryTracking: was {}, peak {}, free memory in arenas {}, will set to {} (RSS), difference: {}", ReadableSize(amount), ReadableSize(peak), + ReadableSize(free_memory_in_allocator_arenas), ReadableSize(rss), ReadableSize(difference)); diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index 20a9f6cce1d..72fa1b3c324 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -32,6 +32,8 @@ FileCache::FileCache( , allow_persistent_files(cache_settings_.do_not_evict_index_and_mark_files) , enable_cache_hits_threshold(cache_settings_.enable_cache_hits_threshold) , enable_filesystem_query_cache_limit(cache_settings_.enable_filesystem_query_cache_limit) + , enable_bypass_cache_with_threashold(cache_settings_.enable_bypass_cache_with_threashold) + , bypass_cache_threashold(cache_settings_.bypass_cache_threashold) , log(&Poco::Logger::get("FileCache")) , main_priority(std::make_unique()) , stash_priority(std::make_unique()) @@ -185,6 +187,20 @@ FileSegments FileCache::getImpl( /// Given range = [left, right] and non-overlapping ordered set of file segments, /// find list [segment1, ..., segmentN] of segments which intersect with given range. + FileSegments result; + + if (enable_bypass_cache_with_threashold && (range.size() > bypass_cache_threashold)) + { + auto file_segment = std::make_shared( + range.left, range.size(), key, this, FileSegment::State::SKIP_CACHE, CreateFileSegmentSettings{}); + { + std::unique_lock segment_lock(file_segment->mutex); + file_segment->detachAssumeStateFinalized(segment_lock); + } + result.emplace_back(file_segment); + return result; + } + auto it = files.find(key); if (it == files.end()) return {}; @@ -197,7 +213,6 @@ FileSegments FileCache::getImpl( return {}; } - FileSegments result; auto segment_it = file_segments.lower_bound(range.left); if (segment_it == file_segments.end()) { @@ -392,7 +407,6 @@ FileSegmentsHolder FileCache::getOrSet(const Key & key, size_t offset, size_t si #endif FileSegment::Range range(offset, offset + size - 1); - /// Get all segments which intersect with the given range. 
auto file_segments = getImpl(key, range, cache_lock); @@ -404,7 +418,6 @@ FileSegmentsHolder FileCache::getOrSet(const Key & key, size_t offset, size_t si { fillHolesWithEmptyFileSegments(file_segments, key, range, /* fill_with_detached */false, settings, cache_lock); } - assert(!file_segments.empty()); return FileSegmentsHolder(std::move(file_segments)); } diff --git a/src/Interpreters/Cache/FileCache.h b/src/Interpreters/Cache/FileCache.h index 07aea230803..706762b6915 100644 --- a/src/Interpreters/Cache/FileCache.h +++ b/src/Interpreters/Cache/FileCache.h @@ -140,6 +140,9 @@ private: const size_t enable_cache_hits_threshold; const bool enable_filesystem_query_cache_limit; + const bool enable_bypass_cache_with_threashold; + const size_t bypass_cache_threashold; + mutable std::mutex mutex; Poco::Logger * log; diff --git a/src/Interpreters/Cache/FileCacheSettings.cpp b/src/Interpreters/Cache/FileCacheSettings.cpp index 4b8d806bb53..b13cdd2ed04 100644 --- a/src/Interpreters/Cache/FileCacheSettings.cpp +++ b/src/Interpreters/Cache/FileCacheSettings.cpp @@ -35,6 +35,13 @@ void FileCacheSettings::loadFromConfig(const Poco::Util::AbstractConfiguration & enable_filesystem_query_cache_limit = config.getUInt64(config_prefix + ".enable_filesystem_query_cache_limit", false); enable_cache_hits_threshold = config.getUInt64(config_prefix + ".enable_cache_hits_threshold", REMOTE_FS_OBJECTS_CACHE_ENABLE_HITS_THRESHOLD); + enable_bypass_cache_with_threashold = config.getUInt64(config_prefix + ".enable_bypass_cache_with_threashold", false); + + if (config.has(config_prefix + ".bypass_cache_threashold")) + bypass_cache_threashold = parseWithSizeSuffix(config.getString(config_prefix + ".bypass_cache_threashold")); + else + bypass_cache_threashold = REMOTE_FS_OBJECTS_CACHE_BYPASS_THRESHOLD; + do_not_evict_index_and_mark_files = config.getUInt64(config_prefix + ".do_not_evict_index_and_mark_files", false); } diff --git a/src/Interpreters/Cache/FileCacheSettings.h b/src/Interpreters/Cache/FileCacheSettings.h index c6155edad85..80f7b5fa93f 100644 --- a/src/Interpreters/Cache/FileCacheSettings.h +++ b/src/Interpreters/Cache/FileCacheSettings.h @@ -20,6 +20,9 @@ struct FileCacheSettings bool do_not_evict_index_and_mark_files = true; + bool enable_bypass_cache_with_threashold = false; + size_t bypass_cache_threashold = REMOTE_FS_OBJECTS_CACHE_BYPASS_THRESHOLD; + void loadFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix); }; diff --git a/src/Interpreters/Cache/FileCache_fwd.h b/src/Interpreters/Cache/FileCache_fwd.h index 25c16b4e840..72dc1144fb9 100644 --- a/src/Interpreters/Cache/FileCache_fwd.h +++ b/src/Interpreters/Cache/FileCache_fwd.h @@ -7,6 +7,7 @@ namespace DB static constexpr int REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE = 100 * 1024 * 1024; static constexpr int REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_ELEMENTS = 1024 * 1024; static constexpr int REMOTE_FS_OBJECTS_CACHE_ENABLE_HITS_THRESHOLD = 0; +static constexpr size_t REMOTE_FS_OBJECTS_CACHE_BYPASS_THRESHOLD = 256 * 1024 * 1024; class FileCache; using FileCachePtr = std::shared_ptr; diff --git a/src/Interpreters/Cache/FileSegment.cpp b/src/Interpreters/Cache/FileSegment.cpp index cf48c5cd976..418bcee05d9 100644 --- a/src/Interpreters/Cache/FileSegment.cpp +++ b/src/Interpreters/Cache/FileSegment.cpp @@ -66,7 +66,7 @@ FileSegment::FileSegment( { throw Exception( ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, - "Can create cell with either EMPTY, DOWNLOADED, DOWNLOADING state"); + "Can only create cell
with either EMPTY, DOWNLOADED or SKIP_CACHE state"); } } } diff --git a/src/Interpreters/Cache/FileSegment.h b/src/Interpreters/Cache/FileSegment.h index 617e7173c2f..8f9c0097d77 100644 --- a/src/Interpreters/Cache/FileSegment.h +++ b/src/Interpreters/Cache/FileSegment.h @@ -66,10 +66,10 @@ public: */ DOWNLOADING, /** - * Space reservation for a file segment is incremental, i.e. downaloder reads buffer_size bytes + * Space reservation for a file segment is incremental, i.e. downloader reads buffer_size bytes * from remote fs -> tries to reserve buffer_size bytes to put them to cache -> writes to cache * on successful reservation and stops cache write otherwise. Those, who waited for the same file - * file segment, will read downloaded part from cache and remaining part directly from remote fs. + * segment, will read downloaded part from cache and remaining part directly from remote fs. */ PARTIALLY_DOWNLOADED_NO_CONTINUATION, /** diff --git a/src/Interpreters/ClientInfo.h b/src/Interpreters/ClientInfo.h index a1096b99325..f7a172b226d 100644 --- a/src/Interpreters/ClientInfo.h +++ b/src/Interpreters/ClientInfo.h @@ -69,6 +69,7 @@ public: Interface interface = Interface::TCP; bool is_secure = false; + String certificate; /// For tcp String os_user; diff --git a/src/Interpreters/Cluster.cpp b/src/Interpreters/Cluster.cpp index 6877c0ece06..b76434b23e7 100644 --- a/src/Interpreters/Cluster.cpp +++ b/src/Interpreters/Cluster.cpp @@ -423,7 +423,7 @@ Cluster::Cluster(const Poco::Util::AbstractConfiguration & config, info.all_addresses.push_back(address); auto pool = ConnectionPoolFactory::instance().get( - settings.distributed_connections_pool_size, + static_cast(settings.distributed_connections_pool_size), address.host_name, address.port, address.default_database, address.user, address.password, address.quota_key, address.cluster, address.cluster_secret, @@ -497,7 +497,7 @@ Cluster::Cluster(const Poco::Util::AbstractConfiguration & config, for (const auto & replica : replica_addresses) { auto replica_pool = ConnectionPoolFactory::instance().get( - settings.distributed_connections_pool_size, + static_cast(settings.distributed_connections_pool_size), replica.host_name, replica.port, replica.default_database, replica.user, replica.password, replica.quota_key, replica.cluster, replica.cluster_secret, @@ -585,11 +585,11 @@ Cluster::Cluster( for (const auto & replica : current) { auto replica_pool = ConnectionPoolFactory::instance().get( - settings.distributed_connections_pool_size, - replica.host_name, replica.port, - replica.default_database, replica.user, replica.password, replica.quota_key, - replica.cluster, replica.cluster_secret, - "server", replica.compression, replica.secure, replica.priority); + static_cast(settings.distributed_connections_pool_size), + replica.host_name, replica.port, + replica.default_database, replica.user, replica.password, replica.quota_key, + replica.cluster, replica.cluster_secret, + "server", replica.compression, replica.secure, replica.priority); all_replicas.emplace_back(replica_pool); if (replica.is_local && !treat_local_as_remote) shard_local_addresses.push_back(replica); @@ -693,7 +693,7 @@ Cluster::Cluster(Cluster::ReplicasAsShardsTag, const Cluster & from, const Setti info.all_addresses.push_back(address); auto pool = ConnectionPoolFactory::instance().get( - settings.distributed_connections_pool_size, + static_cast(settings.distributed_connections_pool_size), address.host_name, address.port, address.default_database, diff --git 
a/src/Interpreters/ClusterDiscovery.cpp b/src/Interpreters/ClusterDiscovery.cpp index 88d7cedec83..36b2f17e8a1 100644 --- a/src/Interpreters/ClusterDiscovery.cpp +++ b/src/Interpreters/ClusterDiscovery.cpp @@ -217,7 +217,7 @@ bool ClusterDiscovery::needUpdate(const Strings & node_uuids, const NodesInfo & ClusterPtr ClusterDiscovery::makeCluster(const ClusterInfo & cluster_info) { - std::vector> shards; + std::vector shards; { std::map replica_adresses; @@ -244,7 +244,7 @@ ClusterPtr ClusterDiscovery::makeCluster(const ClusterInfo & cluster_info) /* password= */ "", /* clickhouse_port= */ secure ? context->getTCPPortSecure().value_or(DBMS_DEFAULT_SECURE_PORT) : context->getTCPPort(), /* treat_local_as_remote= */ false, - /* treat_local_port_as_remote= */ context->getApplicationType() == Context::ApplicationType::LOCAL, + /* treat_local_port_as_remote= */ false, /// should be set only for clickhouse-local, but cluster discovery is not used there /* secure= */ secure); return cluster; } diff --git a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp index fce2e9b2f08..4653491aac9 100644 --- a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp +++ b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp @@ -69,7 +69,7 @@ void SelectStreamFactory::createForShard( query_ast, header, context, processed_stage, shard_info.shard_num, shard_count, /*replica_num=*/0, /*replica_count=*/0, /*coordinator=*/nullptr)); }; - auto emplace_remote_stream = [&](bool lazy = false, UInt32 local_delay = 0) + auto emplace_remote_stream = [&](bool lazy = false, time_t local_delay = 0) { remote_shards.emplace_back(Shard{ .query = query_ast, @@ -131,7 +131,7 @@ void SelectStreamFactory::createForShard( return; } - UInt32 local_delay = replicated_storage->getAbsoluteDelay(); + UInt64 local_delay = replicated_storage->getAbsoluteDelay(); if (local_delay < max_allowed_delay) { @@ -205,7 +205,7 @@ SelectStreamFactory::ShardPlans SelectStreamFactory::createForShardWithParallelR if (!max_allowed_delay) return false; - UInt32 local_delay = replicated_storage->getAbsoluteDelay(); + UInt64 local_delay = replicated_storage->getAbsoluteDelay(); return local_delay >= max_allowed_delay; }; diff --git a/src/Interpreters/ClusterProxy/SelectStreamFactory.h b/src/Interpreters/ClusterProxy/SelectStreamFactory.h index 440017a8e80..8ebddea4988 100644 --- a/src/Interpreters/ClusterProxy/SelectStreamFactory.h +++ b/src/Interpreters/ClusterProxy/SelectStreamFactory.h @@ -46,7 +46,7 @@ public: /// If we connect to replicas lazily. /// (When there is a local replica with big delay). 
bool lazy = false; - UInt32 local_delay = 0; + time_t local_delay = 0; }; using Shards = std::vector; diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index d974721627e..e9ec38f3806 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -26,7 +27,7 @@ namespace ErrorCodes namespace ClusterProxy { -ContextMutablePtr updateSettingsForCluster(const Cluster & cluster, ContextPtr context, const Settings & settings, Poco::Logger * log) +ContextMutablePtr updateSettingsForCluster(const Cluster & cluster, ContextPtr context, const Settings & settings, const StorageID & main_table, const SelectQueryInfo * query_info, Poco::Logger * log) { Settings new_settings = settings; new_settings.queue_max_wait_ms = Cluster::saturate(new_settings.queue_max_wait_ms, settings.max_execution_time); @@ -96,6 +97,20 @@ ContextMutablePtr updateSettingsForCluster(const Cluster & cluster, ContextPtr c new_settings.limit.changed = false; } + /// Setting additional_table_filters may be applied to Distributed table. + /// In case if query is executed up to WithMergableState on remote shard, it is impossible to filter on initiator. + /// We need to propagate the setting, but change the table name from distributed to source. + /// + /// Here we don't try to analyze setting again. In case if query_info->additional_filter_ast is not empty, some filter was applied. + /// It's just easier to add this filter for a source table. + if (query_info && query_info->additional_filter_ast) + { + Tuple tuple; + tuple.push_back(main_table.getShortName()); + tuple.push_back(queryToString(query_info->additional_filter_ast)); + new_settings.additional_table_filters.value.push_back(std::move(tuple)); + } + auto new_context = Context::createCopy(context); new_context->setSettings(new_settings); return new_context; @@ -121,12 +136,12 @@ void executeQuery( std::vector plans; SelectStreamFactory::Shards remote_shards; - auto new_context = updateSettingsForCluster(*query_info.getCluster(), context, settings, log); + auto new_context = updateSettingsForCluster(*query_info.getCluster(), context, settings, main_table, &query_info, log); new_context->getClientInfo().distributed_depth += 1; ThrottlerPtr user_level_throttler; - if (auto * process_list_element = context->getProcessListElement()) + if (auto process_list_element = context->getProcessListElement()) user_level_throttler = process_list_element->getUserNetworkThrottler(); /// Network bandwidth limit, if needed. @@ -165,7 +180,7 @@ void executeQuery( stream_factory.createForShard(shard_info, query_ast_for_shard, main_table, table_func_ptr, - new_context, plans, remote_shards, shards); + new_context, plans, remote_shards, static_cast(shards)); } if (!remote_shards.empty()) @@ -228,7 +243,7 @@ void executeQueryWithParallelReplicas( const Settings & settings = context->getSettingsRef(); ThrottlerPtr user_level_throttler; - if (auto * process_list_element = context->getProcessListElement()) + if (auto process_list_element = context->getProcessListElement()) user_level_throttler = process_list_element->getUserNetworkThrottler(); /// Network bandwidth limit, if needed. 
@@ -269,7 +284,8 @@ void executeQueryWithParallelReplicas( query_ast_for_shard = query_ast; auto shard_plans = stream_factory.createForShardWithParallelReplicas(shard_info, - query_ast_for_shard, main_table, table_func_ptr, throttler, context, shards, query_info.storage_limits); + query_ast_for_shard, main_table, table_func_ptr, throttler, context, + static_cast(shards), query_info.storage_limits); if (!shard_plans.local_plan && !shard_plans.remote_plan) throw Exception(ErrorCodes::LOGICAL_ERROR, "No plans were generated for reading from shard. This is a bug"); diff --git a/src/Interpreters/ClusterProxy/executeQuery.h b/src/Interpreters/ClusterProxy/executeQuery.h index 1a5035015a7..ac88752ce74 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.h +++ b/src/Interpreters/ClusterProxy/executeQuery.h @@ -35,7 +35,7 @@ class SelectStreamFactory; /// /// @return new Context with adjusted settings ContextMutablePtr updateSettingsForCluster( - const Cluster & cluster, ContextPtr context, const Settings & settings, Poco::Logger * log = nullptr); + const Cluster & cluster, ContextPtr context, const Settings & settings, const StorageID & main_table, const SelectQueryInfo * query_info = nullptr, Poco::Logger * log = nullptr); /// Execute a distributed query, creating a query plan, from which the query pipeline can be built. /// `stream_factory` object encapsulates the logic of creating plans for a different type of query diff --git a/src/Interpreters/ConcurrentHashJoin.cpp b/src/Interpreters/ConcurrentHashJoin.cpp index b7ad56dca91..cc79a71245b 100644 --- a/src/Interpreters/ConcurrentHashJoin.cpp +++ b/src/Interpreters/ConcurrentHashJoin.cpp @@ -38,7 +38,7 @@ static UInt32 toPowerOfTwo(UInt32 x) ConcurrentHashJoin::ConcurrentHashJoin(ContextPtr context_, std::shared_ptr table_join_, size_t slots_, const Block & right_sample_block, bool any_take_last_row_) : context(context_) , table_join(table_join_) - , slots(toPowerOfTwo(std::min(slots_, 256))) + , slots(toPowerOfTwo(std::min(static_cast(slots_), 256))) { for (size_t i = 0; i < slots; ++i) { diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index b08c2bab81c..9c949a17e64 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -57,7 +57,9 @@ #include #include #include -#include +#include +#include +#include #include #include #include @@ -186,7 +188,6 @@ struct ContextSharedPart : boost::noncopyable String user_files_path; /// Path to the directory with user provided files, usable by 'file' table function. String dictionaries_lib_path; /// Path to the directory with user provided binaries and libraries for external dictionaries. String user_scripts_path; /// Path to the directory with user provided scripts. - String user_defined_path; /// Path to the directory with user defined objects. ConfigurationPtr config; /// Global configuration settings. String tmp_path; /// Path to the temporary files that occur when processing the request. @@ -194,16 +195,18 @@ struct ContextSharedPart : boost::noncopyable mutable std::unique_ptr embedded_dictionaries; /// Metrica's dictionaries. Have lazy initialization. 
mutable std::unique_ptr external_dictionaries_loader; - mutable std::unique_ptr external_user_defined_executable_functions_loader; scope_guard models_repository_guard; ExternalLoaderXMLConfigRepository * external_dictionaries_config_repository = nullptr; scope_guard dictionaries_xmls; + mutable std::unique_ptr external_user_defined_executable_functions_loader; ExternalLoaderXMLConfigRepository * user_defined_executable_functions_config_repository = nullptr; scope_guard user_defined_executable_functions_xmls; + mutable std::unique_ptr user_defined_sql_objects_loader; + #if USE_NLP mutable std::optional synonyms_extensions; mutable std::optional lemmatizers; @@ -420,6 +423,8 @@ struct ContextSharedPart : boost::noncopyable external_dictionaries_loader->enablePeriodicUpdates(false); if (external_user_defined_executable_functions_loader) external_user_defined_executable_functions_loader->enablePeriodicUpdates(false); + if (user_defined_sql_objects_loader) + user_defined_sql_objects_loader->stopWatching(); Session::shutdownNamedSessions(); @@ -450,6 +455,7 @@ struct ContextSharedPart : boost::noncopyable std::unique_ptr delete_embedded_dictionaries; std::unique_ptr delete_external_dictionaries_loader; std::unique_ptr delete_external_user_defined_executable_functions_loader; + std::unique_ptr delete_user_defined_sql_objects_loader; std::unique_ptr delete_buffer_flush_schedule_pool; std::unique_ptr delete_schedule_pool; std::unique_ptr delete_distributed_schedule_pool; @@ -457,6 +463,18 @@ struct ContextSharedPart : boost::noncopyable std::unique_ptr delete_ddl_worker; std::unique_ptr delete_access_control; + /// Delete DDLWorker before zookeeper. + /// Cause it can call Context::getZooKeeper and resurrect it. + + { + auto lock = std::lock_guard(mutex); + delete_ddl_worker = std::move(ddl_worker); + } + + /// DDLWorker should be deleted without lock, cause its internal thread can + /// take it as well, which will cause deadlock. 
+ delete_ddl_worker.reset(); + { auto lock = std::lock_guard(mutex); @@ -488,11 +506,11 @@ struct ContextSharedPart : boost::noncopyable delete_embedded_dictionaries = std::move(embedded_dictionaries); delete_external_dictionaries_loader = std::move(external_dictionaries_loader); delete_external_user_defined_executable_functions_loader = std::move(external_user_defined_executable_functions_loader); + delete_user_defined_sql_objects_loader = std::move(user_defined_sql_objects_loader); delete_buffer_flush_schedule_pool = std::move(buffer_flush_schedule_pool); delete_schedule_pool = std::move(schedule_pool); delete_distributed_schedule_pool = std::move(distributed_schedule_pool); delete_message_broker_schedule_pool = std::move(message_broker_schedule_pool); - delete_ddl_worker = std::move(ddl_worker); delete_access_control = std::move(access_control); /// Stop trace collector if any @@ -515,12 +533,12 @@ struct ContextSharedPart : boost::noncopyable delete_embedded_dictionaries.reset(); delete_external_dictionaries_loader.reset(); delete_external_user_defined_executable_functions_loader.reset(); + delete_user_defined_sql_objects_loader.reset(); delete_ddl_worker.reset(); delete_buffer_flush_schedule_pool.reset(); delete_schedule_pool.reset(); delete_distributed_schedule_pool.reset(); delete_message_broker_schedule_pool.reset(); - delete_ddl_worker.reset(); delete_access_control.reset(); total_memory_tracker.resetOvercommitTracker(); @@ -658,12 +676,6 @@ String Context::getUserScriptsPath() const return shared->user_scripts_path; } -String Context::getUserDefinedPath() const -{ - auto lock = getLock(); - return shared->user_defined_path; -} - Strings Context::getWarnings() const { Strings common_warnings; @@ -726,9 +738,6 @@ void Context::setPath(const String & path) if (shared->user_scripts_path.empty()) shared->user_scripts_path = shared->path + "user_scripts/"; - - if (shared->user_defined_path.empty()) - shared->user_defined_path = shared->path + "user_defined/"; } VolumePtr Context::setTemporaryStorage(const String & path, const String & policy_name, size_t max_size) @@ -804,12 +813,6 @@ void Context::setUserScriptsPath(const String & path) shared->user_scripts_path = path; } -void Context::setUserDefinedPath(const String & path) -{ - auto lock = getLock(); - shared->user_defined_path = path; -} - void Context::addWarningMessage(const String & msg) const { auto lock = getLock(); @@ -1470,10 +1473,8 @@ void Context::setCurrentQueryId(const String & query_id) void Context::killCurrentQuery() { - if (process_list_elem) - { - process_list_elem->cancelQuery(true); - } + if (auto elem = process_list_elem.lock()) + elem->cancelQuery(true); } String Context::getDefaultFormat() const @@ -1652,6 +1653,22 @@ void Context::loadOrReloadUserDefinedExecutableFunctions(const Poco::Util::Abstr shared->user_defined_executable_functions_xmls = external_user_defined_executable_functions_loader.addConfigRepository(std::move(repository)); } +const IUserDefinedSQLObjectsLoader & Context::getUserDefinedSQLObjectsLoader() const +{ + auto lock = getLock(); + if (!shared->user_defined_sql_objects_loader) + shared->user_defined_sql_objects_loader = createUserDefinedSQLObjectsLoader(getGlobalContext()); + return *shared->user_defined_sql_objects_loader; +} + +IUserDefinedSQLObjectsLoader & Context::getUserDefinedSQLObjectsLoader() +{ + auto lock = getLock(); + if (!shared->user_defined_sql_objects_loader) + shared->user_defined_sql_objects_loader = createUserDefinedSQLObjectsLoader(getGlobalContext()); + return 
*shared->user_defined_sql_objects_loader; +} + #if USE_NLP SynonymsExtensions & Context::getSynonymsExtensions() const @@ -1698,15 +1715,15 @@ ProgressCallback Context::getProgressCallback() const } -void Context::setProcessListElement(ProcessList::Element * elem) +void Context::setProcessListElement(QueryStatusPtr elem) { /// Set to a session or query. In the session, only one query is processed at a time. Therefore, the lock is not needed. process_list_elem = elem; } -ProcessList::Element * Context::getProcessListElement() const +QueryStatusPtr Context::getProcessListElement() const { - return process_list_elem; + return process_list_elem.lock(); } @@ -2054,7 +2071,12 @@ zkutil::ZooKeeperPtr Context::getZooKeeper() const if (!shared->zookeeper) shared->zookeeper = std::make_shared(config, "zookeeper", getZooKeeperLog()); else if (shared->zookeeper->expired()) + { + Stopwatch watch; + LOG_DEBUG(shared->log, "Trying to establish a new connection with ZooKeeper"); shared->zookeeper = shared->zookeeper->startNewSession(); + LOG_DEBUG(shared->log, "Establishing a new connection with ZooKeeper took {} ms", watch.elapsedMilliseconds()); + } return shared->zookeeper; } @@ -3410,7 +3432,7 @@ void Context::initializeBackgroundExecutorsIfNeeded() size_t background_merges_mutations_concurrency_ratio = 2; if (config.has("background_merges_mutations_concurrency_ratio")) background_merges_mutations_concurrency_ratio = config.getUInt64("background_merges_mutations_concurrency_ratio"); - else if (config.has("profiles.default.background_pool_size")) + else if (config.has("profiles.default.background_merges_mutations_concurrency_ratio")) background_merges_mutations_concurrency_ratio = config.getUInt64("profiles.default.background_merges_mutations_concurrency_ratio"); size_t background_move_pool_size = 8; @@ -3625,6 +3647,7 @@ WriteSettings Context::getWriteSettings() const res.enable_filesystem_cache_on_write_operations = settings.enable_filesystem_cache_on_write_operations; res.enable_filesystem_cache_log = settings.enable_filesystem_cache_log; + res.s3_allow_parallel_part_upload = settings.s3_allow_parallel_part_upload; res.remote_throttler = getRemoteWriteThrottler(); diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 7711ea34dc7..eeb9e8da148 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -54,6 +54,7 @@ enum class RowPolicyFilterType; class EmbeddedDictionaries; class ExternalDictionariesLoader; class ExternalUserDefinedExecutableFunctionsLoader; +class IUserDefinedSQLObjectsLoader; class InterserverCredentials; using InterserverCredentialsPtr = std::shared_ptr; class InterserverIOHandler; @@ -67,6 +68,7 @@ class MMappedFileCache; class UncompressedCache; class ProcessList; class QueryStatus; +using QueryStatusPtr = std::shared_ptr; class Macros; struct Progress; struct FileProgress; @@ -229,7 +231,7 @@ private: using FileProgressCallback = std::function; FileProgressCallback file_progress_callback; /// Callback for tracking progress of file loading. - QueryStatus * process_list_elem = nullptr; /// For tracking total resource usage for query. + std::weak_ptr process_list_elem; /// For tracking total resource usage for query. 
StorageID insertion_table = StorageID::createEmpty(); /// Saved insertion table in query context bool is_distributed = false; /// Whether the current context it used for distributed query @@ -435,7 +437,6 @@ public: String getUserFilesPath() const; String getDictionariesLibPath() const; String getUserScriptsPath() const; - String getUserDefinedPath() const; /// A list of warnings about server configuration to place in `system.warnings` table. Strings getWarnings() const; @@ -450,7 +451,6 @@ public: void setUserFilesPath(const String & path); void setDictionariesLibPath(const String & path); void setUserScriptsPath(const String & path); - void setUserDefinedPath(const String & path); void addWarningMessage(const String & msg) const; @@ -653,16 +653,19 @@ public: /// Returns the current constraints (can return null). std::shared_ptr getSettingsConstraintsAndCurrentProfiles() const; - const EmbeddedDictionaries & getEmbeddedDictionaries() const; const ExternalDictionariesLoader & getExternalDictionariesLoader() const; - const ExternalUserDefinedExecutableFunctionsLoader & getExternalUserDefinedExecutableFunctionsLoader() const; - EmbeddedDictionaries & getEmbeddedDictionaries(); ExternalDictionariesLoader & getExternalDictionariesLoader(); ExternalDictionariesLoader & getExternalDictionariesLoaderUnlocked(); - ExternalUserDefinedExecutableFunctionsLoader & getExternalUserDefinedExecutableFunctionsLoader(); - ExternalUserDefinedExecutableFunctionsLoader & getExternalUserDefinedExecutableFunctionsLoaderUnlocked(); + const EmbeddedDictionaries & getEmbeddedDictionaries() const; + EmbeddedDictionaries & getEmbeddedDictionaries(); void tryCreateEmbeddedDictionaries(const Poco::Util::AbstractConfiguration & config) const; void loadOrReloadDictionaries(const Poco::Util::AbstractConfiguration & config); + + const ExternalUserDefinedExecutableFunctionsLoader & getExternalUserDefinedExecutableFunctionsLoader() const; + ExternalUserDefinedExecutableFunctionsLoader & getExternalUserDefinedExecutableFunctionsLoader(); + ExternalUserDefinedExecutableFunctionsLoader & getExternalUserDefinedExecutableFunctionsLoaderUnlocked(); + const IUserDefinedSQLObjectsLoader & getUserDefinedSQLObjectsLoader() const; + IUserDefinedSQLObjectsLoader & getUserDefinedSQLObjectsLoader(); void loadOrReloadUserDefinedExecutableFunctions(const Poco::Util::AbstractConfiguration & config); #if USE_NLP @@ -748,9 +751,9 @@ public: /** Set in executeQuery and InterpreterSelectQuery. Then it is used in QueryPipeline, * to update and monitor information about the total number of resources spent for the query. */ - void setProcessListElement(QueryStatus * elem); + void setProcessListElement(QueryStatusPtr elem); /// Can return nullptr if the query was not inserted into the ProcessList. - QueryStatus * getProcessListElement() const; + QueryStatusPtr getProcessListElement() const; /// List all queries. ProcessList & getProcessList(); diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp index 2d609c00406..980e8f6e7b6 100644 --- a/src/Interpreters/DDLTask.cpp +++ b/src/Interpreters/DDLTask.cpp @@ -8,12 +8,14 @@ #include #include #include -#include -#include #include +#include #include +#include +#include #include #include +#include namespace DB @@ -168,6 +170,13 @@ void DDLTaskBase::parseQueryFromEntry(ContextPtr context) query = parseQuery(parser_query, begin, end, description, 0, settings.max_parser_depth); } +void DDLTaskBase::formatRewrittenQuery(ContextPtr context) +{ + /// Convert rewritten AST back to string. 
+ query_str = queryToString(*query); + query_for_logging = maskSensitiveInfoInQueryForLogging(query_str, query, context); +} + ContextMutablePtr DDLTaskBase::makeQueryContext(ContextPtr from_context, const ZooKeeperPtr & /*zookeeper*/) { auto query_context = Context::createCopy(from_context); @@ -265,6 +274,7 @@ void DDLTask::setClusterInfo(ContextPtr context, Poco::Logger * log) host_id.readableString(), entry_name, address_in_cluster.readableString(), cluster_name); } + /// Rewrite AST without ON CLUSTER. WithoutOnClusterASTRewriteParams params; params.default_database = address_in_cluster.default_database; params.host_id = address_in_cluster.toString(); @@ -405,6 +415,7 @@ void DatabaseReplicatedTask::parseQueryFromEntry(ContextPtr context) chassert(!ddl_query->database); ddl_query->setDatabase(database->getDatabaseName()); } + formatRewrittenQuery(context); } ContextMutablePtr DatabaseReplicatedTask::makeQueryContext(ContextPtr from_context, const ZooKeeperPtr & zookeeper) diff --git a/src/Interpreters/DDLTask.h b/src/Interpreters/DDLTask.h index 661cee84a45..2043de6701e 100644 --- a/src/Interpreters/DDLTask.h +++ b/src/Interpreters/DDLTask.h @@ -99,6 +99,9 @@ struct DDLTaskBase String host_id_str; ASTPtr query; + String query_str; + String query_for_logging; + bool is_initial_query = false; bool is_circular_replicated = false; bool execute_on_leader = false; @@ -114,6 +117,7 @@ struct DDLTaskBase virtual ~DDLTaskBase() = default; virtual void parseQueryFromEntry(ContextPtr context); + void formatRewrittenQuery(ContextPtr context); virtual String getShardID() const = 0; diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp index 8873d851de1..2e1918e1a37 100644 --- a/src/Interpreters/DDLWorker.cpp +++ b/src/Interpreters/DDLWorker.cpp @@ -10,8 +10,6 @@ #include #include #include -#include -#include #include #include #include @@ -26,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -114,7 +113,7 @@ DDLWorker::DDLWorker( void DDLWorker::startup() { [[maybe_unused]] bool prev_stop_flag = stop_flag.exchange(false); - chassert(true); + chassert(prev_stop_flag); main_thread = ThreadFromGlobalPool(&DDLWorker::runMainThread, this); cleanup_thread = ThreadFromGlobalPool(&DDLWorker::runCleanupThread, this); } @@ -206,6 +205,8 @@ DDLTaskPtr DDLWorker::initAndCheckTask(const String & entry_name, String & out_r task->parseQueryFromEntry(context); /// Stage 3.2: check cluster and find the host in cluster task->setClusterInfo(context, log); + /// Stage 3.3: output rewritten query back to string + task->formatRewrittenQuery(context); } catch (...) 
{ @@ -430,11 +431,12 @@ DDLTaskBase & DDLWorker::saveTask(DDLTaskPtr && task) return *current_tasks.back(); } -bool DDLWorker::tryExecuteQuery(const String & query, DDLTaskBase & task, const ZooKeeperPtr & zookeeper) +bool DDLWorker::tryExecuteQuery(DDLTaskBase & task, const ZooKeeperPtr & zookeeper) { /// Add special comment at the start of query to easily identify DDL-produced queries in query_log String query_prefix = "/* ddl_entry=" + task.entry_name + " */ "; - String query_to_execute = query_prefix + query; + String query_to_execute = query_prefix + task.query_str; + String query_to_show_in_logs = query_prefix + task.query_for_logging; ReadBufferFromString istr(query_to_execute); String dummy_string; @@ -462,7 +464,7 @@ bool DDLWorker::tryExecuteQuery(const String & query, DDLTaskBase & task, const throw; task.execution_status = ExecutionStatus::fromCurrentException(); - tryLogCurrentException(log, "Query " + query + " wasn't finished successfully"); + tryLogCurrentException(log, "Query " + query_to_show_in_logs + " wasn't finished successfully"); /// We use return value of tryExecuteQuery(...) in tryExecuteQueryOnLeaderReplica(...) to determine /// if replica has stopped being leader and we should retry query. @@ -483,7 +485,7 @@ bool DDLWorker::tryExecuteQuery(const String & query, DDLTaskBase & task, const throw; task.execution_status = ExecutionStatus::fromCurrentException(); - tryLogCurrentException(log, "Query " + query + " wasn't finished successfully"); + tryLogCurrentException(log, "Query " + query_to_show_in_logs + " wasn't finished successfully"); /// We don't know what exactly happened, but maybe it's Poco::NetException or std::bad_alloc, /// so we consider unknown exception as retryable error. @@ -491,14 +493,14 @@ bool DDLWorker::tryExecuteQuery(const String & query, DDLTaskBase & task, const } task.execution_status = ExecutionStatus(0); - LOG_DEBUG(log, "Executed query: {}", query); + LOG_DEBUG(log, "Executed query: {}", query_to_show_in_logs); return true; } void DDLWorker::updateMaxDDLEntryID(const String & entry_name) { - UInt64 id = DDLTaskBase::getLogEntryNumber(entry_name); + UInt32 id = DDLTaskBase::getLogEntryNumber(entry_name); auto prev_id = max_id.load(std::memory_order_relaxed); while (prev_id < id) { @@ -513,7 +515,7 @@ void DDLWorker::updateMaxDDLEntryID(const String & entry_name) void DDLWorker::processTask(DDLTaskBase & task, const ZooKeeperPtr & zookeeper) { - LOG_DEBUG(log, "Processing task {} ({})", task.entry_name, task.entry.query); + LOG_DEBUG(log, "Processing task {} ({})", task.entry_name, task.query_for_logging); chassert(!task.completely_processed); /// Setup tracing context on current thread for current DDL @@ -532,7 +534,8 @@ void DDLWorker::processTask(DDLTaskBase & task, const ZooKeeperPtr & zookeeper) auto active_node = zkutil::EphemeralNodeHolder::existing(active_node_path, *zookeeper); /// Try fast path - auto create_active_res = zookeeper->tryCreate(active_node_path, {}, zkutil::CreateMode::Ephemeral); + const String canary_value = Field(ServerUUID::get()).dump(); + auto create_active_res = zookeeper->tryCreate(active_node_path, canary_value, zkutil::CreateMode::Ephemeral); if (create_active_res != Coordination::Error::ZOK) { if (create_active_res != Coordination::Error::ZNONODE && create_active_res != Coordination::Error::ZNODEEXISTS) @@ -563,10 +566,10 @@ void DDLWorker::processTask(DDLTaskBase & task, const ZooKeeperPtr & zookeeper) { /// Connection has been lost and now we are retrying, /// but our previous ephemeral node still 
exists. - zookeeper->waitForEphemeralToDisappearIfAny(active_node_path); + zookeeper->handleEphemeralNodeExistence(active_node_path, canary_value); } - zookeeper->create(active_node_path, {}, zkutil::CreateMode::Ephemeral); + zookeeper->create(active_node_path, canary_value, zkutil::CreateMode::Ephemeral); } /// We must hold the lock until task execution status is committed to ZooKeeper, @@ -585,8 +588,7 @@ void DDLWorker::processTask(DDLTaskBase & task, const ZooKeeperPtr & zookeeper) try { - String rewritten_query = queryToString(task.query); - LOG_DEBUG(log, "Executing query: {}", rewritten_query); + LOG_DEBUG(log, "Executing query: {}", task.query_for_logging); StoragePtr storage; if (auto * query_with_table = dynamic_cast(task.query.get()); query_with_table) @@ -603,12 +605,12 @@ void DDLWorker::processTask(DDLTaskBase & task, const ZooKeeperPtr & zookeeper) if (task.execute_on_leader) { - tryExecuteQueryOnLeaderReplica(task, storage, rewritten_query, task.entry_path, zookeeper, execute_on_leader_lock); + tryExecuteQueryOnLeaderReplica(task, storage, task.entry_path, zookeeper, execute_on_leader_lock); } else { storage.reset(); - tryExecuteQuery(rewritten_query, task, zookeeper); + tryExecuteQuery(task, zookeeper); } } catch (const Coordination::Exception &) @@ -692,7 +694,6 @@ bool DDLWorker::taskShouldBeExecutedOnLeader(const ASTPtr & ast_ddl, const Stora bool DDLWorker::tryExecuteQueryOnLeaderReplica( DDLTaskBase & task, StoragePtr storage, - const String & rewritten_query, const String & /*node_path*/, const ZooKeeperPtr & zookeeper, std::unique_ptr & execute_on_leader_lock) @@ -791,7 +792,7 @@ bool DDLWorker::tryExecuteQueryOnLeaderReplica( /// If the leader will unexpectedly changed this method will return false /// and on the next iteration new leader will take lock - if (tryExecuteQuery(rewritten_query, task, zookeeper)) + if (tryExecuteQuery(task, zookeeper)) { executed_by_us = true; break; diff --git a/src/Interpreters/DDLWorker.h b/src/Interpreters/DDLWorker.h index e3c1fa4c271..65ef4b440a1 100644 --- a/src/Interpreters/DDLWorker.h +++ b/src/Interpreters/DDLWorker.h @@ -101,12 +101,11 @@ protected: bool tryExecuteQueryOnLeaderReplica( DDLTaskBase & task, StoragePtr storage, - const String & rewritten_query, const String & node_path, const ZooKeeperPtr & zookeeper, std::unique_ptr & execute_on_leader_lock); - bool tryExecuteQuery(const String & query, DDLTaskBase & task, const ZooKeeperPtr & zookeeper); + bool tryExecuteQuery(DDLTaskBase & task, const ZooKeeperPtr & zookeeper); /// Checks and cleanups queue's nodes void cleanupQueue(Int64 current_time_seconds, const ZooKeeperPtr & zookeeper); @@ -159,7 +158,7 @@ protected: /// How many tasks could be in the queue size_t max_tasks_in_queue = 1000; - std::atomic max_id = 0; + std::atomic max_id = 0; const CurrentMetrics::Metric * max_entry_metric; const CurrentMetrics::Metric * max_pushed_entry_metric; }; diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index ab6f088ac69..7ceb0bf3a00 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -144,9 +144,9 @@ StoragePtr TemporaryTableHolder::getTable() const void DatabaseCatalog::initializeAndLoadTemporaryDatabase() { drop_delay_sec = getContext()->getConfigRef().getInt("database_atomic_delay_before_drop_table_sec", default_drop_delay_sec); - unused_dir_hide_timeout_sec = getContext()->getConfigRef().getInt("database_catalog_unused_dir_hide_timeout_sec", unused_dir_hide_timeout_sec); - 
unused_dir_rm_timeout_sec = getContext()->getConfigRef().getInt("database_catalog_unused_dir_rm_timeout_sec", unused_dir_rm_timeout_sec); - unused_dir_cleanup_period_sec = getContext()->getConfigRef().getInt("database_catalog_unused_dir_cleanup_period_sec", unused_dir_cleanup_period_sec); + unused_dir_hide_timeout_sec = getContext()->getConfigRef().getInt64("database_catalog_unused_dir_hide_timeout_sec", unused_dir_hide_timeout_sec); + unused_dir_rm_timeout_sec = getContext()->getConfigRef().getInt64("database_catalog_unused_dir_rm_timeout_sec", unused_dir_rm_timeout_sec); + unused_dir_cleanup_period_sec = getContext()->getConfigRef().getInt64("database_catalog_unused_dir_cleanup_period_sec", unused_dir_cleanup_period_sec); auto db_for_temporary_and_external_tables = std::make_shared(TEMPORARY_DATABASE, getContext()); attachDatabase(TEMPORARY_DATABASE, db_for_temporary_and_external_tables); diff --git a/src/Interpreters/DirectJoin.cpp b/src/Interpreters/DirectJoin.cpp index 02b3854a47b..e148db1d8e6 100644 --- a/src/Interpreters/DirectJoin.cpp +++ b/src/Interpreters/DirectJoin.cpp @@ -93,6 +93,16 @@ DirectKeyValueJoin::DirectKeyValueJoin(std::shared_ptr table_join_, LOG_TRACE(log, "Using direct join"); } +DirectKeyValueJoin::DirectKeyValueJoin( + std::shared_ptr table_join_, + const Block & right_sample_block_, + std::shared_ptr storage_, + const Block & right_sample_block_with_storage_column_names_) + : DirectKeyValueJoin(table_join_, right_sample_block_, storage_) +{ + right_sample_block_with_storage_column_names = right_sample_block_with_storage_column_names_; +} + bool DirectKeyValueJoin::addJoinedBlock(const Block &, bool) { throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Unreachable code reached"); @@ -114,14 +124,15 @@ void DirectKeyValueJoin::joinBlock(Block & block, std::shared_ptr &) return; Block original_right_block = originalRightBlock(right_sample_block, *table_join); - const Names & attribute_names = original_right_block.getNames(); + Block right_block_to_use = right_sample_block_with_storage_column_names ? 
right_sample_block_with_storage_column_names : original_right_block; + const Names & attribute_names = right_block_to_use.getNames(); NullMap null_map; Chunk joined_chunk = storage->getByKeys({key_col}, null_map, attribute_names); /// Expected right block may differ from structure in storage, because of `join_use_nulls` or we just select not all joined attributes Block sample_storage_block = storage->getSampleBlock(attribute_names); - MutableColumns result_columns = convertBlockStructure(sample_storage_block, original_right_block, joined_chunk.mutateColumns(), null_map); + MutableColumns result_columns = convertBlockStructure(sample_storage_block, right_block_to_use, joined_chunk.mutateColumns(), null_map); for (size_t i = 0; i < result_columns.size(); ++i) { diff --git a/src/Interpreters/DirectJoin.h b/src/Interpreters/DirectJoin.h index 8e82b59da02..6a6f4505474 100644 --- a/src/Interpreters/DirectJoin.h +++ b/src/Interpreters/DirectJoin.h @@ -25,6 +25,12 @@ public: const Block & right_sample_block_, std::shared_ptr storage_); + DirectKeyValueJoin( + std::shared_ptr table_join_, + const Block & right_sample_block_, + std::shared_ptr storage_, + const Block & right_sample_block_with_storage_column_names_); + virtual const TableJoin & getTableJoin() const override { return *table_join; } virtual bool addJoinedBlock(const Block &, bool) override; @@ -52,6 +58,7 @@ private: std::shared_ptr table_join; std::shared_ptr storage; Block right_sample_block; + Block right_sample_block_with_storage_column_names; Block sample_block_with_columns_to_add; Poco::Logger * log; diff --git a/src/Interpreters/ExpressionActions.cpp b/src/Interpreters/ExpressionActions.cpp index b27df0f1c35..9b38072b5af 100644 --- a/src/Interpreters/ExpressionActions.cpp +++ b/src/Interpreters/ExpressionActions.cpp @@ -1073,8 +1073,8 @@ void ExpressionActionsChain::JoinStep::finalize(const NameSet & required_output_ } /// Result will also contain joined columns. - for (const auto & column_name : analyzed_join->columnsAddedByJoin()) - required_names.emplace(column_name); + for (const auto & column : analyzed_join->columnsAddedByJoin()) + required_names.emplace(column.name); for (const auto & column : result_columns) { diff --git a/src/Interpreters/ExternalLoader.cpp b/src/Interpreters/ExternalLoader.cpp index 704dff325b7..ea2b9045120 100644 --- a/src/Interpreters/ExternalLoader.cpp +++ b/src/Interpreters/ExternalLoader.cpp @@ -714,7 +714,10 @@ public: /// Object was never loaded successfully and should be reloaded. 
startLoading(info); } - LOG_TRACE(log, "Object '{}' is neither loaded nor failed, so it will not be reloaded as outdated.", info.name); + else + { + LOG_TRACE(log, "Object '{}' is neither loaded nor failed, so it will not be reloaded as outdated.", info.name); + } } } } diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index 7780b335128..41c7c28a6fa 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -232,6 +232,11 @@ HashJoin::HashJoin(std::shared_ptr table_join_, const Block & right_s data->type = Type::CROSS; sample_block_with_columns_to_add = right_sample_block; } + else if (table_join->getClauses().empty()) + { + data->type = Type::EMPTY; + sample_block_with_columns_to_add = right_sample_block; + } else if (table_join->oneDisjunct()) { const auto & key_names_right = table_join->getOnlyClause().key_names_right; @@ -653,7 +658,9 @@ void HashJoin::initRightBlockStructure(Block & saved_block_sample) /// Save non key columns for (auto & column : sample_block_with_columns_to_add) { - if (!saved_block_sample.findByName(column.name)) + if (auto * col = saved_block_sample.findByName(column.name)) + *col = column; + else saved_block_sample.insert(column); } } diff --git a/src/Interpreters/InDepthNodeVisitor.h b/src/Interpreters/InDepthNodeVisitor.h index 785c88ec77b..9c4fed56fd4 100644 --- a/src/Interpreters/InDepthNodeVisitor.h +++ b/src/Interpreters/InDepthNodeVisitor.h @@ -25,13 +25,47 @@ public: {} void visit(T & ast) + { + if (ostr) + visitImpl(ast); + else + visitImpl(ast); + } + +private: + Data & data; + size_t visit_depth; + WriteBuffer * ostr; + + template + void visitImpl(T & ast) { checkStackSize(); - DumpASTNode dump(*ast, ostr, visit_depth, typeid(Matcher).name()); + if constexpr (with_dump) + { + DumpASTNode dump(*ast, ostr, visit_depth, typeid(Matcher).name()); + visitImplMain(ast); + } + else + { + visitImplMain(ast); + } + } + template + void visitImplMain(T & ast) + { if constexpr (!_top_to_bottom) - visitChildren(ast); + visitChildren(ast); + doVisit(ast); + + if constexpr (_top_to_bottom) + visitChildren(ast); + } + + void doVisit(T & ast) + { try { Matcher::visit(ast, data); @@ -41,16 +75,9 @@ public: e.addMessage("While processing {}", ast->formatForErrorMessage()); throw; } - - if constexpr (_top_to_bottom) - visitChildren(ast); } -private: - Data & data; - size_t visit_depth; - WriteBuffer * ostr; - + template void visitChildren(T & ast) { for (auto & child : ast->children) @@ -62,7 +89,7 @@ private: need_visit_child = Matcher::needChildVisit(ast, child); if (need_visit_child) - visit(child); + visitImpl(child); } } }; diff --git a/src/Interpreters/InterpreterCreateFunctionQuery.cpp b/src/Interpreters/InterpreterCreateFunctionQuery.cpp index dfd18ad28de..d56b5029e41 100644 --- a/src/Interpreters/InterpreterCreateFunctionQuery.cpp +++ b/src/Interpreters/InterpreterCreateFunctionQuery.cpp @@ -1,16 +1,11 @@ #include #include +#include +#include #include -#include -#include -#include -#include -#include #include #include -#include -#include namespace DB @@ -18,13 +13,11 @@ namespace DB namespace ErrorCodes { - extern const int CANNOT_CREATE_RECURSIVE_FUNCTION; - extern const int UNSUPPORTED_METHOD; + extern const int INCORRECT_QUERY; } BlockIO InterpreterCreateFunctionQuery::execute() { - FunctionNameNormalizer().visit(query_ptr.get()); ASTCreateFunctionQuery & create_function_query = query_ptr->as(); AccessRightsElements access_rights_elements; @@ -33,80 +26,27 @@ BlockIO InterpreterCreateFunctionQuery::execute() if 
(create_function_query.or_replace) access_rights_elements.emplace_back(AccessType::DROP_FUNCTION); + auto current_context = getContext(); + if (!create_function_query.cluster.empty()) { + if (current_context->getUserDefinedSQLObjectsLoader().isReplicated()) + throw Exception(ErrorCodes::INCORRECT_QUERY, "ON CLUSTER is not allowed because user-defined functions are replicated automatically"); + DDLQueryOnClusterParams params; params.access_to_check = std::move(access_rights_elements); - return executeDDLQueryOnCluster(query_ptr, getContext(), params); + return executeDDLQueryOnCluster(query_ptr, current_context, params); } - auto current_context = getContext(); current_context->checkAccess(access_rights_elements); - auto & user_defined_function_factory = UserDefinedSQLFunctionFactory::instance(); - auto function_name = create_function_query.getFunctionName(); + bool throw_if_exists = !create_function_query.if_not_exists && !create_function_query.or_replace; + bool replace_if_exists = create_function_query.or_replace; - bool if_not_exists = create_function_query.if_not_exists; - bool replace = create_function_query.or_replace; - - create_function_query.if_not_exists = false; - create_function_query.or_replace = false; - - validateFunction(create_function_query.function_core, function_name); - user_defined_function_factory.registerFunction(current_context, function_name, query_ptr, replace, if_not_exists, persist_function); + UserDefinedSQLFunctionFactory::instance().registerFunction(current_context, function_name, query_ptr, throw_if_exists, replace_if_exists); return {}; } -void InterpreterCreateFunctionQuery::validateFunction(ASTPtr function, const String & name) -{ - ASTFunction * lambda_function = function->as(); - - if (!lambda_function) - throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Expected function, got: {}", function->formatForErrorMessage()); - - auto & lambda_function_expression_list = lambda_function->arguments->children; - - if (lambda_function_expression_list.size() != 2) - throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Lambda must have arguments and body"); - - const ASTFunction * tuple_function_arguments = lambda_function_expression_list[0]->as(); - - if (!tuple_function_arguments || !tuple_function_arguments->arguments) - throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Lambda must have valid arguments"); - - std::unordered_set arguments; - - for (const auto & argument : tuple_function_arguments->arguments->children) - { - const auto * argument_identifier = argument->as(); - - if (!argument_identifier) - throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Lambda argument must be identifier"); - - const auto & argument_name = argument_identifier->name(); - auto [_, inserted] = arguments.insert(argument_name); - if (!inserted) - throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Identifier {} already used as function parameter", argument_name); - } - - ASTPtr function_body = lambda_function_expression_list[1]; - if (!function_body) - throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Lambda must have valid function body"); - - validateFunctionRecursiveness(function_body, name); -} - -void InterpreterCreateFunctionQuery::validateFunctionRecursiveness(ASTPtr node, const String & function_to_create) -{ - for (const auto & child : node->children) - { - auto function_name_opt = tryGetFunctionName(child); - if (function_name_opt && function_name_opt.value() == function_to_create) - throw Exception(ErrorCodes::CANNOT_CREATE_RECURSIVE_FUNCTION, "You cannot create recursive function"); 
- - validateFunctionRecursiveness(child, function_to_create); - } -} } diff --git a/src/Interpreters/InterpreterCreateFunctionQuery.h b/src/Interpreters/InterpreterCreateFunctionQuery.h index a67fdb9605d..d5fedd5ca6b 100644 --- a/src/Interpreters/InterpreterCreateFunctionQuery.h +++ b/src/Interpreters/InterpreterCreateFunctionQuery.h @@ -8,24 +8,18 @@ namespace DB class Context; -class InterpreterCreateFunctionQuery : public IInterpreter, WithContext +class InterpreterCreateFunctionQuery : public IInterpreter, WithMutableContext { public: - InterpreterCreateFunctionQuery(const ASTPtr & query_ptr_, ContextPtr context_, bool persist_function_) - : WithContext(context_) - , query_ptr(query_ptr_) - , persist_function(persist_function_) {} + InterpreterCreateFunctionQuery(const ASTPtr & query_ptr_, ContextMutablePtr context_) + : WithMutableContext(context_), query_ptr(query_ptr_) + { + } BlockIO execute() override; - void setInternal(bool internal_); - private: - static void validateFunction(ASTPtr function, const String & name); - static void validateFunctionRecursiveness(ASTPtr node, const String & function_to_create); - ASTPtr query_ptr; - bool persist_function; }; } diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index e89aa2244fe..e9cf06c5c69 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -12,17 +12,14 @@ #include #include -#include #include #include #include -#include #include #include #include -#include #include #include #include @@ -37,7 +34,6 @@ #include #include #include -#include #include #include #include @@ -59,7 +55,6 @@ #include #include -#include #include #include #include @@ -484,9 +479,8 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription( { column_type = DataTypeFactory::instance().get(col_decl.type); - const auto * aggregate_function_type = typeid_cast(column_type.get()); - if (attach && aggregate_function_type && aggregate_function_type->isVersioned()) - aggregate_function_type->setVersion(0, /* if_empty */true); + if (attach) + setVersionToAggregateFunctions(column_type, true); if (col_decl.null_modifier) { @@ -732,7 +726,8 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti else if (create.as_table_function) { /// Table function without columns list. 
- auto table_function = TableFunctionFactory::instance().get(create.as_table_function, getContext()); + auto table_function_ast = create.as_table_function->ptr(); + auto table_function = TableFunctionFactory::instance().get(table_function_ast, getContext()); properties.columns = table_function->getActualTableStructure(getContext()); } else if (create.is_dictionary) @@ -835,7 +830,7 @@ void InterpreterCreateQuery::validateTableStructure(const ASTCreateQuery & creat { for (const auto & [name, type] : properties.columns.getAllPhysical()) { - if (isObject(type)) + if (type->hasDynamicSubcolumns()) { throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot create table with column '{}' which type is '{}' " @@ -973,7 +968,7 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const if (as_create.storage) create.set(create.storage, as_create.storage->ptr()); else if (as_create.as_table_function) - create.as_table_function = as_create.as_table_function->clone(); + create.set(create.as_table_function, as_create.as_table_function->ptr()); else throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot set engine, it's a bug."); @@ -1349,12 +1344,12 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create, /// NOTE: CREATE query may be rewritten by Storage creator or table function if (create.as_table_function) { - const auto & factory = TableFunctionFactory::instance(); - auto table_func = factory.get(create.as_table_function, getContext()); + auto table_function_ast = create.as_table_function->ptr(); + auto table_function = TableFunctionFactory::instance().get(table_function_ast, getContext()); /// In case of CREATE AS table_function() query we should use global context /// in storage creation because there will be no query context on server startup /// and because storage lifetime is bigger than query context lifetime. - res = table_func->execute(create.as_table_function, getContext(), create.getTable(), properties.columns, /*use_global_context=*/true); + res = table_function->execute(table_function_ast, getContext(), create.getTable(), properties.columns, /*use_global_context=*/true); res->renameInMemory({create.getDatabase(), create.getTable(), create.uuid}); } else @@ -1404,7 +1399,7 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create, /// we can safely destroy the object without a call to "shutdown", because there is guarantee /// that no background threads/similar resources remain after exception from "startup". 
- if (!res->supportsDynamicSubcolumns() && hasObjectColumns(res->getInMemoryMetadataPtr()->getColumns())) + if (!res->supportsDynamicSubcolumns() && hasDynamicSubcolumns(res->getInMemoryMetadataPtr()->getColumns())) { throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot create table with column of type Object, " @@ -1704,8 +1699,12 @@ void InterpreterCreateQuery::addColumnsDescriptionToCreateQueryIfNecessary(ASTCr return; auto ast_storage = std::make_shared(); - auto query_from_storage = DB::getCreateQueryFromStorage(storage, ast_storage, false, - getContext()->getSettingsRef().max_parser_depth, true); + unsigned max_parser_depth = static_cast(getContext()->getSettingsRef().max_parser_depth); + auto query_from_storage = DB::getCreateQueryFromStorage(storage, + ast_storage, + false, + max_parser_depth, + true); auto & create_query_from_storage = query_from_storage->as(); if (!create.columns_list) diff --git a/src/Interpreters/InterpreterDescribeQuery.cpp b/src/Interpreters/InterpreterDescribeQuery.cpp index 0524feea1f6..512f9be6fa1 100644 --- a/src/Interpreters/InterpreterDescribeQuery.cpp +++ b/src/Interpreters/InterpreterDescribeQuery.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -17,7 +18,6 @@ #include #include - namespace DB { @@ -60,10 +60,9 @@ Block InterpreterDescribeQuery::getSampleBlock(bool include_subcolumns) return block; } - BlockIO InterpreterDescribeQuery::execute() { - ColumnsDescription columns; + std::vector columns; StorageSnapshotPtr storage_snapshot; const auto & ast = query_ptr->as(); @@ -72,14 +71,34 @@ BlockIO InterpreterDescribeQuery::execute() if (table_expression.subquery) { - auto names_and_types = InterpreterSelectWithUnionQuery::getSampleBlock( - table_expression.subquery->children.at(0), getContext()).getNamesAndTypesList(); - columns = ColumnsDescription(std::move(names_and_types)); + NamesAndTypesList names_and_types; + auto select_query = table_expression.subquery->children.at(0); + auto current_context = getContext(); + + if (settings.allow_experimental_analyzer) + { + SelectQueryOptions select_query_options; + names_and_types = InterpreterSelectQueryAnalyzer(select_query, select_query_options, current_context).getSampleBlock().getNamesAndTypesList(); + } + else + { + names_and_types = InterpreterSelectWithUnionQuery::getSampleBlock(select_query, current_context).getNamesAndTypesList(); + } + + for (auto && [name, type] : names_and_types) + { + ColumnDescription description; + description.name = std::move(name); + description.type = std::move(type); + columns.emplace_back(std::move(description)); + } } else if (table_expression.table_function) { TableFunctionPtr table_function_ptr = TableFunctionFactory::instance().get(table_expression.table_function, getContext()); - columns = table_function_ptr->getActualTableStructure(getContext()); + auto table_function_column_descriptions = table_function_ptr->getActualTableStructure(getContext()); + for (const auto & table_function_column_description : table_function_column_descriptions) + columns.emplace_back(table_function_column_description); } else { @@ -90,7 +109,9 @@ BlockIO InterpreterDescribeQuery::execute() auto metadata_snapshot = table->getInMemoryMetadataPtr(); storage_snapshot = table->getStorageSnapshot(metadata_snapshot, getContext()); - columns = metadata_snapshot->getColumns(); + auto metadata_column_descriptions = metadata_snapshot->getColumns(); + for (const auto & metadata_column_description : metadata_column_descriptions) + 
columns.emplace_back(metadata_column_description); } bool extend_object_types = settings.describe_extend_object_types && storage_snapshot; diff --git a/src/Interpreters/InterpreterDropFunctionQuery.cpp b/src/Interpreters/InterpreterDropFunctionQuery.cpp index bb2032f355a..df81ae661c7 100644 --- a/src/Interpreters/InterpreterDropFunctionQuery.cpp +++ b/src/Interpreters/InterpreterDropFunctionQuery.cpp @@ -1,17 +1,22 @@ #include #include +#include +#include #include #include #include -#include -#include #include namespace DB { +namespace ErrorCodes +{ + extern const int INCORRECT_QUERY; +} + BlockIO InterpreterDropFunctionQuery::execute() { FunctionNameNormalizer().visit(query_ptr.get()); @@ -20,17 +25,23 @@ BlockIO InterpreterDropFunctionQuery::execute() AccessRightsElements access_rights_elements; access_rights_elements.emplace_back(AccessType::DROP_FUNCTION); + auto current_context = getContext(); + if (!drop_function_query.cluster.empty()) { + if (current_context->getUserDefinedSQLObjectsLoader().isReplicated()) + throw Exception(ErrorCodes::INCORRECT_QUERY, "ON CLUSTER is not allowed because user-defined functions are replicated automatically"); + DDLQueryOnClusterParams params; params.access_to_check = std::move(access_rights_elements); - return executeDDLQueryOnCluster(query_ptr, getContext(), params); + return executeDDLQueryOnCluster(query_ptr, current_context, params); } - auto current_context = getContext(); current_context->checkAccess(access_rights_elements); - UserDefinedSQLFunctionFactory::instance().unregisterFunction(current_context, drop_function_query.function_name, drop_function_query.if_exists); + bool throw_if_not_exists = !drop_function_query.if_exists; + + UserDefinedSQLFunctionFactory::instance().unregisterFunction(current_context, drop_function_query.function_name, throw_if_not_exists); return {}; } diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp index 8be3dce7bf1..28f8e43ee9b 100644 --- a/src/Interpreters/InterpreterDropQuery.cpp +++ b/src/Interpreters/InterpreterDropQuery.cpp @@ -213,7 +213,7 @@ BlockIO InterpreterDropQuery::executeToTableImpl(ContextPtr context_, ASTDropQue { /// And for simple MergeTree we can stop merges before acquiring the lock auto merges_blocker = table->getActionLock(ActionLocks::PartsMerge); - auto table_lock = table->lockExclusively(context_->getCurrentQueryId(), context_->getSettingsRef().lock_acquire_timeout); + table_lock = table->lockExclusively(context_->getCurrentQueryId(), context_->getSettingsRef().lock_acquire_timeout); } auto metadata_snapshot = table->getInMemoryMetadataPtr(); diff --git a/src/Interpreters/InterpreterExplainQuery.cpp b/src/Interpreters/InterpreterExplainQuery.cpp index 4799970b6a1..fb8d3c6049f 100644 --- a/src/Interpreters/InterpreterExplainQuery.cpp +++ b/src/Interpreters/InterpreterExplainQuery.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -28,6 +29,9 @@ #include +#include +#include + namespace DB { @@ -155,6 +159,30 @@ struct QueryASTSettings {"graph", graph}, {"optimize", optimize} }; + + std::unordered_map> integer_settings; +}; + +struct QueryTreeSettings +{ + bool run_passes = false; + bool dump_passes = false; + bool dump_ast = false; + Int64 passes = -1; + + constexpr static char name[] = "QUERY TREE"; + + std::unordered_map> boolean_settings = + { + {"run_passes", run_passes}, + {"dump_passes", dump_passes}, + {"dump_ast", dump_ast} + }; + + std::unordered_map> integer_settings = + { + {"passes", passes} + }; 
}; struct QueryPlanSettings @@ -177,6 +205,8 @@ struct QueryPlanSettings {"json", json}, {"sorting", query_plan_options.sorting}, }; + + std::unordered_map> integer_settings; }; struct QueryPipelineSettings @@ -193,18 +223,31 @@ struct QueryPipelineSettings {"graph", graph}, {"compact", compact}, }; + + std::unordered_map> integer_settings; }; template struct ExplainSettings : public Settings { using Settings::boolean_settings; + using Settings::integer_settings; bool has(const std::string & name_) const + { + return hasBooleanSetting(name_) || hasIntegerSetting(name_); + } + + bool hasBooleanSetting(const std::string & name_) const { return boolean_settings.count(name_) > 0; } + bool hasIntegerSetting(const std::string & name_) const + { + return integer_settings.count(name_) > 0; + } + void setBooleanSetting(const std::string & name_, bool value) { auto it = boolean_settings.find(name_); @@ -214,6 +257,15 @@ struct ExplainSettings : public Settings it->second.get() = value; } + void setIntegerSetting(const std::string & name_, Int64 value) + { + auto it = integer_settings.find(name_); + if (it == integer_settings.end()) + throw Exception("Unknown setting for ExplainSettings: " + name_, ErrorCodes::LOGICAL_ERROR); + + it->second.get() = value; + } + std::string getSettingsList() const { std::string res; @@ -224,6 +276,13 @@ struct ExplainSettings : public Settings res += setting.first; } + for (const auto & setting : integer_settings) + { + if (!res.empty()) + res += ", "; + + res += setting.first; + } return res; } @@ -246,15 +305,23 @@ ExplainSettings checkAndGetSettings(const ASTPtr & ast_settings) if (change.value.getType() != Field::Types::UInt64) throw Exception(ErrorCodes::INVALID_SETTING_VALUE, - "Invalid type {} for setting \"{}\" only boolean settings are supported", + "Invalid type {} for setting \"{}\" only integer settings are supported", change.value.getTypeName(), change.name); - auto value = change.value.get(); - if (value > 1) - throw Exception("Invalid value " + std::to_string(value) + " for setting \"" + change.name + - "\". Only boolean settings are supported", ErrorCodes::INVALID_SETTING_VALUE); + if (settings.hasBooleanSetting(change.name)) + { + auto value = change.value.get(); + if (value > 1) + throw Exception("Invalid value " + std::to_string(value) + " for setting \"" + change.name + + "\". Expected boolean type", ErrorCodes::INVALID_SETTING_VALUE); - settings.setBooleanSetting(change.name, value); + settings.setBooleanSetting(change.name, value); + } + else + { + auto value = change.value.get(); + settings.setIntegerSetting(change.name, value); + } } return settings; @@ -304,6 +371,46 @@ QueryPipeline InterpreterExplainQuery::executeImpl() ast.getExplainedQuery()->format(IAST::FormatSettings(buf, false)); break; } + case ASTExplainQuery::QueryTree: + { + if (ast.getExplainedQuery()->as() == nullptr) + throw Exception(ErrorCodes::INCORRECT_QUERY, "Only SELECT is supported for EXPLAIN QUERY TREE query"); + + auto settings = checkAndGetSettings(ast.getSettings()); + auto query_tree = buildQueryTree(ast.getExplainedQuery(), getContext()); + + if (settings.run_passes) + { + auto query_tree_pass_manager = QueryTreePassManager(getContext()); + addQueryTreePasses(query_tree_pass_manager); + + size_t pass_index = settings.passes < 0 ? 
query_tree_pass_manager.getPasses().size() : static_cast(settings.passes); + + if (settings.dump_passes) + { + query_tree_pass_manager.dump(buf, pass_index); + if (pass_index > 0) + buf << '\n'; + } + + query_tree_pass_manager.run(query_tree, pass_index); + + query_tree->dumpTree(buf); + } + else + { + query_tree->dumpTree(buf); + } + + if (settings.dump_ast) + { + buf << '\n'; + buf << '\n'; + query_tree->toAST()->format(IAST::FormatSettings(buf, false)); + } + + break; + } case ASTExplainQuery::QueryPlan: { if (!dynamic_cast(ast.getExplainedQuery().get())) @@ -312,8 +419,16 @@ QueryPipeline InterpreterExplainQuery::executeImpl() auto settings = checkAndGetSettings(ast.getSettings()); QueryPlan plan; - InterpreterSelectWithUnionQuery interpreter(ast.getExplainedQuery(), getContext(), options); - interpreter.buildQueryPlan(plan); + if (getContext()->getSettingsRef().allow_experimental_analyzer) + { + InterpreterSelectQueryAnalyzer interpreter(ast.getExplainedQuery(), options, getContext()); + plan = std::move(interpreter).extractQueryPlan(); + } + else + { + InterpreterSelectWithUnionQuery interpreter(ast.getExplainedQuery(), getContext(), options); + interpreter.buildQueryPlan(plan); + } if (settings.optimize) plan.optimize(QueryPlanOptimizationSettings::fromContext(getContext())); @@ -347,8 +462,17 @@ QueryPipeline InterpreterExplainQuery::executeImpl() auto settings = checkAndGetSettings(ast.getSettings()); QueryPlan plan; - InterpreterSelectWithUnionQuery interpreter(ast.getExplainedQuery(), getContext(), options); - interpreter.buildQueryPlan(plan); + if (getContext()->getSettingsRef().allow_experimental_analyzer) + { + InterpreterSelectQueryAnalyzer interpreter(ast.getExplainedQuery(), options, getContext()); + plan = std::move(interpreter).extractQueryPlan(); + } + else + { + InterpreterSelectWithUnionQuery interpreter(ast.getExplainedQuery(), getContext(), options); + interpreter.buildQueryPlan(plan); + } + auto pipeline = plan.buildQueryPipeline( QueryPlanOptimizationSettings::fromContext(getContext()), BuildQueryPipelineSettings::fromContext(getContext())); diff --git a/src/Interpreters/InterpreterFactory.cpp b/src/Interpreters/InterpreterFactory.cpp index 170f3c463b4..06d5746af59 100644 --- a/src/Interpreters/InterpreterFactory.cpp +++ b/src/Interpreters/InterpreterFactory.cpp @@ -63,6 +63,7 @@ #include #include #include +#include #include #include #include @@ -118,6 +119,9 @@ std::unique_ptr InterpreterFactory::get(ASTPtr & query, ContextMut if (query->as()) { + if (context->getSettingsRef().allow_experimental_analyzer) + return std::make_unique(query, options, context); + /// This is internal part of ASTSelectWithUnionQuery. /// Even if there is SELECT without union, it is represented by ASTSelectWithUnionQuery with single ASTSelectQuery as a child. 
return std::make_unique(query, context, options); @@ -125,6 +129,10 @@ std::unique_ptr InterpreterFactory::get(ASTPtr & query, ContextMut else if (query->as()) { ProfileEvents::increment(ProfileEvents::SelectQuery); + + if (context->getSettingsRef().allow_experimental_analyzer) + return std::make_unique(query, options, context); + return std::make_unique(query, context, options); } else if (query->as()) @@ -296,7 +304,7 @@ std::unique_ptr InterpreterFactory::get(ASTPtr & query, ContextMut } else if (query->as()) { - return std::make_unique(query, context, true /*persist_function*/); + return std::make_unique(query, context); } else if (query->as()) { diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 51a3dde261a..107740c3b96 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -292,7 +292,7 @@ Chain InterpreterInsertQuery::buildChainImpl( out.addSource(std::make_shared( out.getInputHeader(), table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, - table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0)); + table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL)); } auto counting = std::make_shared(out.getInputHeader(), thread_status, getContext()->getQuota()); diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 79deb38317c..d8ac263e3d1 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -117,7 +117,8 @@ FilterDAGInfoPtr generateFilterActions( const StoragePtr & storage, const StorageSnapshotPtr & storage_snapshot, const StorageMetadataPtr & metadata_snapshot, - Names & prerequisite_columns) + Names & prerequisite_columns, + PreparedSetsPtr prepared_sets) { auto filter_info = std::make_shared(); @@ -155,7 +156,7 @@ FilterDAGInfoPtr generateFilterActions( /// Using separate expression analyzer to prevent any possible alias injection auto syntax_result = TreeRewriter(context).analyzeSelect(query_ast, TreeRewriterResult({}, storage, storage_snapshot)); - SelectQueryExpressionAnalyzer analyzer(query_ast, syntax_result, context, metadata_snapshot); + SelectQueryExpressionAnalyzer analyzer(query_ast, syntax_result, context, metadata_snapshot, {}, false, {}, prepared_sets); filter_info->actions = analyzer.simpleSelectActions(); filter_info->column_name = expr_list->children.at(0)->getColumnName(); @@ -615,7 +616,8 @@ InterpreterSelectQuery::InterpreterSelectQuery( if (row_policy_filter) { filter_info = generateFilterActions( - table_id, row_policy_filter, context, storage, storage_snapshot, metadata_snapshot, required_columns); + table_id, row_policy_filter, context, storage, storage_snapshot, metadata_snapshot, required_columns, + prepared_sets); query_info.filter_asts.push_back(row_policy_filter); } @@ -623,7 +625,8 @@ InterpreterSelectQuery::InterpreterSelectQuery( if (query_info.additional_filter_ast) { additional_filter_info = generateFilterActions( - table_id, query_info.additional_filter_ast, context, storage, storage_snapshot, metadata_snapshot, required_columns); + table_id, query_info.additional_filter_ast, context, storage, storage_snapshot, metadata_snapshot, required_columns, + prepared_sets); additional_filter_info->do_remove_column = true; @@ -2143,6 +2146,8 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc auto [limit_length, limit_offset] = 
getLimitLengthAndOffset(query, context); + auto local_limits = getStorageLimits(*context, options); + /** Optimization - if not specified DISTINCT, WHERE, GROUP, HAVING, ORDER, JOIN, LIMIT BY, WITH TIES * but LIMIT is specified, and limit + offset < max_block_size, * then as the block size we will use limit + offset (not to read more from the table than requested), @@ -2161,17 +2166,22 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc && !query_analyzer->hasAggregation() && !query_analyzer->hasWindow() && query.limitLength() - && limit_length <= std::numeric_limits::max() - limit_offset - && limit_length + limit_offset < max_block_size) + && limit_length <= std::numeric_limits::max() - limit_offset) { - max_block_size = std::max(1, limit_length + limit_offset); - max_threads_execute_query = max_streams = 1; + if (limit_length + limit_offset < max_block_size) + { + max_block_size = std::max(1, limit_length + limit_offset); + max_threads_execute_query = max_streams = 1; + } + if (limit_length + limit_offset < local_limits.local_limits.size_limits.max_rows) + { + query_info.limit = limit_length + limit_offset; + } } if (!max_block_size) throw Exception("Setting 'max_block_size' cannot be zero", ErrorCodes::PARAMETER_OUT_OF_BOUND); - auto local_limits = getStorageLimits(*context, options); storage_limits.emplace_back(local_limits); /// Initialize the initial data streams to which the query transforms are superimposed. Table or subquery or prepared input? diff --git a/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp b/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp new file mode 100644 index 00000000000..61ec5932b7d --- /dev/null +++ b/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp @@ -0,0 +1,120 @@ +#include + +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int UNSUPPORTED_METHOD; +} + +namespace +{ + +ASTPtr normalizeAndValidateQuery(const ASTPtr & query) +{ + if (query->as() || query->as()) + { + return query; + } + else if (auto * subquery = query->as()) + { + return subquery->children[0]; + } + else + { + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, + "Expected ASTSelectWithUnionQuery or ASTSelectQuery. 
Actual {}", + query->formatForErrorMessage()); + } +} + +QueryTreeNodePtr buildQueryTreeAndRunPasses(const ASTPtr & query, const ContextPtr & context) +{ + auto query_tree = buildQueryTree(query, context); + + QueryTreePassManager query_tree_pass_manager(context); + addQueryTreePasses(query_tree_pass_manager); + query_tree_pass_manager.run(query_tree); + + return query_tree; +} + +} + +InterpreterSelectQueryAnalyzer::InterpreterSelectQueryAnalyzer( + const ASTPtr & query_, + const SelectQueryOptions & select_query_options_, + ContextPtr context_) + : WithContext(context_) + , query(normalizeAndValidateQuery(query_)) + , query_tree(buildQueryTreeAndRunPasses(query, context_)) + , select_query_options(select_query_options_) + , planner(query_tree, select_query_options, context_) +{ +} + +InterpreterSelectQueryAnalyzer::InterpreterSelectQueryAnalyzer( + const QueryTreeNodePtr & query_tree_, + const SelectQueryOptions & select_query_options_, + ContextPtr context_) + : WithContext(context_) + , query(query_tree_->toAST()) + , query_tree(query_tree_) + , select_query_options(select_query_options_) + , planner(query_tree, select_query_options, context_) +{ +} + +Block InterpreterSelectQueryAnalyzer::getSampleBlock() +{ + planner.buildQueryPlanIfNeeded(); + return planner.getQueryPlan().getCurrentDataStream().header; +} + +BlockIO InterpreterSelectQueryAnalyzer::execute() +{ + planner.buildQueryPlanIfNeeded(); + auto & query_plan = planner.getQueryPlan(); + + QueryPlanOptimizationSettings optimization_settings; + BuildQueryPipelineSettings build_pipeline_settings; + auto pipeline_builder = query_plan.buildQueryPipeline(optimization_settings, build_pipeline_settings); + + BlockIO result; + result.pipeline = QueryPipelineBuilder::getPipeline(std::move(*pipeline_builder)); + + if (!select_query_options.ignore_quota && (select_query_options.to_stage == QueryProcessingStage::Complete)) + result.pipeline.setQuota(getContext()->getQuota()); + + return result; +} + +QueryPlan && InterpreterSelectQueryAnalyzer::extractQueryPlan() && +{ + planner.buildQueryPlanIfNeeded(); + return std::move(planner).extractQueryPlan(); +} + +void InterpreterSelectQueryAnalyzer::extendQueryLogElemImpl(QueryLogElement & elem, const ASTPtr &, ContextPtr) const +{ + elem.query_kind = "Select"; +} + +} diff --git a/src/Interpreters/InterpreterSelectQueryAnalyzer.h b/src/Interpreters/InterpreterSelectQueryAnalyzer.h new file mode 100644 index 00000000000..e9884567ab0 --- /dev/null +++ b/src/Interpreters/InterpreterSelectQueryAnalyzer.h @@ -0,0 +1,49 @@ +#pragma once + +#include +#include + +#include +#include +#include + +#include + +namespace DB +{ + +class InterpreterSelectQueryAnalyzer : public IInterpreter, public WithContext +{ +public: + /// Initialize interpreter with query AST + InterpreterSelectQueryAnalyzer(const ASTPtr & query_, + const SelectQueryOptions & select_query_options_, + ContextPtr context_); + + /// Initialize interpreter with query tree + InterpreterSelectQueryAnalyzer(const QueryTreeNodePtr & query_tree_, + const SelectQueryOptions & select_query_options_, + ContextPtr context_); + + Block getSampleBlock(); + + BlockIO execute() override; + + QueryPlan && extractQueryPlan() &&; + + bool supportsTransactions() const override { return true; } + + bool ignoreLimits() const override { return select_query_options.ignore_limits; } + + bool ignoreQuota() const override { return select_query_options.ignore_quota; } + + void extendQueryLogElemImpl(QueryLogElement & elem, const ASTPtr &, ContextPtr) const 
override; + +private: + ASTPtr query; + QueryTreeNodePtr query_tree; + SelectQueryOptions select_query_options; + Planner planner; +}; + +} diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index dcb8f0eaadb..d05fd70e074 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Interpreters/JIT/compileFunction.cpp b/src/Interpreters/JIT/compileFunction.cpp index d3a7eb0cfaa..e12b4894eb0 100644 --- a/src/Interpreters/JIT/compileFunction.cpp +++ b/src/Interpreters/JIT/compileFunction.cpp @@ -614,7 +614,7 @@ static void compileSortDescription(llvm::Module & module, /** Join results from all comparator steps. * Result of columns comparison equals to first compare block where lhs is not equal to lhs or last compare block. */ - auto * compare_result_phi = b.CreatePHI(b.getInt8Ty(), comparator_steps_and_results.size()); + auto * compare_result_phi = b.CreatePHI(b.getInt8Ty(), static_cast(comparator_steps_and_results.size())); for (const auto & [block, result_value] : comparator_steps_and_results) compare_result_phi->addIncoming(result_value, block); diff --git a/src/Interpreters/JoinToSubqueryTransformVisitor.cpp b/src/Interpreters/JoinToSubqueryTransformVisitor.cpp index 8e515caace4..5879c96f7b3 100644 --- a/src/Interpreters/JoinToSubqueryTransformVisitor.cpp +++ b/src/Interpreters/JoinToSubqueryTransformVisitor.cpp @@ -546,10 +546,13 @@ std::vector normalizeColumnNamesExtractNeeded( { auto alias = aliases.find(ident->name())->second; auto alias_ident = alias->clone(); - alias_ident->as()->restoreTable(); - bool alias_equals_column_name = alias_ident->getColumnNameWithoutAlias() == ident->getColumnNameWithoutAlias(); - if (!alias_equals_column_name) - throw Exception("Alias clashes with qualified column '" + ident->name() + "'", ErrorCodes::AMBIGUOUS_COLUMN_NAME); + if (auto * alias_ident_typed = alias_ident->as()) + { + alias_ident_typed->restoreTable(); + bool alias_equals_column_name = alias_ident->getColumnNameWithoutAlias() == ident->getColumnNameWithoutAlias(); + if (!alias_equals_column_name) + throw Exception("Alias clashes with qualified column '" + ident->name() + "'", ErrorCodes::AMBIGUOUS_COLUMN_NAME); + } } String short_name = ident->shortName(); String original_long_name; diff --git a/src/Interpreters/NormalizeSelectWithUnionQueryVisitor.cpp b/src/Interpreters/NormalizeSelectWithUnionQueryVisitor.cpp index 40c42f7728e..b3c2063c6f6 100644 --- a/src/Interpreters/NormalizeSelectWithUnionQueryVisitor.cpp +++ b/src/Interpreters/NormalizeSelectWithUnionQueryVisitor.cpp @@ -45,8 +45,7 @@ void NormalizeSelectWithUnionQueryMatcher::visit(ASTSelectWithUnionQuery & ast, SelectUnionModesSet current_set_of_modes; bool distinct_found = false; - int i; - for (i = union_modes.size() - 1; i >= 0; --i) + for (Int64 i = union_modes.size() - 1; i >= 0; --i) { current_set_of_modes.insert(union_modes[i]); if (const auto * union_ast = typeid_cast(select_list[i + 1].get())) diff --git a/src/Interpreters/PartLog.cpp b/src/Interpreters/PartLog.cpp index 75e6d02d6e1..b35ee50b98e 100644 --- a/src/Interpreters/PartLog.cpp +++ b/src/Interpreters/PartLog.cpp @@ -207,8 +207,8 @@ bool PartLog::addNewParts( elem.table_name = table_id.table_name; elem.partition_id = part->info.partition_id; elem.part_name = part->name; - elem.disk_name = part->data_part_storage->getDiskName(); - elem.path_on_disk = 
part->data_part_storage->getFullPath(); + elem.disk_name = part->getDataPartStorage().getDiskName(); + elem.path_on_disk = part->getDataPartStorage().getFullPath(); elem.part_type = part->getType(); elem.bytes_compressed_on_disk = part->getBytesOnDisk(); diff --git a/src/Interpreters/PreparedSets.h b/src/Interpreters/PreparedSets.h index 06600c49f13..a50e390ee5a 100644 --- a/src/Interpreters/PreparedSets.h +++ b/src/Interpreters/PreparedSets.h @@ -39,7 +39,6 @@ public: /// This is a temporary table for transferring to remote servers for distributed query processing. StoragePtr table; -private: /// The source is obtained using the InterpreterSelectQuery subquery. std::unique_ptr source; }; diff --git a/src/Interpreters/ProcessList.cpp b/src/Interpreters/ProcessList.cpp index d5194a02513..551d20f835a 100644 --- a/src/Interpreters/ProcessList.cpp +++ b/src/Interpreters/ProcessList.cpp @@ -242,16 +242,21 @@ ProcessList::EntryPtr ProcessList::insert(const String & query_, const IAST * as /// since allocation and deallocation could happen in different threads } - auto process_it = processes.emplace(processes.end(), - query_context, query_, client_info, priorities.insert(settings.priority), std::move(thread_group), query_kind); + auto process_it = processes.emplace(processes.end(), std::make_shared( + query_context, + query_, + client_info, + priorities.insert(static_cast(settings.priority)), + std::move(thread_group), + query_kind)); increaseQueryKindAmount(query_kind); res = std::make_shared(*this, process_it); - process_it->setUserProcessList(&user_process_list); + (*process_it)->setUserProcessList(&user_process_list); - user_process_list.queries.emplace(client_info.current_query_id, &res->get()); + user_process_list.queries.emplace(client_info.current_query_id, res->getQueryStatus()); /// Track memory usage for all simultaneously running queries from single user. user_process_list.user_memory_tracker.setOrRaiseHardLimit(settings.max_memory_usage_for_user); @@ -280,11 +285,11 @@ ProcessListEntry::~ProcessListEntry() { auto lock = parent.safeLock(); - String user = it->getClientInfo().current_user; - String query_id = it->getClientInfo().current_query_id; - IAST::QueryKind query_kind = it->query_kind; + String user = (*it)->getClientInfo().current_user; + String query_id = (*it)->getClientInfo().current_query_id; + IAST::QueryKind query_kind = (*it)->query_kind; - const QueryStatus * process_list_element_ptr = &*it; + const QueryStatusPtr process_list_element_ptr = *it; auto user_process_list_it = parent.user_to_queries.find(user); if (user_process_list_it == parent.user_to_queries.end()) @@ -307,7 +312,7 @@ ProcessListEntry::~ProcessListEntry() } /// Wait for the query if it is in the cancellation right now. - parent.cancelled_cv.wait(lock.lock, [&]() { return it->is_cancelling == false; }); + parent.cancelled_cv.wait(lock.lock, [&]() { return process_list_element_ptr->is_cancelling == false; }); /// This removes the memory_tracker of one request. 
parent.processes.erase(it); @@ -344,6 +349,7 @@ QueryStatus::QueryStatus( , client_info(client_info_) , thread_group(std::move(thread_group_)) , priority_handle(std::move(priority_handle_)) + , global_overcommit_tracker(context_->getGlobalOvercommitTracker()) , query_kind(query_kind_) , num_queries_increment(CurrentMetrics::Query) { @@ -360,8 +366,8 @@ QueryStatus::~QueryStatus() { if (user_process_list) user_process_list->user_overcommit_tracker.onQueryStop(memory_tracker); - if (auto shared_context = getContext()) - shared_context->getGlobalOvercommitTracker()->onQueryStop(memory_tracker); + if (global_overcommit_tracker) + global_overcommit_tracker->onQueryStop(memory_tracker); } } @@ -430,7 +436,7 @@ ThrottlerPtr QueryStatus::getUserNetworkThrottler() } -QueryStatus * ProcessList::tryGetProcessListElement(const String & current_query_id, const String & current_user) +QueryStatusPtr ProcessList::tryGetProcessListElement(const String & current_query_id, const String & current_user) { auto user_it = user_to_queries.find(current_user); if (user_it != user_to_queries.end()) @@ -442,13 +448,13 @@ QueryStatus * ProcessList::tryGetProcessListElement(const String & current_query return query_it->second; } - return nullptr; + return {}; } CancellationCode ProcessList::sendCancelToQuery(const String & current_query_id, const String & current_user, bool kill) { - QueryStatus * elem; + QueryStatusPtr elem; /// Cancelling the query should be done without the lock. /// @@ -484,7 +490,7 @@ CancellationCode ProcessList::sendCancelToQuery(const String & current_query_id, void ProcessList::killAllQueries() { - std::vector cancelled_processes; + std::vector cancelled_processes; SCOPE_EXIT({ auto lock = safeLock(); @@ -498,8 +504,8 @@ void ProcessList::killAllQueries() cancelled_processes.reserve(processes.size()); for (auto & process : processes) { - cancelled_processes.push_back(&process); - process.is_cancelling = true; + cancelled_processes.push_back(process); + process->is_cancelling = true; } } @@ -558,7 +564,7 @@ ProcessList::Info ProcessList::getInfo(bool get_thread_list, bool get_profile_ev per_query_infos.reserve(processes.size()); for (const auto & process : processes) - per_query_infos.emplace_back(process.getInfo(get_thread_list, get_profile_events, get_settings)); + per_query_infos.emplace_back(process->getInfo(get_thread_list, get_profile_events, get_settings)); return per_query_infos; } diff --git a/src/Interpreters/ProcessList.h b/src/Interpreters/ProcessList.h index 6943c7cfcd8..5fbdce358f9 100644 --- a/src/Interpreters/ProcessList.h +++ b/src/Interpreters/ProcessList.h @@ -133,6 +133,8 @@ protected: ProcessListForUser * user_process_list = nullptr; + OvercommitTracker * global_overcommit_tracker = nullptr; + IAST::QueryKind query_kind; /// This field is unused in this class, but it @@ -221,6 +223,8 @@ public: [[nodiscard]] bool checkTimeLimitSoft(); }; +using QueryStatusPtr = std::shared_ptr; + /// Information of process list for user. struct ProcessListForUserInfo @@ -241,7 +245,7 @@ struct ProcessListForUser ProcessListForUser(ContextPtr global_context, ProcessList * global_process_list); /// query_id -> ProcessListElement(s). There can be multiple queries with the same query_id as long as all queries except one are cancelled. 
- using QueryToElement = std::unordered_map; + using QueryToElement = std::unordered_map; QueryToElement queries; ProfileEvents::Counters user_performance_counters{VariableContext::User, &ProfileEvents::global_counters}; @@ -278,7 +282,7 @@ class ProcessList; class ProcessListEntry { private: - using Container = std::list; + using Container = std::list; ProcessList & parent; Container::iterator it; @@ -289,11 +293,8 @@ public: ~ProcessListEntry(); - QueryStatus * operator->() { return &*it; } - const QueryStatus * operator->() const { return &*it; } - - QueryStatus & get() { return *it; } - const QueryStatus & get() const { return *it; } + QueryStatusPtr getQueryStatus() { return *it; } + const QueryStatusPtr getQueryStatus() const { return *it; } }; @@ -319,7 +320,7 @@ protected: class ProcessList : public ProcessListBase { public: - using Element = QueryStatus; + using Element = QueryStatusPtr; using Entry = ProcessListEntry; using QueryAmount = UInt64; @@ -358,7 +359,7 @@ protected: ThrottlerPtr total_network_throttler; /// Call under lock. Finds process with specified current_user and current_query_id. - QueryStatus * tryGetProcessListElement(const String & current_query_id, const String & current_user); + QueryStatusPtr tryGetProcessListElement(const String & current_query_id, const String & current_user); /// limit for insert. 0 means no limit. Otherwise, when limit exceeded, an exception is thrown. size_t max_insert_queries_amount = 0; diff --git a/src/Interpreters/RowRefs.cpp b/src/Interpreters/RowRefs.cpp index 68076e1fec2..0e553ef145e 100644 --- a/src/Interpreters/RowRefs.cpp +++ b/src/Interpreters/RowRefs.cpp @@ -89,7 +89,7 @@ public: assert(!sorted.load(std::memory_order_acquire)); - entries.emplace_back(key, row_refs.size()); + entries.emplace_back(key, static_cast(row_refs.size())); row_refs.emplace_back(RowRef(block, row_num)); } diff --git a/src/Interpreters/RowRefs.h b/src/Interpreters/RowRefs.h index 2c9f2062a82..294da1da571 100644 --- a/src/Interpreters/RowRefs.h +++ b/src/Interpreters/RowRefs.h @@ -29,7 +29,10 @@ struct RowRef SizeT row_num = 0; RowRef() = default; - RowRef(const Block * block_, size_t row_num_) : block(block_), row_num(row_num_) {} + RowRef(const Block * block_, size_t row_num_) + : block(block_) + , row_num(static_cast(row_num_)) + {} }; /// Single linked list of references to rows. 
Used for ALL JOINs (non-unique JOINs) diff --git a/src/Interpreters/Session.cpp b/src/Interpreters/Session.cpp index 52588a5f4cc..7639dec813d 100644 --- a/src/Interpreters/Session.cpp +++ b/src/Interpreters/Session.cpp @@ -244,7 +244,7 @@ void Session::shutdownNamedSessions() NamedSessionsStorage::instance().shutdown(); } -Session::Session(const ContextPtr & global_context_, ClientInfo::Interface interface_, bool is_secure) +Session::Session(const ContextPtr & global_context_, ClientInfo::Interface interface_, bool is_secure, const std::string & certificate) : auth_id(UUIDHelpers::generateV4()), global_context(global_context_), log(&Poco::Logger::get(String{magic_enum::enum_name(interface_)} + "-Session")) @@ -252,6 +252,7 @@ Session::Session(const ContextPtr & global_context_, ClientInfo::Interface inter prepared_client_info.emplace(); prepared_client_info->interface = interface_; prepared_client_info->is_secure = is_secure; + prepared_client_info->certificate = certificate; } Session::~Session() diff --git a/src/Interpreters/Session.h b/src/Interpreters/Session.h index ed4f7809dee..0f17c378915 100644 --- a/src/Interpreters/Session.h +++ b/src/Interpreters/Session.h @@ -32,7 +32,7 @@ public: /// Stops using named sessions. The method must be called at the server shutdown. static void shutdownNamedSessions(); - Session(const ContextPtr & global_context_, ClientInfo::Interface interface_, bool is_secure = false); + Session(const ContextPtr & global_context_, ClientInfo::Interface interface_, bool is_secure = false, const std::string & certificate = ""); ~Session(); Session(const Session &&) = delete; diff --git a/src/Interpreters/SessionLog.cpp b/src/Interpreters/SessionLog.cpp index 3edb84c046d..79aac63b40c 100644 --- a/src/Interpreters/SessionLog.cpp +++ b/src/Interpreters/SessionLog.cpp @@ -86,6 +86,7 @@ NamesAndTypesList SessionLogElement::getNamesAndTypes() AUTH_TYPE_NAME_AND_VALUE(AuthType::DOUBLE_SHA1_PASSWORD), AUTH_TYPE_NAME_AND_VALUE(AuthType::LDAP), AUTH_TYPE_NAME_AND_VALUE(AuthType::KERBEROS), + AUTH_TYPE_NAME_AND_VALUE(AuthType::SSL_CERTIFICATE), }); #undef AUTH_TYPE_NAME_AND_VALUE static_assert(static_cast(AuthenticationType::MAX) == 7); diff --git a/src/Interpreters/Set.cpp b/src/Interpreters/Set.cpp index ded8b04a589..e75232aa0f5 100644 --- a/src/Interpreters/Set.cpp +++ b/src/Interpreters/Set.cpp @@ -131,6 +131,7 @@ void Set::setHeader(const ColumnsWithTypeAndName & header) if (const auto * low_cardinality_type = typeid_cast(data_types.back().get())) { data_types.back() = low_cardinality_type->getDictionaryType(); + set_elements_types.back() = low_cardinality_type->getDictionaryType(); materialized_columns.emplace_back(key_columns.back()->convertToFullColumnIfLowCardinality()); key_columns.back() = materialized_columns.back().get(); } diff --git a/src/Interpreters/TableJoin.h b/src/Interpreters/TableJoin.h index 3835ef77deb..316beccae80 100644 --- a/src/Interpreters/TableJoin.h +++ b/src/Interpreters/TableJoin.h @@ -55,15 +55,26 @@ public: ASTPtr on_filter_condition_left; ASTPtr on_filter_condition_right; + std::string analyzer_left_filter_condition_column_name; + std::string analyzer_right_filter_condition_column_name; + JoinOnClause() = default; std::pair condColumnNames() const { std::pair res; + + if (!analyzer_left_filter_condition_column_name.empty()) + res.first = analyzer_left_filter_condition_column_name; + + if (!analyzer_right_filter_condition_column_name.empty()) + res.second = analyzer_right_filter_condition_column_name; + if (on_filter_condition_left) 
res.first = on_filter_condition_left->getColumnName(); if (on_filter_condition_right) res.second = on_filter_condition_right->getColumnName(); + return res; } @@ -111,9 +122,6 @@ private: * to the subquery will be added expression `expr(t2 columns)`. * It's possible to use name `expr(t2 columns)`. */ - - friend class TreeRewriter; - SizeLimits size_limits; const size_t default_max_bytes = 0; const bool join_use_nulls = false; @@ -124,9 +132,6 @@ private: const size_t max_files_to_merge = 0; const String temporary_files_codec = "LZ4"; - /// the limit has no technical reasons, it supposed to improve safety - const size_t MAX_DISJUNCTS = 16; /// NOLINT - ASTs key_asts_left; ASTs key_asts_right; @@ -160,6 +165,8 @@ private: std::string right_storage_name; + bool is_join_with_constant = false; + Names requiredJoinedNames() const; /// Create converting actions and change key column names if required @@ -178,6 +185,8 @@ private: NamesAndTypesList correctedColumnsAddedByJoin() const; + void deduplicateAndQualifyColumnNames(const NameSet & left_table_columns, const String & right_table_prefix); + public: TableJoin() = default; @@ -217,8 +226,8 @@ public: bool allowParallelHashJoin() const; bool joinUseNulls() const { return join_use_nulls; } - bool forceNullableRight() const { return join_use_nulls && isLeftOrFull(table_join.kind); } - bool forceNullableLeft() const { return join_use_nulls && isRightOrFull(table_join.kind); } + bool forceNullableRight() const { return join_use_nulls && isLeftOrFull(kind()); } + bool forceNullableLeft() const { return join_use_nulls && isRightOrFull(kind()); } size_t defaultMaxBytes() const { return default_max_bytes; } size_t maxJoinedBlockRows() const { return max_joined_block_rows; } size_t maxRowsInRightBlock() const { return partial_merge_join_rows_in_right_blocks; } @@ -229,6 +238,9 @@ public: bool oneDisjunct() const; + ASTTableJoin & getTableJoin() { return table_join; } + const ASTTableJoin & getTableJoin() const { return table_join; } + JoinOnClause & getOnlyClause() { assertHasOneOnExpr(); return clauses[0]; } const JoinOnClause & getOnlyClause() const { assertHasOneOnExpr(); return clauses[0]; } @@ -266,13 +278,26 @@ public: NamesWithAliases getNamesWithAliases(const NameSet & required_columns) const; NamesWithAliases getRequiredColumns(const Block & sample, const Names & action_required_columns) const; - void deduplicateAndQualifyColumnNames(const NameSet & left_table_columns, const String & right_table_prefix); size_t rightKeyInclusion(const String & name) const; NameSet requiredRightKeys() const; + bool isJoinWithConstant() const + { + return is_join_with_constant; + } + + void setIsJoinWithConstant(bool is_join_with_constant_value) + { + is_join_with_constant = is_join_with_constant_value; + } + bool leftBecomeNullable(const DataTypePtr & column_type) const; bool rightBecomeNullable(const DataTypePtr & column_type) const; void addJoinedColumn(const NameAndTypePair & joined_column); + void setColumnsAddedByJoin(const NamesAndTypesList & columns_added_by_join_value) + { + columns_added_by_join = columns_added_by_join_value; + } template void addJoinedColumnsAndCorrectTypesImpl(TColumns & left_columns, bool correct_nullability); @@ -294,15 +319,13 @@ public: ASTPtr leftKeysList() const; ASTPtr rightKeysList() const; /// For ON syntax only - const NamesAndTypesList & columnsFromJoinedTable() const { return columns_from_joined_table; } - - Names columnsAddedByJoin() const + void setColumnsFromJoinedTable(NamesAndTypesList columns_from_joined_table_value, 
const NameSet & left_table_columns, const String & right_table_prefix) { - Names res; - for (const auto & col : columns_added_by_join) - res.push_back(col.name); - return res; + columns_from_joined_table = std::move(columns_from_joined_table_value); + deduplicateAndQualifyColumnNames(left_table_columns, right_table_prefix); } + const NamesAndTypesList & columnsFromJoinedTable() const { return columns_from_joined_table; } + const NamesAndTypesList & columnsAddedByJoin() const { return columns_added_by_join; } /// StorageJoin overrides key names (cause of different names qualification) void setRightKeys(const Names & keys) { getOnlyClause().key_names_right = keys; } diff --git a/src/Interpreters/TemporaryDataOnDisk.cpp b/src/Interpreters/TemporaryDataOnDisk.cpp index 3eb93f1f20e..c5ae6f6c885 100644 --- a/src/Interpreters/TemporaryDataOnDisk.cpp +++ b/src/Interpreters/TemporaryDataOnDisk.cpp @@ -20,7 +20,7 @@ namespace ErrorCodes extern const int NOT_ENOUGH_SPACE; } -void TemporaryDataOnDiskScope::deltaAllocAndCheck(int compressed_delta, int uncompressed_delta) +void TemporaryDataOnDiskScope::deltaAllocAndCheck(ssize_t compressed_delta, ssize_t uncompressed_delta) { if (parent) parent->deltaAllocAndCheck(compressed_delta, uncompressed_delta); diff --git a/src/Interpreters/TemporaryDataOnDisk.h b/src/Interpreters/TemporaryDataOnDisk.h index 81bd2067650..11edc8700d2 100644 --- a/src/Interpreters/TemporaryDataOnDisk.h +++ b/src/Interpreters/TemporaryDataOnDisk.h @@ -53,7 +53,7 @@ public: VolumePtr getVolume() const { return volume; } protected: - void deltaAllocAndCheck(int compressed_delta, int uncompressed_delta); + void deltaAllocAndCheck(ssize_t compressed_delta, ssize_t uncompressed_delta); TemporaryDataOnDiskScopePtr parent = nullptr; VolumePtr volume; diff --git a/src/Interpreters/ThreadStatusExt.cpp b/src/Interpreters/ThreadStatusExt.cpp index 4810174e395..ee126f2da11 100644 --- a/src/Interpreters/ThreadStatusExt.cpp +++ b/src/Interpreters/ThreadStatusExt.cpp @@ -55,12 +55,12 @@ void ThreadStatus::applyQuerySettings() #if defined(OS_LINUX) /// Set "nice" value if required. 
- Int32 new_os_thread_priority = settings.os_thread_priority; + Int32 new_os_thread_priority = static_cast(settings.os_thread_priority); if (new_os_thread_priority && hasLinuxCapability(CAP_SYS_NICE)) { LOG_TRACE(log, "Setting nice to {}", new_os_thread_priority); - if (0 != setpriority(PRIO_PROCESS, thread_id, new_os_thread_priority)) + if (0 != setpriority(PRIO_PROCESS, static_cast(thread_id), new_os_thread_priority)) throwFromErrno("Cannot 'setpriority'", ErrorCodes::CANNOT_SET_THREAD_PRIORITY); os_thread_priority = new_os_thread_priority; @@ -349,7 +349,7 @@ void ThreadStatus::detachQuery(bool exit_if_already_detached, bool thread_exits) { LOG_TRACE(log, "Resetting nice"); - if (0 != setpriority(PRIO_PROCESS, thread_id, 0)) + if (0 != setpriority(PRIO_PROCESS, static_cast(thread_id), 0)) LOG_ERROR(log, "Cannot 'setpriority' back to zero: {}", errnoToString()); os_thread_priority = 0; diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index 74f084df40b..e4301bad1e8 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -23,7 +23,6 @@ #include #include #include -#include #include #include @@ -35,6 +34,7 @@ #include #include +#include #include #include diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 2f5bfd00938..da12dccd8d8 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -24,13 +24,14 @@ #include #include #include -#include -#include #include #include #include #include +#include +#include + #include #include #include @@ -610,7 +611,7 @@ void getArrayJoinedColumns(ASTPtr & query, TreeRewriterResult & result, const AS } } -void setJoinStrictness(ASTSelectQuery & select_query, JoinStrictness join_default_strictness, bool old_any, ASTTableJoin & out_table_join) +void setJoinStrictness(ASTSelectQuery & select_query, JoinStrictness join_default_strictness, bool old_any, std::shared_ptr & analyzed_join) { const ASTTablesInSelectQueryElement * node = select_query.join(); if (!node) @@ -648,7 +649,7 @@ void setJoinStrictness(ASTSelectQuery & select_query, JoinStrictness join_defaul throw Exception("ANY FULL JOINs are not implemented", ErrorCodes::NOT_IMPLEMENTED); } - out_table_join = table_join; + analyzed_join->getTableJoin() = table_join; } /// Evaluate expression and return boolean value if it can be interpreted as bool. 
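The TableJoin hunks above replace direct writes to columns_from_joined_table (and to table_join) with accessors, so assigning the joined-table columns and qualifying their names can no longer be separated. What follows is a minimal standalone sketch of that pattern only; NameAndTypeSketch and TableJoinSketch are simplified stand-ins, not the real NamesAndTypesList or TableJoin.

#include <string>
#include <utility>
#include <vector>

struct NameAndTypeSketch { std::string name; std::string type; };

class TableJoinSketch
{
public:
    /// Setting the joined-table columns now also qualifies their names, so a caller
    /// can no longer assign the list and forget the deduplication step.
    void setColumnsFromJoinedTable(std::vector<NameAndTypeSketch> columns, const std::string & right_table_prefix)
    {
        columns_from_joined_table = std::move(columns);
        for (auto & column : columns_from_joined_table)
            column.name = right_table_prefix + column.name;  /// stands in for deduplicateAndQualifyColumnNames()
    }

    const std::vector<NameAndTypeSketch> & columnsFromJoinedTable() const { return columns_from_joined_table; }

private:
    std::vector<NameAndTypeSketch> columns_from_joined_table;
};

int main()
{
    TableJoinSketch table_join;
    table_join.setColumnsFromJoinedTable({{"id", "UInt64"}, {"name", "String"}}, "right.");
    return table_join.columnsFromJoinedTable().size() == 2 ? 0 : 1;
}

With the setter owning the qualification step, the TreeRewriter hunk below shrinks to a single call instead of mutating two members.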
@@ -1235,14 +1236,11 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( if (tables_with_columns.size() > 1) { const auto & right_table = tables_with_columns[1]; - auto & cols_from_joined = result.analyzed_join->columns_from_joined_table; - cols_from_joined = right_table.columns; + auto columns_from_joined_table = right_table.columns; /// query can use materialized or aliased columns from right joined table, /// we want to request it for right table - cols_from_joined.insert(cols_from_joined.end(), right_table.hidden_columns.begin(), right_table.hidden_columns.end()); - - result.analyzed_join->deduplicateAndQualifyColumnNames( - source_columns_set, right_table.table.getQualifiedNamePrefix()); + columns_from_joined_table.insert(columns_from_joined_table.end(), right_table.hidden_columns.begin(), right_table.hidden_columns.end()); + result.analyzed_join->setColumnsFromJoinedTable(std::move(columns_from_joined_table), source_columns_set, right_table.table.getQualifiedNamePrefix()); } translateQualifiedNames(query, *select_query, source_columns_set, tables_with_columns); @@ -1253,7 +1251,7 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( NameSet all_source_columns_set = source_columns_set; if (table_join) { - for (const auto & [name, _] : table_join->columns_from_joined_table) + for (const auto & [name, _] : table_join->columnsFromJoinedTable()) all_source_columns_set.insert(name); } @@ -1303,7 +1301,7 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( getArrayJoinedColumns(query, result, select_query, result.source_columns, source_columns_set); setJoinStrictness( - *select_query, settings.join_default_strictness, settings.any_join_distinct_right_table_keys, result.analyzed_join->table_join); + *select_query, settings.join_default_strictness, settings.any_join_distinct_right_table_keys, result.analyzed_join); auto * table_join_ast = select_query->join() ? 
select_query->join()->table_join->as() : nullptr; if (table_join_ast && tables_with_columns.size() >= 2) diff --git a/src/Interpreters/UserDefinedSQLFunctionFactory.cpp b/src/Interpreters/UserDefinedSQLFunctionFactory.cpp deleted file mode 100644 index 2f876f00cc3..00000000000 --- a/src/Interpreters/UserDefinedSQLFunctionFactory.cpp +++ /dev/null @@ -1,168 +0,0 @@ -#include "UserDefinedSQLFunctionFactory.h" - -#include - -#include -#include -#include -#include -#include - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int FUNCTION_ALREADY_EXISTS; - extern const int UNKNOWN_FUNCTION; - extern const int CANNOT_DROP_FUNCTION; -} - -UserDefinedSQLFunctionFactory & UserDefinedSQLFunctionFactory::instance() -{ - static UserDefinedSQLFunctionFactory result; - return result; -} - -void UserDefinedSQLFunctionFactory::registerFunction(ContextPtr context, const String & function_name, ASTPtr create_function_query, bool replace, bool if_not_exists, bool persist) -{ - if (FunctionFactory::instance().hasNameOrAlias(function_name)) - { - if (if_not_exists) - return; - - throw Exception(ErrorCodes::FUNCTION_ALREADY_EXISTS, "The function '{}' already exists", function_name); - } - - if (AggregateFunctionFactory::instance().hasNameOrAlias(function_name)) - { - if (if_not_exists) - return; - - throw Exception(ErrorCodes::FUNCTION_ALREADY_EXISTS, "The aggregate function '{}' already exists", function_name); - } - - if (UserDefinedExecutableFunctionFactory::instance().has(function_name, context)) - { - if (if_not_exists) - return; - - throw Exception(ErrorCodes::FUNCTION_ALREADY_EXISTS, "User defined executable function '{}' already exists", function_name); - } - - std::lock_guard lock(mutex); - - auto [it, inserted] = function_name_to_create_query.emplace(function_name, create_function_query); - - if (!inserted) - { - if (if_not_exists) - return; - - if (replace) - it->second = create_function_query; - else - throw Exception(ErrorCodes::FUNCTION_ALREADY_EXISTS, - "The function name '{}' is not unique", - function_name); - } - - if (persist) - { - try - { - UserDefinedSQLObjectsLoader::instance().storeObject(context, UserDefinedSQLObjectType::Function, function_name, *create_function_query, replace); - } - catch (Exception & exception) - { - function_name_to_create_query.erase(it); - exception.addMessage(fmt::format("while storing user defined function {} on disk", backQuote(function_name))); - throw; - } - } -} - -void UserDefinedSQLFunctionFactory::unregisterFunction(ContextPtr context, const String & function_name, bool if_exists) -{ - if (FunctionFactory::instance().hasNameOrAlias(function_name) || - AggregateFunctionFactory::instance().hasNameOrAlias(function_name)) - throw Exception(ErrorCodes::CANNOT_DROP_FUNCTION, "Cannot drop system function '{}'", function_name); - - if (UserDefinedExecutableFunctionFactory::instance().has(function_name, context)) - throw Exception(ErrorCodes::CANNOT_DROP_FUNCTION, "Cannot drop user defined executable function '{}'", function_name); - - std::lock_guard lock(mutex); - - auto it = function_name_to_create_query.find(function_name); - if (it == function_name_to_create_query.end()) - { - if (if_exists) - return; - - throw Exception(ErrorCodes::UNKNOWN_FUNCTION, - "The function name '{}' is not registered", - function_name); - } - - try - { - UserDefinedSQLObjectsLoader::instance().removeObject(context, UserDefinedSQLObjectType::Function, function_name); - } - catch (Exception & exception) - { - exception.addMessage(fmt::format("while removing user defined 
function {} from disk", backQuote(function_name))); - throw; - } - - function_name_to_create_query.erase(it); -} - -ASTPtr UserDefinedSQLFunctionFactory::get(const String & function_name) const -{ - std::lock_guard lock(mutex); - - auto it = function_name_to_create_query.find(function_name); - if (it == function_name_to_create_query.end()) - throw Exception(ErrorCodes::UNKNOWN_FUNCTION, - "The function name '{}' is not registered", - function_name); - - return it->second; -} - -ASTPtr UserDefinedSQLFunctionFactory::tryGet(const std::string & function_name) const -{ - std::lock_guard lock(mutex); - - auto it = function_name_to_create_query.find(function_name); - if (it == function_name_to_create_query.end()) - return nullptr; - - return it->second; -} - -bool UserDefinedSQLFunctionFactory::has(const String & function_name) const -{ - return tryGet(function_name) != nullptr; -} - -std::vector UserDefinedSQLFunctionFactory::getAllRegisteredNames() const -{ - std::vector registered_names; - - std::lock_guard lock(mutex); - registered_names.reserve(function_name_to_create_query.size()); - - for (const auto & [name, _] : function_name_to_create_query) - registered_names.emplace_back(name); - - return registered_names; -} - -bool UserDefinedSQLFunctionFactory::empty() const -{ - std::lock_guard lock(mutex); - return function_name_to_create_query.empty(); -} -} diff --git a/src/Interpreters/UserDefinedSQLFunctionFactory.h b/src/Interpreters/UserDefinedSQLFunctionFactory.h deleted file mode 100644 index db43bb7298e..00000000000 --- a/src/Interpreters/UserDefinedSQLFunctionFactory.h +++ /dev/null @@ -1,54 +0,0 @@ -#pragma once - -#include -#include - -#include - -#include -#include - - -namespace DB -{ - -/// Factory for SQLUserDefinedFunctions -class UserDefinedSQLFunctionFactory : public IHints<1, UserDefinedSQLFunctionFactory> -{ -public: - static UserDefinedSQLFunctionFactory & instance(); - - /** Register function for function_name in factory for specified create_function_query. - * If function exists and if_not_exists = false and replace = false throws exception. - * If replace = true and sql user defined function with function_name already exists replace it with create_function_query. - * If persist = true persist function on disk. - */ - void registerFunction(ContextPtr context, const String & function_name, ASTPtr create_function_query, bool replace, bool if_not_exists, bool persist); - - /** Unregister function for function_name. - * If if_exists = true then do not throw exception if function is not registered. - * If if_exists = false then throw exception if function is not registered. - */ - void unregisterFunction(ContextPtr context, const String & function_name, bool if_exists); - - /// Get function create query for function_name. If no function registered with function_name throws exception. - ASTPtr get(const String & function_name) const; - - /// Get function create query for function_name. If no function registered with function_name return nullptr. - ASTPtr tryGet(const String & function_name) const; - - /// Check if function with function_name registered. - bool has(const String & function_name) const; - - /// Get all user defined functions registered names. 
- std::vector getAllRegisteredNames() const override; - - /// Check whether any UDFs have been registered - bool empty() const; - -private: - std::unordered_map function_name_to_create_query; - mutable std::mutex mutex; -}; - -} diff --git a/src/Interpreters/UserDefinedSQLObjectsLoader.cpp b/src/Interpreters/UserDefinedSQLObjectsLoader.cpp deleted file mode 100644 index c6f50fc4a0a..00000000000 --- a/src/Interpreters/UserDefinedSQLObjectsLoader.cpp +++ /dev/null @@ -1,184 +0,0 @@ -#include "UserDefinedSQLObjectsLoader.h" - -#include - -#include -#include -#include - -#include -#include -#include -#include - -#include -#include - -#include -#include -#include -#include - -#include -#include -#include - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int OBJECT_ALREADY_STORED_ON_DISK; - extern const int OBJECT_WAS_NOT_STORED_ON_DISK; -} - -UserDefinedSQLObjectsLoader & UserDefinedSQLObjectsLoader::instance() -{ - static UserDefinedSQLObjectsLoader ret; - return ret; -} - -UserDefinedSQLObjectsLoader::UserDefinedSQLObjectsLoader() - : log(&Poco::Logger::get("UserDefinedSQLObjectsLoader")) -{} - -void UserDefinedSQLObjectsLoader::loadUserDefinedObject(ContextPtr context, UserDefinedSQLObjectType object_type, std::string_view name, const String & path) -{ - auto name_ref = StringRef(name.data(), name.size()); - LOG_DEBUG(log, "Loading user defined object {} from file {}", backQuote(name_ref), path); - - /// There is .sql file with user defined object creation statement. - ReadBufferFromFile in(path); - - String object_create_query; - readStringUntilEOF(object_create_query, in); - - try - { - switch (object_type) - { - case UserDefinedSQLObjectType::Function: - { - ParserCreateFunctionQuery parser; - ASTPtr ast = parseQuery( - parser, - object_create_query.data(), - object_create_query.data() + object_create_query.size(), - "in file " + path, - 0, - context->getSettingsRef().max_parser_depth); - - InterpreterCreateFunctionQuery interpreter(ast, context, false /*persist_function*/); - interpreter.execute(); - } - } - } - catch (Exception & e) - { - e.addMessage(fmt::format("while loading user defined objects {} from path {}", backQuote(name_ref), path)); - throw; - } -} - -void UserDefinedSQLObjectsLoader::loadObjects(ContextPtr context) -{ - if (unlikely(!enable_persistence)) - return; - - LOG_DEBUG(log, "Loading user defined objects"); - - String dir_path = context->getUserDefinedPath(); - Poco::DirectoryIterator dir_end; - for (Poco::DirectoryIterator it(dir_path); it != dir_end; ++it) - { - if (it->isDirectory()) - continue; - - const std::string & file_name = it.name(); - - /// For '.svn', '.gitignore' directory and similar. 
- if (file_name.at(0) == '.') - continue; - - if (!startsWith(file_name, "function_") || !endsWith(file_name, ".sql")) - continue; - - std::string_view object_name = file_name; - - object_name.remove_prefix(strlen("function_")); - object_name.remove_suffix(strlen(".sql")); - - if (object_name.empty()) - continue; - - loadUserDefinedObject(context, UserDefinedSQLObjectType::Function, object_name, dir_path + it.name()); - } -} - -void UserDefinedSQLObjectsLoader::storeObject(ContextPtr context, UserDefinedSQLObjectType object_type, const String & object_name, const IAST & ast, bool replace) -{ - if (unlikely(!enable_persistence)) - return; - - String dir_path = context->getUserDefinedPath(); - String file_path; - - switch (object_type) - { - case UserDefinedSQLObjectType::Function: - { - file_path = dir_path + "function_" + escapeForFileName(object_name) + ".sql"; - } - } - - if (!replace && std::filesystem::exists(file_path)) - throw Exception(ErrorCodes::OBJECT_ALREADY_STORED_ON_DISK, "User defined object {} already stored on disk", backQuote(file_path)); - - LOG_DEBUG(log, "Storing object {} to file {}", backQuote(object_name), file_path); - - WriteBufferFromOwnString create_statement_buf; - formatAST(ast, create_statement_buf, false); - writeChar('\n', create_statement_buf); - String create_statement = create_statement_buf.str(); - - WriteBufferFromFile out(file_path, create_statement.size()); - writeString(create_statement, out); - out.next(); - if (context->getSettingsRef().fsync_metadata) - out.sync(); - out.close(); - - LOG_DEBUG(log, "Stored object {}", backQuote(object_name)); -} - -void UserDefinedSQLObjectsLoader::removeObject(ContextPtr context, UserDefinedSQLObjectType object_type, const String & object_name) -{ - if (unlikely(!enable_persistence)) - return; - - String dir_path = context->getUserDefinedPath(); - LOG_DEBUG(log, "Removing file for user defined object {} from {}", backQuote(object_name), dir_path); - - std::filesystem::path file_path; - - switch (object_type) - { - case UserDefinedSQLObjectType::Function: - { - file_path = dir_path + "function_" + escapeForFileName(object_name) + ".sql"; - } - } - - if (!std::filesystem::exists(file_path)) - throw Exception(ErrorCodes::OBJECT_WAS_NOT_STORED_ON_DISK, "User defined object {} was not stored on disk", backQuote(file_path.string())); - - std::filesystem::remove(file_path); -} - -void UserDefinedSQLObjectsLoader::enable(bool enable_persistence_) -{ - enable_persistence = enable_persistence_; -} - -} diff --git a/src/Interpreters/UserDefinedSQLObjectsLoader.h b/src/Interpreters/UserDefinedSQLObjectsLoader.h deleted file mode 100644 index 9dfba1181c1..00000000000 --- a/src/Interpreters/UserDefinedSQLObjectsLoader.h +++ /dev/null @@ -1,37 +0,0 @@ -#pragma once - -#include -#include - -#include - - -namespace DB -{ - -enum class UserDefinedSQLObjectType -{ - Function -}; - -class UserDefinedSQLObjectsLoader : private boost::noncopyable -{ -public: - static UserDefinedSQLObjectsLoader & instance(); - UserDefinedSQLObjectsLoader(); - - void loadObjects(ContextPtr context); - void storeObject(ContextPtr context, UserDefinedSQLObjectType object_type, const String & object_name, const IAST & ast, bool replace); - void removeObject(ContextPtr context, UserDefinedSQLObjectType object_type, const String & object_name); - - /// For ClickHouse local if path is not set we can disable loader. 
- void enable(bool enable_persistence); - -private: - - void loadUserDefinedObject(ContextPtr context, UserDefinedSQLObjectType object_type, std::string_view object_name, const String & file_path); - Poco::Logger * log; - bool enable_persistence = true; -}; - -} diff --git a/src/Interpreters/WindowDescription.cpp b/src/Interpreters/WindowDescription.cpp index 335610b2be9..7ed7788cf1d 100644 --- a/src/Interpreters/WindowDescription.cpp +++ b/src/Interpreters/WindowDescription.cpp @@ -20,7 +20,8 @@ std::string WindowFunctionDescription::dump() const WriteBufferFromOwnString ss; ss << "window function '" << column_name << "\n"; - ss << "function node " << function_node->dumpTree() << "\n"; + if (function_node) + ss << "function node " << function_node->dumpTree() << "\n"; ss << "aggregate function '" << aggregate_function->getName() << "'\n"; if (!function_parameters.empty()) { diff --git a/src/Interpreters/WindowDescription.h b/src/Interpreters/WindowDescription.h index e7bc0473c26..3b9af6575e8 100644 --- a/src/Interpreters/WindowDescription.h +++ b/src/Interpreters/WindowDescription.h @@ -99,7 +99,6 @@ struct WindowDescription // The window functions that are calculated for this window. std::vector window_functions; - std::string dump() const; void checkValid() const; diff --git a/src/Interpreters/convertFieldToType.cpp b/src/Interpreters/convertFieldToType.cpp index 55156cde7be..e57016d969a 100644 --- a/src/Interpreters/convertFieldToType.cpp +++ b/src/Interpreters/convertFieldToType.cpp @@ -88,7 +88,7 @@ Field convertIntToDecimalType(const Field & from, const DataTypeDecimal & typ if (!type.canStoreWhole(value)) throw Exception("Number is too big to place in " + type.getName(), ErrorCodes::ARGUMENT_OUT_OF_BOUND); - T scaled_value = type.getScaleMultiplier() * static_cast(value); + T scaled_value = type.getScaleMultiplier() * T(static_cast(value)); return DecimalField(scaled_value, type.getScale()); } @@ -236,10 +236,11 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID } if (which_type.isDateTime64() - && (which_from_type.isNativeInt() || which_from_type.isNativeUInt() || which_from_type.isDate() || which_from_type.isDate32() || which_from_type.isDateTime() || which_from_type.isDateTime64())) + && (src.getType() == Field::Types::UInt64 || src.getType() == Field::Types::Int64 || src.getType() == Field::Types::Decimal64)) { const auto scale = static_cast(type).getScale(); - const auto decimal_value = DecimalUtils::decimalFromComponents(applyVisitor(FieldVisitorConvertToNumber(), src), 0, scale); + const auto decimal_value + = DecimalUtils::decimalFromComponents(applyVisitor(FieldVisitorConvertToNumber(), src), 0, scale); return Field(DecimalField(decimal_value, scale)); } } @@ -386,6 +387,9 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID } else if (isObject(type)) { + if (src.getType() == Field::Types::Object) + return src; /// Already in needed type. + const auto * from_type_tuple = typeid_cast(from_type_hint); if (src.getType() == Field::Types::Tuple && from_type_tuple && from_type_tuple->haveExplicitNames()) { diff --git a/src/Interpreters/createBlockSelector.cpp b/src/Interpreters/createBlockSelector.cpp index b1a9a4e9e35..fce9833ddfb 100644 --- a/src/Interpreters/createBlockSelector.cpp +++ b/src/Interpreters/createBlockSelector.cpp @@ -50,7 +50,7 @@ IColumn::Selector createBlockSelector( /// libdivide support only UInt32 and UInt64. 
using TUInt32Or64 = std::conditional_t; - libdivide::divider divider(total_weight); + libdivide::divider divider(static_cast(total_weight)); const auto & data = typeid_cast &>(column).getData(); diff --git a/src/Interpreters/examples/hash_map_string_small.cpp b/src/Interpreters/examples/hash_map_string_small.cpp index 4a96f717bf7..b58cdfbacd0 100644 --- a/src/Interpreters/examples/hash_map_string_small.cpp +++ b/src/Interpreters/examples/hash_map_string_small.cpp @@ -23,7 +23,7 @@ struct SmallStringRef { - UInt32 size = 0; + size_t size = 0; union { diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 63641d4bdcb..087f3fd8887 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -34,7 +34,6 @@ #include #include #include -#include #include #include @@ -56,9 +55,9 @@ #include #include #include +#include #include -#include #include #include @@ -77,7 +76,6 @@ namespace ProfileEvents { - extern const Event QueryMaskingRulesMatch; extern const Event FailedQuery; extern const Event FailedInsertQuery; extern const Event FailedSelectQuery; @@ -109,37 +107,6 @@ static void checkASTSizeLimits(const IAST & ast, const Settings & settings) } -/// Makes a version of a query without sensitive information (e.g. passwords) for logging. -/// The parameter `parsed query` can be nullptr if the query cannot be parsed. -static String prepareQueryForLogging(const String & query, const ASTPtr & parsed_query, ContextPtr context) -{ - String res = query; - - // Wiping a password or hash from CREATE/ALTER USER query because we don't want it to go to logs. - if (parsed_query && canContainPassword(*parsed_query)) - { - ASTPtr ast_for_logging = parsed_query->clone(); - wipePasswordFromQuery(ast_for_logging); - res = serializeAST(*ast_for_logging); - } - - // Wiping sensitive data before cropping query by log_queries_cut_to_length, - // otherwise something like credit card without last digit can go to log. - if (auto * masker = SensitiveDataMasker::getInstance()) - { - auto matches = masker->wipeSensitiveData(res); - if (matches > 0) - { - ProfileEvents::increment(ProfileEvents::QueryMaskingRulesMatch, matches); - } - } - - res = res.substr(0, context->getSettingsRef().log_queries_cut_to_length); - - return res; -} - - /// Log query into text log (not into system table). static void logQuery(const String & query, ContextPtr context, bool internal, QueryProcessingStage::Enum stage) { @@ -425,14 +392,14 @@ static std::tuple executeQueryImpl( /// MUST go before any modification (except for prepared statements, /// since it substitute parameters and without them query does not contain /// parameters), to keep query as-is in query_log and server log. - query_for_logging = prepareQueryForLogging(query, ast, context); + query_for_logging = maskSensitiveInfoInQueryForLogging(query, ast, context); } catch (...) { /// Anyway log the query. if (query.empty()) query.assign(begin, std::min(end - begin, static_cast(max_query_size))); - query_for_logging = prepareQueryForLogging(query, ast, context); + query_for_logging = maskSensitiveInfoInQueryForLogging(query, ast, context); logQuery(query_for_logging, context, internal, stage); @@ -537,7 +504,7 @@ static std::tuple executeQueryImpl( { /// processlist also has query masked now, to avoid secrets leaks though SHOW PROCESSLIST by other users. 
process_list_entry = context->getProcessList().insert(query_for_logging, ast.get(), context); - context->setProcessListElement(&process_list_entry->get()); + context->setProcessListElement(process_list_entry->getQueryStatus()); } /// Load external tables if they were provided @@ -549,15 +516,9 @@ static std::tuple executeQueryImpl( if (insert_query) { if (insert_query->table_id) - { insert_query->table_id = context->resolveStorageID(insert_query->table_id); - LOG_DEBUG(&Poco::Logger::get("executeQuery"), "2) database: {}", insert_query->table_id.getDatabaseName()); - } else if (auto table = insert_query->getTable(); !table.empty()) - { insert_query->table_id = context->resolveStorageID(StorageID{insert_query->getDatabase(), table}); - LOG_DEBUG(&Poco::Logger::get("executeQuery"), "2) database: {}", insert_query->table_id.getDatabaseName()); - } } if (insert_query && insert_query->select) @@ -588,10 +549,28 @@ static std::tuple executeQueryImpl( std::shared_ptr quota; std::unique_ptr interpreter; + bool async_insert = false; auto * queue = context->getAsynchronousInsertQueue(); - const bool async_insert = queue - && insert_query && !insert_query->select - && insert_query->hasInlinedData() && settings.async_insert; + + if (insert_query && settings.async_insert) + { + String reason; + + if (!queue) + reason = "asynchronous insert queue is not configured"; + else if (insert_query->select) + reason = "insert query has select"; + else if (!insert_query->hasInlinedData()) + reason = "insert query doesn't have inlined data"; + else + async_insert = true; + + if (!async_insert) + { + LOG_DEBUG(&Poco::Logger::get("executeQuery"), + "Setting async_insert=1, but INSERT query will be executed synchronously (reason: {})", reason); + } + } if (async_insert) { @@ -701,9 +680,9 @@ static std::tuple executeQueryImpl( if (process_list_entry) { /// Query was killed before execution - if ((*process_list_entry)->isKilled()) - throw Exception("Query '" + (*process_list_entry)->getInfo().client_info.current_query_id + "' is killed in pending state", - ErrorCodes::QUERY_WAS_CANCELLED); + if (process_list_entry->getQueryStatus()->isKilled()) + throw Exception(ErrorCodes::QUERY_WAS_CANCELLED, + "Query '{}' is killed in pending state", process_list_entry->getQueryStatus()->getInfo().client_info.current_query_id); } /// Hold element of process list till end of query execution. 
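The hunk above replaces the one-line async_insert condition with an explicit decision that records why an INSERT falls back to synchronous execution. Below is a self-contained sketch of that control flow; InsertQueryStub and the boolean flags are simplified assumptions standing in for ASTInsertQuery, the async_insert setting and the asynchronous insert queue.

#include <iostream>
#include <string>

struct InsertQueryStub { bool has_select = false; bool has_inlined_data = false; };

static bool useAsynchronousInsert(const InsertQueryStub * insert, bool async_insert_setting, bool queue_configured, std::string & reason)
{
    if (!insert || !async_insert_setting)
        return false;                                       /// nothing to report: setting is off or not an INSERT
    if (!queue_configured)
        reason = "asynchronous insert queue is not configured";
    else if (insert->has_select)
        reason = "insert query has select";
    else if (!insert->has_inlined_data)
        reason = "insert query doesn't have inlined data";
    else
        return true;
    return false;
}

int main()
{
    InsertQueryStub insert;                                  /// INSERT without inlined data
    std::string reason;
    if (!useAsynchronousInsert(&insert, /*async_insert_setting=*/ true, /*queue_configured=*/ true, reason))
        std::cout << "async_insert=1, but the INSERT runs synchronously (reason: " << reason << ")\n";
}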
@@ -847,7 +826,7 @@ static std::tuple executeQueryImpl( pulling_pipeline = pipeline.pulling(), query_span](QueryPipeline & query_pipeline) mutable { - QueryStatus * process_list_elem = context->getProcessListElement(); + QueryStatusPtr process_list_elem = context->getProcessListElement(); if (process_list_elem) { @@ -932,9 +911,10 @@ static std::tuple executeQueryImpl( processor_elem.processor_name = processor->getName(); - processor_elem.elapsed_us = processor->getElapsedUs(); - processor_elem.input_wait_elapsed_us = processor->getInputWaitElapsedUs(); - processor_elem.output_wait_elapsed_us = processor->getOutputWaitElapsedUs(); + /// NOTE: convert this to UInt64 + processor_elem.elapsed_us = static_cast(processor->getElapsedUs()); + processor_elem.input_wait_elapsed_us = static_cast(processor->getInputWaitElapsedUs()); + processor_elem.output_wait_elapsed_us = static_cast(processor->getOutputWaitElapsedUs()); auto stats = processor->getProcessorDataStats(); processor_elem.input_rows = stats.input_rows; @@ -1013,7 +993,7 @@ static std::tuple executeQueryImpl( elem.exception_code = getCurrentExceptionCode(); elem.exception = getCurrentExceptionMessage(false); - QueryStatus * process_list_elem = context->getProcessListElement(); + QueryStatusPtr process_list_elem = context->getProcessListElement(); const Settings & current_settings = context->getSettingsRef(); /// Update performance counters before logging to query_log diff --git a/src/Interpreters/getHeaderForProcessingStage.cpp b/src/Interpreters/getHeaderForProcessingStage.cpp index e16647091ba..48acfb5512a 100644 --- a/src/Interpreters/getHeaderForProcessingStage.cpp +++ b/src/Interpreters/getHeaderForProcessingStage.cpp @@ -14,6 +14,7 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; + extern const int UNSUPPORTED_METHOD; } bool hasJoin(const ASTSelectQuery & select) @@ -118,6 +119,10 @@ Block getHeaderForProcessingStage( case QueryProcessingStage::WithMergeableStateAfterAggregationAndLimit: case QueryProcessingStage::MAX: { + /// TODO: Analyzer syntax analyzer result + if (!query_info.syntax_analyzer_result) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "getHeaderForProcessingStage is unsupported"); + auto query = query_info.query->clone(); TreeRewriterResult new_rewriter_result = *query_info.syntax_analyzer_result; removeJoin(*query->as(), new_rewriter_result, context); diff --git a/src/Interpreters/maskSensitiveInfoInQueryForLogging.cpp b/src/Interpreters/maskSensitiveInfoInQueryForLogging.cpp new file mode 100644 index 00000000000..c69f91394b9 --- /dev/null +++ b/src/Interpreters/maskSensitiveInfoInQueryForLogging.cpp @@ -0,0 +1,623 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace ProfileEvents +{ + extern const Event QueryMaskingRulesMatch; +} + + +namespace DB +{ + +namespace +{ + enum class PasswordWipingMode + { + Query, + BackupName, + }; + + + template + class PasswordWipingVisitor + { + public: + struct Data + { + bool can_contain_password = false; + bool password_was_hidden = false; + bool is_create_table_query = false; + bool is_create_database_query = false; + bool is_create_dictionary_query = false; + ContextPtr context; + PasswordWipingMode mode = PasswordWipingMode::Query; + }; + + using Visitor = std::conditional_t< + check_only, + ConstInDepthNodeVisitor, + InDepthNodeVisitor>; + + static bool needChildVisit(const ASTPtr & /* ast */, const ASTPtr & /* child */, Data & 
data) + { + if constexpr (check_only) + { + return !data.can_contain_password; + } + else + { + return true; + } + } + + static void visit(ASTPtr ast, Data & data) + { + if (auto * create_user_query = ast->as()) + { + visitCreateUserQuery(*create_user_query, data); + } + else if (auto * create_query = ast->as()) + { + visitCreateQuery(*create_query, data); + } + else if (auto * backup_query = ast->as()) + { + visitBackupQuery(*backup_query, data); + } + else if (auto * storage = ast->as()) + { + if (data.is_create_table_query) + visitTableEngine(*storage, data); + else if (data.is_create_database_query) + visitDatabaseEngine(*storage, data); + } + else if (auto * dictionary = ast->as()) + { + if (data.is_create_dictionary_query) + visitDictionaryDef(*dictionary, data); + } + else if (auto * function = ast->as()) + { + if (data.mode == PasswordWipingMode::BackupName) + wipePasswordFromBackupEngineArguments(*function, data); + else + visitFunction(*function, data); + } + } + + private: + static void visitCreateUserQuery(ASTCreateUserQuery & query, Data & data) + { + if (!query.auth_data) + return; + + auto auth_type = query.auth_data->getType(); + if (auth_type == AuthenticationType::NO_PASSWORD || auth_type == AuthenticationType::LDAP + || auth_type == AuthenticationType::KERBEROS || auth_type == AuthenticationType::SSL_CERTIFICATE) + return; /// No password, nothing to hide. + + if constexpr (check_only) + { + data.can_contain_password = true; + return; + } + + query.show_password = false; + data.password_was_hidden = true; + } + + static void visitCreateQuery(ASTCreateQuery & query, Data & data) + { + if (query.is_dictionary) + data.is_create_dictionary_query = true; + else if (query.table) + data.is_create_table_query = true; + else if (query.database) + data.is_create_database_query = true; + } + + static void visitTableEngine(ASTStorage & storage, Data & data) + { + if (!storage.engine) + return; + + const String & engine_name = storage.engine->name; + + if (engine_name == "ExternalDistributed") + { + /// ExternalDistributed('engine', 'host:port', 'database', 'table', 'user', 'password') + wipePasswordFromArgument(*storage.engine, data, 5); + } + else if (engine_name == "MySQL") + { + /// MySQL('host:port', 'database', 'table', 'user', 'password', ...) + wipePasswordFromArgument(*storage.engine, data, 4); + } + else if (engine_name == "PostgreSQL") + { + /// PostgreSQL('host:port', 'database', 'table', 'user', 'password', ...) + wipePasswordFromArgument(*storage.engine, data, 4); + } + else if (engine_name == "MaterializedPostgreSQL") + { + /// MaterializedPostgreSQL('host:port', 'database', 'table', 'user', 'password', ...) + wipePasswordFromArgument(*storage.engine, data, 4); + } + else if (engine_name == "MongoDB") + { + /// MongoDB('host:port', 'database', 'collection', 'user', 'password', ...) + wipePasswordFromArgument(*storage.engine, data, 4); + } + else if (engine_name == "S3" || engine_name == "COSN") + { + /// S3('url', ['aws_access_key_id', 'aws_secret_access_key',] ...) 
+ wipePasswordFromS3TableEngineArguments(*storage.engine, data); + } + } + + static void wipePasswordFromS3TableEngineArguments(ASTFunction & engine, Data & data) + { + /// We replace 'aws_secret_access_key' with '[HIDDEN'] for the following signatures: + /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format') + /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression') + + /// But we should check the number of arguments first because we don't need to do that replacements in case of + /// S3('url' [, 'format' [, 'compression']]) + size_t num_arguments; + if (!tryGetNumArguments(engine, &num_arguments) || (num_arguments < 4)) + return; + + wipePasswordFromArgument(engine, data, 2); + } + + static void visitDatabaseEngine(ASTStorage & storage, Data & data) + { + if (!storage.engine) + return; + + const String & engine_name = storage.engine->name; + + if (engine_name == "MySQL" || engine_name == "MaterializeMySQL" || engine_name == "MaterializedMySQL") + { + /// MySQL('host:port', 'database', 'user', 'password') + wipePasswordFromArgument(*storage.engine, data, 3); + } + else if (engine_name == "PostgreSQL" || engine_name == "MaterializedPostgreSQL") + { + /// PostgreSQL('host:port', 'database', 'user', 'password', ...) + wipePasswordFromArgument(*storage.engine, data, 3); + } + } + + static void visitFunction(ASTFunction & function, Data & data) + { + if (function.name == "mysql") + { + /// mysql('host:port', 'database', 'table', 'user', 'password', ...) + wipePasswordFromArgument(function, data, 4); + } + else if (function.name == "postgresql") + { + /// postgresql('host:port', 'database', 'table', 'user', 'password', ...) + wipePasswordFromArgument(function, data, 4); + } + else if (function.name == "mongodb") + { + /// mongodb('host:port', 'database', 'collection', 'user', 'password', ...) + wipePasswordFromArgument(function, data, 4); + } + else if (function.name == "s3" || function.name == "cosn") + { + /// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...) + wipePasswordFromS3FunctionArguments(function, data, /* is_cluster_function= */ false); + } + else if (function.name == "s3Cluster") + { + /// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', ...) + wipePasswordFromS3FunctionArguments(function, data, /* is_cluster_function= */ true); + } + else if (function.name == "remote" || function.name == "remoteSecure") + { + /// remote('addresses_expr', 'db', 'table', 'user', 'password', ...) + wipePasswordFromRemoteFunctionArguments(function, data); + } + else if ( + function.name == "encrypt" || function.name == "decrypt" || function.name == "aes_encrypt_mysql" + || function.name == "aes_decrypt_mysql" || function.name == "tryDecrypt") + { + /// encrypt('mode', 'plaintext', 'key' [, iv, aad]) + wipePasswordFromEncryptionFunctionArguments(function, data); + } + } + + static void wipePasswordFromS3FunctionArguments(ASTFunction & function, Data & data, bool is_cluster_function) + { + /// s3Cluster('cluster_name', 'url', ...) has 'url' as its second argument. + size_t url_arg_idx = is_cluster_function ? 1 : 0; + + /// We're going to replace 'aws_secret_access_key' with '[HIDDEN'] for the following signatures: + /// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...) 
+ /// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression') + + /// But we should check the number of arguments first because we don't need to do any replacements in case of + /// s3('url' [, 'format']) or s3Cluster('cluster_name', 'url' [, 'format']) + size_t num_arguments; + if (!tryGetNumArguments(function, &num_arguments) || (num_arguments < url_arg_idx + 3)) + return; + + if (num_arguments >= url_arg_idx + 5) + { + /// s3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'structure', ...) + wipePasswordFromArgument(function, data, url_arg_idx + 2); + } + else + { + /// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...) + /// We need to distinguish that from s3('url', 'format', 'structure' [, 'compression_method']). + /// So we will check whether the argument after 'url' is a format. + String format; + if (!tryGetEvaluatedConstStringFromArgument(function, data, url_arg_idx + 1, &format)) + return; + + if (FormatFactory::instance().getAllFormats().contains(format)) + return; /// The argument after 'url' is a format: s3('url', 'format', ...) + + /// The argument after 'url' is not a format so we do our replacement: + /// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...) -> s3('url', 'aws_access_key_id', '[HIDDEN]', ...) + wipePasswordFromArgument(function, data, url_arg_idx + 2); + } + } + + static void wipePasswordFromRemoteFunctionArguments(ASTFunction & function, Data & data) + { + /// We're going to replace 'password' with '[HIDDEN'] for the following signatures: + /// remote('addresses_expr', db.table, 'user' [, 'password'] [, sharding_key]) + /// remote('addresses_expr', 'db', 'table', 'user' [, 'password'] [, sharding_key]) + /// remote('addresses_expr', table_function(), 'user' [, 'password'] [, sharding_key]) + + /// But we should check the number of arguments first because we don't need to do any replacements in case of + /// remote('addresses_expr', db.table) + size_t num_arguments; + if (!tryGetNumArguments(function, &num_arguments) || (num_arguments < 3)) + return; + + auto & arguments = assert_cast(*function.arguments).children; + size_t arg_num = 1; + + /// Skip 1 or 2 arguments with table_function() or db.table or 'db', 'table'. + const auto * table_function = arguments[arg_num]->as(); + if (table_function && TableFunctionFactory::instance().isTableFunctionName(table_function->name)) + { + ++arg_num; + } + else + { + String database; + if (!tryGetEvaluatedConstDatabaseNameFromArgument(function, data, arg_num, &database)) + return; + ++arg_num; + + auto qualified_name = QualifiedTableName::parseFromString(database); + if (qualified_name.database.empty()) + ++arg_num; /// skip 'table' argument + } + + /// Check if username and password are specified + /// (sharding_key can be of any type so while we're getting string literals they're username & password). + String username, password; + bool username_specified = tryGetStringFromArgument(function, arg_num, &username); + bool password_specified = username_specified && tryGetStringFromArgument(function, arg_num + 1, &password); + + if (password_specified) + { + /// Password is specified so we do our replacement: + /// remote('addresses_expr', db.table, 'user', 'password', ...) -> remote('addresses_expr', db.table, 'user', '[HIDDEN]', ...) 
+ wipePasswordFromArgument(function, data, arg_num + 1); + } + } + + static void wipePasswordFromEncryptionFunctionArguments(ASTFunction & function, Data & data) + { + /// We replace all arguments after 'mode' with '[HIDDEN]': + /// encrypt('mode', 'plaintext', 'key' [, iv, aad]) -> encrypt('mode', '[HIDDEN]') + wipePasswordFromArgument(function, data, 1); + removeArgumentsAfter(function, data, 2); + } + + static void visitBackupQuery(ASTBackupQuery & query, Data & data) + { + if (query.backup_name) + { + if (auto * backup_engine = query.backup_name->as()) + wipePasswordFromBackupEngineArguments(*backup_engine, data); + } + + if (query.base_backup_name) + { + if (auto * base_backup_engine = query.base_backup_name->as()) + wipePasswordFromBackupEngineArguments(*base_backup_engine, data); + } + } + + static void wipePasswordFromBackupEngineArguments(ASTFunction & engine, Data & data) + { + if (engine.name == "S3") + { + /// BACKUP ... TO S3(url, [aws_access_key_id, aws_secret_access_key]) + wipePasswordFromArgument(engine, data, 2); + } + } + + static void wipePasswordFromArgument(ASTFunction & function, Data & data, size_t arg_idx) + { + if (!function.arguments) + return; + + auto * expr_list = function.arguments->as(); + if (!expr_list) + return; /// return because we don't want to validate query here + + auto & arguments = expr_list->children; + if (arg_idx >= arguments.size()) + return; + + if constexpr (check_only) + { + data.can_contain_password = true; + return; + } + + arguments[arg_idx] = std::make_shared("[HIDDEN]"); + data.password_was_hidden = true; + } + + static void removeArgumentsAfter(ASTFunction & function, Data & data, size_t new_num_arguments) + { + if (!function.arguments) + return; + + auto * expr_list = function.arguments->as(); + if (!expr_list) + return; /// return because we don't want to validate query here + + auto & arguments = expr_list->children; + if (new_num_arguments >= arguments.size()) + return; + + if constexpr (check_only) + { + data.can_contain_password = true; + return; + } + + arguments.resize(new_num_arguments); + data.password_was_hidden = true; + } + + static bool tryGetNumArguments(const ASTFunction & function, size_t * num_arguments) + { + if (!function.arguments) + return false; + + auto * expr_list = function.arguments->as(); + if (!expr_list) + return false; /// return false because we don't want to validate query here + + const auto & arguments = expr_list->children; + *num_arguments = arguments.size(); + return true; + } + + static bool tryGetStringFromArgument(const ASTFunction & function, size_t arg_idx, String * value) + { + if (!function.arguments) + return false; + + const auto * expr_list = function.arguments->as(); + if (!expr_list) + return false; /// return false because we don't want to validate query here + + const auto & arguments = expr_list->children; + if (arg_idx >= arguments.size()) + return false; + + const auto * literal = arguments[arg_idx]->as(); + if (!literal || literal->value.getType() != Field::Types::String) + return false; + + *value = literal->value.safeGet(); + return true; + } + + static bool tryGetEvaluatedConstStringFromArgument(const ASTFunction & function, Data & data, size_t arg_idx, String * value) + { + if (!function.arguments) + return false; + + const auto * expr_list = function.arguments->as(); + if (!expr_list) + return false; /// return false because we don't want to validate query here + + const auto & arguments = expr_list->children; + if (arg_idx >= arguments.size()) + return false; + + if 
constexpr (check_only) + { + data.can_contain_password = true; + return false; + } + + ASTPtr argument = arguments[arg_idx]; + try + { + argument = evaluateConstantExpressionOrIdentifierAsLiteral(argument, data.context); + } + catch (...) + { + return false; + } + + const auto & literal = assert_cast(*argument); + if (literal.value.getType() != Field::Types::String) + return false; + + *value = literal.value.safeGet(); + return true; + } + + static bool tryGetEvaluatedConstDatabaseNameFromArgument(const ASTFunction & function, Data & data, size_t arg_idx, String * value) + { + if (!function.arguments) + return false; + + const auto * expr_list = function.arguments->as(); + if (!expr_list) + return false; /// return false because we don't want to validate query here + + const auto & arguments = expr_list->children; + if (arg_idx >= arguments.size()) + return false; + + if constexpr (check_only) + { + data.can_contain_password = true; + return false; + } + + ASTPtr argument = arguments[arg_idx]; + try + { + argument = evaluateConstantExpressionForDatabaseName(argument, data.context); + } + catch (...) + { + return false; + } + + const auto & literal = assert_cast(*argument); + if (literal.value.getType() != Field::Types::String) + return false; + + *value = literal.value.safeGet(); + return true; + } + + static void visitDictionaryDef(ASTDictionary & dictionary, Data & data) + { + if (!dictionary.source || !dictionary.source->elements) + return; + + const auto * expr_list = dictionary.source->elements->as(); + if (!expr_list) + return; /// return because we don't want to validate query here + + const auto & elements = expr_list->children; + + /// We replace password in the dictionary's definition: + /// SOURCE(CLICKHOUSE(host 'example01-01-1' port 9000 user 'default' password 'qwe123' db 'default' table 'ids')) -> + /// SOURCE(CLICKHOUSE(host 'example01-01-1' port 9000 user 'default' password '[HIDDEN]' db 'default' table 'ids')) + for (const auto & element : elements) + { + auto * pair = element->as(); + if (!pair) + continue; /// just skip because we don't want to validate query here + + if (pair->first == "password") + { + if constexpr (check_only) + { + data.can_contain_password = true; + return; + } + pair->set(pair->second, std::make_shared("[HIDDEN]")); + data.password_was_hidden = true; + } + } + } + }; + + /// Checks the type of a specified AST and returns true if it can contain a password. + bool canContainPassword(const IAST & ast, PasswordWipingMode mode) + { + using WipingVisitor = PasswordWipingVisitor; + WipingVisitor::Data data; + data.mode = mode; + WipingVisitor::Visitor visitor{data}; + ASTPtr ast_ptr = std::const_pointer_cast(ast.shared_from_this()); + visitor.visit(ast_ptr); + return data.can_contain_password; + } + + /// Removes a password or its hash from a query if it's specified there or replaces it with some placeholder. + /// This function is used to prepare a query for storing in logs (we don't want logs to contain sensitive information). + bool wipePasswordFromQuery(ASTPtr ast, PasswordWipingMode mode, const ContextPtr & context) + { + using WipingVisitor = PasswordWipingVisitor; + WipingVisitor::Data data; + data.context = context; + data.mode = mode; + WipingVisitor::Visitor visitor{data}; + visitor.visit(ast); + return data.password_was_hidden; + } + + /// Common utility for masking sensitive information. 
+ String maskSensitiveInfoImpl(const String & query, const ASTPtr & parsed_query, PasswordWipingMode mode, const ContextPtr & context) + { + String res = query; + + // Wiping a password or hash from the query because we don't want it to go to logs. + if (parsed_query && canContainPassword(*parsed_query, mode)) + { + ASTPtr ast_without_password = parsed_query->clone(); + if (wipePasswordFromQuery(ast_without_password, mode, context)) + res = serializeAST(*ast_without_password); + } + + // Wiping sensitive data before cropping query by log_queries_cut_to_length, + // otherwise something like credit card without last digit can go to log. + if (auto * masker = SensitiveDataMasker::getInstance()) + { + auto matches = masker->wipeSensitiveData(res); + if (matches > 0) + { + ProfileEvents::increment(ProfileEvents::QueryMaskingRulesMatch, matches); + } + } + + res = res.substr(0, context->getSettingsRef().log_queries_cut_to_length); + + return res; + } +} + + +String maskSensitiveInfoInQueryForLogging(const String & query, const ASTPtr & parsed_query, const ContextPtr & context) +{ + return maskSensitiveInfoImpl(query, parsed_query, PasswordWipingMode::Query, context); +} + + +String maskSensitiveInfoInBackupNameForLogging(const String & backup_name, const ASTPtr & ast, const ContextPtr & context) +{ + return maskSensitiveInfoImpl(backup_name, ast, PasswordWipingMode::BackupName, context); +} + +} diff --git a/src/Interpreters/maskSensitiveInfoInQueryForLogging.h b/src/Interpreters/maskSensitiveInfoInQueryForLogging.h new file mode 100644 index 00000000000..3892f89bc52 --- /dev/null +++ b/src/Interpreters/maskSensitiveInfoInQueryForLogging.h @@ -0,0 +1,19 @@ +#pragma once + +#include +#include + + +namespace DB +{ + +/// Makes a version of a query without sensitive information (e.g. passwords) for logging. +/// The parameter `parsed query` is allowed to be nullptr if the query cannot be parsed. +/// Does not validate AST, works a best-effort way. +String maskSensitiveInfoInQueryForLogging(const String & query, const ASTPtr & parsed_query, const ContextPtr & context); + +/// Makes a version of backup name without sensitive information (e.g. passwords) for logging. +/// Does not validate AST, works a best-effort way. +String maskSensitiveInfoInBackupNameForLogging(const String & backup_name, const ASTPtr & ast, const ContextPtr & context); + +} diff --git a/src/Parsers/ASTAlterQuery.cpp b/src/Parsers/ASTAlterQuery.cpp index 2d8193871b0..959fc55c945 100644 --- a/src/Parsers/ASTAlterQuery.cpp +++ b/src/Parsers/ASTAlterQuery.cpp @@ -378,7 +378,7 @@ void ASTAlterCommand::formatImpl(const FormatSettings & settings, FormatState & } else if (type == ASTAlterCommand::FREEZE_ALL) { - settings.ostr << (settings.hilite ? hilite_keyword : "") << "FREEZE"; + settings.ostr << (settings.hilite ? hilite_keyword : "") << "FREEZE" << (settings.hilite ? hilite_none : ""); if (!with_name.empty()) { @@ -399,7 +399,7 @@ void ASTAlterCommand::formatImpl(const FormatSettings & settings, FormatState & } else if (type == ASTAlterCommand::UNFREEZE_ALL) { - settings.ostr << (settings.hilite ? hilite_keyword : "") << "UNFREEZE"; + settings.ostr << (settings.hilite ? hilite_keyword : "") << "UNFREEZE" << (settings.hilite ? 
hilite_none : ""); if (!with_name.empty()) { diff --git a/src/Parsers/ASTBackupQuery.cpp b/src/Parsers/ASTBackupQuery.cpp index 4af95b96ee3..567b52b5669 100644 --- a/src/Parsers/ASTBackupQuery.cpp +++ b/src/Parsers/ASTBackupQuery.cpp @@ -245,7 +245,21 @@ String ASTBackupQuery::getID(char) const ASTPtr ASTBackupQuery::clone() const { - return std::make_shared(*this); + auto res = std::make_shared(*this); + + if (backup_name) + res->backup_name = backup_name->clone(); + + if (base_backup_name) + res->base_backup_name = base_backup_name->clone(); + + if (cluster_host_ids) + res->cluster_host_ids = cluster_host_ids->clone(); + + if (settings) + res->settings = settings->clone(); + + return res; } diff --git a/src/Parsers/ASTColumnsMatcher.cpp b/src/Parsers/ASTColumnsMatcher.cpp index 8f167f99b37..0fc6847de68 100644 --- a/src/Parsers/ASTColumnsMatcher.cpp +++ b/src/Parsers/ASTColumnsMatcher.cpp @@ -60,6 +60,11 @@ void ASTColumnsRegexpMatcher::setPattern(String pattern) DB::ErrorCodes::CANNOT_COMPILE_REGEXP); } +const std::shared_ptr & ASTColumnsRegexpMatcher::getMatcher() const +{ + return column_matcher; +} + bool ASTColumnsRegexpMatcher::isColumnMatching(const String & column_name) const { return RE2::PartialMatch(column_name, *column_matcher); @@ -114,4 +119,128 @@ void ASTColumnsListMatcher::formatImpl(const FormatSettings & settings, FormatSt } } +ASTPtr ASTQualifiedColumnsRegexpMatcher::clone() const +{ + auto clone = std::make_shared(*this); + clone->cloneChildren(); + return clone; +} + +void ASTQualifiedColumnsRegexpMatcher::appendColumnName(WriteBuffer & ostr) const +{ + const auto & qualifier = children.at(0); + qualifier->appendColumnName(ostr); + writeCString(".COLUMNS(", ostr); + writeQuotedString(original_pattern, ostr); + writeChar(')', ostr); +} + +void ASTQualifiedColumnsRegexpMatcher::setPattern(String pattern) +{ + original_pattern = std::move(pattern); + column_matcher = std::make_shared(original_pattern, RE2::Quiet); + if (!column_matcher->ok()) + throw DB::Exception( + "COLUMNS pattern " + original_pattern + " cannot be compiled: " + column_matcher->error(), + DB::ErrorCodes::CANNOT_COMPILE_REGEXP); +} + +void ASTQualifiedColumnsRegexpMatcher::setMatcher(std::shared_ptr matcher) +{ + column_matcher = std::move(matcher); +} + +const std::shared_ptr & ASTQualifiedColumnsRegexpMatcher::getMatcher() const +{ + return column_matcher; +} + +void ASTQualifiedColumnsRegexpMatcher::updateTreeHashImpl(SipHash & hash_state) const +{ + hash_state.update(original_pattern.size()); + hash_state.update(original_pattern); + IAST::updateTreeHashImpl(hash_state); +} + +void ASTQualifiedColumnsRegexpMatcher::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const +{ + settings.ostr << (settings.hilite ? hilite_keyword : ""); + + const auto & qualifier = children.at(0); + qualifier->formatImpl(settings, state, frame); + + settings.ostr << ".COLUMNS" << (settings.hilite ? 
hilite_none : "") << "("; + settings.ostr << quoteString(original_pattern); + settings.ostr << ")"; + + /// Format column transformers + size_t children_size = children.size(); + + for (size_t i = 1; i < children_size; ++i) + { + const auto & child = children[i]; + settings.ostr << ' '; + child->formatImpl(settings, state, frame); + } +} + +ASTPtr ASTQualifiedColumnsListMatcher::clone() const +{ + auto clone = std::make_shared(*this); + clone->column_list = column_list->clone(); + clone->cloneChildren(); + return clone; +} + +void ASTQualifiedColumnsListMatcher::appendColumnName(WriteBuffer & ostr) const +{ + const auto & qualifier = children.at(0); + qualifier->appendColumnName(ostr); + writeCString(".COLUMNS(", ostr); + + for (auto it = column_list->children.begin(); it != column_list->children.end(); ++it) + { + if (it != column_list->children.begin()) + writeCString(", ", ostr); + + (*it)->appendColumnName(ostr); + } + writeChar(')', ostr); +} + +void ASTQualifiedColumnsListMatcher::updateTreeHashImpl(SipHash & hash_state) const +{ + column_list->updateTreeHash(hash_state); + IAST::updateTreeHashImpl(hash_state); +} + +void ASTQualifiedColumnsListMatcher::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const +{ + settings.ostr << (settings.hilite ? hilite_keyword : ""); + + const auto & qualifier = children.at(0); + qualifier->formatImpl(settings, state, frame); + + settings.ostr << ".COLUMNS" << (settings.hilite ? hilite_none : "") << "("; + + for (ASTs::const_iterator it = column_list->children.begin(); it != column_list->children.end(); ++it) + { + if (it != column_list->children.begin()) + settings.ostr << ", "; + + (*it)->formatImpl(settings, state, frame); + } + settings.ostr << ")"; + + /// Format column transformers + size_t children_size = children.size(); + + for (size_t i = 1; i < children_size; ++i) + { + const auto & child = children[i]; + settings.ostr << ' '; + child->formatImpl(settings, state, frame); + } +} + } diff --git a/src/Parsers/ASTColumnsMatcher.h b/src/Parsers/ASTColumnsMatcher.h index 5aaf3cbe30d..7ce246608b9 100644 --- a/src/Parsers/ASTColumnsMatcher.h +++ b/src/Parsers/ASTColumnsMatcher.h @@ -24,6 +24,7 @@ public: void appendColumnName(WriteBuffer & ostr) const override; void setPattern(String pattern); + const std::shared_ptr & getMatcher() const; bool isColumnMatching(const String & column_name) const; void updateTreeHashImpl(SipHash & hash_state) const override; @@ -49,5 +50,39 @@ protected: void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; }; +/// Same as ASTColumnsRegexpMatcher. Qualified identifier is first child. +class ASTQualifiedColumnsRegexpMatcher : public IAST +{ +public: + String getID(char) const override { return "QualifiedColumnsRegexpMatcher"; } + ASTPtr clone() const override; + + void appendColumnName(WriteBuffer & ostr) const override; + const std::shared_ptr & getMatcher() const; + void setPattern(String pattern); + void setMatcher(std::shared_ptr matcher); + void updateTreeHashImpl(SipHash & hash_state) const override; + +protected: + void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; + +private: + std::shared_ptr column_matcher; + String original_pattern; +}; + +/// Same as ASTColumnsListMatcher. Qualified identifier is first child. 
+class ASTQualifiedColumnsListMatcher : public IAST +{ +public: + String getID(char) const override { return "QualifiedColumnsListMatcher"; } + ASTPtr clone() const override; + void appendColumnName(WriteBuffer & ostr) const override; + void updateTreeHashImpl(SipHash & hash_state) const override; + + ASTPtr column_list; +protected: + void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; +}; } diff --git a/src/Parsers/ASTColumnsTransformers.cpp b/src/Parsers/ASTColumnsTransformers.cpp index 71207724a89..118c22b463f 100644 --- a/src/Parsers/ASTColumnsTransformers.cpp +++ b/src/Parsers/ASTColumnsTransformers.cpp @@ -270,6 +270,11 @@ void ASTColumnsExceptTransformer::setPattern(String pattern) DB::ErrorCodes::CANNOT_COMPILE_REGEXP); } +const std::shared_ptr & ASTColumnsExceptTransformer::getMatcher() const +{ + return column_matcher; +} + bool ASTColumnsExceptTransformer::isColumnMatching(const String & column_name) const { return RE2::PartialMatch(column_name, *column_matcher); diff --git a/src/Parsers/ASTColumnsTransformers.h b/src/Parsers/ASTColumnsTransformers.h index 0f16f6b93e7..5179726e8cb 100644 --- a/src/Parsers/ASTColumnsTransformers.h +++ b/src/Parsers/ASTColumnsTransformers.h @@ -60,6 +60,7 @@ public: } void transform(ASTs & nodes) const override; void setPattern(String pattern); + const std::shared_ptr & getMatcher() const; bool isColumnMatching(const String & column_name) const; void appendColumnName(WriteBuffer & ostr) const override; void updateTreeHashImpl(SipHash & hash_state) const override; diff --git a/src/Parsers/ASTCreateQuery.cpp b/src/Parsers/ASTCreateQuery.cpp index f8853d21178..d7dc4e217b7 100644 --- a/src/Parsers/ASTCreateQuery.cpp +++ b/src/Parsers/ASTCreateQuery.cpp @@ -210,6 +210,8 @@ ASTPtr ASTCreateQuery::clone() const res->set(res->dictionary, dictionary->clone()); } + if (as_table_function) + res->set(res->as_table_function, as_table_function->clone()); if (comment) res->set(res->comment, comment->clone()); diff --git a/src/Parsers/ASTCreateQuery.h b/src/Parsers/ASTCreateQuery.h index de0f187f0e2..41083c688ad 100644 --- a/src/Parsers/ASTCreateQuery.h +++ b/src/Parsers/ASTCreateQuery.h @@ -83,7 +83,7 @@ public: ASTPtr lateness_function; String as_database; String as_table; - ASTPtr as_table_function; + IAST * as_table_function = nullptr; ASTSelectWithUnionQuery * select = nullptr; IAST * comment = nullptr; diff --git a/src/Parsers/ASTExplainQuery.h b/src/Parsers/ASTExplainQuery.h index ea9ccf5a4f4..156ffdeacb9 100644 --- a/src/Parsers/ASTExplainQuery.h +++ b/src/Parsers/ASTExplainQuery.h @@ -15,6 +15,7 @@ public: { ParsedAST, /// 'EXPLAIN AST SELECT ...' AnalyzedSyntax, /// 'EXPLAIN SYNTAX SELECT ...' + QueryTree, /// 'EXPLAIN QUERY TREE SELECT ...' QueryPlan, /// 'EXPLAIN SELECT ...' QueryPipeline, /// 'EXPLAIN PIPELINE ...' QueryEstimates, /// 'EXPLAIN ESTIMATE ...' @@ -109,6 +110,7 @@ private: { case ParsedAST: return "EXPLAIN AST"; case AnalyzedSyntax: return "EXPLAIN SYNTAX"; + case QueryTree: return "EXPLAIN QUERY TREE"; case QueryPlan: return "EXPLAIN"; case QueryPipeline: return "EXPLAIN PIPELINE"; case QueryEstimates: return "EXPLAIN ESTIMATE"; diff --git a/src/Parsers/ASTFunction.h b/src/Parsers/ASTFunction.h index 6d5089f802e..5756fb9ba86 100644 --- a/src/Parsers/ASTFunction.h +++ b/src/Parsers/ASTFunction.h @@ -24,6 +24,8 @@ public: bool compute_after_window_functions = false; + bool is_lambda_function = false; + // We have to make these fields ASTPtr because this is what the visitors // expect. 
Some of them take const ASTPtr & (makes no sense), and some // take ASTPtr & and modify it. I don't understand how the latter is diff --git a/src/Parsers/ASTIdentifier.h b/src/Parsers/ASTIdentifier.h index 14e2fcef39d..c9712d578e0 100644 --- a/src/Parsers/ASTIdentifier.h +++ b/src/Parsers/ASTIdentifier.h @@ -49,9 +49,10 @@ public: void restoreTable(); // TODO(ilezhankin): get rid of this std::shared_ptr createTable() const; // returns |nullptr| if identifier is not table. -protected: String full_name; std::vector name_parts; + +protected: std::shared_ptr semantic; /// pimpl void formatImplWithoutAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; diff --git a/src/Parsers/ASTSampleRatio.h b/src/Parsers/ASTSampleRatio.h index a3e70b7dab7..220f938335b 100644 --- a/src/Parsers/ASTSampleRatio.h +++ b/src/Parsers/ASTSampleRatio.h @@ -34,4 +34,14 @@ public: void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; }; +inline bool operator==(const ASTSampleRatio::Rational & lhs, const ASTSampleRatio::Rational & rhs) +{ + return lhs.numerator == rhs.numerator && lhs.denominator == rhs.denominator; +} + +inline bool operator!=(const ASTSampleRatio::Rational & lhs, const ASTSampleRatio::Rational & rhs) +{ + return !(lhs == rhs); +} + } diff --git a/src/Parsers/ASTTTLElement.cpp b/src/Parsers/ASTTTLElement.cpp index 90278e27c0c..86dd85e0eb8 100644 --- a/src/Parsers/ASTTTLElement.cpp +++ b/src/Parsers/ASTTTLElement.cpp @@ -93,7 +93,7 @@ void ASTTTLElement::setExpression(int & pos, ASTPtr && ast) { if (pos == -1) { - pos = children.size(); + pos = static_cast(children.size()); children.emplace_back(ast); } else diff --git a/src/Parsers/Access/ASTCreateQuotaQuery.cpp b/src/Parsers/Access/ASTCreateQuotaQuery.cpp index 0bb6872e3af..56abedf5235 100644 --- a/src/Parsers/Access/ASTCreateQuotaQuery.cpp +++ b/src/Parsers/Access/ASTCreateQuotaQuery.cpp @@ -141,7 +141,12 @@ String ASTCreateQuotaQuery::getID(char) const ASTPtr ASTCreateQuotaQuery::clone() const { - return std::make_shared(*this); + auto res = std::make_shared(*this); + + if (roles) + res->roles = std::static_pointer_cast(roles->clone()); + + return res; } diff --git a/src/Parsers/Access/ASTCreateRoleQuery.cpp b/src/Parsers/Access/ASTCreateRoleQuery.cpp index 29e78d710cf..d624b9a9157 100644 --- a/src/Parsers/Access/ASTCreateRoleQuery.cpp +++ b/src/Parsers/Access/ASTCreateRoleQuery.cpp @@ -42,7 +42,12 @@ String ASTCreateRoleQuery::getID(char) const ASTPtr ASTCreateRoleQuery::clone() const { - return std::make_shared(*this); + auto res = std::make_shared(*this); + + if (settings) + res->settings = std::static_pointer_cast(settings->clone()); + + return res; } diff --git a/src/Parsers/Access/ASTCreateRowPolicyQuery.cpp b/src/Parsers/Access/ASTCreateRowPolicyQuery.cpp index d968fdd3250..ca888be2cfe 100644 --- a/src/Parsers/Access/ASTCreateRowPolicyQuery.cpp +++ b/src/Parsers/Access/ASTCreateRowPolicyQuery.cpp @@ -124,7 +124,25 @@ String ASTCreateRowPolicyQuery::getID(char) const ASTPtr ASTCreateRowPolicyQuery::clone() const { - return std::make_shared(*this); + auto res = std::make_shared(*this); + + if (names) + res->names = std::static_pointer_cast(names->clone()); + + if (roles) + res->roles = std::static_pointer_cast(roles->clone()); + + /// `res->filters` is already initialized by the copy constructor of ASTCreateRowPolicyQuery (see the first line of this function). + /// But the copy constructor just copied the pointers inside `filters` instead of cloning. 
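A self-contained sketch of why these clone() overrides now deep-copy their member ASTs; the types here (Node, Child, roles) are hypothetical stand-ins, not the ClickHouse classes. The point it demonstrates: the compiler-generated copy constructor copies shared_ptr members, so without the extra clone() calls both copies would share and mutate the same child nodes.

#include <iostream>
#include <memory>
#include <string>

struct Child { std::string value; };

struct Node
{
    std::shared_ptr<Child> roles;  // analogous to a shared child AST member

    // Shallow clone: the copy constructor copies the pointer, so both nodes share one Child.
    std::shared_ptr<Node> shallowClone() const { return std::make_shared<Node>(*this); }

    // Deep clone: copy first, then re-clone each owned child so the copies are independent.
    std::shared_ptr<Node> deepClone() const
    {
        auto res = std::make_shared<Node>(*this);
        if (res->roles)
            res->roles = std::make_shared<Child>(*res->roles);
        return res;
    }
};

int main()
{
    Node original;
    original.roles = std::make_shared<Child>(Child{"admin"});

    auto shallow = original.shallowClone();
    auto deep = original.deepClone();

    original.roles->value = "changed";
    std::cout << shallow->roles->value << '\n';  // "changed" - shared child was mutated
    std::cout << deep->roles->value << '\n';     // "admin"   - independent copy
}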
+ /// We need to make a deep copy and not a shallow copy, so we have to manually clone each pointer in `res->filters`. + chassert(res->filters.size() == filters.size()); + for (auto & [_, res_filter] : res->filters) + { + if (res_filter) + res_filter = res_filter->clone(); + } + + return res; } diff --git a/src/Parsers/Access/ASTCreateSettingsProfileQuery.cpp b/src/Parsers/Access/ASTCreateSettingsProfileQuery.cpp index d9385e6be7b..56ddef433ef 100644 --- a/src/Parsers/Access/ASTCreateSettingsProfileQuery.cpp +++ b/src/Parsers/Access/ASTCreateSettingsProfileQuery.cpp @@ -49,7 +49,15 @@ String ASTCreateSettingsProfileQuery::getID(char) const ASTPtr ASTCreateSettingsProfileQuery::clone() const { - return std::make_shared(*this); + auto res = std::make_shared(*this); + + if (to_roles) + res->to_roles = std::static_pointer_cast(to_roles->clone()); + + if (settings) + res->settings = std::static_pointer_cast(settings->clone()); + + return res; } diff --git a/src/Parsers/Access/ASTCreateUserQuery.cpp b/src/Parsers/Access/ASTCreateUserQuery.cpp index 0f7d0810fba..b4eaf08856e 100644 --- a/src/Parsers/Access/ASTCreateUserQuery.cpp +++ b/src/Parsers/Access/ASTCreateUserQuery.cpp @@ -275,7 +275,24 @@ String ASTCreateUserQuery::getID(char) const ASTPtr ASTCreateUserQuery::clone() const { - return std::make_shared(*this); + auto res = std::make_shared(*this); + + if (names) + res->names = std::static_pointer_cast(names->clone()); + + if (default_roles) + res->default_roles = std::static_pointer_cast(default_roles->clone()); + + if (default_database) + res->default_database = std::static_pointer_cast(default_database->clone()); + + if (grantees) + res->grantees = std::static_pointer_cast(grantees->clone()); + + if (settings) + res->settings = std::static_pointer_cast(settings->clone()); + + return res; } diff --git a/src/Parsers/Access/ASTDropAccessEntityQuery.cpp b/src/Parsers/Access/ASTDropAccessEntityQuery.cpp index 22b30d47ffa..88f2d7bce63 100644 --- a/src/Parsers/Access/ASTDropAccessEntityQuery.cpp +++ b/src/Parsers/Access/ASTDropAccessEntityQuery.cpp @@ -29,7 +29,12 @@ String ASTDropAccessEntityQuery::getID(char) const ASTPtr ASTDropAccessEntityQuery::clone() const { - return std::make_shared(*this); + auto res = std::make_shared(*this); + + if (row_policy_names) + res->row_policy_names = std::static_pointer_cast(row_policy_names->clone()); + + return res; } diff --git a/src/Parsers/Access/ASTGrantQuery.cpp b/src/Parsers/Access/ASTGrantQuery.cpp index 99dc119087c..1d15fc272cf 100644 --- a/src/Parsers/Access/ASTGrantQuery.cpp +++ b/src/Parsers/Access/ASTGrantQuery.cpp @@ -96,7 +96,15 @@ String ASTGrantQuery::getID(char) const ASTPtr ASTGrantQuery::clone() const { - return std::make_shared(*this); + auto res = std::make_shared(*this); + + if (roles) + res->roles = std::static_pointer_cast(roles->clone()); + + if (grantees) + res->grantees = std::static_pointer_cast(grantees->clone()); + + return res; } diff --git a/src/Parsers/Access/ASTSetRoleQuery.cpp b/src/Parsers/Access/ASTSetRoleQuery.cpp index c886da1c8b5..c26a7f18661 100644 --- a/src/Parsers/Access/ASTSetRoleQuery.cpp +++ b/src/Parsers/Access/ASTSetRoleQuery.cpp @@ -14,7 +14,15 @@ String ASTSetRoleQuery::getID(char) const ASTPtr ASTSetRoleQuery::clone() const { - return std::make_shared(*this); + auto res = std::make_shared(*this); + + if (roles) + res->roles = std::static_pointer_cast(roles->clone()); + + if (to_users) + res->to_users = std::static_pointer_cast(to_users->clone()); + + return res; } diff --git 
a/src/Parsers/Access/ASTShowCreateAccessEntityQuery.cpp b/src/Parsers/Access/ASTShowCreateAccessEntityQuery.cpp index e92af22f14f..12eda260712 100644 --- a/src/Parsers/Access/ASTShowCreateAccessEntityQuery.cpp +++ b/src/Parsers/Access/ASTShowCreateAccessEntityQuery.cpp @@ -38,7 +38,12 @@ String ASTShowCreateAccessEntityQuery::getID(char) const ASTPtr ASTShowCreateAccessEntityQuery::clone() const { - return std::make_shared(*this); + auto res = std::make_shared(*this); + + if (row_policy_names) + res->row_policy_names = std::static_pointer_cast(row_policy_names->clone()); + + return res; } diff --git a/src/Parsers/Access/ASTShowGrantsQuery.cpp b/src/Parsers/Access/ASTShowGrantsQuery.cpp index 5d54cf45dc1..2b252617578 100644 --- a/src/Parsers/Access/ASTShowGrantsQuery.cpp +++ b/src/Parsers/Access/ASTShowGrantsQuery.cpp @@ -14,7 +14,12 @@ String ASTShowGrantsQuery::getID(char) const ASTPtr ASTShowGrantsQuery::clone() const { - return std::make_shared(*this); + auto res = std::make_shared(*this); + + if (for_roles) + res->for_roles = std::static_pointer_cast(for_roles->clone()); + + return res; } diff --git a/src/Parsers/Access/ParserCreateUserQuery.cpp b/src/Parsers/Access/ParserCreateUserQuery.cpp index 9e32b3c4618..ed6ecb62667 100644 --- a/src/Parsers/Access/ParserCreateUserQuery.cpp +++ b/src/Parsers/Access/ParserCreateUserQuery.cpp @@ -295,11 +295,11 @@ namespace } - bool parseHosts(IParserBase::Pos & pos, Expected & expected, const String & prefix, AllowedClientHosts & hosts) + bool parseHosts(IParserBase::Pos & pos, Expected & expected, std::string_view prefix, AllowedClientHosts & hosts) { return IParserBase::wrapParseImpl(pos, [&] { - if (!prefix.empty() && !ParserKeyword{prefix.c_str()}.ignore(pos, expected)) + if (!prefix.empty() && !ParserKeyword{prefix}.ignore(pos, expected)) return false; if (!ParserKeyword{"HOST"}.ignore(pos, expected)) @@ -492,7 +492,6 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (alter) { - String maybe_new_name; if (!new_name && (names->size() == 1) && parseRenameTo(pos, expected, new_name)) continue; diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 88784329ece..c4e07ea2e15 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -1192,54 +1192,6 @@ bool ParserAlias::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return true; } - -bool ParserColumnsMatcher::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - ParserKeyword columns("COLUMNS"); - ParserList columns_p(std::make_unique(false, true), std::make_unique(TokenType::Comma), false); - ParserStringLiteral regex; - - if (!columns.ignore(pos, expected)) - return false; - - if (pos->type != TokenType::OpeningRoundBracket) - return false; - ++pos; - - ASTPtr column_list; - ASTPtr regex_node; - if (!columns_p.parse(pos, column_list, expected) && !regex.parse(pos, regex_node, expected)) - return false; - - if (pos->type != TokenType::ClosingRoundBracket) - return false; - ++pos; - - ASTPtr res; - if (column_list) - { - auto list_matcher = std::make_shared(); - list_matcher->column_list = column_list; - res = list_matcher; - } - else - { - auto regexp_matcher = std::make_shared(); - regexp_matcher->setPattern(regex_node->as().value.get()); - res = regexp_matcher; - } - - ParserColumnsTransformers transformers_p(allowed_transformers); - ASTPtr transformer; - while (transformers_p.parse(pos, transformer, expected)) - { - 
res->children.push_back(transformer); - } - node = std::move(res); - return true; -} - - bool ParserColumnsTransformers::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ParserKeyword apply("APPLY"); @@ -1488,6 +1440,122 @@ bool ParserQualifiedAsterisk::parseImpl(Pos & pos, ASTPtr & node, Expected & exp return true; } +/// Parse (columns_list) or ('REGEXP'). +static bool parseColumnsMatcherBody(IParser::Pos & pos, ASTPtr & node, Expected & expected, ParserColumnsTransformers::ColumnTransformers allowed_transformers) +{ + if (pos->type != TokenType::OpeningRoundBracket) + return false; + ++pos; + + ParserList columns_p(std::make_unique(false, true), std::make_unique(TokenType::Comma), false); + ParserStringLiteral regex; + + ASTPtr column_list; + ASTPtr regex_node; + if (!columns_p.parse(pos, column_list, expected) && !regex.parse(pos, regex_node, expected)) + return false; + + if (pos->type != TokenType::ClosingRoundBracket) + return false; + ++pos; + + ASTPtr res; + if (column_list) + { + auto list_matcher = std::make_shared(); + list_matcher->column_list = column_list; + res = list_matcher; + } + else + { + auto regexp_matcher = std::make_shared(); + regexp_matcher->setPattern(regex_node->as().value.get()); + res = regexp_matcher; + } + + ParserColumnsTransformers transformers_p(allowed_transformers); + ASTPtr transformer; + while (transformers_p.parse(pos, transformer, expected)) + { + res->children.push_back(transformer); + } + + node = std::move(res); + return true; +} + +bool ParserColumnsMatcher::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserKeyword columns("COLUMNS"); + + if (!columns.ignore(pos, expected)) + return false; + + return parseColumnsMatcherBody(pos, node, expected, allowed_transformers); +} + +bool ParserQualifiedColumnsMatcher::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (!ParserCompoundIdentifier(true, true).parse(pos, node, expected)) + return false; + + auto identifier_node = node; + const auto & identifier_node_typed = identifier_node->as(); + + /// ParserCompoundIdentifier parse identifier.COLUMNS + if (identifier_node_typed.name_parts.size() == 1 || identifier_node_typed.name_parts.back() != "COLUMNS") + return false; + + /// TODO: ASTTableIdentifier can contain only 2 parts + + if (identifier_node_typed.name_parts.size() == 2) + { + auto table_name = identifier_node_typed.name_parts[0]; + identifier_node = std::make_shared(table_name); + } + else + { + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Expected identifier to contain no more than 2 parts. 
Actual {}", + identifier_node_typed.full_name); + } + + if (!parseColumnsMatcherBody(pos, node, expected, allowed_transformers)) + return false; + + if (auto * columns_list_matcher = node->as()) + { + auto result = std::make_shared(); + result->column_list = std::move(columns_list_matcher->column_list); + + result->children.reserve(columns_list_matcher->children.size() + 1); + result->children.push_back(std::move(identifier_node)); + + for (auto && child : columns_list_matcher->children) + result->children.push_back(std::move(child)); + + node = result; + } + else if (auto * column_regexp_matcher = node->as()) + { + auto result = std::make_shared(); + result->setMatcher(column_regexp_matcher->getMatcher()); + + result->children.reserve(column_regexp_matcher->children.size() + 1); + result->children.push_back(std::move(identifier_node)); + + for (auto && child : column_regexp_matcher->children) + result->children.push_back(std::move(child)); + + node = result; + } + else + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "Qualified COLUMNS matcher expected to be list or regexp"); + } + + return true; +} bool ParserSubstitution::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { diff --git a/src/Parsers/ExpressionElementParsers.h b/src/Parsers/ExpressionElementParsers.h index f538555f0c1..8a9647dc86f 100644 --- a/src/Parsers/ExpressionElementParsers.h +++ b/src/Parsers/ExpressionElementParsers.h @@ -104,7 +104,7 @@ protected: bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; -/** COLUMNS('') +/** COLUMNS(columns_names) or COLUMNS('') */ class ParserColumnsMatcher : public IParserBase { @@ -121,6 +121,23 @@ protected: ColumnTransformers allowed_transformers; }; +/** Qualified columns matcher identifier.COLUMNS(columns_names) or identifier.COLUMNS('') + */ +class ParserQualifiedColumnsMatcher : public IParserBase +{ +public: + using ColumnTransformers = ParserColumnsTransformers::ColumnTransformers; + explicit ParserQualifiedColumnsMatcher(ColumnTransformers allowed_transformers_ = ParserColumnsTransformers::AllTransformers) + : allowed_transformers(allowed_transformers_) + {} + +protected: + const char * getName() const override { return "qualified COLUMNS matcher"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + + ColumnTransformers allowed_transformers; +}; + // Allows to make queries like SELECT SUM() FILTER(WHERE ) FROM ... class ParserFilterClause : public IParserBase { diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index f7a016a59e4..2a41196c15d 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -478,7 +478,10 @@ struct Operator { Operator() = default; - Operator(const std::string & function_name_, int priority_, int arity_ = 2, OperatorType type_ = OperatorType::None) + Operator(const std::string & function_name_, + int priority_, + int arity_, + OperatorType type_ = OperatorType::None) : type(type_), priority(priority_), arity(arity_), function_name(function_name_) {} OperatorType type; @@ -487,6 +490,14 @@ struct Operator std::string function_name; }; +template +static std::shared_ptr makeASTFunction(Operator & op, Args &&... 
args) +{ + auto ast_function = makeASTFunction(op.function_name, std::forward(args)...); + ast_function->is_lambda_function = op.type == OperatorType::Lambda; + return ast_function; +} + enum class Checkpoint { None, @@ -506,10 +517,8 @@ enum class Checkpoint class Layer { public: - explicit Layer(bool allow_alias_ = true, bool allow_alias_without_as_keyword_ = true) : - allow_alias(allow_alias_), allow_alias_without_as_keyword(allow_alias_without_as_keyword_) - { - } + explicit Layer(bool allow_alias_ = true, bool allow_alias_without_as_keyword_ = false) : + allow_alias(allow_alias_), allow_alias_without_as_keyword(allow_alias_without_as_keyword_) {} virtual ~Layer() = default; @@ -552,13 +561,10 @@ public: virtual bool getResult(ASTPtr & node) { - if (elements.size() == 1) - { - node = std::move(elements[0]); - return true; - } + if (!finished) + return false; - return false; + return getResultImpl(node); } virtual bool parse(IParser::Pos & /*pos*/, Expected & /*expected*/, Action & /*action*/) = 0; @@ -612,13 +618,17 @@ public: /// bool mergeElement(bool push_to_elements = true) { + parsed_alias = false; + Operator cur_op; while (popOperator(cur_op)) { ASTPtr function; - // Special case of ternary operator - if (cur_op.type == OperatorType::StartIf) + // We should not meet the starting part of the operator while finishing an element + if (cur_op.type == OperatorType::StartIf || + cur_op.type == OperatorType::StartBetween || + cur_op.type == OperatorType::StartNotBetween) return false; if (cur_op.type == OperatorType::FinishIf) @@ -628,10 +638,6 @@ public: return false; } - // Special case of a BETWEEN b AND c operator - if (cur_op.type == OperatorType::StartBetween || cur_op.type == OperatorType::StartNotBetween) - return false; - if (cur_op.type == OperatorType::FinishBetween) { Operator tmp_op; @@ -651,7 +657,7 @@ public: } else { - function = makeASTFunction(cur_op.function_name); + function = makeASTFunction(cur_op); if (!popLastNOperands(function->children[0]->children, cur_op.arity)) return false; @@ -727,6 +733,9 @@ public: /// In order to distinguish them we keep a counter of BETWEENs without matching ANDs. int between_counter = 0; + /// Flag we set when we parsed alias to avoid parsing next element as alias + bool parsed_alias = false; + bool allow_alias = true; bool allow_alias_without_as_keyword = true; @@ -734,6 +743,17 @@ public: Checkpoint current_checkpoint = Checkpoint::None; protected: + virtual bool getResultImpl(ASTPtr & node) + { + if (elements.size() == 1) + { + node = std::move(elements[0]); + return true; + } + + return false; + } + std::vector operators; ASTs operands; ASTs elements; @@ -754,17 +774,12 @@ public: bool getResult(ASTPtr & node) override { /// We can exit the main cycle outside the parse() function, - /// so we need to merge the element here + /// so we need to merge the element here. + /// Because of this 'finished' flag can also not be set. if (!mergeElement()) return false; - if (elements.size() == 1) - { - node = std::move(elements[0]); - return true; - } - - return false; + return Layer::getResultImpl(node); } bool parse(IParser::Pos & pos, Expected & /*expected*/, Action & /*action*/) override @@ -776,16 +791,18 @@ public: } }; - /// Basic layer for a function with certain separator and end tokens: /// 1. If we parse a separator we should merge current operands and operators /// into one element and push in to 'elements' vector. /// 2. If we parse an ending token, we should merge everything as in (1) and /// also set 'finished' flag. 
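A toy illustration of the separator/end-token contract described in the comment above, with hypothetical names (ToyLayer, pre-split string tokens, ',' and ')' standing in for the separator and end template parameters); it is only a sketch of the idea, not the real parser machinery: operands accumulate until a separator merges them into one element, and the end token merges the remainder and marks the layer finished.

#include <iostream>
#include <string>
#include <vector>

struct ToyLayer
{
    std::vector<std::string> operands;   // not yet merged
    std::vector<std::string> elements;   // merged results
    bool finished = false;

    // Collapse the accumulated operands into a single element.
    void mergeElement()
    {
        std::string merged;
        for (const auto & op : operands)
            merged += op;
        operands.clear();
        if (!merged.empty())
            elements.push_back(merged);
    }

    void parse(const std::string & token)
    {
        if (token == ",") { mergeElement(); return; }                   // separator: close current element
        if (token == ")") { mergeElement(); finished = true; return; }  // end token: close the layer
        operands.push_back(token);                                      // ordinary operand/operator
    }
};

int main()
{
    std::vector<std::string> tokens = {"a", "+", "b", ",", "c", ")"};
    ToyLayer layer;
    for (const auto & t : tokens)
        layer.parse(t);

    for (const auto & e : layer.elements)
        std::cout << '[' << e << "] ";                 // [a+b] [c]
    std::cout << "finished = " << layer.finished << '\n';  // finished = 1
}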
template -class BaseLayer : public Layer +class LayerWithSeparator : public Layer { public: + explicit LayerWithSeparator(bool allow_alias_ = true, bool allow_alias_without_as_keyword_ = false) : + Layer(allow_alias_, allow_alias_without_as_keyword_) {} + bool parse(IParser::Pos & pos, Expected & expected, Action & action) override { if (ParserToken(separator).ignore(pos, expected)) @@ -809,11 +826,11 @@ public: } }; - -class OrdinaryFunctionLayer : public Layer +/// Layer for regular and aggregate functions without syntax sugar +class FunctionLayer : public Layer { public: - explicit OrdinaryFunctionLayer(String function_name_, bool allow_function_parameters_ = true) + explicit FunctionLayer(String function_name_, bool allow_function_parameters_ = true) : function_name(function_name_), allow_function_parameters(allow_function_parameters_){} bool parse(IParser::Pos & pos, Expected & expected, Action & action) override @@ -958,7 +975,7 @@ public: if (parameters) { - function_node->parameters = parameters; + function_node->parameters = std::move(parameters); function_node->children.push_back(function_node->parameters); } @@ -991,7 +1008,7 @@ public: return false; } - elements = {function_node}; + elements = {std::move(function_node)}; finished = true; } @@ -1015,17 +1032,6 @@ private: class RoundBracketsLayer : public Layer { public: - bool getResult(ASTPtr & node) override - { - // Round brackets can mean priority operator as well as function tuple() - if (!is_tuple && elements.size() == 1) - node = std::move(elements[0]); - else - node = makeASTFunction("tuple", std::move(elements)); - - return true; - } - bool parse(IParser::Pos & pos, Expected & expected, Action & action) override { if (ParserToken(TokenType::Comma).ignore(pos, expected)) @@ -1055,41 +1061,57 @@ public: return true; } + +protected: + bool getResultImpl(ASTPtr & node) override + { + // Round brackets can mean priority operator as well as function tuple() + if (!is_tuple && elements.size() == 1) + node = std::move(elements[0]); + else + node = makeASTFunction("tuple", std::move(elements)); + + return true; + } + private: bool is_tuple = false; }; /// Layer for array square brackets operator -class ArrayLayer : public BaseLayer +class ArrayLayer : public LayerWithSeparator { public: - bool getResult(ASTPtr & node) override + bool parse(IParser::Pos & pos, Expected & expected, Action & action) override + { + return LayerWithSeparator::parse(pos, expected, action); + } + +protected: + bool getResultImpl(ASTPtr & node) override { node = makeASTFunction("array", std::move(elements)); return true; } - - bool parse(IParser::Pos & pos, Expected & expected, Action & action) override - { - return BaseLayer::parse(pos, expected, action); - } }; /// Layer for arrayElement square brackets operator /// This layer does not create a function, it is only needed to parse closing token /// and return only one element. 
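The RoundBracketsLayer change above moves the tuple-vs-parentheses decision into getResultImpl: a single element with no comma is returned as-is (priority parentheses), anything else becomes the function tuple(...). A minimal standalone illustration of that rule follows; it builds strings instead of AST nodes and the names are hypothetical, with saw_comma playing the role of is_tuple.

#include <iostream>
#include <string>
#include <vector>

// Toy version of the round-brackets result rule: "(x)" is just x,
// while "(x, y)" denotes the function tuple(x, y).
std::string roundBracketsResult(const std::vector<std::string> & elements, bool saw_comma)
{
    if (!saw_comma && elements.size() == 1)
        return elements[0];

    std::string out = "tuple(";
    for (size_t i = 0; i < elements.size(); ++i)
        out += (i ? ", " : "") + elements[i];
    return out + ")";
}

int main()
{
    std::cout << roundBracketsResult({"1 + 2"}, false) << '\n';  // 1 + 2
    std::cout << roundBracketsResult({"1", "2"}, true) << '\n';  // tuple(1, 2)
    std::cout << roundBracketsResult({"1"}, true) << '\n';       // tuple(1)
}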
-class ArrayElementLayer : public BaseLayer +class ArrayElementLayer : public LayerWithSeparator { public: bool parse(IParser::Pos & pos, Expected & expected, Action & action) override { - return BaseLayer::parse(pos, expected, action); + return LayerWithSeparator::parse(pos, expected, action); } }; class CastLayer : public Layer { public: + CastLayer() : Layer(/*allow_alias*/ true, /*allow_alias_without_as_keyword*/ true) {} + bool parse(IParser::Pos & pos, Expected & expected, Action & action) override { /// CAST(x [AS alias1], T [AS alias2]) or CAST(x [AS alias1] AS T) @@ -1175,6 +1197,9 @@ public: if (!mergeElement()) return false; + if (elements.size() != 2) + return false; + elements = {makeASTFunction("CAST", elements[0], elements[1])}; finished = true; return true; @@ -1185,25 +1210,10 @@ public: } }; -class ExtractLayer : public BaseLayer +class ExtractLayer : public LayerWithSeparator { public: - bool getResult(ASTPtr & node) override - { - if (state == 2) - { - if (elements.empty()) - return false; - - node = makeASTFunction(interval_kind.toNameOfFunctionExtractTimePart(), elements[0]); - } - else - { - node = makeASTFunction("extract", std::move(elements)); - } - - return true; - } + ExtractLayer() : LayerWithSeparator(/*allow_alias*/ true, /*allow_alias_without_as_keyword*/ true) {} bool parse(IParser::Pos & pos, Expected & expected, Action & action) override { @@ -1232,7 +1242,7 @@ public: if (state == 1) { - return BaseLayer::parse(pos, expected, action); + return LayerWithSeparator::parse(pos, expected, action); } if (state == 2) @@ -1250,6 +1260,25 @@ public: return true; } +protected: + bool getResultImpl(ASTPtr & node) override + { + if (state == 2) + { + if (elements.empty()) + return false; + + node = makeASTFunction(interval_kind.toNameOfFunctionExtractTimePart(), elements[0]); + } + else + { + node = makeASTFunction("extract", std::move(elements)); + } + + return true; + } + + private: IntervalKind interval_kind; }; @@ -1257,11 +1286,7 @@ private: class SubstringLayer : public Layer { public: - bool getResult(ASTPtr & node) override - { - node = makeASTFunction("substring", std::move(elements)); - return true; - } + SubstringLayer() : Layer(/*allow_alias*/ true, /*allow_alias_without_as_keyword*/ true) {} bool parse(IParser::Pos & pos, Expected & expected, Action & action) override { @@ -1312,19 +1337,19 @@ public: return true; } + +protected: + bool getResultImpl(ASTPtr & node) override + { + node = makeASTFunction("substring", std::move(elements)); + return true; + } }; class PositionLayer : public Layer { public: - bool getResult(ASTPtr & node) override - { - if (state == 2) - std::swap(elements[1], elements[0]); - - node = makeASTFunction("position", std::move(elements)); - return true; - } + PositionLayer() : Layer(/*allow_alias*/ true, /*allow_alias_without_as_keyword*/ true) {} bool parse(IParser::Pos & pos, Expected & expected, Action & action) override { @@ -1380,12 +1405,23 @@ public: return true; } -}; +protected: + bool getResultImpl(ASTPtr & node) override + { + if (state == 2 && elements.size() == 2) + std::swap(elements[1], elements[0]); + + node = makeASTFunction("position", std::move(elements)); + return true; + } +}; class ExistsLayer : public Layer { public: + ExistsLayer() : Layer(/*allow_alias*/ true, /*allow_alias_without_as_keyword*/ true) {} + bool parse(IParser::Pos & pos, Expected & expected, Action & /*action*/) override { ASTPtr node; @@ -1410,15 +1446,8 @@ public: class TrimLayer : public Layer { public: - TrimLayer(bool trim_left_, bool 
trim_right_) : trim_left(trim_left_), trim_right(trim_right_) - { - } - - bool getResult(ASTPtr & node) override - { - node = makeASTFunction(function_name, std::move(elements)); - return true; - } + TrimLayer(bool trim_left_, bool trim_right_) + : Layer(/*allow_alias*/ true, /*allow_alias_without_as_keyword*/ true), trim_left(trim_left_), trim_right(trim_right_) {} bool parse(IParser::Pos & pos, Expected & expected, Action & action) override { @@ -1561,6 +1590,14 @@ public: return true; } + +protected: + bool getResultImpl(ASTPtr & node) override + { + node = makeASTFunction(function_name, std::move(elements)); + return true; + } + private: bool trim_left; bool trim_right; @@ -1570,27 +1607,11 @@ private: String function_name; }; - -class DateAddLayer : public BaseLayer +class DateAddLayer : public LayerWithSeparator { public: - explicit DateAddLayer(const char * function_name_) : function_name(function_name_) - { - } - - bool getResult(ASTPtr & node) override - { - if (parsed_interval_kind) - { - elements[0] = makeASTFunction(interval_kind.toNameOfFunctionToIntervalDataType(), elements[0]); - node = makeASTFunction(function_name, elements[1], elements[0]); - } - else - node = makeASTFunction(function_name, std::move(elements)); - - return true; - } - + explicit DateAddLayer(const char * function_name_) + : LayerWithSeparator(/*allow_alias*/ true, /*allow_alias_without_as_keyword*/ true), function_name(function_name_) {} bool parse(IParser::Pos & pos, Expected & expected, Action & action) override { @@ -1615,39 +1636,39 @@ public: if (state == 1) { - return BaseLayer::parse(pos, expected, action); + return LayerWithSeparator::parse(pos, expected, action); } return true; } +protected: + bool getResultImpl(ASTPtr & node) override + { + if (parsed_interval_kind) + { + if (elements.size() < 2) + return false; + + elements[0] = makeASTFunction(interval_kind.toNameOfFunctionToIntervalDataType(), elements[0]); + node = makeASTFunction(function_name, elements[1], elements[0]); + } + else + node = makeASTFunction(function_name, std::move(elements)); + + return true; + } + private: IntervalKind interval_kind; const char * function_name; bool parsed_interval_kind = false; }; - -class DateDiffLayer : public BaseLayer +class DateDiffLayer : public LayerWithSeparator { public: - bool getResult(ASTPtr & node) override - { - if (parsed_interval_kind) - { - if (elements.size() == 2) - node = makeASTFunction("dateDiff", std::make_shared(interval_kind.toDateDiffUnit()), elements[0], elements[1]); - else if (elements.size() == 3) - node = makeASTFunction("dateDiff", std::make_shared(interval_kind.toDateDiffUnit()), elements[0], elements[1], elements[2]); - else - return false; - } - else - { - node = makeASTFunction("dateDiff", std::move(elements)); - } - return true; - } + DateDiffLayer() : LayerWithSeparator(/*allow_alias*/ true, /*allow_alias_without_as_keyword*/ true) {} bool parse(IParser::Pos & pos, Expected & expected, Action & action) override { @@ -1669,21 +1690,41 @@ public: if (state == 1) { - return BaseLayer::parse(pos, expected, action); + return LayerWithSeparator::parse(pos, expected, action); } return true; } +protected: + bool getResultImpl(ASTPtr & node) override + { + if (parsed_interval_kind) + { + if (elements.size() == 2) + node = makeASTFunction("dateDiff", std::make_shared(interval_kind.toDateDiffUnit()), elements[0], elements[1]); + else if (elements.size() == 3) + node = makeASTFunction("dateDiff", std::make_shared(interval_kind.toDateDiffUnit()), elements[0], elements[1], 
elements[2]); + else + return false; + } + else + { + node = makeASTFunction("dateDiff", std::move(elements)); + } + return true; + } + private: IntervalKind interval_kind; bool parsed_interval_kind = false; }; - class IntervalLayer : public Layer { public: + IntervalLayer() : Layer(/*allow_alias*/ true, /*allow_alias_without_as_keyword*/ true) {} + bool parse(IParser::Pos & pos, Expected & expected, Action & action) override { /// INTERVAL 1 HOUR or INTERVAL expr HOUR @@ -1758,86 +1799,11 @@ private: IntervalKind interval_kind; }; -/// Layer for table function 'view' and 'viewIfPermitted' -class ViewLayer : public Layer -{ -public: - explicit ViewLayer(bool if_permitted_) : if_permitted(if_permitted_) {} - - bool getResult(ASTPtr & node) override - { - if (if_permitted) - node = makeASTFunction("viewIfPermitted", std::move(elements)); - else - node = makeASTFunction("view", std::move(elements)); - - return true; - } - - bool parse(IParser::Pos & pos, Expected & expected, Action & /*action*/) override - { - /// view(SELECT ...) - /// viewIfPermitted(SELECT ... ELSE func(...)) - /// - /// 0. Parse the SELECT query and if 'if_permitted' parse 'ELSE' keyword (-> 1) else (finished) - /// 1. Parse closing token - - if (state == 0) - { - ASTPtr query; - - bool maybe_an_subquery = pos->type == TokenType::OpeningRoundBracket; - - if (!ParserSelectWithUnionQuery().parse(pos, query, expected)) - return false; - - auto & select_ast = query->as(); - if (select_ast.list_of_selects->children.size() == 1 && maybe_an_subquery) - { - // It's an subquery. Bail out. - return false; - } - - pushResult(query); - - if (!if_permitted) - { - if (!ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) - return false; - - finished = true; - return true; - } - - if (!ParserKeyword{"ELSE"}.ignore(pos, expected)) - return false; - - state = 1; - return true; - } - - if (state == 1) - { - if (ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) - { - if (!mergeElement()) - return false; - - finished = true; - } - } - - return true; - } - -private: - bool if_permitted; -}; - - class CaseLayer : public Layer { public: + CaseLayer() : Layer(/*allow_alias*/ true, /*allow_alias_without_as_keyword*/ true) {} + bool parse(IParser::Pos & pos, Expected & expected, Action & action) override { /// CASE [x] WHEN expr THEN expr [WHEN expr THEN expr [...]] [ELSE expr] END @@ -1926,6 +1892,83 @@ private: bool has_case_expr; }; +/// Layer for table function 'view' and 'viewIfPermitted' +class ViewLayer : public Layer +{ +public: + explicit ViewLayer(bool if_permitted_) : if_permitted(if_permitted_) {} + + bool parse(IParser::Pos & pos, Expected & expected, Action & /*action*/) override + { + /// view(SELECT ...) + /// viewIfPermitted(SELECT ... ELSE func(...)) + /// + /// 0. Parse the SELECT query and if 'if_permitted' parse 'ELSE' keyword (-> 1) else (finished) + /// 1. Parse closing token + + if (state == 0) + { + ASTPtr query; + + bool maybe_an_subquery = pos->type == TokenType::OpeningRoundBracket; + + if (!ParserSelectWithUnionQuery().parse(pos, query, expected)) + return false; + + auto & select_ast = query->as(); + if (select_ast.list_of_selects->children.size() == 1 && maybe_an_subquery) + { + // It's an subquery. Bail out. 
+ return false; + } + + pushResult(query); + + if (!if_permitted) + { + if (!ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) + return false; + + finished = true; + return true; + } + + if (!ParserKeyword{"ELSE"}.ignore(pos, expected)) + return false; + + state = 1; + return true; + } + + if (state == 1) + { + if (ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) + { + if (!mergeElement()) + return false; + + finished = true; + } + } + + return true; + } + +protected: + bool getResultImpl(ASTPtr & node) override + { + if (if_permitted) + node = makeASTFunction("viewIfPermitted", std::move(elements)); + else + node = makeASTFunction("view", std::move(elements)); + + return true; + } + +private: + bool if_permitted; +}; + std::unique_ptr getFunctionLayer(ASTPtr identifier, bool is_table_function, bool allow_function_parameters_ = true) { @@ -1990,9 +2033,9 @@ std::unique_ptr getFunctionLayer(ASTPtr identifier, bool is_table_functio || function_name_lowercase == "timestampdiff" || function_name_lowercase == "timestamp_diff") return std::make_unique(); else if (function_name_lowercase == "grouping") - return std::make_unique(function_name_lowercase, allow_function_parameters_); + return std::make_unique(function_name_lowercase, allow_function_parameters_); else - return std::make_unique(function_name, allow_function_parameters_); + return std::make_unique(function_name, allow_function_parameters_); } @@ -2076,6 +2119,7 @@ struct ParserExpressionImpl // Recursion ParserQualifiedAsterisk qualified_asterisk_parser; ParserColumnsMatcher columns_matcher_parser; + ParserQualifiedColumnsMatcher qualified_columns_matcher_parser; ParserSubquery subquery_parser; bool parse(std::unique_ptr start, IParser::Pos & pos, ASTPtr & node, Expected & expected); @@ -2141,22 +2185,22 @@ std::vector> ParserExpressionImpl::operators_t {"<", Operator("less", 9, 2, OperatorType::Comparison)}, {">", Operator("greater", 9, 2, OperatorType::Comparison)}, {"=", Operator("equals", 9, 2, OperatorType::Comparison)}, - {"LIKE", Operator("like", 9)}, - {"ILIKE", Operator("ilike", 9)}, - {"NOT LIKE", Operator("notLike", 9)}, - {"NOT ILIKE", Operator("notILike", 9)}, - {"IN", Operator("in", 9)}, - {"NOT IN", Operator("notIn", 9)}, - {"GLOBAL IN", Operator("globalIn", 9)}, - {"GLOBAL NOT IN", Operator("globalNotIn", 9)}, + {"LIKE", Operator("like", 9, 2)}, + {"ILIKE", Operator("ilike", 9, 2)}, + {"NOT LIKE", Operator("notLike", 9, 2)}, + {"NOT ILIKE", Operator("notILike", 9, 2)}, + {"IN", Operator("in", 9, 2)}, + {"NOT IN", Operator("notIn", 9, 2)}, + {"GLOBAL IN", Operator("globalIn", 9, 2)}, + {"GLOBAL NOT IN", Operator("globalNotIn", 9, 2)}, {"||", Operator("concat", 10, 2, OperatorType::Mergeable)}, - {"+", Operator("plus", 11)}, - {"-", Operator("minus", 11)}, - {"*", Operator("multiply", 12)}, - {"/", Operator("divide", 12)}, - {"%", Operator("modulo", 12)}, - {"MOD", Operator("modulo", 12)}, - {"DIV", Operator("intDiv", 12)}, + {"+", Operator("plus", 11, 2)}, + {"-", Operator("minus", 11, 2)}, + {"*", Operator("multiply", 12, 2)}, + {"/", Operator("divide", 12, 2)}, + {"%", Operator("modulo", 12, 2)}, + {"MOD", Operator("modulo", 12, 2)}, + {"DIV", Operator("intDiv", 12, 2)}, {".", Operator("tupleElement", 14, 2, OperatorType::TupleElement)}, {"[", Operator("arrayElement", 14, 2, OperatorType::ArrayElement)}, {"::", Operator("CAST", 14, 2, OperatorType::Cast)}, @@ -2304,7 +2348,7 @@ Action ParserExpressionImpl::tryParseOperand(Layers & layers, IParser::Pos & pos if 
(!layers.back()->popOperand(argument)) return Action::NONE; - function = makeASTFunction(prev_op.function_name, argument, tmp); + function = makeASTFunction(prev_op, argument, tmp); if (!modifyAST(function, subquery_function_type)) return Action::NONE; @@ -2353,7 +2397,8 @@ Action ParserExpressionImpl::tryParseOperand(Layers & layers, IParser::Pos & pos literal_parser.parse(pos, tmp, expected) || asterisk_parser.parse(pos, tmp, expected) || qualified_asterisk_parser.parse(pos, tmp, expected) || - columns_matcher_parser.parse(pos, tmp, expected)) + columns_matcher_parser.parse(pos, tmp, expected) || + qualified_columns_matcher_parser.parse(pos, tmp, expected)) { layers.back()->pushOperand(std::move(tmp)); } @@ -2427,11 +2472,15 @@ Action ParserExpressionImpl::tryParseOperator(Layers & layers, IParser::Pos & po if (cur_op == operators_table.end()) { + ParserAlias alias_parser(layers.back()->allow_alias_without_as_keyword); auto old_pos = pos; - if (layers.back()->allow_alias && ParserAlias(layers.back()->allow_alias_without_as_keyword).parse(pos, tmp, expected)) + if (layers.back()->allow_alias && + !layers.back()->parsed_alias && + alias_parser.parse(pos, tmp, expected) && + layers.back()->insertAlias(tmp)) { - if (layers.back()->insertAlias(tmp)) - return Action::OPERATOR; + layers.back()->parsed_alias = true; + return Action::OPERATOR; } pos = old_pos; return Action::NONE; @@ -2488,7 +2537,7 @@ Action ParserExpressionImpl::tryParseOperator(Layers & layers, IParser::Pos & po } else { - function = makeASTFunction(prev_op.function_name); + function = makeASTFunction(prev_op); if (!layers.back()->popLastNOperands(function->children[0]->children, prev_op.arity)) return Action::NONE; diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index 77540141b53..bf305ba4781 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -640,9 +640,6 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe auto query = std::make_shared(); node = query; - if (as_table_function) - query->as_table_function = as_table_function; - query->attach = attach; query->replace_table = replace; query->create_or_replace = or_replace; @@ -661,6 +658,7 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe query->set(query->columns_list, columns_list); query->set(query->storage, storage); + query->set(query->as_table_function, as_table_function); if (comment) query->set(query->comment, comment); diff --git a/src/Parsers/ParserDescribeTableQuery.cpp b/src/Parsers/ParserDescribeTableQuery.cpp index 0f768e22324..ad6d2c5bcc6 100644 --- a/src/Parsers/ParserDescribeTableQuery.cpp +++ b/src/Parsers/ParserDescribeTableQuery.cpp @@ -33,7 +33,8 @@ bool ParserDescribeTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & ex if (!ParserTableExpression().parse(pos, table_expression, expected)) return false; - query->table_expression = table_expression; + query->children.push_back(std::move(table_expression)); + query->table_expression = query->children.back(); node = query; diff --git a/src/Parsers/ParserExplainQuery.cpp b/src/Parsers/ParserExplainQuery.cpp index d32d4444c36..7fc997f9548 100644 --- a/src/Parsers/ParserExplainQuery.cpp +++ b/src/Parsers/ParserExplainQuery.cpp @@ -19,6 +19,7 @@ bool ParserExplainQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected ParserKeyword s_ast("AST"); ParserKeyword s_explain("EXPLAIN"); ParserKeyword s_syntax("SYNTAX"); + ParserKeyword s_query_tree("QUERY TREE"); 
ParserKeyword s_pipeline("PIPELINE"); ParserKeyword s_plan("PLAN"); ParserKeyword s_estimates("ESTIMATE"); @@ -33,6 +34,8 @@ bool ParserExplainQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected kind = ASTExplainQuery::ExplainKind::ParsedAST; else if (s_syntax.ignore(pos, expected)) kind = ASTExplainQuery::ExplainKind::AnalyzedSyntax; + else if (s_query_tree.ignore(pos, expected)) + kind = ASTExplainQuery::ExplainKind::QueryTree; else if (s_pipeline.ignore(pos, expected)) kind = ASTExplainQuery::ExplainKind::QueryPipeline; else if (s_plan.ignore(pos, expected)) @@ -84,6 +87,13 @@ bool ParserExplainQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected explain_query->setTableFunction(table_function); explain_query->setTableOverride(table_override); } + else if (kind == ASTExplainQuery::ExplainKind::QueryTree) + { + if (select_p.parse(pos, query, expected)) + explain_query->setExplainedQuery(std::move(query)); + else + return false; + } else if (kind == ASTExplainQuery::ExplainKind::CurrentTransaction) { /// Nothing to parse @@ -103,7 +113,9 @@ bool ParserExplainQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected explain_query->setExplainedQuery(std::move(query)); } else + { return false; + } node = std::move(explain_query); return true; diff --git a/src/Parsers/ParserSampleRatio.cpp b/src/Parsers/ParserSampleRatio.cpp index 2f444bcf9e8..b6be04cbcc0 100644 --- a/src/Parsers/ParserSampleRatio.cpp +++ b/src/Parsers/ParserSampleRatio.cpp @@ -14,7 +14,7 @@ static bool parseDecimal(const char * pos, const char * end, ASTSampleRatio::Rat { UInt64 num_before = 0; UInt64 num_after = 0; - Int64 exponent = 0; + Int32 exponent = 0; const char * pos_after_first_num = tryReadIntText(num_before, pos, end); @@ -28,12 +28,12 @@ static bool parseDecimal(const char * pos, const char * end, ASTSampleRatio::Rat if (!has_num_before_point && !has_point) return false; - size_t number_of_digits_after_point = 0; + int number_of_digits_after_point = 0; if (has_point) { const char * pos_after_second_num = tryReadIntText(num_after, pos, end); - number_of_digits_after_point = pos_after_second_num - pos; + number_of_digits_after_point = static_cast(pos_after_second_num - pos); pos = pos_after_second_num; } diff --git a/src/Parsers/SelectUnionMode.cpp b/src/Parsers/SelectUnionMode.cpp new file mode 100644 index 00000000000..6d56a2b219f --- /dev/null +++ b/src/Parsers/SelectUnionMode.cpp @@ -0,0 +1,32 @@ +#include + + +namespace DB +{ + +const char * toString(SelectUnionMode mode) +{ + switch (mode) + { + case SelectUnionMode::UNION_DEFAULT: + return "UNION_DEFAULT"; + case SelectUnionMode::UNION_ALL: + return "UNION_ALL"; + case SelectUnionMode::UNION_DISTINCT: + return "UNION_DISTINCT"; + case SelectUnionMode::EXCEPT_DEFAULT: + return "EXCEPT_DEFAULT"; + case SelectUnionMode::EXCEPT_ALL: + return "EXCEPT_ALL"; + case SelectUnionMode::EXCEPT_DISTINCT: + return "EXCEPT_DISTINCT"; + case SelectUnionMode::INTERSECT_DEFAULT: + return "INTERSECT_DEFAULT"; + case SelectUnionMode::INTERSECT_ALL: + return "INTERSECT_ALL"; + case SelectUnionMode::INTERSECT_DISTINCT: + return "INTERSECT_DEFAULT"; + } +} + +} diff --git a/src/Parsers/SelectUnionMode.h b/src/Parsers/SelectUnionMode.h index ca3637612aa..5c72ce65eb2 100644 --- a/src/Parsers/SelectUnionMode.h +++ b/src/Parsers/SelectUnionMode.h @@ -18,6 +18,8 @@ enum class SelectUnionMode INTERSECT_DISTINCT }; +const char * toString(SelectUnionMode mode); + using SelectUnionModes = std::vector; using SelectUnionModesSet = std::unordered_set; diff --git 
a/src/Parsers/parseQuery.cpp b/src/Parsers/parseQuery.cpp index af8c9dc58a6..4a0c60da48d 100644 --- a/src/Parsers/parseQuery.cpp +++ b/src/Parsers/parseQuery.cpp @@ -236,7 +236,8 @@ ASTPtr tryParseQuery( { const char * query_begin = _out_query_end; Tokens tokens(query_begin, all_queries_end, max_query_size); - IParser::Pos token_iterator(tokens, max_parser_depth); + /// NOTE: consider use UInt32 for max_parser_depth setting. + IParser::Pos token_iterator(tokens, static_cast(max_parser_depth)); if (token_iterator->isEnd() || token_iterator->type == TokenType::Semicolon) diff --git a/src/Parsers/wipePasswordFromQuery.cpp b/src/Parsers/wipePasswordFromQuery.cpp deleted file mode 100644 index d2bc2fea645..00000000000 --- a/src/Parsers/wipePasswordFromQuery.cpp +++ /dev/null @@ -1,22 +0,0 @@ -#include -#include -#include - - -namespace DB -{ - -bool canContainPassword(const IAST & ast) -{ - return ast.as(); -} - -void wipePasswordFromQuery(ASTPtr ast) -{ - if (auto * create_query = ast->as()) - { - create_query->show_password = false; - } -} - -} diff --git a/src/Parsers/wipePasswordFromQuery.h b/src/Parsers/wipePasswordFromQuery.h deleted file mode 100644 index 57e449cce3b..00000000000 --- a/src/Parsers/wipePasswordFromQuery.h +++ /dev/null @@ -1,18 +0,0 @@ -#pragma once - -#include - - -namespace DB -{ - -/// Checks the type of a specified AST and returns true if it can contain a password. -bool canContainPassword(const IAST & ast); - -/// Removes a password or its hash from a query if it's specified there or replaces it with some placeholder. -/// This function is used to prepare a query for storing in logs (we don't want logs to contain sensitive information). -/// The function changes only following types of queries: -/// CREATE/ALTER USER. -void wipePasswordFromQuery(ASTPtr ast); - -} diff --git a/src/Planner/ActionsChain.cpp b/src/Planner/ActionsChain.cpp new file mode 100644 index 00000000000..594d26a679c --- /dev/null +++ b/src/Planner/ActionsChain.cpp @@ -0,0 +1,170 @@ +#include + +#include +#include + +#include +#include +#include +#include + +namespace DB +{ + +ActionsChainStep::ActionsChainStep(ActionsDAGPtr actions_, AvailableOutputColumnsStrategy available_output_columns_stategy_) + : actions(std::move(actions_)) + , available_output_columns_strategy(available_output_columns_stategy_) +{ + initialize(); +} + +ActionsChainStep::ActionsChainStep(ActionsDAGPtr actions_, + AvailableOutputColumnsStrategy available_output_columns_stategy_, + ColumnsWithTypeAndName additional_output_columns_) + : actions(std::move(actions_)) + , available_output_columns_strategy(available_output_columns_stategy_) + , additional_output_columns(std::move(additional_output_columns_)) +{ + initialize(); +} + + +void ActionsChainStep::finalizeInputAndOutputColumns(const NameSet & child_input_columns) +{ + child_required_output_columns_names.clear(); + + auto child_input_columns_copy = child_input_columns; + + std::unordered_set output_nodes_names; + output_nodes_names.reserve(actions->getOutputs().size()); + + for (auto & output_node : actions->getOutputs()) + output_nodes_names.insert(output_node->result_name); + + for (const auto & node : actions->getNodes()) + { + auto it = child_input_columns_copy.find(node.result_name); + if (it == child_input_columns_copy.end()) + continue; + + child_input_columns_copy.erase(it); + child_required_output_columns_names.insert(node.result_name); + + if (output_nodes_names.contains(node.result_name)) + continue; + + actions->getOutputs().push_back(&node); + 
output_nodes_names.insert(node.result_name); + } + + actions->removeUnusedActions(); + /// TODO: Analyzer fix ActionsDAG input and constant nodes with same name + actions->projectInput(); + initialize(); +} + +void ActionsChainStep::dump(WriteBuffer & buffer) const +{ + buffer << "DAG" << '\n'; + buffer << actions->dumpDAG(); + + if (!additional_output_columns.empty()) + { + buffer << "Additional output columns " << additional_output_columns.size() << '\n'; + for (const auto & column : additional_output_columns) + buffer << "Name " << column.name << " type " << column.type->getName() << '\n'; + } + + if (!child_required_output_columns_names.empty()) + { + buffer << "Child required output columns " << boost::join(child_required_output_columns_names, ", "); + buffer << '\n'; + } +} + +String ActionsChainStep::dump() const +{ + WriteBufferFromOwnString buffer; + dump(buffer); + + return buffer.str(); +} + +void ActionsChainStep::initialize() +{ + auto required_columns_names = actions->getRequiredColumnsNames(); + input_columns_names = NameSet(required_columns_names.begin(), required_columns_names.end()); + + available_output_columns.clear(); + + /// TODO: Analyzer fix ActionsDAG input and constant nodes with same name + std::unordered_set available_output_columns_names; + + if (available_output_columns_strategy == AvailableOutputColumnsStrategy::ALL_NODES) + { + for (const auto & node : actions->getNodes()) + { + if (available_output_columns_names.contains(node.result_name)) + continue; + + available_output_columns.emplace_back(node.column, node.result_type, node.result_name); + available_output_columns_names.insert(node.result_name); + } + } + else if (available_output_columns_strategy == AvailableOutputColumnsStrategy::OUTPUT_NODES) + { + for (const auto & node : actions->getOutputs()) + { + if (available_output_columns_names.contains(node->result_name)) + continue; + + available_output_columns.emplace_back(node->column, node->result_type, node->result_name); + available_output_columns_names.insert(node->result_name); + } + } + + available_output_columns.insert(available_output_columns.end(), additional_output_columns.begin(), additional_output_columns.end()); +} + +void ActionsChain::finalize() +{ + if (steps.empty()) + return; + + /// For last chain step there are no columns required in child nodes + NameSet empty_child_input_columns; + steps.back().get()->finalizeInputAndOutputColumns(empty_child_input_columns); + + Int64 steps_last_index = steps.size() - 1; + for (Int64 i = steps_last_index; i >= 1; --i) + { + auto & current_step = steps[i]; + auto & previous_step = steps[i - 1]; + + previous_step->finalizeInputAndOutputColumns(current_step->getInputColumnNames()); + } +} + +void ActionsChain::dump(WriteBuffer & buffer) const +{ + size_t steps_size = steps.size(); + + for (size_t i = 0; i < steps_size; ++i) + { + const auto & step = steps[i]; + buffer << "Step " << i << '\n'; + step->dump(buffer); + + buffer << '\n'; + } +} + +String ActionsChain::dump() const +{ + WriteBufferFromOwnString buffer; + dump(buffer); + + return buffer.str(); +} + +} diff --git a/src/Planner/ActionsChain.h b/src/Planner/ActionsChain.h new file mode 100644 index 00000000000..e2791ab7e35 --- /dev/null +++ b/src/Planner/ActionsChain.h @@ -0,0 +1,239 @@ +#pragma once + +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +/** Chain of query actions steps. This class is needed to eliminate unnecessary actions calculations. + * Each step is represented by actions DAG. 
+ * + * Consider such example query: + * SELECT expr(id) FROM test_table WHERE expr(id) > 0. + * + * We want to reuse expr(id) from previous expressions step, and not recalculate it in projection. + * To do this we build a chain of all query action steps. + * For example: + * 1. Before where. + * 2. Before order by. + * 3. Projection. + * + * Initially root of chain is initialized with join tree query plan header. + * Each next chain step, must be initialized with previous step available output columns. + * That way we forward all available output columns (functions, columns, aliases) from first step of the chain to the + * last step. After chain is build we can finalize it. + * + * Each step has input columns (some of them are not necessary) and output columns. Before chain finalize output columns + * contain only necessary actions for step output calculation. + * For each step starting from last (i), we add columns that are necessary for this step to previous step (i - 1), + * and remove unused input columns of previous step(i - 1). + * That way we reuse already calculated expressions from first step to last step. + */ + +class ActionsChainStep; +using ActionsChainStepPtr = std::unique_ptr; +using ActionsChainSteps = std::vector; + +/// Actions chain step represent single step in actions chain. +class ActionsChainStep +{ +public: + /// Available output columns strategy for actions chain step + enum class AvailableOutputColumnsStrategy + { + ALL_NODES, + OUTPUT_NODES + }; + + /** Initialize actions step with actions dag. + * Input column names initialized using actions dag nodes with INPUT type. + * + * If available output columns strategy is ALL_NODES, then available output columns initialized using actions dag nodes. + * If available output columns strategy is OUTPUT_NODES, then available output columns initialized using actions dag output nodes. + */ + explicit ActionsChainStep(ActionsDAGPtr actions_, AvailableOutputColumnsStrategy available_output_columns_stategy_ = AvailableOutputColumnsStrategy::ALL_NODES); + + explicit ActionsChainStep(ActionsDAGPtr actions_, + AvailableOutputColumnsStrategy available_output_columns_stategy_, + ColumnsWithTypeAndName additional_output_columns_); + + /// Get actions + ActionsDAGPtr & getActions() + { + return actions; + } + + /// Get actions + const ActionsDAGPtr & getActions() const + { + return actions; + } + + /// Get available output columns + const ColumnsWithTypeAndName & getAvailableOutputColumns() const + { + return available_output_columns; + } + + /// Get input column names + const NameSet & getInputColumnNames() const + { + return input_columns_names; + } + + /** Get child required output columns names. + * Initialized during finalizeOutputColumns method call. + */ + const NameSet & getChildRequiredOutputColumnsNames() const + { + return child_required_output_columns_names; + } + + /** Finalize step output columns and remove unnecessary input columns. + * If actions dag node has same name as child input column, it is added to actions output nodes. 
+ */ + void finalizeInputAndOutputColumns(const NameSet & child_input_columns); + + /// Dump step into buffer + void dump(WriteBuffer & buffer) const; + + /// Dump step + String dump() const; + +private: + void initialize(); + + ActionsDAGPtr actions; + + AvailableOutputColumnsStrategy available_output_columns_strategy; + + NameSet input_columns_names; + + NameSet child_required_output_columns_names; + + ColumnsWithTypeAndName available_output_columns; + + ColumnsWithTypeAndName additional_output_columns; +}; + +/// Query actions chain +class ActionsChain +{ +public: + /// Add step into actions chain + void addStep(ActionsChainStepPtr step) + { + steps.emplace_back(std::move(step)); + } + + /// Get steps + const ActionsChainSteps & getSteps() const + { + return steps; + } + + /// Get steps size + size_t getStepsSize() const + { + return steps.size(); + } + + const ActionsChainStepPtr & at(size_t index) const + { + if (index >= steps.size()) + throw std::out_of_range("actions chain access is out of range"); + + return steps[index]; + } + + ActionsChainStepPtr & at(size_t index) + { + if (index >= steps.size()) + throw std::out_of_range("actions chain access is out of range"); + + return steps[index]; + } + + ActionsChainStepPtr & operator[](size_t index) + { + return steps[index]; + } + + const ActionsChainStepPtr & operator[](size_t index) const + { + return steps[index]; + } + + /// Get last step + ActionsChainStep * getLastStep() + { + return steps.back().get(); + } + + /// Get last step or throw exception if chain is empty + ActionsChainStep * getLastStepOrThrow() + { + if (steps.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "ActionsChain is empty"); + + return steps.back().get(); + } + + /// Get last step index + size_t getLastStepIndex() + { + return steps.size() - 1; + } + + /// Get last step index or throw exception if chain is empty + size_t getLastStepIndexOrThrow() + { + if (steps.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "ActionsChain is empty"); + + return steps.size() - 1; + } + + /// Get last step available output columns + const ColumnsWithTypeAndName & getLastStepAvailableOutputColumns() const + { + return steps.back()->getAvailableOutputColumns(); + } + + /// Get last step available output columns or throw exception if chain is empty + const ColumnsWithTypeAndName & getLastStepAvailableOutputColumnsOrThrow() const + { + if (steps.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "ActionsChain is empty"); + + return steps.back()->getAvailableOutputColumns(); + } + + /// Get last step available output columns or null if chain is empty + const ColumnsWithTypeAndName * getLastStepAvailableOutputColumnsOrNull() const + { + if (steps.empty()) + return nullptr; + + return &steps.back()->getAvailableOutputColumns(); + } + + /// Finalize chain + void finalize(); + + /// Dump chain into buffer + void dump(WriteBuffer & buffer) const; + + /// Dump chain + String dump() const; + +private: + ActionsChainSteps steps; +}; + +} diff --git a/src/Planner/CMakeLists.txt b/src/Planner/CMakeLists.txt new file mode 100644 index 00000000000..766767b5c13 --- /dev/null +++ b/src/Planner/CMakeLists.txt @@ -0,0 +1,7 @@ +if (ENABLE_TESTS) + add_subdirectory(tests) +endif() + +if (ENABLE_EXAMPLES) + add_subdirectory(examples) +endif() diff --git a/src/Planner/CollectSets.cpp b/src/Planner/CollectSets.cpp new file mode 100644 index 00000000000..aa7014aba48 --- /dev/null +++ b/src/Planner/CollectSets.cpp @@ -0,0 +1,101 @@ +#include + +#include + +#include + +#include +#include 
+#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int UNSUPPORTED_METHOD; +} + +namespace +{ + +class CollectSetsVisitor : public ConstInDepthQueryTreeVisitor +{ +public: + explicit CollectSetsVisitor(PlannerContext & planner_context_) + : planner_context(planner_context_) + {} + + void visitImpl(const QueryTreeNodePtr & node) + { + auto * function_node = node->as(); + if (!function_node || !isNameOfInFunction(function_node->getFunctionName())) + return; + + auto in_first_argument = function_node->getArguments().getNodes().at(0); + auto in_second_argument = function_node->getArguments().getNodes().at(1); + auto in_second_argument_node_type = in_second_argument->getNodeType(); + + const auto & settings = planner_context.getQueryContext()->getSettingsRef(); + + String set_key = planner_context.createSetKey(in_second_argument); + + if (planner_context.hasSet(set_key)) + return; + + /// Tables and table functions are replaced with subquery at Analysis stage, except special Set table. + auto * second_argument_table = in_second_argument->as(); + StorageSet * storage_set = second_argument_table != nullptr ? dynamic_cast(second_argument_table->getStorage().get()) : nullptr; + + if (storage_set) + { + planner_context.registerSet(set_key, PlannerSet(storage_set->getSet())); + } + else if (auto constant_value = in_second_argument->getConstantValueOrNull()) + { + auto set = makeSetForConstantValue( + in_first_argument->getResultType(), + constant_value->getValue(), + constant_value->getType(), + settings); + + planner_context.registerSet(set_key, PlannerSet(std::move(set))); + } + else if (in_second_argument_node_type == QueryTreeNodeType::QUERY || + in_second_argument_node_type == QueryTreeNodeType::UNION) + { + SizeLimits size_limits_for_set = {settings.max_rows_in_set, settings.max_bytes_in_set, settings.set_overflow_mode}; + bool tranform_null_in = settings.transform_null_in; + auto set = std::make_shared(size_limits_for_set, false /*fill_set_elements*/, tranform_null_in); + + planner_context.registerSet(set_key, PlannerSet(std::move(set), in_second_argument)); + } + else + { + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, + "Function '{}' is supported only if second argument is constant or table expression", + function_node->getFunctionName()); + } + } + + static bool needChildVisit(const QueryTreeNodePtr &, const QueryTreeNodePtr & child_node) + { + return !(child_node->getNodeType() == QueryTreeNodeType::QUERY || child_node->getNodeType() == QueryTreeNodeType::UNION); + } + +private: + PlannerContext & planner_context; +}; + +} + +void collectSets(const QueryTreeNodePtr & node, PlannerContext & planner_context) +{ + CollectSetsVisitor visitor(planner_context); + visitor.visit(node); +} + +} diff --git a/src/Planner/CollectSets.h b/src/Planner/CollectSets.h new file mode 100644 index 00000000000..94f792e877b --- /dev/null +++ b/src/Planner/CollectSets.h @@ -0,0 +1,15 @@ +#pragma once + +#include + +#include + +namespace DB +{ + +/** Collect prepared sets and sets for subqueries that are necessary to execute IN function and its variations. + * Collected sets are registered in planner context. 
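+  *
+  * For illustration, the cases handled by the visitor above:
+  *   - `x IN set_table` (a table with the Set engine) registers the already filled set of that storage;
+  *   - `x IN (1, 2, 3)` builds the set immediately from the constant value;
+  *   - `x IN (SELECT ...)` registers an empty set together with the subquery node, to be filled later
+  *     when subquery plans for sets are built.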
+ */ +void collectSets(const QueryTreeNodePtr & node, PlannerContext & planner_context); + +} diff --git a/src/Planner/CollectTableExpressionData.cpp b/src/Planner/CollectTableExpressionData.cpp new file mode 100644 index 00000000000..30ccc541507 --- /dev/null +++ b/src/Planner/CollectTableExpressionData.cpp @@ -0,0 +1,116 @@ +#include + +#include + +#include +#include +#include +#include +#include +#include + +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int UNSUPPORTED_METHOD; +} + +namespace +{ + +class CollectSourceColumnsVisitor : public InDepthQueryTreeVisitor +{ +public: + explicit CollectSourceColumnsVisitor(PlannerContext & planner_context_) + : planner_context(planner_context_) + {} + + void visitImpl(QueryTreeNodePtr & node) + { + auto * column_node = node->as(); + if (!column_node) + return; + + auto column_source_node = column_node->getColumnSource(); + auto column_source_node_type = column_source_node->getNodeType(); + + if (column_source_node_type == QueryTreeNodeType::ARRAY_JOIN || + column_source_node_type == QueryTreeNodeType::LAMBDA) + return; + + /// JOIN using expression + if (column_node->hasExpression() && column_source_node->getNodeType() == QueryTreeNodeType::JOIN) + return; + + auto & table_expression_data = planner_context.getOrCreateTableExpressionData(column_source_node); + + if (column_node->hasExpression()) + { + /// Replace ALIAS column with expression + table_expression_data.addAliasColumnName(column_node->getColumnName()); + node = column_node->getExpression(); + visitImpl(node); + return; + } + + if (column_source_node_type != QueryTreeNodeType::TABLE && + column_source_node_type != QueryTreeNodeType::TABLE_FUNCTION && + column_source_node_type != QueryTreeNodeType::QUERY && + column_source_node_type != QueryTreeNodeType::UNION) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Expected table, table function, query or union column source. 
Actual {}", + column_source_node->formatASTForErrorMessage()); + + bool column_already_exists = table_expression_data.hasColumn(column_node->getColumnName()); + if (column_already_exists) + return; + + auto column_identifier = planner_context.getGlobalPlannerContext()->createColumnIdentifier(node); + table_expression_data.addColumn(column_node->getColumn(), column_identifier); + } + + static bool needChildVisit(const QueryTreeNodePtr &, const QueryTreeNodePtr & child_node) + { + return !(child_node->getNodeType() == QueryTreeNodeType::QUERY || child_node->getNodeType() == QueryTreeNodeType::UNION); + } + +private: + PlannerContext & planner_context; +}; + +} + +void collectTableExpressionData(QueryTreeNodePtr & query_node, PlannerContext & planner_context) +{ + auto & query_node_typed = query_node->as(); + auto table_expressions_nodes = extractTableExpressions(query_node_typed.getJoinTree()); + + for (auto & table_expression_node : table_expressions_nodes) + { + auto & table_expression_data = planner_context.getOrCreateTableExpressionData(table_expression_node); + + if (auto * table_node = table_expression_node->as()) + { + bool storage_is_remote = table_node->getStorage()->isRemote(); + table_expression_data.setIsRemote(storage_is_remote); + } + else if (auto * table_function_node = table_expression_node->as()) + { + bool storage_is_remote = table_function_node->getStorage()->isRemote(); + table_expression_data.setIsRemote(storage_is_remote); + } + + if (table_expression_data.isRemote()) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Remote storages are not supported"); + } + + CollectSourceColumnsVisitor collect_source_columns_visitor(planner_context); + collect_source_columns_visitor.visit(query_node); +} + +} diff --git a/src/Planner/CollectTableExpressionData.h b/src/Planner/CollectTableExpressionData.h new file mode 100644 index 00000000000..f4e2d579dca --- /dev/null +++ b/src/Planner/CollectTableExpressionData.h @@ -0,0 +1,17 @@ +#pragma once + +#include + +#include + +namespace DB +{ + +/** Collect table expression data for query node. + * Collected table expression data is registered in planner context. + * + * ALIAS table column nodes are registered in table expression data and replaced in query tree with inner alias expression. + */ +void collectTableExpressionData(QueryTreeNodePtr & query_node, PlannerContext & planner_context); + +} diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp new file mode 100644 index 00000000000..97f82d06463 --- /dev/null +++ b/src/Planner/Planner.cpp @@ -0,0 +1,873 @@ +#include + +#include + +#include + +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int UNSUPPORTED_METHOD; + extern const int LOGICAL_ERROR; + extern const int BAD_ARGUMENTS; + extern const int TOO_DEEP_SUBQUERIES; + extern const int NOT_IMPLEMENTED; +} + +/** ClickHouse query planner. + * + * TODO: Support JOIN with JOIN engine. + * TODO: Support VIEWs. 
+ * TODO: JOIN drop unnecessary columns after ON, USING section + * TODO: Support RBAC. Support RBAC for ALIAS columns + * TODO: Support distributed query processing + * TODO: Support PREWHERE + * TODO: Support DISTINCT + * TODO: Support trivial count optimization + * TODO: Support projections + * TODO: Support read in order optimization + * TODO: UNION storage limits + * TODO: Support max streams + * TODO: Support ORDER BY read in order optimization + * TODO: Support GROUP BY read in order optimization + * TODO: Support Key Condition. Support indexes for IN function. + * TODO: Better support for quota and limits. + */ + +namespace +{ + +/** Check that table and table function table expressions from planner context support transactions. + * + * There is precondition that table expression data for table expression nodes is collected in planner context. + */ +void checkStoragesSupportTransactions(const PlannerContextPtr & planner_context) +{ + const auto & query_context = planner_context->getQueryContext(); + if (query_context->getSettingsRef().throw_on_unsupported_query_inside_transaction) + return; + + if (!query_context->getCurrentTransaction()) + return; + + for (const auto & [table_expression, _] : planner_context->getTableExpressionNodeToData()) + { + StoragePtr storage; + if (auto * table_node = table_expression->as()) + storage = table_node->getStorage(); + else if (auto * table_function_node = table_expression->as()) + storage = table_function_node->getStorage(); + + if (storage->supportsTransactions()) + continue; + + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "Storage {} (table {}) does not support transactions", + storage->getName(), + storage->getStorageID().getNameForLogs()); + } +} + +void addBuildSubqueriesForSetsStepIfNeeded(QueryPlan & query_plan, const SelectQueryOptions & select_query_options, const PlannerContextPtr & planner_context) +{ + PreparedSets::SubqueriesForSets subqueries_for_sets; + const auto & set_key_to_planner_set = planner_context->getRegisteredSets(); + + for (const auto & [key, planner_set] : set_key_to_planner_set) + { + const auto subquery_node = planner_set.getSubqueryNode(); + if (!subquery_node) + continue; + + auto subquery_context = buildSubqueryContext(planner_context->getQueryContext()); + auto subquery_options = select_query_options.subquery(); + + Planner subquery_planner( + subquery_node, + subquery_options, + std::move(subquery_context), + planner_context->getGlobalPlannerContext()); + subquery_planner.buildQueryPlanIfNeeded(); + + SubqueryForSet subquery_for_set; + subquery_for_set.set = planner_set.getSet(); + subquery_for_set.source = std::make_unique(std::move(subquery_planner).extractQueryPlan()); + + subqueries_for_sets.emplace(key, std::move(subquery_for_set)); + } + + addCreatingSetsStep(query_plan, std::move(subqueries_for_sets), planner_context->getQueryContext()); +} + +/// Extend lifetime of query context, storages, and table locks +void extendQueryContextAndStoragesLifetime(QueryPlan & query_plan, const PlannerContextPtr & planner_context) +{ + query_plan.addInterpreterContext(planner_context->getQueryContext()); + + for (const auto & [table_expression, _] : planner_context->getTableExpressionNodeToData()) + { + if (auto * table_node = table_expression->as()) + { + query_plan.addStorageHolder(table_node->getStorage()); + query_plan.addTableLock(table_node->getStorageLock()); + } + else if (auto * table_function_node = table_expression->as()) + { + query_plan.addStorageHolder(table_function_node->getStorage()); + } + } +} + 
+} + +Planner::Planner(const QueryTreeNodePtr & query_tree_, + const SelectQueryOptions & select_query_options_, + ContextPtr context_) + : query_tree(query_tree_) + , select_query_options(select_query_options_) + , planner_context(std::make_shared(std::move(context_), std::make_shared())) +{ + initialize(); +} + +Planner::Planner(const QueryTreeNodePtr & query_tree_, + const SelectQueryOptions & select_query_options_, + ContextPtr context_, + GlobalPlannerContextPtr global_planner_context_) + : query_tree(query_tree_) + , select_query_options(select_query_options_) + , planner_context(std::make_shared(std::move(context_), std::move(global_planner_context_))) +{ + initialize(); +} + +void Planner::initialize() +{ + checkStackSize(); + + if (query_tree->getNodeType() != QueryTreeNodeType::QUERY && + query_tree->getNodeType() != QueryTreeNodeType::UNION) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, + "Expected QUERY or UNION node. Actual {}", + query_tree->formatASTForErrorMessage()); + + auto & query_context = planner_context->getQueryContext(); + + size_t max_subquery_depth = query_context->getSettingsRef().max_subquery_depth; + if (max_subquery_depth && select_query_options.subquery_depth > max_subquery_depth) + throw Exception(ErrorCodes::TOO_DEEP_SUBQUERIES, + "Too deep subqueries. Maximum: {}", + max_subquery_depth); + + auto * query_node = query_tree->as(); + if (!query_node) + return; + + bool need_apply_query_settings = query_node->hasSettingsChanges(); + + const auto & client_info = query_context->getClientInfo(); + auto min_major = static_cast(DBMS_MIN_MAJOR_VERSION_WITH_CURRENT_AGGREGATION_VARIANT_SELECTION_METHOD); + auto min_minor = static_cast(DBMS_MIN_MINOR_VERSION_WITH_CURRENT_AGGREGATION_VARIANT_SELECTION_METHOD); + + bool need_to_disable_two_level_aggregation = client_info.query_kind == ClientInfo::QueryKind::SECONDARY_QUERY && + client_info.connection_client_version_major < min_major && + client_info.connection_client_version_minor < min_minor; + + if (need_apply_query_settings || need_to_disable_two_level_aggregation) + { + auto updated_context = Context::createCopy(query_context); + + if (need_apply_query_settings) + updated_context->applySettingsChanges(query_node->getSettingsChanges()); + + /// Disable two-level aggregation due to version incompatibility + if (need_to_disable_two_level_aggregation) + { + updated_context->setSetting("group_by_two_level_threshold", Field(0)); + updated_context->setSetting("group_by_two_level_threshold_bytes", Field(0)); + } + + query_context = std::move(updated_context); + } +} + +void Planner::buildQueryPlanIfNeeded() +{ + if (query_plan.isInitialized()) + return; + + auto query_context = planner_context->getQueryContext(); + + if (auto * union_query_tree = query_tree->as()) + { + auto union_mode = union_query_tree->getUnionMode(); + if (union_mode == SelectUnionMode::UNION_DEFAULT || + union_mode == SelectUnionMode::EXCEPT_DEFAULT || + union_mode == SelectUnionMode::INTERSECT_DEFAULT) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "UNION mode must be initialized"); + + size_t queries_size = union_query_tree->getQueries().getNodes().size(); + + std::vector> query_plans; + query_plans.reserve(queries_size); + + Blocks query_plans_headers; + query_plans_headers.reserve(queries_size); + + for (auto & query_node : union_query_tree->getQueries().getNodes()) + { + Planner query_planner(query_node, select_query_options, query_context); + query_planner.buildQueryPlanIfNeeded(); + auto query_node_plan = 
std::make_unique(std::move(query_planner).extractQueryPlan()); + query_plans_headers.push_back(query_node_plan->getCurrentDataStream().header); + query_plans.push_back(std::move(query_node_plan)); + } + + Block union_common_header = buildCommonHeaderForUnion(query_plans_headers); + DataStreams query_plans_streams; + query_plans_streams.reserve(query_plans.size()); + + for (auto & query_node_plan : query_plans) + { + if (blocksHaveEqualStructure(query_node_plan->getCurrentDataStream().header, union_common_header)) + { + query_plans_streams.push_back(query_node_plan->getCurrentDataStream()); + continue; + } + + auto actions_dag = ActionsDAG::makeConvertingActions( + query_node_plan->getCurrentDataStream().header.getColumnsWithTypeAndName(), + union_common_header.getColumnsWithTypeAndName(), + ActionsDAG::MatchColumnsMode::Position); + auto converting_step = std::make_unique(query_node_plan->getCurrentDataStream(), std::move(actions_dag)); + converting_step->setStepDescription("Conversion before UNION"); + query_node_plan->addStep(std::move(converting_step)); + + query_plans_streams.push_back(query_node_plan->getCurrentDataStream()); + } + + const auto & settings = query_context->getSettingsRef(); + auto max_threads = settings.max_threads; + + bool is_distinct = union_mode == SelectUnionMode::UNION_DISTINCT || union_mode == SelectUnionMode::INTERSECT_DISTINCT || + union_mode == SelectUnionMode::EXCEPT_DISTINCT; + + if (union_mode == SelectUnionMode::UNION_ALL || union_mode == SelectUnionMode::UNION_DISTINCT) + { + auto union_step = std::make_unique(std::move(query_plans_streams), max_threads); + query_plan.unitePlans(std::move(union_step), std::move(query_plans)); + } + else if (union_mode == SelectUnionMode::INTERSECT_ALL || union_mode == SelectUnionMode::INTERSECT_DISTINCT || + union_mode == SelectUnionMode::EXCEPT_ALL || union_mode == SelectUnionMode::EXCEPT_DISTINCT) + { + IntersectOrExceptStep::Operator intersect_or_except_operator = IntersectOrExceptStep::Operator::UNKNOWN; + + if (union_mode == SelectUnionMode::INTERSECT_ALL) + intersect_or_except_operator = IntersectOrExceptStep::Operator::INTERSECT_ALL; + else if (union_mode == SelectUnionMode::INTERSECT_DISTINCT) + intersect_or_except_operator = IntersectOrExceptStep::Operator::INTERSECT_DISTINCT; + else if (union_mode == SelectUnionMode::EXCEPT_ALL) + intersect_or_except_operator = IntersectOrExceptStep::Operator::EXCEPT_ALL; + else if (union_mode == SelectUnionMode::EXCEPT_DISTINCT) + intersect_or_except_operator = IntersectOrExceptStep::Operator::EXCEPT_DISTINCT; + + auto union_step = std::make_unique(std::move(query_plans_streams), intersect_or_except_operator, max_threads); + query_plan.unitePlans(std::move(union_step), std::move(query_plans)); + } + + if (is_distinct) + { + /// Add distinct transform + SizeLimits limits(settings.max_rows_in_distinct, settings.max_bytes_in_distinct, settings.distinct_overflow_mode); + + auto distinct_step = std::make_unique( + query_plan.getCurrentDataStream(), + limits, + 0 /*limit hint*/, + query_plan.getCurrentDataStream().header.getNames(), + false /*pre distinct*/, + settings.optimize_distinct_in_order); + + query_plan.addStep(std::move(distinct_step)); + } + + return; + } + + auto & query_node = query_tree->as(); + + if (query_node.hasPrewhere()) + { + if (query_node.hasWhere()) + { + auto function_node = std::make_shared("and"); + auto and_function = FunctionFactory::instance().get("and", query_context); + function_node->resolveAsFunction(std::move(and_function), std::make_shared()); + 
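+            /// The PREWHERE condition is folded into WHERE as `and(prewhere, where)` for now (see the PREWHERE item in the planner TODO list above).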
function_node->getArguments().getNodes() = {query_node.getPrewhere(), query_node.getWhere()}; + query_node.getWhere() = std::move(function_node); + query_node.getPrewhere() = {}; + } + else + { + query_node.getWhere() = query_node.getPrewhere(); + } + } + + SelectQueryInfo select_query_info; + select_query_info.original_query = queryNodeToSelectQuery(query_tree); + select_query_info.query = select_query_info.original_query; + select_query_info.planner_context = planner_context; + + StorageLimitsList storage_limits; + storage_limits.push_back(buildStorageLimits(*query_context, select_query_options)); + select_query_info.storage_limits = std::make_shared(storage_limits); + + collectTableExpressionData(query_tree, *planner_context); + checkStoragesSupportTransactions(planner_context); + + collectSets(query_tree, *planner_context); + + query_plan = buildQueryPlanForJoinTreeNode(query_node.getJoinTree(), select_query_info, select_query_options, planner_context); + auto expression_analysis_result = buildExpressionAnalysisResult(query_tree, query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName(), planner_context); + + if (expression_analysis_result.hasWhere()) + { + const auto & where_analysis_result = expression_analysis_result.getWhere(); + auto where_step = std::make_unique(query_plan.getCurrentDataStream(), + where_analysis_result.filter_actions, + where_analysis_result.filter_column_name, + where_analysis_result.remove_filter_column); + where_step->setStepDescription("WHERE"); + query_plan.addStep(std::move(where_step)); + } + + bool having_executed = false; + + if (expression_analysis_result.hasAggregation()) + { + const auto & aggregation_analysis_result = expression_analysis_result.getAggregation(); + + if (aggregation_analysis_result.before_aggregation_actions) + { + auto expression_before_aggregation = std::make_unique(query_plan.getCurrentDataStream(), aggregation_analysis_result.before_aggregation_actions); + expression_before_aggregation->setStepDescription("Before GROUP BY"); + query_plan.addStep(std::move(expression_before_aggregation)); + } + + const Settings & settings = planner_context->getQueryContext()->getSettingsRef(); + + const auto stats_collecting_params = Aggregator::Params::StatsCollectingParams( + select_query_info.query, + settings.collect_hash_table_stats_during_aggregation, + settings.max_entries_for_hash_table_stats, + settings.max_size_to_preallocate_for_aggregation); + + bool aggregate_overflow_row = + query_node.isGroupByWithTotals() && + settings.max_rows_to_group_by && + settings.group_by_overflow_mode == OverflowMode::ANY && + settings.totals_mode != TotalsMode::AFTER_HAVING_EXCLUSIVE; + + Aggregator::Params aggregator_params = Aggregator::Params( + aggregation_analysis_result.aggregation_keys, + aggregation_analysis_result.aggregate_descriptions, + aggregate_overflow_row, + settings.max_rows_to_group_by, + settings.group_by_overflow_mode, + settings.group_by_two_level_threshold, + settings.group_by_two_level_threshold_bytes, + settings.max_bytes_before_external_group_by, + settings.empty_result_for_aggregation_by_empty_set + || (settings.empty_result_for_aggregation_by_constant_keys_on_empty_set && aggregation_analysis_result.aggregation_keys.empty() + && aggregation_analysis_result.group_by_with_constant_keys), + planner_context->getQueryContext()->getTempDataOnDisk(), + settings.max_threads, + settings.min_free_disk_space_for_temporary_data, + settings.compile_aggregate_expressions, + settings.min_count_to_compile_aggregate_expression, + 
settings.max_block_size, + settings.enable_software_prefetch_in_aggregation, + /* only_merge */ false, + stats_collecting_params + ); + + SortDescription group_by_sort_description; + + auto merge_threads = settings.max_threads; + auto temporary_data_merge_threads = settings.aggregation_memory_efficient_merge_threads + ? static_cast(settings.aggregation_memory_efficient_merge_threads) + : static_cast(settings.max_threads); + + bool storage_has_evenly_distributed_read = false; + const auto & table_expression_node_to_data = planner_context->getTableExpressionNodeToData(); + + if (table_expression_node_to_data.size() == 1) + { + auto it = table_expression_node_to_data.begin(); + const auto & table_expression_node = it->first; + if (const auto * table_node = table_expression_node->as()) + storage_has_evenly_distributed_read = table_node->getStorage()->hasEvenlyDistributedRead(); + else if (const auto * table_function_node = table_expression_node->as()) + storage_has_evenly_distributed_read = table_function_node->getStorageOrThrow()->hasEvenlyDistributedRead(); + } + + const bool should_produce_results_in_order_of_bucket_number + = select_query_options.to_stage == QueryProcessingStage::WithMergeableState && settings.distributed_aggregation_memory_efficient; + + InputOrderInfoPtr input_order_info; + bool aggregate_final = + select_query_options.to_stage > QueryProcessingStage::WithMergeableState && + !query_node.isGroupByWithTotals() && !query_node.isGroupByWithRollup() && !query_node.isGroupByWithCube(); + + auto aggregating_step = std::make_unique( + query_plan.getCurrentDataStream(), + aggregator_params, + aggregation_analysis_result.grouping_sets_parameters_list, + aggregate_final, + settings.max_block_size, + settings.aggregation_in_order_max_block_bytes, + merge_threads, + temporary_data_merge_threads, + storage_has_evenly_distributed_read, + settings.group_by_use_nulls, + std::move(input_order_info), + std::move(group_by_sort_description), + should_produce_results_in_order_of_bucket_number); + query_plan.addStep(std::move(aggregating_step)); + + if (query_node.isGroupByWithRollup()) + { + auto rollup_step = std::make_unique(query_plan.getCurrentDataStream(), std::move(aggregator_params), true /*final*/, settings.group_by_use_nulls); + query_plan.addStep(std::move(rollup_step)); + } + else if (query_node.isGroupByWithCube()) + { + auto cube_step = std::make_unique(query_plan.getCurrentDataStream(), std::move(aggregator_params), true /*final*/, settings.group_by_use_nulls); + query_plan.addStep(std::move(cube_step)); + } + + if (query_node.isGroupByWithTotals()) + { + const auto & having_analysis_result = expression_analysis_result.getHaving(); + bool final = !query_node.isGroupByWithRollup() && !query_node.isGroupByWithCube(); + having_executed = true; + + auto totals_having_step = std::make_unique( + query_plan.getCurrentDataStream(), + aggregation_analysis_result.aggregate_descriptions, + aggregate_overflow_row, + having_analysis_result.filter_actions, + having_analysis_result.filter_column_name, + having_analysis_result.remove_filter_column, + settings.totals_mode, + settings.totals_auto_threshold, + final); + + query_plan.addStep(std::move(totals_having_step)); + } + } + + if (!having_executed && expression_analysis_result.hasHaving()) + { + const auto & having_analysis_result = expression_analysis_result.getHaving(); + + auto having_step = std::make_unique(query_plan.getCurrentDataStream(), + having_analysis_result.filter_actions, + having_analysis_result.filter_column_name, + 
having_analysis_result.remove_filter_column); + having_step->setStepDescription("HAVING"); + query_plan.addStep(std::move(having_step)); + } + + if (expression_analysis_result.hasWindow()) + { + const auto & window_analysis_result = expression_analysis_result.getWindow(); + + if (window_analysis_result.before_window_actions) + { + auto expression_step_before_window = std::make_unique(query_plan.getCurrentDataStream(), window_analysis_result.before_window_actions); + expression_step_before_window->setStepDescription("Before WINDOW"); + query_plan.addStep(std::move(expression_step_before_window)); + } + + auto window_descriptions = window_analysis_result.window_descriptions; + sortWindowDescriptions(window_descriptions); + + size_t window_descriptions_size = window_descriptions.size(); + + const auto & settings = query_context->getSettingsRef(); + for (size_t i = 0; i < window_descriptions_size; ++i) + { + const auto & window_description = window_descriptions[i]; + + /** We don't need to sort again if the input from previous window already + * has suitable sorting. Also don't create sort steps when there are no + * columns to sort by, because the sort nodes are confused by this. It + * happens in case of `over ()`. + */ + if (!window_description.full_sort_description.empty() && + (i == 0 || !sortDescriptionIsPrefix(window_description.full_sort_description, window_descriptions[i - 1].full_sort_description))) + { + auto sorting_step = std::make_unique( + query_plan.getCurrentDataStream(), + window_description.full_sort_description, + settings.max_block_size, + 0 /*limit*/, + SizeLimits(settings.max_rows_to_sort, settings.max_bytes_to_sort, settings.sort_overflow_mode), + settings.max_bytes_before_remerge_sort, + settings.remerge_sort_lowered_memory_bytes_ratio, + settings.max_bytes_before_external_sort, + query_context->getTempDataOnDisk(), + settings.min_free_disk_space_for_temporary_data, + settings.optimize_sorting_by_input_stream_properties); + + sorting_step->setStepDescription("Sorting for window '" + window_description.window_name + "'"); + query_plan.addStep(std::move(sorting_step)); + } + + auto window_step = std::make_unique(query_plan.getCurrentDataStream(), window_description, window_description.window_functions); + window_step->setStepDescription("Window step for window '" + window_description.window_name + "'"); + query_plan.addStep(std::move(window_step)); + } + } + + const auto & projection_analysis_result = expression_analysis_result.getProjection(); + auto expression_step_projection = std::make_unique(query_plan.getCurrentDataStream(), projection_analysis_result.projection_actions); + expression_step_projection->setStepDescription("Projection"); + query_plan.addStep(std::move(expression_step_projection)); + + UInt64 limit_offset = 0; + if (query_node.hasOffset()) + { + /// Constness of offset is validated during query analysis stage + limit_offset = query_node.getOffset()->getConstantValue().getValue().safeGet(); + } + + UInt64 limit_length = 0; + + if (query_node.hasLimit()) + { + /// Constness of limit is validated during query analysis stage + limit_length = query_node.getLimit()->getConstantValue().getValue().safeGet(); + } + + if (query_node.isDistinct()) + { + const Settings & settings = planner_context->getQueryContext()->getSettingsRef(); + UInt64 limit_hint_for_distinct = 0; + bool pre_distinct = true; + + SizeLimits limits(settings.max_rows_in_distinct, settings.max_bytes_in_distinct, settings.distinct_overflow_mode); + bool no_order_by = !query_node.hasOrderBy(); + 
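+        /// For example: `SELECT DISTINCT x FROM t LIMIT 10 OFFSET 5` without ORDER BY needs at most 15 distinct rows,
+        /// so 15 is used as the limit hint computed below.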
+ /** If after this stage of DISTINCT ORDER BY is not executed, + * then you can get no more than limit_length + limit_offset of different rows. + */ + if (no_order_by && limit_length <= std::numeric_limits::max() - limit_offset) + limit_hint_for_distinct = limit_length + limit_offset; + + auto distinct_step = std::make_unique( + query_plan.getCurrentDataStream(), + limits, + limit_hint_for_distinct, + projection_analysis_result.projection_column_names, + pre_distinct, + settings.optimize_distinct_in_order); + + if (pre_distinct) + distinct_step->setStepDescription("Preliminary DISTINCT"); + else + distinct_step->setStepDescription("DISTINCT"); + + query_plan.addStep(std::move(distinct_step)); + } + + if (expression_analysis_result.hasSort()) + { + const auto & sort_analysis_result = expression_analysis_result.getSort(); + auto expression_step_before_order_by = std::make_unique(query_plan.getCurrentDataStream(), sort_analysis_result.before_order_by_actions); + expression_step_before_order_by->setStepDescription("Before ORDER BY"); + query_plan.addStep(std::move(expression_step_before_order_by)); + } + + QueryPlanStepPtr filling_step; + SortDescription sort_description; + + if (query_node.hasOrderBy()) + { + sort_description = extractSortDescription(query_node.getOrderByNode(), *planner_context); + + bool query_has_array_join_in_join_tree = queryHasArrayJoinInJoinTree(query_tree); + + UInt64 partial_sorting_limit = 0; + + /// Partial sort can be done if there is LIMIT, but no DISTINCT, LIMIT WITH TIES, LIMIT BY, ARRAY JOIN + if (limit_length != 0 && !query_node.isDistinct() && !query_node.hasLimitBy() && !query_node.isLimitWithTies() && + !query_has_array_join_in_join_tree && limit_length <= std::numeric_limits::max() - limit_offset) + { + partial_sorting_limit = limit_length + limit_offset; + } + + const Settings & settings = query_context->getSettingsRef(); + + /// Merge the sorted blocks + auto sorting_step = std::make_unique( + query_plan.getCurrentDataStream(), + sort_description, + settings.max_block_size, + partial_sorting_limit, + SizeLimits(settings.max_rows_to_sort, settings.max_bytes_to_sort, settings.sort_overflow_mode), + settings.max_bytes_before_remerge_sort, + settings.remerge_sort_lowered_memory_bytes_ratio, + settings.max_bytes_before_external_sort, + query_context->getTempDataOnDisk(), + settings.min_free_disk_space_for_temporary_data, + settings.optimize_sorting_by_input_stream_properties); + + sorting_step->setStepDescription("Sorting for ORDER BY"); + query_plan.addStep(std::move(sorting_step)); + + NameSet column_names_with_fill; + SortDescription fill_description; + for (auto & description : sort_description) + { + if (description.with_fill) + { + fill_description.push_back(description); + column_names_with_fill.insert(description.column_name); + } + } + + if (!fill_description.empty()) + { + InterpolateDescriptionPtr interpolate_description; + + if (query_node.hasInterpolate()) + { + auto interpolate_actions_dag = std::make_shared(); + + auto & interpolate_list_node = query_node.getInterpolate()->as(); + auto & interpolate_list_nodes = interpolate_list_node.getNodes(); + + if (interpolate_list_nodes.empty()) + { + auto query_plan_columns = query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName(); + for (auto & query_plan_column : query_plan_columns) + { + if (column_names_with_fill.contains(query_plan_column.name)) + continue; + + const auto * input_action_node = &interpolate_actions_dag->addInput(query_plan_column); + 
interpolate_actions_dag->getOutputs().push_back(input_action_node); + } + } + else + { + for (auto & interpolate_node : interpolate_list_nodes) + { + auto & interpolate_node_typed = interpolate_node->as(); + + PlannerActionsVisitor planner_actions_visitor(planner_context); + auto expression_to_interpolate_expression_nodes = planner_actions_visitor.visit(interpolate_actions_dag, interpolate_node_typed.getExpression()); + auto interpolate_expression_nodes = planner_actions_visitor.visit(interpolate_actions_dag, interpolate_node_typed.getInterpolateExpression()); + + if (expression_to_interpolate_expression_nodes.size() != 1) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expression to interpolate expected to have single action node"); + + if (interpolate_expression_nodes.size() != 1) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Interpolate expression expected to have single action node"); + + const auto * expression_to_interpolate = expression_to_interpolate_expression_nodes[0]; + const auto & expression_to_interpolate_name = expression_to_interpolate->result_name; + + const auto * interpolate_expression = interpolate_expression_nodes[0]; + if (!interpolate_expression->result_type->equals(*expression_to_interpolate->result_type)) + { + auto cast_type_name = expression_to_interpolate->result_type->getName(); + Field cast_type_constant_value(cast_type_name); + + ColumnWithTypeAndName column; + column.name = calculateConstantActionNodeName(cast_type_name); + column.column = DataTypeString().createColumnConst(0, cast_type_constant_value); + column.type = std::make_shared(); + + const auto * cast_type_constant_node = &interpolate_actions_dag->addColumn(std::move(column)); + + FunctionCastBase::Diagnostic diagnostic = {interpolate_expression->result_name, interpolate_expression->result_name}; + FunctionOverloadResolverPtr func_builder_cast + = CastInternalOverloadResolver::createImpl(std::move(diagnostic)); + + ActionsDAG::NodeRawConstPtrs children = {interpolate_expression, cast_type_constant_node}; + interpolate_expression = &interpolate_actions_dag->addFunction(func_builder_cast, std::move(children), interpolate_expression->result_name); + } + + const auto * alias_node = &interpolate_actions_dag->addAlias(*interpolate_expression, expression_to_interpolate_name); + interpolate_actions_dag->getOutputs().push_back(alias_node); + } + + interpolate_actions_dag->removeUnusedActions(); + } + + Aliases empty_aliases; + interpolate_description = std::make_shared(std::move(interpolate_actions_dag), empty_aliases); + } + + filling_step = std::make_unique(query_plan.getCurrentDataStream(), std::move(fill_description), interpolate_description); + } + } + + if (expression_analysis_result.hasLimitBy()) + { + const auto & limit_by_analysis_result = expression_analysis_result.getLimitBy(); + auto expression_step_before_limit_by = std::make_unique(query_plan.getCurrentDataStream(), limit_by_analysis_result.before_limit_by_actions); + expression_step_before_limit_by->setStepDescription("Before LIMIT BY"); + query_plan.addStep(std::move(expression_step_before_limit_by)); + + /// Constness of LIMIT BY limit is validated during query analysis stage + UInt64 limit_by_limit = query_node.getLimitByLimit()->getConstantValue().getValue().safeGet(); + UInt64 limit_by_offset = 0; + + if (query_node.hasLimitByOffset()) + { + /// Constness of LIMIT BY offset is validated during query analysis stage + limit_by_offset = query_node.getLimitByOffset()->getConstantValue().getValue().safeGet(); + } + + auto limit_by_step = 
std::make_unique(query_plan.getCurrentDataStream(), + limit_by_limit, + limit_by_offset, + limit_by_analysis_result.limit_by_column_names); + query_plan.addStep(std::move(limit_by_step)); + } + + if (filling_step) + query_plan.addStep(std::move(filling_step)); + + if (query_context->getSettingsRef().extremes) + { + auto extremes_step = std::make_unique(query_plan.getCurrentDataStream()); + query_plan.addStep(std::move(extremes_step)); + } + + if (query_node.hasLimit()) + { + const Settings & settings = query_context->getSettingsRef(); + bool always_read_till_end = settings.exact_rows_before_limit; + bool limit_with_ties = query_node.isLimitWithTies(); + + /** Special cases: + * + * 1. If there is WITH TOTALS and there is no ORDER BY, then read the data to the end, + * otherwise TOTALS is counted according to incomplete data. + * + * 2. If there is no WITH TOTALS and there is a subquery in FROM, and there is WITH TOTALS on one of the levels, + * then when using LIMIT, you should read the data to the end, rather than cancel the query earlier, + * because if you cancel the query, we will not get `totals` data from the remote server. + */ + if (query_node.isGroupByWithTotals() && !query_node.hasOrderBy()) + always_read_till_end = true; + + if (!query_node.isGroupByWithTotals() && queryHasWithTotalsInAnySubqueryInJoinTree(query_tree)) + always_read_till_end = true; + + SortDescription limit_with_ties_sort_description; + + if (query_node.isLimitWithTies()) + { + /// Validated during parser stage + if (!query_node.hasOrderBy()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "LIMIT WITH TIES without ORDER BY"); + + limit_with_ties_sort_description = sort_description; + } + + auto limit = std::make_unique(query_plan.getCurrentDataStream(), + limit_length, + limit_offset, + always_read_till_end, + limit_with_ties, + limit_with_ties_sort_description); + + if (limit_with_ties) + limit->setStepDescription("LIMIT WITH TIES"); + + query_plan.addStep(std::move(limit)); + } + else if (query_node.hasOffset()) + { + auto offsets_step = std::make_unique(query_plan.getCurrentDataStream(), limit_offset); + query_plan.addStep(std::move(offsets_step)); + } + + auto projection_step = std::make_unique(query_plan.getCurrentDataStream(), projection_analysis_result.project_names_actions); + projection_step->setStepDescription("Project names"); + query_plan.addStep(std::move(projection_step)); + + addBuildSubqueriesForSetsStepIfNeeded(query_plan, select_query_options, planner_context); + extendQueryContextAndStoragesLifetime(query_plan, planner_context); +} + +} diff --git a/src/Planner/Planner.h b/src/Planner/Planner.h new file mode 100644 index 00000000000..03f8e19df56 --- /dev/null +++ b/src/Planner/Planner.h @@ -0,0 +1,59 @@ +#pragma once + +#include +#include + +#include +#include +#include + +namespace DB +{ + +class GlobalPlannerContext; +using GlobalPlannerContextPtr = std::shared_ptr; + +class PlannerContext; +using PlannerContextPtr = std::shared_ptr; + +class Planner +{ +public: + /// Initialize planner with query tree after analysis phase + Planner(const QueryTreeNodePtr & query_tree_, + const SelectQueryOptions & select_query_options_, + ContextPtr context_); + + /// Initialize planner with query tree after query analysis phase and global planner context + Planner(const QueryTreeNodePtr & query_tree_, + const SelectQueryOptions & select_query_options_, + ContextPtr context_, + GlobalPlannerContextPtr global_planner_context_); + + const QueryPlan & getQueryPlan() const + { + return query_plan; + } + + 
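+    /** Illustrative usage sketch (the names `query_tree` and `context` are placeholders; the same pattern
+      * is used for subquery planning in Planner.cpp):
+      *
+      *     Planner planner(query_tree, SelectQueryOptions(), context);
+      *     planner.buildQueryPlanIfNeeded();
+      *     QueryPlan plan = std::move(planner).extractQueryPlan();
+      */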
QueryPlan & getQueryPlan() + { + return query_plan; + } + + void buildQueryPlanIfNeeded(); + + QueryPlan && extractQueryPlan() && + { + return std::move(query_plan); + } + +private: + void initialize(); + + QueryTreeNodePtr query_tree; + QueryPlan query_plan; + SelectQueryOptions select_query_options; + PlannerContextPtr planner_context; +}; + +} diff --git a/src/Planner/PlannerActionsVisitor.cpp b/src/Planner/PlannerActionsVisitor.cpp new file mode 100644 index 00000000000..a6f1a74f251 --- /dev/null +++ b/src/Planner/PlannerActionsVisitor.cpp @@ -0,0 +1,765 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +#include +#include + +#include +#include + +#include +#include + +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int UNSUPPORTED_METHOD; + extern const int LOGICAL_ERROR; + extern const int BAD_ARGUMENTS; +} + +namespace +{ + +class ActionsScopeNode +{ +public: + explicit ActionsScopeNode(ActionsDAGPtr actions_dag_, QueryTreeNodePtr scope_node_) + : actions_dag(std::move(actions_dag_)) + , scope_node(std::move(scope_node_)) + { + for (const auto & node : actions_dag->getNodes()) + node_name_to_node[node.result_name] = &node; + } + + const QueryTreeNodePtr & getScopeNode() const + { + return scope_node; + } + + [[maybe_unused]] bool containsNode(const std::string & node_name) + { + return node_name_to_node.find(node_name) != node_name_to_node.end(); + } + + [[maybe_unused]] const ActionsDAG::Node * tryGetNode(const std::string & node_name) + { + auto it = node_name_to_node.find(node_name); + if (it == node_name_to_node.end()) + return {}; + + return it->second; + } + + const ActionsDAG::Node * getNodeOrThrow(const std::string & node_name) + { + auto it = node_name_to_node.find(node_name); + if (it == node_name_to_node.end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "No node with name {}. 
There are only nodes {}", + node_name, + actions_dag->dumpNames()); + + return it->second; + } + + const ActionsDAG::Node * addInputColumnIfNecessary(const std::string & node_name, const DataTypePtr & column_type) + { + auto it = node_name_to_node.find(node_name); + if (it != node_name_to_node.end()) + return it->second; + + const auto * node = &actions_dag->addInput(node_name, column_type); + node_name_to_node[node->result_name] = node; + + return node; + } + + const ActionsDAG::Node * addInputConstantColumnIfNecessary(const std::string & node_name, const ColumnWithTypeAndName & column) + { + auto it = node_name_to_node.find(node_name); + if (it != node_name_to_node.end()) + return it->second; + + const auto * node = &actions_dag->addInput(column); + node_name_to_node[node->result_name] = node; + + return node; + } + + const ActionsDAG::Node * addConstantIfNecessary(const std::string & node_name, const ColumnWithTypeAndName & column) + { + auto it = node_name_to_node.find(node_name); + if (it != node_name_to_node.end()) + return it->second; + + const auto * node = &actions_dag->addColumn(column); + node_name_to_node[node->result_name] = node; + + return node; + } + + const ActionsDAG::Node * addFunctionIfNecessary(const std::string & node_name, ActionsDAG::NodeRawConstPtrs children, FunctionOverloadResolverPtr function) + { + auto it = node_name_to_node.find(node_name); + if (it != node_name_to_node.end()) + return it->second; + + const auto * node = &actions_dag->addFunction(function, children, node_name); + node_name_to_node[node->result_name] = node; + + return node; + } + + const ActionsDAG::Node * addArrayJoinIfNecessary(const std::string & node_name, const ActionsDAG::Node * child) + { + auto it = node_name_to_node.find(node_name); + if (it != node_name_to_node.end()) + return it->second; + + const auto * node = &actions_dag->addArrayJoin(*child, node_name); + node_name_to_node[node->result_name] = node; + + return node; + } + +private: + std::unordered_map node_name_to_node; + ActionsDAGPtr actions_dag; + QueryTreeNodePtr scope_node; +}; + +class PlannerActionsVisitorImpl +{ +public: + PlannerActionsVisitorImpl(ActionsDAGPtr actions_dag, const PlannerContextPtr & planner_context_); + + ActionsDAG::NodeRawConstPtrs visit(QueryTreeNodePtr expression_node); + +private: + using NodeNameAndNodeMinLevel = std::pair; + + NodeNameAndNodeMinLevel visitImpl(QueryTreeNodePtr node); + + NodeNameAndNodeMinLevel visitColumn(const QueryTreeNodePtr & node); + + NodeNameAndNodeMinLevel visitConstantValue(const Field & constant_literal, const DataTypePtr & constant_type); + + NodeNameAndNodeMinLevel visitConstant(const QueryTreeNodePtr & node); + + NodeNameAndNodeMinLevel visitLambda(const QueryTreeNodePtr & node); + + NodeNameAndNodeMinLevel makeSetForInFunction(const QueryTreeNodePtr & node); + + NodeNameAndNodeMinLevel visitFunction(const QueryTreeNodePtr & node); + + NodeNameAndNodeMinLevel visitQueryOrUnion(const QueryTreeNodePtr & node); + + std::vector actions_stack; + std::unordered_map node_to_node_name; + const PlannerContextPtr planner_context; +}; + +PlannerActionsVisitorImpl::PlannerActionsVisitorImpl(ActionsDAGPtr actions_dag, const PlannerContextPtr & planner_context_) + : planner_context(planner_context_) +{ + actions_stack.emplace_back(std::move(actions_dag), nullptr); +} + +ActionsDAG::NodeRawConstPtrs PlannerActionsVisitorImpl::visit(QueryTreeNodePtr expression_node) +{ + ActionsDAG::NodeRawConstPtrs result; + + if (auto * expression_list_node = expression_node->as()) + { + for 
(auto & node : expression_list_node->getNodes()) + { + auto [node_name, _] = visitImpl(node); + result.push_back(actions_stack.front().getNodeOrThrow(node_name)); + } + } + else + { + auto [node_name, _] = visitImpl(expression_node); + result.push_back(actions_stack.front().getNodeOrThrow(node_name)); + } + + return result; +} + +PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::visitImpl(QueryTreeNodePtr node) +{ + auto node_type = node->getNodeType(); + + if (node_type == QueryTreeNodeType::COLUMN) + return visitColumn(node); + else if (node_type == QueryTreeNodeType::CONSTANT) + return visitConstant(node); + else if (node_type == QueryTreeNodeType::FUNCTION) + return visitFunction(node); + else if (node_type == QueryTreeNodeType::QUERY || node_type == QueryTreeNodeType::UNION) + return visitQueryOrUnion(node); + + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, + "Expected column, constant, function, query or union node. Actual {}", + node->formatASTForErrorMessage()); +} + +PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::visitColumn(const QueryTreeNodePtr & node) +{ + auto column_node_name = calculateActionNodeName(node, *planner_context, node_to_node_name); + const auto & column_node = node->as(); + + Int64 actions_stack_size = static_cast(actions_stack.size() - 1); + for (Int64 i = actions_stack_size; i >= 0; --i) + { + actions_stack[i].addInputColumnIfNecessary(column_node_name, column_node.getColumnType()); + + auto column_source = column_node.getColumnSourceOrNull(); + if (column_source && + column_source->getNodeType() == QueryTreeNodeType::LAMBDA && + actions_stack[i].getScopeNode().get() == column_source.get()) + { + return {column_node_name, i}; + } + } + + return {column_node_name, 0}; +} + +PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::visitConstantValue(const Field & constant_literal, const DataTypePtr & constant_type) +{ + auto constant_node_name = calculateConstantActionNodeName(constant_literal, constant_type); + + ColumnWithTypeAndName column; + column.name = constant_node_name; + column.type = constant_type; + column.column = column.type->createColumnConst(1, constant_literal); + + actions_stack[0].addConstantIfNecessary(constant_node_name, column); + + size_t actions_stack_size = actions_stack.size(); + for (size_t i = 1; i < actions_stack_size; ++i) + { + auto & actions_stack_node = actions_stack[i]; + actions_stack_node.addInputConstantColumnIfNecessary(constant_node_name, column); + } + + return {constant_node_name, 0}; +} + +PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::visitConstant(const QueryTreeNodePtr & node) +{ + const auto & constant_node = node->as(); + return visitConstantValue(constant_node.getValue(), constant_node.getResultType()); +} + +PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::visitLambda(const QueryTreeNodePtr & node) +{ + auto & lambda_node = node->as(); + auto result_type = lambda_node.getResultType(); + if (!result_type) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Lambda {} is not resolved during query analysis", + lambda_node.formatASTForErrorMessage()); + + auto & lambda_arguments_nodes = lambda_node.getArguments().getNodes(); + size_t lambda_arguments_nodes_size = lambda_arguments_nodes.size(); + + NamesAndTypesList lambda_arguments_names_and_types; + + for (size_t i = 0; i < lambda_arguments_nodes_size; ++i) + { + const auto & lambda_argument_name = 
lambda_node.getArgumentNames().at(i); + auto lambda_argument_type = lambda_arguments_nodes[i]->getResultType(); + lambda_arguments_names_and_types.emplace_back(lambda_argument_name, std::move(lambda_argument_type)); + } + + auto lambda_actions_dag = std::make_shared(); + actions_stack.emplace_back(lambda_actions_dag, node); + + auto [lambda_expression_node_name, level] = visitImpl(lambda_node.getExpression()); + lambda_actions_dag->getOutputs().push_back(actions_stack.back().getNodeOrThrow(lambda_expression_node_name)); + lambda_actions_dag->removeUnusedActions(Names(1, lambda_expression_node_name)); + + auto expression_actions_settings = ExpressionActionsSettings::fromContext(planner_context->getQueryContext(), CompileExpressions::yes); + auto lambda_actions = std::make_shared(lambda_actions_dag, expression_actions_settings); + + Names captured_column_names; + ActionsDAG::NodeRawConstPtrs lambda_children; + Names required_column_names = lambda_actions->getRequiredColumns(); + + if (level == actions_stack.size() - 1) + --level; + + const auto & lambda_argument_names = lambda_node.getArgumentNames(); + + for (const auto & required_column_name : required_column_names) + { + auto it = std::find(lambda_argument_names.begin(), lambda_argument_names.end(), required_column_name); + + if (it == lambda_argument_names.end()) + { + lambda_children.push_back(actions_stack[level].getNodeOrThrow(required_column_name)); + captured_column_names.push_back(required_column_name); + } + } + + auto lambda_node_name = calculateActionNodeName(node, *planner_context); + auto function_capture = std::make_shared( + lambda_actions, captured_column_names, lambda_arguments_names_and_types, result_type, lambda_expression_node_name); + actions_stack.pop_back(); + + actions_stack[level].addFunctionIfNecessary(lambda_node_name, std::move(lambda_children), std::move(function_capture)); + + size_t actions_stack_size = actions_stack.size(); + for (size_t i = level + 1; i < actions_stack_size; ++i) + { + auto & actions_stack_node = actions_stack[i]; + actions_stack_node.addInputColumnIfNecessary(lambda_node_name, result_type); + } + + return {lambda_node_name, level}; +} + +PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::makeSetForInFunction(const QueryTreeNodePtr & node) +{ + const auto & function_node = node->as(); + auto in_second_argument = function_node.getArguments().getNodes().at(1); + + auto set_key = planner_context->createSetKey(in_second_argument); + const auto & planner_set = planner_context->getSetOrThrow(set_key); + + ColumnWithTypeAndName column; + column.name = set_key; + column.type = std::make_shared(); + + bool set_is_created = planner_set.getSet()->isCreated(); + auto column_set = ColumnSet::create(1, planner_set.getSet()); + + if (set_is_created) + column.column = ColumnConst::create(std::move(column_set), 1); + else + column.column = std::move(column_set); + + actions_stack[0].addConstantIfNecessary(set_key, column); + + size_t actions_stack_size = actions_stack.size(); + for (size_t i = 1; i < actions_stack_size; ++i) + { + auto & actions_stack_node = actions_stack[i]; + actions_stack_node.addInputConstantColumnIfNecessary(set_key, column); + } + + node_to_node_name.emplace(in_second_argument, set_key); + + return {set_key, 0}; +} + +PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::visitFunction(const QueryTreeNodePtr & node) +{ + const auto & function_node = node->as(); + if (const auto constant_value_or_null = 
function_node.getConstantValueOrNull()) + return visitConstantValue(constant_value_or_null->getValue(), constant_value_or_null->getType()); + + std::optional in_function_second_argument_node_name_with_level; + + if (isNameOfInFunction(function_node.getFunctionName())) + in_function_second_argument_node_name_with_level = makeSetForInFunction(node); + + const auto & function_arguments = function_node.getArguments().getNodes(); + size_t function_arguments_size = function_arguments.size(); + + Names function_arguments_node_names; + function_arguments_node_names.reserve(function_arguments_size); + + size_t level = 0; + for (size_t function_argument_index = 0; function_argument_index < function_arguments_size; ++function_argument_index) + { + if (in_function_second_argument_node_name_with_level && function_argument_index == 1) + { + auto & [node_name, node_min_level] = *in_function_second_argument_node_name_with_level; + function_arguments_node_names.push_back(std::move(node_name)); + level = std::max(level, node_min_level); + continue; + } + + const auto & argument = function_arguments[function_argument_index]; + + if (argument->getNodeType() == QueryTreeNodeType::LAMBDA) + { + auto [node_name, node_min_level] = visitLambda(argument); + function_arguments_node_names.push_back(std::move(node_name)); + level = std::max(level, node_min_level); + continue; + } + + auto [node_name, node_min_level] = visitImpl(argument); + function_arguments_node_names.push_back(std::move(node_name)); + level = std::max(level, node_min_level); + } + + auto function_node_name = calculateActionNodeName(node, *planner_context, node_to_node_name); + + if (function_node.isAggregateFunction() || function_node.isWindowFunction()) + { + size_t actions_stack_size = actions_stack.size(); + + for (size_t i = 0; i < actions_stack_size; ++i) + { + auto & actions_stack_node = actions_stack[i]; + actions_stack_node.addInputColumnIfNecessary(function_node_name, function_node.getResultType()); + } + + return {function_node_name, 0}; + } + + ActionsDAG::NodeRawConstPtrs children; + children.reserve(function_arguments_size); + + for (auto & function_argument_node_name : function_arguments_node_names) + children.push_back(actions_stack[level].getNodeOrThrow(function_argument_node_name)); + + if (function_node.getFunctionName() == "arrayJoin") + { + if (level != 0) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Expression in arrayJoin cannot depend on lambda argument: {} ", + function_arguments_node_names.at(0)); + + actions_stack[level].addArrayJoinIfNecessary(function_node_name, children.at(0)); + } + else + { + actions_stack[level].addFunctionIfNecessary(function_node_name, children, function_node.getFunction()); + } + + size_t actions_stack_size = actions_stack.size(); + for (size_t i = level + 1; i < actions_stack_size; ++i) + { + auto & actions_stack_node = actions_stack[i]; + actions_stack_node.addInputColumnIfNecessary(function_node_name, function_node.getResultType()); + } + + return {function_node_name, level}; +} + +PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::visitQueryOrUnion(const QueryTreeNodePtr & node) +{ + const auto constant_value = node->getConstantValueOrNull(); + if (!constant_value) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Scalar subqueries must be evaluated as constants"); + + return visitConstantValue(constant_value->getValue(), constant_value->getType()); +} + +} + +PlannerActionsVisitor::PlannerActionsVisitor(const PlannerContextPtr & planner_context_) + : 
planner_context(planner_context_) +{} + +ActionsDAG::NodeRawConstPtrs PlannerActionsVisitor::visit(ActionsDAGPtr actions_dag, QueryTreeNodePtr expression_node) +{ + PlannerActionsVisitorImpl actions_visitor_impl(actions_dag, planner_context); + return actions_visitor_impl.visit(expression_node); +} + +String calculateActionNodeName(const QueryTreeNodePtr & node, const PlannerContext & planner_context, QueryTreeNodeToName & node_to_name) +{ + auto it = node_to_name.find(node); + if (it != node_to_name.end()) + return it->second; + + String result; + auto node_type = node->getNodeType(); + + switch (node_type) + { + case QueryTreeNodeType::COLUMN: + { + const auto * column_identifier = planner_context.getColumnNodeIdentifierOrNull(node); + + if (column_identifier) + { + result = *column_identifier; + } + else + { + const auto & column_node = node->as(); + result = column_node.getColumnName(); + } + + break; + } + case QueryTreeNodeType::CONSTANT: + { + const auto & constant_node = node->as(); + result = calculateConstantActionNodeName(constant_node.getValue(), constant_node.getResultType()); + break; + } + case QueryTreeNodeType::FUNCTION: + { + if (auto node_constant_value = node->getConstantValueOrNull()) + { + result = calculateConstantActionNodeName(node_constant_value->getValue(), node_constant_value->getType()); + } + else + { + const auto & function_node = node->as(); + String in_function_second_argument_node_name; + + if (isNameOfInFunction(function_node.getFunctionName())) + { + const auto & in_second_argument_node = function_node.getArguments().getNodes().at(1); + in_function_second_argument_node_name = planner_context.createSetKey(in_second_argument_node); + } + + WriteBufferFromOwnString buffer; + buffer << function_node.getFunctionName(); + + const auto & function_parameters_nodes = function_node.getParameters().getNodes(); + + if (!function_parameters_nodes.empty()) + { + buffer << '('; + + size_t function_parameters_nodes_size = function_parameters_nodes.size(); + for (size_t i = 0; i < function_parameters_nodes_size; ++i) + { + const auto & function_parameter_node = function_parameters_nodes[i]; + buffer << calculateActionNodeName(function_parameter_node, planner_context, node_to_name); + + if (i + 1 != function_parameters_nodes_size) + buffer << ", "; + } + + buffer << ')'; + } + + const auto & function_arguments_nodes = function_node.getArguments().getNodes(); + String function_argument_name; + + buffer << '('; + + size_t function_arguments_nodes_size = function_arguments_nodes.size(); + for (size_t i = 0; i < function_arguments_nodes_size; ++i) + { + if (i == 1 && !in_function_second_argument_node_name.empty()) + { + function_argument_name = in_function_second_argument_node_name; + } + else + { + const auto & function_argument_node = function_arguments_nodes[i]; + function_argument_name = calculateActionNodeName(function_argument_node, planner_context, node_to_name); + } + + buffer << function_argument_name; + + if (i + 1 != function_arguments_nodes_size) + buffer << ", "; + } + + buffer << ')'; + + if (function_node.isWindowFunction()) + { + buffer << " OVER ("; + buffer << calculateWindowNodeActionName(function_node.getWindowNode(), planner_context, node_to_name); + buffer << ')'; + } + + result = buffer.str(); + } + break; + } + case QueryTreeNodeType::UNION: + [[fallthrough]]; + case QueryTreeNodeType::QUERY: + { + if (auto node_constant_value = node->getConstantValueOrNull()) + { + result = calculateConstantActionNodeName(node_constant_value->getValue(), 
node_constant_value->getType()); + } + else + { + auto query_hash = node->getTreeHash(); + result = "__subquery_" + std::to_string(query_hash.first) + '_' + std::to_string(query_hash.second); + } + break; + } + case QueryTreeNodeType::LAMBDA: + { + auto lambda_hash = node->getTreeHash(); + + result = "__lambda_" + toString(lambda_hash.first) + '_' + toString(lambda_hash.second); + break; + } + default: + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid action query tree node {}", node->formatASTForErrorMessage()); + } + } + + node_to_name.emplace(node, result); + + return result; +} + +String calculateActionNodeName(const QueryTreeNodePtr & node, const PlannerContext & planner_context) +{ + QueryTreeNodeToName empty_map; + return calculateActionNodeName(node, planner_context, empty_map); +} + +String calculateConstantActionNodeName(const Field & constant_literal, const DataTypePtr & constant_type) +{ + auto constant_name = applyVisitor(FieldVisitorToString(), constant_literal); + return constant_name + "_" + constant_type->getName(); +} + +String calculateConstantActionNodeName(const Field & constant_literal) +{ + return calculateConstantActionNodeName(constant_literal, applyVisitor(FieldToDataType(), constant_literal)); +} + +String calculateWindowNodeActionName(const QueryTreeNodePtr & node, const PlannerContext & planner_context, QueryTreeNodeToName & node_to_name) +{ + auto & window_node = node->as(); + WriteBufferFromOwnString buffer; + + if (window_node.hasPartitionBy()) + { + buffer << "PARTITION BY "; + + auto & partition_by_nodes = window_node.getPartitionBy().getNodes(); + size_t partition_by_nodes_size = partition_by_nodes.size(); + + for (size_t i = 0; i < partition_by_nodes_size; ++i) + { + auto & partition_by_node = partition_by_nodes[i]; + buffer << calculateActionNodeName(partition_by_node, planner_context, node_to_name); + if (i + 1 != partition_by_nodes_size) + buffer << ", "; + } + } + + if (window_node.hasOrderBy()) + { + if (window_node.hasPartitionBy()) + buffer << ' '; + + buffer << "ORDER BY "; + + auto & order_by_nodes = window_node.getOrderBy().getNodes(); + size_t order_by_nodes_size = order_by_nodes.size(); + + for (size_t i = 0; i < order_by_nodes_size; ++i) + { + auto & sort_node = order_by_nodes[i]->as(); + buffer << calculateActionNodeName(sort_node.getExpression(), planner_context, node_to_name); + + auto sort_direction = sort_node.getSortDirection(); + buffer << (sort_direction == SortDirection::ASCENDING ? " ASC" : " DESC"); + + auto nulls_sort_direction = sort_node.getNullsSortDirection(); + + if (nulls_sort_direction) + buffer << " NULLS " << (nulls_sort_direction == sort_direction ? 
"LAST" : "FIRST"); + + if (auto collator = sort_node.getCollator()) + buffer << " COLLATE " << collator->getLocale(); + + if (sort_node.withFill()) + { + buffer << " WITH FILL"; + + if (sort_node.hasFillFrom()) + buffer << " FROM " << calculateActionNodeName(sort_node.getFillFrom(), planner_context, node_to_name); + + if (sort_node.hasFillTo()) + buffer << " TO " << calculateActionNodeName(sort_node.getFillTo(), planner_context, node_to_name); + + if (sort_node.hasFillStep()) + buffer << " STEP " << calculateActionNodeName(sort_node.getFillStep(), planner_context, node_to_name); + } + + if (i + 1 != order_by_nodes_size) + buffer << ", "; + } + } + + auto & window_frame = window_node.getWindowFrame(); + if (!window_frame.is_default) + { + if (window_node.hasPartitionBy() || window_node.hasOrderBy()) + buffer << ' '; + + buffer << window_frame.type << " BETWEEN "; + if (window_frame.begin_type == WindowFrame::BoundaryType::Current) + { + buffer << "CURRENT ROW"; + } + else if (window_frame.begin_type == WindowFrame::BoundaryType::Unbounded) + { + buffer << "UNBOUNDED"; + buffer << " " << (window_frame.begin_preceding ? "PRECEDING" : "FOLLOWING"); + } + else + { + buffer << calculateActionNodeName(window_node.getFrameBeginOffsetNode(), planner_context, node_to_name); + buffer << " " << (window_frame.begin_preceding ? "PRECEDING" : "FOLLOWING"); + } + + buffer << " AND "; + + if (window_frame.end_type == WindowFrame::BoundaryType::Current) + { + buffer << "CURRENT ROW"; + } + else if (window_frame.end_type == WindowFrame::BoundaryType::Unbounded) + { + buffer << "UNBOUNDED"; + buffer << " " << (window_frame.end_preceding ? "PRECEDING" : "FOLLOWING"); + } + else + { + buffer << calculateActionNodeName(window_node.getFrameEndOffsetNode(), planner_context, node_to_name); + buffer << " " << (window_frame.end_preceding ? "PRECEDING" : "FOLLOWING"); + } + } + + return buffer.str(); +} + +String calculateWindowNodeActionName(const QueryTreeNodePtr & node, const PlannerContext & planner_context) +{ + QueryTreeNodeToName empty_map; + return calculateWindowNodeActionName(node, planner_context, empty_map); +} + +} diff --git a/src/Planner/PlannerActionsVisitor.h b/src/Planner/PlannerActionsVisitor.h new file mode 100644 index 00000000000..405031daa40 --- /dev/null +++ b/src/Planner/PlannerActionsVisitor.h @@ -0,0 +1,78 @@ +#pragma once + +#include + +#include +#include + +#include + +#include + +#include + +namespace DB +{ + +class PlannerContext; +using PlannerContextPtr = std::shared_ptr; + +/** Planner actions visitor is responsible for adding necessary actions to calculate query tree expression node + * into actions dag. + * + * Preconditions: + * 1. Table expression data for table expression nodes is collected in planner context. + * For column node, that has column table expression source, identifier for column name in table expression data + * is used as action dag node name. + * 2. Sets for IN functions are already collected in planner context. + * + * During actions build, there is special handling for following functions: + * 1. Aggregate functions are added in actions dag as INPUT nodes. Aggregate functions arguments are not added. + * 2. For function `in` and its variants, already collected sets from planner context are used. + */ +class PlannerActionsVisitor +{ +public: + explicit PlannerActionsVisitor(const PlannerContextPtr & planner_context_); + + /** Add actions necessary to calculate expression node into expression dag. + * Necessary actions are not added in actions dag output. 
+ * Returns query tree expression node actions dag nodes. + */ + ActionsDAG::NodeRawConstPtrs visit(ActionsDAGPtr actions_dag, QueryTreeNodePtr expression_node); + +private: + const PlannerContextPtr planner_context; +}; + +/** Calculate query tree expression node action dag name and add them into node to name map. + * If node exists in map, name from map is used. + * + * For column node column node identifier from planner context is used. + */ +using QueryTreeNodeToName = std::unordered_map; +String calculateActionNodeName(const QueryTreeNodePtr & node, const PlannerContext & planner_context, QueryTreeNodeToName & node_to_name); + +/** Calculate query tree expression node action dag name. + * + * For column node column node identifier from planner context is used. + */ +String calculateActionNodeName(const QueryTreeNodePtr & node, const PlannerContext & planner_context); + +/// Calculate action node name for constant +String calculateConstantActionNodeName(const Field & constant_literal, const DataTypePtr & constant_type); + +/// Calculate action node name for constant, data type will be derived from constant literal value +String calculateConstantActionNodeName(const Field & constant_literal); + +/** Calculate action node name for window node. + * Window node action name can only be part of window function action name. + */ +String calculateWindowNodeActionName(const QueryTreeNodePtr & node, const PlannerContext & planner_context, QueryTreeNodeToName & node_to_name); + +/** Calculate action node name for window node. + * Window node action name can only be part of window function action name. + */ +String calculateWindowNodeActionName(const QueryTreeNodePtr & node, const PlannerContext & planner_context); + +} diff --git a/src/Planner/PlannerAggregation.cpp b/src/Planner/PlannerAggregation.cpp new file mode 100644 index 00000000000..3322ef9364f --- /dev/null +++ b/src/Planner/PlannerAggregation.cpp @@ -0,0 +1,225 @@ +#include + +#include + +#include +#include +#include +#include +#include + +#include + +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int BAD_ARGUMENTS; +} + +namespace +{ + +enum class GroupByKind +{ + ORDINARY, + ROLLUP, + CUBE, + GROUPING_SETS +}; + +class GroupingFunctionResolveVisitor : public InDepthQueryTreeVisitor +{ +public: + GroupingFunctionResolveVisitor(GroupByKind group_by_kind_, + const Names & aggregation_keys_, + const GroupingSetsParamsList & grouping_sets_parameters_list_, + const PlannerContext & planner_context_) + : group_by_kind(group_by_kind_) + , planner_context(planner_context_) + { + size_t aggregation_keys_size = aggregation_keys_.size(); + for (size_t i = 0; i < aggregation_keys_size; ++i) + aggegation_key_to_index.emplace(aggregation_keys_[i], i); + + for (const auto & grouping_sets_parameter : grouping_sets_parameters_list_) + { + grouping_sets_keys_indices.emplace_back(); + auto & grouping_set_keys_indices = grouping_sets_keys_indices.back(); + + for (const auto & used_key : grouping_sets_parameter.used_keys) + { + auto aggregation_key_index_it = aggegation_key_to_index.find(used_key); + if (aggregation_key_index_it == aggegation_key_to_index.end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Aggregation key {} in GROUPING SETS is not found in GROUP BY keys"); + + grouping_set_keys_indices.push_back(aggregation_key_index_it->second); + } + } + } + + void visitImpl(const QueryTreeNodePtr & node) + { + auto * function_node = node->as(); + if (!function_node || 
function_node->getFunctionName() != "grouping") + return; + + size_t aggregation_keys_size = aggegation_key_to_index.size(); + + ColumnNumbers arguments_indexes; + + for (const auto & argument : function_node->getArguments().getNodes()) + { + String action_node_name = calculateActionNodeName(argument, planner_context); + + auto it = aggegation_key_to_index.find(action_node_name); + if (it == aggegation_key_to_index.end()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Argument of GROUPING function {} is not a part of GROUP BY clause", + argument->formatASTForErrorMessage()); + + arguments_indexes.push_back(it->second); + } + + QueryTreeNodeWeakPtr column_source; + auto grouping_set_argument_column = std::make_shared(NameAndTypePair{"__grouping_set", std::make_shared()}, column_source); + function_node->getArguments().getNodes().clear(); + + bool force_grouping_standard_compatibility = planner_context.getQueryContext()->getSettingsRef().force_grouping_standard_compatibility; + + switch (group_by_kind) + { + case GroupByKind::ORDINARY: + { + auto grouping_ordinary_function = std::make_shared(arguments_indexes, force_grouping_standard_compatibility); + auto grouping_ordinary_function_adaptor = std::make_shared(std::move(grouping_ordinary_function)); + function_node->resolveAsFunction(std::move(grouping_ordinary_function_adaptor), std::make_shared()); + break; + } + case GroupByKind::ROLLUP: + { + auto grouping_rollup_function = std::make_shared(arguments_indexes, aggregation_keys_size, force_grouping_standard_compatibility); + auto grouping_rollup_function_adaptor = std::make_shared(std::move(grouping_rollup_function)); + function_node->resolveAsFunction(std::move(grouping_rollup_function_adaptor), std::make_shared()); + function_node->getArguments().getNodes().push_back(std::move(grouping_set_argument_column)); + break; + } + case GroupByKind::CUBE: + { + auto grouping_cube_function = std::make_shared(arguments_indexes, aggregation_keys_size, force_grouping_standard_compatibility); + auto grouping_cube_function_adaptor = std::make_shared(std::move(grouping_cube_function)); + function_node->resolveAsFunction(std::move(grouping_cube_function_adaptor), std::make_shared()); + function_node->getArguments().getNodes().push_back(std::move(grouping_set_argument_column)); + break; + } + case GroupByKind::GROUPING_SETS: + { + auto grouping_grouping_sets_function = std::make_shared(arguments_indexes, grouping_sets_keys_indices, force_grouping_standard_compatibility); + auto grouping_grouping_sets_function_adaptor = std::make_shared(std::move(grouping_grouping_sets_function)); + function_node->resolveAsFunction(std::move(grouping_grouping_sets_function_adaptor), std::make_shared()); + function_node->getArguments().getNodes().push_back(std::move(grouping_set_argument_column)); + break; + } + } + } + + static bool needChildVisit(const QueryTreeNodePtr &, const QueryTreeNodePtr & child_node) + { + return !(child_node->getNodeType() == QueryTreeNodeType::QUERY || child_node->getNodeType() == QueryTreeNodeType::UNION); + } + +private: + GroupByKind group_by_kind; + std::unordered_map aggegation_key_to_index; + // Indexes of aggregation keys used in each grouping set (only for GROUP BY GROUPING SETS) + ColumnNumbersList grouping_sets_keys_indices; + const PlannerContext & planner_context; +}; + +void resolveGroupingFunctions(QueryTreeNodePtr & node, + GroupByKind group_by_kind, + const Names & aggregation_keys, + const GroupingSetsParamsList & grouping_sets_parameters_list, + const PlannerContext & 
planner_context) +{ + auto & query_node_typed = node->as(); + + GroupingFunctionResolveVisitor visitor(group_by_kind, aggregation_keys, grouping_sets_parameters_list, planner_context); + + if (query_node_typed.hasHaving()) + visitor.visit(query_node_typed.getHaving()); + + if (query_node_typed.hasOrderBy()) + visitor.visit(query_node_typed.getOrderByNode()); + + visitor.visit(query_node_typed.getProjectionNode()); +} + +} + +void resolveGroupingFunctions(QueryTreeNodePtr & query_node, + const Names & aggregation_keys, + const GroupingSetsParamsList & grouping_sets_parameters_list, + const PlannerContext & planner_context) +{ + auto & query_node_typed = query_node->as(); + + GroupByKind group_by_kind = GroupByKind::ORDINARY; + if (query_node_typed.isGroupByWithRollup()) + group_by_kind = GroupByKind::ROLLUP; + else if (query_node_typed.isGroupByWithCube()) + group_by_kind = GroupByKind::CUBE; + else if (query_node_typed.isGroupByWithGroupingSets()) + group_by_kind = GroupByKind::GROUPING_SETS; + + resolveGroupingFunctions(query_node, group_by_kind, aggregation_keys, grouping_sets_parameters_list, planner_context); +} + +AggregateDescriptions extractAggregateDescriptions(const QueryTreeNodes & aggregate_function_nodes, const PlannerContext & planner_context) +{ + QueryTreeNodeToName node_to_name; + NameSet unique_aggregate_action_node_names; + AggregateDescriptions aggregate_descriptions; + + for (const auto & aggregate_function_node : aggregate_function_nodes) + { + const auto & aggregate_function_node_typed = aggregate_function_node->as(); + String node_name = calculateActionNodeName(aggregate_function_node, planner_context, node_to_name); + auto [_, inserted] = unique_aggregate_action_node_names.emplace(node_name); + if (!inserted) + continue; + + AggregateDescription aggregate_description; + aggregate_description.function = aggregate_function_node_typed.getAggregateFunction(); + + const auto & parameters_nodes = aggregate_function_node_typed.getParameters().getNodes(); + aggregate_description.parameters.reserve(parameters_nodes.size()); + + for (const auto & parameter_node : parameters_nodes) + { + /// Function parameters constness validated during analysis stage + aggregate_description.parameters.push_back(parameter_node->getConstantValue().getValue()); + } + + const auto & arguments_nodes = aggregate_function_node_typed.getArguments().getNodes(); + aggregate_description.argument_names.reserve(arguments_nodes.size()); + + for (const auto & argument_node : arguments_nodes) + { + String argument_node_name = calculateActionNodeName(argument_node, planner_context, node_to_name); + aggregate_description.argument_names.emplace_back(std::move(argument_node_name)); + } + + aggregate_description.column_name = std::move(node_name); + aggregate_descriptions.push_back(std::move(aggregate_description)); + } + + return aggregate_descriptions; +} + +} diff --git a/src/Planner/PlannerAggregation.h b/src/Planner/PlannerAggregation.h new file mode 100644 index 00000000000..6dfd7faca22 --- /dev/null +++ b/src/Planner/PlannerAggregation.h @@ -0,0 +1,26 @@ +#pragma once + +#include +#include + +#include +#include + +#include + +namespace DB +{ + +/** Resolve GROUPING functions in query node. + * GROUPING function is replaced with specialized GROUPING function based on GROUP BY modifiers. + * For ROLLUP, CUBE, GROUPING SETS specialized GROUPING function take special __grouping_set column as argument. 
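 *
 * Illustrative example: for a query like SELECT grouping(a), count() FROM t GROUP BY ROLLUP(a, b)
 * the grouping(a) call is resolved into the ROLLUP-specific grouping function with the index of
 * key a among the GROUP BY keys as a bound argument, and the __grouping_set column is appended
 * as its only query tree argument.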
+ */ +void resolveGroupingFunctions(QueryTreeNodePtr & query_node, + const Names & aggregation_keys, + const GroupingSetsParamsList & grouping_sets_parameters_list, + const PlannerContext & planner_context); + +/// Extract aggregate descriptions from aggregate function nodes +AggregateDescriptions extractAggregateDescriptions(const QueryTreeNodes & aggregate_function_nodes, const PlannerContext & planner_context); + +} diff --git a/src/Planner/PlannerContext.cpp b/src/Planner/PlannerContext.cpp new file mode 100644 index 00000000000..9f4a489bf5f --- /dev/null +++ b/src/Planner/PlannerContext.cpp @@ -0,0 +1,174 @@ +#include + +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +const ColumnIdentifier & GlobalPlannerContext::createColumnIdentifier(const QueryTreeNodePtr & column_node) +{ + const auto & column_node_typed = column_node->as(); + auto column_source_node = column_node_typed.getColumnSource(); + + return createColumnIdentifier(column_node_typed.getColumn(), column_source_node); +} + +const ColumnIdentifier & GlobalPlannerContext::createColumnIdentifier(const NameAndTypePair & column, const QueryTreeNodePtr & column_source_node) +{ + std::string column_identifier; + + if (column_source_node->hasAlias()) + column_identifier += column_source_node->getAlias(); + else if (const auto * table_source_node = column_source_node->as()) + column_identifier += table_source_node->getStorageID().getFullNameNotQuoted(); + + if (!column_identifier.empty()) + column_identifier += '.'; + + column_identifier += column.name; + column_identifier += '_' + std::to_string(column_identifiers.size()); + + auto [it, inserted] = column_identifiers.emplace(column_identifier); + assert(inserted); + + return *it; +} + +bool GlobalPlannerContext::hasColumnIdentifier(const ColumnIdentifier & column_identifier) +{ + return column_identifiers.contains(column_identifier); +} + +PlannerContext::PlannerContext(ContextPtr query_context_, GlobalPlannerContextPtr global_planner_context_) + : query_context(std::move(query_context_)) + , global_planner_context(std::move(global_planner_context_)) +{} + +TableExpressionData & PlannerContext::getOrCreateTableExpressionData(const QueryTreeNodePtr & table_expression_node) +{ + auto [it, _] = table_expression_node_to_data.emplace(table_expression_node, TableExpressionData()); + return it->second; +} + +const TableExpressionData & PlannerContext::getTableExpressionDataOrThrow(const QueryTreeNodePtr & table_expression_node) const +{ + auto table_expression_data_it = table_expression_node_to_data.find(table_expression_node); + if (table_expression_data_it == table_expression_node_to_data.end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Table expression {} is not registered in planner context", + table_expression_node->formatASTForErrorMessage()); + + return table_expression_data_it->second; +} + +TableExpressionData & PlannerContext::getTableExpressionDataOrThrow(const QueryTreeNodePtr & table_expression_node) +{ + auto table_expression_data_it = table_expression_node_to_data.find(table_expression_node); + if (table_expression_data_it == table_expression_node_to_data.end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Table expression {} is not registered in planner context", + table_expression_node->formatASTForErrorMessage()); + + return table_expression_data_it->second; +} + +const TableExpressionData * PlannerContext::getTableExpressionDataOrNull(const QueryTreeNodePtr & table_expression_node) const +{ + auto 
table_expression_data_it = table_expression_node_to_data.find(table_expression_node); + if (table_expression_data_it == table_expression_node_to_data.end()) + return nullptr; + + return &table_expression_data_it->second; +} + +TableExpressionData * PlannerContext::getTableExpressionDataOrNull(const QueryTreeNodePtr & table_expression_node) +{ + auto table_expression_data_it = table_expression_node_to_data.find(table_expression_node); + if (table_expression_data_it == table_expression_node_to_data.end()) + return nullptr; + + return &table_expression_data_it->second; +} + +const ColumnIdentifier & PlannerContext::getColumnNodeIdentifierOrThrow(const QueryTreeNodePtr & column_node) const +{ + auto & column_node_typed = column_node->as(); + const auto & column_name = column_node_typed.getColumnName(); + auto column_source = column_node_typed.getColumnSource(); + const auto & table_expression_data = getTableExpressionDataOrThrow(column_source); + return table_expression_data.getColumnIdentifierOrThrow(column_name); +} + +const ColumnIdentifier * PlannerContext::getColumnNodeIdentifierOrNull(const QueryTreeNodePtr & column_node) const +{ + auto & column_node_typed = column_node->as(); + const auto & column_name = column_node_typed.getColumnName(); + auto column_source = column_node_typed.getColumnSourceOrNull(); + if (!column_source) + return nullptr; + + const auto * table_expression_data = getTableExpressionDataOrNull(column_source); + if (!table_expression_data) + return nullptr; + + return table_expression_data->getColumnIdentifierOrNull(column_name); +} + +PlannerContext::SetKey PlannerContext::createSetKey(const QueryTreeNodePtr & set_source_node) +{ + auto set_source_hash = set_source_node->getTreeHash(); + return "__set_" + toString(set_source_hash.first) + '_' + toString(set_source_hash.second); +} + +void PlannerContext::registerSet(const SetKey & key, PlannerSet planner_set) +{ + if (!planner_set.getSet()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Set must be initialized"); + + const auto & subquery_node = planner_set.getSubqueryNode(); + if (subquery_node) + { + auto node_type = subquery_node->getNodeType(); + + if (node_type != QueryTreeNodeType::QUERY && + node_type != QueryTreeNodeType::UNION) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Invalid node for set table expression. Expected query or union. Actual {}", + subquery_node->formatASTForErrorMessage()); + } + + set_key_to_set.emplace(key, std::move(planner_set)); +} + +bool PlannerContext::hasSet(const SetKey & key) const +{ + return set_key_to_set.contains(key); +} + +const PlannerSet & PlannerContext::getSetOrThrow(const SetKey & key) const +{ + auto it = set_key_to_set.find(key); + if (it == set_key_to_set.end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "No set is registered for key {}", + key); + + return it->second; +} + +const PlannerSet * PlannerContext::getSetOrNull(const SetKey & key) const +{ + auto it = set_key_to_set.find(key); + if (it == set_key_to_set.end()) + return nullptr; + + return &it->second; +} + +} diff --git a/src/Planner/PlannerContext.h b/src/Planner/PlannerContext.h new file mode 100644 index 00000000000..63874bf7ab9 --- /dev/null +++ b/src/Planner/PlannerContext.h @@ -0,0 +1,205 @@ +#pragma once + +#include + +#include +#include + +#include +#include + +#include + +#include + +namespace DB +{ + +/** Global planner context contains common objects that are shared between each planner context. + * + * 1. Column identifiers. 
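 *
 * A column identifier is derived from the column source and column name plus a unique numeric
 * suffix (see createColumnIdentifier). Illustrative example: the first identifier created for
 * column id of table default.test_table looks like "default.test_table.id_0"; the suffix keeps
 * identifiers unique across the whole query.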
+ */ +class GlobalPlannerContext +{ +public: + GlobalPlannerContext() = default; + + /** Create column identifier for column node. + * + * Result column identifier is added into context. + */ + const ColumnIdentifier & createColumnIdentifier(const QueryTreeNodePtr & column_node); + + /** Create column identifier for column and column source. + * + * Result column identifier is added into context. + */ + const ColumnIdentifier & createColumnIdentifier(const NameAndTypePair & column, const QueryTreeNodePtr & column_source_node); + + /// Check if context has column identifier + bool hasColumnIdentifier(const ColumnIdentifier & column_identifier); + +private: + std::unordered_set column_identifiers; +}; + +using GlobalPlannerContextPtr = std::shared_ptr; + +/** PlannerSet is wrapper around Set that is used during query planning. + * + * If subquery node is null, such set is already prepared for execution. + * + * If subquery node is not null, then set must be build from the result of the subquery. + * If subquery node is not null, it must have QUERY or UNION type. + */ +class PlannerSet +{ +public: + /// Construct planner set that is ready for execution + explicit PlannerSet(SetPtr set_) + : set(std::move(set_)) + {} + + /// Construct planner set with set and subquery node + explicit PlannerSet(SetPtr set_, QueryTreeNodePtr subquery_node_) + : set(std::move(set_)) + , subquery_node(std::move(subquery_node_)) + {} + + /// Get set + const SetPtr & getSet() const + { + return set; + } + + /// Get subquery node + const QueryTreeNodePtr & getSubqueryNode() const + { + return subquery_node; + } + +private: + SetPtr set; + + QueryTreeNodePtr subquery_node; +}; + +class PlannerContext +{ +public: + /// Create planner context with query context and global planner context + PlannerContext(ContextPtr query_context_, GlobalPlannerContextPtr global_planner_context_); + + /// Get planner context query context + const ContextPtr & getQueryContext() const + { + return query_context; + } + + /// Get planner context query context + ContextPtr & getQueryContext() + { + return query_context; + } + + /// Get global planner context + const GlobalPlannerContextPtr & getGlobalPlannerContext() const + { + return global_planner_context; + } + + /// Get global planner context + GlobalPlannerContextPtr & getGlobalPlannerContext() + { + return global_planner_context; + } + + /// Get or create table expression data for table expression node. + TableExpressionData & getOrCreateTableExpressionData(const QueryTreeNodePtr & table_expression_node); + + /** Get table expression data. + * Exception is thrown if there are no table expression data for table expression node. + */ + const TableExpressionData & getTableExpressionDataOrThrow(const QueryTreeNodePtr & table_expression_node) const; + + /** Get table expression data. + * Exception is thrown if there are no table expression data for table expression node. + */ + TableExpressionData & getTableExpressionDataOrThrow(const QueryTreeNodePtr & table_expression_node); + + /** Get table expression data. + * Null is returned if there are no table expression data for table expression node. + */ + const TableExpressionData * getTableExpressionDataOrNull(const QueryTreeNodePtr & table_expression_node) const; + + /** Get table expression data. + * Null is returned if there are no table expression data for table expression node. 
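 *
 * Illustrative sketch of the intended usage (variable names are assumptions): data is registered
 * once while planning the join tree and looked up afterwards, e.g.
 *
 *     auto & table_expression_data = planner_context->getOrCreateTableExpressionData(table_expression_node);
 *     table_expression_data.addColumn(column, column_identifier);
 *     ...
 *     if (const auto * data = planner_context->getTableExpressionDataOrNull(table_expression_node))
 *         /// use the registered columns and their identifiers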
+ */ + TableExpressionData * getTableExpressionDataOrNull(const QueryTreeNodePtr & table_expression_node); + + /// Get table expression node to data read only map + const std::unordered_map & getTableExpressionNodeToData() const + { + return table_expression_node_to_data; + } + + /** Get column node identifier. + * For column node source check if table expression data is registered. + * If table expression data is not registered exception is thrown. + * In table expression data get column node identifier using column name. + */ + const ColumnIdentifier & getColumnNodeIdentifierOrThrow(const QueryTreeNodePtr & column_node) const; + + /** Get column node identifier. + * For column node source check if table expression data is registered. + * If table expression data is not registered null is returned. + * In table expression data get column node identifier or null using column name. + */ + const ColumnIdentifier * getColumnNodeIdentifierOrNull(const QueryTreeNodePtr & column_node) const; + + using SetKey = std::string; + + using SetKeyToSet = std::unordered_map; + + /// Create set key for set source node + static SetKey createSetKey(const QueryTreeNodePtr & set_source_node); + + /// Register set for set key + void registerSet(const SetKey & key, PlannerSet planner_set); + + /// Returns true if set is registered for key, false otherwise + bool hasSet(const SetKey & key) const; + + /// Get set for key, if no set is registered logical exception is thrown + const PlannerSet & getSetOrThrow(const SetKey & key) const; + + /// Get set for key, if no set is registered null is returned + const PlannerSet * getSetOrNull(const SetKey & key) const; + + /// Get registered sets + const SetKeyToSet & getRegisteredSets() const + { + return set_key_to_set; + } + +private: + /// Query context + ContextPtr query_context; + + /// Global planner context + GlobalPlannerContextPtr global_planner_context; + + /// Column node to column identifier + std::unordered_map column_node_to_column_identifier; + + /// Table expression node to data + std::unordered_map table_expression_node_to_data; + + /// Set key to set + SetKeyToSet set_key_to_set; + +}; + +using PlannerContextPtr = std::shared_ptr; + +} diff --git a/src/Planner/PlannerExpressionAnalysis.cpp b/src/Planner/PlannerExpressionAnalysis.cpp new file mode 100644 index 00000000000..b034edf97d8 --- /dev/null +++ b/src/Planner/PlannerExpressionAnalysis.cpp @@ -0,0 +1,508 @@ +#include + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +namespace +{ + +/** Construct filter analysis result for filter expression node + * Actions before filter are added into into actions chain. + * It is client responsibility to update filter analysis result if filter column must be removed after chain is finalized. + */ +FilterAnalysisResult analyzeFilter(const QueryTreeNodePtr & filter_expression_node, + const ColumnsWithTypeAndName & join_tree_input_columns, + const PlannerContextPtr & planner_context, + ActionsChain & actions_chain) +{ + const auto * chain_available_output_columns = actions_chain.getLastStepAvailableOutputColumnsOrNull(); + const auto & filter_input = chain_available_output_columns ? 
*chain_available_output_columns : join_tree_input_columns; + + FilterAnalysisResult result; + + result.filter_actions = buildActionsDAGFromExpressionNode(filter_expression_node, filter_input, planner_context); + result.filter_column_name = result.filter_actions->getOutputs().at(0)->result_name; + actions_chain.addStep(std::make_unique(result.filter_actions)); + + return result; +} + +/** Construct aggregation analysis result if query tree has GROUP BY or aggregates. + * Actions before aggregation are added into actions chain, if result is not null optional. + */ +std::optional analyzeAggregation(QueryTreeNodePtr & query_tree, + const ColumnsWithTypeAndName & join_tree_input_columns, + const PlannerContextPtr & planner_context, + ActionsChain & actions_chain) +{ + auto & query_node = query_tree->as(); + + auto aggregate_function_nodes = collectAggregateFunctionNodes(query_tree); + auto aggregates_descriptions = extractAggregateDescriptions(aggregate_function_nodes, *planner_context); + + ColumnsWithTypeAndName aggregates_columns; + aggregates_columns.reserve(aggregates_descriptions.size()); + for (auto & aggregate_description : aggregates_descriptions) + aggregates_columns.emplace_back(nullptr, aggregate_description.function->getReturnType(), aggregate_description.column_name); + + Names aggregation_keys; + + const auto * chain_available_output_columns = actions_chain.getLastStepAvailableOutputColumnsOrNull(); + const auto & group_by_input = chain_available_output_columns ? *chain_available_output_columns : join_tree_input_columns; + + ActionsDAGPtr before_aggregation_actions = std::make_shared(group_by_input); + before_aggregation_actions->getOutputs().clear(); + + std::unordered_set before_aggregation_actions_output_node_names; + + GroupingSetsParamsList grouping_sets_parameters_list; + bool group_by_with_constant_keys = false; + bool disable_grouping_sets = false; + + PlannerActionsVisitor actions_visitor(planner_context); + + /// Add expressions from GROUP BY + + if (query_node.hasGroupBy()) + { + if (query_node.isGroupByWithGroupingSets()) + { + for (auto & grouping_set_keys_list_node : query_node.getGroupBy().getNodes()) + { + auto & grouping_set_keys_list_node_typed = grouping_set_keys_list_node->as(); + grouping_sets_parameters_list.emplace_back(); + auto & grouping_sets_parameters = grouping_sets_parameters_list.back(); + + for (auto & grouping_set_key_node : grouping_set_keys_list_node_typed.getNodes()) + { + group_by_with_constant_keys |= grouping_set_key_node->hasConstantValue(); + + auto expression_dag_nodes = actions_visitor.visit(before_aggregation_actions, grouping_set_key_node); + aggregation_keys.reserve(expression_dag_nodes.size()); + + for (auto & expression_dag_node : expression_dag_nodes) + { + grouping_sets_parameters.used_keys.push_back(expression_dag_node->result_name); + if (before_aggregation_actions_output_node_names.contains(expression_dag_node->result_name)) + continue; + + aggregation_keys.push_back(expression_dag_node->result_name); + before_aggregation_actions->getOutputs().push_back(expression_dag_node); + before_aggregation_actions_output_node_names.insert(expression_dag_node->result_name); + } + } + } + + for (auto & grouping_sets_parameter : grouping_sets_parameters_list) + { + NameSet grouping_sets_used_keys; + Names grouping_sets_keys; + + for (auto & key : grouping_sets_parameter.used_keys) + { + auto [_, inserted] = grouping_sets_used_keys.insert(key); + if (inserted) + grouping_sets_keys.push_back(key); + } + + for (auto & key : aggregation_keys) + 
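/// Illustrative example: for GROUP BY GROUPING SETS ((a, b), (a)) the collected aggregation keys are [a, b];
/// the first grouping set gets used_keys = [a, b] and missing_keys = [], the second one used_keys = [a]
/// and missing_keys = [b], i.e. missing keys are the GROUP BY keys absent from that particular set.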
{ + if (grouping_sets_used_keys.contains(key)) + continue; + + grouping_sets_parameter.missing_keys.push_back(key); + } + + grouping_sets_parameter.used_keys = std::move(grouping_sets_keys); + } + + /// It is expected by execution layer that if there are only 1 grouping sets it will be removed + if (grouping_sets_parameters_list.size() == 1) + { + disable_grouping_sets = true; + grouping_sets_parameters_list.clear(); + } + } + else + { + for (auto & group_by_key_node : query_node.getGroupBy().getNodes()) + group_by_with_constant_keys |= group_by_key_node->hasConstantValue(); + + auto expression_dag_nodes = actions_visitor.visit(before_aggregation_actions, query_node.getGroupByNode()); + aggregation_keys.reserve(expression_dag_nodes.size()); + + for (auto & expression_dag_node : expression_dag_nodes) + { + if (before_aggregation_actions_output_node_names.contains(expression_dag_node->result_name)) + continue; + + aggregation_keys.push_back(expression_dag_node->result_name); + before_aggregation_actions->getOutputs().push_back(expression_dag_node); + before_aggregation_actions_output_node_names.insert(expression_dag_node->result_name); + } + } + } + + /// Add expressions from aggregate functions arguments + + for (auto & aggregate_function_node : aggregate_function_nodes) + { + auto & aggregate_function_node_typed = aggregate_function_node->as(); + for (const auto & aggregate_function_node_argument : aggregate_function_node_typed.getArguments().getNodes()) + { + auto expression_dag_nodes = actions_visitor.visit(before_aggregation_actions, aggregate_function_node_argument); + for (auto & expression_dag_node : expression_dag_nodes) + { + if (before_aggregation_actions_output_node_names.contains(expression_dag_node->result_name)) + continue; + + before_aggregation_actions->getOutputs().push_back(expression_dag_node); + before_aggregation_actions_output_node_names.insert(expression_dag_node->result_name); + } + } + } + + if (aggregation_keys.empty() && aggregates_descriptions.empty()) + return {}; + + /** For non ordinary GROUP BY we add virtual __grouping_set column + * With set number, which is used as an additional key at the stage of merging aggregating data. + */ + if (query_node.isGroupByWithRollup() || query_node.isGroupByWithCube() || (query_node.isGroupByWithGroupingSets() && !disable_grouping_sets)) + aggregates_columns.emplace_back(nullptr, std::make_shared(), "__grouping_set"); + + resolveGroupingFunctions(query_tree, aggregation_keys, grouping_sets_parameters_list, *planner_context); + + /// Only aggregation keys and aggregates are available for next steps after GROUP BY step + auto aggregate_step = std::make_unique(before_aggregation_actions, ActionsChainStep::AvailableOutputColumnsStrategy::OUTPUT_NODES, aggregates_columns); + actions_chain.addStep(std::move(aggregate_step)); + + AggregationAnalysisResult aggregation_analysis_result; + aggregation_analysis_result.before_aggregation_actions = before_aggregation_actions; + aggregation_analysis_result.aggregation_keys = std::move(aggregation_keys); + aggregation_analysis_result.aggregate_descriptions = std::move(aggregates_descriptions); + aggregation_analysis_result.grouping_sets_parameters_list = std::move(grouping_sets_parameters_list); + aggregation_analysis_result.group_by_with_constant_keys = group_by_with_constant_keys; + + return aggregation_analysis_result; +} + +/** Construct window analysis result if query tree has window functions. 
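 * Illustrative example: for SELECT sum(x) OVER (PARTITION BY y ORDER BY z) FROM t the function
 * argument x, the partition key y and the sort key z are all collected into the before window
 * actions DAG.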
+ * Actions before window functions are added into actions chain, if result is not null optional. + */ +std::optional analyzeWindow(QueryTreeNodePtr & query_tree, + const ColumnsWithTypeAndName & join_tree_input_columns, + const PlannerContextPtr & planner_context, + ActionsChain & actions_chain) +{ + auto window_function_nodes = collectWindowFunctionNodes(query_tree); + if (window_function_nodes.empty()) + return {}; + + auto window_descriptions = extractWindowDescriptions(window_function_nodes, *planner_context); + + const auto * chain_available_output_columns = actions_chain.getLastStepAvailableOutputColumnsOrNull(); + const auto & window_input = chain_available_output_columns ? *chain_available_output_columns : join_tree_input_columns; + + PlannerActionsVisitor actions_visitor(planner_context); + + ActionsDAGPtr before_window_actions = std::make_shared(window_input); + before_window_actions->getOutputs().clear(); + + std::unordered_set before_window_actions_output_node_names; + + for (auto & window_function_node : window_function_nodes) + { + auto & window_function_node_typed = window_function_node->as(); + auto & window_node = window_function_node_typed.getWindowNode()->as(); + + auto expression_dag_nodes = actions_visitor.visit(before_window_actions, window_function_node_typed.getArgumentsNode()); + + for (auto & expression_dag_node : expression_dag_nodes) + { + if (before_window_actions_output_node_names.contains(expression_dag_node->result_name)) + continue; + + before_window_actions->getOutputs().push_back(expression_dag_node); + before_window_actions_output_node_names.insert(expression_dag_node->result_name); + } + + expression_dag_nodes = actions_visitor.visit(before_window_actions, window_node.getPartitionByNode()); + + for (auto & expression_dag_node : expression_dag_nodes) + { + if (before_window_actions_output_node_names.contains(expression_dag_node->result_name)) + continue; + + before_window_actions->getOutputs().push_back(expression_dag_node); + before_window_actions_output_node_names.insert(expression_dag_node->result_name); + } + + /** We add only sort column sort expression in before WINDOW actions DAG. + * WITH fill expressions must be constant nodes. 
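 * Illustrative example: only the sort expression itself (for example toStartOfHour(event_time))
 * is added to the DAG; constant WITH FILL bounds such as FROM/TO/STEP literals are not.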
+ */ + auto & order_by_node_list = window_node.getOrderBy(); + for (auto & sort_node : order_by_node_list.getNodes()) + { + auto & sort_node_typed = sort_node->as(); + expression_dag_nodes = actions_visitor.visit(before_window_actions, sort_node_typed.getExpression()); + + for (auto & expression_dag_node : expression_dag_nodes) + { + if (before_window_actions_output_node_names.contains(expression_dag_node->result_name)) + continue; + + before_window_actions->getOutputs().push_back(expression_dag_node); + before_window_actions_output_node_names.insert(expression_dag_node->result_name); + } + } + } + + ColumnsWithTypeAndName window_functions_additional_columns; + + for (auto & window_description : window_descriptions) + for (auto & window_function : window_description.window_functions) + window_functions_additional_columns.emplace_back(nullptr, window_function.aggregate_function->getReturnType(), window_function.column_name); + + auto before_window_step = std::make_unique(before_window_actions, + ActionsChainStep::AvailableOutputColumnsStrategy::ALL_NODES, + window_functions_additional_columns); + actions_chain.addStep(std::move(before_window_step)); + + WindowAnalysisResult result; + result.before_window_actions = std::move(before_window_actions); + result.window_descriptions = std::move(window_descriptions); + + return result; +} + +/** Construct projection analysis result. + * Projection actions are added into actions chain. + * It is client responsibility to update projection analysis result with project names actions after chain is finalized. + */ +ProjectionAnalysisResult analyzeProjection(const QueryNode & query_node, + const ColumnsWithTypeAndName & join_tree_input_columns, + const PlannerContextPtr & planner_context, + ActionsChain & actions_chain) +{ + const auto * chain_available_output_columns = actions_chain.getLastStepAvailableOutputColumnsOrNull(); + const auto & projection_input = chain_available_output_columns ? *chain_available_output_columns : join_tree_input_columns; + auto projection_actions = buildActionsDAGFromExpressionNode(query_node.getProjectionNode(), projection_input, planner_context); + + auto projection_columns = query_node.getProjectionColumns(); + size_t projection_columns_size = projection_columns.size(); + + Names projection_column_names; + NamesWithAliases projection_column_names_with_display_aliases; + projection_column_names_with_display_aliases.reserve(projection_columns_size); + + auto & projection_actions_outputs = projection_actions->getOutputs(); + size_t projection_outputs_size = projection_actions_outputs.size(); + + if (projection_columns_size != projection_outputs_size) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "QueryTree projection nodes size mismatch. Expected {}. 
Actual {}", + projection_columns_size, + projection_outputs_size); + + for (size_t i = 0; i < projection_outputs_size; ++i) + { + auto & projection_column = projection_columns[i]; + const auto * projection_node = projection_actions_outputs[i]; + const auto & projection_node_name = projection_node->result_name; + + projection_column_names.push_back(projection_node_name); + projection_column_names_with_display_aliases.push_back({projection_node_name, projection_column.name}); + } + + auto projection_actions_step = std::make_unique(projection_actions); + actions_chain.addStep(std::move(projection_actions_step)); + + ProjectionAnalysisResult result; + result.projection_actions = std::move(projection_actions); + result.projection_column_names = std::move(projection_column_names); + result.projection_column_names_with_display_aliases = std::move(projection_column_names_with_display_aliases); + + return result; +} + +/** Construct sort analysis result. + * Actions before sort are added into actions chain. + */ +SortAnalysisResult analyzeSort(const QueryNode & query_node, + const ColumnsWithTypeAndName & join_tree_input_columns, + const PlannerContextPtr & planner_context, + ActionsChain & actions_chain) +{ + const auto *chain_available_output_columns = actions_chain.getLastStepAvailableOutputColumnsOrNull(); + const auto & order_by_input = chain_available_output_columns ? *chain_available_output_columns : join_tree_input_columns; + + ActionsDAGPtr before_sort_actions = std::make_shared(order_by_input); + auto & before_sort_actions_outputs = before_sort_actions->getOutputs(); + before_sort_actions_outputs.clear(); + + PlannerActionsVisitor actions_visitor(planner_context); + + std::unordered_set before_sort_actions_dag_output_node_names; + + /** We add only sort node sort expression in before ORDER BY actions DAG. + * WITH fill expressions must be constant nodes. + */ + const auto & order_by_node_list = query_node.getOrderBy(); + for (const auto & sort_node : order_by_node_list.getNodes()) + { + auto & sort_node_typed = sort_node->as(); + auto expression_dag_nodes = actions_visitor.visit(before_sort_actions, sort_node_typed.getExpression()); + + for (auto & action_dag_node : expression_dag_nodes) + { + if (before_sort_actions_dag_output_node_names.contains(action_dag_node->result_name)) + continue; + + before_sort_actions_outputs.push_back(action_dag_node); + before_sort_actions_dag_output_node_names.insert(action_dag_node->result_name); + } + } + + auto actions_step_before_sort = std::make_unique(before_sort_actions); + actions_chain.addStep(std::move(actions_step_before_sort)); + + return SortAnalysisResult{std::move(before_sort_actions)}; +} + +/** Construct limit by analysis result. + * Actions before limit by are added into actions chain. + */ +LimitByAnalysisResult analyzeLimitBy(const QueryNode & query_node, + const ColumnsWithTypeAndName & join_tree_input_columns, + const PlannerContextPtr & planner_context, + ActionsChain & actions_chain) +{ + const auto * chain_available_output_columns = actions_chain.getLastStepAvailableOutputColumnsOrNull(); + const auto & limit_by_input = chain_available_output_columns ? 
*chain_available_output_columns : join_tree_input_columns; + auto before_limit_by_actions = buildActionsDAGFromExpressionNode(query_node.getLimitByNode(), limit_by_input, planner_context); + + Names limit_by_column_names; + limit_by_column_names.reserve(before_limit_by_actions->getOutputs().size()); + for (auto & output_node : before_limit_by_actions->getOutputs()) + limit_by_column_names.push_back(output_node->result_name); + + auto actions_step_before_limit_by = std::make_unique(before_limit_by_actions); + actions_chain.addStep(std::move(actions_step_before_limit_by)); + + return LimitByAnalysisResult{std::move(before_limit_by_actions), std::move(limit_by_column_names)}; +} + +} + +PlannerExpressionsAnalysisResult buildExpressionAnalysisResult(QueryTreeNodePtr query_tree, + const ColumnsWithTypeAndName & join_tree_input_columns, + const PlannerContextPtr & planner_context) +{ + auto & query_node = query_tree->as(); + + ActionsChain actions_chain; + + std::optional where_analysis_result_optional; + std::optional where_action_step_index_optional; + + if (query_node.hasWhere()) + { + where_analysis_result_optional = analyzeFilter(query_node.getWhere(), join_tree_input_columns, planner_context, actions_chain); + where_action_step_index_optional = actions_chain.getLastStepIndex(); + } + + auto aggregation_analysis_result_optional = analyzeAggregation(query_tree, join_tree_input_columns, planner_context, actions_chain); + + std::optional having_analysis_result_optional; + std::optional having_action_step_index_optional; + + if (query_node.hasHaving()) + { + having_analysis_result_optional = analyzeFilter(query_node.getHaving(), join_tree_input_columns, planner_context, actions_chain); + having_action_step_index_optional = actions_chain.getLastStepIndex(); + } + + auto window_analysis_result_optional = analyzeWindow(query_tree, join_tree_input_columns, planner_context, actions_chain); + auto projection_analysis_result = analyzeProjection(query_node, join_tree_input_columns, planner_context, actions_chain); + + std::optional sort_analysis_result_optional; + if (query_node.hasOrderBy()) + sort_analysis_result_optional = analyzeSort(query_node, join_tree_input_columns, planner_context, actions_chain); + + std::optional limit_by_analysis_result_optional; + + if (query_node.hasLimitBy()) + limit_by_analysis_result_optional = analyzeLimitBy(query_node, join_tree_input_columns, planner_context, actions_chain); + + const auto * chain_available_output_columns = actions_chain.getLastStepAvailableOutputColumnsOrNull(); + const auto & project_names_input = chain_available_output_columns ? 
*chain_available_output_columns : join_tree_input_columns; + auto project_names_actions = std::make_shared(project_names_input); + project_names_actions->project(projection_analysis_result.projection_column_names_with_display_aliases); + actions_chain.addStep(std::make_unique(project_names_actions)); + + // std::cout << "Chain dump before finalize" << std::endl; + // std::cout << actions_chain.dump() << std::endl; + + actions_chain.finalize(); + + // std::cout << "Chain dump after finalize" << std::endl; + // std::cout << actions_chain.dump() << std::endl; + + projection_analysis_result.project_names_actions = std::move(project_names_actions); + + PlannerExpressionsAnalysisResult expressions_analysis_result(std::move(projection_analysis_result)); + + if (where_action_step_index_optional && where_analysis_result_optional) + { + auto & where_analysis_result = *where_analysis_result_optional; + auto & where_actions_chain_node = actions_chain.at(*where_action_step_index_optional); + where_analysis_result.remove_filter_column = !where_actions_chain_node->getChildRequiredOutputColumnsNames().contains(where_analysis_result.filter_column_name); + expressions_analysis_result.addWhere(std::move(where_analysis_result)); + } + + if (aggregation_analysis_result_optional) + expressions_analysis_result.addAggregation(std::move(*aggregation_analysis_result_optional)); + + if (having_action_step_index_optional && having_analysis_result_optional) + { + auto & having_analysis_result = *having_analysis_result_optional; + auto & having_actions_chain_node = actions_chain.at(*having_action_step_index_optional); + having_analysis_result.remove_filter_column = !having_actions_chain_node->getChildRequiredOutputColumnsNames().contains(having_analysis_result.filter_column_name); + expressions_analysis_result.addHaving(std::move(having_analysis_result)); + } + + if (window_analysis_result_optional) + expressions_analysis_result.addWindow(std::move(*window_analysis_result_optional)); + + if (sort_analysis_result_optional) + expressions_analysis_result.addSort(std::move(*sort_analysis_result_optional)); + + if (limit_by_analysis_result_optional) + expressions_analysis_result.addLimitBy(std::move(*limit_by_analysis_result_optional)); + + return expressions_analysis_result; +} + +} diff --git a/src/Planner/PlannerExpressionAnalysis.h b/src/Planner/PlannerExpressionAnalysis.h new file mode 100644 index 00000000000..aefb3c369d0 --- /dev/null +++ b/src/Planner/PlannerExpressionAnalysis.h @@ -0,0 +1,175 @@ +#pragma once + +#include +#include + +#include + +#include + +#include +#include +#include + +namespace DB +{ + +struct ProjectionAnalysisResult +{ + ActionsDAGPtr projection_actions; + Names projection_column_names; + NamesWithAliases projection_column_names_with_display_aliases; + ActionsDAGPtr project_names_actions; +}; + +struct FilterAnalysisResult +{ + ActionsDAGPtr filter_actions; + std::string filter_column_name; + bool remove_filter_column = false; +}; + +struct AggregationAnalysisResult +{ + ActionsDAGPtr before_aggregation_actions; + Names aggregation_keys; + AggregateDescriptions aggregate_descriptions; + GroupingSetsParamsList grouping_sets_parameters_list; + bool group_by_with_constant_keys = false; +}; + +struct WindowAnalysisResult +{ + ActionsDAGPtr before_window_actions; + std::vector window_descriptions; +}; + +struct SortAnalysisResult +{ + ActionsDAGPtr before_order_by_actions; +}; + +struct LimitByAnalysisResult +{ + ActionsDAGPtr before_limit_by_actions; + Names limit_by_column_names; +}; + +class 
PlannerExpressionsAnalysisResult +{ +public: + explicit PlannerExpressionsAnalysisResult(ProjectionAnalysisResult projection_analysis_result_) + : projection_analysis_result(std::move(projection_analysis_result_)) + {} + + const ProjectionAnalysisResult & getProjection() const + { + return projection_analysis_result; + } + + bool hasWhere() const + { + return where_analysis_result.filter_actions != nullptr; + } + + const FilterAnalysisResult & getWhere() const + { + return where_analysis_result; + } + + void addWhere(FilterAnalysisResult where_analysis_result_) + { + where_analysis_result = std::move(where_analysis_result_); + } + + bool hasAggregation() const + { + return !aggregation_analysis_result.aggregation_keys.empty() || !aggregation_analysis_result.aggregate_descriptions.empty(); + } + + const AggregationAnalysisResult & getAggregation() const + { + return aggregation_analysis_result; + } + + void addAggregation(AggregationAnalysisResult aggregation_analysis_result_) + { + aggregation_analysis_result = std::move(aggregation_analysis_result_); + } + + bool hasHaving() const + { + return having_analysis_result.filter_actions != nullptr; + } + + const FilterAnalysisResult & getHaving() const + { + return having_analysis_result; + } + + void addHaving(FilterAnalysisResult having_analysis_result_) + { + having_analysis_result = std::move(having_analysis_result_); + } + + bool hasWindow() const + { + return !window_analysis_result.window_descriptions.empty(); + } + + const WindowAnalysisResult & getWindow() const + { + return window_analysis_result; + } + + void addWindow(WindowAnalysisResult window_analysis_result_) + { + window_analysis_result = std::move(window_analysis_result_); + } + + bool hasSort() const + { + return sort_analysis_result.before_order_by_actions != nullptr; + } + + const SortAnalysisResult & getSort() const + { + return sort_analysis_result; + } + + void addSort(SortAnalysisResult sort_analysis_result_) + { + sort_analysis_result = std::move(sort_analysis_result_); + } + + bool hasLimitBy() const + { + return limit_by_analysis_result.before_limit_by_actions != nullptr; + } + + const LimitByAnalysisResult & getLimitBy() const + { + return limit_by_analysis_result; + } + + void addLimitBy(LimitByAnalysisResult limit_by_analysis_result_) + { + limit_by_analysis_result = std::move(limit_by_analysis_result_); + } + +private: + ProjectionAnalysisResult projection_analysis_result; + FilterAnalysisResult where_analysis_result; + AggregationAnalysisResult aggregation_analysis_result; + FilterAnalysisResult having_analysis_result; + WindowAnalysisResult window_analysis_result; + SortAnalysisResult sort_analysis_result; + LimitByAnalysisResult limit_by_analysis_result; +}; + +/// Build expression analysis result for query tree, join tree input columns and planner context +PlannerExpressionsAnalysisResult buildExpressionAnalysisResult(QueryTreeNodePtr query_tree, + const ColumnsWithTypeAndName & join_tree_input_columns, + const PlannerContextPtr & planner_context); + +} diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp new file mode 100644 index 00000000000..4cb446a65a0 --- /dev/null +++ b/src/Planner/PlannerJoinTree.cpp @@ -0,0 +1,708 @@ +#include + +#include + +#include +#include + +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include 
+#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int INVALID_JOIN_ON_EXPRESSION; + extern const int LOGICAL_ERROR; + extern const int NOT_IMPLEMENTED; + extern const int SYNTAX_ERROR; + extern const int ACCESS_DENIED; +} + +namespace +{ + +/// Check if current user has privileges to SELECT columns from table +void checkAccessRights(const TableNode & table_node, const Names & column_names, const ContextPtr & query_context) +{ + const auto & storage_id = table_node.getStorageID(); + const auto & storage_snapshot = table_node.getStorageSnapshot(); + + if (column_names.empty()) + { + /** For a trivial queries like "SELECT count() FROM table", "SELECT 1 FROM table" access is granted if at least + * one table column is accessible. + */ + auto access = query_context->getAccess(); + + for (const auto & column : storage_snapshot->metadata->getColumns()) + { + if (access->isGranted(AccessType::SELECT, storage_id.database_name, storage_id.table_name, column.name)) + return; + } + + throw Exception(ErrorCodes::ACCESS_DENIED, + "{}: Not enough privileges. To execute this query it's necessary to have grant SELECT for at least one column on {}", + query_context->getUserName(), + storage_id.getFullTableName()); + } + + query_context->checkAccess(AccessType::SELECT, storage_id, column_names); +} + +QueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expression, + SelectQueryInfo & select_query_info, + const SelectQueryOptions & select_query_options, + PlannerContextPtr & planner_context) +{ + auto * table_node = table_expression->as(); + auto * table_function_node = table_expression->as(); + auto * query_node = table_expression->as(); + auto * union_node = table_expression->as(); + + QueryPlan query_plan; + + auto & table_expression_data = planner_context->getTableExpressionDataOrThrow(table_expression); + + if (table_node || table_function_node) + { + const auto & storage = table_node ? table_node->getStorage() : table_function_node->getStorage(); + const auto & storage_snapshot = table_node ? table_node->getStorageSnapshot() : table_function_node->getStorageSnapshot(); + + auto table_expression_query_info = select_query_info; + table_expression_query_info.table_expression = table_expression; + + if (table_node) + table_expression_query_info.table_expression_modifiers = table_node->getTableExpressionModifiers(); + else + table_expression_query_info.table_expression_modifiers = table_function_node->getTableExpressionModifiers(); + + auto & query_context = planner_context->getQueryContext(); + + auto from_stage = storage->getQueryProcessingStage(query_context, select_query_options.to_stage, storage_snapshot, table_expression_query_info); + const auto & columns_names_set = table_expression_data.getColumnsNames(); + Names columns_names(columns_names_set.begin(), columns_names_set.end()); + + /** The current user must have the SELECT privilege. + * We do not check access rights for table functions because they have been already checked in ITableFunction::execute(). 
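 * Illustrative example: SELECT count() FROM t references no particular column, so checkAccessRights
 * above passes if the user has the SELECT grant on at least one column of t; when concrete columns
 * are read, the SELECT grant is required for each of them.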
+ */ + if (table_node) + { + auto column_names_with_aliases = columns_names; + const auto & alias_columns_names = table_expression_data.getAliasColumnsNames(); + column_names_with_aliases.insert(column_names_with_aliases.end(), alias_columns_names.begin(), alias_columns_names.end()); + checkAccessRights(*table_node, column_names_with_aliases, planner_context->getQueryContext()); + } + + if (columns_names.empty()) + { + auto column_names_and_types = storage_snapshot->getColumns(GetColumnsOptions(GetColumnsOptions::All).withSubcolumns()); + auto additional_column_to_read = column_names_and_types.front(); + + const auto & column_identifier = planner_context->getGlobalPlannerContext()->createColumnIdentifier(additional_column_to_read, table_expression); + columns_names.push_back(additional_column_to_read.name); + table_expression_data.addColumn(additional_column_to_read, column_identifier); + } + + size_t max_block_size = query_context->getSettingsRef().max_block_size; + size_t max_streams = query_context->getSettingsRef().max_threads; + + bool need_rewrite_query_with_final = storage->needRewriteQueryWithFinal(columns_names); + if (need_rewrite_query_with_final) + { + if (table_expression_query_info.table_expression_modifiers) + { + const auto & table_expression_modifiers = table_expression_query_info.table_expression_modifiers; + auto sample_size_ratio = table_expression_modifiers->getSampleSizeRatio(); + auto sample_offset_ratio = table_expression_modifiers->getSampleOffsetRatio(); + + table_expression_query_info.table_expression_modifiers = TableExpressionModifiers(true /*has_final*/, + sample_size_ratio, + sample_offset_ratio); + } + else + { + table_expression_query_info.table_expression_modifiers = TableExpressionModifiers(true /*has_final*/, + {} /*sample_size_ratio*/, + {} /*sample_offset_ratio*/); + } + } + + storage->read(query_plan, columns_names, storage_snapshot, table_expression_query_info, query_context, from_stage, max_block_size, max_streams); + + /// Create step which reads from empty source if storage has no data. + if (!query_plan.isInitialized()) + { + auto source_header = storage_snapshot->getSampleBlockForColumns(columns_names); + Pipe pipe(std::make_shared(source_header)); + auto read_from_pipe = std::make_unique(std::move(pipe)); + read_from_pipe->setStepDescription("Read from NullSource"); + query_plan.addStep(std::move(read_from_pipe)); + } + } + else if (query_node || union_node) + { + auto subquery_options = select_query_options.subquery(); + auto subquery_context = buildSubqueryContext(planner_context->getQueryContext()); + Planner subquery_planner(table_expression, subquery_options, std::move(subquery_context), planner_context->getGlobalPlannerContext()); + subquery_planner.buildQueryPlanIfNeeded(); + query_plan = std::move(subquery_planner).extractQueryPlan(); + } + else + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected table, table function, query or union. 
Actual {}", table_expression->formatASTForErrorMessage()); + } + + auto rename_actions_dag = std::make_shared(query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName()); + + for (auto & output_node : rename_actions_dag->getOutputs()) + { + const auto * column_identifier = table_expression_data.getColumnIdentifierOrNull(output_node->result_name); + + if (!column_identifier) + continue; + + const auto * node_to_rename = output_node; + output_node = &rename_actions_dag->addAlias(*node_to_rename, *column_identifier); + } + + auto rename_step = std::make_unique(query_plan.getCurrentDataStream(), rename_actions_dag); + rename_step->setStepDescription("Change column names to column identifiers"); + query_plan.addStep(std::move(rename_step)); + + return query_plan; +} + +QueryPlan buildQueryPlanForJoinNode(QueryTreeNodePtr join_tree_node, + SelectQueryInfo & select_query_info, + const SelectQueryOptions & select_query_options, + PlannerContextPtr & planner_context) +{ + auto & join_node = join_tree_node->as(); + + auto left_plan = buildQueryPlanForJoinTreeNode(join_node.getLeftTableExpression(), + select_query_info, + select_query_options, + planner_context); + auto left_plan_output_columns = left_plan.getCurrentDataStream().header.getColumnsWithTypeAndName(); + + auto right_plan = buildQueryPlanForJoinTreeNode(join_node.getRightTableExpression(), + select_query_info, + select_query_options, + planner_context); + auto right_plan_output_columns = right_plan.getCurrentDataStream().header.getColumnsWithTypeAndName(); + + JoinClausesAndActions join_clauses_and_actions; + JoinKind join_kind = join_node.getKind(); + + auto join_constant = tryExtractConstantFromJoinNode(join_tree_node); + if (join_constant) + { + /** If there is JOIN with always true constant, we transform it to cross. + * If there is JOIN with always false constant, we do not process JOIN keys. + * It is expected by join algorithm to handle such case. 
+ * + * Example: SELECT * FROM test_table AS t1 INNER JOIN test_table AS t2 ON 1; + */ + if (*join_constant) + join_kind = JoinKind::Cross; + } + else if (join_node.isOnJoinExpression()) + { + join_clauses_and_actions = buildJoinClausesAndActions(left_plan_output_columns, + right_plan_output_columns, + join_tree_node, + planner_context); + + join_clauses_and_actions.left_join_expressions_actions->projectInput(); + auto left_join_expressions_actions_step = std::make_unique(left_plan.getCurrentDataStream(), join_clauses_and_actions.left_join_expressions_actions); + left_join_expressions_actions_step->setStepDescription("JOIN actions"); + left_plan.addStep(std::move(left_join_expressions_actions_step)); + + join_clauses_and_actions.right_join_expressions_actions->projectInput(); + auto right_join_expressions_actions_step = std::make_unique(right_plan.getCurrentDataStream(), join_clauses_and_actions.right_join_expressions_actions); + right_join_expressions_actions_step->setStepDescription("JOIN actions"); + right_plan.addStep(std::move(right_join_expressions_actions_step)); + } + + std::unordered_map left_plan_column_name_to_cast_type; + std::unordered_map right_plan_column_name_to_cast_type; + + if (join_node.isUsingJoinExpression()) + { + auto & join_node_using_columns_list = join_node.getJoinExpression()->as(); + for (auto & join_node_using_node : join_node_using_columns_list.getNodes()) + { + auto & join_node_using_column_node = join_node_using_node->as(); + auto & inner_columns_list = join_node_using_column_node.getExpressionOrThrow()->as(); + + auto & left_inner_column_node = inner_columns_list.getNodes().at(0); + auto & left_inner_column = left_inner_column_node->as(); + + auto & right_inner_column_node = inner_columns_list.getNodes().at(1); + auto & right_inner_column = right_inner_column_node->as(); + + const auto & join_node_using_column_node_type = join_node_using_column_node.getColumnType(); + if (!left_inner_column.getColumnType()->equals(*join_node_using_column_node_type)) + { + const auto & left_inner_column_identifier = planner_context->getColumnNodeIdentifierOrThrow(left_inner_column_node); + left_plan_column_name_to_cast_type.emplace(left_inner_column_identifier, join_node_using_column_node_type); + } + + if (!right_inner_column.getColumnType()->equals(*join_node_using_column_node_type)) + { + const auto & right_inner_column_identifier = planner_context->getColumnNodeIdentifierOrThrow(right_inner_column_node); + right_plan_column_name_to_cast_type.emplace(right_inner_column_identifier, join_node_using_column_node_type); + } + } + } + + auto join_cast_plan_output_nodes = [&](QueryPlan & plan_to_add_cast, std::unordered_map & plan_column_name_to_cast_type) + { + auto cast_actions_dag = std::make_shared(plan_to_add_cast.getCurrentDataStream().header.getColumnsWithTypeAndName()); + + for (auto & output_node : cast_actions_dag->getOutputs()) + { + auto it = plan_column_name_to_cast_type.find(output_node->result_name); + if (it == plan_column_name_to_cast_type.end()) + continue; + + const auto & cast_type = it->second; + auto cast_type_name = cast_type->getName(); + Field cast_type_constant_value(cast_type_name); + + ColumnWithTypeAndName column; + column.name = calculateConstantActionNodeName(cast_type_constant_value); + column.column = DataTypeString().createColumnConst(0, cast_type_constant_value); + column.type = std::make_shared(); + + const auto * cast_type_constant_node = &cast_actions_dag->addColumn(std::move(column)); + + FunctionCastBase::Diagnostic diagnostic = 
{output_node->result_name, output_node->result_name}; + FunctionOverloadResolverPtr func_builder_cast + = CastInternalOverloadResolver::createImpl(std::move(diagnostic)); + + ActionsDAG::NodeRawConstPtrs children = {output_node, cast_type_constant_node}; + output_node = &cast_actions_dag->addFunction(func_builder_cast, std::move(children), output_node->result_name); + } + + cast_actions_dag->projectInput(); + auto cast_join_columns_step + = std::make_unique(plan_to_add_cast.getCurrentDataStream(), std::move(cast_actions_dag)); + cast_join_columns_step->setStepDescription("Cast JOIN USING columns"); + plan_to_add_cast.addStep(std::move(cast_join_columns_step)); + }; + + if (!left_plan_column_name_to_cast_type.empty()) + join_cast_plan_output_nodes(left_plan, left_plan_column_name_to_cast_type); + + if (!right_plan_column_name_to_cast_type.empty()) + join_cast_plan_output_nodes(right_plan, right_plan_column_name_to_cast_type); + + const auto & query_context = planner_context->getQueryContext(); + const auto & settings = query_context->getSettingsRef(); + + bool join_use_nulls = settings.join_use_nulls; + auto to_nullable_function = FunctionFactory::instance().get("toNullable", query_context); + + auto join_cast_plan_columns_to_nullable = [&](QueryPlan & plan_to_add_cast) + { + auto cast_actions_dag = std::make_shared(plan_to_add_cast.getCurrentDataStream().header.getColumnsWithTypeAndName()); + + for (auto & output_node : cast_actions_dag->getOutputs()) + { + if (planner_context->getGlobalPlannerContext()->hasColumnIdentifier(output_node->result_name)) + output_node = &cast_actions_dag->addFunction(to_nullable_function, {output_node}, output_node->result_name); + } + + cast_actions_dag->projectInput(); + auto cast_join_columns_step = std::make_unique(plan_to_add_cast.getCurrentDataStream(), std::move(cast_actions_dag)); + cast_join_columns_step->setStepDescription("Cast JOIN columns to Nullable"); + plan_to_add_cast.addStep(std::move(cast_join_columns_step)); + }; + + if (join_use_nulls) + { + if (isFull(join_kind)) + { + join_cast_plan_columns_to_nullable(left_plan); + join_cast_plan_columns_to_nullable(right_plan); + } + else if (isLeft(join_kind)) + { + join_cast_plan_columns_to_nullable(right_plan); + } + else if (isRight(join_kind)) + { + join_cast_plan_columns_to_nullable(left_plan); + } + } + + auto table_join = std::make_shared(settings, query_context->getTemporaryVolume()); + table_join->getTableJoin() = join_node.toASTTableJoin()->as(); + table_join->getTableJoin().kind = join_kind; + + if (join_kind == JoinKind::Comma) + { + join_kind = JoinKind::Cross; + table_join->getTableJoin().kind = JoinKind::Cross; + } + + table_join->setIsJoinWithConstant(join_constant != std::nullopt); + + if (join_node.isOnJoinExpression()) + { + const auto & join_clauses = join_clauses_and_actions.join_clauses; + bool is_asof = table_join->strictness() == JoinStrictness::Asof; + + if (join_clauses.size() > 1) + { + if (is_asof) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "ASOF join {} doesn't support multiple ORs for keys in JOIN ON section", + join_node.formatASTForErrorMessage()); + } + + auto & table_join_clauses = table_join->getClauses(); + + for (const auto & join_clause : join_clauses) + { + table_join_clauses.emplace_back(); + auto & table_join_clause = table_join_clauses.back(); + + const auto & join_clause_left_key_nodes = join_clause.getLeftKeyNodes(); + const auto & join_clause_right_key_nodes = join_clause.getRightKeyNodes(); + + size_t join_clause_key_nodes_size = 
join_clause_left_key_nodes.size(); + assert(join_clause_key_nodes_size == join_clause_right_key_nodes.size()); + + for (size_t i = 0; i < join_clause_key_nodes_size; ++i) + { + table_join_clause.key_names_left.push_back(join_clause_left_key_nodes[i]->result_name); + table_join_clause.key_names_right.push_back(join_clause_right_key_nodes[i]->result_name); + } + + const auto & join_clause_get_left_filter_condition_nodes = join_clause.getLeftFilterConditionNodes(); + if (!join_clause_get_left_filter_condition_nodes.empty()) + { + if (join_clause_get_left_filter_condition_nodes.size() != 1) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "JOIN {} left filter conditions size must be 1. Actual {}", + join_node.formatASTForErrorMessage(), + join_clause_get_left_filter_condition_nodes.size()); + + const auto & join_clause_left_filter_condition_name = join_clause_get_left_filter_condition_nodes[0]->result_name; + table_join_clause.analyzer_left_filter_condition_column_name = join_clause_left_filter_condition_name; + } + + const auto & join_clause_get_right_filter_condition_nodes = join_clause.getRightFilterConditionNodes(); + if (!join_clause_get_right_filter_condition_nodes.empty()) + { + if (join_clause_get_right_filter_condition_nodes.size() != 1) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "JOIN {} right filter conditions size must be 1. Actual {}", + join_node.formatASTForErrorMessage(), + join_clause_get_right_filter_condition_nodes.size()); + + const auto & join_clause_right_filter_condition_name = join_clause_get_right_filter_condition_nodes[0]->result_name; + table_join_clause.analyzer_right_filter_condition_column_name = join_clause_right_filter_condition_name; + } + + if (is_asof) + { + if (!join_clause.hasASOF()) + throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, + "JOIN {} no inequality in ASOF JOIN ON section.", + join_node.formatASTForErrorMessage()); + + if (table_join_clause.key_names_left.size() <= 1) + throw Exception(ErrorCodes::SYNTAX_ERROR, + "JOIN {} ASOF join needs at least one equi-join column", + join_node.formatASTForErrorMessage()); + } + + if (join_clause.hasASOF()) + { + const auto & asof_conditions = join_clause.getASOFConditions(); + assert(asof_conditions.size() == 1); + + const auto & asof_condition = asof_conditions[0]; + table_join->setAsofInequality(asof_condition.asof_inequality); + + /// Execution layer of JOIN algorithms expects that ASOF keys are last JOIN keys + std::swap(table_join_clause.key_names_left.at(asof_condition.key_index), table_join_clause.key_names_left.back()); + std::swap(table_join_clause.key_names_right.at(asof_condition.key_index), table_join_clause.key_names_right.back()); + } + } + } + else if (join_node.isUsingJoinExpression()) + { + auto & table_join_clauses = table_join->getClauses(); + table_join_clauses.emplace_back(); + auto & table_join_clause = table_join_clauses.back(); + + auto & using_list = join_node.getJoinExpression()->as(); + + for (auto & join_using_node : using_list.getNodes()) + { + auto & join_using_column_node = join_using_node->as(); + auto & using_join_columns_list = join_using_column_node.getExpressionOrThrow()->as(); + auto & using_join_left_join_column_node = using_join_columns_list.getNodes().at(0); + auto & using_join_right_join_column_node = using_join_columns_list.getNodes().at(1); + + const auto & left_column_identifier = planner_context->getColumnNodeIdentifierOrThrow(using_join_left_join_column_node); + const auto & right_column_identifier = 
planner_context->getColumnNodeIdentifierOrThrow(using_join_right_join_column_node); + + table_join_clause.key_names_left.push_back(left_column_identifier); + table_join_clause.key_names_right.push_back(right_column_identifier); + } + } + + auto left_table_names = left_plan.getCurrentDataStream().header.getNames(); + NameSet left_table_names_set(left_table_names.begin(), left_table_names.end()); + + auto columns_from_joined_table = right_plan.getCurrentDataStream().header.getNamesAndTypesList(); + table_join->setColumnsFromJoinedTable(columns_from_joined_table, left_table_names_set, ""); + + for (auto & column_from_joined_table : columns_from_joined_table) + { + if (planner_context->getGlobalPlannerContext()->hasColumnIdentifier(column_from_joined_table.name)) + table_join->addJoinedColumn(column_from_joined_table); + } + + auto join_algorithm = chooseJoinAlgorithm(table_join, join_node.getRightTableExpression(), right_plan.getCurrentDataStream().header, planner_context); + + auto result_plan = QueryPlan(); + + if (join_algorithm->isFilled()) + { + size_t max_block_size = query_context->getSettingsRef().max_block_size; + + auto filled_join_step = std::make_unique( + left_plan.getCurrentDataStream(), + join_algorithm, + max_block_size); + + filled_join_step->setStepDescription("Filled JOIN"); + left_plan.addStep(std::move(filled_join_step)); + + result_plan = std::move(left_plan); + } + else + { + auto add_sorting = [&] (QueryPlan & plan, const Names & key_names, JoinTableSide join_table_side) + { + SortDescription sort_description; + sort_description.reserve(key_names.size()); + for (const auto & key_name : key_names) + sort_description.emplace_back(key_name); + + auto sorting_step = std::make_unique( + plan.getCurrentDataStream(), + std::move(sort_description), + settings.max_block_size, + 0 /*limit*/, + SizeLimits(settings.max_rows_to_sort, settings.max_bytes_to_sort, settings.sort_overflow_mode), + settings.max_bytes_before_remerge_sort, + settings.remerge_sort_lowered_memory_bytes_ratio, + settings.max_bytes_before_external_sort, + query_context->getTempDataOnDisk(), + settings.min_free_disk_space_for_temporary_data, + settings.optimize_sorting_by_input_stream_properties); + sorting_step->setStepDescription(fmt::format("Sort {} before JOIN", join_table_side)); + plan.addStep(std::move(sorting_step)); + }; + + auto crosswise_connection = CreateSetAndFilterOnTheFlyStep::createCrossConnection(); + auto add_create_set = [&settings, crosswise_connection](QueryPlan & plan, const Names & key_names, JoinTableSide join_table_side) + { + auto creating_set_step = std::make_unique( + plan.getCurrentDataStream(), + key_names, + settings.max_rows_in_set_to_optimize_join, + crosswise_connection, + join_table_side); + creating_set_step->setStepDescription(fmt::format("Create set and filter {} joined stream", join_table_side)); + + auto * step_raw_ptr = creating_set_step.get(); + plan.addStep(std::move(creating_set_step)); + return step_raw_ptr; + }; + + if (join_algorithm->pipelineType() == JoinPipelineType::YShaped) + { + const auto & join_clause = table_join->getOnlyClause(); + + bool kind_allows_filtering = isInner(join_kind) || isLeft(join_kind) || isRight(join_kind); + if (settings.max_rows_in_set_to_optimize_join > 0 && kind_allows_filtering) + { + auto * left_set = add_create_set(left_plan, join_clause.key_names_left, JoinTableSide::Left); + auto * right_set = add_create_set(right_plan, join_clause.key_names_right, JoinTableSide::Right); + + if (isInnerOrLeft(join_kind)) + 
right_set->setFiltering(left_set->getSet()); + + if (isInnerOrRight(join_kind)) + left_set->setFiltering(right_set->getSet()); + } + + add_sorting(left_plan, join_clause.key_names_left, JoinTableSide::Left); + add_sorting(right_plan, join_clause.key_names_right, JoinTableSide::Right); + } + + size_t max_block_size = query_context->getSettingsRef().max_block_size; + size_t max_streams = query_context->getSettingsRef().max_threads; + + auto join_step = std::make_unique( + left_plan.getCurrentDataStream(), + right_plan.getCurrentDataStream(), + std::move(join_algorithm), + max_block_size, + max_streams, + false /*optimize_read_in_order*/); + + join_step->setStepDescription(fmt::format("JOIN {}", JoinPipelineType::FillRightFirst)); + + std::vector plans; + plans.emplace_back(std::make_unique(std::move(left_plan))); + plans.emplace_back(std::make_unique(std::move(right_plan))); + + result_plan.unitePlans(std::move(join_step), {std::move(plans)}); + } + + auto drop_unused_columns_after_join_actions_dag = std::make_shared(result_plan.getCurrentDataStream().header.getColumnsWithTypeAndName()); + ActionsDAG::NodeRawConstPtrs updated_outputs; + std::unordered_set updated_outputs_names; + + for (auto & output : drop_unused_columns_after_join_actions_dag->getOutputs()) + { + if (updated_outputs_names.contains(output->result_name) || !planner_context->getGlobalPlannerContext()->hasColumnIdentifier(output->result_name)) + continue; + + updated_outputs.push_back(output); + updated_outputs_names.insert(output->result_name); + } + + drop_unused_columns_after_join_actions_dag->getOutputs() = std::move(updated_outputs); + + auto drop_unused_columns_after_join_transform_step = std::make_unique(result_plan.getCurrentDataStream(), std::move(drop_unused_columns_after_join_actions_dag)); + drop_unused_columns_after_join_transform_step->setStepDescription("DROP unused columns after JOIN"); + result_plan.addStep(std::move(drop_unused_columns_after_join_transform_step)); + + return result_plan; +} + +QueryPlan buildQueryPlanForArrayJoinNode(QueryTreeNodePtr table_expression, + SelectQueryInfo & select_query_info, + const SelectQueryOptions & select_query_options, + PlannerContextPtr & planner_context) +{ + auto & array_join_node = table_expression->as(); + + auto plan = buildQueryPlanForJoinTreeNode(array_join_node.getTableExpression(), + select_query_info, + select_query_options, + planner_context); + auto plan_output_columns = plan.getCurrentDataStream().header.getColumnsWithTypeAndName(); + + ActionsDAGPtr array_join_action_dag = std::make_shared(plan_output_columns); + PlannerActionsVisitor actions_visitor(planner_context); + + NameSet array_join_columns; + for (auto & array_join_expression : array_join_node.getJoinExpressions().getNodes()) + { + auto & array_join_expression_column = array_join_expression->as(); + const auto & array_join_column_name = array_join_expression_column.getColumnName(); + array_join_columns.insert(array_join_column_name); + + auto expression_dag_index_nodes = actions_visitor.visit(array_join_action_dag, array_join_expression_column.getExpressionOrThrow()); + for (auto & expression_dag_index_node : expression_dag_index_nodes) + { + const auto * array_join_column_node = &array_join_action_dag->addAlias(*expression_dag_index_node, array_join_column_name); + array_join_action_dag->getOutputs().push_back(array_join_column_node); + } + } + + array_join_action_dag->projectInput(); + auto array_join_actions = std::make_unique(plan.getCurrentDataStream(), array_join_action_dag); + 
array_join_actions->setStepDescription("ARRAY JOIN actions"); + plan.addStep(std::move(array_join_actions)); + + auto array_join_action = std::make_shared(array_join_columns, array_join_node.isLeft(), planner_context->getQueryContext()); + auto array_join_step = std::make_unique(plan.getCurrentDataStream(), std::move(array_join_action)); + array_join_step->setStepDescription("ARRAY JOIN"); + plan.addStep(std::move(array_join_step)); + + return plan; +} + +} + +QueryPlan buildQueryPlanForJoinTreeNode(QueryTreeNodePtr join_tree_node, + SelectQueryInfo & select_query_info, + const SelectQueryOptions & select_query_options, + PlannerContextPtr & planner_context) +{ + auto join_tree_node_type = join_tree_node->getNodeType(); + + switch (join_tree_node_type) + { + case QueryTreeNodeType::TABLE: + [[fallthrough]]; + case QueryTreeNodeType::TABLE_FUNCTION: + [[fallthrough]]; + case QueryTreeNodeType::QUERY: + [[fallthrough]]; + case QueryTreeNodeType::UNION: + { + return buildQueryPlanForTableExpression(join_tree_node, select_query_info, select_query_options, planner_context); + } + case QueryTreeNodeType::JOIN: + { + return buildQueryPlanForJoinNode(join_tree_node, select_query_info, select_query_options, planner_context); + } + case QueryTreeNodeType::ARRAY_JOIN: + { + return buildQueryPlanForArrayJoinNode(join_tree_node, select_query_info, select_query_options, planner_context); + } + default: + { + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Expected table, table function, query, union, join or array join query node. Actual {}", + join_tree_node->formatASTForErrorMessage()); + } + } +} + +} diff --git a/src/Planner/PlannerJoinTree.h b/src/Planner/PlannerJoinTree.h new file mode 100644 index 00000000000..c93b71e0df1 --- /dev/null +++ b/src/Planner/PlannerJoinTree.h @@ -0,0 +1,20 @@ +#pragma once + +#include + +#include + +#include + +#include + +namespace DB +{ + +/// Build query plan for query JOIN TREE node +QueryPlan buildQueryPlanForJoinTreeNode(QueryTreeNodePtr join_tree_node, + SelectQueryInfo & select_query_info, + const SelectQueryOptions & select_query_options, + PlannerContextPtr & planner_context); + +} diff --git a/src/Planner/PlannerJoins.cpp b/src/Planner/PlannerJoins.cpp new file mode 100644 index 00000000000..f62517eaaad --- /dev/null +++ b/src/Planner/PlannerJoins.cpp @@ -0,0 +1,695 @@ +#include + +#include +#include + +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int INVALID_JOIN_ON_EXPRESSION; + extern const int NOT_IMPLEMENTED; +} + +void JoinClause::dump(WriteBuffer & buffer) const +{ + auto dump_dag_nodes = [&](const ActionsDAG::NodeRawConstPtrs & dag_nodes) + { + String dag_nodes_dump; + + if (!dag_nodes.empty()) + { + for (const auto & dag_node : dag_nodes) + { + dag_nodes_dump += dag_node->result_name; + dag_nodes_dump += ", "; + } + + dag_nodes_dump.pop_back(); + dag_nodes_dump.pop_back(); + } + + return dag_nodes_dump; + }; + + buffer << "left_key_nodes: " << dump_dag_nodes(left_key_nodes); + buffer << " right_key_nodes: " << dump_dag_nodes(right_key_nodes); + + if (!left_filter_condition_nodes.empty()) + buffer << " left_condition_nodes: " + dump_dag_nodes(left_filter_condition_nodes); + + if (!right_filter_condition_nodes.empty()) + buffer 
<< " right_condition_nodes: " + dump_dag_nodes(right_filter_condition_nodes); +} + +String JoinClause::dump() const +{ + WriteBufferFromOwnString buffer; + dump(buffer); + + return buffer.str(); +} + +namespace +{ + +std::optional extractJoinTableSideFromExpression(const ActionsDAG::Node * expression_root_node, + const std::unordered_set & join_expression_dag_input_nodes, + const NameSet & left_table_expression_columns_names, + const NameSet & right_table_expression_columns_names, + const JoinNode & join_node) +{ + std::optional table_side; + std::vector nodes_to_process; + nodes_to_process.push_back(expression_root_node); + + while (!nodes_to_process.empty()) + { + const auto * node_to_process = nodes_to_process.back(); + nodes_to_process.pop_back(); + + for (const auto & child : node_to_process->children) + nodes_to_process.push_back(child); + + if (!join_expression_dag_input_nodes.contains(node_to_process)) + continue; + + const auto & input_name = node_to_process->result_name; + + bool left_table_expression_contains_input = left_table_expression_columns_names.contains(input_name); + bool right_table_expression_contains_input = right_table_expression_columns_names.contains(input_name); + + if (!left_table_expression_contains_input && !right_table_expression_contains_input) + throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, + "JOIN {} actions has column {} that do not exist in left {} or right {} table expression columns", + join_node.formatASTForErrorMessage(), + input_name, + boost::join(left_table_expression_columns_names, ", "), + boost::join(right_table_expression_columns_names, ", ")); + + auto input_table_side = left_table_expression_contains_input ? JoinTableSide::Left : JoinTableSide::Right; + if (table_side && (*table_side) != input_table_side) + throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, + "JOIN {} join expression contains column from left and right table", + join_node.formatASTForErrorMessage()); + + table_side = input_table_side; + } + + return table_side; +} + +void buildJoinClause(ActionsDAGPtr join_expression_dag, + const std::unordered_set & join_expression_dag_input_nodes, + const ActionsDAG::Node * join_expressions_actions_node, + const NameSet & left_table_expression_columns_names, + const NameSet & right_table_expression_columns_names, + const JoinNode & join_node, + JoinClause & join_clause) +{ + std::string function_name; + + if (join_expressions_actions_node->function) + function_name = join_expressions_actions_node->function->getName(); + + /// For 'and' function go into children + if (function_name == "and") + { + for (const auto & child : join_expressions_actions_node->children) + { + buildJoinClause(join_expression_dag, + join_expression_dag_input_nodes, + child, + left_table_expression_columns_names, + right_table_expression_columns_names, + join_node, + join_clause); + } + + return; + } + + auto asof_inequality = getASOFJoinInequality(function_name); + bool is_asof_join_inequality = join_node.getStrictness() == JoinStrictness::Asof && asof_inequality != ASOFJoinInequality::None; + + if (function_name == "equals" || is_asof_join_inequality) + { + const auto * left_child = join_expressions_actions_node->children.at(0); + const auto * right_child = join_expressions_actions_node->children.at(1); + + auto left_expression_side_optional = extractJoinTableSideFromExpression(left_child, + join_expression_dag_input_nodes, + left_table_expression_columns_names, + right_table_expression_columns_names, + join_node); + + auto 
right_expression_side_optional = extractJoinTableSideFromExpression(right_child, + join_expression_dag_input_nodes, + left_table_expression_columns_names, + right_table_expression_columns_names, + join_node); + + if (!left_expression_side_optional && !right_expression_side_optional) + { + throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, + "JOIN {} ON expression {} with constants is not supported", + join_node.formatASTForErrorMessage(), + join_expressions_actions_node->result_name); + } + else if (left_expression_side_optional && !right_expression_side_optional) + { + join_clause.addCondition(*left_expression_side_optional, join_expressions_actions_node); + } + else if (!left_expression_side_optional && right_expression_side_optional) + { + join_clause.addCondition(*right_expression_side_optional, join_expressions_actions_node); + } + else + { + auto left_expression_side = *left_expression_side_optional; + auto right_expression_side = *right_expression_side_optional; + + if (left_expression_side != right_expression_side) + { + const ActionsDAG::Node * left_key = left_child; + const ActionsDAG::Node * right_key = right_child; + + if (left_expression_side == JoinTableSide::Right) + { + left_key = right_child; + right_key = left_child; + asof_inequality = reverseASOFJoinInequality(asof_inequality); + } + + if (is_asof_join_inequality) + { + if (join_clause.hasASOF()) + { + throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, + "JOIN {} ASOF JOIN expects exactly one inequality in ON section", + join_node.formatASTForErrorMessage()); + } + + join_clause.addASOFKey(left_key, right_key, asof_inequality); + } + else + { + join_clause.addKey(left_key, right_key); + } + } + else + { + join_clause.addCondition(left_expression_side, join_expressions_actions_node); + } + } + + return; + } + + auto expression_side_optional = extractJoinTableSideFromExpression(join_expressions_actions_node, + join_expression_dag_input_nodes, + left_table_expression_columns_names, + right_table_expression_columns_names, + join_node); + + if (!expression_side_optional) + throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, + "JOIN {} with constants is not supported", + join_node.formatASTForErrorMessage()); + + auto expression_side = *expression_side_optional; + join_clause.addCondition(expression_side, join_expressions_actions_node); +} + +JoinClausesAndActions buildJoinClausesAndActions(const ColumnsWithTypeAndName & join_expression_input_columns, + const ColumnsWithTypeAndName & left_table_expression_columns, + const ColumnsWithTypeAndName & right_table_expression_columns, + const JoinNode & join_node, + const PlannerContextPtr & planner_context) +{ + ActionsDAGPtr join_expression_actions = std::make_shared(join_expression_input_columns); + + /** In ActionsDAG if input node has constant representation additional constant column is added. + * That way we cannot simply check that node has INPUT type during resolution of expression join table side. + * Put all nodes after actions dag initialization in set. + * To check if actions dag node is input column, we check if set contains it. 
+ */ + const auto & join_expression_actions_nodes = join_expression_actions->getNodes(); + + std::unordered_set join_expression_dag_input_nodes; + join_expression_dag_input_nodes.reserve(join_expression_actions_nodes.size()); + for (const auto & node : join_expression_actions_nodes) + join_expression_dag_input_nodes.insert(&node); + + PlannerActionsVisitor join_expression_visitor(planner_context); + auto join_expression_dag_node_raw_pointers = join_expression_visitor.visit(join_expression_actions, join_node.getJoinExpression()); + if (join_expression_dag_node_raw_pointers.size() != 1) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "JOIN {} ON clause contains multiple expressions", + join_node.formatASTForErrorMessage()); + + const auto * join_expressions_actions_root_node = join_expression_dag_node_raw_pointers[0]; + if (!join_expressions_actions_root_node->function) + throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, + "JOIN {} join expression expected function", + join_node.formatASTForErrorMessage()); + + size_t left_table_expression_columns_size = left_table_expression_columns.size(); + + Names join_left_actions_names; + join_left_actions_names.reserve(left_table_expression_columns_size); + + NameSet join_left_actions_names_set; + join_left_actions_names_set.reserve(left_table_expression_columns_size); + + for (const auto & left_table_expression_column : left_table_expression_columns) + { + join_left_actions_names.push_back(left_table_expression_column.name); + join_left_actions_names_set.insert(left_table_expression_column.name); + } + + size_t right_table_expression_columns_size = right_table_expression_columns.size(); + + Names join_right_actions_names; + join_right_actions_names.reserve(right_table_expression_columns_size); + + NameSet join_right_actions_names_set; + join_right_actions_names_set.reserve(right_table_expression_columns_size); + + for (const auto & right_table_expression_column : right_table_expression_columns) + { + join_right_actions_names.push_back(right_table_expression_column.name); + join_right_actions_names_set.insert(right_table_expression_column.name); + } + + JoinClausesAndActions result; + result.join_expression_actions = join_expression_actions; + + const auto & function_name = join_expressions_actions_root_node->function->getName(); + if (function_name == "or") + { + for (const auto & child : join_expressions_actions_root_node->children) + { + result.join_clauses.emplace_back(); + + buildJoinClause(join_expression_actions, + join_expression_dag_input_nodes, + child, + join_left_actions_names_set, + join_right_actions_names_set, + join_node, + result.join_clauses.back()); + } + } + else + { + result.join_clauses.emplace_back(); + + buildJoinClause(join_expression_actions, + join_expression_dag_input_nodes, + join_expressions_actions_root_node, + join_left_actions_names_set, + join_right_actions_names_set, + join_node, + result.join_clauses.back()); + } + + auto and_function = FunctionFactory::instance().get("and", planner_context->getQueryContext()); + + auto add_necessary_name_if_needed = [&](JoinTableSide join_table_side, const String & name) + { + auto & necessary_names = join_table_side == JoinTableSide::Left ? join_left_actions_names : join_right_actions_names; + auto & necessary_names_set = join_table_side == JoinTableSide::Left ? 
join_left_actions_names_set : join_right_actions_names_set; + + auto [_, inserted] = necessary_names_set.emplace(name); + if (inserted) + necessary_names.push_back(name); + }; + + for (auto & join_clause : result.join_clauses) + { + const auto & left_filter_condition_nodes = join_clause.getLeftFilterConditionNodes(); + if (!left_filter_condition_nodes.empty()) + { + const ActionsDAG::Node * dag_filter_condition_node = nullptr; + + if (left_filter_condition_nodes.size() > 1) + dag_filter_condition_node = &join_expression_actions->addFunction(and_function, left_filter_condition_nodes, {}); + else + dag_filter_condition_node = left_filter_condition_nodes[0]; + + join_clause.getLeftFilterConditionNodes() = {dag_filter_condition_node}; + join_expression_actions->addOrReplaceInOutputs(*dag_filter_condition_node); + + add_necessary_name_if_needed(JoinTableSide::Left, dag_filter_condition_node->result_name); + } + + const auto & right_filter_condition_nodes = join_clause.getRightFilterConditionNodes(); + if (!right_filter_condition_nodes.empty()) + { + const ActionsDAG::Node * dag_filter_condition_node = nullptr; + + if (right_filter_condition_nodes.size() > 1) + dag_filter_condition_node = &join_expression_actions->addFunction(and_function, right_filter_condition_nodes, {}); + else + dag_filter_condition_node = right_filter_condition_nodes[0]; + + join_clause.getRightFilterConditionNodes() = {dag_filter_condition_node}; + join_expression_actions->addOrReplaceInOutputs(*dag_filter_condition_node); + + add_necessary_name_if_needed(JoinTableSide::Right, dag_filter_condition_node->result_name); + } + + assert(join_clause.getLeftKeyNodes().size() == join_clause.getRightKeyNodes().size()); + size_t join_clause_key_nodes_size = join_clause.getLeftKeyNodes().size(); + + if (join_clause_key_nodes_size == 0) + throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, "JOIN {} cannot get JOIN keys", + join_node.formatASTForErrorMessage()); + + for (size_t i = 0; i < join_clause_key_nodes_size; ++i) + { + auto & left_key_node = join_clause.getLeftKeyNodes()[i]; + auto & right_key_node = join_clause.getRightKeyNodes()[i]; + + if (!left_key_node->result_type->equals(*right_key_node->result_type)) + { + DataTypePtr common_type; + + try + { + common_type = getLeastSupertype(DataTypes{left_key_node->result_type, right_key_node->result_type}); + } + catch (Exception & ex) + { + ex.addMessage("JOIN {} cannot infer common type in ON section for keys. Left key {} type {}. 
Right key {} type {}", + join_node.formatASTForErrorMessage(), + left_key_node->result_name, + left_key_node->result_type->getName(), + right_key_node->result_name, + right_key_node->result_type->getName()); + throw; + } + + auto cast_type_name = common_type->getName(); + Field cast_type_constant_value(cast_type_name); + + ColumnWithTypeAndName cast_column; + cast_column.name = calculateConstantActionNodeName(cast_type_constant_value); + cast_column.column = DataTypeString().createColumnConst(0, cast_type_constant_value); + cast_column.type = std::make_shared(); + + const ActionsDAG::Node * cast_type_constant_node = nullptr; + + if (!left_key_node->result_type->equals(*common_type)) + { + cast_type_constant_node = &join_expression_actions->addColumn(cast_column); + + FunctionCastBase::Diagnostic diagnostic = {left_key_node->result_name, left_key_node->result_name}; + FunctionOverloadResolverPtr func_builder_cast + = CastInternalOverloadResolver::createImpl(diagnostic); + + ActionsDAG::NodeRawConstPtrs children = {left_key_node, cast_type_constant_node}; + left_key_node = &join_expression_actions->addFunction(func_builder_cast, std::move(children), {}); + } + + if (!right_key_node->result_type->equals(*common_type)) + { + if (!cast_type_constant_node) + cast_type_constant_node = &join_expression_actions->addColumn(cast_column); + + FunctionCastBase::Diagnostic diagnostic = {right_key_node->result_name, right_key_node->result_name}; + FunctionOverloadResolverPtr func_builder_cast + = CastInternalOverloadResolver::createImpl(std::move(diagnostic)); + + ActionsDAG::NodeRawConstPtrs children = {right_key_node, cast_type_constant_node}; + right_key_node = &join_expression_actions->addFunction(func_builder_cast, std::move(children), {}); + } + } + + join_expression_actions->addOrReplaceInOutputs(*left_key_node); + join_expression_actions->addOrReplaceInOutputs(*right_key_node); + + add_necessary_name_if_needed(JoinTableSide::Left, left_key_node->result_name); + add_necessary_name_if_needed(JoinTableSide::Right, right_key_node->result_name); + } + } + + result.left_join_expressions_actions = join_expression_actions->clone(); + result.left_join_expressions_actions->removeUnusedActions(join_left_actions_names); + + result.right_join_expressions_actions = join_expression_actions->clone(); + result.right_join_expressions_actions->removeUnusedActions(join_right_actions_names); + + return result; +} + +} + +JoinClausesAndActions buildJoinClausesAndActions( + const ColumnsWithTypeAndName & left_table_expression_columns, + const ColumnsWithTypeAndName & right_table_expression_columns, + const QueryTreeNodePtr & join_node, + const PlannerContextPtr & planner_context) +{ + auto & join_node_typed = join_node->as(); + if (!join_node_typed.isOnJoinExpression()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "JOIN {} join does not have ON section", + join_node_typed.formatASTForErrorMessage()); + + auto join_expression_input_columns = left_table_expression_columns; + join_expression_input_columns.insert(join_expression_input_columns.end(), right_table_expression_columns.begin(), right_table_expression_columns.end()); + + return buildJoinClausesAndActions(join_expression_input_columns, left_table_expression_columns, right_table_expression_columns, join_node_typed, planner_context); +} + +std::optional tryExtractConstantFromJoinNode(const QueryTreeNodePtr & join_node) +{ + auto & join_node_typed = join_node->as(); + if (!join_node_typed.getJoinExpression()) + return {}; + + auto constant_value = 
join_node_typed.getJoinExpression()->getConstantValueOrNull(); + if (!constant_value) + return {}; + + const auto & value = constant_value->getValue(); + auto constant_type = constant_value->getType(); + constant_type = removeNullable(removeLowCardinality(constant_type)); + + auto which_constant_type = WhichDataType(constant_type); + if (!which_constant_type.isUInt8() && !which_constant_type.isNothing()) + return {}; + + if (value.isNull()) + return false; + + UInt8 predicate_value = value.safeGet(); + return predicate_value > 0; +} + +namespace +{ + +void trySetStorageInTableJoin(const QueryTreeNodePtr & table_expression, std::shared_ptr & table_join) +{ + StoragePtr storage; + + if (auto * table_node = table_expression->as()) + storage = table_node->getStorage(); + else if (auto * table_function = table_expression->as()) + storage = table_function->getStorage(); + + auto storage_join = std::dynamic_pointer_cast(storage); + if (storage_join) + { + table_join->setStorageJoin(storage_join); + return; + } + + if (!table_join->isEnabledAlgorithm(JoinAlgorithm::DIRECT)) + return; + + if (auto storage_dictionary = std::dynamic_pointer_cast(storage); storage_dictionary) + table_join->setStorageJoin(std::dynamic_pointer_cast(storage_dictionary->getDictionary())); + else if (auto storage_key_value = std::dynamic_pointer_cast(storage); storage_key_value) + table_join->setStorageJoin(storage_key_value); +} + +std::shared_ptr tryDirectJoin(const std::shared_ptr & table_join, + const QueryTreeNodePtr & right_table_expression, + const Block & right_table_expression_header, + const PlannerContextPtr & planner_context) +{ + if (!table_join->isEnabledAlgorithm(JoinAlgorithm::DIRECT)) + return {}; + + auto storage = table_join->getStorageKeyValue(); + if (!storage) + return {}; + + bool allowed_inner = isInner(table_join->kind()) && table_join->strictness() == JoinStrictness::All; + bool allowed_left = isLeft(table_join->kind()) && (table_join->strictness() == JoinStrictness::Any || + table_join->strictness() == JoinStrictness::All || + table_join->strictness() == JoinStrictness::Semi || + table_join->strictness() == JoinStrictness::Anti); + if (!allowed_inner && !allowed_left) + return {}; + + const auto & clauses = table_join->getClauses(); + bool only_one_key = clauses.size() == 1 && + clauses[0].key_names_left.size() == 1 && + clauses[0].key_names_right.size() == 1 && + !clauses[0].on_filter_condition_left && + !clauses[0].on_filter_condition_right && + clauses[0].analyzer_left_filter_condition_column_name.empty() && + clauses[0].analyzer_right_filter_condition_column_name.empty(); + + if (!only_one_key) + return {}; + + const String & key_name = clauses[0].key_names_right[0]; + + auto & right_table_expression_data = planner_context->getTableExpressionDataOrThrow(right_table_expression); + const auto * table_column_name = right_table_expression_data.getColumnNameOrNull(key_name); + if (!table_column_name) + return {}; + + const auto & storage_primary_key = storage->getPrimaryKey(); + if (storage_primary_key.size() != 1 || storage_primary_key[0] != *table_column_name) + return {}; + + /** For right table expression during execution columns have unique name. + * Direct key value join implementation during storage querying must use storage column names. 
+ * + * Example: + * CREATE DICTIONARY test_dictionary (id UInt64, value String) PRIMARY KEY id SOURCE(CLICKHOUSE(TABLE 'test_dictionary_table')) LIFETIME(0); + * SELECT t1.id FROM test_table AS t1 INNER JOIN test_dictionary AS t2 ON t1.id = t2.id; + * + * Unique execution name for `id` column from right table expression `test_dictionary AS t2` for example can be `t2.id_0`. + * Storage column name is `id`. + * + * Here we create header for right table expression with original storage column names. + */ + Block right_table_expression_header_with_storage_column_names; + + for (const auto & right_table_expression_column : right_table_expression_header) + { + const auto * table_column_name = right_table_expression_data.getColumnNameOrNull(right_table_expression_column.name); + if (!table_column_name) + return {}; + + auto right_table_expression_column_with_storage_column_name = right_table_expression_column; + right_table_expression_column_with_storage_column_name.name = *table_column_name; + right_table_expression_header_with_storage_column_names.insert(right_table_expression_column_with_storage_column_name); + } + + return std::make_shared(table_join, right_table_expression_header, storage, right_table_expression_header_with_storage_column_names); +} + +} + +std::shared_ptr chooseJoinAlgorithm(std::shared_ptr & table_join, + const QueryTreeNodePtr & right_table_expression, + const Block & right_table_expression_header, + const PlannerContextPtr & planner_context) +{ + trySetStorageInTableJoin(right_table_expression, table_join); + + /// JOIN with JOIN engine. + if (auto storage = table_join->getStorageJoin()) + return storage->getJoinLocked(table_join, planner_context->getQueryContext()); + + /** JOIN with constant. + * Example: SELECT * FROM test_table AS t1 INNER JOIN test_table AS t2 ON 1; + */ + if (table_join->isJoinWithConstant()) + { + if (!table_join->isEnabledAlgorithm(JoinAlgorithm::HASH)) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "JOIN with constant supported only with join algorithm 'hash'"); + + return std::make_shared(table_join, right_table_expression_header); + } + + if (!table_join->oneDisjunct() && !table_join->isEnabledAlgorithm(JoinAlgorithm::HASH)) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Only `hash` join supports multiple ORs for keys in JOIN ON section"); + + /// Direct JOIN with special storages that support key value access. 
For example JOIN with Dictionary + if (table_join->isEnabledAlgorithm(JoinAlgorithm::DIRECT)) + { + JoinPtr direct_join = tryDirectJoin(table_join, right_table_expression, right_table_expression_header, planner_context); + if (direct_join) + return direct_join; + } + + if (table_join->isEnabledAlgorithm(JoinAlgorithm::PARTIAL_MERGE) || + table_join->isEnabledAlgorithm(JoinAlgorithm::PREFER_PARTIAL_MERGE)) + { + if (MergeJoin::isSupported(table_join)) + return std::make_shared(table_join, right_table_expression_header); + } + + if (table_join->isEnabledAlgorithm(JoinAlgorithm::HASH) || + /// partial_merge is preferred, but can't be used for specified kind of join, fallback to hash + table_join->isEnabledAlgorithm(JoinAlgorithm::PREFER_PARTIAL_MERGE) || + table_join->isEnabledAlgorithm(JoinAlgorithm::PARALLEL_HASH)) + { + if (table_join->allowParallelHashJoin()) + { + auto query_context = planner_context->getQueryContext(); + return std::make_shared(query_context, table_join, query_context->getSettings().max_threads, right_table_expression_header); + } + + return std::make_shared(table_join, right_table_expression_header); + } + + if (table_join->isEnabledAlgorithm(JoinAlgorithm::FULL_SORTING_MERGE)) + { + if (FullSortingMergeJoin::isSupported(table_join)) + return std::make_shared(table_join, right_table_expression_header); + } + + if (table_join->isEnabledAlgorithm(JoinAlgorithm::AUTO)) + return std::make_shared(table_join, right_table_expression_header); + + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Can't execute any of specified algorithms for specified strictness/kind and right storage type"); +} + +} diff --git a/src/Planner/PlannerJoins.h b/src/Planner/PlannerJoins.h new file mode 100644 index 00000000000..d305249e789 --- /dev/null +++ b/src/Planner/PlannerJoins.h @@ -0,0 +1,196 @@ +#pragma once + +#include +#include + +#include +#include +#include + +#include + +namespace DB +{ + +/** Join clause represent single JOIN ON section clause. + * Join clause consists of JOIN keys and conditions. + * + * JOIN can contain multiple clauses in JOIN ON section. + * Example: SELECT * FROM test_table_1 AS t1 INNER JOIN test_table_2 AS t2 ON t1.id = t2.id OR t1.value = t2.value; + * t1.id = t2.id is first clause. + * t1.value = t2.value is second clause. + * + * JOIN ON section can also contain condition inside clause. + * Example: SELECT * FROM test_table_1 AS t1 INNER JOIN test_table_2 AS t2 ON t1.id = t2.id AND t1.id > 0 AND t2.id > 0; + * t1.id = t2.id AND t1.id > 0 AND t2.id > 0 is first clause. + * t1.id = t2.id is JOIN keys section. + * t1.id > 0 is left table condition. + * t2.id > 0 is right table condition. + * + * Additionally not only conditions, but JOIN keys can be represented as expressions. + * Example: SELECT * FROM test_table_1 AS t1 INNER JOIN test_table_2 AS t2 ON toString(t1.id) = toString(t2.id). + * toString(t1.id) = toString(t2.id) is JOIN keys section. Where toString(t1.id) is left key, and toString(t2.id) is right key. + * + * During query planning JOIN ON section represented using join clause structure. It is important to split + * keys and conditions. And for each action detect from which stream it can be performed. + * + * We have 2 streams, left stream and right stream. + * We split JOIN ON section expressions actions in two parts left join expression actions and right join expression actions. + * Left join expression actions must be used to calculate necessary actions for left stream. 
+ * Right join expression actions must be used to calculate necessary actions for right stream. + */ +class PlannerContext; +using PlannerContextPtr = std::shared_ptr; + +struct ASOFCondition +{ + size_t key_index; + ASOFJoinInequality asof_inequality; +}; + +/// Single JOIN ON section clause representation +class JoinClause +{ +public: + /// Add keys + void addKey(const ActionsDAG::Node * left_key_node, const ActionsDAG::Node * right_key_node) + { + left_key_nodes.emplace_back(left_key_node); + right_key_nodes.emplace_back(right_key_node); + } + + void addASOFKey(const ActionsDAG::Node * left_key_node, const ActionsDAG::Node * right_key_node, ASOFJoinInequality asof_inequality) + { + left_key_nodes.emplace_back(left_key_node); + right_key_nodes.emplace_back(right_key_node); + asof_conditions.push_back(ASOFCondition{left_key_nodes.size() - 1, asof_inequality}); + } + + /// Add condition for table side + void addCondition(JoinTableSide table_side, const ActionsDAG::Node * condition_node) + { + auto & filter_condition_nodes = table_side == JoinTableSide::Left ? left_filter_condition_nodes : right_filter_condition_nodes; + filter_condition_nodes.push_back(condition_node); + } + + /// Get left key nodes + const ActionsDAG::NodeRawConstPtrs & getLeftKeyNodes() const + { + return left_key_nodes; + } + + /// Get left key nodes + ActionsDAG::NodeRawConstPtrs & getLeftKeyNodes() + { + return left_key_nodes; + } + + /// Get right key nodes + const ActionsDAG::NodeRawConstPtrs & getRightKeyNodes() const + { + return right_key_nodes; + } + + /// Get right key nodes + ActionsDAG::NodeRawConstPtrs & getRightKeyNodes() + { + return right_key_nodes; + } + + /// Returns true if JOIN clause has ASOF conditions, false otherwise + bool hasASOF() const + { + return !asof_conditions.empty(); + } + + /// Get ASOF conditions + const std::vector & getASOFConditions() const + { + return asof_conditions; + } + + /// Get left filter condition nodes + const ActionsDAG::NodeRawConstPtrs & getLeftFilterConditionNodes() const + { + return left_filter_condition_nodes; + } + + /// Get left filter condition nodes + ActionsDAG::NodeRawConstPtrs & getLeftFilterConditionNodes() + { + return left_filter_condition_nodes; + } + + /// Get right filter condition nodes + const ActionsDAG::NodeRawConstPtrs & getRightFilterConditionNodes() const + { + return right_filter_condition_nodes; + } + + /// Get right filter condition nodes + ActionsDAG::NodeRawConstPtrs & getRightFilterConditionNodes() + { + return right_filter_condition_nodes; + } + + /// Dump clause into buffer + void dump(WriteBuffer & buffer) const; + + /// Dump clause + String dump() const; + +private: + ActionsDAG::NodeRawConstPtrs left_key_nodes; + ActionsDAG::NodeRawConstPtrs right_key_nodes; + + std::vector asof_conditions; + + ActionsDAG::NodeRawConstPtrs left_filter_condition_nodes; + ActionsDAG::NodeRawConstPtrs right_filter_condition_nodes; +}; + +using JoinClauses = std::vector; + +struct JoinClausesAndActions +{ + /// Join clauses. Actions dag nodes point into join_expression_actions. + JoinClauses join_clauses; + /// Whole JOIN ON section expressions + ActionsDAGPtr join_expression_actions; + /// Left join expressions actions + ActionsDAGPtr left_join_expressions_actions; + /// Right join expressions actions + ActionsDAGPtr right_join_expressions_actions; +}; + +/** Calculate join clauses and actions for JOIN ON section. + * + * left_table_expression_columns - columns from left join stream. + * right_table_expression_columns - columns from right join stream. 
+ * join_node - join query tree node. + * planner_context - planner context. + */ +JoinClausesAndActions buildJoinClausesAndActions( + const ColumnsWithTypeAndName & left_table_expression_columns, + const ColumnsWithTypeAndName & right_table_expression_columns, + const QueryTreeNodePtr & join_node, + const PlannerContextPtr & planner_context); + +/** Try extract boolean constant from JOIN expression. + * Example: SELECT * FROM test_table AS t1 INNER JOIN test_table AS t2 ON 1; + * Example: SELECT * FROM test_table AS t1 INNER JOIN test_table AS t2 ON 1 != 1; + * + * join_node - join query tree node. + */ +std::optional tryExtractConstantFromJoinNode(const QueryTreeNodePtr & join_node); + +/** Choose JOIN algorithm for table join, right table expression, right table expression header and planner context. + * Table join structure can be modified during JOIN algorithm choosing for special JOIN algorithms. + * For example JOIN with Dictionary engine, or JOIN with JOIN engine. + */ +std::shared_ptr chooseJoinAlgorithm(std::shared_ptr & table_join, + const QueryTreeNodePtr & right_table_expression, + const Block & right_table_expression_header, + const PlannerContextPtr & planner_context); + +} diff --git a/src/Planner/PlannerSorting.cpp b/src/Planner/PlannerSorting.cpp new file mode 100644 index 00000000000..5ae8bd1e21b --- /dev/null +++ b/src/Planner/PlannerSorting.cpp @@ -0,0 +1,157 @@ +#include + +#include + +#include + +#include + +#include +#include + +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int INVALID_WITH_FILL_EXPRESSION; +} + +namespace +{ + +std::pair extractWithFillValue(const QueryTreeNodePtr & node) +{ + const auto & constant_value = node->getConstantValue(); + + std::pair result; + result.first = constant_value.getValue(); + result.second = constant_value.getType(); + + if (!isColumnedAsNumber(result.second)) + throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION, "WITH FILL expression must be constant with numeric type"); + + return result; +} + +std::pair> extractWithFillStepValue(const QueryTreeNodePtr & node) +{ + const auto & constant_value = node->getConstantValue(); + + const auto & constant_node_result_type = constant_value.getType(); + if (const auto * type_interval = typeid_cast(constant_node_result_type.get())) + return std::make_pair(constant_value.getValue(), type_interval->getKind()); + + if (!isColumnedAsNumber(constant_node_result_type)) + throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION, "WITH FILL expression must be constant with numeric type"); + + return {constant_value.getValue(), {}}; +} + +FillColumnDescription extractWithFillDescription(const SortNode & sort_node) +{ + FillColumnDescription fill_column_description; + + if (sort_node.hasFillFrom()) + { + auto extract_result = extractWithFillValue(sort_node.getFillFrom()); + fill_column_description.fill_from = std::move(extract_result.first); + fill_column_description.fill_from_type = std::move(extract_result.second); + } + + if (sort_node.hasFillTo()) + { + auto extract_result = extractWithFillValue(sort_node.getFillTo()); + fill_column_description.fill_to = std::move(extract_result.first); + fill_column_description.fill_to_type = std::move(extract_result.second); + } + + if (sort_node.hasFillStep()) + { + auto extract_result = extractWithFillStepValue(sort_node.getFillStep()); + fill_column_description.fill_step = std::move(extract_result.first); + fill_column_description.step_kind = std::move(extract_result.second); + } + else + { + auto direction_value = 
sort_node.getSortDirection() == SortDirection::ASCENDING ? static_cast(1) : static_cast(-1); + fill_column_description.fill_step = Field(direction_value); + } + + if (applyVisitor(FieldVisitorAccurateEquals(), fill_column_description.fill_step, Field{0})) + throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION, + "WITH FILL STEP value cannot be zero"); + + if (sort_node.getSortDirection() == SortDirection::ASCENDING) + { + if (applyVisitor(FieldVisitorAccurateLess(), fill_column_description.fill_step, Field{0})) + throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION, + "WITH FILL STEP value cannot be negative for sorting in ascending direction"); + + if (!fill_column_description.fill_from.isNull() && !fill_column_description.fill_to.isNull() && + applyVisitor(FieldVisitorAccurateLess(), fill_column_description.fill_to, fill_column_description.fill_from)) + { + throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION, + "WITH FILL TO value cannot be less than FROM value for sorting in ascending direction"); + } + } + else + { + if (applyVisitor(FieldVisitorAccurateLess(), Field{0}, fill_column_description.fill_step)) + throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION, + "WITH FILL STEP value cannot be positive for sorting in descending direction"); + + if (!fill_column_description.fill_from.isNull() && !fill_column_description.fill_to.isNull() && + applyVisitor(FieldVisitorAccurateLess(), fill_column_description.fill_from, fill_column_description.fill_to)) + { + throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION, + "WITH FILL FROM value cannot be less than TO value for sorting in descending direction"); + } + } + + return fill_column_description; +} + +} + +SortDescription extractSortDescription(const QueryTreeNodePtr & order_by_node, const PlannerContext & planner_context) +{ + auto & order_by_list_node = order_by_node->as(); + + SortDescription sort_column_description; + sort_column_description.reserve(order_by_list_node.getNodes().size()); + + for (const auto & sort_node : order_by_list_node.getNodes()) + { + auto & sort_node_typed = sort_node->as(); + + auto column_name = calculateActionNodeName(sort_node_typed.getExpression(), planner_context); + std::shared_ptr collator = sort_node_typed.getCollator(); + int direction = sort_node_typed.getSortDirection() == SortDirection::ASCENDING ? 1 : -1; + int nulls_direction = direction; + + auto nulls_sort_direction = sort_node_typed.getNullsSortDirection(); + if (nulls_sort_direction) + nulls_direction = *nulls_sort_direction == SortDirection::ASCENDING ? 
1 : -1; + + if (sort_node_typed.withFill()) + { + FillColumnDescription fill_description = extractWithFillDescription(sort_node_typed); + sort_column_description.emplace_back(column_name, direction, nulls_direction, collator, true /*with_fill*/, fill_description); + } + else + { + sort_column_description.emplace_back(column_name, direction, nulls_direction, collator); + } + } + + const auto & settings = planner_context.getQueryContext()->getSettingsRef(); + sort_column_description.compile_sort_description = settings.compile_sort_description; + sort_column_description.min_count_to_compile_sort_description = settings.min_count_to_compile_sort_description; + + return sort_column_description; +} + +} diff --git a/src/Planner/PlannerSorting.h b/src/Planner/PlannerSorting.h new file mode 100644 index 00000000000..c4e4e634973 --- /dev/null +++ b/src/Planner/PlannerSorting.h @@ -0,0 +1,14 @@ +#pragma once + +#include + +#include + +namespace DB +{ + +/// Extract sort description from order by node +SortDescription extractSortDescription(const QueryTreeNodePtr & order_by_node, const PlannerContext & planner_context); + +} + diff --git a/src/Planner/PlannerWindowFunctions.cpp b/src/Planner/PlannerWindowFunctions.cpp new file mode 100644 index 00000000000..4fe60a18099 --- /dev/null +++ b/src/Planner/PlannerWindowFunctions.cpp @@ -0,0 +1,146 @@ +#include + +#include +#include + +#include + +#include +#include + +namespace DB +{ + +namespace +{ + +WindowDescription extractWindowDescriptionFromWindowNode(const QueryTreeNodePtr & node, const PlannerContext & planner_context) +{ + auto & window_node = node->as(); + + WindowDescription window_description; + window_description.window_name = calculateWindowNodeActionName(node, planner_context); + + for (const auto & partition_by_node : window_node.getPartitionBy().getNodes()) + { + auto partition_by_node_action_name = calculateActionNodeName(partition_by_node, planner_context); + auto partition_by_sort_column_description = SortColumnDescription(partition_by_node_action_name, 1 /* direction */, 1 /* nulls_direction */); + window_description.partition_by.push_back(std::move(partition_by_sort_column_description)); + } + + window_description.order_by = extractSortDescription(window_node.getOrderByNode(), planner_context); + + window_description.full_sort_description = window_description.partition_by; + window_description.full_sort_description.insert(window_description.full_sort_description.end(), window_description.order_by.begin(), window_description.order_by.end()); + + /// WINDOW frame is validated during query analysis stage + window_description.frame = window_node.getWindowFrame(); + + const auto & query_context = planner_context.getQueryContext(); + const auto & query_context_settings = query_context->getSettingsRef(); + + bool compile_sort_description = query_context_settings.compile_sort_description; + size_t min_count_to_compile_sort_description = query_context_settings.min_count_to_compile_sort_description; + + window_description.partition_by.compile_sort_description = compile_sort_description; + window_description.partition_by.min_count_to_compile_sort_description = min_count_to_compile_sort_description; + + window_description.order_by.compile_sort_description = compile_sort_description; + window_description.order_by.min_count_to_compile_sort_description = min_count_to_compile_sort_description; + + window_description.full_sort_description.compile_sort_description = compile_sort_description; + 
window_description.full_sort_description.min_count_to_compile_sort_description = min_count_to_compile_sort_description; + + return window_description; +} + +} + +std::vector extractWindowDescriptions(const QueryTreeNodes & window_function_nodes, const PlannerContext & planner_context) +{ + std::unordered_map window_name_to_description; + + for (const auto & window_function_node : window_function_nodes) + { + auto & window_function_node_typed = window_function_node->as(); + + auto function_window_description = extractWindowDescriptionFromWindowNode(window_function_node_typed.getWindowNode(), planner_context); + auto window_name = function_window_description.window_name; + + auto [it, _] = window_name_to_description.emplace(window_name, std::move(function_window_description)); + auto & window_description = it->second; + + WindowFunctionDescription window_function; + window_function.function_node = nullptr; + window_function.column_name = calculateActionNodeName(window_function_node, planner_context); + window_function.aggregate_function = window_function_node_typed.getAggregateFunction(); + + const auto & parameters_nodes = window_function_node_typed.getParameters().getNodes(); + window_function.function_parameters.reserve(parameters_nodes.size()); + + for (const auto & parameter_node : parameters_nodes) + { + /// Function parameters constness validated during analysis stage + window_function.function_parameters.push_back(parameter_node->getConstantValue().getValue()); + } + + const auto & arguments_nodes = window_function_node_typed.getArguments().getNodes(); + size_t arguments_nodes_size = arguments_nodes.size(); + + window_function.argument_names.reserve(arguments_nodes_size); + window_function.argument_types.reserve(arguments_nodes_size); + + for (const auto & argument_node : arguments_nodes) + { + String argument_node_name = calculateActionNodeName(argument_node, planner_context); + window_function.argument_names.emplace_back(std::move(argument_node_name)); + window_function.argument_types.emplace_back(argument_node->getResultType()); + } + + window_description.window_functions.push_back(window_function); + } + + std::vector result; + result.reserve(window_name_to_description.size()); + + for (auto && [_, window_description] : window_name_to_description) + result.push_back(std::move(window_description)); + + return result; +} + +void sortWindowDescriptions(std::vector & window_descriptions) +{ + auto window_description_comparator = [](const WindowDescription & lhs, const WindowDescription & rhs) + { + const auto & left = lhs.full_sort_description; + const auto & right = rhs.full_sort_description; + + for (size_t i = 0; i < std::min(left.size(), right.size()); ++i) + { + if (left[i].column_name < right[i].column_name) + return true; + else if (left[i].column_name > right[i].column_name) + return false; + else if (left[i].direction < right[i].direction) + return true; + else if (left[i].direction > right[i].direction) + return false; + else if (left[i].nulls_direction < right[i].nulls_direction) + return true; + else if (left[i].nulls_direction > right[i].nulls_direction) + return false; + + assert(left[i] == right[i]); + } + + /** Note that we check the length last, because we want to put together the + * sort orders that have common prefix but different length. 
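Aside (not part of this diff): the ordering that this comparator establishes (its final length check continues right below) can be shown with a small standalone program. SortKey and the sample window names are illustrative stand-ins for SortColumnDescription and WindowDescription. Windows whose sort descriptions share a common prefix end up adjacent, with the longest description first, so later planning steps can reuse an already sorted stream for the shorter prefixes.

#include <algorithm>
#include <iostream>
#include <string>
#include <tuple>
#include <vector>

// Illustrative stand-in for SortColumnDescription.
struct SortKey
{
    std::string column_name;
    int direction = 1;
    int nulls_direction = 1;
};

using SortDesc = std::vector<SortKey>;

// Same idea as the comparator above: compare key by key, and only when one
// description is a prefix of the other put the longer one first.
static bool longestPrefixFirst(const SortDesc & left, const SortDesc & right)
{
    for (size_t i = 0; i < std::min(left.size(), right.size()); ++i)
    {
        auto l = std::tie(left[i].column_name, left[i].direction, left[i].nulls_direction);
        auto r = std::tie(right[i].column_name, right[i].direction, right[i].nulls_direction);
        if (l != r)
            return l < r;
    }
    return left.size() > right.size();
}

int main()
{
    std::vector<std::pair<std::string, SortDesc>> windows = {
        {"w_short", {{"a"}}},
        {"w_long", {{"a"}, {"b"}, {"c"}}},
        {"w_other", {{"z"}}},
    };

    std::sort(windows.begin(), windows.end(),
              [](const auto & lhs, const auto & rhs) { return longestPrefixFirst(lhs.second, rhs.second); });

    for (const auto & [name, desc] : windows)
        std::cout << name << " (" << desc.size() << " keys)\n";
    // Prints w_long, w_short, w_other: the longest description comes first
    // and its prefix follows immediately after it.
    return 0;
}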
+ */ + return left.size() > right.size(); + }; + + ::sort(window_descriptions.begin(), window_descriptions.end(), window_description_comparator); +} + +} diff --git a/src/Planner/PlannerWindowFunctions.h b/src/Planner/PlannerWindowFunctions.h new file mode 100644 index 00000000000..1552ef5a71f --- /dev/null +++ b/src/Planner/PlannerWindowFunctions.h @@ -0,0 +1,20 @@ +#pragma once + +#include + +#include + +#include + +namespace DB +{ + +/// Extract window descriptions from window function nodes +std::vector extractWindowDescriptions(const QueryTreeNodes & window_function_nodes, const PlannerContext & planner_context); + +/** Try to sort window descriptions in such an order that the window with the longest + * sort description goes first, and all window that use its prefixes follow. + */ +void sortWindowDescriptions(std::vector & window_descriptions); + +} diff --git a/src/Planner/TableExpressionData.h b/src/Planner/TableExpressionData.h new file mode 100644 index 00000000000..0918c35a8ef --- /dev/null +++ b/src/Planner/TableExpressionData.h @@ -0,0 +1,186 @@ +#pragma once + +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +using ColumnIdentifier = std::string; + +/** Table expression data is created for each table expression that take part in query. + * Table expression data has information about columns that participate in query, their name to identifier mapping, + * and additional table expression properties. + */ +class TableExpressionData +{ +public: + using ColumnNameToColumnIdentifier = std::unordered_map; + + using ColumnIdentifierToColumnName = std::unordered_map; + + /// Return true if column with name exists, false otherwise + bool hasColumn(const std::string & column_name) const + { + return alias_columns_names.contains(column_name) || columns_names.contains(column_name); + } + + /** Add column in table expression data. + * Column identifier must be created using global planner context. + * + * Logical error exception is thrown if column already exists. + */ + void addColumn(const NameAndTypePair & column, const ColumnIdentifier & column_identifier) + { + if (hasColumn(column.name)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Column with name {} already exists"); + + columns_names.insert(column.name); + columns.push_back(column); + column_name_to_column_identifier.emplace(column.name, column_identifier); + column_identifier_to_column_name.emplace(column_identifier, column.name); + } + + /** Add column if it does not exists in table expression data. + * Column identifier must be created using global planner context. 
+ */ + void addColumnIfNotExists(const NameAndTypePair & column, const ColumnIdentifier & column_identifier) + { + if (hasColumn(column.name)) + return; + + columns_names.insert(column.name); + columns.push_back(column); + column_name_to_column_identifier.emplace(column.name, column_identifier); + column_identifier_to_column_name.emplace(column_identifier, column.name); + } + + /// Add alias column name + void addAliasColumnName(const std::string & column_name) + { + alias_columns_names.insert(column_name); + } + + /// Get alias columns names + const NameSet & getAliasColumnsNames() const + { + return alias_columns_names; + } + + /// Get columns names + const NameSet & getColumnsNames() const + { + return columns_names; + } + + /// Get columns + const NamesAndTypesList & getColumns() const + { + return columns; + } + + /// Get column name to column identifier map + const ColumnNameToColumnIdentifier & getColumnNameToIdentifier() const + { + return column_name_to_column_identifier; + } + + /// Get column identifier to column name map + const ColumnNameToColumnIdentifier & getColumnIdentifierToColumnName() const + { + return column_identifier_to_column_name; + } + + /** Get column identifier for column name. + * Exception is thrown if there are no column identifier for column name. + */ + const ColumnIdentifier & getColumnIdentifierOrThrow(const std::string & column_name) const + { + auto it = column_name_to_column_identifier.find(column_name); + if (it == column_name_to_column_identifier.end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Column identifier for name {} does not exists", + column_name); + + return it->second; + } + + /** Get column identifier for column name. + * Null is returned if there are no column identifier for column name. + */ + const ColumnIdentifier * getColumnIdentifierOrNull(const std::string & column_name) const + { + auto it = column_name_to_column_identifier.find(column_name); + if (it == column_name_to_column_identifier.end()) + return nullptr; + + return &it->second; + } + + /** Get column name for column identifier. + * Exception is thrown if there are no column name for column identifier. + */ + const std::string & getColumnNameOrThrow(const ColumnIdentifier & column_identifier) const + { + auto it = column_identifier_to_column_name.find(column_identifier); + if (it == column_identifier_to_column_name.end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Column name for identifier {} does not exists", + column_identifier); + + return it->second; + } + + /** Get column name for column identifier. + * Null is returned if there are no column name for column identifier. + */ + const std::string * getColumnNameOrNull(const ColumnIdentifier & column_identifier) const + { + auto it = column_identifier_to_column_name.find(column_identifier); + if (it == column_identifier_to_column_name.end()) + return nullptr; + + return &it->second; + } + + /** Returns true if storage is remote, false otherwise. + * + * Valid only for table and table function node. 
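Aside (not part of this diff): the two maps that TableExpressionData maintains form a simple bidirectional mapping, which is what keeps the *OrThrow / *OrNull accessors cheap and symmetric. The sketch below shows the same pattern with plain std::unordered_map and std::string; ColumnBimap and the sample names are illustrative, not planner types.

#include <iostream>
#include <stdexcept>
#include <string>
#include <unordered_map>

// Illustrative stand-in for the name <-> identifier bookkeeping in TableExpressionData.
class ColumnBimap
{
public:
    void add(const std::string & name, const std::string & identifier)
    {
        if (name_to_identifier.contains(name))
            throw std::logic_error("Column with name " + name + " already exists");
        name_to_identifier.emplace(name, identifier);
        identifier_to_name.emplace(identifier, name);
    }

    const std::string * identifierOrNull(const std::string & name) const
    {
        auto it = name_to_identifier.find(name);
        return it == name_to_identifier.end() ? nullptr : &it->second;
    }

    const std::string & identifierOrThrow(const std::string & name) const
    {
        if (const auto * identifier = identifierOrNull(name))
            return *identifier;
        throw std::logic_error("Column identifier for name " + name + " does not exist");
    }

private:
    std::unordered_map<std::string, std::string> name_to_identifier;
    std::unordered_map<std::string, std::string> identifier_to_name;
};

int main()
{
    ColumnBimap columns;
    columns.add("id", "t1.id_0");

    std::cout << columns.identifierOrThrow("id") << '\n';                  // t1.id_0
    std::cout << (columns.identifierOrNull("missing") == nullptr) << '\n'; // 1
    return 0;
}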
+ */ + bool isRemote() const + { + return is_remote; + } + + /// Set is storage remote value + void setIsRemote(bool is_remote_value) + { + is_remote = is_remote_value; + } + +private: + /// Valid for table, table function, query, union table expression nodes + NamesAndTypesList columns; + + /// Valid for table, table function, query, union table expression nodes + NameSet columns_names; + + /// Valid only for table table expression node + NameSet alias_columns_names; + + /// Valid for table, table function, query, union table expression nodes + ColumnNameToColumnIdentifier column_name_to_column_identifier; + + /// Valid for table, table function, query, union table expression nodes + ColumnIdentifierToColumnName column_identifier_to_column_name; + + /// Is storage remote + bool is_remote = false; +}; + +} diff --git a/src/Planner/Utils.cpp b/src/Planner/Utils.cpp new file mode 100644 index 00000000000..74918285453 --- /dev/null +++ b/src/Planner/Utils.cpp @@ -0,0 +1,314 @@ +#include + +#include +#include +#include + +#include + +#include + +#include + +#include +#include +#include +#include +#include +#include + +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int TYPE_MISMATCH; + extern const int LOGICAL_ERROR; +} + +String dumpQueryPlan(QueryPlan & query_plan) +{ + WriteBufferFromOwnString query_plan_buffer; + query_plan.explainPlan(query_plan_buffer, QueryPlan::ExplainPlanOptions{true, true, true, true}); + + return query_plan_buffer.str(); +} + +String dumpQueryPipeline(QueryPlan & query_plan) +{ + QueryPlan::ExplainPipelineOptions explain_pipeline; + WriteBufferFromOwnString query_pipeline_buffer; + query_plan.explainPipeline(query_pipeline_buffer, explain_pipeline); + + return query_pipeline_buffer.str(); +} + +Block buildCommonHeaderForUnion(const Blocks & queries_headers) +{ + size_t num_selects = queries_headers.size(); + Block common_header = queries_headers.front(); + size_t columns_size = common_header.columns(); + + for (size_t query_number = 1; query_number < num_selects; ++query_number) + { + if (queries_headers.at(query_number).columns() != columns_size) + throw Exception(ErrorCodes::TYPE_MISMATCH, + "Different number of columns in UNION elements: {} and {}", + common_header.dumpNames(), + queries_headers[query_number].dumpNames()); + } + + std::vector columns(num_selects); + + for (size_t column_number = 0; column_number < columns_size; ++column_number) + { + for (size_t i = 0; i < num_selects; ++i) + columns[i] = &queries_headers[i].getByPosition(column_number); + + ColumnWithTypeAndName & result_element = common_header.getByPosition(column_number); + result_element = getLeastSuperColumn(columns); + } + + return common_header; +} + +ASTPtr queryNodeToSelectQuery(const QueryTreeNodePtr & query_node) +{ + auto & query_node_typed = query_node->as(); + auto result_ast = query_node_typed.toAST(); + + while (true) + { + if (auto * select_query = result_ast->as()) + break; + else if (auto * select_with_union = result_ast->as()) + result_ast = select_with_union->list_of_selects->children.at(0); + else if (auto * subquery = result_ast->as()) + result_ast = subquery->children.at(0); + else + throw Exception(ErrorCodes::LOGICAL_ERROR, "Query node invalid conversion to select query"); + } + + if (result_ast == nullptr) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Query node invalid conversion to select query"); + + return result_ast; +} + +/** There are no limits on the maximum size of the result for the subquery. 
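Aside (not part of this diff): the unwrap loop in queryNodeToSelectQuery above keeps descending through UNION and subquery wrappers until it reaches a plain SELECT. The standalone sketch below reproduces that shape over a toy node type; Node, its Kind enum and unwrapToSelect are illustrative only and not the real ClickHouse AST classes.

#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>

// Tiny illustrative AST: a node is either a SELECT, a UNION of children,
// or a subquery wrapping a single child.
struct Node
{
    enum class Kind { Select, Union, Subquery } kind;
    std::vector<std::shared_ptr<Node>> children;
    std::string name;
};

using NodePtr = std::shared_ptr<Node>;

// Keep descending (UNION: take the first branch; subquery: take its child)
// until a plain SELECT is found, throwing if the structure is malformed.
NodePtr unwrapToSelect(NodePtr node)
{
    while (true)
    {
        if (node->kind == Node::Kind::Select)
            return node;
        if (node->children.empty())
            throw std::logic_error("Invalid conversion to select query");
        node = node->children.front();
    }
}

int main()
{
    auto select = std::make_shared<Node>(Node{Node::Kind::Select, {}, "SELECT 1"});
    auto union_node = std::make_shared<Node>(Node{Node::Kind::Union, {select}, "UNION"});
    auto subquery = std::make_shared<Node>(Node{Node::Kind::Subquery, {union_node}, "(subquery)"});

    std::cout << unwrapToSelect(subquery)->name << '\n';  // SELECT 1
    return 0;
}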
+ * Since the result of the query is not the result of the entire query. + */ +ContextPtr buildSubqueryContext(const ContextPtr & context) +{ + /** The subquery in the IN / JOIN section does not have any restrictions on the maximum size of the result. + * Because the result of this query is not the result of the entire query. + * Constraints work instead + * max_rows_in_set, max_bytes_in_set, set_overflow_mode, + * max_rows_in_join, max_bytes_in_join, join_overflow_mode, + * which are checked separately (in the Set, Join objects). + */ + auto subquery_context = Context::createCopy(context); + Settings subquery_settings = context->getSettings(); + subquery_settings.max_result_rows = 0; + subquery_settings.max_result_bytes = 0; + /// The calculation of extremes does not make sense and is not necessary (if you do it, then the extremes of the subquery can be taken for whole query). + subquery_settings.extremes = false; + subquery_context->setSettings(subquery_settings); + + return subquery_context; +} + +namespace +{ + +StreamLocalLimits getLimitsForStorage(const Settings & settings, const SelectQueryOptions & options) +{ + StreamLocalLimits limits; + limits.mode = LimitsMode::LIMITS_TOTAL; + limits.size_limits = SizeLimits(settings.max_rows_to_read, settings.max_bytes_to_read, settings.read_overflow_mode); + limits.speed_limits.max_execution_time = settings.max_execution_time; + limits.timeout_overflow_mode = settings.timeout_overflow_mode; + + /** Quota and minimal speed restrictions are checked on the initiating server of the request, and not on remote servers, + * because the initiating server has a summary of the execution of the request on all servers. + * + * But limits on data size to read and maximum execution time are reasonable to check both on initiator and + * additionally on each remote server, because these limits are checked per block of data processed, + * and remote servers may process way more blocks of data than are received by initiator. + * + * The limits to throttle maximum execution speed is also checked on all servers. + */ + if (options.to_stage == QueryProcessingStage::Complete) + { + limits.speed_limits.min_execution_rps = settings.min_execution_speed; + limits.speed_limits.min_execution_bps = settings.min_execution_speed_bytes; + } + + limits.speed_limits.max_execution_rps = settings.max_execution_speed; + limits.speed_limits.max_execution_bps = settings.max_execution_speed_bytes; + limits.speed_limits.timeout_before_checking_execution_speed = settings.timeout_before_checking_execution_speed; + + return limits; +} + +} + +StorageLimits buildStorageLimits(const Context & context, const SelectQueryOptions & options) +{ + const auto & settings = context.getSettingsRef(); + + StreamLocalLimits limits; + SizeLimits leaf_limits; + + /// Set the limits and quota for reading data, the speed and time of the query. 
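Aside (not part of this diff): a compressed view of the limit wiring in getLimitsForStorage / buildStorageLimits above. Size and time limits are always set, while the minimum-speed limits are only meaningful on the initiator, i.e. when the query is processed to the Complete stage. QuerySettings, LocalLimits and Stage below are illustrative stand-ins, not the real ClickHouse types.

#include <cstdint>
#include <iostream>

// Illustrative stand-ins for the settings and limits involved.
struct QuerySettings
{
    uint64_t max_rows_to_read = 1'000'000;
    uint64_t max_execution_time_seconds = 60;
    uint64_t min_execution_speed = 10'000;  // rows per second
};

struct LocalLimits
{
    uint64_t max_rows = 0;
    uint64_t max_seconds = 0;
    uint64_t min_rows_per_second = 0;  // checked only on the initiator
};

enum class Stage { Partial, Complete };

// Size/time limits are always applied; minimum-speed limits only when the
// query runs to the Complete stage (the initiating server).
LocalLimits buildLimits(const QuerySettings & settings, Stage to_stage)
{
    LocalLimits limits;
    limits.max_rows = settings.max_rows_to_read;
    limits.max_seconds = settings.max_execution_time_seconds;

    if (to_stage == Stage::Complete)
        limits.min_rows_per_second = settings.min_execution_speed;

    return limits;
}

int main()
{
    QuerySettings settings;
    auto initiator = buildLimits(settings, Stage::Complete);
    auto remote = buildLimits(settings, Stage::Partial);

    std::cout << "initiator min speed: " << initiator.min_rows_per_second << '\n';  // 10000
    std::cout << "remote min speed: " << remote.min_rows_per_second << '\n';        // 0
    return 0;
}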
+ if (!options.ignore_limits) + { + limits = getLimitsForStorage(settings, options); + leaf_limits = SizeLimits(settings.max_rows_to_read_leaf, settings.max_bytes_to_read_leaf, settings.read_overflow_mode_leaf); + } + + return {limits, leaf_limits}; +} + +ActionsDAGPtr buildActionsDAGFromExpressionNode(const QueryTreeNodePtr & expression_node, const ColumnsWithTypeAndName & input_columns, const PlannerContextPtr & planner_context) +{ + ActionsDAGPtr action_dag = std::make_shared(input_columns); + PlannerActionsVisitor actions_visitor(planner_context); + auto expression_dag_index_nodes = actions_visitor.visit(action_dag, expression_node); + action_dag->getOutputs() = std::move(expression_dag_index_nodes); + + return action_dag; +} + +bool sortDescriptionIsPrefix(const SortDescription & prefix, const SortDescription & full) +{ + size_t prefix_size = prefix.size(); + if (prefix_size > full.size()) + return false; + + for (size_t i = 0; i < prefix_size; ++i) + { + if (full[i] != prefix[i]) + return false; + } + + return true; +} + +bool queryHasArrayJoinInJoinTree(const QueryTreeNodePtr & query_node) +{ + const auto & query_node_typed = query_node->as(); + + std::vector join_tree_nodes_to_process; + join_tree_nodes_to_process.push_back(query_node_typed.getJoinTree()); + + while (!join_tree_nodes_to_process.empty()) + { + auto join_tree_node_to_process = join_tree_nodes_to_process.back(); + join_tree_nodes_to_process.pop_back(); + + auto join_tree_node_type = join_tree_node_to_process->getNodeType(); + + switch (join_tree_node_type) + { + case QueryTreeNodeType::TABLE: + [[fallthrough]]; + case QueryTreeNodeType::QUERY: + [[fallthrough]]; + case QueryTreeNodeType::UNION: + [[fallthrough]]; + case QueryTreeNodeType::TABLE_FUNCTION: + { + break; + } + case QueryTreeNodeType::ARRAY_JOIN: + { + return true; + } + case QueryTreeNodeType::JOIN: + { + auto & join_node = join_tree_node_to_process->as(); + join_tree_nodes_to_process.push_back(join_node.getLeftTableExpression()); + join_tree_nodes_to_process.push_back(join_node.getRightTableExpression()); + break; + } + default: + { + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Unexpected node type for table expression. Expected table, table function, query, union, join or array join. 
Actual {}", + join_tree_node_to_process->getNodeTypeName()); + } + } + } + + return false; +} + +bool queryHasWithTotalsInAnySubqueryInJoinTree(const QueryTreeNodePtr & query_node) +{ + const auto & query_node_typed = query_node->as(); + + std::vector join_tree_nodes_to_process; + join_tree_nodes_to_process.push_back(query_node_typed.getJoinTree()); + + while (!join_tree_nodes_to_process.empty()) + { + auto join_tree_node_to_process = join_tree_nodes_to_process.back(); + join_tree_nodes_to_process.pop_back(); + + auto join_tree_node_type = join_tree_node_to_process->getNodeType(); + + switch (join_tree_node_type) + { + case QueryTreeNodeType::TABLE: + [[fallthrough]]; + case QueryTreeNodeType::TABLE_FUNCTION: + { + break; + } + case QueryTreeNodeType::QUERY: + { + auto & query_node_to_process = join_tree_node_to_process->as(); + if (query_node_to_process.isGroupByWithTotals()) + return true; + + join_tree_nodes_to_process.push_back(query_node_to_process.getJoinTree()); + break; + } + case QueryTreeNodeType::UNION: + { + auto & union_node = join_tree_node_to_process->as(); + auto & union_queries = union_node.getQueries().getNodes(); + + for (auto & union_query : union_queries) + join_tree_nodes_to_process.push_back(union_query); + break; + } + case QueryTreeNodeType::ARRAY_JOIN: + { + auto & array_join_node = join_tree_node_to_process->as(); + join_tree_nodes_to_process.push_back(array_join_node.getTableExpression()); + break; + } + case QueryTreeNodeType::JOIN: + { + auto & join_node = join_tree_node_to_process->as(); + join_tree_nodes_to_process.push_back(join_node.getLeftTableExpression()); + join_tree_nodes_to_process.push_back(join_node.getRightTableExpression()); + break; + } + default: + { + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Unexpected node type for table expression. Expected table, table function, query, union, join or array join. Actual {}", + join_tree_node_to_process->getNodeTypeName()); + } + } + } + + return false; +} + +} diff --git a/src/Planner/Utils.h b/src/Planner/Utils.h new file mode 100644 index 00000000000..909cea3bf8f --- /dev/null +++ b/src/Planner/Utils.h @@ -0,0 +1,59 @@ +#pragma once + +#include +#include + +#include + +#include +#include + +#include + +#include + +#include + +#include + +namespace DB +{ + +/// Dump query plan +String dumpQueryPlan(QueryPlan & query_plan); + +/// Dump query plan result pipeline +String dumpQueryPipeline(QueryPlan & query_plan); + +/// Build common header for UNION query +Block buildCommonHeaderForUnion(const Blocks & queries_headers); + +/// Convert query node to ASTSelectQuery +ASTPtr queryNodeToSelectQuery(const QueryTreeNodePtr & query_node); + +/// Build context for subquery execution +ContextPtr buildSubqueryContext(const ContextPtr & context); + +/// Build limits for storage +StorageLimits buildStorageLimits(const Context & context, const SelectQueryOptions & options); + +/** Convert query tree expression node into actions dag. + * Inputs are not used for actions dag outputs. + * Only root query tree expression node is used as actions dag output. 
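Aside (not part of this diff): both join-tree helpers above use the same traversal pattern, an explicit stack instead of recursion, pushing child table expressions and branching on the node kind. The sketch below reproduces that pattern over a toy node type; JoinTreeNode and its Kind enum are illustrative, not the real query tree classes.

#include <iostream>
#include <memory>
#include <vector>

// Illustrative stand-in for query tree join-tree nodes.
struct JoinTreeNode
{
    enum class Kind { Table, Join, ArrayJoin } kind;
    std::vector<std::shared_ptr<JoinTreeNode>> children;
};

using JoinTreeNodePtr = std::shared_ptr<JoinTreeNode>;

// Iterative DFS with an explicit stack, returning true as soon as an
// ARRAY JOIN node is seen anywhere in the join tree.
bool hasArrayJoin(const JoinTreeNodePtr & root)
{
    std::vector<JoinTreeNodePtr> nodes_to_process{root};

    while (!nodes_to_process.empty())
    {
        auto node = nodes_to_process.back();
        nodes_to_process.pop_back();

        switch (node->kind)
        {
            case JoinTreeNode::Kind::ArrayJoin:
                return true;
            case JoinTreeNode::Kind::Join:
                for (const auto & child : node->children)
                    nodes_to_process.push_back(child);
                break;
            case JoinTreeNode::Kind::Table:
                break;
        }
    }

    return false;
}

int main()
{
    auto table = std::make_shared<JoinTreeNode>(JoinTreeNode{JoinTreeNode::Kind::Table, {}});
    auto array_join = std::make_shared<JoinTreeNode>(JoinTreeNode{JoinTreeNode::Kind::ArrayJoin, {}});
    auto join = std::make_shared<JoinTreeNode>(JoinTreeNode{JoinTreeNode::Kind::Join, {table, array_join}});

    std::cout << hasArrayJoin(join) << '\n';   // 1
    std::cout << hasArrayJoin(table) << '\n';  // 0
    return 0;
}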
+ */ +ActionsDAGPtr buildActionsDAGFromExpressionNode(const QueryTreeNodePtr & expression_node, + const ColumnsWithTypeAndName & input_columns, + const PlannerContextPtr & planner_context); + +/// Returns true if prefix sort description is prefix of full sort descriptor, false otherwise +bool sortDescriptionIsPrefix(const SortDescription & prefix, const SortDescription & full); + +/// Returns true if query node JOIN TREE contains ARRAY JOIN node, false otherwise +bool queryHasArrayJoinInJoinTree(const QueryTreeNodePtr & query_node); + +/** Returns true if query node JOIN TREE contains QUERY node with WITH TOTALS, false otherwise. + * Function is applied recursively to subqueries in JOIN TREE. + */ +bool queryHasWithTotalsInAnySubqueryInJoinTree(const QueryTreeNodePtr & query_node); + +} diff --git a/src/Planner/examples/CMakeLists.txt b/src/Planner/examples/CMakeLists.txt new file mode 100644 index 00000000000..e69de29bb2d diff --git a/src/Planner/tests/CMakeLists.txt b/src/Planner/tests/CMakeLists.txt new file mode 100644 index 00000000000..e69de29bb2d diff --git a/src/Processors/Executors/CompletedPipelineExecutor.cpp b/src/Processors/Executors/CompletedPipelineExecutor.cpp index 9e5ea3916bc..a4c7fe2f687 100644 --- a/src/Processors/Executors/CompletedPipelineExecutor.cpp +++ b/src/Processors/Executors/CompletedPipelineExecutor.cpp @@ -72,9 +72,9 @@ void CompletedPipelineExecutor::execute() data->executor = std::make_shared(pipeline.processors, pipeline.process_list_element); data->executor->setReadProgressCallback(pipeline.getReadProgressCallback()); - /// Avoid passing this to labmda, copy ptr to data instead. + /// Avoid passing this to lambda, copy ptr to data instead. /// Destructor of unique_ptr copy raw ptr into local variable first, only then calls object destructor. - auto func = [data_ptr = data.get(), num_threads = pipeline.getNumThreads(), thread_group = CurrentThread::getGroup()]() + auto func = [data_ptr = data.get(), num_threads = pipeline.getNumThreads(), thread_group = CurrentThread::getGroup()] { threadFunction(*data_ptr, thread_group, num_threads); }; diff --git a/src/Processors/Executors/CompletedPipelineExecutor.h b/src/Processors/Executors/CompletedPipelineExecutor.h index e616cd6a2b7..65fab6035b1 100644 --- a/src/Processors/Executors/CompletedPipelineExecutor.h +++ b/src/Processors/Executors/CompletedPipelineExecutor.h @@ -1,7 +1,9 @@ #pragma once + #include #include + namespace DB { diff --git a/src/Processors/Executors/ExecutingGraph.cpp b/src/Processors/Executors/ExecutingGraph.cpp index 651ede10cfd..4ab2c5b3802 100644 --- a/src/Processors/Executors/ExecutingGraph.cpp +++ b/src/Processors/Executors/ExecutingGraph.cpp @@ -10,17 +10,17 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -ExecutingGraph::ExecutingGraph(Processors & processors_, bool profile_processors_) - : processors(processors_) +ExecutingGraph::ExecutingGraph(std::shared_ptr processors_, bool profile_processors_) + : processors(std::move(processors_)) , profile_processors(profile_processors_) { - uint64_t num_processors = processors.size(); + uint64_t num_processors = processors->size(); nodes.reserve(num_processors); /// Create nodes. for (uint64_t node = 0; node < num_processors; ++node) { - IProcessor * proc = processors[node].get(); + IProcessor * proc = processors->at(node).get(); processors_map[proc] = node; nodes.emplace_back(std::make_unique(proc, node)); } @@ -71,7 +71,7 @@ bool ExecutingGraph::addEdges(uint64_t node) } } - /// Add direct edges form output ports. 
+ /// Add direct edges from output ports. auto & outputs = from->getOutputs(); auto from_output = nodes[node]->direct_edges.size(); @@ -109,10 +109,10 @@ bool ExecutingGraph::expandPipeline(std::stack & stack, uint64_t pid) { std::lock_guard guard(processors_mutex); - processors.insert(processors.end(), new_processors.begin(), new_processors.end()); + processors->insert(processors->end(), new_processors.begin(), new_processors.end()); } - uint64_t num_processors = processors.size(); + uint64_t num_processors = processors->size(); std::vector back_edges_sizes(num_processors, 0); std::vector direct_edge_sizes(num_processors, 0); @@ -126,7 +126,7 @@ bool ExecutingGraph::expandPipeline(std::stack & stack, uint64_t pid) while (nodes.size() < num_processors) { - auto * processor = processors[nodes.size()].get(); + auto * processor = processors->at(nodes.size()).get(); if (processors_map.contains(processor)) throw Exception(ErrorCodes::LOGICAL_ERROR, "Processor {} was already added to pipeline", processor->getName()); @@ -386,7 +386,7 @@ bool ExecutingGraph::updateNode(uint64_t pid, Queue & queue, Queue & async_queue void ExecutingGraph::cancel() { std::lock_guard guard(processors_mutex); - for (auto & processor : processors) + for (auto & processor : *processors) processor->cancel(); } diff --git a/src/Processors/Executors/ExecutingGraph.h b/src/Processors/Executors/ExecutingGraph.h index 587a2561ae0..b374f968122 100644 --- a/src/Processors/Executors/ExecutingGraph.h +++ b/src/Processors/Executors/ExecutingGraph.h @@ -1,4 +1,5 @@ #pragma once + #include #include #include @@ -6,6 +7,7 @@ #include #include + namespace DB { @@ -123,9 +125,9 @@ public: using ProcessorsMap = std::unordered_map; ProcessorsMap processors_map; - explicit ExecutingGraph(Processors & processors_, bool profile_processors_); + explicit ExecutingGraph(std::shared_ptr processors_, bool profile_processors_); - const Processors & getProcessors() const { return processors; } + const Processors & getProcessors() const { return *processors; } /// Traverse graph the first time to update all the childless nodes. void initializeExecution(Queue & queue); @@ -149,7 +151,7 @@ private: /// All new nodes and nodes with updated ports are pushed into stack. bool expandPipeline(std::stack & stack, uint64_t pid); - Processors & processors; + std::shared_ptr processors; std::mutex processors_mutex; UpgradableMutex nodes_mutex; diff --git a/src/Processors/Executors/PipelineExecutor.cpp b/src/Processors/Executors/PipelineExecutor.cpp index ae20d97604b..3772381de04 100644 --- a/src/Processors/Executors/PipelineExecutor.cpp +++ b/src/Processors/Executors/PipelineExecutor.cpp @@ -15,6 +15,7 @@ #include #endif + namespace DB { @@ -24,8 +25,8 @@ namespace ErrorCodes } -PipelineExecutor::PipelineExecutor(Processors & processors, QueryStatus * elem) - : process_list_element(elem) +PipelineExecutor::PipelineExecutor(std::shared_ptr & processors, QueryStatusPtr elem) + : process_list_element(std::move(elem)) { if (process_list_element) { @@ -41,7 +42,7 @@ PipelineExecutor::PipelineExecutor(Processors & processors, QueryStatus * elem) /// If exception was thrown while pipeline initialization, it means that query pipeline was not build correctly. /// It is logical error, and we need more information about pipeline. 
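Aside (not part of this diff): switching ExecutingGraph and PipelineExecutor from a Processors reference to a std::shared_ptr lets the graph and the pipeline share ownership of one processor list that can still grow while both hold it, as expandPipeline does above when it appends new processors. A minimal standalone illustration of that ownership pattern follows; here Processor is just a string, not the real IProcessor.

#include <iostream>
#include <memory>
#include <string>
#include <vector>

using Processor = std::string;             // stand-in for IProcessor
using Processors = std::vector<Processor>;
using ProcessorsPtr = std::shared_ptr<Processors>;

// Stand-in for the executing graph: co-owns the processor list and may append
// to it while the pipeline object still holds the same list.
struct Graph
{
    explicit Graph(ProcessorsPtr processors_) : processors(std::move(processors_)) {}

    void expand(Processor processor) { processors->push_back(std::move(processor)); }

    ProcessorsPtr processors;
};

int main()
{
    auto processors = std::make_shared<Processors>(Processors{"source", "transform"});

    Graph graph(processors);  // the graph co-owns the list
    graph.expand("sink");     // and grows it during "execution"

    std::cout << processors->size() << '\n';  // 3: the original owner sees the new processor
    return 0;
}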
WriteBufferFromOwnString buf; - printPipeline(processors, buf); + printPipeline(*processors, buf); buf.finalize(); exception.addMessage("Query pipeline:\n" + buf.str()); diff --git a/src/Processors/Executors/PipelineExecutor.h b/src/Processors/Executors/PipelineExecutor.h index cea64d309fa..21bde312cbc 100644 --- a/src/Processors/Executors/PipelineExecutor.h +++ b/src/Processors/Executors/PipelineExecutor.h @@ -10,16 +10,19 @@ #include #include + namespace DB { class QueryStatus; +using QueryStatusPtr = std::shared_ptr; class ExecutingGraph; using ExecutingGraphPtr = std::unique_ptr; class ReadProgressCallback; using ReadProgressCallbackPtr = std::unique_ptr; + /// Executes query pipeline. class PipelineExecutor { @@ -30,7 +33,7 @@ public: /// During pipeline execution new processors can appear. They will be added to existing set. /// /// Explicit graph representation is built in constructor. Throws if graph is not correct. - explicit PipelineExecutor(Processors & processors, QueryStatus * elem); + explicit PipelineExecutor(std::shared_ptr & processors, QueryStatusPtr elem); ~PipelineExecutor(); /// Execute pipeline in multiple threads. Must be called once. @@ -79,7 +82,7 @@ private: Poco::Logger * log = &Poco::Logger::get("PipelineExecutor"); /// Now it's used to check if query was killed. - QueryStatus * const process_list_element = nullptr; + QueryStatusPtr process_list_element; ReadProgressCallbackPtr read_progress_callback; diff --git a/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp b/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp index 7a55d26f16c..ee8e94b6f28 100644 --- a/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp +++ b/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp @@ -129,7 +129,7 @@ PushingAsyncPipelineExecutor::PushingAsyncPipelineExecutor(QueryPipeline & pipel pushing_source = std::make_shared(pipeline.input->getHeader()); connect(pushing_source->getPort(), *pipeline.input); - pipeline.processors.emplace_back(pushing_source); + pipeline.processors->emplace_back(pushing_source); } PushingAsyncPipelineExecutor::~PushingAsyncPipelineExecutor() diff --git a/src/Processors/Executors/PushingPipelineExecutor.cpp b/src/Processors/Executors/PushingPipelineExecutor.cpp index bf43cd327fe..d9a14704cd0 100644 --- a/src/Processors/Executors/PushingPipelineExecutor.cpp +++ b/src/Processors/Executors/PushingPipelineExecutor.cpp @@ -58,7 +58,7 @@ PushingPipelineExecutor::PushingPipelineExecutor(QueryPipeline & pipeline_) : pi pushing_source = std::make_shared(pipeline.input->getHeader(), input_wait_flag); connect(pushing_source->getPort(), *pipeline.input); - pipeline.processors.emplace_back(pushing_source); + pipeline.processors->emplace_back(pushing_source); } PushingPipelineExecutor::~PushingPipelineExecutor() diff --git a/src/Processors/Formats/IRowInputFormat.cpp b/src/Processors/Formats/IRowInputFormat.cpp index 52395338279..6f153019df5 100644 --- a/src/Processors/Formats/IRowInputFormat.cpp +++ b/src/Processors/Formats/IRowInputFormat.cpp @@ -188,7 +188,7 @@ Chunk IRowInputFormat::generate() } e.setFileName(getFileNameFromReadBuffer(getReadBuffer())); - e.setLineNumber(total_rows); + e.setLineNumber(static_cast(total_rows)); e.addMessage(verbose_diagnostic); throw; } @@ -232,7 +232,9 @@ Chunk IRowInputFormat::generate() return {}; } - finalizeObjectColumns(columns); + for (const auto & column : columns) + column->finalize(); + Chunk chunk(std::move(columns), num_rows); return chunk; } diff --git a/src/Processors/Formats/ISchemaReader.cpp 
b/src/Processors/Formats/ISchemaReader.cpp index 0e4d3f091b2..a26ed6b0b40 100644 --- a/src/Processors/Formats/ISchemaReader.cpp +++ b/src/Processors/Formats/ISchemaReader.cpp @@ -132,6 +132,16 @@ NamesAndTypesList IRowSchemaReader::readSchema() ErrorCodes::INCORRECT_DATA, "The number of column names {} differs with the number of types {}", column_names.size(), data_types.size()); } + else + { + std::unordered_set names_set; + for (const auto & name : column_names) + { + if (names_set.contains(name)) + throw Exception(ErrorCodes::INCORRECT_DATA, "Duplicate column name found while schema inference: \"{}\"", name); + names_set.insert(name); + } + } for (size_t i = 0; i != column_names.size(); ++i) { @@ -224,6 +234,9 @@ NamesAndTypesList IRowWithNamesSchemaReader::readSchema() names_order.reserve(names_and_types.size()); for (const auto & [name, type] : names_and_types) { + if (names_to_types.contains(name)) + throw Exception(ErrorCodes::INCORRECT_DATA, "Duplicate column name found while schema inference: \"{}\"", name); + auto hint_it = hints.find(name); if (hint_it != hints.end()) names_to_types[name] = hint_it->second; @@ -240,8 +253,13 @@ NamesAndTypesList IRowWithNamesSchemaReader::readSchema() /// We reached eof. break; + std::unordered_set names_set; /// We should check for duplicate column names in current row for (auto & [name, new_type] : new_names_and_types) { + if (names_set.contains(name)) + throw Exception(ErrorCodes::INCORRECT_DATA, "Duplicate column name found while schema inference: \"{}\"", name); + names_set.insert(name); + auto it = names_to_types.find(name); /// If we didn't see this column before, just add it. if (it == names_to_types.end()) diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index 70510a165e6..e9b01ec7dda 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -82,7 +82,7 @@ static ColumnWithTypeAndName readColumnWithNumericData(std::shared_ptr(*internal_column).getData(); column_data.reserve(arrow_column->length()); - for (size_t chunk_i = 0, num_chunks = static_cast(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) + for (int chunk_i = 0, num_chunks = arrow_column->num_chunks(); chunk_i < num_chunks; ++chunk_i) { std::shared_ptr chunk = arrow_column->chunk(chunk_i); if (chunk->length() == 0) @@ -108,7 +108,7 @@ static ColumnWithTypeAndName readColumnWithStringData(std::shared_ptr & column_offsets = assert_cast(*internal_column).getOffsets(); size_t chars_t_size = 0; - for (size_t chunk_i = 0, num_chunks = static_cast(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) + for (int chunk_i = 0, num_chunks = arrow_column->num_chunks(); chunk_i < num_chunks; ++chunk_i) { ArrowArray & chunk = dynamic_cast(*(arrow_column->chunk(chunk_i))); const size_t chunk_length = chunk.length(); @@ -123,7 +123,7 @@ static ColumnWithTypeAndName readColumnWithStringData(std::shared_ptrlength()); - for (size_t chunk_i = 0, num_chunks = static_cast(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) + for (int chunk_i = 0, num_chunks = arrow_column->num_chunks(); chunk_i < num_chunks; ++chunk_i) { ArrowArray & chunk = dynamic_cast(*(arrow_column->chunk(chunk_i))); std::shared_ptr buffer = chunk.value_data(); @@ -151,7 +151,7 @@ static ColumnWithTypeAndName readColumnWithBooleanData(std::shared_ptr &>(*internal_column).getData(); column_data.reserve(arrow_column->length()); - for (size_t chunk_i = 0, 
num_chunks = static_cast(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) + for (int chunk_i = 0, num_chunks = arrow_column->num_chunks(); chunk_i < num_chunks; ++chunk_i) { arrow::BooleanArray & chunk = dynamic_cast(*(arrow_column->chunk(chunk_i))); if (chunk.length() == 0) @@ -173,7 +173,7 @@ static ColumnWithTypeAndName readColumnWithDate32Data(std::shared_ptr & column_data = assert_cast &>(*internal_column).getData(); column_data.reserve(arrow_column->length()); - for (size_t chunk_i = 0, num_chunks = static_cast(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) + for (int chunk_i = 0, num_chunks = arrow_column->num_chunks(); chunk_i < num_chunks; ++chunk_i) { arrow::Date32Array & chunk = dynamic_cast(*(arrow_column->chunk(chunk_i))); @@ -198,7 +198,7 @@ static ColumnWithTypeAndName readColumnWithDate64Data(std::shared_ptr &>(*internal_column).getData(); column_data.reserve(arrow_column->length()); - for (size_t chunk_i = 0, num_chunks = static_cast(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) + for (int chunk_i = 0, num_chunks = arrow_column->num_chunks(); chunk_i < num_chunks; ++chunk_i) { auto & chunk = dynamic_cast(*(arrow_column->chunk(chunk_i))); for (size_t value_i = 0, length = static_cast(chunk.length()); value_i < length; ++value_i) @@ -219,7 +219,7 @@ static ColumnWithTypeAndName readColumnWithTimestampData(std::shared_ptr &>(*internal_column).getData(); column_data.reserve(arrow_column->length()); - for (size_t chunk_i = 0, num_chunks = static_cast(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) + for (int chunk_i = 0, num_chunks = arrow_column->num_chunks(); chunk_i < num_chunks; ++chunk_i) { const auto & chunk = dynamic_cast(*(arrow_column->chunk(chunk_i))); for (size_t value_i = 0, length = static_cast(chunk.length()); value_i < length; ++value_i) @@ -239,7 +239,7 @@ static ColumnWithTypeAndName readColumnWithTimeData(std::shared_ptrcreateColumn(); internal_column->reserve(arrow_column->length()); - for (size_t chunk_i = 0, num_chunks = static_cast(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) + for (int chunk_i = 0, num_chunks = arrow_column->num_chunks(); chunk_i < num_chunks; ++chunk_i) { auto & chunk = dynamic_cast(*(arrow_column->chunk(chunk_i))); if (chunk.length() == 0) @@ -272,7 +272,7 @@ static ColumnWithTypeAndName readColumnWithDecimalDataImpl(std::shared_ptrlength()); - for (size_t chunk_i = 0, num_chunks = static_cast(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) + for (int chunk_i = 0, num_chunks = arrow_column->num_chunks(); chunk_i < num_chunks; ++chunk_i) { auto & chunk = dynamic_cast(*(arrow_column->chunk(chunk_i))); for (size_t value_i = 0, length = static_cast(chunk.length()); value_i < length; ++value_i) @@ -308,7 +308,7 @@ static ColumnPtr readByteMapFromArrowColumn(std::shared_ptr PaddedPODArray & bytemap_data = assert_cast &>(*nullmap_column).getData(); bytemap_data.reserve(arrow_column->length()); - for (size_t chunk_i = 0; chunk_i != static_cast(arrow_column->num_chunks()); ++chunk_i) + for (int chunk_i = 0; chunk_i != arrow_column->num_chunks(); ++chunk_i) { std::shared_ptr chunk = arrow_column->chunk(chunk_i); @@ -324,7 +324,7 @@ static ColumnPtr readOffsetsFromArrowListColumn(std::shared_ptr &>(*offsets_column).getData(); offsets_data.reserve(arrow_column->length()); - for (size_t chunk_i = 0, num_chunks = static_cast(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) + for (int chunk_i = 0, num_chunks = arrow_column->num_chunks(); chunk_i < 
num_chunks; ++chunk_i) { arrow::ListArray & list_chunk = dynamic_cast(*(arrow_column->chunk(chunk_i))); auto arrow_offsets_array = list_chunk.offsets(); @@ -356,7 +356,7 @@ static ColumnWithTypeAndName readColumnWithIndexesDataImpl(std::shared_ptrlength()); NumericType shift = is_nullable ? 2 : 1; - for (size_t chunk_i = 0, num_chunks = static_cast(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) + for (int chunk_i = 0, num_chunks = arrow_column->num_chunks(); chunk_i < num_chunks; ++chunk_i) { std::shared_ptr chunk = arrow_column->chunk(chunk_i); if (chunk->length() == 0) @@ -450,7 +450,8 @@ static ColumnPtr readColumnWithIndexesData(std::shared_ptr # define DISPATCH(ARROW_NUMERIC_TYPE, CPP_NUMERIC_TYPE) \ case ARROW_NUMERIC_TYPE: \ { \ - return readColumnWithIndexesDataImpl(arrow_column, "", default_value_index, dict_size, is_nullable).column; \ + return readColumnWithIndexesDataImpl(\ + arrow_column, "", default_value_index, static_cast(dict_size), is_nullable).column; \ } FOR_ARROW_INDEXES_TYPES(DISPATCH) # undef DISPATCH @@ -463,7 +464,7 @@ static std::shared_ptr getNestedArrowColumn(std::shared_ptr { arrow::ArrayVector array_vector; array_vector.reserve(arrow_column->num_chunks()); - for (size_t chunk_i = 0, num_chunks = static_cast(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) + for (int chunk_i = 0, num_chunks = arrow_column->num_chunks(); chunk_i < num_chunks; ++chunk_i) { arrow::ListArray & list_chunk = dynamic_cast(*(arrow_column->chunk(chunk_i))); std::shared_ptr chunk = list_chunk.values(); @@ -582,7 +583,7 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( auto arrow_type = arrow_column->type(); auto * arrow_struct_type = assert_cast(arrow_type.get()); std::vector nested_arrow_columns(arrow_struct_type->num_fields()); - for (size_t chunk_i = 0, num_chunks = static_cast(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) + for (int chunk_i = 0, num_chunks = arrow_column->num_chunks(); chunk_i < num_chunks; ++chunk_i) { arrow::StructArray & struct_chunk = dynamic_cast(*(arrow_column->chunk(chunk_i))); for (int i = 0; i < arrow_struct_type->num_fields(); ++i) @@ -631,7 +632,7 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( if (!dict_info.values) { arrow::ArrayVector dict_array; - for (size_t chunk_i = 0, num_chunks = static_cast(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) + for (int chunk_i = 0, num_chunks = arrow_column->num_chunks(); chunk_i < num_chunks; ++chunk_i) { arrow::DictionaryArray & dict_chunk = dynamic_cast(*(arrow_column->chunk(chunk_i))); dict_array.emplace_back(dict_chunk.dictionary()); @@ -656,7 +657,7 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( } arrow::ArrayVector indexes_array; - for (size_t chunk_i = 0, num_chunks = static_cast(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) + for (int chunk_i = 0, num_chunks = arrow_column->num_chunks(); chunk_i < num_chunks; ++chunk_i) { arrow::DictionaryArray & dict_chunk = dynamic_cast(*(arrow_column->chunk(chunk_i))); indexes_array.emplace_back(dict_chunk.indices()); diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp index 0ec04c61321..80183838277 100644 --- a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp @@ -283,14 +283,15 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(avro::Node if (root_node->leaves() == 2 && (root_node->leafAt(0)->type() == avro::AVRO_NULL || 
root_node->leafAt(1)->type() == avro::AVRO_NULL)) { - size_t non_null_union_index = root_node->leafAt(0)->type() == avro::AVRO_NULL ? 1 : 0; + int non_null_union_index = root_node->leafAt(0)->type() == avro::AVRO_NULL ? 1 : 0; if (target.isNullable()) { - auto nested_deserialize = this->createDeserializeFn(root_node->leafAt(non_null_union_index), removeNullable(target_type)); + auto nested_deserialize = this->createDeserializeFn( + root_node->leafAt(non_null_union_index), removeNullable(target_type)); return [non_null_union_index, nested_deserialize](IColumn & column, avro::Decoder & decoder) { ColumnNullable & col = assert_cast(column); - size_t union_index = decoder.decodeUnionIndex(); + int union_index = static_cast(decoder.decodeUnionIndex()); if (union_index == non_null_union_index) { nested_deserialize(col.getNestedColumn(), decoder); @@ -308,7 +309,7 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(avro::Node auto nested_deserialize = this->createDeserializeFn(root_node->leafAt(non_null_union_index), target_type); return [non_null_union_index, nested_deserialize](IColumn & column, avro::Decoder & decoder) { - size_t union_index = decoder.decodeUnionIndex(); + int union_index = static_cast(decoder.decodeUnionIndex()); if (union_index == non_null_union_index) nested_deserialize(column, decoder); else @@ -345,7 +346,8 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(avro::Node if (target.isString()) { std::vector symbols; - for (size_t i = 0; i < root_node->names(); ++i) + symbols.reserve(root_node->names()); + for (int i = 0; i < static_cast(root_node->names()); ++i) { symbols.push_back(root_node->nameAt(i)); } @@ -360,7 +362,7 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(avro::Node { const auto & enum_type = dynamic_cast(*target_type); Row symbol_mapping; - for (size_t i = 0; i < root_node->names(); ++i) + for (int i = 0; i < static_cast(root_node->names()); ++i) { symbol_mapping.push_back(enum_type.castToValue(root_node->nameAt(i))); } @@ -397,7 +399,7 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(avro::Node if (root_node->leaves() != nested_types.size()) throw Exception(ErrorCodes::INCORRECT_DATA, "The number of leaves in record doesn't match the number of elements in tuple"); - for (size_t i = 0; i != root_node->leaves(); ++i) + for (int i = 0; i != static_cast(root_node->leaves()); ++i) { const auto & name = root_node->nameAt(i); size_t pos = tuple_type.getPositionByName(name); @@ -505,7 +507,8 @@ AvroDeserializer::SkipFn AvroDeserializer::createSkipFn(avro::NodePtr root_node) case avro::AVRO_UNION: { std::vector union_skip_fns; - for (size_t i = 0; i < root_node->leaves(); ++i) + union_skip_fns.reserve(root_node->leaves()); + for (int i = 0; i < static_cast(root_node->leaves()); ++i) { union_skip_fns.push_back(createSkipFn(root_node->leafAt(i))); } @@ -546,7 +549,8 @@ AvroDeserializer::SkipFn AvroDeserializer::createSkipFn(avro::NodePtr root_node) case avro::AVRO_RECORD: { std::vector field_skip_fns; - for (size_t i = 0; i < root_node->leaves(); ++i) + field_skip_fns.reserve(root_node->leaves()); + for (int i = 0; i < static_cast(root_node->leaves()); ++i) { field_skip_fns.push_back(createSkipFn(root_node->leafAt(i))); } @@ -633,7 +637,7 @@ AvroDeserializer::Action AvroDeserializer::createAction(const Block & header, co const auto & column = header.getByPosition(target_column_idx); try { - AvroDeserializer::Action action(target_column_idx, createDeserializeFn(node, 
column.type)); + AvroDeserializer::Action action(static_cast(target_column_idx), createDeserializeFn(node, column.type)); column_found[target_column_idx] = true; return action; } @@ -646,7 +650,7 @@ AvroDeserializer::Action AvroDeserializer::createAction(const Block & header, co else if (node->type() == avro::AVRO_RECORD) { std::vector field_actions(node->leaves()); - for (size_t i = 0; i < node->leaves(); ++i) + for (int i = 0; i < static_cast(node->leaves()); ++i) { const auto & field_node = node->leafAt(i); const auto & field_name = node->nameAt(i); @@ -657,7 +661,7 @@ AvroDeserializer::Action AvroDeserializer::createAction(const Block & header, co else if (node->type() == avro::AVRO_UNION) { std::vector branch_actions(node->leaves()); - for (size_t i = 0; i < node->leaves(); ++i) + for (int i = 0; i < static_cast(node->leaves()); ++i) { const auto & branch_node = node->leafAt(i); const auto & branch_name = nodeName(branch_node); @@ -687,7 +691,7 @@ AvroDeserializer::Action AvroDeserializer::createAction(const Block & header, co /// Create nested deserializer for each nested column. std::vector nested_deserializers; std::vector nested_indexes; - for (size_t i = 0; i != nested_avro_node->leaves(); ++i) + for (int i = 0; i != static_cast(nested_avro_node->leaves()); ++i) { const auto & name = nested_avro_node->nameAt(i); if (!nested_types.contains(name)) @@ -970,7 +974,7 @@ NamesAndTypesList AvroSchemaReader::readSchema() throw Exception("Root schema must be a record", ErrorCodes::TYPE_MISMATCH); NamesAndTypesList names_and_types; - for (size_t i = 0; i != root_node->leaves(); ++i) + for (int i = 0; i != static_cast(root_node->leaves()); ++i) names_and_types.emplace_back(root_node->nameAt(i), avroNodeToDataType(root_node->leafAt(i))); return names_and_types; @@ -999,14 +1003,14 @@ DataTypePtr AvroSchemaReader::avroNodeToDataType(avro::NodePtr node) if (node->names() < 128) { EnumValues::Values values; - for (size_t i = 0; i != node->names(); ++i) + for (int i = 0; i != static_cast(node->names()); ++i) values.emplace_back(node->nameAt(i), i); return std::make_shared(std::move(values)); } else if (node->names() < 32768) { EnumValues::Values values; - for (size_t i = 0; i != node->names(); ++i) + for (int i = 0; i != static_cast(node->names()); ++i) values.emplace_back(node->nameAt(i), i); return std::make_shared(std::move(values)); } @@ -1022,7 +1026,7 @@ DataTypePtr AvroSchemaReader::avroNodeToDataType(avro::NodePtr node) case avro::Type::AVRO_UNION: if (node->leaves() == 2 && (node->leafAt(0)->type() == avro::Type::AVRO_NULL || node->leafAt(1)->type() == avro::Type::AVRO_NULL)) { - size_t nested_leaf_index = node->leafAt(0)->type() == avro::Type::AVRO_NULL ? 1 : 0; + int nested_leaf_index = node->leafAt(0)->type() == avro::Type::AVRO_NULL ? 1 : 0; auto nested_type = avroNodeToDataType(node->leafAt(nested_leaf_index)); return nested_type->canBeInsideNullable() ? 
makeNullable(nested_type) : nested_type; } @@ -1035,7 +1039,7 @@ DataTypePtr AvroSchemaReader::avroNodeToDataType(avro::NodePtr node) nested_types.reserve(node->leaves()); Names nested_names; nested_names.reserve(node->leaves()); - for (size_t i = 0; i != node->leaves(); ++i) + for (int i = 0; i != static_cast(node->leaves()); ++i) { nested_types.push_back(avroNodeToDataType(node->leafAt(i))); nested_names.push_back(node->nameAt(i)); diff --git a/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp b/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp index b63b1e7b9b1..e3d570d1876 100644 --- a/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp @@ -193,7 +193,7 @@ AvroSerializer::SchemaWithSerializeFn AvroSerializer::createSchemaWithSerializeF case TypeIndex::FixedString: { auto size = data_type->getSizeOfValueInMemory(); - auto schema = avro::FixedSchema(size, "fixed_" + toString(type_name_increment)); + auto schema = avro::FixedSchema(static_cast(size), "fixed_" + toString(type_name_increment)); return {schema, [](const IColumn & column, size_t row_num, avro::Encoder & encoder) { const std::string_view & s = assert_cast(column).getDataAt(row_num).toView(); diff --git a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp index 9e423f8a96b..87fff16c107 100644 --- a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp +++ b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp @@ -224,7 +224,14 @@ namespace DB for (size_t i = 0; i != column_tuple->tupleSize(); ++i) { ColumnPtr nested_column = column_tuple->getColumnPtr(i); - fillArrowArray(column_name + "." + nested_names[i], nested_column, nested_types[i], null_bytemap, builder.field_builder(i), format_name, start, end, output_string_as_string, dictionary_values); + fillArrowArray( + column_name + "." 
+ nested_names[i], + nested_column, nested_types[i], null_bytemap, + builder.field_builder(static_cast(i)), + format_name, + start, end, + output_string_as_string, + dictionary_values); } for (size_t i = start; i != end; ++i) @@ -370,7 +377,7 @@ namespace DB else { std::string_view string_ref = internal_column.getDataAt(string_i).toView(); - status = builder.Append(string_ref.data(), string_ref.size()); + status = builder.Append(string_ref.data(), static_cast(string_ref.size())); } checkStatus(status, write_column->getName(), format_name); } diff --git a/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp b/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp index c6f8742455e..08d2cac743a 100644 --- a/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp @@ -91,7 +91,7 @@ static void insertSignedInteger(IColumn & column, const DataTypePtr & column_typ assert_cast(column).insertValue(value); break; case TypeIndex::Int32: - assert_cast(column).insertValue(value); + assert_cast(column).insertValue(static_cast(value)); break; case TypeIndex::Int64: assert_cast(column).insertValue(value); @@ -117,7 +117,7 @@ static void insertUnsignedInteger(IColumn & column, const DataTypePtr & column_t break; case TypeIndex::DateTime: [[fallthrough]]; case TypeIndex::UInt32: - assert_cast(column).insertValue(value); + assert_cast(column).insertValue(static_cast(value)); break; case TypeIndex::UInt64: assert_cast(column).insertValue(value); diff --git a/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp index fd33abfb587..654917b6357 100644 --- a/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp @@ -79,7 +79,7 @@ static capnp::DynamicValue::Builder initStructFieldBuilder(const ColumnPtr & col if (const auto * array_column = checkAndGetColumn(*column)) { size_t size = array_column->getOffsets()[row_num] - array_column->getOffsets()[row_num - 1]; - return struct_builder.init(field, size); + return struct_builder.init(field, static_cast(size)); } if (field.getType().isStruct()) @@ -200,7 +200,7 @@ static std::optional convertToDynamicValue( size_t size = offsets[row_num] - offset; const auto * nested_array_column = checkAndGetColumn(*nested_column); - for (size_t i = 0; i != size; ++i) + for (unsigned i = 0; i != static_cast(size); ++i) { capnp::DynamicValue::Builder value_builder; /// For nested arrays we need to initialize nested list builder. 
@@ -208,7 +208,7 @@ static std::optional convertToDynamicValue( { const auto & nested_offset = nested_array_column->getOffsets(); size_t nested_array_size = nested_offset[offset + i] - nested_offset[offset + i - 1]; - value_builder = list_builder.init(i, nested_array_size); + value_builder = list_builder.init(i, static_cast(nested_array_size)); } else value_builder = list_builder[i]; diff --git a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp index 739fa8735b2..bc363e5aa98 100644 --- a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp +++ b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp @@ -502,7 +502,7 @@ bool ConstantExpressionTemplate::parseLiteralAndAssertType(ReadBuffer & istr, co ParserTupleOfLiterals parser_tuple; Tokens tokens_number(istr.position(), istr.buffer().end()); - IParser::Pos iterator(tokens_number, settings.max_parser_depth); + IParser::Pos iterator(tokens_number, static_cast(settings.max_parser_depth)); Expected expected; ASTPtr ast; if (!parser_array.parse(iterator, ast, expected) && !parser_tuple.parse(iterator, ast, expected)) diff --git a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp index 1c99a5484a2..16df132b9d8 100644 --- a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp @@ -67,6 +67,19 @@ CustomSeparatedRowInputFormat::CustomSeparatedRowInputFormat( } } +void CustomSeparatedRowInputFormat::readPrefix() +{ + RowInputFormatWithNamesAndTypes::readPrefix(); + + /// Provide better error message for unsupported delimiters + for (const auto & column_index : column_mapping->column_indexes_for_input_fields) + { + if (column_index) + checkSupportedDelimiterAfterField(format_settings.custom.escaping_rule, format_settings.custom.field_delimiter, data_types[*column_index]); + else + checkSupportedDelimiterAfterField(format_settings.custom.escaping_rule, format_settings.custom.field_delimiter, nullptr); + } +} bool CustomSeparatedRowInputFormat::allowSyncAfterError() const { diff --git a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h index c7e332b983f..e7e96ab87b1 100644 --- a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h +++ b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h @@ -30,6 +30,7 @@ private: bool allowSyncAfterError() const override; void syncAfterError() override; + void readPrefix() override; std::unique_ptr buf; bool ignore_spaces; diff --git a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp index 0c150750e09..db5a027844b 100644 --- a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp @@ -214,7 +214,7 @@ bool JSONEachRowRowInputFormat::readRow(MutableColumns & columns, RowReadExtensi seen_columns.assign(num_columns, false); nested_prefix_length = 0; - readRowStart(); + readRowStart(columns); readJSONObject(columns); const auto & header = getPort().getHeader(); diff --git a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h index 59447180f77..4e2946cfea6 100644 --- a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h +++ b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h @@ -48,7 +48,7 @@ private: 
void readJSONObject(MutableColumns & columns); void readNestedData(const String & name, MutableColumns & columns); - virtual void readRowStart() {} + virtual void readRowStart(MutableColumns &) {} virtual bool checkEndOfData(bool is_first_row); const FormatSettings format_settings; @@ -66,10 +66,6 @@ private: /// the nested column names are 'n.i' and 'n.s' and the nested prefix is 'n.' size_t nested_prefix_length = 0; - /// Set of columns for which the values were read. The rest will be filled with default values. - std::vector read_columns; - /// Set of columns which already met in row. Exception is thrown if there are more than one column with the same name. - std::vector seen_columns; /// These sets may be different, because if null_as_default=1 read_columns[i] will be false and seen_columns[i] will be true /// for row like {..., "non-nullable column name" : null, ...} @@ -85,6 +81,12 @@ private: bool yield_strings; protected: + + /// Set of columns for which the values were read. The rest will be filled with default values. + std::vector read_columns; + /// Set of columns which already met in row. Exception is thrown if there are more than one column with the same name. + std::vector seen_columns; + /// This flag is needed to know if data is in square brackets. bool data_in_square_brackets = false; }; diff --git a/src/Processors/Formats/Impl/JSONObjectEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONObjectEachRowRowInputFormat.cpp index 5ca1ba33c27..6e6d6287840 100644 --- a/src/Processors/Formats/Impl/JSONObjectEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONObjectEachRowRowInputFormat.cpp @@ -2,12 +2,39 @@ #include #include #include +#include namespace DB { +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + +std::optional getColumnIndexForJSONObjectEachRowObjectName(const Block & header, const FormatSettings & format_settings) +{ + if (format_settings.json_object_each_row.column_for_object_name.empty()) + return std::nullopt; + + if (!header.has(format_settings.json_object_each_row.column_for_object_name)) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Column name '{}' from setting format_json_object_each_row_column_for_object_name doesn't exists in header", + format_settings.json_object_each_row.column_for_object_name); + + size_t index = header.getPositionByName(format_settings.json_object_each_row.column_for_object_name); + if (!isStringOrFixedString(header.getDataTypes()[index])) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Column '{}' from setting json_object_each_row_column_for_object_name must have String type", + format_settings.json_object_each_row.column_for_object_name); + + return index; +} + JSONObjectEachRowInputFormat::JSONObjectEachRowInputFormat(ReadBuffer & in_, const Block & header_, Params params_, const FormatSettings & format_settings_) - : JSONEachRowRowInputFormat(in_, header_, params_, format_settings_, false) + : JSONEachRowRowInputFormat(in_, header_, params_, format_settings_, false), field_index_for_object_name(getColumnIndexForJSONObjectEachRowObjectName(header_, format_settings_)) { } @@ -16,9 +43,15 @@ void JSONObjectEachRowInputFormat::readPrefix() JSONUtils::skipObjectStart(*in); } -void JSONObjectEachRowInputFormat::readRowStart() +void JSONObjectEachRowInputFormat::readRowStart(MutableColumns & columns) { - JSONUtils::readFieldName(*in); + auto object_name = JSONUtils::readFieldName(*in); + if (field_index_for_object_name) + { + columns[*field_index_for_object_name]->insertData(object_name.data(), 
object_name.size()); + seen_columns[*field_index_for_object_name] = true; + read_columns[*field_index_for_object_name] = true; + } } bool JSONObjectEachRowInputFormat::checkEndOfData(bool is_first_row) @@ -30,7 +63,6 @@ bool JSONObjectEachRowInputFormat::checkEndOfData(bool is_first_row) return false; } - JSONObjectEachRowSchemaReader::JSONObjectEachRowSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_) : IRowWithNamesSchemaReader(in_, format_settings_) { @@ -53,7 +85,10 @@ NamesAndTypesList JSONObjectEachRowSchemaReader::readRowAndGetNamesAndDataTypes( JSONUtils::skipComma(in); JSONUtils::readFieldName(in); - return JSONUtils::readRowAndGetNamesAndDataTypesForJSONEachRow(in, format_settings, false); + auto names_and_types = JSONUtils::readRowAndGetNamesAndDataTypesForJSONEachRow(in, format_settings, false); + if (!format_settings.json_object_each_row.column_for_object_name.empty()) + names_and_types.emplace_front(format_settings.json_object_each_row.column_for_object_name, std::make_shared()); + return names_and_types; } void JSONObjectEachRowSchemaReader::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) @@ -83,7 +118,8 @@ void registerJSONObjectEachRowSchemaReader(FormatFactory & factory) }); factory.registerAdditionalInfoForSchemaCacheGetter("JSONObjectEachRow", [](const FormatSettings & settings) { - return getAdditionalFormatInfoByEscapingRule(settings, FormatSettings::EscapingRule::JSON); + return getAdditionalFormatInfoByEscapingRule(settings, FormatSettings::EscapingRule::JSON) + + fmt::format(", format_json_object_each_row_column_for_object_name={}", settings.json_object_each_row.column_for_object_name); }); } diff --git a/src/Processors/Formats/Impl/JSONObjectEachRowRowInputFormat.h b/src/Processors/Formats/Impl/JSONObjectEachRowRowInputFormat.h index fd98f43649f..466c0111a03 100644 --- a/src/Processors/Formats/Impl/JSONObjectEachRowRowInputFormat.h +++ b/src/Processors/Formats/Impl/JSONObjectEachRowRowInputFormat.h @@ -27,8 +27,10 @@ public: private: void readPrefix() override; void readSuffix() override {} - void readRowStart() override; + void readRowStart(MutableColumns & columns) override; bool checkEndOfData(bool is_first_row) override; + + std::optional field_index_for_object_name; }; @@ -44,4 +46,6 @@ private: bool first_row = true; }; +std::optional getColumnIndexForJSONObjectEachRowObjectName(const Block & header, const FormatSettings & settings); + } diff --git a/src/Processors/Formats/Impl/JSONObjectEachRowRowOutputFormat.cpp b/src/Processors/Formats/Impl/JSONObjectEachRowRowOutputFormat.cpp index 10c1e9beda5..6155efd4b63 100644 --- a/src/Processors/Formats/Impl/JSONObjectEachRowRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONObjectEachRowRowOutputFormat.cpp @@ -1,4 +1,5 @@ #include +#include #include #include @@ -6,10 +7,38 @@ namespace DB { JSONObjectEachRowRowOutputFormat::JSONObjectEachRowRowOutputFormat(WriteBuffer & out_, const Block & header_, const RowOutputFormatParams & params_, const FormatSettings & settings_) - : JSONEachRowRowOutputFormat(out_, header_, params_, settings_) + : JSONEachRowRowOutputFormat(out_, header_, params_, settings_), field_index_for_object_name(getColumnIndexForJSONObjectEachRowObjectName(header_, settings_)) { } +void JSONObjectEachRowRowOutputFormat::writeField(const IColumn & column, const ISerialization & serialization, size_t row) +{ + if (field_number == field_index_for_object_name) + { + ++field_number; + return; + } + JSONEachRowRowOutputFormat::writeField(column, 
serialization, row); +} + +void JSONObjectEachRowRowOutputFormat::write(const Columns & columns, size_t row) +{ + if (field_index_for_object_name) + object_name = columns[*field_index_for_object_name]->getDataAt(row).toString(); + else + object_name = "row_" + std::to_string(row + 1); + + IRowOutputFormat::write(columns, row); +} + +void JSONObjectEachRowRowOutputFormat::writeFieldDelimiter() +{ + /// We should not write comma before column that is used for + /// object name and also after it if it's in the first place + if (field_number != field_index_for_object_name && !(field_index_for_object_name == 0 && field_number == 1)) + JSONEachRowRowOutputFormat::writeFieldDelimiter(); +} + void JSONObjectEachRowRowOutputFormat::writePrefix() { JSONUtils::writeObjectStart(*ostr); @@ -17,9 +46,7 @@ void JSONObjectEachRowRowOutputFormat::writePrefix() void JSONObjectEachRowRowOutputFormat::writeRowStartDelimiter() { - ++row_num; - String title = "row_" + std::to_string(row_num); - JSONUtils::writeCompactObjectStart(*ostr, 1, title.c_str()); + JSONUtils::writeCompactObjectStart(*ostr, 1, object_name.c_str()); } void JSONObjectEachRowRowOutputFormat::writeRowEndDelimiter() @@ -52,6 +79,7 @@ void registerOutputFormatJSONObjectEachRow(FormatFactory & factory) return std::make_shared(buf, sample, params, settings); }); factory.markOutputFormatSupportsParallelFormatting("JSONObjectEachRow"); + factory.markFormatHasNoAppendSupport("JSONObjectEachRow"); } } diff --git a/src/Processors/Formats/Impl/JSONObjectEachRowRowOutputFormat.h b/src/Processors/Formats/Impl/JSONObjectEachRowRowOutputFormat.h index 51db22fb606..19d9fe1aa53 100644 --- a/src/Processors/Formats/Impl/JSONObjectEachRowRowOutputFormat.h +++ b/src/Processors/Formats/Impl/JSONObjectEachRowRowOutputFormat.h @@ -29,6 +29,9 @@ public: String getName() const override { return "JSONObjectEachRowRowOutputFormat"; } private: + void write(const Columns & columns, size_t row) override; + void writeField(const IColumn & column, const ISerialization & serialization, size_t row) override; + void writeFieldDelimiter() override; void writeRowStartDelimiter() override; void writeRowEndDelimiter() override; void writeRowBetweenDelimiter() override; @@ -36,7 +39,8 @@ private: void writePrefix() override; void writeSuffix() override; - size_t row_num = 0; + std::optional field_index_for_object_name; + String object_name; }; } diff --git a/src/Processors/Formats/Impl/LineAsStringRowInputFormat.cpp b/src/Processors/Formats/Impl/LineAsStringRowInputFormat.cpp index 30084804d92..677f8bb28ec 100644 --- a/src/Processors/Formats/Impl/LineAsStringRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/LineAsStringRowInputFormat.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include @@ -63,6 +63,12 @@ void registerInputFormatLineAsString(FormatFactory & factory) }); } +void registerFileSegmentationEngineLineAsString(FormatFactory & factory) +{ + factory.registerFileSegmentationEngine("LineAsString", &newLineFileSegmentationEngine); +} + + void registerLineAsStringSchemaReader(FormatFactory & factory) { factory.registerExternalSchemaReader("LineAsString", []( diff --git a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp index 931a7587903..80fdda687e2 100644 --- a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp @@ -128,7 +128,7 @@ static void insertInteger(IColumn & column, DataTypePtr type, UInt64 value) case 
TypeIndex::DateTime: [[fallthrough]]; case TypeIndex::UInt32: { - assert_cast(column).insertValue(value); + assert_cast(column).insertValue(static_cast(value)); break; } case TypeIndex::UInt64: @@ -148,7 +148,7 @@ static void insertInteger(IColumn & column, DataTypePtr type, UInt64 value) } case TypeIndex::Int32: { - assert_cast(column).insertValue(value); + assert_cast(column).insertValue(static_cast(value)); break; } case TypeIndex::Int64: diff --git a/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp b/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp index a470e193300..da683913d4d 100644 --- a/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp @@ -99,15 +99,15 @@ void MsgPackRowOutputFormat::serializeField(const IColumn & column, DataTypePtr case TypeIndex::String: { const std::string_view & string = assert_cast(column).getDataAt(row_num).toView(); - packer.pack_bin(string.size()); - packer.pack_bin_body(string.data(), string.size()); + packer.pack_bin(static_cast(string.size())); + packer.pack_bin_body(string.data(), static_cast(string.size())); return; } case TypeIndex::FixedString: { const std::string_view & string = assert_cast(column).getDataAt(row_num).toView(); - packer.pack_bin(string.size()); - packer.pack_bin_body(string.data(), string.size()); + packer.pack_bin(static_cast(string.size())); + packer.pack_bin_body(string.data(), static_cast(string.size())); return; } case TypeIndex::Array: @@ -118,7 +118,7 @@ void MsgPackRowOutputFormat::serializeField(const IColumn & column, DataTypePtr const ColumnArray::Offsets & offsets = column_array.getOffsets(); size_t offset = offsets[row_num - 1]; size_t size = offsets[row_num] - offset; - packer.pack_array(size); + packer.pack_array(static_cast(size)); for (size_t i = 0; i < size; ++i) { serializeField(nested_column, nested_type, offset + i); @@ -152,7 +152,7 @@ void MsgPackRowOutputFormat::serializeField(const IColumn & column, DataTypePtr const auto & offsets = nested_column.getOffsets(); size_t offset = offsets[row_num - 1]; size_t size = offsets[row_num] - offset; - packer.pack_map(size); + packer.pack_map(static_cast(size)); for (size_t i = 0; i < size; ++i) { serializeField(*key_column, map_type.getKeyType(), offset + i); @@ -179,8 +179,8 @@ void MsgPackRowOutputFormat::serializeField(const IColumn & column, DataTypePtr WriteBufferFromOwnString buf; writeBinary(uuid_column.getElement(row_num), buf); std::string_view uuid_bin = buf.stringView(); - packer.pack_bin(uuid_bin.size()); - packer.pack_bin_body(uuid_bin.data(), uuid_bin.size()); + packer.pack_bin(static_cast(uuid_bin.size())); + packer.pack_bin_body(uuid_bin.data(), static_cast(uuid_bin.size())); return; } case FormatSettings::MsgPackUUIDRepresentation::STR: @@ -188,8 +188,8 @@ void MsgPackRowOutputFormat::serializeField(const IColumn & column, DataTypePtr WriteBufferFromOwnString buf; writeText(uuid_column.getElement(row_num), buf); std::string_view uuid_text = buf.stringView(); - packer.pack_str(uuid_text.size()); - packer.pack_bin_body(uuid_text.data(), uuid_text.size()); + packer.pack_str(static_cast(uuid_text.size())); + packer.pack_bin_body(uuid_text.data(), static_cast(uuid_text.size())); return; } case FormatSettings::MsgPackUUIDRepresentation::EXT: @@ -200,7 +200,7 @@ void MsgPackRowOutputFormat::serializeField(const IColumn & column, DataTypePtr writeBinaryBigEndian(value.toUnderType().items[1], buf); std::string_view uuid_ext = buf.stringView(); packer.pack_ext(sizeof(UUID), 
int8_t(MsgPackExtensionTypes::UUIDType)); - packer.pack_ext_body(uuid_ext.data(), uuid_ext.size()); + packer.pack_ext_body(uuid_ext.data(), static_cast(uuid_ext.size())); return; } } diff --git a/src/Processors/Formats/Impl/MySQLOutputFormat.cpp b/src/Processors/Formats/Impl/MySQLOutputFormat.cpp index 344c5c179db..75a03cb6d0e 100644 --- a/src/Processors/Formats/Impl/MySQLOutputFormat.cpp +++ b/src/Processors/Formats/Impl/MySQLOutputFormat.cpp @@ -65,7 +65,7 @@ void MySQLOutputFormat::consume(Chunk chunk) { for (size_t i = 0; i < chunk.getNumRows(); ++i) { - ProtocolText::ResultSetRow row_packet(serializations, chunk.getColumns(), i); + ProtocolText::ResultSetRow row_packet(serializations, chunk.getColumns(), static_cast(i)); packet_endpoint->sendPacket(row_packet); } } @@ -74,7 +74,7 @@ void MySQLOutputFormat::finalizeImpl() { size_t affected_rows = 0; std::string human_readable_info; - if (QueryStatus * process_list_elem = getContext()->getProcessListElement()) + if (QueryStatusPtr process_list_elem = getContext()->getProcessListElement()) { CurrentThread::finalizePerformanceCounters(); QueryStatusInfo info = process_list_elem->getInfo(); diff --git a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp index 36126c21bf1..d6dbd69135a 100644 --- a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp @@ -136,7 +136,7 @@ void ORCBlockInputFormat::prepareReader() if (is_stopped) return; - stripe_total = file_reader->NumberOfStripes(); + stripe_total = static_cast(file_reader->NumberOfStripes()); stripe_current = 0; arrow_column_to_ch_column = std::make_unique( @@ -159,7 +159,7 @@ void ORCBlockInputFormat::prepareReader() { /// LIST type require 2 indices, STRUCT - the number of elements + 1, /// so we should recursively count the number of indices we need for this type. - int indexes_count = countIndicesForType(schema->field(i)->type()); + int indexes_count = static_cast(countIndicesForType(schema->field(i)->type())); const auto & name = schema->field(i)->name(); if (getPort().getHeader().has(name, ignore_case) || nested_table_names.contains(ignore_case ? 
boost::to_lower_copy(name) : name)) { diff --git a/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp b/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp index 2f54cca466c..9172c79c890 100644 --- a/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp @@ -128,8 +128,9 @@ void ParallelParsingInputFormat::onBackgroundException(size_t offset) background_exception = std::current_exception(); if (ParsingException * e = exception_cast(background_exception)) { + /// NOTE: it is not that safe to use line number hack here (may exceed INT_MAX) if (e->getLineNumber() != -1) - e->setLineNumber(e->getLineNumber() + offset); + e->setLineNumber(static_cast(e->getLineNumber() + offset)); auto file_name = getFileNameFromReadBuffer(getReadBuffer()); if (!file_name.empty()) diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index 427c159314b..dd2826287b2 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -161,7 +161,7 @@ void ParquetBlockInputFormat::prepareReader() /// STRUCT type require the number of indexes equal to the number of /// nested elements, so we should recursively /// count the number of indices we need for this type. - int indexes_count = countIndicesForType(schema->field(i)->type()); + int indexes_count = static_cast(countIndicesForType(schema->field(i)->type())); const auto & name = schema->field(i)->name(); if (getPort().getHeader().has(name, ignore_case) || nested_table_names.contains(ignore_case ? boost::to_lower_copy(name) : name)) diff --git a/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp b/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp index c26b6b39e0d..2ad2ad6f7a3 100644 --- a/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include namespace DB @@ -50,7 +51,11 @@ bool RegexpFieldExtractor::parseRow(PeekableReadBuffer & buf) if (line_size > 0 && buf.position()[line_size - 1] == '\r') --line_to_match; - bool match = re2_st::RE2::FullMatchN(re2_st::StringPiece(buf.position(), line_to_match), regexp, re2_arguments_ptrs.data(), re2_arguments_ptrs.size()); + bool match = re2_st::RE2::FullMatchN( + re2_st::StringPiece(buf.position(), line_to_match), + regexp, + re2_arguments_ptrs.data(), + static_cast(re2_arguments_ptrs.size())); if (!match && !skip_unmatched) throw Exception("Line \"" + std::string(buf.position(), line_to_match) + "\" doesn't match the regexp.", ErrorCodes::INCORRECT_DATA); @@ -174,46 +179,9 @@ void registerInputFormatRegexp(FormatFactory & factory) }); } -static std::pair fileSegmentationEngineRegexpImpl(ReadBuffer & in, DB::Memory<> & memory, size_t min_bytes, size_t max_rows) -{ - char * pos = in.position(); - bool need_more_data = true; - size_t number_of_rows = 0; - - while (loadAtPosition(in, memory, pos) && need_more_data) - { - pos = find_first_symbols<'\r', '\n'>(pos, in.buffer().end()); - if (pos > in.buffer().end()) - throw Exception("Position in buffer is out of bounds. 
There must be a bug.", ErrorCodes::LOGICAL_ERROR); - else if (pos == in.buffer().end()) - continue; - - ++number_of_rows; - if ((memory.size() + static_cast(pos - in.position()) >= min_bytes) || (number_of_rows == max_rows)) - need_more_data = false; - - if (*pos == '\n') - { - ++pos; - if (loadAtPosition(in, memory, pos) && *pos == '\r') - ++pos; - } - else if (*pos == '\r') - { - ++pos; - if (loadAtPosition(in, memory, pos) && *pos == '\n') - ++pos; - } - } - - saveUpToPosition(in, memory, pos); - - return {loadAtPosition(in, memory, pos), number_of_rows}; -} - void registerFileSegmentationEngineRegexp(FormatFactory & factory) { - factory.registerFileSegmentationEngine("Regexp", &fileSegmentationEngineRegexpImpl); + factory.registerFileSegmentationEngine("Regexp", &newLineFileSegmentationEngine); } void registerRegexpSchemaReader(FormatFactory & factory) diff --git a/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp b/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp index 785658c0fa2..76fd0d2a907 100644 --- a/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp @@ -53,18 +53,25 @@ TemplateRowInputFormat::TemplateRowInputFormat(const Block & header_, std::uniqu std::vector column_in_format(header_.columns(), false); for (size_t i = 0; i < row_format.columnsCount(); ++i) { - if (row_format.format_idx_to_column_idx[i]) + const auto & column_index = row_format.format_idx_to_column_idx[i]; + if (column_index) { - if (header_.columns() <= *row_format.format_idx_to_column_idx[i]) - row_format.throwInvalidFormat("Column index " + std::to_string(*row_format.format_idx_to_column_idx[i]) + + if (header_.columns() <= *column_index) + row_format.throwInvalidFormat("Column index " + std::to_string(*column_index) + " must be less then number of columns (" + std::to_string(header_.columns()) + ")", i); if (row_format.escaping_rules[i] == EscapingRule::None) row_format.throwInvalidFormat("Column is not skipped, but deserialization type is None", i); - size_t col_idx = *row_format.format_idx_to_column_idx[i]; + size_t col_idx = *column_index; if (column_in_format[col_idx]) row_format.throwInvalidFormat("Duplicate column", i); column_in_format[col_idx] = true; + + checkSupportedDelimiterAfterField(row_format.escaping_rules[i], row_format.delimiters[i + 1], data_types[*column_index]); + } + else + { + checkSupportedDelimiterAfterField(row_format.escaping_rules[i], row_format.delimiters[i + 1], nullptr); } } diff --git a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp index aff4557a4b7..108b4203e3e 100644 --- a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp @@ -101,7 +101,9 @@ Chunk ValuesBlockInputFormat::generate() return {}; } - finalizeObjectColumns(columns); + for (const auto & column : columns) + column->finalize(); + size_t rows_in_block = columns[0]->size(); return Chunk{std::move(columns), rows_in_block}; } @@ -350,7 +352,7 @@ bool ValuesBlockInputFormat::parseExpression(IColumn & column, size_t column_idx Expected expected; Tokens tokens(buf->position(), buf->buffer().end()); - IParser::Pos token_iterator(tokens, settings.max_parser_depth); + IParser::Pos token_iterator(tokens, static_cast(settings.max_parser_depth)); ASTPtr ast; bool parsed = parser.parse(token_iterator, ast, expected); diff --git a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h 
b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h index d2dd28eb15a..9d0734f4567 100644 --- a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h +++ b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h @@ -41,6 +41,7 @@ protected: void resetParser() override; bool isGarbageAfterField(size_t index, ReadBuffer::Position pos) override; void setReadBuffer(ReadBuffer & in_) override; + void readPrefix() override; const FormatSettings format_settings; DataTypes data_types; @@ -48,7 +49,6 @@ protected: private: bool readRow(MutableColumns & columns, RowReadExtension & ext) override; - void readPrefix() override; bool parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out) override; void tryDeserializeField(const DataTypePtr & type, IColumn & column, size_t file_column) override; diff --git a/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp index ebc1b37074b..db08f3ffbd3 100644 --- a/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp @@ -1,7 +1,5 @@ #include -#include -#include #include #include #include @@ -18,70 +16,6 @@ AggregatingSortedAlgorithm::ColumnsDefinition::ColumnsDefinition() = default; AggregatingSortedAlgorithm::ColumnsDefinition::ColumnsDefinition(ColumnsDefinition &&) noexcept = default; AggregatingSortedAlgorithm::ColumnsDefinition::~ColumnsDefinition() = default; -/// Stores information for aggregation of AggregateFunction columns -struct AggregatingSortedAlgorithm::AggregateDescription -{ - ColumnAggregateFunction * column = nullptr; - const size_t column_number = 0; /// Position in header. - - AggregateDescription() = default; - explicit AggregateDescription(size_t col_number) : column_number(col_number) {} -}; - -/// Stores information for aggregation of SimpleAggregateFunction columns -struct AggregatingSortedAlgorithm::SimpleAggregateDescription -{ - /// An aggregate function 'anyLast', 'sum'... - AggregateFunctionPtr function; - IAggregateFunction::AddFunc add_function = nullptr; - - size_t column_number = 0; - IColumn * column = nullptr; - - /// For LowCardinality, convert is converted to nested type. nested_type is nullptr if no conversion needed. - const DataTypePtr nested_type; /// Nested type for LowCardinality, if it is. - const DataTypePtr real_type; /// Type in header. 
- - AlignedBuffer state; - bool created = false; - - SimpleAggregateDescription( - AggregateFunctionPtr function_, const size_t column_number_, - DataTypePtr nested_type_, DataTypePtr real_type_) - : function(std::move(function_)), column_number(column_number_) - , nested_type(std::move(nested_type_)), real_type(std::move(real_type_)) - { - add_function = function->getAddressOfAddFunction(); - state.reset(function->sizeOfData(), function->alignOfData()); - } - - void createState() - { - if (created) - return; - function->create(state.data()); - created = true; - } - - void destroyState() - { - if (!created) - return; - function->destroy(state.data()); - created = false; - } - - /// Explicitly destroy aggregation state if the stream is terminated - ~SimpleAggregateDescription() - { - destroyState(); - } - - SimpleAggregateDescription() = default; - SimpleAggregateDescription(SimpleAggregateDescription &&) = default; - SimpleAggregateDescription(const SimpleAggregateDescription &) = delete; -}; - static AggregatingSortedAlgorithm::ColumnsDefinition defineColumns( const Block & header, const SortDescription & description) { @@ -191,6 +125,39 @@ static void postprocessChunk(Chunk & chunk, const AggregatingSortedAlgorithm::Co } +AggregatingSortedAlgorithm::SimpleAggregateDescription::SimpleAggregateDescription( + AggregateFunctionPtr function_, const size_t column_number_, + DataTypePtr nested_type_, DataTypePtr real_type_) + : function(std::move(function_)), column_number(column_number_) + , nested_type(std::move(nested_type_)), real_type(std::move(real_type_)) +{ + add_function = function->getAddressOfAddFunction(); + state.reset(function->sizeOfData(), function->alignOfData()); +} + +void AggregatingSortedAlgorithm::SimpleAggregateDescription::createState() +{ + if (created) + return; + function->create(state.data()); + created = true; +} + +void AggregatingSortedAlgorithm::SimpleAggregateDescription::destroyState() +{ + if (!created) + return; + function->destroy(state.data()); + created = false; +} + +/// Explicitly destroy aggregation state if the stream is terminated +AggregatingSortedAlgorithm::SimpleAggregateDescription::~SimpleAggregateDescription() +{ + destroyState(); +} + + AggregatingSortedAlgorithm::AggregatingMergedData::AggregatingMergedData( MutableColumns columns_, UInt64 max_block_size_, ColumnsDefinition & def_) : MergedData(std::move(columns_), false, max_block_size_), def(def_) diff --git a/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.h b/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.h index e572ed7d526..d670242ed81 100644 --- a/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.h +++ b/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.h @@ -1,5 +1,7 @@ #pragma once +#include +#include #include #include @@ -23,8 +25,48 @@ public: void consume(Input & input, size_t source_num) override; Status merge() override; - struct SimpleAggregateDescription; - struct AggregateDescription; + /// Stores information for aggregation of SimpleAggregateFunction columns + struct SimpleAggregateDescription + { + /// An aggregate function 'anyLast', 'sum'... + AggregateFunctionPtr function; + IAggregateFunction::AddFunc add_function = nullptr; + + size_t column_number = 0; + IColumn * column = nullptr; + + /// For LowCardinality, convert is converted to nested type. nested_type is nullptr if no conversion needed. + const DataTypePtr nested_type; /// Nested type for LowCardinality, if it is. + const DataTypePtr real_type; /// Type in header. 
+ + AlignedBuffer state; + bool created = false; + + SimpleAggregateDescription( + AggregateFunctionPtr function_, const size_t column_number_, + DataTypePtr nested_type_, DataTypePtr real_type_); + + void createState(); + + void destroyState(); + + /// Explicitly destroy aggregation state if the stream is terminated + ~SimpleAggregateDescription(); + + SimpleAggregateDescription() = default; + SimpleAggregateDescription(SimpleAggregateDescription &&) = default; + SimpleAggregateDescription(const SimpleAggregateDescription &) = delete; + }; + + /// Stores information for aggregation of AggregateFunction columns + struct AggregateDescription + { + ColumnAggregateFunction * column = nullptr; + const size_t column_number = 0; /// Position in header. + + AggregateDescription() = default; + explicit AggregateDescription(size_t col_number) : column_number(col_number) {} + }; /// This structure define columns into one of three types: /// * columns which are not aggregate functions and not needed to be aggregated diff --git a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp index 8636813132d..c79c667a988 100644 --- a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp @@ -23,10 +23,6 @@ namespace ErrorCodes extern const int CORRUPTED_DATA; } -SummingSortedAlgorithm::ColumnsDefinition::ColumnsDefinition() = default; -SummingSortedAlgorithm::ColumnsDefinition::ColumnsDefinition(ColumnsDefinition &&) noexcept = default; -SummingSortedAlgorithm::ColumnsDefinition::~ColumnsDefinition() = default; - /// Stores numbers of key-columns and value-columns. struct SummingSortedAlgorithm::MapDescription { @@ -777,4 +773,8 @@ IMergingAlgorithm::Status SummingSortedAlgorithm::merge() return Status(merged_data.pull(), true); } +SummingSortedAlgorithm::ColumnsDefinition::ColumnsDefinition() = default; +SummingSortedAlgorithm::ColumnsDefinition::ColumnsDefinition(ColumnsDefinition &&) noexcept = default; +SummingSortedAlgorithm::ColumnsDefinition::~ColumnsDefinition() = default; + } diff --git a/src/Processors/QueryPlan/BuildQueryPipelineSettings.h b/src/Processors/QueryPlan/BuildQueryPipelineSettings.h index fadbd061fbd..3b5e4e06953 100644 --- a/src/Processors/QueryPlan/BuildQueryPipelineSettings.h +++ b/src/Processors/QueryPlan/BuildQueryPipelineSettings.h @@ -5,16 +5,18 @@ #include + namespace DB { struct Settings; class QueryStatus; +using QueryStatusPtr = std::shared_ptr; struct BuildQueryPipelineSettings { ExpressionActionsSettings actions_settings; - QueryStatus * process_list_element = nullptr; + QueryStatusPtr process_list_element; ProgressCallback progress_callback = nullptr; const ExpressionActionsSettings & getActionsSettings() const { return actions_settings; } diff --git a/src/Processors/QueryPlan/CreatingSetsStep.cpp b/src/Processors/QueryPlan/CreatingSetsStep.cpp index bd079c0b8a9..b52d86aa725 100644 --- a/src/Processors/QueryPlan/CreatingSetsStep.cpp +++ b/src/Processors/QueryPlan/CreatingSetsStep.cpp @@ -122,11 +122,8 @@ void CreatingSetsStep::describePipeline(FormatSettings & settings) const IQueryPlanStep::describePipeline(processors, settings); } -void addCreatingSetsStep(QueryPlan & query_plan, PreparedSetsPtr prepared_sets, ContextPtr context) +void addCreatingSetsStep(QueryPlan & query_plan, PreparedSets::SubqueriesForSets subqueries_for_sets, ContextPtr context) { - if (!prepared_sets || prepared_sets->empty()) - return; - DataStreams 
input_streams; input_streams.emplace_back(query_plan.getCurrentDataStream()); @@ -134,7 +131,7 @@ void addCreatingSetsStep(QueryPlan & query_plan, PreparedSetsPtr prepared_sets, plans.emplace_back(std::make_unique(std::move(query_plan))); query_plan = QueryPlan(); - for (auto & [description, subquery_for_set] : prepared_sets->detachSubqueries()) + for (auto & [description, subquery_for_set] : subqueries_for_sets) { if (!subquery_for_set.hasSource()) continue; @@ -166,4 +163,12 @@ void addCreatingSetsStep(QueryPlan & query_plan, PreparedSetsPtr prepared_sets, query_plan.unitePlans(std::move(creating_sets), std::move(plans)); } +void addCreatingSetsStep(QueryPlan & query_plan, PreparedSetsPtr prepared_sets, ContextPtr context) +{ + if (!prepared_sets || prepared_sets->empty()) + return; + + addCreatingSetsStep(query_plan, prepared_sets->detachSubqueries(), context); +} + } diff --git a/src/Processors/QueryPlan/CreatingSetsStep.h b/src/Processors/QueryPlan/CreatingSetsStep.h index 9c61eb2012c..9995af7bca7 100644 --- a/src/Processors/QueryPlan/CreatingSetsStep.h +++ b/src/Processors/QueryPlan/CreatingSetsStep.h @@ -49,6 +49,8 @@ private: Processors processors; }; +void addCreatingSetsStep(QueryPlan & query_plan, PreparedSets::SubqueriesForSets subqueries_for_sets, ContextPtr context); + void addCreatingSetsStep(QueryPlan & query_plan, PreparedSetsPtr prepared_sets, ContextPtr context); } diff --git a/src/Processors/QueryPlan/IntersectOrExceptStep.h b/src/Processors/QueryPlan/IntersectOrExceptStep.h index b2738cb297f..d7eab574431 100644 --- a/src/Processors/QueryPlan/IntersectOrExceptStep.h +++ b/src/Processors/QueryPlan/IntersectOrExceptStep.h @@ -8,9 +8,9 @@ namespace DB class IntersectOrExceptStep : public IQueryPlanStep { -using Operator = ASTSelectIntersectExceptQuery::Operator; - public: + using Operator = ASTSelectIntersectExceptQuery::Operator; + /// max_threads is used to limit the number of threads for result pipeline. IntersectOrExceptStep(DataStreams input_streams_, Operator operator_, size_t max_threads_ = 0); diff --git a/src/Processors/QueryPlan/Optimizations/optimizePrimaryKeyCondition.cpp b/src/Processors/QueryPlan/Optimizations/optimizePrimaryKeyCondition.cpp index 7d682c408e5..984c76701ba 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizePrimaryKeyCondition.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizePrimaryKeyCondition.cpp @@ -17,7 +17,7 @@ void optimizePrimaryKeyCondition(QueryPlan::Node & root) size_t next_child = 0; }; - std::deque stack; + std::vector stack; stack.push_back({.node = &root}); while (!stack.empty()) @@ -27,29 +27,29 @@ void optimizePrimaryKeyCondition(QueryPlan::Node & root) /// Traverse all children first. 
if (frame.next_child < frame.node->children.size()) { - stack.push_back({.node = frame.node->children[frame.next_child]}); - + auto next_frame = Frame{.node = frame.node->children[frame.next_child]}; ++frame.next_child; + stack.push_back(next_frame); continue; } - auto add_filter = [&](auto & storage) + auto add_read_from_storage_filter = [&](auto & storage) { - for (auto iter=stack.rbegin() + 1; iter!=stack.rend(); ++iter) + for (auto iter = stack.rbegin() + 1; iter != stack.rend(); ++iter) { if (auto * filter_step = typeid_cast(iter->node->step.get())) storage.addFilter(filter_step->getExpression(), filter_step->getFilterColumnName()); else if (typeid_cast(iter->node->step.get())) - ; + continue; else break; } }; if (auto * read_from_merge_tree = typeid_cast(frame.node->step.get())) - add_filter(*read_from_merge_tree); + add_read_from_storage_filter(*read_from_merge_tree); else if (auto * read_from_merge = typeid_cast(frame.node->step.get())) - add_filter(*read_from_merge); + add_read_from_storage_filter(*read_from_merge); stack.pop_back(); } diff --git a/src/Processors/QueryPlan/Optimizations/reuseStorageOrderingForWindowFunctions.cpp b/src/Processors/QueryPlan/Optimizations/reuseStorageOrderingForWindowFunctions.cpp index a8431d38a78..c74b5ed915b 100644 --- a/src/Processors/QueryPlan/Optimizations/reuseStorageOrderingForWindowFunctions.cpp +++ b/src/Processors/QueryPlan/Optimizations/reuseStorageOrderingForWindowFunctions.cpp @@ -62,7 +62,7 @@ size_t tryReuseStorageOrderingForWindowFunctions(QueryPlan::Node * parent_node, } auto context = read_from_merge_tree->getContext(); - if (!context->getSettings().optimize_read_in_window_order) + if (!context->getSettings().optimize_read_in_window_order || context->getSettingsRef().allow_experimental_analyzer) { return 0; } @@ -70,6 +70,10 @@ size_t tryReuseStorageOrderingForWindowFunctions(QueryPlan::Node * parent_node, const auto & query_info = read_from_merge_tree->getQueryInfo(); const auto * select_query = query_info.query->as(); + /// TODO: Analyzer syntax analyzer result + if (!query_info.syntax_analyzer_result) + return 0; + ManyExpressionActions order_by_elements_actions; const auto & window_desc = window->getWindowDescription(); diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index b340073e73d..b268e7deff0 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -173,6 +173,9 @@ Pipe ReadFromMergeTree::readFromPool( total_rows += part.getRowsCount(); } + if (query_info.limit > 0 && query_info.limit < total_rows) + total_rows = query_info.limit; + const auto & settings = context->getSettingsRef(); const auto & client_info = context->getClientInfo(); MergeTreeReadPool::BackoffSettings backoff_settings(settings); @@ -246,10 +249,26 @@ ProcessorPtr ReadFromMergeTree::createSource( }; } - return std::make_shared( + auto total_rows = part.getRowsCount(); + if (query_info.limit > 0 && query_info.limit < total_rows) + total_rows = query_info.limit; + + /// Actually it means that parallel reading from replicas enabled + /// and we have to collaborate with initiator. + /// In this case we won't set approximate rows, because it will be accounted multiple times. + /// Also do not count amount of read rows if we read in order of sorting key, + /// because we don't know actual amount of read rows in case when limit is set. 
+ bool set_rows_approx = !extension.has_value() && !reader_settings.read_in_order; + + auto source = std::make_shared( data, storage_snapshot, part.data_part, max_block_size, preferred_block_size_bytes, preferred_max_column_in_block_size_bytes, required_columns, part.ranges, use_uncompressed_cache, prewhere_info, actions_settings, reader_settings, virt_column_names, part.part_index_in_query, has_limit_below_one_block, std::move(extension)); + + if (set_rows_approx) + source -> addTotalRowsApprox(total_rows); + + return source; } Pipe ReadFromMergeTree::readInOrder( @@ -853,7 +872,7 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToRead( const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & query_info, ContextPtr context, - unsigned num_streams, + size_t num_streams, std::shared_ptr max_block_numbers_to_read, const MergeTreeData & data, const Names & real_column_names, @@ -906,8 +925,15 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToRead( for (const auto & node : added_filter_nodes.nodes) nodes.nodes.push_back(node); - key_condition.emplace( - std::move(nodes), query_info.syntax_analyzer_result, query_info.prepared_sets, context, primary_key_columns, primary_key.expression); + NameSet array_join_name_set; + if (query_info.syntax_analyzer_result) + array_join_name_set = query_info.syntax_analyzer_result->getArrayJoinSourceNameSet(); + + key_condition.emplace(std::move(nodes), + context, + primary_key_columns, + primary_key.expression, + array_join_name_set); } else { @@ -945,7 +971,7 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToRead( result.index_stats); result.sampling = MergeTreeDataSelectExecutor::getSampling( - select, + query_info, metadata_snapshot->getColumns().getAllPhysical(), parts, *key_condition, @@ -965,7 +991,13 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToRead( auto reader_settings = getMergeTreeReaderSettings(context, query_info); bool use_skip_indexes = settings.use_skip_indexes; - if (select.final() && !settings.use_skip_indexes_if_final) + bool final = false; + if (query_info.table_expression_modifiers) + final = query_info.table_expression_modifiers->hasFinal(); + else + final = select.final(); + + if (final && !settings.use_skip_indexes_if_final) use_skip_indexes = false; result.parts_with_ranges = MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipIndexes( @@ -1097,7 +1129,13 @@ void ReadFromMergeTree::initializePipeline(QueryPipelineBuilder & pipeline, cons Names column_names_to_read = std::move(result.column_names_to_read); const auto & select = query_info.query->as(); - if (!select.final() && result.sampling.use_sampling) + bool final = false; + if (query_info.table_expression_modifiers) + final = query_info.table_expression_modifiers->hasFinal(); + else + final = select.final(); + + if (!final && result.sampling.use_sampling) { /// Add columns needed for `sample_by_ast` to `column_names_to_read`. /// Skip this if final was used, because such columns were already added from PK. @@ -1112,7 +1150,7 @@ void ReadFromMergeTree::initializePipeline(QueryPipelineBuilder & pipeline, cons const auto & input_order_info = query_info.getInputOrderInfo(); - if (select.final()) + if (final) { /// Add columns needed to calculate the sorting expression and the sign. 
std::vector add_columns = metadata_for_reading->getColumnsRequiredForSortingKey(); diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.h b/src/Processors/QueryPlan/ReadFromMergeTree.h index 0a013748e91..15258eb6c40 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.h +++ b/src/Processors/QueryPlan/ReadFromMergeTree.h @@ -140,7 +140,7 @@ public: const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & query_info, ContextPtr context, - unsigned num_streams, + size_t num_streams, std::shared_ptr max_block_numbers_to_read, const MergeTreeData & data, const Names & real_column_names, diff --git a/src/Processors/Sources/MySQLSource.cpp b/src/Processors/Sources/MySQLSource.cpp index e868182f49b..ecc80bef40b 100644 --- a/src/Processors/Sources/MySQLSource.cpp +++ b/src/Processors/Sources/MySQLSource.cpp @@ -141,7 +141,7 @@ namespace read_bytes_size += 2; break; case ValueType::vtUInt32: - assert_cast(column).insertValue(value.getUInt()); + assert_cast(column).insertValue(static_cast(value.getUInt())); read_bytes_size += 4; break; case ValueType::vtUInt64: @@ -171,7 +171,7 @@ namespace read_bytes_size += 2; break; case ValueType::vtInt32: - assert_cast(column).insertValue(value.getInt()); + assert_cast(column).insertValue(static_cast(value.getInt())); read_bytes_size += 4; break; case ValueType::vtInt64: @@ -236,7 +236,7 @@ namespace readDateTimeText(time, in, assert_cast(data_type).getTimeZone()); if (time < 0) time = 0; - assert_cast(column).insertValue(time); + assert_cast(column).insertValue(static_cast(time)); read_bytes_size += 4; break; } diff --git a/src/Processors/Sources/SQLiteSource.cpp b/src/Processors/Sources/SQLiteSource.cpp index d2c6f2ebb23..79c4be7f692 100644 --- a/src/Processors/Sources/SQLiteSource.cpp +++ b/src/Processors/Sources/SQLiteSource.cpp @@ -35,7 +35,11 @@ SQLiteSource::SQLiteSource( description.init(sample_block); sqlite3_stmt * compiled_stmt = nullptr; - int status = sqlite3_prepare_v2(sqlite_db.get(), query_str.c_str(), query_str.size() + 1, &compiled_stmt, nullptr); + int status = sqlite3_prepare_v2( + sqlite_db.get(), + query_str.c_str(), + static_cast(query_str.size() + 1), + &compiled_stmt, nullptr); if (status != SQLITE_OK) throw Exception(ErrorCodes::SQLITE_ENGINE_ERROR, @@ -109,7 +113,7 @@ Chunk SQLiteSource::generate() return Chunk(std::move(columns), num_rows); } -void SQLiteSource::insertValue(IColumn & column, ExternalResultDescription::ValueType type, size_t idx) +void SQLiteSource::insertValue(IColumn & column, ExternalResultDescription::ValueType type, int idx) { switch (type) { @@ -120,7 +124,7 @@ void SQLiteSource::insertValue(IColumn & column, ExternalResultDescription::Valu assert_cast(column).insertValue(sqlite3_column_int(compiled_statement.get(), idx)); break; case ValueType::vtUInt32: - assert_cast(column).insertValue(sqlite3_column_int64(compiled_statement.get(), idx)); + assert_cast(column).insertValue(static_cast(sqlite3_column_int64(compiled_statement.get(), idx))); break; case ValueType::vtUInt64: /// There is no uint64 in sqlite3, only int and int64 diff --git a/src/Processors/Sources/SQLiteSource.h b/src/Processors/Sources/SQLiteSource.h index d792483c70f..c1bae4d8a67 100644 --- a/src/Processors/Sources/SQLiteSource.h +++ b/src/Processors/Sources/SQLiteSource.h @@ -33,7 +33,7 @@ private: Chunk generate() override; - void insertValue(IColumn & column, ExternalResultDescription::ValueType type, size_t idx); + void insertValue(IColumn & column, ExternalResultDescription::ValueType type, int idx); String query_str; 
UInt64 max_block_size; diff --git a/src/Processors/Sources/ShellCommandSource.cpp b/src/Processors/Sources/ShellCommandSource.cpp index 8598b0197fc..3f70abaea6d 100644 --- a/src/Processors/Sources/ShellCommandSource.cpp +++ b/src/Processors/Sources/ShellCommandSource.cpp @@ -77,7 +77,7 @@ static bool pollFd(int fd, size_t timeout_milliseconds, int events) while (true) { - res = poll(&pfd, 1, timeout_milliseconds); + res = poll(&pfd, 1, static_cast(timeout_milliseconds)); if (res < 0) { @@ -527,7 +527,7 @@ Pipe ShellCommandSourceCoordinator::createPipe( } else { - auto descriptor = i + 2; + int descriptor = static_cast(i) + 2; auto it = process->write_fds.find(descriptor); if (it == process->write_fds.end()) throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Process does not contain descriptor to write {}", descriptor); diff --git a/src/Processors/TTL/ITTLAlgorithm.cpp b/src/Processors/TTL/ITTLAlgorithm.cpp index 489941950b5..c71ad740719 100644 --- a/src/Processors/TTL/ITTLAlgorithm.cpp +++ b/src/Processors/TTL/ITTLAlgorithm.cpp @@ -48,13 +48,13 @@ ColumnPtr ITTLAlgorithm::executeExpressionAndGetColumn( UInt32 ITTLAlgorithm::getTimestampByIndex(const IColumn * column, size_t index) const { if (const ColumnUInt16 * column_date = typeid_cast(column)) - return date_lut.fromDayNum(DayNum(column_date->getData()[index])); + return static_cast(date_lut.fromDayNum(DayNum(column_date->getData()[index]))); else if (const ColumnUInt32 * column_date_time = typeid_cast(column)) return column_date_time->getData()[index]; else if (const ColumnConst * column_const = typeid_cast(column)) { if (typeid_cast(&column_const->getDataColumn())) - return date_lut.fromDayNum(DayNum(column_const->getValue())); + return static_cast(date_lut.fromDayNum(DayNum(column_const->getValue()))); else if (typeid_cast(&column_const->getDataColumn())) return column_const->getValue(); } diff --git a/src/Processors/Transforms/ArrayJoinTransform.cpp b/src/Processors/Transforms/ArrayJoinTransform.cpp index 9058d7df2a0..eea1469c7a6 100644 --- a/src/Processors/Transforms/ArrayJoinTransform.cpp +++ b/src/Processors/Transforms/ArrayJoinTransform.cpp @@ -4,11 +4,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - Block ArrayJoinTransform::transformHeader(Block header, const ArrayJoinActionPtr & array_join) { array_join->execute(header); diff --git a/src/Processors/Transforms/CountingTransform.h b/src/Processors/Transforms/CountingTransform.h index bd2ec58a27f..05d8e2aeac8 100644 --- a/src/Processors/Transforms/CountingTransform.h +++ b/src/Processors/Transforms/CountingTransform.h @@ -9,6 +9,7 @@ namespace DB { class QueryStatus; +using QueryStatusPtr = std::shared_ptr; class ThreadStatus; /// Proxy class which counts number of written block, rows, bytes @@ -29,7 +30,7 @@ public: progress_callback = callback; } - void setProcessListElement(QueryStatus * elem) + void setProcessListElement(QueryStatusPtr elem) { process_elem = elem; } @@ -50,7 +51,7 @@ public: protected: Progress progress; ProgressCallback progress_callback; - QueryStatus * process_elem = nullptr; + QueryStatusPtr process_elem; ThreadStatus * thread_status = nullptr; /// Quota is used to limit amount of written bytes. 
diff --git a/src/Processors/Transforms/FillingTransform.cpp b/src/Processors/Transforms/FillingTransform.cpp index 16abb72cbd4..78ae6b8771f 100644 --- a/src/Processors/Transforms/FillingTransform.cpp +++ b/src/Processors/Transforms/FillingTransform.cpp @@ -40,7 +40,9 @@ static FillColumnDescription::StepFunction getStepFunction( { #define DECLARE_CASE(NAME) \ case IntervalKind::NAME: \ - return [step, scale, &date_lut](Field & field) { field = Add##NAME##sImpl::execute(static_cast(field.get()), step, date_lut, scale); }; + return [step, scale, &date_lut](Field & field) { \ + field = Add##NAME##sImpl::execute(static_cast(\ + field.get()), static_cast(step), date_lut, scale); }; FOR_EACH_INTERVAL_KIND(DECLARE_CASE) #undef DECLARE_CASE diff --git a/src/Processors/Transforms/MongoDBSource.cpp b/src/Processors/Transforms/MongoDBSource.cpp index b548e8c4184..9eef17cf40d 100644 --- a/src/Processors/Transforms/MongoDBSource.cpp +++ b/src/Processors/Transforms/MongoDBSource.cpp @@ -184,7 +184,7 @@ namespace break; case Poco::MongoDB::ElementTraits::TypeId: assert_cast &>(column).getData().push_back( - static_cast &>(value).value()); + static_cast(static_cast &>(value).value())); break; case Poco::MongoDB::ElementTraits::TypeId: assert_cast &>(column).getData().push_back(static_cast( @@ -282,7 +282,7 @@ namespace ErrorCodes::TYPE_MISMATCH}; assert_cast(column).getData().push_back( - static_cast &>(value).value().epochTime()); + static_cast(static_cast &>(value).value().epochTime())); break; } case ValueType::vtUUID: diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index 07bfb274a86..83b0b202d74 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -28,7 +28,6 @@ namespace ErrorCodes { extern const int BAD_ARGUMENTS; extern const int NOT_IMPLEMENTED; - extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; } @@ -63,20 +62,21 @@ static int compareValuesWithOffset(const IColumn * _compared_column, _compared_column); const auto * reference_column = assert_cast( _reference_column); + + using ValueType = typename ColumnType::ValueType; // Note that the storage type of offset returned by get<> is different, so // we need to specify the type explicitly. - const typename ColumnType::ValueType offset - = _offset.get(); + const ValueType offset = static_cast(_offset.get()); assert(offset >= 0); const auto compared_value_data = compared_column->getDataAt(compared_row); - assert(compared_value_data.size == sizeof(typename ColumnType::ValueType)); - auto compared_value = unalignedLoad( + assert(compared_value_data.size == sizeof(ValueType)); + auto compared_value = unalignedLoad( compared_value_data.data); const auto reference_value_data = reference_column->getDataAt(reference_row); - assert(reference_value_data.size == sizeof(typename ColumnType::ValueType)); - auto reference_value = unalignedLoad( + assert(reference_value_data.size == sizeof(ValueType)); + auto reference_value = unalignedLoad( reference_value_data.data); bool is_overflow; @@ -85,15 +85,6 @@ static int compareValuesWithOffset(const IColumn * _compared_column, else is_overflow = common::addOverflow(reference_value, offset, reference_value); -// fmt::print(stderr, -// "compared [{}] = {}, old ref {}, shifted ref [{}] = {}, offset {} preceding {} overflow {} to negative {}\n", -// compared_row, toString(compared_value), -// // fmt doesn't like char8_t. 
-// static_cast(unalignedLoad(reference_value_data.data)), -// reference_row, toString(reference_value), -// toString(offset), offset_is_preceding, -// is_overflow, offset_is_preceding); - if (is_overflow) { if (offset_is_preceding) @@ -984,22 +975,9 @@ void WindowTransform::writeOutCurrentRow() // FIXME does it also allocate the result on the arena? // We'll have to pass it out with blocks then... - if (a->isState()) - { - /// AggregateFunction's states should be inserted into column using specific way - auto * res_col_aggregate_function = typeid_cast(result_column); - if (!res_col_aggregate_function) - { - throw Exception("State function " + a->getName() + " inserts results into non-state column ", - ErrorCodes::ILLEGAL_COLUMN); - } - res_col_aggregate_function->insertFrom(buf); - } - else - { - a->insertResultInto(buf, *result_column, arena.get()); - } - + /// We should use insertMergeResultInto to insert result into ColumnAggregateFunction + /// correctly if result contains AggregateFunction's states + a->insertMergeResultInto(buf, *result_column, arena.get()); } } diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 174aaf67ec5..830f400faf2 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -620,9 +620,10 @@ void PushingToLiveViewSink::consume(Chunk chunk) { Progress local_progress(chunk.getNumRows(), chunk.bytes(), 0); StorageLiveView::writeIntoLiveView(live_view, getHeader().cloneWithColumns(chunk.detachColumns()), context); - auto * process = context->getProcessListElement(); - if (process) + + if (auto process = context->getProcessListElement()) process->updateProgressIn(local_progress); + ProfileEvents::increment(ProfileEvents::SelectedRows, local_progress.read_rows); ProfileEvents::increment(ProfileEvents::SelectedBytes, local_progress.read_bytes); } @@ -643,9 +644,10 @@ void PushingToWindowViewSink::consume(Chunk chunk) Progress local_progress(chunk.getNumRows(), chunk.bytes(), 0); StorageWindowView::writeIntoWindowView( window_view, getHeader().cloneWithColumns(chunk.detachColumns()), context); - auto * process = context->getProcessListElement(); - if (process) + + if (auto process = context->getProcessListElement()) process->updateProgressIn(local_progress); + ProfileEvents::increment(ProfileEvents::SelectedRows, local_progress.read_rows); ProfileEvents::increment(ProfileEvents::SelectedBytes, local_progress.read_bytes); } diff --git a/src/Processors/tests/gtest_exception_on_incorrect_pipeline.cpp b/src/Processors/tests/gtest_exception_on_incorrect_pipeline.cpp index b137eaf0f47..40718bd968a 100644 --- a/src/Processors/tests/gtest_exception_on_incorrect_pipeline.cpp +++ b/src/Processors/tests/gtest_exception_on_incorrect_pipeline.cpp @@ -23,11 +23,11 @@ TEST(Processors, PortsConnected) connect(source->getPort(), sink->getPort()); - Processors processors; - processors.emplace_back(std::move(source)); - processors.emplace_back(std::move(sink)); + auto processors = std::make_shared(); + processors->emplace_back(std::move(source)); + processors->emplace_back(std::move(sink)); - QueryStatus * element = nullptr; + QueryStatusPtr element; PipelineExecutor executor(processors, element); executor.execute(1); } @@ -46,14 +46,14 @@ TEST(Processors, PortsNotConnected) /// connect(source->getPort(), sink->getPort()); - Processors processors; - processors.emplace_back(std::move(source)); - processors.emplace_back(std::move(sink)); + 
auto processors = std::make_shared<Processors>(); + processors->emplace_back(std::move(source)); + processors->emplace_back(std::move(sink)); #ifndef ABORT_ON_LOGICAL_ERROR try { - QueryStatus * element = nullptr; + QueryStatusPtr element; PipelineExecutor executor(processors, element); executor.execute(1); ASSERT_TRUE(false) << "Should have thrown."; diff --git a/src/QueryPipeline/BlockIO.cpp b/src/QueryPipeline/BlockIO.cpp index 35463ca6be9..9e42e06c722 100644 --- a/src/QueryPipeline/BlockIO.cpp +++ b/src/QueryPipeline/BlockIO.cpp @@ -53,9 +53,8 @@ void BlockIO::setAllDataSent() const /// - internal /// - SHOW PROCESSLIST if (process_list_entry) - (*process_list_entry)->setAllDataSent(); + process_list_entry->getQueryStatus()->setAllDataSent(); } } - diff --git a/src/QueryPipeline/BlockIO.h b/src/QueryPipeline/BlockIO.h index 1f2a8f6f033..b69f86ac684 100644 --- a/src/QueryPipeline/BlockIO.h +++ b/src/QueryPipeline/BlockIO.h @@ -34,9 +34,8 @@ struct BlockIO void onFinish() { if (finish_callback) - { finish_callback(pipeline); - } + pipeline.reset(); } diff --git a/src/QueryPipeline/Pipe.cpp b/src/QueryPipeline/Pipe.cpp index 291739079a2..62a928d814c 100644 --- a/src/QueryPipeline/Pipe.cpp +++ b/src/QueryPipeline/Pipe.cpp @@ -102,7 +102,12 @@ static OutputPort * uniteTotals(const OutputPortRawPtrs & ports, const Block & h return totals_port; } +Pipe::Pipe() : processors(std::make_shared<Processors>()) +{ +} + Pipe::Pipe(ProcessorPtr source, OutputPort * output, OutputPort * totals, OutputPort * extremes) + : processors(std::make_shared<Processors>()) { if (!source->getInputs().empty()) throw Exception( @@ -155,11 +160,12 @@ Pipe::Pipe(ProcessorPtr source, OutputPort * output, OutputPort * totals, Output totals_port = totals; extremes_port = extremes; output_ports.push_back(output); - processors.emplace_back(std::move(source)); + processors->emplace_back(std::move(source)); max_parallel_streams = 1; } Pipe::Pipe(ProcessorPtr source) + : processors(std::make_shared<Processors>()) { checkSource(*source); @@ -168,18 +174,18 @@ Pipe::Pipe(ProcessorPtr source) output_ports.push_back(&source->getOutputs().front()); header = output_ports.front()->getHeader(); - processors.emplace_back(std::move(source)); + processors->emplace_back(std::move(source)); max_parallel_streams = 1; } -Pipe::Pipe(Processors processors_) : processors(std::move(processors_)) +Pipe::Pipe(std::shared_ptr<Processors> processors_) : processors(std::move(processors_)) { /// Create hash table with processors.
std::unordered_set set; - for (const auto & processor : processors) + for (const auto & processor : *processors) set.emplace(processor.get()); - for (auto & processor : processors) + for (auto & processor : *processors) { for (const auto & port : processor->getInputs()) { @@ -225,7 +231,7 @@ Pipe::Pipe(Processors processors_) : processors(std::move(processors_)) max_parallel_streams = output_ports.size(); if (collected_processors) - for (const auto & processor : processors) + for (const auto & processor : *processors) collected_processors->emplace_back(processor); } @@ -311,7 +317,7 @@ Pipe Pipe::unitePipes(Pipes pipes, Processors * collected_processors, bool allow if (!allow_empty_header || pipe.header) assertCompatibleHeader(pipe.header, res.header, "Pipe::unitePipes"); - res.processors.insert(res.processors.end(), pipe.processors.begin(), pipe.processors.end()); + res.processors->insert(res.processors->end(), pipe.processors->begin(), pipe.processors->end()); res.output_ports.insert(res.output_ports.end(), pipe.output_ports.begin(), pipe.output_ports.end()); res.max_parallel_streams += pipe.max_parallel_streams; @@ -323,15 +329,15 @@ Pipe Pipe::unitePipes(Pipes pipes, Processors * collected_processors, bool allow extremes.emplace_back(pipe.extremes_port); } - size_t num_processors = res.processors.size(); + size_t num_processors = res.processors->size(); - res.totals_port = uniteTotals(totals, res.header, res.processors); - res.extremes_port = uniteExtremes(extremes, res.header, res.processors); + res.totals_port = uniteTotals(totals, res.header, *res.processors); + res.extremes_port = uniteExtremes(extremes, res.header, *res.processors); if (res.collected_processors) { - for (; num_processors < res.processors.size(); ++num_processors) - res.collected_processors->emplace_back(res.processors[num_processors]); + for (; num_processors < res.processors->size(); ++num_processors) + res.collected_processors->emplace_back(res.processors->at(num_processors)); } return res; @@ -351,7 +357,7 @@ void Pipe::addSource(ProcessorPtr source) collected_processors->emplace_back(source); output_ports.push_back(&source->getOutputs().front()); - processors.emplace_back(std::move(source)); + processors->emplace_back(std::move(source)); max_parallel_streams = std::max(max_parallel_streams, output_ports.size()); } @@ -373,7 +379,7 @@ void Pipe::addTotalsSource(ProcessorPtr source) collected_processors->emplace_back(source); totals_port = &source->getOutputs().front(); - processors.emplace_back(std::move(source)); + processors->emplace_back(std::move(source)); } void Pipe::addExtremesSource(ProcessorPtr source) @@ -393,7 +399,7 @@ void Pipe::addExtremesSource(ProcessorPtr source) collected_processors->emplace_back(source); extremes_port = &source->getOutputs().front(); - processors.emplace_back(std::move(source)); + processors->emplace_back(std::move(source)); } static void dropPort(OutputPort *& port, Processors & processors, Processors * collected_processors) @@ -413,12 +419,12 @@ static void dropPort(OutputPort *& port, Processors & processors, Processors * c void Pipe::dropTotals() { - dropPort(totals_port, processors, collected_processors); + dropPort(totals_port, *processors, collected_processors); } void Pipe::dropExtremes() { - dropPort(extremes_port, processors, collected_processors); + dropPort(extremes_port, *processors, collected_processors); } void Pipe::addTransform(ProcessorPtr transform) @@ -504,7 +510,7 @@ void Pipe::addTransform(ProcessorPtr transform, OutputPort * totals, OutputPort if 
(collected_processors) collected_processors->emplace_back(transform); - processors.emplace_back(std::move(transform)); + processors->emplace_back(std::move(transform)); max_parallel_streams = std::max(max_parallel_streams, output_ports.size()); } @@ -595,7 +601,7 @@ void Pipe::addTransform(ProcessorPtr transform, InputPort * totals, InputPort * if (collected_processors) collected_processors->emplace_back(transform); - processors.emplace_back(std::move(transform)); + processors->emplace_back(std::move(transform)); max_parallel_streams = std::max(max_parallel_streams, output_ports.size()); } @@ -647,7 +653,7 @@ void Pipe::addSimpleTransform(const ProcessorGetterWithStreamKind & getter) if (collected_processors) collected_processors->emplace_back(transform); - processors.emplace_back(std::move(transform)); + processors->emplace_back(std::move(transform)); } }; @@ -698,7 +704,7 @@ void Pipe::addChains(std::vector chains) if (collected_processors) collected_processors->emplace_back(transform); - processors.emplace_back(std::move(transform)); + processors->emplace_back(std::move(transform)); } } @@ -757,7 +763,7 @@ void Pipe::setSinks(const Pipe::ProcessorGetterWithStreamKind & getter) transform = std::make_shared(stream->getHeader()); connect(*stream, transform->getInputs().front()); - processors.emplace_back(std::move(transform)); + processors->emplace_back(std::move(transform)); }; for (auto & port : output_ports) @@ -858,7 +864,7 @@ void Pipe::transform(const Transformer & transformer, bool check_ports) collected_processors->emplace_back(processor); } - processors.insert(processors.end(), new_processors.begin(), new_processors.end()); + processors->insert(processors->end(), new_processors.begin(), new_processors.end()); max_parallel_streams = std::max(max_parallel_streams, output_ports.size()); } diff --git a/src/QueryPipeline/Pipe.h b/src/QueryPipeline/Pipe.h index 79d19a18193..7e30d9c990e 100644 --- a/src/QueryPipeline/Pipe.h +++ b/src/QueryPipeline/Pipe.h @@ -5,6 +5,7 @@ #include #include + namespace DB { @@ -27,13 +28,13 @@ class Pipe public: /// Default constructor creates empty pipe. Generally, you cannot do anything with it except to check it is empty(). /// You cannot get empty pipe in any other way. All transforms check that result pipe is not empty. - Pipe() = default; + Pipe(); /// Create from source. Source must have no input ports and single output. explicit Pipe(ProcessorPtr source); /// Create from source with specified totals end extremes (may be nullptr). Ports should be owned by source. explicit Pipe(ProcessorPtr source, OutputPort * output, OutputPort * totals, OutputPort * extremes); /// Create from processors. Use all not-connected output ports as output_ports. Check invariants. - explicit Pipe(Processors processors_); + explicit Pipe(std::shared_ptr processors_); Pipe(const Pipe & other) = delete; Pipe(Pipe && other) = default; @@ -41,7 +42,7 @@ public: Pipe & operator=(Pipe && other) = default; const Block & getHeader() const { return header; } - bool empty() const { return processors.empty(); } + bool empty() const { return processors->empty(); } size_t numOutputPorts() const { return output_ports.size(); } size_t maxParallelStreams() const { return max_parallel_streams; } OutputPort * getOutputPort(size_t pos) const { return output_ports[pos]; } @@ -96,15 +97,15 @@ public: /// Unite several pipes together. They should have same header. static Pipe unitePipes(Pipes pipes); - /// Get processors from Pipe. 
Use it with cautious, it is easy to loss totals and extremes ports. - static Processors detachProcessors(Pipe pipe) { return std::move(pipe.processors); } + /// Get processors from Pipe. Use it with caution, it is easy to lose totals and extremes ports. + static Processors detachProcessors(Pipe pipe) { return *std::move(pipe.processors); } /// Get processors from Pipe without destroying pipe (used for EXPLAIN to keep QueryPlan). - const Processors & getProcessors() const { return processors; } + const Processors & getProcessors() const { return *processors; } private: /// Header is common for all output below. Block header; - Processors processors; + std::shared_ptr<Processors> processors; /// Output ports. Totals and extremes are allowed to be empty. OutputPortRawPtrs output_ports; diff --git a/src/QueryPipeline/PipelineResourcesHolder.h b/src/QueryPipeline/PipelineResourcesHolder.h index 46b1024f384..ed9eb68b7ba 100644 --- a/src/QueryPipeline/PipelineResourcesHolder.h +++ b/src/QueryPipeline/PipelineResourcesHolder.h @@ -19,8 +19,9 @@ struct QueryPlanResourceHolder QueryPlanResourceHolder(); QueryPlanResourceHolder(QueryPlanResourceHolder &&) noexcept; ~QueryPlanResourceHolder(); + /// Custom move assignment does not destroy data from lhs. It appends data from rhs to lhs. - QueryPlanResourceHolder& operator=(QueryPlanResourceHolder &&) noexcept; + QueryPlanResourceHolder & operator=(QueryPlanResourceHolder &&) noexcept; /// Some processors may implicitly use Context or temporary Storage created by Interpreter. /// But lifetime of Streams is not nested in lifetime of Interpreters, so we have to store it here, diff --git a/src/QueryPipeline/QueryPipeline.cpp b/src/QueryPipeline/QueryPipeline.cpp index 31b18c7f7f0..e0da4c4f0eb 100644 --- a/src/QueryPipeline/QueryPipeline.cpp +++ b/src/QueryPipeline/QueryPipeline.cpp @@ -21,6 +21,7 @@ #include #include + namespace DB { @@ -29,7 +30,11 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -QueryPipeline::QueryPipeline() = default; +QueryPipeline::QueryPipeline() + : processors(std::make_shared<Processors>()) +{ +} + QueryPipeline::QueryPipeline(QueryPipeline &&) noexcept = default; QueryPipeline & QueryPipeline::operator=(QueryPipeline &&) noexcept = default; QueryPipeline::~QueryPipeline() = default; @@ -210,16 +215,16 @@ static void initRowsBeforeLimit(IOutputFormat * output_format) QueryPipeline::QueryPipeline( QueryPlanResourceHolder resources_, - Processors processors_) + std::shared_ptr<Processors> processors_) : resources(std::move(resources_)) , processors(std::move(processors_)) { - checkCompleted(processors); + checkCompleted(*processors); } QueryPipeline::QueryPipeline( QueryPlanResourceHolder resources_, - Processors processors_, + std::shared_ptr<Processors> processors_, InputPort * input_) : resources(std::move(resources_)) , processors(std::move(processors_)) @@ -231,7 +236,7 @@ QueryPipeline::QueryPipeline( "Cannot create pushing QueryPipeline because its input port is connected or null"); bool found_input = false; - for (const auto & processor : processors) + for (const auto & processor : *processors) { for (const auto & in : processor->getInputs()) { @@ -255,7 +260,7 @@ QueryPipeline::QueryPipeline(std::shared_ptr<ISource> source) : QueryPipeline(Pi QueryPipeline::QueryPipeline( QueryPlanResourceHolder resources_, - Processors processors_, + std::shared_ptr<Processors> processors_, OutputPort * output_, OutputPort * totals_, OutputPort * extremes_) @@ -265,7 +270,7 @@ QueryPipeline::QueryPipeline( , totals(totals_) , extremes(extremes_) { - checkPulling(processors, output, totals,
extremes); + checkPulling(*processors, output, totals, extremes); } QueryPipeline::QueryPipeline(Pipe pipe) @@ -278,32 +283,34 @@ QueryPipeline::QueryPipeline(Pipe pipe) extremes = pipe.getExtremesPort(); processors = std::move(pipe.processors); - checkPulling(processors, output, totals, extremes); + checkPulling(*processors, output, totals, extremes); } else { processors = std::move(pipe.processors); - checkCompleted(processors); + checkCompleted(*processors); } } QueryPipeline::QueryPipeline(Chain chain) : resources(chain.detachResources()) + , processors(std::make_shared()) , input(&chain.getInputPort()) , num_threads(chain.getNumThreads()) { - processors.reserve(chain.getProcessors().size() + 1); + processors->reserve(chain.getProcessors().size() + 1); for (auto processor : chain.getProcessors()) - processors.emplace_back(std::move(processor)); + processors->emplace_back(std::move(processor)); auto sink = std::make_shared(chain.getOutputPort().getHeader()); connect(chain.getOutputPort(), sink->getPort()); - processors.emplace_back(std::move(sink)); + processors->emplace_back(std::move(sink)); input = &chain.getInputPort(); } QueryPipeline::QueryPipeline(std::shared_ptr format) + : processors(std::make_shared()) { auto & format_main = format->getPort(IOutputFormat::PortKind::Main); auto & format_totals = format->getPort(IOutputFormat::PortKind::Totals); @@ -313,14 +320,14 @@ QueryPipeline::QueryPipeline(std::shared_ptr format) { auto source = std::make_shared(format_totals.getHeader()); totals = &source->getPort(); - processors.emplace_back(std::move(source)); + processors->emplace_back(std::move(source)); } if (!extremes) { auto source = std::make_shared(format_extremes.getHeader()); extremes = &source->getPort(); - processors.emplace_back(std::move(source)); + processors->emplace_back(std::move(source)); } connect(*totals, format_totals); @@ -332,7 +339,7 @@ QueryPipeline::QueryPipeline(std::shared_ptr format) output_format = format.get(); - processors.emplace_back(std::move(format)); + processors->emplace_back(std::move(format)); } static void drop(OutputPort *& port, Processors & processors) @@ -354,11 +361,11 @@ void QueryPipeline::complete(std::shared_ptr sink) if (!pulling()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Pipeline must be pulling to be completed with sink"); - drop(totals, processors); - drop(extremes, processors); + drop(totals, *processors); + drop(extremes, *processors); connect(*output, sink->getPort()); - processors.emplace_back(std::move(sink)); + processors->emplace_back(std::move(sink)); output = nullptr; } @@ -369,17 +376,17 @@ void QueryPipeline::complete(Chain chain) resources = chain.detachResources(); - drop(totals, processors); - drop(extremes, processors); + drop(totals, *processors); + drop(extremes, *processors); - processors.reserve(processors.size() + chain.getProcessors().size() + 1); + processors->reserve(processors->size() + chain.getProcessors().size() + 1); for (auto processor : chain.getProcessors()) - processors.emplace_back(std::move(processor)); + processors->emplace_back(std::move(processor)); auto sink = std::make_shared(chain.getOutputPort().getHeader()); connect(*output, chain.getInputPort()); connect(chain.getOutputPort(), sink->getPort()); - processors.emplace_back(std::move(sink)); + processors->emplace_back(std::move(sink)); output = nullptr; } @@ -400,7 +407,7 @@ void QueryPipeline::complete(Pipe pipe) input = nullptr; auto pipe_processors = Pipe::detachProcessors(std::move(pipe)); - processors.insert(processors.end(), 
pipe_processors.begin(), pipe_processors.end()); + processors->insert(processors->end(), pipe_processors.begin(), pipe_processors.end()); } static void addMaterializing(OutputPort *& output, Processors & processors) @@ -421,9 +428,9 @@ void QueryPipeline::complete(std::shared_ptr<IOutputFormat> format) if (format->expectMaterializedColumns()) { - addMaterializing(output, processors); - addMaterializing(totals, processors); - addMaterializing(extremes, processors); + addMaterializing(output, *processors); + addMaterializing(totals, *processors); + addMaterializing(extremes, *processors); } auto & format_main = format->getPort(IOutputFormat::PortKind::Main); @@ -434,14 +441,14 @@ void QueryPipeline::complete(std::shared_ptr<IOutputFormat> format) { auto source = std::make_shared<NullSource>(format_totals.getHeader()); totals = &source->getPort(); - processors.emplace_back(std::move(source)); + processors->emplace_back(std::move(source)); } if (!extremes) { auto source = std::make_shared<NullSource>(format_extremes.getHeader()); extremes = &source->getPort(); - processors.emplace_back(std::move(source)); + processors->emplace_back(std::move(source)); } connect(*output, format_main); @@ -455,7 +462,7 @@ void QueryPipeline::complete(std::shared_ptr<IOutputFormat> format) initRowsBeforeLimit(format.get()); output_format = format.get(); - processors.emplace_back(std::move(format)); + processors->emplace_back(std::move(format)); } Block QueryPipeline::getHeader() const @@ -475,7 +482,7 @@ void QueryPipeline::setProgressCallback(const ProgressCallback & callback) progress_callback = callback; } -void QueryPipeline::setProcessListElement(QueryStatus * elem) +void QueryPipeline::setProcessListElement(QueryStatusPtr elem) { process_list_element = elem; @@ -504,7 +511,7 @@ void QueryPipeline::setLimitsAndQuota(const StreamLocalLimits & limits, std::sha transform->setQuota(quota_); connect(*output, transform->getInputPort()); output = &transform->getOutputPort(); - processors.emplace_back(std::move(transform)); + processors->emplace_back(std::move(transform)); } @@ -529,7 +536,7 @@ void QueryPipeline::addCompletedPipeline(QueryPipeline other) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add not completed pipeline"); resources = std::move(other.resources); - processors.insert(processors.end(), other.processors.begin(), other.processors.end()); + processors->insert(processors->end(), other.processors->begin(), other.processors->end()); } void QueryPipeline::reset() @@ -560,9 +567,9 @@ void QueryPipeline::convertStructureTo(const ColumnsWithTypeAndName & columns) ActionsDAG::MatchColumnsMode::Position); auto actions = std::make_shared<ExpressionActions>(std::move(converting)); - addExpression(output, actions, processors); - addExpression(totals, actions, processors); - addExpression(extremes, actions, processors); + addExpression(output, actions, *processors); + addExpression(totals, actions, *processors); + addExpression(extremes, actions, *processors); } std::unique_ptr<ReadProgressCallback> QueryPipeline::getReadProgressCallback() const diff --git a/src/QueryPipeline/QueryPipeline.h b/src/QueryPipeline/QueryPipeline.h index 1b88ede3349..63f444e6ec1 100644 --- a/src/QueryPipeline/QueryPipeline.h +++ b/src/QueryPipeline/QueryPipeline.h @@ -4,6 +4,7 @@ #include #include + namespace DB { @@ -15,6 +16,7 @@ using ProcessorPtr = std::shared_ptr<IProcessor>; using Processors = std::vector<ProcessorPtr>; class QueryStatus; +using QueryStatusPtr = std::shared_ptr<QueryStatus>; struct Progress; using ProgressCallback = std::function<void(const Progress & progress)>; @@ -34,6 +36,7 @@ class ReadProgressCallback; struct ColumnWithTypeAndName; using ColumnsWithTypeAndName = std::vector<ColumnWithTypeAndName>; +
class QueryPipeline { public: @@ -58,23 +61,23 @@ public: /// completed QueryPipeline( QueryPlanResourceHolder resources_, - Processors processors_); + std::shared_ptr<Processors> processors_); /// pushing QueryPipeline( QueryPlanResourceHolder resources_, - Processors processors_, + std::shared_ptr<Processors> processors_, InputPort * input_); /// pulling QueryPipeline( QueryPlanResourceHolder resources_, - Processors processors_, + std::shared_ptr<Processors> processors_, OutputPort * output_, OutputPort * totals_ = nullptr, OutputPort * extremes_ = nullptr); - bool initialized() const { return !processors.empty(); } + bool initialized() const { return !processors->empty(); } /// When initialized, exactly one of the following is true. /// Use PullingPipelineExecutor or PullingAsyncPipelineExecutor. bool pulling() const { return output != nullptr; } @@ -97,7 +100,7 @@ public: size_t getNumThreads() const { return num_threads; } void setNumThreads(size_t num_threads_) { num_threads = num_threads_; } - void setProcessListElement(QueryStatus * elem); + void setProcessListElement(QueryStatusPtr elem); void setProgressCallback(const ProgressCallback & callback); void setLimitsAndQuota(const StreamLocalLimits & limits, std::shared_ptr<const EnabledQuota> quota_); bool tryGetResultRowsAndBytes(UInt64 & result_rows, UInt64 & result_bytes) const; @@ -119,7 +122,7 @@ public: /// Add processors and resources from other pipeline. Other pipeline should be completed. void addCompletedPipeline(QueryPipeline other); - const Processors & getProcessors() const { return processors; } + const Processors & getProcessors() const { return *processors; } /// For pulling pipeline, convert structure to expected. /// Trash, need to remove later. @@ -134,7 +137,7 @@ private: std::shared_ptr<const EnabledQuota> quota; bool update_profile_events = true; - Processors processors; + std::shared_ptr<Processors> processors; InputPort * input = nullptr; @@ -142,7 +145,7 @@ private: OutputPort * totals = nullptr; OutputPort * extremes = nullptr; - QueryStatus * process_list_element = nullptr; + QueryStatusPtr process_list_element; IOutputFormat * output_format = nullptr; diff --git a/src/QueryPipeline/QueryPipelineBuilder.cpp b/src/QueryPipeline/QueryPipelineBuilder.cpp index 440f123e876..812bd155b42 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.cpp +++ b/src/QueryPipeline/QueryPipelineBuilder.cpp @@ -327,9 +327,9 @@ QueryPipelineBuilderPtr QueryPipelineBuilder::mergePipelines( collected_processors->emplace_back(transform); left->pipe.output_ports.front() = &transform->getOutputs().front(); - left->pipe.processors.emplace_back(transform); + left->pipe.processors->emplace_back(transform); - left->pipe.processors.insert(left->pipe.processors.end(), right->pipe.processors.begin(), right->pipe.processors.end()); + left->pipe.processors->insert(left->pipe.processors->end(), right->pipe.processors->begin(), right->pipe.processors->end()); left->pipe.header = left->pipe.output_ports.front()->getHeader(); left->pipe.max_parallel_streams = std::max(left->pipe.max_parallel_streams, right->pipe.max_parallel_streams); return left; @@ -383,7 +383,7 @@ std::unique_ptr QueryPipelineBuilder::joinPipelinesRightLe /// Collect the NEW processors for the right pipeline. QueryPipelineProcessorsCollector collector(*right); /// Remember the last step of the right pipeline.
- ExpressionStep* step = typeid_cast(right->pipe.processors.back()->getQueryPlanStep()); + ExpressionStep* step = typeid_cast(right->pipe.processors->back()->getQueryPlanStep()); if (!step) { throw Exception(ErrorCodes::LOGICAL_ERROR, "The top step of the right pipeline should be ExpressionStep"); @@ -467,7 +467,7 @@ std::unique_ptr QueryPipelineBuilder::joinPipelinesRightLe if (collected_processors) collected_processors->emplace_back(joining); - left->pipe.processors.emplace_back(std::move(joining)); + left->pipe.processors->emplace_back(std::move(joining)); } if (left->hasTotals()) @@ -482,14 +482,14 @@ std::unique_ptr QueryPipelineBuilder::joinPipelinesRightLe if (collected_processors) collected_processors->emplace_back(joining); - left->pipe.processors.emplace_back(std::move(joining)); + left->pipe.processors->emplace_back(std::move(joining)); } /// Move the collected processors to the last step in the right pipeline. Processors processors = collector.detachProcessors(); step->appendExtraProcessors(processors); - left->pipe.processors.insert(left->pipe.processors.end(), right->pipe.processors.begin(), right->pipe.processors.end()); + left->pipe.processors->insert(left->pipe.processors->end(), right->pipe.processors->begin(), right->pipe.processors->end()); left->resources = std::move(right->resources); left->pipe.header = left->pipe.output_ports.front()->getHeader(); left->pipe.max_parallel_streams = std::max(left->pipe.max_parallel_streams, right->pipe.max_parallel_streams); @@ -537,7 +537,7 @@ void QueryPipelineBuilder::addPipelineBefore(QueryPipelineBuilder pipeline) addTransform(std::move(processor)); } -void QueryPipelineBuilder::setProcessListElement(QueryStatus * elem) +void QueryPipelineBuilder::setProcessListElement(QueryStatusPtr elem) { process_list_element = elem; } diff --git a/src/QueryPipeline/QueryPipelineBuilder.h b/src/QueryPipeline/QueryPipelineBuilder.h index 13b4d681b7d..5a0694100eb 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.h +++ b/src/QueryPipeline/QueryPipelineBuilder.h @@ -148,7 +148,7 @@ public: const Block & getHeader() const { return pipe.getHeader(); } - void setProcessListElement(QueryStatus * elem); + void setProcessListElement(QueryStatusPtr elem); void setProgressCallback(ProgressCallback callback); /// Recommend number of threads for pipeline execution. @@ -189,7 +189,7 @@ private: /// Sometimes, more streams are created then the number of threads for more optimal execution. 
size_t max_threads = 0; - QueryStatus * process_list_element = nullptr; + QueryStatusPtr process_list_element; ProgressCallback progress_callback = nullptr; void checkInitialized(); diff --git a/src/QueryPipeline/ReadProgressCallback.cpp b/src/QueryPipeline/ReadProgressCallback.cpp index bbdabb8e8d8..6692b0f96bd 100644 --- a/src/QueryPipeline/ReadProgressCallback.cpp +++ b/src/QueryPipeline/ReadProgressCallback.cpp @@ -2,6 +2,7 @@ #include #include + namespace ProfileEvents { extern const Event SelectedRows; @@ -17,7 +18,7 @@ namespace ErrorCodes extern const int TOO_MANY_BYTES; } -void ReadProgressCallback::setProcessListElement(QueryStatus * elem) +void ReadProgressCallback::setProcessListElement(QueryStatusPtr elem) { process_list_elem = elem; if (!elem) diff --git a/src/QueryPipeline/ReadProgressCallback.h b/src/QueryPipeline/ReadProgressCallback.h index f64123ef39d..c8f0d4cf537 100644 --- a/src/QueryPipeline/ReadProgressCallback.h +++ b/src/QueryPipeline/ReadProgressCallback.h @@ -4,20 +4,23 @@ #include #include + namespace DB { class QueryStatus; +using QueryStatusPtr = std::shared_ptr<QueryStatus>; class EnabledQuota; struct StorageLimits; using StorageLimitsList = std::list<StorageLimits>; + class ReadProgressCallback { public: void setQuota(const std::shared_ptr<const EnabledQuota> & quota_) { quota = quota_; } - void setProcessListElement(QueryStatus * elem); + void setProcessListElement(QueryStatusPtr elem); void setProgressCallback(const ProgressCallback & callback) { progress_callback = callback; } void addTotalRowsApprox(size_t value) { total_rows_approx += value; } @@ -30,7 +33,7 @@ public: private: std::shared_ptr<const EnabledQuota> quota; ProgressCallback progress_callback; - QueryStatus * process_list_elem = nullptr; + QueryStatusPtr process_list_elem; /// The approximate total number of rows to read. For progress bar.
std::atomic_size_t total_rows_approx = 0; diff --git a/src/QueryPipeline/RemoteQueryExecutorReadContext.cpp b/src/QueryPipeline/RemoteQueryExecutorReadContext.cpp index 43bb5fc7083..4596bbb8961 100644 --- a/src/QueryPipeline/RemoteQueryExecutorReadContext.cpp +++ b/src/QueryPipeline/RemoteQueryExecutorReadContext.cpp @@ -126,12 +126,12 @@ bool RemoteQueryExecutorReadContext::checkTimeoutImpl(bool blocking) epoll_event events[3]; events[0].data.fd = events[1].data.fd = events[2].data.fd = -1; - int num_events = epoll.getManyReady(3, events, blocking); + size_t num_events = epoll.getManyReady(3, events, blocking); bool is_socket_ready = false; bool is_pipe_alarmed = false; - for (int i = 0; i < num_events; ++i) + for (size_t i = 0; i < num_events; ++i) { if (events[i].data.fd == connection_fd) is_socket_ready = true; diff --git a/src/Server/GRPCServer.cpp b/src/Server/GRPCServer.cpp index a2a2db75d68..a9373555af7 100644 --- a/src/Server/GRPCServer.cpp +++ b/src/Server/GRPCServer.cpp @@ -1082,7 +1082,8 @@ namespace NamesAndTypesList columns; for (size_t column_idx : collections::range(external_table.columns_size())) { - const auto & name_and_type = external_table.columns(column_idx); + /// TODO: consider changing protocol + const auto & name_and_type = external_table.columns(static_cast(column_idx)); NameAndTypePair column; column.name = name_and_type.name(); if (column.name.empty()) diff --git a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp index c8ae9c6e07c..c8015cfd185 100644 --- a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp +++ b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp @@ -136,7 +136,7 @@ void WriteBufferFromHTTPServerResponse::nextImpl() WriteBufferFromHTTPServerResponse::WriteBufferFromHTTPServerResponse( HTTPServerResponse & response_, bool is_http_method_head_, - unsigned keep_alive_timeout_, + size_t keep_alive_timeout_, bool compress_, CompressionMethod compression_method_) : BufferWithOwnMemory(DBMS_DEFAULT_BUFFER_SIZE) diff --git a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.h b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.h index 6905d5df8b5..ce677616755 100644 --- a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.h +++ b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.h @@ -36,7 +36,7 @@ public: WriteBufferFromHTTPServerResponse( HTTPServerResponse & response_, bool is_http_method_head_, - unsigned keep_alive_timeout_, + size_t keep_alive_timeout_, bool compress_ = false, /// If true - set Content-Encoding header and compress the result. CompressionMethod compression_method_ = CompressionMethod::None); @@ -105,7 +105,7 @@ private: bool is_http_method_head; bool add_cors_header = false; - unsigned keep_alive_timeout = 0; + size_t keep_alive_timeout = 0; bool compress = false; CompressionMethod compression_method; int compression_level = 1; diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index d02da92c613..2b63524fb79 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -622,8 +622,10 @@ void HTTPHandler::processQuery( /// Request body can be compressed using algorithm specified in the Content-Encoding header. 
String http_request_compression_method_str = request.get("Content-Encoding", ""); + int zstd_window_log_max = static_cast(context->getSettingsRef().zstd_window_log_max); auto in_post = wrapReadBufferWithCompressionMethod( - wrapReadBufferReference(request.getStream()), chooseCompressionMethod({}, http_request_compression_method_str), context->getSettingsRef().zstd_window_log_max); + wrapReadBufferReference(request.getStream()), + chooseCompressionMethod({}, http_request_compression_method_str), zstd_window_log_max); /// The data can also be compressed using incompatible internal algorithm. This is indicated by /// 'decompress' query parameter. @@ -749,7 +751,7 @@ void HTTPHandler::processQuery( /// (using Accept-Encoding header) and 'enable_http_compression' setting is turned on. used_output.out->setCompression(client_supports_http_compression && settings.enable_http_compression); if (client_supports_http_compression) - used_output.out->setCompressionLevel(settings.http_zlib_compression_level); + used_output.out->setCompressionLevel(static_cast(settings.http_zlib_compression_level)); used_output.out->setSendProgress(settings.send_progress_in_http_headers); used_output.out->setSendProgressInterval(settings.http_headers_progress_interval_ms); diff --git a/src/Server/KeeperTCPHandler.cpp b/src/Server/KeeperTCPHandler.cpp index 94e3597f88e..38a10926036 100644 --- a/src/Server/KeeperTCPHandler.cpp +++ b/src/Server/KeeperTCPHandler.cpp @@ -126,7 +126,8 @@ struct SocketInterruptablePollWrapper do { Poco::Timestamp start; - rc = epoll_wait(epollfd, evout, 2, remaining_time.totalMilliseconds()); + /// TODO: use epoll_pwait() for more precise timers + rc = epoll_wait(epollfd, evout, 2, static_cast(remaining_time.totalMilliseconds())); if (rc < 0 && errno == EINTR) { Poco::Timestamp end; @@ -156,7 +157,7 @@ struct SocketInterruptablePollWrapper do { Poco::Timestamp start; - rc = ::poll(poll_buf, 2, remaining_time.totalMilliseconds()); + rc = ::poll(poll_buf, 2, static_cast(remaining_time.totalMilliseconds())); if (rc < 0 && errno == POCO_EINTR) { Poco::Timestamp end; @@ -325,6 +326,7 @@ void KeeperTCPHandler::runImpl() int32_t four_letter_cmd = header; if (!isHandShake(four_letter_cmd)) { + connected.store(true, std::memory_order_relaxed); tryExecuteFourLetterWordCmd(four_letter_cmd); return; } @@ -380,7 +382,7 @@ void KeeperTCPHandler::runImpl() response->zxid); UInt8 single_byte = 1; - [[maybe_unused]] int result = write(response_fd, &single_byte, sizeof(single_byte)); + [[maybe_unused]] ssize_t result = write(response_fd, &single_byte, sizeof(single_byte)); }; keeper_dispatcher->registerSession(session_id, response_callback); @@ -395,6 +397,7 @@ void KeeperTCPHandler::runImpl() }; session_stopwatch.start(); + connected.store(true, std::memory_order_release); bool close_received = false; try @@ -584,6 +587,9 @@ KeeperConnectionStats & KeeperTCPHandler::getConnectionStats() void KeeperTCPHandler::dumpStats(WriteBufferFromOwnString & buf, bool brief) { + if (!connected.load(std::memory_order_acquire)) + return; + auto & stats = getConnectionStats(); writeText(' ', buf); diff --git a/src/Server/KeeperTCPHandler.h b/src/Server/KeeperTCPHandler.h index e9bd211628f..ffdd50b805a 100644 --- a/src/Server/KeeperTCPHandler.h +++ b/src/Server/KeeperTCPHandler.h @@ -81,6 +81,8 @@ private: std::shared_ptr in; std::shared_ptr out; + std::atomic connected{false}; + void runImpl(); void sendHandshake(bool has_leader); diff --git a/src/Server/MySQLHandler.cpp b/src/Server/MySQLHandler.cpp index 
8e701956d29..8e2d99e2909 100644 --- a/src/Server/MySQLHandler.cpp +++ b/src/Server/MySQLHandler.cpp @@ -63,8 +63,11 @@ static String showTableStatusReplacementQuery(const String & query); static String killConnectionIdReplacementQuery(const String & query); static String selectLimitReplacementQuery(const String & query); -MySQLHandler::MySQLHandler(IServer & server_, TCPServer & tcp_server_, const Poco::Net::StreamSocket & socket_, - bool ssl_enabled, size_t connection_id_) +MySQLHandler::MySQLHandler( + IServer & server_, + TCPServer & tcp_server_, + const Poco::Net::StreamSocket & socket_, + bool ssl_enabled, uint32_t connection_id_) : Poco::Net::TCPServerConnection(socket_) , server(server_) , tcp_server(tcp_server_) @@ -215,7 +218,7 @@ void MySQLHandler::finishHandshake(MySQLProtocol::ConnectionPhase::HandshakeResp auto read_bytes = [this, &buf, &pos, &packet_size](size_t count) -> void { while (pos < count) { - int ret = socket().receiveBytes(buf + pos, packet_size - pos); + int ret = socket().receiveBytes(buf + pos, static_cast(packet_size - pos)); if (ret == 0) { throw Exception("Cannot read all data. Bytes read: " + std::to_string(pos) + ". Bytes expected: 3", ErrorCodes::CANNOT_READ_ALL_DATA); @@ -376,7 +379,14 @@ void MySQLHandler::finishHandshakeSSL( } #if USE_SSL -MySQLHandlerSSL::MySQLHandlerSSL(IServer & server_, TCPServer & tcp_server_, const Poco::Net::StreamSocket & socket_, bool ssl_enabled, size_t connection_id_, RSA & public_key_, RSA & private_key_) +MySQLHandlerSSL::MySQLHandlerSSL( + IServer & server_, + TCPServer & tcp_server_, + const Poco::Net::StreamSocket & socket_, + bool ssl_enabled, + uint32_t connection_id_, + RSA & public_key_, + RSA & private_key_) : MySQLHandler(server_, tcp_server_, socket_, ssl_enabled, connection_id_) , public_key(public_key_) , private_key(private_key_) diff --git a/src/Server/MySQLHandler.h b/src/Server/MySQLHandler.h index 2f43d471c40..3366e8792c9 100644 --- a/src/Server/MySQLHandler.h +++ b/src/Server/MySQLHandler.h @@ -31,7 +31,12 @@ class TCPServer; class MySQLHandler : public Poco::Net::TCPServerConnection { public: - MySQLHandler(IServer & server_, TCPServer & tcp_server_, const Poco::Net::StreamSocket & socket_, bool ssl_enabled, size_t connection_id_); + MySQLHandler( + IServer & server_, + TCPServer & tcp_server_, + const Poco::Net::StreamSocket & socket_, + bool ssl_enabled, + uint32_t connection_id_); void run() final; @@ -57,7 +62,7 @@ protected: IServer & server; TCPServer & tcp_server; Poco::Logger * log; - UInt64 connection_id = 0; + uint32_t connection_id = 0; uint32_t server_capabilities = 0; uint32_t client_capabilities = 0; @@ -81,7 +86,14 @@ protected: class MySQLHandlerSSL : public MySQLHandler { public: - MySQLHandlerSSL(IServer & server_, TCPServer & tcp_server_, const Poco::Net::StreamSocket & socket_, bool ssl_enabled, size_t connection_id_, RSA & public_key_, RSA & private_key_); + MySQLHandlerSSL( + IServer & server_, + TCPServer & tcp_server_, + const Poco::Net::StreamSocket & socket_, + bool ssl_enabled, + uint32_t connection_id_, + RSA & public_key_, + RSA & private_key_); private: void authPluginSSL() override; diff --git a/src/Server/MySQLHandlerFactory.cpp b/src/Server/MySQLHandlerFactory.cpp index c02a3015945..cbcddbb444a 100644 --- a/src/Server/MySQLHandlerFactory.cpp +++ b/src/Server/MySQLHandlerFactory.cpp @@ -127,7 +127,7 @@ void MySQLHandlerFactory::generateRSAKeys() Poco::Net::TCPServerConnection * MySQLHandlerFactory::createConnection(const Poco::Net::StreamSocket & socket, TCPServer & 
tcp_server) { - size_t connection_id = last_connection_id++; + uint32_t connection_id = last_connection_id++; LOG_TRACE(log, "MySQL connection. Id: {}. Address: {}", connection_id, socket.peerAddress().toString()); #if USE_SSL return new MySQLHandlerSSL(server, tcp_server, socket, ssl_enabled, connection_id, *public_key, *private_key); diff --git a/src/Server/MySQLHandlerFactory.h b/src/Server/MySQLHandlerFactory.h index 38caae922ee..fa4ce93f765 100644 --- a/src/Server/MySQLHandlerFactory.h +++ b/src/Server/MySQLHandlerFactory.h @@ -36,7 +36,7 @@ private: bool ssl_enabled = false; #endif - std::atomic last_connection_id = 0; + std::atomic last_connection_id = 0; public: explicit MySQLHandlerFactory(IServer & server_); diff --git a/src/Server/ProxyV1Handler.cpp b/src/Server/ProxyV1Handler.cpp new file mode 100644 index 00000000000..838a1de1c04 --- /dev/null +++ b/src/Server/ProxyV1Handler.cpp @@ -0,0 +1,123 @@ +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NETWORK_ERROR; + extern const int SOCKET_TIMEOUT; + extern const int CANNOT_READ_FROM_SOCKET; + extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED; +} + +void ProxyV1Handler::run() +{ + const auto & settings = server.context()->getSettingsRef(); + socket().setReceiveTimeout(settings.receive_timeout); + + std::string word; + bool eol; + + // Read PROXYv1 protocol header + // http://www.haproxy.org/download/1.8/doc/proxy-protocol.txt + + // read "PROXY" + if (!readWord(5, word, eol) || word != "PROXY" || eol) + throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); + + // read "TCP4" or "TCP6" or "UNKNOWN" + if (!readWord(7, word, eol)) + throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); + + if (word != "TCP4" && word != "TCP6" && word != "UNKNOWN") + throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); + + if (word == "UNKNOWN" && eol) + return; + + if (eol) + throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); + + // read address + if (!readWord(39, word, eol) || eol) + throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); + + stack_data.forwarded_for = std::move(word); + + // read address + if (!readWord(39, word, eol) || eol) + throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); + + // read port + if (!readWord(5, word, eol) || eol) + throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); + + // read port and "\r\n" + if (!readWord(5, word, eol) || !eol) + throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); +} + +bool ProxyV1Handler::readWord(int max_len, std::string & word, bool & eol) +{ + word.clear(); + eol = false; + + char ch = 0; + int n = 0; + bool is_cr = false; + try + { + for (++max_len; max_len > 0 || is_cr; --max_len) + { + n = socket().receiveBytes(&ch, 1); + if (n == 0) + { + socket().shutdown(); + return false; + } + if (n < 0) + break; + + if (is_cr) + return ch == 0x0A; + + if (ch == 0x0D) + { + is_cr = true; + eol = true; + continue; + } + + if (ch == ' ') + return true; + + word.push_back(ch); + } + } + catch (const Poco::Net::NetException & e) + { + throw NetException(e.displayText() + ", while reading from socket (" + socket().peerAddress().toString() + ")", 
ErrorCodes::NETWORK_ERROR); + } + catch (const Poco::TimeoutException &) + { + throw NetException(fmt::format("Timeout exceeded while reading from socket ({}, {} ms)", + socket().peerAddress().toString(), + socket().getReceiveTimeout().totalMilliseconds()), ErrorCodes::SOCKET_TIMEOUT); + } + catch (const Poco::IOException & e) + { + throw NetException(e.displayText() + ", while reading from socket (" + socket().peerAddress().toString() + ")", ErrorCodes::NETWORK_ERROR); + } + + if (n < 0) + throw NetException("Cannot read from socket (" + socket().peerAddress().toString() + ")", ErrorCodes::CANNOT_READ_FROM_SOCKET); + + return false; +} + +} diff --git a/src/Server/ProxyV1Handler.h b/src/Server/ProxyV1Handler.h new file mode 100644 index 00000000000..062cc0e291a --- /dev/null +++ b/src/Server/ProxyV1Handler.h @@ -0,0 +1,29 @@ +#pragma once + +#include +#include +#include + + +namespace DB +{ + +class ProxyV1Handler : public Poco::Net::TCPServerConnection +{ + using StreamSocket = Poco::Net::StreamSocket; +public: + explicit ProxyV1Handler(const StreamSocket & socket, IServer & server_, const std::string & conf_name_, TCPProtocolStackData & stack_data_) + : Poco::Net::TCPServerConnection(socket), server(server_), conf_name(conf_name_), stack_data(stack_data_) {} + + void run() override; + +protected: + bool readWord(int max_len, std::string & word, bool & eol); + +private: + IServer & server; + std::string conf_name; + TCPProtocolStackData & stack_data; +}; + +} diff --git a/src/Server/ProxyV1HandlerFactory.h b/src/Server/ProxyV1HandlerFactory.h new file mode 100644 index 00000000000..028596d745d --- /dev/null +++ b/src/Server/ProxyV1HandlerFactory.h @@ -0,0 +1,56 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +class ProxyV1HandlerFactory : public TCPServerConnectionFactory +{ +private: + IServer & server; + Poco::Logger * log; + std::string conf_name; + + class DummyTCPHandler : public Poco::Net::TCPServerConnection + { + public: + using Poco::Net::TCPServerConnection::TCPServerConnection; + void run() override {} + }; + +public: + explicit ProxyV1HandlerFactory(IServer & server_, const std::string & conf_name_) + : server(server_), log(&Poco::Logger::get("ProxyV1HandlerFactory")), conf_name(conf_name_) + { + } + + Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer & tcp_server) override + { + TCPProtocolStackData stack_data; + return createConnection(socket, tcp_server, stack_data); + } + + Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer &/* tcp_server*/, TCPProtocolStackData & stack_data) override + { + try + { + LOG_TRACE(log, "TCP Request. Address: {}", socket.peerAddress().toString()); + return new ProxyV1Handler(socket, server, conf_name, stack_data); + } + catch (const Poco::Net::NetException &) + { + LOG_TRACE(log, "TCP Request. 
Client is not connected (most likely RST packet was sent)."); + return new DummyTCPHandler(socket); + } + } +}; + +} diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index da9d7fb3d2c..73b91b29f31 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -109,6 +109,18 @@ TCPHandler::TCPHandler(IServer & server_, TCPServer & tcp_server_, const Poco::N { } +TCPHandler::TCPHandler(IServer & server_, TCPServer & tcp_server_, const Poco::Net::StreamSocket & socket_, TCPProtocolStackData & stack_data, std::string server_display_name_) +: Poco::Net::TCPServerConnection(socket_) + , server(server_) + , tcp_server(tcp_server_) + , log(&Poco::Logger::get("TCPHandler")) + , forwarded_for(stack_data.forwarded_for) + , certificate(stack_data.certificate) + , default_database(stack_data.default_database) + , server_display_name(std::move(server_display_name_)) +{ +} + TCPHandler::~TCPHandler() { try @@ -365,8 +377,8 @@ void TCPHandler::runImpl() after_send_progress.restart(); if (state.io.pipeline.pushing()) - /// FIXME: check explicitly that insert query suggests to receive data via native protocol, { + /// FIXME: check explicitly that insert query suggests to receive data via native protocol, state.need_receive_data_for_insert = true; processInsertQuery(); state.io.onFinish(); @@ -378,27 +390,30 @@ void TCPHandler::runImpl() } else if (state.io.pipeline.completed()) { - CompletedPipelineExecutor executor(state.io.pipeline); - /// Should not check for cancel in case of input. - if (!state.need_receive_data_for_input) { - auto callback = [this]() + CompletedPipelineExecutor executor(state.io.pipeline); + + /// Should not check for cancel in case of input. + if (!state.need_receive_data_for_input) { - std::lock_guard lock(fatal_error_mutex); + auto callback = [this]() + { + std::lock_guard lock(fatal_error_mutex); - if (isQueryCancelled()) - return true; + if (isQueryCancelled()) + return true; - sendProgress(); - sendSelectProfileEvents(); - sendLogs(); + sendProgress(); + sendSelectProfileEvents(); + sendLogs(); - return false; - }; + return false; + }; - executor.setCancelCallback(callback, interactive_delay / 1000); + executor.setCancelCallback(callback, interactive_delay / 1000); + } + executor.execute(); } - executor.execute(); state.io.onFinish(); /// Send final progress after calling onFinish(), since it will update the progress. @@ -829,7 +844,7 @@ void TCPHandler::processTablesStatusRequest() if (auto * replicated_table = dynamic_cast(table.get())) { status.is_replicated = true; - status.absolute_delay = replicated_table->getAbsoluteDelay(); + status.absolute_delay = static_cast(replicated_table->getAbsoluteDelay()); } else status.is_replicated = false; //-V1048 @@ -1060,7 +1075,7 @@ std::unique_ptr TCPHandler::makeSession() { auto interface = is_interserver_mode ? 
ClientInfo::Interface::TCP_INTERSERVER : ClientInfo::Interface::TCP; - auto res = std::make_unique(server.context(), interface, socket().secure()); + auto res = std::make_unique(server.context(), interface, socket().secure(), certificate); auto & client_info = res->getClientInfo(); client_info.forwarded_for = forwarded_for; @@ -1087,6 +1102,7 @@ void TCPHandler::receiveHello() UInt64 packet_type = 0; String user; String password; + String default_db; readVarUInt(packet_type, *in); if (packet_type != Protocol::Client::Hello) @@ -1108,7 +1124,9 @@ void TCPHandler::receiveHello() readVarUInt(client_version_minor, *in); // NOTE For backward compatibility of the protocol, client cannot send its version_patch. readVarUInt(client_tcp_protocol_version, *in); - readStringBinary(default_database, *in); + readStringBinary(default_db, *in); + if (!default_db.empty()) + default_database = default_db; readStringBinary(user, *in); readStringBinary(password, *in); diff --git a/src/Server/TCPHandler.h b/src/Server/TCPHandler.h index ea5fb2f9fe0..9c8d3ca60f3 100644 --- a/src/Server/TCPHandler.h +++ b/src/Server/TCPHandler.h @@ -22,6 +22,7 @@ #include #include "IServer.h" +#include "Server/TCPProtocolStackData.h" #include "base/types.h" @@ -137,6 +138,7 @@ public: * Proxy-forwarded (original client) IP address is used for quota accounting if quota is keyed by forwarded IP. */ TCPHandler(IServer & server_, TCPServer & tcp_server_, const Poco::Net::StreamSocket & socket_, bool parse_proxy_protocol_, std::string server_display_name_); + TCPHandler(IServer & server_, TCPServer & tcp_server_, const Poco::Net::StreamSocket & socket_, TCPProtocolStackData & stack_data, std::string server_display_name_); ~TCPHandler() override; void run() override; @@ -151,12 +153,13 @@ private: Poco::Logger * log; String forwarded_for; + String certificate; String client_name; UInt64 client_version_major = 0; UInt64 client_version_minor = 0; UInt64 client_version_patch = 0; - UInt64 client_tcp_protocol_version = 0; + UInt32 client_tcp_protocol_version = 0; String quota_key; /// Connection settings, which are extracted from a context. diff --git a/src/Server/TCPHandlerFactory.h b/src/Server/TCPHandlerFactory.h index 354c886f4c0..fde04c6e0ab 100644 --- a/src/Server/TCPHandlerFactory.h +++ b/src/Server/TCPHandlerFactory.h @@ -3,6 +3,7 @@ #include #include #include +#include "Server/TCPProtocolStackData.h" #include #include #include @@ -53,6 +54,21 @@ public: return new DummyTCPHandler(socket); } } + + Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer & tcp_server, TCPProtocolStackData & stack_data) override + { + try + { + LOG_TRACE(log, "TCP Request. Address: {}", socket.peerAddress().toString()); + + return new TCPHandler(server, tcp_server, socket, stack_data, server_display_name); + } + catch (const Poco::Net::NetException &) + { + LOG_TRACE(log, "TCP Request. 
Client is not connected (most likely RST packet was sent)."); + return new DummyTCPHandler(socket); + } + } }; } diff --git a/src/Server/TCPProtocolStackData.h b/src/Server/TCPProtocolStackData.h new file mode 100644 index 00000000000..4ad401e723f --- /dev/null +++ b/src/Server/TCPProtocolStackData.h @@ -0,0 +1,22 @@ +#pragma once + +#include +#include + +namespace DB +{ + +// Data to communicate between protocol layers +struct TCPProtocolStackData +{ + // socket implementation can be replaced by some layer - TLS as an example + Poco::Net::StreamSocket socket; + // host from PROXY layer + std::string forwarded_for; + // certificate path from TLS layer to TCP layer + std::string certificate; + // default database from endpoint configuration to TCP layer + std::string default_database; +}; + +} diff --git a/src/Server/TCPProtocolStackFactory.h b/src/Server/TCPProtocolStackFactory.h new file mode 100644 index 00000000000..16b57649a72 --- /dev/null +++ b/src/Server/TCPProtocolStackFactory.h @@ -0,0 +1,92 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int UNKNOWN_ADDRESS_PATTERN_TYPE; + extern const int IP_ADDRESS_NOT_ALLOWED; +} + + +class TCPProtocolStackFactory : public TCPServerConnectionFactory +{ +private: + IServer & server [[maybe_unused]]; + Poco::Logger * log; + std::string conf_name; + std::vector stack; + AllowedClientHosts allowed_client_hosts; + + class DummyTCPHandler : public Poco::Net::TCPServerConnection + { + public: + using Poco::Net::TCPServerConnection::TCPServerConnection; + void run() override {} + }; + +public: + template + explicit TCPProtocolStackFactory(IServer & server_, const std::string & conf_name_, T... factory) + : server(server_), log(&Poco::Logger::get("TCPProtocolStackFactory")), conf_name(conf_name_), stack({factory...}) + { + const auto & config = server.config(); + /// Fill list of allowed hosts. + const auto networks_config = conf_name + ".networks"; + if (config.has(networks_config)) + { + Poco::Util::AbstractConfiguration::Keys keys; + config.keys(networks_config, keys); + for (const String & key : keys) + { + String value = config.getString(networks_config + "." + key); + if (key.starts_with("ip")) + allowed_client_hosts.addSubnet(value); + else if (key.starts_with("host_regexp")) + allowed_client_hosts.addNameRegexp(value); + else if (key.starts_with("host")) + allowed_client_hosts.addName(value); + else + throw Exception("Unknown address pattern type: " + key, ErrorCodes::UNKNOWN_ADDRESS_PATTERN_TYPE); + } + } + } + + Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer & tcp_server) override + { + if (!allowed_client_hosts.empty() && !allowed_client_hosts.contains(socket.peerAddress().host())) + throw Exception("Connections from " + socket.peerAddress().toString() + " are not allowed", ErrorCodes::IP_ADDRESS_NOT_ALLOWED); + + try + { + LOG_TRACE(log, "TCP Request. Address: {}", socket.peerAddress().toString()); + return new TCPProtocolStackHandler(server, tcp_server, socket, stack, conf_name); + } + catch (const Poco::Net::NetException &) + { + LOG_TRACE(log, "TCP Request. 
Client is not connected (most likely RST packet was sent)."); + return new DummyTCPHandler(socket); + } + } + + void append(TCPServerConnectionFactory::Ptr factory) + { + stack.push_back(std::move(factory)); + } + + size_t size() { return stack.size(); } + bool empty() { return stack.empty(); } +}; + + +} diff --git a/src/Server/TCPProtocolStackHandler.h b/src/Server/TCPProtocolStackHandler.h new file mode 100644 index 00000000000..e16a6b6b2ca --- /dev/null +++ b/src/Server/TCPProtocolStackHandler.h @@ -0,0 +1,46 @@ +#pragma once + +#include +#include +#include +#include +#include + + +namespace DB +{ + + +class TCPProtocolStackHandler : public Poco::Net::TCPServerConnection +{ + using StreamSocket = Poco::Net::StreamSocket; + using TCPServerConnection = Poco::Net::TCPServerConnection; +private: + IServer & server; + TCPServer & tcp_server; + std::vector stack; + std::string conf_name; + +public: + TCPProtocolStackHandler(IServer & server_, TCPServer & tcp_server_, const StreamSocket & socket, const std::vector & stack_, const std::string & conf_name_) + : TCPServerConnection(socket), server(server_), tcp_server(tcp_server_), stack(stack_), conf_name(conf_name_) + {} + + void run() override + { + const auto & conf = server.config(); + TCPProtocolStackData stack_data; + stack_data.socket = socket(); + stack_data.default_database = conf.getString(conf_name + ".default_database", ""); + for (auto & factory : stack) + { + std::unique_ptr connection(factory->createConnection(socket(), tcp_server, stack_data)); + connection->run(); + if (stack_data.socket != socket()) + socket() = stack_data.socket; + } + } +}; + + +} diff --git a/src/Server/TCPServerConnectionFactory.h b/src/Server/TCPServerConnectionFactory.h index 613f98352bd..18b30557b00 100644 --- a/src/Server/TCPServerConnectionFactory.h +++ b/src/Server/TCPServerConnectionFactory.h @@ -1,6 +1,7 @@ #pragma once #include +#include namespace Poco { @@ -23,5 +24,9 @@ public: /// Same as Poco::Net::TCPServerConnectionFactory except we can pass the TCPServer virtual Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer & tcp_server) = 0; + virtual Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer & tcp_server, TCPProtocolStackData &/* stack_data */) + { + return createConnection(socket, tcp_server); + } }; } diff --git a/src/Server/TLSHandler.h b/src/Server/TLSHandler.h new file mode 100644 index 00000000000..5b7377515c1 --- /dev/null +++ b/src/Server/TLSHandler.h @@ -0,0 +1,59 @@ +#pragma once + +#include +#include +#include +#include + +#if USE_SSL +# include +# include +# include +#endif + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int SUPPORT_IS_DISABLED; +} + +class TLSHandler : public Poco::Net::TCPServerConnection +{ +#if USE_SSL + using SecureStreamSocket = Poco::Net::SecureStreamSocket; + using SSLManager = Poco::Net::SSLManager; + using Context = Poco::Net::Context; +#endif + using StreamSocket = Poco::Net::StreamSocket; +public: + explicit TLSHandler(const StreamSocket & socket, const std::string & key_, const std::string & certificate_, TCPProtocolStackData & stack_data_) + : Poco::Net::TCPServerConnection(socket) + , key(key_) + , certificate(certificate_) + , stack_data(stack_data_) + {} + + void run() override + { +#if USE_SSL + auto ctx = SSLManager::instance().defaultServerContext(); + if (!key.empty() && !certificate.empty()) + ctx = new Context(Context::Usage::SERVER_USE, key, certificate, 
ctx->getCAPaths().caLocation); + socket() = SecureStreamSocket::attach(socket(), ctx); + stack_data.socket = socket(); + stack_data.certificate = certificate; +#else + throw Exception{"SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.", + ErrorCodes::SUPPORT_IS_DISABLED}; +#endif + } +private: + std::string key [[maybe_unused]]; + std::string certificate [[maybe_unused]]; + TCPProtocolStackData & stack_data [[maybe_unused]]; +}; + + +} diff --git a/src/Server/TLSHandlerFactory.h b/src/Server/TLSHandlerFactory.h new file mode 100644 index 00000000000..9e3002d2971 --- /dev/null +++ b/src/Server/TLSHandlerFactory.h @@ -0,0 +1,64 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + + +class TLSHandlerFactory : public TCPServerConnectionFactory +{ +private: + IServer & server; + Poco::Logger * log; + std::string conf_name; + + class DummyTCPHandler : public Poco::Net::TCPServerConnection + { + public: + using Poco::Net::TCPServerConnection::TCPServerConnection; + void run() override {} + }; + +public: + explicit TLSHandlerFactory(IServer & server_, const std::string & conf_name_) + : server(server_), log(&Poco::Logger::get("TLSHandlerFactory")), conf_name(conf_name_) + { + } + + Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer & tcp_server) override + { + TCPProtocolStackData stack_data; + return createConnection(socket, tcp_server, stack_data); + } + + Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer &/* tcp_server*/, TCPProtocolStackData & stack_data) override + { + try + { + LOG_TRACE(log, "TCP Request. Address: {}", socket.peerAddress().toString()); + return new TLSHandler( + socket, + server.config().getString(conf_name + ".privateKeyFile", ""), + server.config().getString(conf_name + ".certificateFile", ""), + stack_data); + } + catch (const Poco::Net::NetException &) + { + LOG_TRACE(log, "TCP Request. Client is not connected (most likely RST packet was sent)."); + return new DummyTCPHandler(socket); + } + } +}; + + +} diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index dcd7abae68a..c1e7cefd19e 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -755,9 +755,10 @@ bool isMetadataOnlyConversion(const IDataType * from, const IDataType * to) const auto * nullable_from = typeid_cast(from); const auto * nullable_to = typeid_cast(to); - if (nullable_from && nullable_to) + if (nullable_to) { - from = nullable_from->getNestedType().get(); + /// Here we allow a conversion X -> Nullable(X) to make a metadata-only conversion. + from = nullable_from ? 
nullable_from->getNestedType().get() : from; to = nullable_to->getNestedType().get(); continue; } diff --git a/src/Storages/Distributed/DirectoryMonitor.cpp b/src/Storages/Distributed/DirectoryMonitor.cpp index 9dc3d773e01..4b9667aa95d 100644 --- a/src/Storages/Distributed/DirectoryMonitor.cpp +++ b/src/Storages/Distributed/DirectoryMonitor.cpp @@ -106,7 +106,7 @@ namespace for (size_t replica_index = 1; replica_index <= replicas; ++replica_index) { - address.replica_index = replica_index; + address.replica_index = static_cast(replica_index); make_connection(address); } } diff --git a/src/Storages/ExternalDataSourceConfiguration.h b/src/Storages/ExternalDataSourceConfiguration.h index 0890247eb45..5736336983a 100644 --- a/src/Storages/ExternalDataSourceConfiguration.h +++ b/src/Storages/ExternalDataSourceConfiguration.h @@ -117,7 +117,7 @@ struct URLBasedDataSourceConfiguration struct StorageS3Configuration : URLBasedDataSourceConfiguration { - S3Settings::AuthSettings auth_settings; + S3::AuthSettings auth_settings; S3Settings::ReadWriteSettings rw_settings; }; diff --git a/src/Storages/FileLog/DirectoryWatcherBase.cpp b/src/Storages/FileLog/DirectoryWatcherBase.cpp index 005e1e5fd1b..efcd70d6742 100644 --- a/src/Storages/FileLog/DirectoryWatcherBase.cpp +++ b/src/Storages/FileLog/DirectoryWatcherBase.cpp @@ -70,10 +70,10 @@ void DirectoryWatcherBase::watchFunc() while (!stopped) { const auto & settings = owner.storage.getFileLogSettings(); - if (poll(&pfd, 1, milliseconds_to_wait) > 0 && pfd.revents & POLLIN) + if (poll(&pfd, 1, static_cast(milliseconds_to_wait)) > 0 && pfd.revents & POLLIN) { milliseconds_to_wait = settings->poll_directory_watch_events_backoff_init.totalMilliseconds(); - int n = read(fd, buffer.data(), buffer.size()); + ssize_t n = read(fd, buffer.data(), buffer.size()); int i = 0; if (n > 0) { diff --git a/src/Storages/FileLog/StorageFileLog.cpp b/src/Storages/FileLog/StorageFileLog.cpp index 7848b75deec..722843a7ab6 100644 --- a/src/Storages/FileLog/StorageFileLog.cpp +++ b/src/Storages/FileLog/StorageFileLog.cpp @@ -315,7 +315,7 @@ Pipe StorageFileLog::read( ContextPtr local_context, QueryProcessingStage::Enum /* processed_stage */, size_t /* max_block_size */, - unsigned /* num_streams */) + size_t /* num_streams */) { /// If there are MVs depended on this table, we just forbid reading if (!local_context->getSettingsRef().stream_like_engine_allow_direct_select) diff --git a/src/Storages/FileLog/StorageFileLog.h b/src/Storages/FileLog/StorageFileLog.h index 4295a8a764a..56f2d40ef5a 100644 --- a/src/Storages/FileLog/StorageFileLog.h +++ b/src/Storages/FileLog/StorageFileLog.h @@ -54,7 +54,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; void drop() override; diff --git a/src/Storages/Freeze.cpp b/src/Storages/Freeze.cpp index a2f0395b001..74adf3de0ae 100644 --- a/src/Storages/Freeze.cpp +++ b/src/Storages/Freeze.cpp @@ -194,7 +194,7 @@ bool Unfreezer::removeFreezedPart(DiskPtr disk, const String & path, const Strin if (meta.load(disk, path)) { FreezeMetaData::clean(disk, path); - return StorageReplicatedMergeTree::removeSharedDetachedPart(disk, path, part_name, meta.table_shared_id, meta.zookeeper_name, meta.replica_name, "", local_context, zookeeper); + return StorageReplicatedMergeTree::removeSharedDetachedPart(disk, path, part_name, meta.table_shared_id, meta.replica_name, "", local_context, zookeeper); } } diff --git 
a/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.h b/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.h index dd77fc70358..3726d3aae96 100644 --- a/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.h +++ b/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.h @@ -52,7 +52,7 @@ private: std::future asyncReadInto(char * data, size_t size); IAsynchronousReader & reader; - Int32 priority; + size_t priority; std::shared_ptr impl; std::future prefetch_future; Memory<> prefetch_buffer; diff --git a/src/Storages/HDFS/ReadBufferFromHDFS.cpp b/src/Storages/HDFS/ReadBufferFromHDFS.cpp index 4aebcd6f6ab..3f5c81dc01b 100644 --- a/src/Storages/HDFS/ReadBufferFromHDFS.cpp +++ b/src/Storages/HDFS/ReadBufferFromHDFS.cpp @@ -3,6 +3,7 @@ #if USE_HDFS #include #include +#include #include #include @@ -90,7 +91,7 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory(num_bytes_to_read)); if (bytes_read < 0) throw Exception(ErrorCodes::NETWORK_ERROR, "Fail to read from HDFS: {}, file path: {}. Error: {}", diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index 2170b4142e8..bbabd523c45 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -120,8 +120,15 @@ namespace std::pair getPathFromUriAndUriWithoutPath(const String & uri) { - const size_t begin_of_path = uri.find('/', uri.find("//") + 2); - return {uri.substr(begin_of_path), uri.substr(0, begin_of_path)}; + auto pos = uri.find("//"); + if (pos != std::string::npos && pos + 2 < uri.length()) + { + pos = uri.find('/', pos + 2); + if (pos != std::string::npos) + return {uri.substr(pos), uri.substr(0, pos)}; + } + + throw Exception("Storage HDFS requires valid URL to be set", ErrorCodes::BAD_ARGUMENTS); } std::vector getPathsList(const String & path_from_uri, const String & uri_without_path, ContextPtr context, std::unordered_map * last_mod_times = nullptr) @@ -207,8 +214,8 @@ ColumnsDescription StorageHDFS::getTableStructureFromData( return nullptr; auto compression = chooseCompressionMethod(*it, compression_method); auto impl = std::make_unique(uri_without_path, *it++, ctx->getGlobalContext()->getConfigRef(), ctx->getReadSettings()); - const auto zstd_window_log_max = ctx->getSettingsRef().zstd_window_log_max; - return wrapReadBufferWithCompressionMethod(std::move(impl), compression, zstd_window_log_max); + const Int64 zstd_window_log_max = ctx->getSettingsRef().zstd_window_log_max; + return wrapReadBufferWithCompressionMethod(std::move(impl), compression, static_cast(zstd_window_log_max)); }; ColumnsDescription columns; @@ -349,8 +356,8 @@ bool HDFSSource::initialize() auto compression = chooseCompressionMethod(path_from_uri, storage->compression_method); auto impl = std::make_unique( uri_without_path, path_from_uri, getContext()->getGlobalContext()->getConfigRef(), getContext()->getReadSettings()); - const auto zstd_window_log_max = getContext()->getSettingsRef().zstd_window_log_max; - read_buf = wrapReadBufferWithCompressionMethod(std::move(impl), compression, zstd_window_log_max); + const Int64 zstd_window_log_max = getContext()->getSettingsRef().zstd_window_log_max; + read_buf = wrapReadBufferWithCompressionMethod(std::move(impl), compression, static_cast(zstd_window_log_max)); auto input_format = getContext()->getInputFormat(storage->format_name, *read_buf, block_for_format, max_block_size); @@ -543,7 +550,7 @@ Pipe StorageHDFS::read( ContextPtr context_, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, - unsigned num_streams) + size_t 
num_streams) { std::shared_ptr iterator_wrapper{nullptr}; if (distributed_processing) diff --git a/src/Storages/HDFS/StorageHDFS.h b/src/Storages/HDFS/StorageHDFS.h index 90a42d0c692..b641f5bfb43 100644 --- a/src/Storages/HDFS/StorageHDFS.h +++ b/src/Storages/HDFS/StorageHDFS.h @@ -40,7 +40,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr context) override; diff --git a/src/Storages/HDFS/StorageHDFSCluster.cpp b/src/Storages/HDFS/StorageHDFSCluster.cpp index 467203c58f6..5f9d5ea3d6d 100644 --- a/src/Storages/HDFS/StorageHDFSCluster.cpp +++ b/src/Storages/HDFS/StorageHDFSCluster.cpp @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include @@ -25,6 +24,8 @@ #include #include #include +#include +#include #include @@ -56,6 +57,7 @@ StorageHDFSCluster::StorageHDFSCluster( { auto columns = StorageHDFS::getTableStructureFromData(format_name, uri_, compression_method, context_); storage_metadata.setColumns(columns); + add_columns_structure_to_query = true; } else storage_metadata.setColumns(columns_); @@ -72,7 +74,7 @@ Pipe StorageHDFSCluster::read( ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t /*max_block_size*/, - unsigned /*num_streams*/) + size_t /*num_streams*/) { auto cluster = context->getCluster(cluster_name)->getClusterWithReplicasAsShards(context->getSettingsRef()); @@ -92,6 +94,11 @@ Pipe StorageHDFSCluster::read( const bool add_agg_info = processed_stage == QueryProcessingStage::WithMergeableState; + auto query_to_send = query_info.original_query->clone(); + if (add_columns_structure_to_query) + addColumnsStructureToQueryWithClusterEngine( + query_to_send, StorageDictionary::generateNamesAndTypesDescription(storage_snapshot->metadata->getColumns().getAll()), 3, getName()); + for (const auto & replicas : cluster->getShardsAddresses()) { /// There will be only one replica, because we consider each replica as a shard @@ -110,7 +117,7 @@ Pipe StorageHDFSCluster::read( /// So, task_identifier is passed as constructor argument. It is more obvious. 
auto remote_query_executor = std::make_shared( connection, - queryToString(query_info.original_query), + queryToString(query_to_send), header, context, /*throttler=*/nullptr, diff --git a/src/Storages/HDFS/StorageHDFSCluster.h b/src/Storages/HDFS/StorageHDFSCluster.h index 3239a1e4076..adcc3f5db6e 100644 --- a/src/Storages/HDFS/StorageHDFSCluster.h +++ b/src/Storages/HDFS/StorageHDFSCluster.h @@ -32,7 +32,7 @@ public: std::string getName() const override { return "HDFSCluster"; } Pipe read(const Names &, const StorageSnapshotPtr &, SelectQueryInfo &, - ContextPtr, QueryProcessingStage::Enum, size_t /*max_block_size*/, unsigned /*num_streams*/) override; + ContextPtr, QueryProcessingStage::Enum, size_t /*max_block_size*/, size_t /*num_streams*/) override; QueryProcessingStage::Enum getQueryProcessingStage(ContextPtr, QueryProcessingStage::Enum, const StorageSnapshotPtr &, SelectQueryInfo &) const override; @@ -44,6 +44,7 @@ private: String uri; String format_name; String compression_method; + bool add_columns_structure_to_query = false; }; diff --git a/src/Storages/HDFS/WriteBufferFromHDFS.cpp b/src/Storages/HDFS/WriteBufferFromHDFS.cpp index a179f484652..1f952ec2bd9 100644 --- a/src/Storages/HDFS/WriteBufferFromHDFS.cpp +++ b/src/Storages/HDFS/WriteBufferFromHDFS.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include namespace DB @@ -57,7 +58,7 @@ struct WriteBufferFromHDFS::WriteBufferFromHDFSImpl int write(const char * start, size_t size) const { - int bytes_written = hdfsWrite(fs.get(), fout, start, size); + int bytes_written = hdfsWrite(fs.get(), fout, start, safe_cast(size)); if (write_settings.remote_throttler) write_settings.remote_throttler->add(bytes_written); diff --git a/src/Storages/Hive/HiveFile.cpp b/src/Storages/Hive/HiveFile.cpp index fc08c046f93..8f5b1b5f5fd 100644 --- a/src/Storages/Hive/HiveFile.cpp +++ b/src/Storages/Hive/HiveFile.cpp @@ -210,7 +210,7 @@ std::unique_ptr HiveORCFile::buildMinMaxIndex(c { size_t pos = it->second; /// Attention: column statistics start from 1. 0 has special purpose. 
- const orc::ColumnStatistics * col_stats = statistics->getColumnStatistics(pos + 1); + const orc::ColumnStatistics * col_stats = statistics->getColumnStatistics(static_cast(pos + 1)); idx->hyperrectangle[i] = buildRange(col_stats); } ++i; @@ -297,7 +297,7 @@ void HiveParquetFile::loadSplitMinMaxIndexesImpl() const auto * schema = meta->schema(); for (size_t pos = 0; pos < num_cols; ++pos) { - String column{schema->Column(pos)->name()}; + String column{schema->Column(static_cast(pos))->name()}; boost::to_lower(column); parquet_column_positions[column] = pos; } @@ -306,7 +306,7 @@ void HiveParquetFile::loadSplitMinMaxIndexesImpl() split_minmax_idxes.resize(num_row_groups); for (size_t i = 0; i < num_row_groups; ++i) { - auto row_group_meta = meta->RowGroup(i); + auto row_group_meta = meta->RowGroup(static_cast(i)); split_minmax_idxes[i] = std::make_shared(); split_minmax_idxes[i]->hyperrectangle.resize(num_cols); @@ -321,7 +321,7 @@ void HiveParquetFile::loadSplitMinMaxIndexesImpl() continue; size_t pos = mit->second; - auto col_chunk = row_group_meta->ColumnChunk(pos); + auto col_chunk = row_group_meta->ColumnChunk(static_cast(pos)); if (!col_chunk->is_stats_set()) continue; diff --git a/src/Storages/Hive/StorageHive.cpp b/src/Storages/Hive/StorageHive.cpp index 01ee5a8c3c5..47d7382f7ca 100644 --- a/src/Storages/Hive/StorageHive.cpp +++ b/src/Storages/Hive/StorageHive.cpp @@ -727,7 +727,7 @@ HiveFilePtr StorageHive::getHiveFileIfNeeded( hive_file->getPath(), hive_file->describeMinMaxIndex(sub_minmax_idxes[i])); - skip_splits.insert(i); + skip_splits.insert(static_cast(i)); } } hive_file->setSkipSplits(skip_splits); @@ -749,7 +749,7 @@ Pipe StorageHive::read( ContextPtr context_, QueryProcessingStage::Enum /* processed_stage */, size_t max_block_size, - unsigned num_streams) + size_t num_streams) { lazyInitialize(); @@ -829,7 +829,7 @@ Pipe StorageHive::read( } HiveFiles StorageHive::collectHiveFiles( - unsigned max_threads, + size_t max_threads, const SelectQueryInfo & query_info, const HiveTableMetadataPtr & hive_table_metadata, const HDFSFSPtr & fs, @@ -937,7 +937,13 @@ StorageHive::totalRowsImpl(const Settings & settings, const SelectQueryInfo & qu auto hive_table_metadata = hive_metastore_client->getTableMetadata(hive_database, hive_table); HDFSBuilderWrapper builder = createHDFSBuilder(hdfs_namenode_url, getContext()->getGlobalContext()->getConfigRef()); HDFSFSPtr fs = createHDFSFS(builder.get()); - HiveFiles hive_files = collectHiveFiles(settings.max_threads, query_info, hive_table_metadata, fs, context_, prune_level); + HiveFiles hive_files = collectHiveFiles( + settings.max_threads, + query_info, + hive_table_metadata, + fs, + context_, + prune_level); UInt64 total_rows = 0; for (const auto & hive_file : hive_files) diff --git a/src/Storages/Hive/StorageHive.h b/src/Storages/Hive/StorageHive.h index 9c02d228f97..363042621c7 100644 --- a/src/Storages/Hive/StorageHive.h +++ b/src/Storages/Hive/StorageHive.h @@ -60,7 +60,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; SinkToStoragePtr write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, ContextPtr /*context*/) override; @@ -98,7 +98,7 @@ private: void initMinMaxIndexExpression(); HiveFiles collectHiveFiles( - unsigned max_threads, + size_t max_threads, const SelectQueryInfo & query_info, const HiveTableMetadataPtr & hive_table_metadata, const HDFSFSPtr & fs, diff --git 
a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 5b12b720f1c..7a704a17f4d 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -108,7 +108,7 @@ Pipe IStorage::watch( ContextPtr /*context*/, QueryProcessingStage::Enum & /*processed_stage*/, size_t /*max_block_size*/, - unsigned /*num_streams*/) + size_t /*num_streams*/) { throw Exception("Method watch is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); } @@ -120,7 +120,7 @@ Pipe IStorage::read( ContextPtr /*context*/, QueryProcessingStage::Enum /*processed_stage*/, size_t /*max_block_size*/, - unsigned /*num_streams*/) + size_t /*num_streams*/) { throw Exception("Method read is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); } @@ -133,7 +133,7 @@ void IStorage::read( ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) + size_t num_streams) { auto pipe = read(column_names, storage_snapshot, query_info, context, processed_stage, max_block_size, num_streams); readFromPipe(query_plan, std::move(pipe), column_names, storage_snapshot, query_info, context, getName()); diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 242f17d6f20..fd48d22b12b 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -323,7 +323,7 @@ public: ContextPtr /*context*/, QueryProcessingStage::Enum & /*processed_stage*/, size_t /*max_block_size*/, - unsigned /*num_streams*/); + size_t /*num_streams*/); /// Returns true if FINAL modifier must be added to SELECT query depending on required columns. /// It's needed for ReplacingMergeTree wrappers such as MaterializedMySQL and MaterializedPostrgeSQL @@ -357,7 +357,7 @@ private: ContextPtr /*context*/, QueryProcessingStage::Enum /*processed_stage*/, size_t /*max_block_size*/, - unsigned /*num_streams*/); + size_t /*num_streams*/); public: /// Other version of read which adds reading step to query plan. @@ -370,7 +370,7 @@ public: ContextPtr /*context*/, QueryProcessingStage::Enum /*processed_stage*/, size_t /*max_block_size*/, - unsigned /*num_streams*/); + size_t /*num_streams*/); /** Writes the data to a table. * Receives a description of the query, which can contain information about the data write method. 
diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index fa52850fb39..8e4dd78379e 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -212,7 +212,7 @@ StorageKafka::StorageKafka( , schema_name(getContext()->getMacros()->expand(kafka_settings->kafka_schema.value)) , num_consumers(kafka_settings->kafka_num_consumers.value) , log(&Poco::Logger::get("StorageKafka (" + table_id_.table_name + ")")) - , semaphore(0, num_consumers) + , semaphore(0, static_cast(num_consumers)) , intermediate_commit(kafka_settings->kafka_commit_every_batch.value) , settings_adjustments(createSettingsAdjustments()) , thread_per_consumer(kafka_settings->kafka_thread_per_consumer.value) @@ -291,7 +291,7 @@ Pipe StorageKafka::read( ContextPtr local_context, QueryProcessingStage::Enum /* processed_stage */, size_t /* max_block_size */, - unsigned /* num_streams */) + size_t /* num_streams */) { if (num_created_consumers == 0) return {}; diff --git a/src/Storages/Kafka/StorageKafka.h b/src/Storages/Kafka/StorageKafka.h index 77bad6e17a9..c1c67b19c51 100644 --- a/src/Storages/Kafka/StorageKafka.h +++ b/src/Storages/Kafka/StorageKafka.h @@ -53,7 +53,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; SinkToStoragePtr write( const ASTPtr & query, diff --git a/src/Storages/LiveView/StorageBlocks.h b/src/Storages/LiveView/StorageBlocks.h index bc860a1fa3c..a732ada1da2 100644 --- a/src/Storages/LiveView/StorageBlocks.h +++ b/src/Storages/LiveView/StorageBlocks.h @@ -46,7 +46,7 @@ public: ContextPtr /*context*/, QueryProcessingStage::Enum /*processed_stage*/, size_t /*max_block_size*/, - unsigned /*num_streams*/) override + size_t /*num_streams*/) override { return Pipe::unitePipes(std::move(pipes)); } diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp index e3d19d0a433..3d27205d638 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ -531,7 +531,7 @@ Pipe StorageLiveView::read( ContextPtr /*context*/, QueryProcessingStage::Enum /*processed_stage*/, const size_t /*max_block_size*/, - const unsigned /*num_streams*/) + const size_t /*num_streams*/) { std::lock_guard lock(mutex); @@ -556,7 +556,7 @@ Pipe StorageLiveView::watch( ContextPtr local_context, QueryProcessingStage::Enum & processed_stage, size_t /*max_block_size*/, - const unsigned /*num_streams*/) + const size_t /*num_streams*/) { ASTWatchQuery & query = typeid_cast(*query_info.query); diff --git a/src/Storages/LiveView/StorageLiveView.h b/src/Storages/LiveView/StorageLiveView.h index c6a0379e2ab..31b1c425709 100644 --- a/src/Storages/LiveView/StorageLiveView.h +++ b/src/Storages/LiveView/StorageLiveView.h @@ -143,7 +143,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; Pipe watch( const Names & column_names, @@ -151,7 +151,7 @@ public: ContextPtr context, QueryProcessingStage::Enum & processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; std::shared_ptr getBlocksPtr() { return blocks_ptr; } MergeableBlocksPtr getMergeableBlocks() { return mergeable_blocks; } diff --git a/src/Storages/MeiliSearch/SourceMeiliSearch.cpp b/src/Storages/MeiliSearch/SourceMeiliSearch.cpp index 8e37e469e96..b516ad8d0cf 100644 --- 
a/src/Storages/MeiliSearch/SourceMeiliSearch.cpp +++ b/src/Storages/MeiliSearch/SourceMeiliSearch.cpp @@ -174,7 +174,7 @@ size_t MeiliSearchSource::parseJSON(MutableColumns & columns, const JSON & jres) { ++cnt_fields; const auto & name = kv_pair.getName(); - int pos = description.sample_block.getPositionByName(name); + size_t pos = description.sample_block.getPositionByName(name); MutableColumnPtr & col = columns[pos]; DataTypePtr type_ptr = description.sample_block.getByPosition(pos).type; insertWithTypeId(col, kv_pair.getValue(), type_ptr); diff --git a/src/Storages/MeiliSearch/StorageMeiliSearch.cpp b/src/Storages/MeiliSearch/StorageMeiliSearch.cpp index c5966d9e322..30d49edbb10 100644 --- a/src/Storages/MeiliSearch/StorageMeiliSearch.cpp +++ b/src/Storages/MeiliSearch/StorageMeiliSearch.cpp @@ -80,7 +80,7 @@ Pipe StorageMeiliSearch::read( ContextPtr /*context*/, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, - unsigned) + size_t /*num_streams*/) { storage_snapshot->check(column_names); diff --git a/src/Storages/MeiliSearch/StorageMeiliSearch.h b/src/Storages/MeiliSearch/StorageMeiliSearch.h index d7a2697730c..5fa7ac2c0e3 100644 --- a/src/Storages/MeiliSearch/StorageMeiliSearch.h +++ b/src/Storages/MeiliSearch/StorageMeiliSearch.h @@ -25,7 +25,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context) override; diff --git a/src/Storages/MergeTree/DataPartStorageOnDisk.cpp b/src/Storages/MergeTree/DataPartStorageOnDisk.cpp index e2a2f3f793f..7b36a9873e4 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDisk.cpp +++ b/src/Storages/MergeTree/DataPartStorageOnDisk.cpp @@ -6,12 +6,12 @@ #include #include #include -#include #include #include #include #include #include +#include namespace DB { @@ -29,6 +29,16 @@ DataPartStorageOnDisk::DataPartStorageOnDisk(VolumePtr volume_, std::string root { } +DataPartStorageOnDisk::DataPartStorageOnDisk( + VolumePtr volume_, std::string root_path_, std::string part_dir_, DiskTransactionPtr transaction_) + : volume(std::move(volume_)) + , root_path(std::move(root_path_)) + , part_dir(std::move(part_dir_)) + , transaction(std::move(transaction_)) + , has_shared_transaction(transaction != nullptr) +{ +} + std::string DataPartStorageOnDisk::getFullPath() const { return fs::path(volume->getDisk()->getPath()) / root_path / part_dir / ""; @@ -49,6 +59,11 @@ std::string DataPartStorageOnDisk::getFullRootPath() const return fs::path(volume->getDisk()->getPath()) / root_path / ""; } +MutableDataPartStoragePtr DataPartStorageOnDisk::getProjection(const std::string & name) +{ + return std::shared_ptr(new DataPartStorageOnDisk(volume, std::string(fs::path(root_path) / part_dir), name, transaction)); +} + DataPartStoragePtr DataPartStorageOnDisk::getProjection(const std::string & name) const { return std::make_shared(volume, std::string(fs::path(root_path) / part_dir), name); @@ -113,6 +128,7 @@ static UInt64 calculateTotalSizeOnDiskImpl(const DiskPtr & disk, const String & { if (disk->isFile(from)) return disk->getFileSize(from); + std::vector files; disk->listFiles(from, files); UInt64 res = 0; @@ -135,75 +151,11 @@ std::unique_ptr DataPartStorageOnDisk::readFile( return volume->getDisk()->readFile(fs::path(root_path) / part_dir / name, settings, read_hint, file_size); } -static std::unique_ptr openForReading(const 
DiskPtr & disk, const String & path) -{ - size_t file_size = disk->getFileSize(path); - return disk->readFile(path, ReadSettings().adjustBufferSize(file_size), file_size); -} - -void DataPartStorageOnDisk::loadVersionMetadata(VersionMetadata & version, Poco::Logger * log) const -{ - std::string version_file_name = fs::path(root_path) / part_dir / "txn_version.txt"; - String tmp_version_file_name = version_file_name + ".tmp"; - DiskPtr disk = volume->getDisk(); - - auto remove_tmp_file = [&]() - { - auto last_modified = disk->getLastModified(tmp_version_file_name); - auto buf = openForReading(disk, tmp_version_file_name); - String content; - readStringUntilEOF(content, *buf); - LOG_WARNING(log, "Found file {} that was last modified on {}, has size {} and the following content: {}", - tmp_version_file_name, last_modified.epochTime(), content.size(), content); - disk->removeFile(tmp_version_file_name); - }; - - if (disk->exists(version_file_name)) - { - auto buf = openForReading(disk, version_file_name); - version.read(*buf); - if (disk->exists(tmp_version_file_name)) - remove_tmp_file(); - return; - } - - /// Four (?) cases are possible: - /// 1. Part was created without transactions. - /// 2. Version metadata file was not renamed from *.tmp on part creation. - /// 3. Version metadata were written to *.tmp file, but hard restart happened before fsync. - /// 4. Fsyncs in storeVersionMetadata() work incorrectly. - - if (!disk->exists(tmp_version_file_name)) - { - /// Case 1. - /// We do not have version metadata and transactions history for old parts, - /// so let's consider that such parts were created by some ancient transaction - /// and were committed with some prehistoric CSN. - /// NOTE It might be Case 3, but version metadata file is written on part creation before other files, - /// so it's not Case 3 if part is not broken. - version.setCreationTID(Tx::PrehistoricTID, nullptr); - version.creation_csn = Tx::PrehistoricCSN; - return; - } - - /// Case 2. - /// Content of *.tmp file may be broken, just use fake TID. - /// Transaction was not committed if *.tmp file was not renamed, so we should complete rollback by removing part. - version.setCreationTID(Tx::DummyTID, nullptr); - version.creation_csn = Tx::RolledBackCSN; - remove_tmp_file(); -} - void DataPartStorageOnDisk::checkConsistency(const MergeTreeDataPartChecksums & checksums) const { checksums.checkSizes(volume->getDisk(), getRelativePath()); } -DataPartStorageBuilderPtr DataPartStorageOnDisk::getBuilder() const -{ - return std::make_shared(volume, root_path, part_dir); -} - void DataPartStorageOnDisk::remove( CanRemoveCallback && can_remove_callback, const MergeTreeDataPartChecksums & checksums, @@ -273,7 +225,7 @@ void DataPartStorageOnDisk::remove( try { disk->moveDirectory(from, to); - onRename(root_path, part_dir_without_slash); + part_dir = part_dir_without_slash; } catch (const Exception & e) { @@ -406,14 +358,18 @@ void DataPartStorageOnDisk::clearDirectory( } } -std::string DataPartStorageOnDisk::getRelativePathForPrefix(Poco::Logger * log, const String & prefix, bool detached) const +std::optional DataPartStorageOnDisk::getRelativePathForPrefix(Poco::Logger * log, const String & prefix, bool detached, bool broken) const { + assert(!broken || detached); String res; auto full_relative_path = fs::path(root_path); if (detached) full_relative_path /= "detached"; + std::optional original_checksums_content; + std::optional original_files_list; + for (int try_no = 0; try_no < 10; ++try_no) { res = (prefix.empty() ? 
"" : prefix + "_") + part_dir + (try_no ? "_try" + DB::toString(try_no) : ""); @@ -421,15 +377,67 @@ std::string DataPartStorageOnDisk::getRelativePathForPrefix(Poco::Logger * log, if (!volume->getDisk()->exists(full_relative_path / res)) return res; + if (broken && looksLikeBrokenDetachedPartHasTheSameContent(res, original_checksums_content, original_files_list)) + { + LOG_WARNING(log, "Directory {} (to detach to) already exists, " + "but its content looks similar to content of the broken part which we are going to detach. " + "Assuming it was already cloned to detached, will not do it again to avoid redundant copies of broken part.", res); + return {}; + } + LOG_WARNING(log, "Directory {} (to detach to) already exists. Will detach to directory with '_tryN' suffix.", res); } return res; } -void DataPartStorageBuilderOnDisk::setRelativePath(const std::string & path) +bool DataPartStorageOnDisk::looksLikeBrokenDetachedPartHasTheSameContent(const String & detached_part_path, + std::optional & original_checksums_content, + std::optional & original_files_list) const { - part_dir = path; + /// We cannot know for sure that content of detached part is the same, + /// but in most cases it's enough to compare checksums.txt and list of files. + + if (!exists("checksums.txt")) + return false; + + auto detached_full_path = fs::path(root_path) / "detached" / detached_part_path; + auto disk = volume->getDisk(); + if (!disk->exists(detached_full_path / "checksums.txt")) + return false; + + if (!original_checksums_content) + { + auto in = disk->readFile(detached_full_path / "checksums.txt", /* settings */ {}, /* read_hint */ {}, /* file_size */ {}); + original_checksums_content.emplace(); + readStringUntilEOF(*original_checksums_content, *in); + } + + if (original_checksums_content->empty()) + return false; + + auto part_full_path = fs::path(root_path) / part_dir; + String detached_checksums_content; + { + auto in = readFile("checksums.txt", /* settings */ {}, /* read_hint */ {}, /* file_size */ {}); + readStringUntilEOF(detached_checksums_content, *in); + } + + if (original_checksums_content != detached_checksums_content) + return false; + + if (!original_files_list) + { + original_files_list.emplace(); + disk->listFiles(part_full_path, *original_files_list); + std::sort(original_files_list->begin(), original_files_list->end()); + } + + Strings detached_files_list; + disk->listFiles(detached_full_path, detached_files_list); + std::sort(detached_files_list.begin(), detached_files_list.end()); + + return original_files_list == detached_files_list; } std::string DataPartStorageOnDisk::getDiskName() const @@ -462,7 +470,7 @@ bool DataPartStorageOnDisk::isBroken() const return volume->getDisk()->isBroken(); } -void DataPartStorageOnDisk::syncRevision(UInt64 revision) +void DataPartStorageOnDisk::syncRevision(UInt64 revision) const { volume->getDisk()->syncRevision(revision); } @@ -482,11 +490,6 @@ std::string DataPartStorageOnDisk::getDiskPath() const return volume->getDisk()->getPath(); } -DataPartStorageOnDisk::DisksSet::const_iterator DataPartStorageOnDisk::isStoredOnDisk(const DisksSet & disks) const -{ - return disks.find(volume->getDisk()); -} - ReservationPtr DataPartStorageOnDisk::reserve(UInt64 bytes) const { auto res = volume->reserve(bytes); @@ -501,159 +504,6 @@ ReservationPtr DataPartStorageOnDisk::tryReserve(UInt64 bytes) const return volume->reserve(bytes); } -size_t DataPartStorageOnDisk::getVolumeIndex(const IStoragePolicy & storage_policy) const -{ - return 
storage_policy.getVolumeIndexByDisk(volume->getDisk()); -} - -void DataPartStorageOnDisk::writeChecksums(const MergeTreeDataPartChecksums & checksums, const WriteSettings & settings) const -{ - std::string path = fs::path(root_path) / part_dir / "checksums.txt"; - - try - { - { - auto out = volume->getDisk()->writeFile(path + ".tmp", 4096, WriteMode::Rewrite, settings); - checksums.write(*out); - } - - volume->getDisk()->moveFile(path + ".tmp", path); - } - catch (...) - { - try - { - if (volume->getDisk()->exists(path + ".tmp")) - volume->getDisk()->removeFile(path + ".tmp"); - } - catch (...) - { - tryLogCurrentException("DataPartStorageOnDisk"); - } - - throw; - } -} - -void DataPartStorageOnDisk::writeColumns(const NamesAndTypesList & columns, const WriteSettings & settings) const -{ - std::string path = fs::path(root_path) / part_dir / "columns.txt"; - - try - { - auto buf = volume->getDisk()->writeFile(path + ".tmp", 4096, WriteMode::Rewrite, settings); - columns.writeText(*buf); - buf->finalize(); - - volume->getDisk()->moveFile(path + ".tmp", path); - } - catch (...) - { - try - { - if (volume->getDisk()->exists(path + ".tmp")) - volume->getDisk()->removeFile(path + ".tmp"); - } - catch (...) - { - tryLogCurrentException("DataPartStorageOnDisk"); - } - - throw; - } -} - -void DataPartStorageOnDisk::writeVersionMetadata(const VersionMetadata & version, bool fsync_part_dir) const -{ - std::string path = fs::path(root_path) / part_dir / "txn_version.txt"; - try - { - { - /// TODO IDisk interface does not allow to open file with O_EXCL flag (for DiskLocal), - /// so we create empty file at first (expecting that createFile throws if file already exists) - /// and then overwrite it. - volume->getDisk()->createFile(path + ".tmp"); - auto buf = volume->getDisk()->writeFile(path + ".tmp", 256); - version.write(*buf); - buf->finalize(); - buf->sync(); - } - - SyncGuardPtr sync_guard; - if (fsync_part_dir) - sync_guard = volume->getDisk()->getDirectorySyncGuard(getRelativePath()); - volume->getDisk()->replaceFile(path + ".tmp", path); - - } - catch (...) - { - try - { - if (volume->getDisk()->exists(path + ".tmp")) - volume->getDisk()->removeFile(path + ".tmp"); - } - catch (...) - { - tryLogCurrentException("DataPartStorageOnDisk"); - } - - throw; - } -} - -void DataPartStorageOnDisk::appendCSNToVersionMetadata(const VersionMetadata & version, VersionMetadata::WhichCSN which_csn) const -{ - /// Small enough appends to file are usually atomic, - /// so we append new metadata instead of rewriting file to reduce number of fsyncs. - /// We don't need to do fsync when writing CSN, because in case of hard restart - /// we will be able to restore CSN from transaction log in Keeper. 
- - std::string version_file_name = fs::path(root_path) / part_dir / "txn_version.txt"; - DiskPtr disk = volume->getDisk(); - auto out = disk->writeFile(version_file_name, 256, WriteMode::Append); - version.writeCSN(*out, which_csn); - out->finalize(); -} - -void DataPartStorageOnDisk::appendRemovalTIDToVersionMetadata(const VersionMetadata & version, bool clear) const -{ - String version_file_name = fs::path(root_path) / part_dir / "txn_version.txt"; - DiskPtr disk = volume->getDisk(); - auto out = disk->writeFile(version_file_name, 256, WriteMode::Append); - version.writeRemovalTID(*out, clear); - out->finalize(); - - /// fsync is not required when we clearing removal TID, because after hard restart we will fix metadata - if (!clear) - out->sync(); -} - -void DataPartStorageOnDisk::writeDeleteOnDestroyMarker(Poco::Logger * log) const -{ - String marker_path = fs::path(root_path) / part_dir / "delete-on-destroy.txt"; - auto disk = volume->getDisk(); - try - { - volume->getDisk()->createFile(marker_path); - } - catch (Poco::Exception & e) - { - LOG_ERROR(log, "{} (while creating DeleteOnDestroy marker: {})", e.what(), backQuote(fullPath(disk, marker_path))); - } -} - -void DataPartStorageOnDisk::removeDeleteOnDestroyMarker() const -{ - std::string delete_on_destroy_file_name = fs::path(root_path) / part_dir / "delete-on-destroy.txt"; - volume->getDisk()->removeFileIfExists(delete_on_destroy_file_name); -} - -void DataPartStorageOnDisk::removeVersionMetadata() const -{ - std::string version_file_name = fs::path(root_path) / part_dir / "txn_version.txt"; - volume->getDisk()->removeFileIfExists(version_file_name); -} - String DataPartStorageOnDisk::getUniqueId() const { auto disk = volume->getDisk(); @@ -663,16 +513,6 @@ String DataPartStorageOnDisk::getUniqueId() const return disk->getUniqueId(fs::path(getRelativePath()) / "checksums.txt"); } -bool DataPartStorageOnDisk::shallParticipateInMerges(const IStoragePolicy & storage_policy) const -{ - /// `IMergeTreeDataPart::volume` describes space where current part belongs, and holds - /// `SingleDiskVolume` object which does not contain up-to-date settings of corresponding volume. - /// Therefore we shall obtain volume from storage policy. - auto volume_ptr = storage_policy.getVolume(storage_policy.getVolumeIndexByDisk(volume->getDisk())); - - return !volume_ptr->areMergesAvoided(); -} - void DataPartStorageOnDisk::backup( const MergeTreeDataPartChecksums & checksums, const NameSet & files_without_checksums, @@ -737,7 +577,7 @@ void DataPartStorageOnDisk::backup( } } -DataPartStoragePtr DataPartStorageOnDisk::freeze( +MutableDataPartStoragePtr DataPartStorageOnDisk::freeze( const std::string & to, const std::string & dir_path, bool make_source_readonly, @@ -761,7 +601,7 @@ DataPartStoragePtr DataPartStorageOnDisk::freeze( return std::make_shared(single_disk_volume, to, dir_path); } -DataPartStoragePtr DataPartStorageOnDisk::clone( +MutableDataPartStoragePtr DataPartStorageOnDisk::clonePart( const std::string & to, const std::string & dir_path, const DiskPtr & disk, @@ -774,6 +614,7 @@ DataPartStoragePtr DataPartStorageOnDisk::clone( LOG_WARNING(log, "Path {} already exists. 
Will remove it and clone again.", fullPath(disk, path_to_clone)); disk->removeRecursive(path_to_clone); } + disk->createDirectories(to); volume->getDisk()->copy(getRelativePath(), disk, to); volume->getDisk()->removeFileIfExists(fs::path(path_to_clone) / "delete-on-destroy.txt"); @@ -782,13 +623,7 @@ DataPartStoragePtr DataPartStorageOnDisk::clone( return std::make_shared(single_disk_volume, to, dir_path); } -void DataPartStorageOnDisk::onRename(const std::string & new_root_path, const std::string & new_part_dir) -{ - part_dir = new_part_dir; - root_path = new_root_path; -} - -void DataPartStorageBuilderOnDisk::rename( +void DataPartStorageOnDisk::rename( const std::string & new_root_path, const std::string & new_part_dir, Poco::Logger * log, @@ -809,7 +644,7 @@ void DataPartStorageBuilderOnDisk::rename( "Part directory {} already exists and contains {} files. Removing it.", fullPath(volume->getDisk(), to), files.size()); - transaction->removeRecursive(to); + executeOperation([&](auto & disk) { disk.removeRecursive(to); }); } else { @@ -823,8 +658,12 @@ void DataPartStorageBuilderOnDisk::rename( String from = getRelativePath(); /// Why? - transaction->setLastModified(from, Poco::Timestamp::fromEpochTime(time(nullptr))); - transaction->moveDirectory(from, to); + executeOperation([&](auto & disk) + { + disk.setLastModified(from, Poco::Timestamp::fromEpochTime(time(nullptr))); + disk.moveDirectory(from, to); + }); + part_dir = new_part_dir; root_path = new_root_path; @@ -846,7 +685,7 @@ void DataPartStorageOnDisk::changeRootPath(const std::string & from_root, const --prefix_size; if (prefix_size > root_path.size() - || std::string_view(from_root).substr(0, prefix_size) != std::string_view(root_path).substr(0, prefix_size)) + || std::string_view(from_root).substr(0, prefix_size) != std::string_view(root_path).substr(0, prefix_size)) throw Exception( ErrorCodes::LOGICAL_ERROR, "Cannot change part root to {} because it is not a prefix of current root {}", @@ -859,51 +698,80 @@ void DataPartStorageOnDisk::changeRootPath(const std::string & from_root, const root_path = to_root.substr(0, dst_size) + root_path.substr(prefix_size); } -DataPartStorageBuilderOnDisk::DataPartStorageBuilderOnDisk( - VolumePtr volume_, - std::string root_path_, - std::string part_dir_) - : volume(std::move(volume_)) - , root_path(std::move(root_path_)) - , part_dir(std::move(part_dir_)) - , transaction(volume->getDisk()->createTransaction()) -{ -} - -std::unique_ptr DataPartStorageBuilderOnDisk::writeFile( - const String & name, - size_t buf_size, - const WriteSettings & settings) -{ - return transaction->writeFile(fs::path(root_path) / part_dir / name, buf_size, WriteMode::Rewrite, settings, /* autocommit = */ false); -} - -void DataPartStorageBuilderOnDisk::removeFile(const String & name) -{ - transaction->removeFile(fs::path(root_path) / part_dir / name); -} - -void DataPartStorageBuilderOnDisk::removeFileIfExists(const String & name) -{ - transaction->removeFileIfExists(fs::path(root_path) / part_dir / name); -} - -void DataPartStorageBuilderOnDisk::removeRecursive() -{ - transaction->removeRecursive(fs::path(root_path) / part_dir); -} - -void DataPartStorageBuilderOnDisk::removeSharedRecursive(bool keep_in_remote_fs) -{ - transaction->removeSharedRecursive(fs::path(root_path) / part_dir, keep_in_remote_fs, {}); -} - -SyncGuardPtr DataPartStorageBuilderOnDisk::getDirectorySyncGuard() const +SyncGuardPtr DataPartStorageOnDisk::getDirectorySyncGuard() const { return 
volume->getDisk()->getDirectorySyncGuard(fs::path(root_path) / part_dir); } -void DataPartStorageBuilderOnDisk::createHardLinkFrom(const IDataPartStorage & source, const std::string & from, const std::string & to) const +template +void DataPartStorageOnDisk::executeOperation(Op && op) +{ + if (transaction) + op(*transaction); + else + op(*volume->getDisk()); +} + +std::unique_ptr DataPartStorageOnDisk::writeFile( + const String & name, + size_t buf_size, + const WriteSettings & settings) +{ + if (transaction) + return transaction->writeFile(fs::path(root_path) / part_dir / name, buf_size, WriteMode::Rewrite, settings, /* autocommit = */ false); + + return volume->getDisk()->writeFile(fs::path(root_path) / part_dir / name, buf_size, WriteMode::Rewrite, settings); +} + +std::unique_ptr DataPartStorageOnDisk::writeTransactionFile(WriteMode mode) const +{ + return volume->getDisk()->writeFile(fs::path(root_path) / part_dir / "txn_version.txt", 256, mode); +} + +void DataPartStorageOnDisk::createFile(const String & name) +{ + executeOperation([&](auto & disk) { disk.createFile(fs::path(root_path) / part_dir / name); }); +} + +void DataPartStorageOnDisk::moveFile(const String & from_name, const String & to_name) +{ + executeOperation([&](auto & disk) + { + auto relative_path = fs::path(root_path) / part_dir; + disk.moveFile(relative_path / from_name, relative_path / to_name); + }); +} + +void DataPartStorageOnDisk::replaceFile(const String & from_name, const String & to_name) +{ + executeOperation([&](auto & disk) + { + auto relative_path = fs::path(root_path) / part_dir; + disk.replaceFile(relative_path / from_name, relative_path / to_name); + }); +} + +void DataPartStorageOnDisk::removeFile(const String & name) +{ + executeOperation([&](auto & disk) { disk.removeFile(fs::path(root_path) / part_dir / name); }); +} + +void DataPartStorageOnDisk::removeFileIfExists(const String & name) +{ + executeOperation([&](auto & disk) { disk.removeFileIfExists(fs::path(root_path) / part_dir / name); }); +} + +void DataPartStorageOnDisk::removeRecursive() +{ + executeOperation([&](auto & disk) { disk.removeRecursive(fs::path(root_path) / part_dir); }); +} + +void DataPartStorageOnDisk::removeSharedRecursive(bool keep_in_remote_fs) +{ + executeOperation([&](auto & disk) { disk.removeSharedRecursive(fs::path(root_path) / part_dir, keep_in_remote_fs, {}); }); +} + +void DataPartStorageOnDisk::createHardLinkFrom(const IDataPartStorage & source, const std::string & from, const std::string & to) { const auto * source_on_disk = typeid_cast(&source); if (!source_on_disk) @@ -912,58 +780,43 @@ void DataPartStorageBuilderOnDisk::createHardLinkFrom(const IDataPartStorage & s "Cannot create hardlink from different storage. 
Expected DataPartStorageOnDisk, got {}", typeid(source).name()); - transaction->createHardLink( - fs::path(source_on_disk->getRelativePath()) / from, - fs::path(root_path) / part_dir / to); + executeOperation([&](auto & disk) + { + disk.createHardLink( + fs::path(source_on_disk->getRelativePath()) / from, + fs::path(root_path) / part_dir / to); + }); } -bool DataPartStorageBuilderOnDisk::exists() const +void DataPartStorageOnDisk::createDirectories() { - return volume->getDisk()->exists(fs::path(root_path) / part_dir); + executeOperation([&](auto & disk) { disk.createDirectories(fs::path(root_path) / part_dir); }); } -std::string DataPartStorageBuilderOnDisk::getFullPath() const +void DataPartStorageOnDisk::createProjection(const std::string & name) { - return fs::path(volume->getDisk()->getPath()) / root_path / part_dir; + executeOperation([&](auto & disk) { disk.createDirectory(fs::path(root_path) / part_dir / name); }); } -std::string DataPartStorageBuilderOnDisk::getRelativePath() const +void DataPartStorageOnDisk::beginTransaction() { - return fs::path(root_path) / part_dir; + if (transaction) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Uncommitted {}transaction already exists", has_shared_transaction ? "shared " : ""); + + transaction = volume->getDisk()->createTransaction(); } -void DataPartStorageBuilderOnDisk::createDirectories() +void DataPartStorageOnDisk::commitTransaction() { - transaction->createDirectories(fs::path(root_path) / part_dir); -} + if (!transaction) + throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no uncommitted transaction"); -void DataPartStorageBuilderOnDisk::createProjection(const std::string & name) -{ - transaction->createDirectory(fs::path(root_path) / part_dir / name); -} + if (has_shared_transaction) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot commit shared transaction"); -ReservationPtr DataPartStorageBuilderOnDisk::reserve(UInt64 bytes) -{ - auto res = volume->reserve(bytes); - if (!res) - throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "Cannot reserve {}, not enough space", ReadableSize(bytes)); - - return res; -} - -DataPartStorageBuilderPtr DataPartStorageBuilderOnDisk::getProjection(const std::string & name) const -{ - return std::make_shared(volume, std::string(fs::path(root_path) / part_dir), name); -} - -DataPartStoragePtr DataPartStorageBuilderOnDisk::getStorage() const -{ - return std::make_shared(volume, root_path, part_dir); -} - -void DataPartStorageBuilderOnDisk::commit() -{ transaction->commit(); + transaction.reset(); } } diff --git a/src/Storages/MergeTree/DataPartStorageOnDisk.h b/src/Storages/MergeTree/DataPartStorageOnDisk.h index adf1b78cdfb..bea1596e1f7 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDisk.h +++ b/src/Storages/MergeTree/DataPartStorageOnDisk.h @@ -21,6 +21,7 @@ public: std::string getPartDirectory() const override { return part_dir; } std::string getFullRootPath() const override; + MutableDataPartStoragePtr getProjection(const std::string & name) override; DataPartStoragePtr getProjection(const std::string & name) const override; bool exists() const override; @@ -41,7 +42,6 @@ public: std::optional read_hint, std::optional file_size) const override; - void loadVersionMetadata(VersionMetadata & version, Poco::Logger * log) const override; void checkConsistency(const MergeTreeDataPartChecksums & checksums) const override; void remove( @@ -52,10 +52,14 @@ public: MergeTreeDataPartState state, Poco::Logger * log) override; - std::string getRelativePathForPrefix(Poco::Logger * log, const String & 
prefix, bool detached) const override; + /// Returns path to place detached part in or nullopt if we don't need to detach part (if it already exists and has the same content) + std::optional getRelativePathForPrefix(Poco::Logger * log, const String & prefix, bool detached, bool broken) const override; + + /// Returns true if detached part already exists and has the same content (compares checksums.txt and the list of files) + bool looksLikeBrokenDetachedPartHasTheSameContent(const String & detached_part_path, std::optional & original_checksums_content, + std::optional & original_files_list) const; void setRelativePath(const std::string & path) override; - void onRename(const std::string & new_root_path, const std::string & new_part_dir) override; std::string getDiskName() const override; std::string getDiskType() const override; @@ -63,30 +67,14 @@ public: bool supportZeroCopyReplication() const override; bool supportParallelWrite() const override; bool isBroken() const override; - void syncRevision(UInt64 revision) override; + void syncRevision(UInt64 revision) const override; UInt64 getRevision() const override; std::unordered_map getSerializedMetadata(const std::vector & paths) const override; std::string getDiskPath() const override; - - DisksSet::const_iterator isStoredOnDisk(const DisksSet & disks) const override; - ReservationPtr reserve(UInt64 bytes) const override; ReservationPtr tryReserve(UInt64 bytes) const override; - size_t getVolumeIndex(const IStoragePolicy &) const override; - - void writeChecksums(const MergeTreeDataPartChecksums & checksums, const WriteSettings & settings) const override; - void writeColumns(const NamesAndTypesList & columns, const WriteSettings & settings) const override; - void writeVersionMetadata(const VersionMetadata & version, bool fsync_part_dir) const override; - void appendCSNToVersionMetadata(const VersionMetadata & version, VersionMetadata::WhichCSN which_csn) const override; - void appendRemovalTIDToVersionMetadata(const VersionMetadata & version, bool clear) const override; - void writeDeleteOnDestroyMarker(Poco::Logger * log) const override; - void removeDeleteOnDestroyMarker() const override; - void removeVersionMetadata() const override; - String getUniqueId() const override; - bool shallParticipateInMerges(const IStoragePolicy &) const override; - void backup( const MergeTreeDataPartChecksums & checksums, const NameSet & files_without_checksums, @@ -95,7 +83,7 @@ public: bool make_temporary_hard_links, TemporaryFilesOnDisks * temp_dirs) const override; - DataPartStoragePtr freeze( + MutableDataPartStoragePtr freeze( const std::string & to, const std::string & dir_path, bool make_source_readonly, @@ -103,7 +91,7 @@ public: bool copy_instead_of_hardlink, const NameSet & files_to_copy_instead_of_hardlinks) const override; - DataPartStoragePtr clone( + MutableDataPartStoragePtr clonePart( const std::string & to, const std::string & dir_path, const DiskPtr & disk, @@ -111,11 +99,51 @@ public: void changeRootPath(const std::string & from_root, const std::string & to_root) override; - DataPartStorageBuilderPtr getBuilder() const override; + void createDirectories() override; + void createProjection(const std::string & name) override; + + std::unique_ptr writeFile( + const String & name, + size_t buf_size, + const WriteSettings & settings) override; + + std::unique_ptr writeTransactionFile(WriteMode mode) const override; + + void createFile(const String & name) override; + void moveFile(const String & from_name, const String & to_name) override; 
+ void replaceFile(const String & from_name, const String & to_name) override; + + void removeFile(const String & name) override; + void removeFileIfExists(const String & name) override; + void removeRecursive() override; + void removeSharedRecursive(bool keep_in_remote_fs) override; + + SyncGuardPtr getDirectorySyncGuard() const override; + + void createHardLinkFrom(const IDataPartStorage & source, const std::string & from, const std::string & to) override; + + void rename( + const std::string & new_root_path, + const std::string & new_part_dir, + Poco::Logger * log, + bool remove_new_dir_if_exists, + bool fsync_part_dir) override; + + void beginTransaction() override; + void commitTransaction() override; + bool hasActiveTransaction() const override { return transaction != nullptr; } + private: VolumePtr volume; std::string root_path; std::string part_dir; + DiskTransactionPtr transaction; + bool has_shared_transaction = false; + + DataPartStorageOnDisk(VolumePtr volume_, std::string root_path_, std::string part_dir_, DiskTransactionPtr transaction_); + + template + void executeOperation(Op && op); void clearDirectory( const std::string & dir, @@ -129,56 +157,4 @@ private: bool is_projection) const; }; -class DataPartStorageBuilderOnDisk final : public IDataPartStorageBuilder -{ -public: - DataPartStorageBuilderOnDisk(VolumePtr volume_, std::string root_path_, std::string part_dir_); - - void setRelativePath(const std::string & path) override; - - bool exists() const override; - - void createDirectories() override; - void createProjection(const std::string & name) override; - - std::string getPartDirectory() const override { return part_dir; } - std::string getFullPath() const override; - std::string getRelativePath() const override; - - std::unique_ptr writeFile( - const String & name, - size_t buf_size, - const WriteSettings & settings) override; - - void removeFile(const String & name) override; - void removeFileIfExists(const String & name) override; - void removeRecursive() override; - void removeSharedRecursive(bool keep_in_remote_fs) override; - - SyncGuardPtr getDirectorySyncGuard() const override; - - void createHardLinkFrom(const IDataPartStorage & source, const std::string & from, const std::string & to) const override; - - ReservationPtr reserve(UInt64 bytes) override; - - DataPartStorageBuilderPtr getProjection(const std::string & name) const override; - - DataPartStoragePtr getStorage() const override; - - void rename( - const std::string & new_root_path, - const std::string & new_part_dir, - Poco::Logger * log, - bool remove_new_dir_if_exists, - bool fsync_part_dir) override; - - void commit() override; - -private: - VolumePtr volume; - std::string root_path; - std::string part_dir; - DiskTransactionPtr transaction; -}; - } diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index 475461aa0d6..4f9c9ffd596 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -13,9 +13,9 @@ #include #include #include +#include #include #include -#include #include #include #include @@ -147,12 +147,13 @@ void Service::processQuery(const HTMLForm & params, ReadBuffer & /*body*/, Write CurrentMetrics::Increment metric_increment{CurrentMetrics::ReplicatedSend}; - if (part->data_part_storage->isStoredOnRemoteDisk()) + if (part->getDataPartStorage().isStoredOnRemoteDisk()) { UInt64 revision = parse(params.get("disk_revision", "0")); if (revision) - 
part->data_part_storage->syncRevision(revision); - revision = part->data_part_storage->getRevision(); + part->getDataPartStorage().syncRevision(revision); + + revision = part->getDataPartStorage().getRevision(); if (revision) response.addCookie({"disk_revision", toString(revision)}); } @@ -179,43 +180,32 @@ void Service::processQuery(const HTMLForm & params, ReadBuffer & /*body*/, Write std::sregex_token_iterator(remote_fs_metadata.begin(), remote_fs_metadata.end(), re, -1), std::sregex_token_iterator()); + bool send_projections = client_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_PROJECTION; + if (send_projections) + { + const auto & projections = part->getProjectionParts(); + writeBinary(projections.size(), out); + } + if (data_settings->allow_remote_fs_zero_copy_replication && /// In memory data part does not have metadata yet. !isInMemoryPart(part) && client_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_ZERO_COPY) { - auto disk_type = part->data_part_storage->getDiskType(); - if (part->data_part_storage->supportZeroCopyReplication() && std::find(capability.begin(), capability.end(), disk_type) != capability.end()) + auto disk_type = part->getDataPartStorage().getDiskType(); + if (part->getDataPartStorage().supportZeroCopyReplication() && std::find(capability.begin(), capability.end(), disk_type) != capability.end()) { /// Send metadata if the receiver's capability covers the source disk type. response.addCookie({"remote_fs_metadata", disk_type}); - if (client_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_PROJECTION) - { - const auto & projections = part->getProjectionParts(); - writeBinary(projections.size(), out); - } - - sendPartFromDiskRemoteMeta(part, out, true, part->getProjectionParts()); + sendPartFromDiskRemoteMeta(part, out, true, send_projections); return; } } - if (client_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_PROJECTION) - { - const auto & projections = part->getProjectionParts(); - writeBinary(projections.size(), out); - if (isInMemoryPart(part)) - sendPartFromMemory(part, out, projections); - else - sendPartFromDisk(part, out, client_protocol_version, projections); - } + if (isInMemoryPart(part)) + sendPartFromMemory(part, out, send_projections); else - { - if (isInMemoryPart(part)) - sendPartFromMemory(part, out); - else - sendPartFromDisk(part, out, client_protocol_version); - } + sendPartFromDisk(part, out, client_protocol_version, send_projections); } catch (const NetException &) { @@ -237,20 +227,23 @@ void Service::processQuery(const HTMLForm & params, ReadBuffer & /*body*/, Write } void Service::sendPartFromMemory( - const MergeTreeData::DataPartPtr & part, WriteBuffer & out, const std::map> & projections) + const MergeTreeData::DataPartPtr & part, WriteBuffer & out, bool send_projections) { auto metadata_snapshot = data.getInMemoryMetadataPtr(); - for (const auto & [name, projection] : projections) + if (send_projections) { - auto projection_sample_block = metadata_snapshot->projections.get(name).sample_block; - auto part_in_memory = asInMemoryPart(projection); - if (!part_in_memory) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Projection {} of part {} is not stored in memory", name, part->name); + for (const auto & [name, projection] : part->getProjectionParts()) + { + auto projection_sample_block = metadata_snapshot->projections.get(name).sample_block; + auto part_in_memory = asInMemoryPart(projection); + if (!part_in_memory) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Projection {} of part {} 
is not stored in memory", name, part->name); - writeStringBinary(name, out); - projection->checksums.write(out); - NativeWriter block_out(out, 0, projection_sample_block); - block_out.write(part_in_memory->block); + writeStringBinary(name, out); + projection->checksums.write(out); + NativeWriter block_out(out, 0, projection_sample_block); + block_out.write(part_in_memory->block); + } } auto part_in_memory = asInMemoryPart(part); @@ -268,7 +261,7 @@ MergeTreeData::DataPart::Checksums Service::sendPartFromDisk( const MergeTreeData::DataPartPtr & part, WriteBuffer & out, int client_protocol_version, - const std::map> & projections) + bool send_projections) { /// We'll take a list of files from the list of checksums. MergeTreeData::DataPart::Checksums checksums = part->checksums; @@ -276,7 +269,8 @@ MergeTreeData::DataPart::Checksums Service::sendPartFromDisk( auto file_names_without_checksums = part->getFileNamesWithoutChecksums(); for (const auto & file_name : file_names_without_checksums) { - if (client_protocol_version < REPLICATION_PROTOCOL_VERSION_WITH_PARTS_DEFAULT_COMPRESSION && file_name == IMergeTreeDataPart::DEFAULT_COMPRESSION_CODEC_FILE_NAME) + if (client_protocol_version < REPLICATION_PROTOCOL_VERSION_WITH_PARTS_DEFAULT_COMPRESSION + && file_name == IMergeTreeDataPart::DEFAULT_COMPRESSION_CODEC_FILE_NAME) continue; checksums.files[file_name] = {}; @@ -287,11 +281,10 @@ MergeTreeData::DataPart::Checksums Service::sendPartFromDisk( { // Get rid of projection files checksums.files.erase(name + ".proj"); - auto it = projections.find(name); - if (it != projections.end()) + if (send_projections) { writeStringBinary(name, out); - MergeTreeData::DataPart::Checksums projection_checksum = sendPartFromDisk(it->second, out, client_protocol_version); + MergeTreeData::DataPart::Checksums projection_checksum = sendPartFromDisk(projection, out, client_protocol_version, false); data_checksums.addFile(name + ".proj", projection_checksum.getTotalSizeOnDisk(), projection_checksum.getTotalChecksumUInt128()); } else if (part->checksums.has(name + ".proj")) @@ -307,12 +300,12 @@ MergeTreeData::DataPart::Checksums Service::sendPartFromDisk( { String file_name = it.first; - UInt64 size = part->data_part_storage->getFileSize(file_name); + UInt64 size = part->getDataPartStorage().getFileSize(file_name); writeStringBinary(it.first, out); writeBinary(size, out); - auto file_in = part->data_part_storage->readFile(file_name, {}, std::nullopt, std::nullopt); + auto file_in = part->getDataPartStorage().readFile(file_name, {}, std::nullopt, std::nullopt); HashingWriteBuffer hashing_out(out); copyDataWithThrottler(*file_in, hashing_out, blocker.getCounter(), data.getSendsThrottler()); @@ -323,7 +316,7 @@ MergeTreeData::DataPart::Checksums Service::sendPartFromDisk( throw Exception( ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART, "Unexpected size of file {}, expected {} got {}", - std::string(fs::path(part->data_part_storage->getRelativePath()) / file_name), + std::string(fs::path(part->getDataPartStorage().getRelativePath()) / file_name), hashing_out.count(), size); writePODBinary(hashing_out.getHash(), out); @@ -336,18 +329,15 @@ MergeTreeData::DataPart::Checksums Service::sendPartFromDisk( return data_checksums; } -MergeTreeData::DataPart::Checksums Service::sendPartFromDiskRemoteMeta( +void Service::sendPartFromDiskRemoteMeta( const MergeTreeData::DataPartPtr & part, WriteBuffer & out, bool send_part_id, - const std::map> & projections) + bool send_projections) { - const auto * data_part_storage_on_disk = 
dynamic_cast(part->data_part_storage.get()); - if (!data_part_storage_on_disk) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Storage '{}' doesn't support zero-copy replication", part->data_part_storage->getDiskName()); - - if (!data_part_storage_on_disk->supportZeroCopyReplication()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Disk '{}' doesn't support zero-copy replication", data_part_storage_on_disk->getDiskName()); + auto data_part_storage = part->getDataPartStoragePtr(); + if (!data_part_storage->supportZeroCopyReplication()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Disk '{}' doesn't support zero-copy replication", data_part_storage->getDiskName()); /// We'll take a list of files from the list of checksums. MergeTreeData::DataPart::Checksums checksums = part->checksums; @@ -365,33 +355,23 @@ MergeTreeData::DataPart::Checksums Service::sendPartFromDiskRemoteMeta( std::vector paths; paths.reserve(checksums.files.size()); for (const auto & it : checksums.files) - paths.push_back(fs::path(part->data_part_storage->getRelativePath()) / it.first); + paths.push_back(fs::path(part->getDataPartStorage().getRelativePath()) / it.first); /// Serialized metadatadatas with zero ref counts. - auto metadatas = data_part_storage_on_disk->getSerializedMetadata(paths); + auto metadatas = data_part_storage->getSerializedMetadata(paths); if (send_part_id) { - String part_id = data_part_storage_on_disk->getUniqueId(); + String part_id = data_part_storage->getUniqueId(); writeStringBinary(part_id, out); } - MergeTreeData::DataPart::Checksums data_checksums; - for (const auto & [name, projection] : part->getProjectionParts()) + if (send_projections) { - auto it = projections.find(name); - if (it != projections.end()) + for (const auto & [name, projection] : part->getProjectionParts()) { - writeStringBinary(name, out); - MergeTreeData::DataPart::Checksums projection_checksum = sendPartFromDiskRemoteMeta(it->second, out, false); - data_checksums.addFile(name + ".proj", projection_checksum.getTotalSizeOnDisk(), projection_checksum.getTotalChecksumUInt128()); - } - else if (part->checksums.has(name + ".proj")) - { - // We don't send this projection, just add out checksum to bypass the following check - const auto & our_checksum = part->checksums.files.find(name + ".proj")->second; - data_checksums.addFile(name + ".proj", our_checksum.file_size, our_checksum.file_hash); + sendPartFromDiskRemoteMeta(projection, out, false, false); } } @@ -399,10 +379,10 @@ MergeTreeData::DataPart::Checksums Service::sendPartFromDiskRemoteMeta( for (const auto & it : checksums.files) { const String & file_name = it.first; - String file_path_prefix = fs::path(part->data_part_storage->getRelativePath()) / file_name; + String file_path_prefix = fs::path(part->getDataPartStorage().getRelativePath()) / file_name; /// Just some additional checks - String metadata_file_path = fs::path(data_part_storage_on_disk->getDiskPath()) / file_path_prefix; + String metadata_file_path = fs::path(data_part_storage->getDiskPath()) / file_path_prefix; fs::path metadata(metadata_file_path); if (!fs::exists(metadata)) throw Exception(ErrorCodes::CORRUPTED_DATA, "Remote metadata '{}' is not exists", file_name); @@ -426,12 +406,7 @@ MergeTreeData::DataPart::Checksums Service::sendPartFromDiskRemoteMeta( throw Exception(ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART, "Unexpected size of file {}", metadata_file_path); writePODBinary(hashing_out.getHash(), out); - - if (!file_names_without_checksums.contains(file_name)) - data_checksums.addFile(file_name, 
hashing_out.count(), hashing_out.getHash()); } - - return data_checksums; } MergeTreeData::DataPartPtr Service::findPart(const String & name) @@ -706,74 +681,54 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchSelectedPart( in->setNextCallback(ReplicatedFetchReadCallback(*entry)); - return part_type == "InMemory" - ? downloadPartToMemory(part_name, part_uuid, metadata_snapshot, context, disk, *in, projections, throttler) - : downloadPartToDisk(part_name, replica_path, to_detached, tmp_prefix, sync, disk, *in, projections, checksums, throttler); + if (part_type == "InMemory") + { + auto volume = std::make_shared("volume_" + part_name, disk, 0); + + auto data_part_storage = std::make_shared( + volume, + data.getRelativeDataPath(), + part_name); + + return downloadPartToMemory( + data_part_storage, part_name, + MergeTreePartInfo::fromPartName(part_name, data.format_version), + part_uuid, metadata_snapshot, context, *in, + projections, false, throttler); + } + + return downloadPartToDisk( + part_name, replica_path, to_detached, tmp_prefix, + sync, disk, *in, projections, checksums, throttler); } MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToMemory( + MutableDataPartStoragePtr data_part_storage, const String & part_name, + const MergeTreePartInfo & part_info, const UUID & part_uuid, const StorageMetadataPtr & metadata_snapshot, ContextPtr context, - DiskPtr disk, PooledReadWriteBufferFromHTTP & in, size_t projections, + bool is_projection, ThrottlerPtr throttler) { - auto volume = std::make_shared("volume_" + part_name, disk, 0); + auto new_data_part = std::make_shared(data, part_name, part_info, data_part_storage); - auto data_part_storage = std::make_shared( - volume, - data.getRelativeDataPath(), - part_name); - - auto data_part_storage_builder = std::make_shared( - volume, - data.getRelativeDataPath(), - part_name); - - MergeTreeData::MutableDataPartPtr new_data_part = - std::make_shared(data, part_name, data_part_storage); - new_data_part->version.setCreationTID(Tx::PrehistoricTID, nullptr); - - for (auto i = 0ul; i < projections; ++i) + for (size_t i = 0; i < projections; ++i) { String projection_name; readStringBinary(projection_name, in); - MergeTreeData::DataPart::Checksums checksums; - if (!checksums.read(in)) - throw Exception("Cannot deserialize checksums", ErrorCodes::CORRUPTED_DATA); - - NativeReader block_in(in, 0); - auto block = block_in.read(); - throttler->add(block.bytes()); - - auto projection_part_storage = data_part_storage->getProjection(projection_name + ".proj"); - auto projection_part_storage_builder = data_part_storage_builder->getProjection(projection_name + ".proj"); MergeTreePartInfo new_part_info("all", 0, 0, 0); - MergeTreeData::MutableDataPartPtr new_projection_part = - std::make_shared(data, projection_name, new_part_info, projection_part_storage, new_data_part.get()); + auto projection_part_storage = data_part_storage->getProjection(projection_name + ".proj"); - new_projection_part->is_temp = false; - new_projection_part->setColumns(block.getNamesAndTypesList(), {}); - MergeTreePartition partition{}; - new_projection_part->partition = std::move(partition); - new_projection_part->minmax_idx = std::make_shared(); + auto new_projection_part = downloadPartToMemory( + projection_part_storage, projection_name, + new_part_info, part_uuid, metadata_snapshot, + context, in, 0, true, throttler); - MergedBlockOutputStream part_out( - new_projection_part, - projection_part_storage_builder, - metadata_snapshot->projections.get(projection_name).metadata, - 
block.getNamesAndTypesList(), - {}, - CompressionCodecFactory::instance().get("NONE", {}), - NO_TRANSACTION_PTR); - - part_out.write(block); - part_out.finalizePart(new_projection_part, false); - new_projection_part->checksums.checkEqual(checksums, /* have_uncompressed = */ true); new_data_part->addProjectionPart(projection_name, std::move(new_projection_part)); } @@ -785,14 +740,19 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToMemory( auto block = block_in.read(); throttler->add(block.bytes()); - new_data_part->uuid = part_uuid; - new_data_part->is_temp = true; new_data_part->setColumns(block.getNamesAndTypesList(), {}); - new_data_part->minmax_idx->update(block, data.getMinMaxColumnsNames(metadata_snapshot->getPartitionKey())); - new_data_part->partition.create(metadata_snapshot, block, 0, context); + + if (!is_projection) + { + new_data_part->version.setCreationTID(Tx::PrehistoricTID, nullptr); + new_data_part->uuid = part_uuid; + new_data_part->is_temp = true; + new_data_part->minmax_idx->update(block, data.getMinMaxColumnsNames(metadata_snapshot->getPartitionKey())); + new_data_part->partition.create(metadata_snapshot, block, 0, context); + } MergedBlockOutputStream part_out( - new_data_part, data_part_storage_builder, metadata_snapshot, block.getNamesAndTypesList(), {}, + new_data_part, metadata_snapshot, block.getNamesAndTypesList(), {}, CompressionCodecFactory::instance().get("NONE", {}), NO_TRANSACTION_PTR); part_out.write(block); @@ -804,7 +764,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToMemory( void Fetcher::downloadBasePartOrProjectionPartToDiskRemoteMeta( const String & replica_path, - DataPartStorageBuilderPtr & data_part_storage_builder, + const MutableDataPartStoragePtr & data_part_storage, PooledReadWriteBufferFromHTTP & in, MergeTreeData::DataPart::Checksums & checksums, ThrottlerPtr throttler) const @@ -820,7 +780,7 @@ void Fetcher::downloadBasePartOrProjectionPartToDiskRemoteMeta( readStringBinary(file_name, in); readBinary(file_size, in); - String metadata_file = fs::path(data_part_storage_builder->getFullPath()) / file_name; + String metadata_file = fs::path(data_part_storage->getFullPath()) / file_name; { auto file_out = std::make_unique(metadata_file, DBMS_DEFAULT_BUFFER_SIZE, -1, 0666, nullptr, 0); @@ -834,8 +794,8 @@ void Fetcher::downloadBasePartOrProjectionPartToDiskRemoteMeta( /// NOTE The is_cancelled flag also makes sense to check every time you read over the network, /// performing a poll with a not very large timeout. /// And now we check it only between read chunks (in the `copyData` function). - data_part_storage_builder->removeSharedRecursive(true); - data_part_storage_builder->commit(); + data_part_storage->removeSharedRecursive(true); + data_part_storage->commitTransaction(); throw Exception("Fetching of part was cancelled", ErrorCodes::ABORTED); } @@ -855,13 +815,12 @@ void Fetcher::downloadBasePartOrProjectionPartToDiskRemoteMeta( checksums.addFile(file_name, file_size, expected_hash); } } - } void Fetcher::downloadBaseOrProjectionPartToDisk( const String & replica_path, - DataPartStorageBuilderPtr & data_part_storage_builder, + const MutableDataPartStoragePtr & data_part_storage, bool sync, PooledReadWriteBufferFromHTTP & in, MergeTreeData::DataPart::Checksums & checksums, @@ -880,14 +839,14 @@ void Fetcher::downloadBaseOrProjectionPartToDisk( /// File must be inside "absolute_part_path" directory. /// Otherwise malicious ClickHouse replica may force us to write to arbitrary path. 
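The check that follows canonicalises the joined path and rejects anything that escapes the part directory. A standalone illustration of the same idea, reduced to a free function with hypothetical paths:

#include <filesystem>
#include <iostream>
#include <string>

namespace fs = std::filesystem;

/// Returns true only if part_dir / file_name still resolves to a location under part_dir,
/// so a name such as "../../etc/passwd" received from an untrusted replica is rejected.
bool isInsidePartDirectory(const std::string & part_dir, const std::string & file_name)
{
    std::string base = fs::weakly_canonical(part_dir).string();
    std::string full = fs::weakly_canonical(fs::path(part_dir) / file_name).string();
    return full.size() >= base.size() && full.compare(0, base.size(), base) == 0;
}

int main()
{
    std::cout << isInsidePartDirectory("store/all_1_1_0", "columns.txt") << '\n';       /// typically prints 1
    std::cout << isInsidePartDirectory("store/all_1_1_0", "../../etc/passwd") << '\n';  /// typically prints 0
}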
- String absolute_file_path = fs::weakly_canonical(fs::path(data_part_storage_builder->getRelativePath()) / file_name); - if (!startsWith(absolute_file_path, fs::weakly_canonical(data_part_storage_builder->getRelativePath()).string())) + String absolute_file_path = fs::weakly_canonical(fs::path(data_part_storage->getRelativePath()) / file_name); + if (!startsWith(absolute_file_path, fs::weakly_canonical(data_part_storage->getRelativePath()).string())) throw Exception(ErrorCodes::INSECURE_PATH, "File path ({}) doesn't appear to be inside part path ({}). " "This may happen if we are trying to download part from malicious replica or logical error.", - absolute_file_path, data_part_storage_builder->getRelativePath()); + absolute_file_path, data_part_storage->getRelativePath()); - auto file_out = data_part_storage_builder->writeFile(file_name, std::min(file_size, DBMS_DEFAULT_BUFFER_SIZE), {}); + auto file_out = data_part_storage->writeFile(file_name, std::min(file_size, DBMS_DEFAULT_BUFFER_SIZE), {}); HashingWriteBuffer hashing_out(*file_out); copyDataWithThrottler(in, hashing_out, file_size, blocker.getCounter(), throttler); @@ -896,7 +855,7 @@ void Fetcher::downloadBaseOrProjectionPartToDisk( /// NOTE The is_cancelled flag also makes sense to check every time you read over the network, /// performing a poll with a not very large timeout. /// And now we check it only between read chunks (in the `copyData` function). - data_part_storage_builder->removeRecursive(); + data_part_storage->removeRecursive(); throw Exception("Fetching of part was cancelled", ErrorCodes::ABORTED); } @@ -906,7 +865,7 @@ void Fetcher::downloadBaseOrProjectionPartToDisk( if (expected_hash != hashing_out.getHash()) throw Exception(ErrorCodes::CHECKSUM_DOESNT_MATCH, "Checksum mismatch for file {} transferred from {}", - (fs::path(data_part_storage_builder->getFullPath()) / file_name).string(), + (fs::path(data_part_storage->getFullPath()) / file_name).string(), replica_path); if (file_name != "checksums.txt" && @@ -951,15 +910,12 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToDisk( part_relative_path, part_dir); - DataPartStorageBuilderPtr data_part_storage_builder = std::make_shared( - volume, - part_relative_path, - part_dir); + data_part_storage->beginTransaction(); - if (data_part_storage_builder->exists()) + if (data_part_storage->exists()) { LOG_WARNING(log, "Directory {} already exists, probably result of a failed fetch. Will remove it before fetching part.", - data_part_storage_builder->getFullPath()); + data_part_storage->getFullPath()); /// Even if it's a temporary part it could be downloaded with zero copy replication and this function /// is executed as a callback. @@ -967,37 +923,36 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToDisk( /// We don't control the amount of refs for temporary parts so we cannot decide can we remove blobs /// or not. 
So we are not doing it bool keep_shared = disk->supportZeroCopyReplication() && data_settings->allow_remote_fs_zero_copy_replication; - data_part_storage_builder->removeSharedRecursive(keep_shared); + data_part_storage->removeSharedRecursive(keep_shared); } - data_part_storage_builder->createDirectories(); + data_part_storage->createDirectories(); SyncGuardPtr sync_guard; if (data.getSettings()->fsync_part_directory) - sync_guard = disk->getDirectorySyncGuard(data_part_storage->getRelativePath()); + sync_guard = data_part_storage->getDirectorySyncGuard(); CurrentMetrics::Increment metric_increment{CurrentMetrics::ReplicatedFetch}; - for (auto i = 0ul; i < projections; ++i) + for (size_t i = 0; i < projections; ++i) { String projection_name; readStringBinary(projection_name, in); MergeTreeData::DataPart::Checksums projection_checksum; auto projection_part_storage = data_part_storage->getProjection(projection_name + ".proj"); - auto projection_part_storage_builder = data_part_storage_builder->getProjection(projection_name + ".proj"); - - projection_part_storage_builder->createDirectories(); + projection_part_storage->createDirectories(); downloadBaseOrProjectionPartToDisk( - replica_path, projection_part_storage_builder, sync, in, projection_checksum, throttler); + replica_path, projection_part_storage, sync, in, projection_checksum, throttler); checksums.addFile( projection_name + ".proj", projection_checksum.getTotalSizeOnDisk(), projection_checksum.getTotalChecksumUInt128()); } // Download the base part - downloadBaseOrProjectionPartToDisk(replica_path, data_part_storage_builder, sync, in, checksums, throttler); + downloadBaseOrProjectionPartToDisk(replica_path, data_part_storage, sync, in, checksums, throttler); assertEOF(in); + data_part_storage->commitTransaction(); MergeTreeData::MutableDataPartPtr new_data_part = data.createPart(part_name, data_part_storage); new_data_part->version.setCreationTID(Tx::PrehistoricTID, nullptr); new_data_part->is_temp = true; @@ -1043,49 +998,43 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToDiskRemoteMeta( part_relative_path, part_dir); - DataPartStorageBuilderPtr data_part_storage_builder = std::make_shared( - volume, - part_relative_path, - part_dir); + data_part_storage->beginTransaction(); if (data_part_storage->exists()) throw Exception(ErrorCodes::DIRECTORY_ALREADY_EXISTS, "Directory {} already exists.", data_part_storage->getFullPath()); CurrentMetrics::Increment metric_increment{CurrentMetrics::ReplicatedFetch}; - volume->getDisk()->createDirectories(data_part_storage->getFullPath()); + data_part_storage->createDirectories(); - for (auto i = 0ul; i < projections; ++i) + for (size_t i = 0; i < projections; ++i) { String projection_name; readStringBinary(projection_name, in); MergeTreeData::DataPart::Checksums projection_checksum; auto projection_part_storage = data_part_storage->getProjection(projection_name + ".proj"); - auto projection_part_storage_builder = data_part_storage_builder->getProjection(projection_name + ".proj"); - - projection_part_storage_builder->createDirectories(); + projection_part_storage->createDirectories(); downloadBasePartOrProjectionPartToDiskRemoteMeta( - replica_path, projection_part_storage_builder, in, projection_checksum, throttler); + replica_path, projection_part_storage, in, projection_checksum, throttler); checksums.addFile( projection_name + ".proj", projection_checksum.getTotalSizeOnDisk(), projection_checksum.getTotalChecksumUInt128()); } downloadBasePartOrProjectionPartToDiskRemoteMeta( - 
replica_path, data_part_storage_builder, in, checksums, throttler); + replica_path, data_part_storage, in, checksums, throttler); assertEOF(in); MergeTreeData::MutableDataPartPtr new_data_part; try { - data_part_storage_builder->commit(); + data_part_storage->commitTransaction(); new_data_part = data.createPart(part_name, data_part_storage); new_data_part->version.setCreationTID(Tx::PrehistoricTID, nullptr); new_data_part->is_temp = true; new_data_part->modification_time = time(nullptr); - new_data_part->loadColumnsChecksumsIndexes(true, false); } #if USE_AWS_S3 diff --git a/src/Storages/MergeTree/DataPartsExchange.h b/src/Storages/MergeTree/DataPartsExchange.h index 9e453ffb422..6c92fad4092 100644 --- a/src/Storages/MergeTree/DataPartsExchange.h +++ b/src/Storages/MergeTree/DataPartsExchange.h @@ -1,5 +1,6 @@ #pragma once +#include "Storages/MergeTree/MergeTreePartInfo.h" #include #include #include @@ -42,19 +43,19 @@ private: void sendPartFromMemory( const MergeTreeData::DataPartPtr & part, WriteBuffer & out, - const std::map> & projections = {}); + bool send_projections); MergeTreeData::DataPart::Checksums sendPartFromDisk( const MergeTreeData::DataPartPtr & part, WriteBuffer & out, int client_protocol_version, - const std::map> & projections = {}); + bool send_projections); - MergeTreeData::DataPart::Checksums sendPartFromDiskRemoteMeta( + void sendPartFromDiskRemoteMeta( const MergeTreeData::DataPartPtr & part, WriteBuffer & out, bool send_part_id, - const std::map> & projections = {}); + bool send_projections); /// StorageReplicatedMergeTree::shutdown() waits for all parts exchange handlers to finish, /// so Service will never access dangling reference to storage @@ -94,7 +95,7 @@ public: private: void downloadBaseOrProjectionPartToDisk( const String & replica_path, - DataPartStorageBuilderPtr & data_part_storage_builder, + const MutableDataPartStoragePtr & data_part_storage, bool sync, PooledReadWriteBufferFromHTTP & in, MergeTreeData::DataPart::Checksums & checksums, @@ -102,12 +103,11 @@ private: void downloadBasePartOrProjectionPartToDiskRemoteMeta( const String & replica_path, - DataPartStorageBuilderPtr & data_part_storage_builder, + const MutableDataPartStoragePtr & data_part_storage, PooledReadWriteBufferFromHTTP & in, MergeTreeData::DataPart::Checksums & checksums, ThrottlerPtr throttler) const; - MergeTreeData::MutableDataPartPtr downloadPartToDisk( const String & part_name, const String & replica_path, @@ -121,13 +121,15 @@ private: ThrottlerPtr throttler); MergeTreeData::MutableDataPartPtr downloadPartToMemory( + MutableDataPartStoragePtr data_part_storage, const String & part_name, + const MergeTreePartInfo & part_info, const UUID & part_uuid, const StorageMetadataPtr & metadata_snapshot, ContextPtr context, - DiskPtr disk, PooledReadWriteBufferFromHTTP & in, size_t projections, + bool is_projection, ThrottlerPtr throttler); MergeTreeData::MutableDataPartPtr downloadPartToDiskRemoteMeta( diff --git a/src/Storages/MergeTree/IDataPartStorage.h b/src/Storages/MergeTree/IDataPartStorage.h index 17af6dd2909..c6669908db4 100644 --- a/src/Storages/MergeTree/IDataPartStorage.h +++ b/src/Storages/MergeTree/IDataPartStorage.h @@ -4,6 +4,9 @@ #include #include #include +#include +#include +#include #include namespace DB @@ -18,6 +21,7 @@ struct CanRemoveDescription NameSet files_not_to_remove; }; + using CanRemoveCallback = std::function; class IDataPartStorageIterator @@ -61,13 +65,10 @@ struct WriteSettings; class TemporaryFileOnDisk; -class IDataPartStorageBuilder; -using 
DataPartStorageBuilderPtr = std::shared_ptr; - /// This is an abstraction of storage for data part files. /// Ideally, it is assumed to contains read-only methods from IDisk. /// It is not fulfilled now, but let's try our best. -class IDataPartStorage +class IDataPartStorage : public boost::noncopyable { public: virtual ~IDataPartStorage() = default; @@ -81,16 +82,19 @@ public: /// virtual std::string getRelativeRootPath() const = 0; /// Get a storage for projection. - virtual std::shared_ptr getProjection(const std::string & name) const = 0; + virtual std::shared_ptr getProjection(const std::string & name) = 0; + virtual std::shared_ptr getProjection(const std::string & name) const = 0; /// Part directory exists. virtual bool exists() const = 0; + /// File inside part directory exists. Specified path is relative to the part path. virtual bool exists(const std::string & name) const = 0; virtual bool isDirectory(const std::string & name) const = 0; /// Modification time for part directory. virtual Poco::Timestamp getLastModified() const = 0; + /// Iterate part directory. Iteration in subdirectory is not needed yet. virtual DataPartStorageIteratorPtr iterate() const = 0; @@ -107,7 +111,6 @@ public: std::optional read_hint, std::optional file_size) const = 0; - virtual void loadVersionMetadata(VersionMetadata & version, Poco::Logger * log) const = 0; virtual void checkConsistency(const MergeTreeDataPartChecksums & checksums) const = 0; struct ProjectionChecksums @@ -129,12 +132,12 @@ public: /// Get a name like 'prefix_partdir_tryN' which does not exist in a root dir. /// TODO: remove it. - virtual std::string getRelativePathForPrefix(Poco::Logger * log, const String & prefix, bool detached) const = 0; + virtual std::optional getRelativePathForPrefix( + Poco::Logger * log, const String & prefix, bool detached, bool broken) const = 0; - /// Reset part directory, used for im-memory parts. + /// Reset part directory, used for in-memory parts. /// TODO: remove it. virtual void setRelativePath(const std::string & path) = 0; - virtual void onRename(const std::string & new_root_path, const std::string & new_part_dir) = 0; /// Some methods from IDisk. Needed to avoid getting internal IDisk interface. virtual std::string getDiskName() const = 0; @@ -143,41 +146,26 @@ public: virtual bool supportZeroCopyReplication() const { return false; } virtual bool supportParallelWrite() const = 0; virtual bool isBroken() const = 0; - virtual void syncRevision(UInt64 revision) = 0; + + /// TODO: remove or at least remove const. + virtual void syncRevision(UInt64 revision) const = 0; virtual UInt64 getRevision() const = 0; + virtual std::unordered_map getSerializedMetadata(const std::vector & paths) const = 0; /// Get a path for internal disk if relevant. It is used mainly for logging. virtual std::string getDiskPath() const = 0; - /// Check if data part is stored on one of the specified disk in set. - using DisksSet = std::unordered_set; - virtual DisksSet::const_iterator isStoredOnDisk(const DisksSet & disks) const { return disks.end(); } - /// Reserve space on the same disk. /// Probably we should try to remove it later. - virtual ReservationPtr reserve(UInt64 /*bytes*/) const { return nullptr; } - virtual ReservationPtr tryReserve(UInt64 /*bytes*/) const { return nullptr; } - virtual size_t getVolumeIndex(const IStoragePolicy &) const { return 0; } - - /// Some methods which change data part internals possibly after creation. - /// Probably we should try to remove it later. 
- virtual void writeChecksums(const MergeTreeDataPartChecksums & checksums, const WriteSettings & settings) const = 0; - virtual void writeColumns(const NamesAndTypesList & columns, const WriteSettings & settings) const = 0; - virtual void writeVersionMetadata(const VersionMetadata & version, bool fsync_part_dir) const = 0; - virtual void appendCSNToVersionMetadata(const VersionMetadata & version, VersionMetadata::WhichCSN which_csn) const = 0; - virtual void appendRemovalTIDToVersionMetadata(const VersionMetadata & version, bool clear) const = 0; - virtual void writeDeleteOnDestroyMarker(Poco::Logger * log) const = 0; - virtual void removeDeleteOnDestroyMarker() const = 0; - virtual void removeVersionMetadata() const = 0; + /// TODO: remove constness + virtual ReservationPtr reserve(UInt64 /*bytes*/) const { return nullptr; } + virtual ReservationPtr tryReserve(UInt64 /*bytes*/) const { return nullptr; } /// A leak of abstraction. /// Return some uniq string for file. /// Required for distinguish different copies of the same part on remote FS. virtual String getUniqueId() const = 0; - /// A leak of abstraction - virtual bool shallParticipateInMerges(const IStoragePolicy &) const { return true; } - /// Create a backup of a data part. /// This method adds a new entry to backup_entries. /// Also creates a new tmp_dir for internal disk (if disk is mentioned the first time). @@ -205,7 +193,7 @@ public: const NameSet & files_to_copy_instead_of_hardlinks) const = 0; /// Make a full copy of a data part into 'to/dir_path' (possibly to a different disk). - virtual std::shared_ptr clone( + virtual std::shared_ptr clonePart( const std::string & to, const std::string & dir_path, const DiskPtr & disk, @@ -215,33 +203,22 @@ public: /// Right now, this is needed for rename table query. virtual void changeRootPath(const std::string & from_root, const std::string & to_root) = 0; - /// Leak of abstraction as well. We should use builder as one-time object which allow - /// us to build parts, while storage should be read-only method to access part properties - /// related to disk. However our code is really tricky and sometimes we need ad-hoc builders. - virtual DataPartStorageBuilderPtr getBuilder() const = 0; -}; - -using DataPartStoragePtr = std::shared_ptr; - -/// This interface is needed to write data part. -class IDataPartStorageBuilder -{ -public: - virtual ~IDataPartStorageBuilder() = default; - - /// Reset part directory, used for im-memory parts - virtual void setRelativePath(const std::string & path) = 0; - - virtual std::string getPartDirectory() const = 0; - virtual std::string getFullPath() const = 0; - virtual std::string getRelativePath() const = 0; - - virtual bool exists() const = 0; - virtual void createDirectories() = 0; virtual void createProjection(const std::string & name) = 0; - virtual std::unique_ptr writeFile(const String & name, size_t buf_size, const WriteSettings & settings) = 0; + virtual std::unique_ptr writeFile( + const String & name, + size_t buf_size, + const WriteSettings & settings) = 0; + + /// A special const method to write transaction file. + /// It's const, because file with transaction metadata + /// can be modified after part creation. 
+ virtual std::unique_ptr writeTransactionFile(WriteMode mode) const = 0; + + virtual void createFile(const String & name) = 0; + virtual void moveFile(const String & from_name, const String & to_name) = 0; + virtual void replaceFile(const String & from_name, const String & to_name) = 0; virtual void removeFile(const String & name) = 0; virtual void removeFileIfExists(const String & name) = 0; @@ -250,20 +227,12 @@ public: virtual SyncGuardPtr getDirectorySyncGuard() const { return nullptr; } - virtual void createHardLinkFrom(const IDataPartStorage & source, const std::string & from, const std::string & to) const = 0; - - virtual ReservationPtr reserve(UInt64 /*bytes*/) { return nullptr; } - - virtual std::shared_ptr getProjection(const std::string & name) const = 0; - - virtual DataPartStoragePtr getStorage() const = 0; + virtual void createHardLinkFrom(const IDataPartStorage & source, const std::string & from, const std::string & to) = 0; /// Rename part. /// Ideally, new_root_path should be the same as current root (but it is not true). /// Examples are: 'all_1_2_1' -> 'detached/all_1_2_1' /// 'moving/tmp_all_1_2_1' -> 'all_1_2_1' - /// - /// To notify storage also call onRename for it with first two args virtual void rename( const std::string & new_root_path, const std::string & new_part_dir, @@ -271,7 +240,35 @@ public: bool remove_new_dir_if_exists, bool fsync_part_dir) = 0; - virtual void commit() = 0; + /// Starts a transaction of mutable operations. + virtual void beginTransaction() = 0; + /// Commits a transaction of mutable operations. + virtual void commitTransaction() = 0; + virtual bool hasActiveTransaction() const = 0; +}; + +using DataPartStoragePtr = std::shared_ptr; +using MutableDataPartStoragePtr = std::shared_ptr; + +/// A holder that encapsulates data part storage and +/// gives access to const storage from const methods +/// and to mutable storage from non-const methods. 
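Before the holder class itself, a usage-level illustration of the pattern it implements: a part class inherits the holder, so its const methods can only obtain a read-only view of the storage while its non-const methods get the mutable one. The names here are hypothetical; the real consumer is IMergeTreeDataPart later in this diff.

#include <iostream>
#include <memory>
#include <utility>

/// Stand-in for IDataPartStorage, illustration only.
struct StorageStub
{
    int readFileSize() const { return 42; }
    void removeFile() {}
};

/// Same shape as the holder defined below: const access from const methods, mutable from non-const.
class HolderSketch
{
public:
    explicit HolderSketch(std::shared_ptr<StorageStub> storage_) : storage(std::move(storage_)) {}

    StorageStub & getStorage() { return *storage; }
    const StorageStub & getStorage() const { return *storage; }

private:
    std::shared_ptr<StorageStub> storage;
};

struct PartSketch : HolderSketch
{
    using HolderSketch::HolderSketch;

    int fileSize() const { return getStorage().readFileSize(); }   /// read-only view; removeFile() would not compile here
    void dropFile() { getStorage().removeFile(); }                 /// mutable view; mutation is allowed
};

int main()
{
    PartSketch part(std::make_shared<StorageStub>());
    std::cout << part.fileSize() << '\n';
    part.dropFile();
}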
+class DataPartStorageHolder : public boost::noncopyable +{ +public: + explicit DataPartStorageHolder(MutableDataPartStoragePtr storage_) + : storage(std::move(storage_)) + { + } + + IDataPartStorage & getDataPartStorage() { return *storage; } + const IDataPartStorage & getDataPartStorage() const { return *storage; } + + MutableDataPartStoragePtr getDataPartStoragePtr() { return storage; } + DataPartStoragePtr getDataPartStoragePtr() const { return storage; } + +private: + MutableDataPartStoragePtr storage; }; } diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 46323f12305..368af55aa15 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -1,4 +1,5 @@ #include "IMergeTreeDataPart.h" +#include "Storages/MergeTree/IDataPartStorage.h" #include #include @@ -101,7 +102,7 @@ void IMergeTreeDataPart::MinMaxIndex::load(const MergeTreeData & data, const Par } IMergeTreeDataPart::MinMaxIndex::WrittenFiles IMergeTreeDataPart::MinMaxIndex::store( - const MergeTreeData & data, const DataPartStorageBuilderPtr & data_part_storage_builder, Checksums & out_checksums) const + const MergeTreeData & data, IDataPartStorage & part_storage, Checksums & out_checksums) const { auto metadata_snapshot = data.getInMemoryMetadataPtr(); const auto & partition_key = metadata_snapshot->getPartitionKey(); @@ -109,20 +110,20 @@ IMergeTreeDataPart::MinMaxIndex::WrittenFiles IMergeTreeDataPart::MinMaxIndex::s auto minmax_column_names = data.getMinMaxColumnsNames(partition_key); auto minmax_column_types = data.getMinMaxColumnsTypes(partition_key); - return store(minmax_column_names, minmax_column_types, data_part_storage_builder, out_checksums); + return store(minmax_column_names, minmax_column_types, part_storage, out_checksums); } IMergeTreeDataPart::MinMaxIndex::WrittenFiles IMergeTreeDataPart::MinMaxIndex::store( const Names & column_names, const DataTypes & data_types, - const DataPartStorageBuilderPtr & data_part_storage_builder, + IDataPartStorage & part_storage, Checksums & out_checksums) const { if (!initialized) throw Exception( ErrorCodes::LOGICAL_ERROR, "Attempt to store uninitialized MinMax index for part {}. This is a bug", - data_part_storage_builder->getFullPath()); + part_storage.getFullPath()); WrittenFiles written_files; @@ -131,7 +132,7 @@ IMergeTreeDataPart::MinMaxIndex::WrittenFiles IMergeTreeDataPart::MinMaxIndex::s String file_name = "minmax_" + escapeForFileName(column_names[i]) + ".idx"; auto serialization = data_types.at(i)->getDefaultSerialization(); - auto out = data_part_storage_builder->writeFile(file_name, DBMS_DEFAULT_BUFFER_SIZE, {}); + auto out = part_storage.writeFile(file_name, DBMS_DEFAULT_BUFFER_SIZE, {}); HashingWriteBuffer out_hashing(*out); serialization->serializeBinary(hyperrectangle[i].left, out_hashing); serialization->serializeBinary(hyperrectangle[i].right, out_hashing); @@ -301,13 +302,13 @@ static void decrementTypeMetric(MergeTreeDataPartType type) IMergeTreeDataPart::IMergeTreeDataPart( const MergeTreeData & storage_, const String & name_, - const DataPartStoragePtr & data_part_storage_, + const MutableDataPartStoragePtr & data_part_storage_, Type part_type_, const IMergeTreeDataPart * parent_part_) - : storage(storage_) + : DataPartStorageHolder(data_part_storage_) + , storage(storage_) , name(name_) , info(MergeTreePartInfo::fromPartName(name_, storage.format_version)) - , data_part_storage(parent_part_ ? 
parent_part_->data_part_storage : data_part_storage_) , index_granularity_info(storage_, part_type_) , part_type(part_type_) , parent_part(parent_part_) @@ -315,6 +316,7 @@ IMergeTreeDataPart::IMergeTreeDataPart( { if (parent_part) state = MergeTreeDataPartState::Active; + incrementStateMetric(state); incrementTypeMetric(part_type); @@ -328,13 +330,13 @@ IMergeTreeDataPart::IMergeTreeDataPart( const MergeTreeData & storage_, const String & name_, const MergeTreePartInfo & info_, - const DataPartStoragePtr & data_part_storage_, + const MutableDataPartStoragePtr & data_part_storage_, Type part_type_, const IMergeTreeDataPart * parent_part_) - : storage(storage_) + : DataPartStorageHolder(data_part_storage_) + , storage(storage_) , name(name_) , info(info_) - , data_part_storage(data_part_storage_) , index_granularity_info(storage_, part_type_) , part_type(part_type_) , parent_part(parent_part_) @@ -342,6 +344,7 @@ IMergeTreeDataPart::IMergeTreeDataPart( { if (parent_part) state = MergeTreeDataPartState::Active; + incrementStateMetric(state); incrementTypeMetric(part_type); @@ -505,17 +508,17 @@ void IMergeTreeDataPart::removeIfNeeded() std::string path; try { - path = data_part_storage->getRelativePath(); + path = getDataPartStorage().getRelativePath(); - if (!data_part_storage->exists()) // path + if (!getDataPartStorage().exists()) // path return; if (is_temp) { - String file_name = fileName(data_part_storage->getPartDirectory()); + String file_name = fileName(getDataPartStorage().getPartDirectory()); if (file_name.empty()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "relative_path {} of part {} is invalid or not set", data_part_storage->getPartDirectory(), name); + throw Exception(ErrorCodes::LOGICAL_ERROR, "relative_path {} of part {} is invalid or not set", getDataPartStorage().getPartDirectory(), name); if (!startsWith(file_name, "tmp") && !endsWith(file_name, ".tmp_proj")) { @@ -620,7 +623,7 @@ String IMergeTreeDataPart::getColumnNameWithMinimumCompressedSize(bool with_subc } if (!minimum_size_column) - throw Exception("Could not find a column of minimum size in MergeTree, part " + data_part_storage->getFullPath(), ErrorCodes::LOGICAL_ERROR); + throw Exception("Could not find a column of minimum size in MergeTree, part " + getDataPartStorage().getFullPath(), ErrorCodes::LOGICAL_ERROR); return *minimum_size_column; } @@ -698,9 +701,9 @@ void IMergeTreeDataPart::loadProjections(bool require_columns_checksums, bool ch for (const auto & projection : metadata_snapshot->projections) { String path = /*getRelativePath() + */ projection.name + ".proj"; - if (data_part_storage->exists(path)) + if (getDataPartStorage().exists(path)) { - auto projection_part_storage = data_part_storage->getProjection(projection.name + ".proj"); + auto projection_part_storage = getDataPartStorage().getProjection(projection.name + ".proj"); auto part = storage.createPart(projection.name, {"all", 0, 0, 0}, projection_part_storage, this); part->loadColumnsChecksumsIndexes(require_columns_checksums, check_consistency); projection_parts.emplace(projection.name, std::move(part)); @@ -741,8 +744,8 @@ void IMergeTreeDataPart::loadIndex() loaded_index[i]->reserve(index_granularity.getMarksCount()); } - String index_name = "primary" + getIndexExtensionFromFilesystem(data_part_storage).value(); - String index_path = fs::path(data_part_storage->getRelativePath()) / index_name; + String index_name = "primary" + getIndexExtensionFromFilesystem(getDataPartStorage()).value(); + String index_path = 
fs::path(getDataPartStorage().getRelativePath()) / index_name; auto index_file = metadata_manager->read(index_name); size_t marks_count = index_granularity.getMarksCount(); @@ -781,7 +784,7 @@ void IMergeTreeDataPart::appendFilesOfIndex(Strings & files) const if (metadata_snapshot->hasPrimaryKey()) { - String index_name = "primary" + getIndexExtensionFromFilesystem(data_part_storage).value(); + String index_name = "primary" + getIndexExtensionFromFilesystem(getDataPartStorage()).value(); files.push_back(index_name); } } @@ -793,10 +796,10 @@ NameSet IMergeTreeDataPart::getFileNamesWithoutChecksums() const NameSet result = {"checksums.txt", "columns.txt"}; - if (data_part_storage->exists(DEFAULT_COMPRESSION_CODEC_FILE_NAME)) + if (getDataPartStorage().exists(DEFAULT_COMPRESSION_CODEC_FILE_NAME)) result.emplace(DEFAULT_COMPRESSION_CODEC_FILE_NAME); - if (data_part_storage->exists(TXN_VERSION_METADATA_FILE_NAME)) + if (getDataPartStorage().exists(TXN_VERSION_METADATA_FILE_NAME)) result.emplace(TXN_VERSION_METADATA_FILE_NAME); return result; @@ -811,7 +814,7 @@ void IMergeTreeDataPart::loadDefaultCompressionCodec() return; } - String path = fs::path(data_part_storage->getRelativePath()) / DEFAULT_COMPRESSION_CODEC_FILE_NAME; + String path = fs::path(getDataPartStorage().getRelativePath()) / DEFAULT_COMPRESSION_CODEC_FILE_NAME; bool exists = metadata_manager->exists(DEFAULT_COMPRESSION_CODEC_FILE_NAME); if (!exists) { @@ -851,6 +854,120 @@ void IMergeTreeDataPart::loadDefaultCompressionCodec() } } +template +void IMergeTreeDataPart::writeMetadata(const String & filename, const WriteSettings & settings, Writer && writer) +{ + auto & data_part_storage = getDataPartStorage(); + auto tmp_filename = filename + ".tmp"; + + try + { + { + auto out = data_part_storage.writeFile(tmp_filename, 4096, settings); + writer(*out); + out->finalize(); + } + + data_part_storage.moveFile(tmp_filename, filename); + } + catch (...) + { + try + { + if (data_part_storage.exists(tmp_filename)) + data_part_storage.removeFile(tmp_filename); + } + catch (...) + { + tryLogCurrentException("DataPartStorageOnDisk"); + } + + throw; + } +} + +void IMergeTreeDataPart::writeChecksums(const MergeTreeDataPartChecksums & checksums_, const WriteSettings & settings) +{ + writeMetadata("checksums.txt", settings, [&checksums_](auto & buffer) + { + checksums_.write(buffer); + }); +} + +void IMergeTreeDataPart::writeColumns(const NamesAndTypesList & columns_, const WriteSettings & settings) +{ + writeMetadata("columns.txt", settings, [&columns_](auto & buffer) + { + columns_.writeText(buffer); + }); +} + +void IMergeTreeDataPart::writeVersionMetadata(const VersionMetadata & version_, bool fsync_part_dir) const +{ + static constexpr auto filename = "txn_version.txt"; + static constexpr auto tmp_filename = "txn_version.txt.tmp"; + auto & data_part_storage = const_cast(getDataPartStorage()); + + try + { + { + /// TODO IDisk interface does not allow to open file with O_EXCL flag (for DiskLocal), + /// so we create empty file at first (expecting that createFile throws if file already exists) + /// and then overwrite it. 
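The writeMetadata/writeVersionMetadata helpers above rely on the classic "write to a temporary name, then replace the target" idiom, which keeps a metadata file update atomic: readers see either the old or the new complete file. A generic standalone sketch of the idiom, independent of the IDisk API (a production implementation would also fsync the file and its directory):

#include <filesystem>
#include <fstream>
#include <string>

namespace fs = std::filesystem;

/// Write new content to "<path>.tmp", flush it, then rename it over the real name.
/// rename(2) within one filesystem replaces the old file in a single atomic step.
void writeFileAtomically(const std::string & path, const std::string & content)
{
    const std::string tmp_path = path + ".tmp";
    {
        std::ofstream out(tmp_path, std::ios::binary | std::ios::trunc);
        out << content;
        out.flush();
    }
    fs::rename(tmp_path, path);
}

int main()
{
    writeFileAtomically("columns.txt", "columns format version: 1\n");
}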
+ data_part_storage.createFile(tmp_filename); + auto write_settings = storage.getContext()->getWriteSettings(); + auto buf = data_part_storage.writeFile(tmp_filename, 256, write_settings); + version_.write(*buf); + buf->finalize(); + buf->sync(); + } + + SyncGuardPtr sync_guard; + if (fsync_part_dir) + sync_guard = data_part_storage.getDirectorySyncGuard(); + data_part_storage.replaceFile(tmp_filename, filename); + } + catch (...) + { + try + { + if (data_part_storage.exists(tmp_filename)) + data_part_storage.removeFile(tmp_filename); + } + catch (...) + { + tryLogCurrentException("DataPartStorageOnDisk"); + } + + throw; + } +} + +void IMergeTreeDataPart::writeDeleteOnDestroyMarker() +{ + static constexpr auto marker_path = "delete-on-destroy.txt"; + + try + { + getDataPartStorage().createFile(marker_path); + } + catch (Poco::Exception & e) + { + LOG_ERROR(storage.log, "{} (while creating DeleteOnDestroy marker: {})", + e.what(), (fs::path(getDataPartStorage().getFullPath()) / marker_path).string()); + } +} + +void IMergeTreeDataPart::removeDeleteOnDestroyMarker() +{ + getDataPartStorage().removeFileIfExists("delete-on-destroy.txt"); +} + +void IMergeTreeDataPart::removeVersionMetadata() +{ + getDataPartStorage().removeFileIfExists("txn_version.txt"); +} + void IMergeTreeDataPart::appendFilesOfDefaultCompressionCodec(Strings & files) { files.push_back(DEFAULT_COMPRESSION_CODEC_FILE_NAME); @@ -880,7 +997,7 @@ CompressionCodecPtr IMergeTreeDataPart::detectDefaultCompressionCodec() const String candidate_path = /*fs::path(getRelativePath()) */ (ISerialization::getFileNameForStream(part_column, substream_path) + ".bin"); /// We can have existing, but empty .bin files. Example: LowCardinality(Nullable(...)) columns and column_name.dict.null.bin file. - if (data_part_storage->exists(candidate_path) && data_part_storage->getFileSize(candidate_path) != 0) + if (getDataPartStorage().exists(candidate_path) && getDataPartStorage().getFileSize(candidate_path) != 0) path_to_data_file = candidate_path; } }); @@ -891,7 +1008,7 @@ CompressionCodecPtr IMergeTreeDataPart::detectDefaultCompressionCodec() const continue; } - result = getCompressionCodecForFile(data_part_storage, path_to_data_file); + result = getCompressionCodecForFile(getDataPartStorage(), path_to_data_file); break; } } @@ -936,7 +1053,7 @@ void IMergeTreeDataPart::loadPartitionAndMinMaxIndex() String calculated_partition_id = partition.getID(metadata_snapshot->getPartitionKey().sample_block); if (calculated_partition_id != info.partition_id) throw Exception( - "While loading part " + data_part_storage->getFullPath() + ": calculated partition ID: " + calculated_partition_id + "While loading part " + getDataPartStorage().getFullPath() + ": calculated partition ID: " + calculated_partition_id + " differs from partition ID in part name: " + info.partition_id, ErrorCodes::CORRUPTED_DATA); } @@ -965,7 +1082,7 @@ void IMergeTreeDataPart::loadChecksums(bool require) bytes_on_disk = checksums.getTotalSizeOnDisk(); } else - bytes_on_disk = data_part_storage->calculateTotalSizeOnDisk(); + bytes_on_disk = getDataPartStorage().calculateTotalSizeOnDisk(); } else { @@ -977,7 +1094,7 @@ void IMergeTreeDataPart::loadChecksums(bool require) LOG_WARNING(storage.log, "Checksums for part {} not found. 
Will calculate them from data on disk.", name); checksums = checkDataPart(shared_from_this(), false); - data_part_storage->writeChecksums(checksums, {}); + writeChecksums(checksums, {}); bytes_on_disk = checksums.getTotalSizeOnDisk(); } @@ -990,8 +1107,6 @@ void IMergeTreeDataPart::appendFilesOfChecksums(Strings & files) void IMergeTreeDataPart::loadRowsCount() { - //String path = fs::path(getRelativePath()) / "count.txt"; - auto read_rows_count = [&]() { auto buf = metadata_manager->read("count.txt"); @@ -1062,7 +1177,7 @@ void IMergeTreeDataPart::loadRowsCount() } else { - if (data_part_storage->exists("count.txt")) + if (getDataPartStorage().exists("count.txt")) { read_rows_count(); return; @@ -1161,7 +1276,7 @@ void IMergeTreeDataPart::appendFilesOfUUID(Strings & files) void IMergeTreeDataPart::loadColumns(bool require) { - String path = fs::path(data_part_storage->getRelativePath()) / "columns.txt"; + String path = fs::path(getDataPartStorage().getRelativePath()) / "columns.txt"; auto metadata_snapshot = storage.getInMemoryMetadataPtr(); if (parent_part) metadata_snapshot = metadata_snapshot->projections.get(name).metadata; @@ -1172,30 +1287,26 @@ void IMergeTreeDataPart::loadColumns(bool require) { /// We can get list of columns only from columns.txt in compact parts. if (require || part_type == Type::Compact) - throw Exception("No columns.txt in part " + name + ", expected path " + path + " on drive " + data_part_storage->getDiskName(), + throw Exception("No columns.txt in part " + name + ", expected path " + path + " on drive " + getDataPartStorage().getDiskName(), ErrorCodes::NO_FILE_IN_DATA_PART); /// If there is no file with a list of columns, write it down. for (const NameAndTypePair & column : metadata_snapshot->getColumns().getAllPhysical()) - if (data_part_storage->exists(getFileNameForColumn(column) + ".bin")) + if (getDataPartStorage().exists(getFileNameForColumn(column) + ".bin")) loaded_columns.push_back(column); if (columns.empty()) throw Exception("No columns in part " + name, ErrorCodes::NO_FILE_IN_DATA_PART); - data_part_storage->writeColumns(loaded_columns, {}); + writeColumns(loaded_columns, {}); } else { auto in = metadata_manager->read("columns.txt"); loaded_columns.readText(*in); - for (const auto & column : loaded_columns) - { - const auto * aggregate_function_data_type = typeid_cast(column.type.get()); - if (aggregate_function_data_type && aggregate_function_data_type->isVersioned()) - aggregate_function_data_type->setVersion(0, /* if_empty */true); - } + for (auto & column : loaded_columns) + setVersionToAggregateFunctions(column.type, true); } SerializationInfo::Settings settings = @@ -1231,7 +1342,7 @@ void IMergeTreeDataPart::assertHasVersionMetadata(MergeTreeTransaction * txn) co name, storage.getStorageID().getNameForLogs(), version.creation_tid, txn ? 
txn->dumpDescription() : ""); assert(!txn || storage.supportsTransactions()); - assert(!txn || data_part_storage->exists(TXN_VERSION_METADATA_FILE_NAME)); + assert(!txn || getDataPartStorage().exists(TXN_VERSION_METADATA_FILE_NAME)); } void IMergeTreeDataPart::storeVersionMetadata(bool force) const @@ -1246,7 +1357,7 @@ void IMergeTreeDataPart::storeVersionMetadata(bool force) const throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Transactions are not supported for in-memory parts (table: {}, part: {})", storage.getStorageID().getNameForLogs(), name); - data_part_storage->writeVersionMetadata(version, storage.getSettings()->fsync_part_directory); + writeVersionMetadata(version, storage.getSettings()->fsync_part_directory); } void IMergeTreeDataPart::appendCSNToVersionMetadata(VersionMetadata::WhichCSN which_csn) const @@ -1258,7 +1369,14 @@ void IMergeTreeDataPart::appendCSNToVersionMetadata(VersionMetadata::WhichCSN wh chassert(!(which_csn == VersionMetadata::WhichCSN::REMOVAL && version.removal_csn == 0)); chassert(isStoredOnDisk()); - data_part_storage->appendCSNToVersionMetadata(version, which_csn); + /// Small enough appends to file are usually atomic, + /// so we append new metadata instead of rewriting file to reduce number of fsyncs. + /// We don't need to do fsync when writing CSN, because in case of hard restart + /// we will be able to restore CSN from transaction log in Keeper. + + auto out = getDataPartStorage().writeTransactionFile(WriteMode::Append); + version.writeCSN(*out, which_csn); + out->finalize(); } void IMergeTreeDataPart::appendRemovalTIDToVersionMetadata(bool clear) const @@ -1281,13 +1399,74 @@ void IMergeTreeDataPart::appendRemovalTIDToVersionMetadata(bool clear) const else LOG_TEST(storage.log, "Appending removal TID for {} (creation: {}, removal {})", name, version.creation_tid, version.removal_tid); - data_part_storage->appendRemovalTIDToVersionMetadata(version, clear); + auto out = getDataPartStorage().writeTransactionFile(WriteMode::Append); + version.writeRemovalTID(*out, clear); + out->finalize(); + + /// fsync is not required when we clearing removal TID, because after hard restart we will fix metadata + if (!clear) + out->sync(); +} + +static std::unique_ptr openForReading(const IDataPartStorage & part_storage, const String & filename) +{ + size_t file_size = part_storage.getFileSize(filename); + return part_storage.readFile(filename, ReadSettings().adjustBufferSize(file_size), file_size, file_size); } void IMergeTreeDataPart::loadVersionMetadata() const try { - data_part_storage->loadVersionMetadata(version, storage.log); + static constexpr auto version_file_name = "txn_version.txt"; + static constexpr auto tmp_version_file_name = "txn_version.txt.tmp"; + auto & data_part_storage = const_cast(getDataPartStorage()); + + auto remove_tmp_file = [&]() + { + auto last_modified = data_part_storage.getLastModified(); + auto buf = openForReading(data_part_storage, tmp_version_file_name); + + String content; + readStringUntilEOF(content, *buf); + LOG_WARNING(storage.log, "Found file {} that was last modified on {}, has size {} and the following content: {}", + tmp_version_file_name, last_modified.epochTime(), content.size(), content); + data_part_storage.removeFile(tmp_version_file_name); + }; + + if (data_part_storage.exists(version_file_name)) + { + auto buf = openForReading(data_part_storage, version_file_name); + version.read(*buf); + if (data_part_storage.exists(tmp_version_file_name)) + remove_tmp_file(); + return; + } + + /// Four (?) 
cases are possible: + /// 1. Part was created without transactions. + /// 2. Version metadata file was not renamed from *.tmp on part creation. + /// 3. Version metadata were written to *.tmp file, but hard restart happened before fsync. + /// 4. Fsyncs in storeVersionMetadata() work incorrectly. + + if (!data_part_storage.exists(tmp_version_file_name)) + { + /// Case 1. + /// We do not have version metadata and transactions history for old parts, + /// so let's consider that such parts were created by some ancient transaction + /// and were committed with some prehistoric CSN. + /// NOTE It might be Case 3, but version metadata file is written on part creation before other files, + /// so it's not Case 3 if part is not broken. + version.setCreationTID(Tx::PrehistoricTID, nullptr); + version.creation_csn = Tx::PrehistoricCSN; + return; + } + + /// Case 2. + /// Content of *.tmp file may be broken, just use fake TID. + /// Transaction was not committed if *.tmp file was not renamed, so we should complete rollback by removing part. + version.setCreationTID(Tx::DummyTID, nullptr); + version.creation_csn = Tx::RolledBackCSN; + remove_tmp_file(); } catch (Exception & e) { @@ -1324,15 +1503,15 @@ bool IMergeTreeDataPart::assertHasValidVersionMetadata() const if (state == MergeTreeDataPartState::Temporary) return true; - if (!data_part_storage->exists()) + if (!getDataPartStorage().exists()) return true; String content; String version_file_name = TXN_VERSION_METADATA_FILE_NAME; try { - size_t file_size = data_part_storage->getFileSize(TXN_VERSION_METADATA_FILE_NAME); - auto buf = data_part_storage->readFile(TXN_VERSION_METADATA_FILE_NAME, ReadSettings().adjustBufferSize(file_size), file_size, std::nullopt); + size_t file_size = getDataPartStorage().getFileSize(TXN_VERSION_METADATA_FILE_NAME); + auto buf = getDataPartStorage().readFile(TXN_VERSION_METADATA_FILE_NAME, ReadSettings().adjustBufferSize(file_size), file_size, std::nullopt); readStringUntilEOF(content, *buf); ReadBufferFromString str_buf{content}; @@ -1366,10 +1545,11 @@ void IMergeTreeDataPart::appendFilesOfColumns(Strings & files) bool IMergeTreeDataPart::shallParticipateInMerges(const StoragePolicyPtr & storage_policy) const { - return data_part_storage->shallParticipateInMerges(*storage_policy); + auto disk_name = getDataPartStorage().getDiskName(); + return !storage_policy->getVolumeByDiskName(disk_name)->areMergesAvoided(); } -void IMergeTreeDataPart::renameTo(const String & new_relative_path, bool remove_new_dir_if_exists, DataPartStorageBuilderPtr builder) const +void IMergeTreeDataPart::renameTo(const String & new_relative_path, bool remove_new_dir_if_exists) try { assertOnDisk(); @@ -1380,22 +1560,21 @@ try if (parent_part) { /// For projections, move is only possible inside parent part dir. 
- relative_path = parent_part->data_part_storage->getRelativePath(); + relative_path = parent_part->getDataPartStorage().getRelativePath(); } - String from = data_part_storage->getRelativePath(); + auto old_projection_root_path = getDataPartStorage().getRelativePath(); auto to = fs::path(relative_path) / new_relative_path; metadata_manager->deleteAll(true); metadata_manager->assertAllDeleted(true); - builder->rename(to.parent_path(), to.filename(), storage.log, remove_new_dir_if_exists, fsync_dir); - data_part_storage->onRename(to.parent_path(), to.filename()); + getDataPartStorage().rename(to.parent_path(), to.filename(), storage.log, remove_new_dir_if_exists, fsync_dir); metadata_manager->updateAll(true); - for (const auto & [p_name, part] : projection_parts) - { - part->data_part_storage = data_part_storage->getProjection(p_name + ".proj"); - } + auto new_projection_root_path = to.string(); + + for (const auto & [_, part] : projection_parts) + part->getDataPartStorage().changeRootPath(old_projection_root_path, new_projection_root_path); } catch (...) { @@ -1436,14 +1615,14 @@ void IMergeTreeDataPart::initializePartMetadataManager() void IMergeTreeDataPart::initializeIndexGranularityInfo() { - auto mrk_ext = MergeTreeIndexGranularityInfo::getMarksExtensionFromFilesystem(data_part_storage); + auto mrk_ext = MergeTreeIndexGranularityInfo::getMarksExtensionFromFilesystem(getDataPartStorage()); if (mrk_ext) index_granularity_info = MergeTreeIndexGranularityInfo(storage, MarkType{*mrk_ext}); else index_granularity_info = MergeTreeIndexGranularityInfo(storage, part_type); } -void IMergeTreeDataPart::remove() const +void IMergeTreeDataPart::remove() { assert(assertHasValidVersionMetadata()); part_is_probably_removed_from_disk = true; @@ -1460,7 +1639,6 @@ void IMergeTreeDataPart::remove() const return CanRemoveDescription{.can_remove_anything = can_remove, .files_not_to_remove = files_not_to_remove }; }; - if (!isStoredOnDisk()) return; @@ -1479,11 +1657,12 @@ void IMergeTreeDataPart::remove() const projection_checksums.emplace_back(IDataPartStorage::ProjectionChecksums{.name = p_name, .checksums = projection_part->checksums}); } - data_part_storage->remove(std::move(can_remove_callback), checksums, projection_checksums, is_temp, getState(), storage.log); + getDataPartStorage().remove(std::move(can_remove_callback), checksums, projection_checksums, is_temp, getState(), storage.log); } -String IMergeTreeDataPart::getRelativePathForPrefix(const String & prefix, bool detached) const +std::optional IMergeTreeDataPart::getRelativePathForPrefix(const String & prefix, bool detached, bool broken) const { + assert(!broken || detached); String res; /** If you need to detach a part, and directory into which we want to rename it already exists, @@ -1495,22 +1674,26 @@ String IMergeTreeDataPart::getRelativePathForPrefix(const String & prefix, bool if (detached && parent_part) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot detach projection"); - return data_part_storage->getRelativePathForPrefix(storage.log, prefix, detached); + return getDataPartStorage().getRelativePathForPrefix(storage.log, prefix, detached, broken); } -String IMergeTreeDataPart::getRelativePathForDetachedPart(const String & prefix) const +std::optional IMergeTreeDataPart::getRelativePathForDetachedPart(const String & prefix, bool broken) const { /// Do not allow underscores in the prefix because they are used as separators. 
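A notable change in renameTo() is that projection parts keep their storage object and are simply re-rooted under the parent's new path. A toy sketch of that re-rooting follows; changeRootPath here is a hypothetical free function over plain strings, not the actual IDataPartStorage method signature.

#include <iostream>
#include <stdexcept>
#include <string>

/// Swap the leading old_root of path for new_root, keeping the part-local suffix
/// (e.g. "p.proj/") intact.
std::string changeRootPath(const std::string & path, const std::string & old_root, const std::string & new_root)
{
    if (path.compare(0, old_root.size(), old_root) != 0)
        throw std::invalid_argument("path does not start with old root: " + path);
    return new_root + path.substr(old_root.size());
}

int main()
{
    /// After renaming the parent part, every projection keeps its directory name but
    /// is re-rooted under the parent's new location (paths invented for the example).
    std::string proj = "store/a1b/all_1_1_0/p.proj/";
    std::cout << changeRootPath(proj, "store/a1b/all_1_1_0/", "store/a1b/detached/all_1_1_0/") << '\n';
    /// prints: store/a1b/detached/all_1_1_0/p.proj/
}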
assert(prefix.find_first_of('_') == String::npos); assert(prefix.empty() || std::find(DetachedPartInfo::DETACH_REASONS.begin(), DetachedPartInfo::DETACH_REASONS.end(), prefix) != DetachedPartInfo::DETACH_REASONS.end()); - return "detached/" + getRelativePathForPrefix(prefix, /* detached */ true); + if (auto path = getRelativePathForPrefix(prefix, /* detached */ true, broken)) + return "detached/" + *path; + return {}; } -void IMergeTreeDataPart::renameToDetached(const String & prefix, DataPartStorageBuilderPtr builder) const +void IMergeTreeDataPart::renameToDetached(const String & prefix) { - renameTo(getRelativePathForDetachedPart(prefix), true, builder); + auto path_to_detach = getRelativePathForDetachedPart(prefix, /* broken */ false); + assert(path_to_detach); + renameTo(path_to_detach.value(), true); part_is_probably_removed_from_disk = true; } @@ -1522,26 +1705,33 @@ void IMergeTreeDataPart::makeCloneInDetached(const String & prefix, const Storag /// because hardlinks tracking doesn't work for detached parts. bool copy_instead_of_hardlink = isStoredOnRemoteDiskWithZeroCopySupport() && storage.supportsReplication() && storage_settings->allow_remote_fs_zero_copy_replication; - data_part_storage->freeze( + /// Avoid unneeded duplicates of broken parts if we try to detach the same broken part multiple times. + /// Otherwise it may pollute detached/ with dirs with _tryN suffix and we will fail to remove broken part after 10 attempts. + bool broken = !prefix.empty(); + auto maybe_path_in_detached = getRelativePathForDetachedPart(prefix, broken); + if (!maybe_path_in_detached) + return; + + getDataPartStorage().freeze( storage.relative_data_path, - getRelativePathForDetachedPart(prefix), + *maybe_path_in_detached, /*make_source_readonly*/ true, {}, copy_instead_of_hardlink, {}); } -DataPartStoragePtr IMergeTreeDataPart::makeCloneOnDisk(const DiskPtr & disk, const String & directory_name) const +MutableDataPartStoragePtr IMergeTreeDataPart::makeCloneOnDisk(const DiskPtr & disk, const String & directory_name) const { assertOnDisk(); - if (disk->getName() == data_part_storage->getDiskName()) - throw Exception("Can not clone data part " + name + " to same disk " + data_part_storage->getDiskName(), ErrorCodes::LOGICAL_ERROR); + if (disk->getName() == getDataPartStorage().getDiskName()) + throw Exception("Can not clone data part " + name + " to same disk " + getDataPartStorage().getDiskName(), ErrorCodes::LOGICAL_ERROR); if (directory_name.empty()) throw Exception("Can not clone data part " + name + " to empty directory.", ErrorCodes::LOGICAL_ERROR); String path_to_clone = fs::path(storage.relative_data_path) / directory_name / ""; - return data_part_storage->clone(path_to_clone, data_part_storage->getPartDirectory(), disk, storage.log); + return getDataPartStorage().clonePart(path_to_clone, getDataPartStorage().getPartDirectory(), disk, storage.log); } void IMergeTreeDataPart::checkConsistencyBase() const @@ -1582,26 +1772,26 @@ void IMergeTreeDataPart::checkConsistencyBase() const } } - data_part_storage->checkConsistency(checksums); + getDataPartStorage().checkConsistency(checksums); } else { auto check_file_not_empty = [this](const String & file_path) { UInt64 file_size; - if (!data_part_storage->exists(file_path) || (file_size = data_part_storage->getFileSize(file_path)) == 0) + if (!getDataPartStorage().exists(file_path) || (file_size = getDataPartStorage().getFileSize(file_path)) == 0) throw Exception( ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART, "Part {} is broken: {} is empty", - 
data_part_storage->getFullPath(), - std::string(fs::path(data_part_storage->getFullPath()) / file_path)); + getDataPartStorage().getFullPath(), + std::string(fs::path(getDataPartStorage().getFullPath()) / file_path)); return file_size; }; /// Check that the primary key index is not empty. if (!pk.column_names.empty()) { - String index_name = "primary" + getIndexExtensionFromFilesystem(data_part_storage).value(); + String index_name = "primary" + getIndexExtensionFromFilesystem(getDataPartStorage()).value(); check_file_not_empty(index_name); } @@ -1745,7 +1935,7 @@ bool IMergeTreeDataPart::checkAllTTLCalculated(const StorageMetadataPtr & metada String IMergeTreeDataPart::getUniqueId() const { - return data_part_storage->getUniqueId(); + return getDataPartStorage().getUniqueId(); } String IMergeTreeDataPart::getZeroLevelPartBlockID(std::string_view token) const @@ -1784,11 +1974,11 @@ IMergeTreeDataPart::uint128 IMergeTreeDataPart::getActualChecksumByFile(const St return it->second.file_hash; } - if (!data_part_storage->exists(file_name)) + if (!getDataPartStorage().exists(file_name)) { return {}; } - std::unique_ptr in_file = data_part_storage->readFile(file_name, {}, std::nullopt, std::nullopt); + std::unique_ptr in_file = getDataPartStorage().readFile(file_name, {}, std::nullopt, std::nullopt); HashingReadBuffer in_hash(*in_file); String value; @@ -1816,11 +2006,11 @@ bool isInMemoryPart(const MergeTreeDataPartPtr & data_part) return (data_part && data_part->getType() == MergeTreeDataPartType::InMemory); } -std::optional getIndexExtensionFromFilesystem(const DataPartStoragePtr & data_part_storage) +std::optional getIndexExtensionFromFilesystem(const IDataPartStorage & data_part_storage) { - if (data_part_storage->exists()) + if (data_part_storage.exists()) { - for (auto it = data_part_storage->iterate(); it->isValid(); it->next()) + for (auto it = data_part_storage.iterate(); it->isValid(); it->next()) { const auto & extension = fs::path(it->name()).extension(); if (extension == getIndexExtension(false) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index 32afa2a482d..6515eb1a65c 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -1,5 +1,6 @@ #pragma once +#include "IO/WriteSettings.h" #include #include #include @@ -46,7 +47,7 @@ class UncompressedCache; class MergeTreeTransaction; /// Description of the data part. 
-class IMergeTreeDataPart : public std::enable_shared_from_this +class IMergeTreeDataPart : public std::enable_shared_from_this, public DataPartStorageHolder { public: static constexpr auto DATA_FILE_EXTENSION = ".bin"; @@ -67,19 +68,18 @@ public: using uint128 = IPartMetadataManager::uint128; - IMergeTreeDataPart( const MergeTreeData & storage_, const String & name_, const MergeTreePartInfo & info_, - const DataPartStoragePtr & data_part_storage_, + const MutableDataPartStoragePtr & data_part_storage_, Type part_type_, const IMergeTreeDataPart * parent_part_); IMergeTreeDataPart( const MergeTreeData & storage_, const String & name_, - const DataPartStoragePtr & data_part_storage_, + const MutableDataPartStoragePtr & data_part_storage_, Type part_type_, const IMergeTreeDataPart * parent_part_); @@ -94,13 +94,12 @@ public: const ReadBufferFromFileBase::ProfileCallback & profile_callback_) const = 0; virtual MergeTreeWriterPtr getWriter( - DataPartStorageBuilderPtr data_part_storage_builder, const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot, const std::vector & indices_to_recalc, const CompressionCodecPtr & default_codec_, const MergeTreeWriterSettings & writer_settings, - const MergeTreeIndexGranularity & computed_index_granularity) const = 0; + const MergeTreeIndexGranularity & computed_index_granularity) = 0; virtual bool isStoredOnDisk() const = 0; @@ -152,7 +151,7 @@ public: /// Throws an exception if part is not stored in on-disk format. void assertOnDisk() const; - void remove() const; + void remove(); /// Initialize columns (from columns.txt if exists, or create from column files if not). /// Load checksums from checksums.txt if exists. Load index if required. @@ -200,10 +199,6 @@ public: /// processed by multiple shards. UUID uuid = UUIDHelpers::Nil; - /// This is an object which encapsulates all the operations with disk. - /// Contains a path to stored data. 
- DataPartStoragePtr data_part_storage; - MergeTreeIndexGranularityInfo index_granularity_info; size_t rows_count = 0; @@ -289,8 +284,8 @@ public: using WrittenFiles = std::vector>; - [[nodiscard]] WrittenFiles store(const MergeTreeData & data, const DataPartStorageBuilderPtr & data_part_storage_builder, Checksums & checksums) const; - [[nodiscard]] WrittenFiles store(const Names & column_names, const DataTypes & data_types, const DataPartStorageBuilderPtr & data_part_storage_builder, Checksums & checksums) const; + [[nodiscard]] WrittenFiles store(const MergeTreeData & data, IDataPartStorage & part_storage, Checksums & checksums) const; + [[nodiscard]] WrittenFiles store(const Names & column_names, const DataTypes & data_types, IDataPartStorage & part_storage, Checksums & checksums) const; void update(const Block & block, const Names & column_names); void merge(const MinMaxIndex & other); @@ -321,17 +316,17 @@ public: size_t getFileSizeOrZero(const String & file_name) const; /// Moves a part to detached/ directory and adds prefix to its name - void renameToDetached(const String & prefix, DataPartStorageBuilderPtr builder) const; + void renameToDetached(const String & prefix); /// Makes checks and move part to new directory /// Changes only relative_dir_name, you need to update other metadata (name, is_temp) explicitly - virtual void renameTo(const String & new_relative_path, bool remove_new_dir_if_exists, DataPartStorageBuilderPtr builder) const; + virtual void renameTo(const String & new_relative_path, bool remove_new_dir_if_exists); /// Makes clone of a part in detached/ directory via hard links virtual void makeCloneInDetached(const String & prefix, const StorageMetadataPtr & metadata_snapshot) const; /// Makes full clone of part in specified subdirectory (relative to storage data directory, e.g. "detached") on another disk - DataPartStoragePtr makeCloneOnDisk(const DiskPtr & disk, const String & directory_name) const; + MutableDataPartStoragePtr makeCloneOnDisk(const DiskPtr & disk, const String & directory_name) const; /// Checks that .bin and .mrk files exist. /// @@ -347,7 +342,7 @@ public: /// Calculate column and secondary indices sizes on disk. void calculateColumnsAndSecondaryIndicesSizesOnDisk(); - String getRelativePathForPrefix(const String & prefix, bool detached = false) const; + std::optional getRelativePathForPrefix(const String & prefix, bool detached = false, bool broken = false) const; bool isProjectionPart() const { return parent_part != nullptr; } @@ -445,6 +440,12 @@ public: /// True if here is lightweight deleted mask file in part. bool hasLightweightDelete() const { return columns.contains(LightweightDeleteDescription::FILTER_COLUMN.name); } + void writeChecksums(const MergeTreeDataPartChecksums & checksums_, const WriteSettings & settings); + + void writeDeleteOnDestroyMarker(); + void removeDeleteOnDestroyMarker(); + void removeVersionMetadata(); + protected: /// Total size of all columns, calculated once in calcuateColumnSizesOnDisk @@ -485,7 +486,7 @@ protected: /// disk using columns and checksums. virtual void calculateEachColumnSizes(ColumnSizeByName & each_columns_size, ColumnSize & total_size) const = 0; - String getRelativePathForDetachedPart(const String & prefix) const; + std::optional getRelativePathForDetachedPart(const String & prefix, bool broken) const; /// Checks that part can be actually removed from disk. /// In ordinary scenario always returns true, but in case of @@ -566,6 +567,12 @@ private: /// any specifial compression. 
void loadDefaultCompressionCodec(); + void writeColumns(const NamesAndTypesList & columns_, const WriteSettings & settings); + void writeVersionMetadata(const VersionMetadata & version_, bool fsync_part_dir) const; + + template + void writeMetadata(const String & filename, const WriteSettings & settings, Writer && writer); + static void appendFilesOfDefaultCompressionCodec(Strings & files); /// Found column without specific compression and return codec @@ -585,7 +592,7 @@ bool isCompactPart(const MergeTreeDataPartPtr & data_part); bool isWidePart(const MergeTreeDataPartPtr & data_part); bool isInMemoryPart(const MergeTreeDataPartPtr & data_part); inline String getIndexExtension(bool is_compressed_primary_key) { return is_compressed_primary_key ? ".cidx" : ".idx"; } -std::optional getIndexExtensionFromFilesystem(const DataPartStoragePtr & data_part_storage); +std::optional getIndexExtensionFromFilesystem(const IDataPartStorage & data_part_storage); bool isCompressedFromIndexExtension(const String & index_extension); } diff --git a/src/Storages/MergeTree/IMergeTreeDataPartInfoForReader.h b/src/Storages/MergeTree/IMergeTreeDataPartInfoForReader.h index 28f834d661d..2e4972c2788 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartInfoForReader.h +++ b/src/Storages/MergeTree/IMergeTreeDataPartInfoForReader.h @@ -7,7 +7,8 @@ namespace DB { class IDataPartStorage; -using DataPartStoragePtr = std::shared_ptr; +using DataPartStoragePtr = std::shared_ptr; + class MergeTreeIndexGranularity; struct MergeTreeDataPartChecksums; struct MergeTreeIndexGranularityInfo; @@ -36,7 +37,7 @@ public: virtual bool isProjectionPart() const = 0; - virtual const DataPartStoragePtr & getDataPartStorage() const = 0; + virtual DataPartStoragePtr getDataPartStorage() const = 0; virtual const NamesAndTypesList & getColumns() const = 0; diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp index 84d0b50ae2f..2488c63e309 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp @@ -38,14 +38,12 @@ Block permuteBlockIfNeeded(const Block & block, const IColumn::Permutation * per } IMergeTreeDataPartWriter::IMergeTreeDataPartWriter( - const MergeTreeData::DataPartPtr & data_part_, - DataPartStorageBuilderPtr data_part_storage_builder_, + const MergeTreeMutableDataPartPtr & data_part_, const NamesAndTypesList & columns_list_, const StorageMetadataPtr & metadata_snapshot_, const MergeTreeWriterSettings & settings_, const MergeTreeIndexGranularity & index_granularity_) : data_part(data_part_) - , data_part_storage_builder(std::move(data_part_storage_builder_)) , storage(data_part_->storage) , metadata_snapshot(metadata_snapshot_) , columns_list(columns_list_) diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h index 417e2713180..fa3c675f7da 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h @@ -22,8 +22,7 @@ class IMergeTreeDataPartWriter : private boost::noncopyable { public: IMergeTreeDataPartWriter( - const MergeTreeData::DataPartPtr & data_part_, - DataPartStorageBuilderPtr data_part_storage_builder_, + const MergeTreeMutableDataPartPtr & data_part_, const NamesAndTypesList & columns_list_, const StorageMetadataPtr & metadata_snapshot_, const MergeTreeWriterSettings & settings_, @@ -42,8 +41,7 @@ public: protected: - const MergeTreeData::DataPartPtr data_part; - 
DataPartStorageBuilderPtr data_part_storage_builder; + const MergeTreeMutableDataPartPtr data_part; const MergeTreeData & storage; const StorageMetadataPtr metadata_snapshot; const NamesAndTypesList columns_list; diff --git a/src/Storages/MergeTree/IMergedBlockOutputStream.cpp b/src/Storages/MergeTree/IMergedBlockOutputStream.cpp index 5af9bbd3ed8..37da6014d1b 100644 --- a/src/Storages/MergeTree/IMergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/IMergedBlockOutputStream.cpp @@ -6,14 +6,13 @@ namespace DB { IMergedBlockOutputStream::IMergedBlockOutputStream( - DataPartStorageBuilderPtr data_part_storage_builder_, - const MergeTreeDataPartPtr & data_part, + const MergeTreeMutableDataPartPtr & data_part, const StorageMetadataPtr & metadata_snapshot_, const NamesAndTypesList & columns_list, bool reset_columns_) : storage(data_part->storage) , metadata_snapshot(metadata_snapshot_) - , data_part_storage_builder(std::move(data_part_storage_builder_)) + , data_part_storage(data_part->getDataPartStoragePtr()) , reset_columns(reset_columns_) { if (reset_columns) diff --git a/src/Storages/MergeTree/IMergedBlockOutputStream.h b/src/Storages/MergeTree/IMergedBlockOutputStream.h index dbcca1443b5..ca4e3899b29 100644 --- a/src/Storages/MergeTree/IMergedBlockOutputStream.h +++ b/src/Storages/MergeTree/IMergedBlockOutputStream.h @@ -1,5 +1,6 @@ #pragma once +#include "Storages/MergeTree/IDataPartStorage.h" #include #include #include @@ -12,8 +13,7 @@ class IMergedBlockOutputStream { public: IMergedBlockOutputStream( - DataPartStorageBuilderPtr data_part_storage_builder_, - const MergeTreeDataPartPtr & data_part, + const MergeTreeMutableDataPartPtr & data_part, const StorageMetadataPtr & metadata_snapshot_, const NamesAndTypesList & columns_list, bool reset_columns_); @@ -42,7 +42,7 @@ protected: const MergeTreeData & storage; StorageMetadataPtr metadata_snapshot; - DataPartStorageBuilderPtr data_part_storage_builder; + MutableDataPartStoragePtr data_part_storage; IMergeTreeDataPart::MergeTreeWriterPtr writer; bool reset_columns = false; diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index d7c33c8663b..99c14ede3e2 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -86,6 +87,88 @@ String extractFixedPrefixFromLikePattern(const String & like_pattern) return fixed_prefix; } +/// for "^prefix..." string it returns "prefix" +static String extractFixedPrefixFromRegularExpression(const String & regexp) +{ + if (regexp.size() <= 1 || regexp[0] != '^') + return {}; + + String fixed_prefix; + const char * begin = regexp.data() + 1; + const char * pos = begin; + const char * end = regexp.data() + regexp.size(); + + while (pos != end) + { + switch (*pos) + { + case '\0': + pos = end; + break; + + case '\\': + { + ++pos; + if (pos == end) + break; + + switch (*pos) + { + case '|': + case '(': + case ')': + case '^': + case '$': + case '.': + case '[': + case '?': + case '*': + case '+': + case '{': + fixed_prefix += *pos; + break; + default: + /// all other escape sequences are not supported + pos = end; + break; + } + + ++pos; + break; + } + + /// non-trivial cases + case '|': + fixed_prefix.clear(); + [[fallthrough]]; + case '(': + case '[': + case '^': + case '$': + case '.': + case '+': + pos = end; + break; + + /// Quantifiers that allow a zero number of occurrences. 
+ case '{': + case '?': + case '*': + if (!fixed_prefix.empty()) + fixed_prefix.pop_back(); + + pos = end; + break; + default: + fixed_prefix += *pos; + pos++; + break; + } + } + + return fixed_prefix; +} + /** For a given string, get a minimum string that is strictly greater than all strings with this prefix, * or return an empty string if there are no such strings. @@ -112,289 +195,6 @@ static String firstStringThatIsGreaterThanAllStringsWithPrefix(const String & pr return res; } -static void appendColumnNameWithoutAlias(const ActionsDAG::Node & node, WriteBuffer & out, bool legacy = false) -{ - switch (node.type) - { - case (ActionsDAG::ActionType::INPUT): - writeString(node.result_name, out); - break; - case (ActionsDAG::ActionType::COLUMN): - { - /// If it was created from ASTLiteral, then result_name can be an alias. - /// We need to convert value back to string here. - if (const auto * column_const = typeid_cast(node.column.get())) - writeString(applyVisitor(FieldVisitorToString(), column_const->getField()), out); - /// It may be possible that column is ColumnSet - else - writeString(node.result_name, out); - break; - } - case (ActionsDAG::ActionType::ALIAS): - appendColumnNameWithoutAlias(*node.children.front(), out, legacy); - break; - case (ActionsDAG::ActionType::ARRAY_JOIN): - writeCString("arrayJoin(", out); - appendColumnNameWithoutAlias(*node.children.front(), out, legacy); - writeChar(')', out); - break; - case (ActionsDAG::ActionType::FUNCTION): - { - auto name = node.function_base->getName(); - if (legacy && name == "modulo") - writeCString("moduleLegacy", out); - else - writeString(name, out); - - writeChar('(', out); - bool first = true; - for (const auto * arg : node.children) - { - if (!first) - writeCString(", ", out); - first = false; - - appendColumnNameWithoutAlias(*arg, out, legacy); - } - writeChar(')', out); - } - } -} - -static std::string getColumnNameWithoutAlias(const ActionsDAG::Node & node, bool legacy = false) -{ - WriteBufferFromOwnString out; - appendColumnNameWithoutAlias(node, out, legacy); - return std::move(out.str()); -} - -class KeyCondition::Tree -{ -public: - explicit Tree(const IAST * ast_) : ast(ast_) { assert(ast); } - explicit Tree(const ActionsDAG::Node * dag_) : dag(dag_) { assert(dag); } - - std::string getColumnName() const - { - if (ast) - return ast->getColumnNameWithoutAlias(); - else - return getColumnNameWithoutAlias(*dag); - } - - std::string getColumnNameLegacy() const - { - if (ast) - { - auto adjusted_ast = ast->clone(); - KeyDescription::moduloToModuloLegacyRecursive(adjusted_ast); - return adjusted_ast->getColumnNameWithoutAlias(); - } - else - return getColumnNameWithoutAlias(*dag, true); - } - - bool isFunction() const - { - if (ast) - return typeid_cast(ast); - else - return dag->type == ActionsDAG::ActionType::FUNCTION; - } - - bool isConstant() const - { - if (ast) - return typeid_cast(ast); - else - return dag->column && isColumnConst(*dag->column); - } - - ColumnWithTypeAndName getConstant() const - { - if (!isConstant()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "KeyCondition::Tree node is not a constant"); - - ColumnWithTypeAndName res; - - if (ast) - { - const auto * literal = assert_cast(ast); - res.type = applyVisitor(FieldToDataType(), literal->value); - res.column = res.type->createColumnConst(0, literal->value); - - } - else - { - res.type = dag->result_type; - res.column = dag->column; - } - - return res; - } - - bool tryGetConstant(const Block & block_with_constants, Field & out_value, DataTypePtr & 
out_type) const - { - if (ast) - { - // Constant expr should use alias names if any - String column_name = ast->getColumnName(); - - if (const auto * lit = ast->as()) - { - /// By default block_with_constants has only one column named "_dummy". - /// If block contains only constants it's may not be preprocessed by - // ExpressionAnalyzer, so try to look up in the default column. - if (!block_with_constants.has(column_name)) - column_name = "_dummy"; - - /// Simple literal - out_value = lit->value; - out_type = block_with_constants.getByName(column_name).type; - - /// If constant is not Null, we can assume it's type is not Nullable as well. - if (!out_value.isNull()) - out_type = removeNullable(out_type); - - return true; - } - else if (block_with_constants.has(column_name) && isColumnConst(*block_with_constants.getByName(column_name).column)) - { - /// An expression which is dependent on constants only - const auto & expr_info = block_with_constants.getByName(column_name); - out_value = (*expr_info.column)[0]; - out_type = expr_info.type; - - if (!out_value.isNull()) - out_type = removeNullable(out_type); - - return true; - } - } - else - { - if (dag->column && isColumnConst(*dag->column)) - { - out_value = (*dag->column)[0]; - out_type = dag->result_type; - - if (!out_value.isNull()) - out_type = removeNullable(out_type); - - return true; - } - } - - return false; - } - - ConstSetPtr tryGetPreparedSet( - const PreparedSetsPtr & sets, - const std::vector & indexes_mapping, - const DataTypes & data_types) const - { - if (sets && ast) - { - if (ast->as() || ast->as()) - return sets->get(PreparedSetKey::forSubquery(*ast)); - - /// We have `PreparedSetKey::forLiteral` but it is useless here as we don't have enough information - /// about types in left argument of the IN operator. Instead, we manually iterate through all the sets - /// and find the one for the right arg based on the AST structure (getTreeHash), after that we check - /// that the types it was prepared with are compatible with the types of the primary key. - auto types_match = [&indexes_mapping, &data_types](const SetPtr & candidate_set) - { - assert(indexes_mapping.size() == data_types.size()); - - for (size_t i = 0; i < indexes_mapping.size(); ++i) - { - if (!candidate_set->areTypesEqual(indexes_mapping[i].tuple_index, data_types[i])) - return false; - } - - return true; - }; - - for (const auto & set : sets->getByTreeHash(ast->getTreeHash())) - { - if (types_match(set)) - return set; - } - } - else if (dag->column) - { - const IColumn * col = dag->column.get(); - if (const auto * col_const = typeid_cast(col)) - col = &col_const->getDataColumn(); - - if (const auto * col_set = typeid_cast(col)) - { - auto set = col_set->getData(); - if (set->isCreated()) - return set; - } - } - - return nullptr; - } - - FunctionTree asFunction() const; - -protected: - const IAST * ast = nullptr; - const ActionsDAG::Node * dag = nullptr; -}; - -class KeyCondition::FunctionTree : public KeyCondition::Tree -{ -public: - std::string getFunctionName() const - { - if (ast) - return assert_cast(ast)->name; - else - return dag->function_base->getName(); - } - - size_t numArguments() const - { - if (ast) - { - const auto * func = assert_cast(ast); - return func->arguments ? 
func->arguments->children.size() : 0; - } - else - return dag->children.size(); - } - - Tree getArgumentAt(size_t idx) const - { - if (ast) - return Tree(assert_cast(ast)->arguments->children[idx].get()); - else - return Tree(dag->children[idx]); - } - -private: - using Tree::Tree; - - friend class Tree; -}; - - -KeyCondition::FunctionTree KeyCondition::Tree::asFunction() const -{ - if (!isFunction()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "KeyCondition::Tree node is not a function"); - - if (ast) - return KeyCondition::FunctionTree(ast); - else - return KeyCondition::FunctionTree(dag); -} - - -/// A dictionary containing actions to the corresponding functions to turn them into `RPNElement` const KeyCondition::AtomMap KeyCondition::atom_map { { @@ -581,6 +381,27 @@ const KeyCondition::AtomMap KeyCondition::atom_map return true; } }, + { + "match", + [] (RPNElement & out, const Field & value) + { + if (value.getType() != Field::Types::String) + return false; + + String prefix = extractFixedPrefixFromRegularExpression(value.get()); + if (prefix.empty()) + return false; + + String right_bound = firstStringThatIsGreaterThanAllStringsWithPrefix(prefix); + + out.function = RPNElement::FUNCTION_IN_RANGE; + out.range = !right_bound.empty() + ? Range(prefix, true, right_bound, false) + : Range::createLeftBounded(prefix, true); + + return true; + } + }, { "isNotNull", [] (RPNElement & out, const Field &) @@ -848,9 +669,11 @@ Block KeyCondition::getBlockWithConstants( { DataTypeUInt8().createColumnConstWithDefaultValue(1), std::make_shared(), "_dummy" } }; - const auto expr_for_constant_folding = ExpressionAnalyzer(query, syntax_analyzer_result, context).getConstActions(); - - expr_for_constant_folding->execute(result); + if (syntax_analyzer_result) + { + const auto expr_for_constant_folding = ExpressionAnalyzer(query, syntax_analyzer_result, context).getConstActions(); + expr_for_constant_folding->execute(result); + } return result; } @@ -867,16 +690,17 @@ static NameSet getAllSubexpressionNames(const ExpressionActions & key_expr) KeyCondition::KeyCondition( const ASTPtr & query, const ASTs & additional_filter_asts, - TreeRewriterResultPtr syntax_analyzer_result, - PreparedSetsPtr prepared_sets_, + Block block_with_constants, + PreparedSetsPtr prepared_sets, ContextPtr context, const Names & key_column_names, const ExpressionActionsPtr & key_expr_, + NameSet array_joined_column_names_, bool single_point_, bool strict_) : key_expr(key_expr_) , key_subexpr_names(getAllSubexpressionNames(*key_expr)) - , prepared_sets(prepared_sets_) + , array_joined_column_names(std::move(array_joined_column_names_)) , single_point(single_point_) , strict(strict_) { @@ -887,73 +711,64 @@ KeyCondition::KeyCondition( key_columns[name] = i; } - /** Evaluation of expressions that depend only on constants. - * For the index to be used, if it is written, for example `WHERE Date = toDate(now())`. 
- */ - Block block_with_constants = getBlockWithConstants(query, syntax_analyzer_result, context); + auto filter_node = buildFilterNode(query, additional_filter_asts); - for (const auto & [name, _] : syntax_analyzer_result->array_join_result_to_source) - array_joined_columns.insert(name); - - const ASTSelectQuery & select = query->as(); - - ASTs filters; - if (select.where()) - filters.push_back(select.where()); - - if (select.prewhere()) - filters.push_back(select.prewhere()); - - for (const auto & filter_ast : additional_filter_asts) - filters.push_back(filter_ast); - - if (!filters.empty()) - { - ASTPtr filter_query; - if (filters.size() == 1) - { - filter_query = filters.front(); - } - else - { - auto function = std::make_shared(); - - function->name = "and"; - function->arguments = std::make_shared(); - function->children.push_back(function->arguments); - function->arguments->children = std::move(filters); - - filter_query = function; - } - - /** When non-strictly monotonic functions are employed in functional index (e.g. ORDER BY toStartOfHour(dateTime)), - * the use of NOT operator in predicate will result in the indexing algorithm leave out some data. - * This is caused by rewriting in KeyCondition::tryParseAtomFromAST of relational operators to less strict - * when parsing the AST into internal RPN representation. - * To overcome the problem, before parsing the AST we transform it to its semantically equivalent form where all NOT's - * are pushed down and applied (when possible) to leaf nodes. - */ - auto ast = cloneASTWithInversionPushDown(filter_query); - traverseAST(Tree(ast.get()), context, block_with_constants); - } - else + if (!filter_node) { rpn.emplace_back(RPNElement::FUNCTION_UNKNOWN); + return; } + + /** When non-strictly monotonic functions are employed in functional index (e.g. ORDER BY toStartOfHour(dateTime)), + * the use of NOT operator in predicate will result in the indexing algorithm leave out some data. + * This is caused by rewriting in KeyCondition::tryParseAtomFromAST of relational operators to less strict + * when parsing the AST into internal RPN representation. + * To overcome the problem, before parsing the AST we transform it to its semantically equivalent form where all NOT's + * are pushed down and applied (when possible) to leaf nodes. 
+ */ + auto inverted_filter_node = cloneASTWithInversionPushDown(filter_node); + + RPNBuilder builder( + inverted_filter_node, + std::move(context), + std::move(block_with_constants), + std::move(prepared_sets), + [&](const RPNBuilderTreeNode & node, RPNElement & out) { return extractAtomFromTree(node, out); }); + rpn = std::move(builder).extractRPN(); +} + +KeyCondition::KeyCondition( + const SelectQueryInfo & query_info, + ContextPtr context, + const Names & key_column_names, + const ExpressionActionsPtr & key_expr_, + bool single_point_, + bool strict_) + : KeyCondition( + query_info.query, + query_info.filter_asts, + KeyCondition::getBlockWithConstants(query_info.query, query_info.syntax_analyzer_result, context), + query_info.prepared_sets, + context, + key_column_names, + key_expr_, + query_info.syntax_analyzer_result->getArrayJoinSourceNameSet(), + single_point_, + strict_) +{ } KeyCondition::KeyCondition( ActionDAGNodes dag_nodes, - TreeRewriterResultPtr syntax_analyzer_result, - PreparedSetsPtr prepared_sets_, ContextPtr context, const Names & key_column_names, const ExpressionActionsPtr & key_expr_, + NameSet array_joined_column_names_, bool single_point_, bool strict_) : key_expr(key_expr_) , key_subexpr_names(getAllSubexpressionNames(*key_expr)) - , prepared_sets(prepared_sets_) + , array_joined_column_names(std::move(array_joined_column_names_)) , single_point(single_point_) , strict(strict_) { @@ -964,23 +779,23 @@ KeyCondition::KeyCondition( key_columns[name] = i; } - for (const auto & [name, _] : syntax_analyzer_result->array_join_result_to_source) - array_joined_columns.insert(name); - - if (!dag_nodes.nodes.empty()) - { - auto inverted_dag = cloneASTWithInversionPushDown(std::move(dag_nodes.nodes), context); - - // std::cerr << "========== inverted dag: " << inverted_dag->dumpDAG() << std::endl; - - Block empty; - for (const auto * node : inverted_dag->getOutputs()) - traverseAST(Tree(node), context, empty); - } - else + if (dag_nodes.nodes.empty()) { rpn.emplace_back(RPNElement::FUNCTION_UNKNOWN); + return; } + + auto inverted_dag = cloneASTWithInversionPushDown(std::move(dag_nodes.nodes), context); + assert(inverted_dag->getOutputs().size() == 1); + + const auto * inverted_dag_filter_node = inverted_dag->getOutputs()[0]; + + RPNBuilder builder(inverted_dag_filter_node, context, [&](const RPNBuilderTreeNode & node, RPNElement & out) + { + return extractAtomFromTree(node, out); + }); + + rpn = std::move(builder).extractRPN(); } bool KeyCondition::addCondition(const String & column, const Range & range) @@ -992,12 +807,12 @@ bool KeyCondition::addCondition(const String & column, const Range & range) return true; } -/** Computes value of constant expression and its data type. - * Returns false, if expression isn't constant. 
- */ bool KeyCondition::getConstant(const ASTPtr & expr, Block & block_with_constants, Field & out_value, DataTypePtr & out_type) { - return Tree(expr.get()).tryGetConstant(block_with_constants, out_value, out_type); + RPNBuilderTreeContext tree_context(nullptr, block_with_constants, nullptr); + RPNBuilderTreeNode node(expr.get(), tree_context); + + return node.tryGetConstant(out_value, out_type); } @@ -1081,39 +896,6 @@ static FieldRef applyFunction(const FunctionBasePtr & func, const DataTypePtr & return {field.columns, field.row_idx, result_idx}; } -void KeyCondition::traverseAST(const Tree & node, ContextPtr context, Block & block_with_constants) -{ - RPNElement element; - - if (node.isFunction()) - { - auto func = node.asFunction(); - if (tryParseLogicalOperatorFromAST(func, element)) - { - size_t num_args = func.numArguments(); - for (size_t i = 0; i < num_args; ++i) - { - traverseAST(func.getArgumentAt(i), context, block_with_constants); - - /** The first part of the condition is for the correct support of `and` and `or` functions of arbitrary arity - * - in this case `n - 1` elements are added (where `n` is the number of arguments). - */ - if (i != 0 || element.function == RPNElement::FUNCTION_NOT) - rpn.emplace_back(element); - } - - return; - } - } - - if (!tryParseAtomFromAST(node, context, block_with_constants, element)) - { - element.function = RPNElement::FUNCTION_UNKNOWN; - } - - rpn.emplace_back(std::move(element)); -} - /** The key functional expression constraint may be inferred from a plain column in the expression. * For example, if the key contains `toStartOfHour(Timestamp)` and query contains `WHERE Timestamp >= now()`, * it can be assumed that if `toStartOfHour()` is monotonic on [now(), inf), the `toStartOfHour(Timestamp) >= toStartOfHour(now())` @@ -1180,7 +962,8 @@ bool KeyCondition::transformConstantWithValidFunctions( if (is_valid_chain) { - auto const_type = cur_node->result_type; + out_type = removeLowCardinality(out_type); + auto const_type = removeLowCardinality(cur_node->result_type); auto const_column = out_type->createColumnConst(1, out_value); auto const_value = (*castColumnAccurateOrNull({const_column, out_type, ""}, const_type))[0]; @@ -1234,7 +1017,7 @@ bool KeyCondition::transformConstantWithValidFunctions( } bool KeyCondition::canConstantBeWrappedByMonotonicFunctions( - const Tree & node, + const RPNBuilderTreeNode & node, size_t & out_key_column_num, DataTypePtr & out_key_column_type, Field & out_value, @@ -1242,7 +1025,7 @@ bool KeyCondition::canConstantBeWrappedByMonotonicFunctions( { String expr_name = node.getColumnName(); - if (array_joined_columns.contains(expr_name)) + if (array_joined_column_names.contains(expr_name)) return false; if (!key_subexpr_names.contains(expr_name)) @@ -1269,11 +1052,15 @@ bool KeyCondition::canConstantBeWrappedByMonotonicFunctions( /// Looking for possible transformation of `column = constant` into `partition_expr = function(constant)` bool KeyCondition::canConstantBeWrappedByFunctions( - const Tree & node, size_t & out_key_column_num, DataTypePtr & out_key_column_type, Field & out_value, DataTypePtr & out_type) + const RPNBuilderTreeNode & node, + size_t & out_key_column_num, + DataTypePtr & out_key_column_type, + Field & out_value, + DataTypePtr & out_type) { String expr_name = node.getColumnName(); - if (array_joined_columns.contains(expr_name)) + if (array_joined_column_names.contains(expr_name)) return false; if (!key_subexpr_names.contains(expr_name)) @@ -1287,7 +1074,7 @@ bool 
KeyCondition::canConstantBeWrappedByFunctions( /// The case `f(modulo(...))` for totally monotonic `f ` is considered to be rare. /// /// Note: for negative values, we can filter more partitions then needed. - expr_name = node.getColumnNameLegacy(); + expr_name = node.getColumnNameWithModuloLegacy(); if (!key_subexpr_names.contains(expr_name)) return false; @@ -1304,8 +1091,7 @@ bool KeyCondition::canConstantBeWrappedByFunctions( } bool KeyCondition::tryPrepareSetIndex( - const FunctionTree & func, - ContextPtr context, + const RPNBuilderFunctionTreeNode & func, RPNElement & out, size_t & out_key_column_num) { @@ -1315,13 +1101,12 @@ bool KeyCondition::tryPrepareSetIndex( std::vector indexes_mapping; DataTypes data_types; - auto get_key_tuple_position_mapping = [&](const Tree & node, size_t tuple_index) + auto get_key_tuple_position_mapping = [&](const RPNBuilderTreeNode & node, size_t tuple_index) { MergeTreeSetIndex::KeyTuplePositionMapping index_mapping; index_mapping.tuple_index = tuple_index; DataTypePtr data_type; - if (isKeyPossiblyWrappedByMonotonicFunctions( - node, context, index_mapping.key_index, data_type, index_mapping.functions)) + if (isKeyPossiblyWrappedByMonotonicFunctions(node, index_mapping.key_index, data_type, index_mapping.functions)) { indexes_mapping.push_back(index_mapping); data_types.push_back(data_type); @@ -1335,25 +1120,29 @@ bool KeyCondition::tryPrepareSetIndex( { /// Note: in case of ActionsDAG, tuple may be a constant. /// In this case, there is no keys in tuple. So, we don't have to check it. - auto left_arg_tuple = left_arg.asFunction(); + auto left_arg_tuple = left_arg.toFunctionNode(); if (left_arg_tuple.getFunctionName() == "tuple") { - left_args_count = left_arg_tuple.numArguments(); + left_args_count = left_arg_tuple.getArgumentsSize(); for (size_t i = 0; i < left_args_count; ++i) get_key_tuple_position_mapping(left_arg_tuple.getArgumentAt(i), i); } else + { get_key_tuple_position_mapping(left_arg, 0); + } } else + { get_key_tuple_position_mapping(left_arg, 0); + } if (indexes_mapping.empty()) return false; const auto right_arg = func.getArgumentAt(1); - auto prepared_set = right_arg.tryGetPreparedSet(prepared_sets, indexes_mapping, data_types); + auto prepared_set = right_arg.tryGetPreparedSet(indexes_mapping, data_types); if (!prepared_set) return false; @@ -1407,6 +1196,7 @@ public: ColumnsWithTypeAndName new_arguments; new_arguments.reserve(arguments.size() + 1); new_arguments.push_back(const_arg); + new_arguments.front().column = new_arguments.front().column->cloneResized(input_rows_count); for (const auto & arg : arguments) new_arguments.push_back(arg); return func->prepare(new_arguments)->execute(new_arguments, result_type, input_rows_count, dry_run); @@ -1415,6 +1205,7 @@ public: { auto new_arguments = arguments; new_arguments.push_back(const_arg); + new_arguments.back().column = new_arguments.back().column->cloneResized(input_rows_count); return func->prepare(new_arguments)->execute(new_arguments, result_type, input_rows_count, dry_run); } else @@ -1445,13 +1236,12 @@ private: bool KeyCondition::isKeyPossiblyWrappedByMonotonicFunctions( - const Tree & node, - ContextPtr context, + const RPNBuilderTreeNode & node, size_t & out_key_column_num, DataTypePtr & out_key_res_column_type, MonotonicFunctionsChain & out_functions_chain) { - std::vector chain_not_tested_for_monotonicity; + std::vector chain_not_tested_for_monotonicity; DataTypePtr key_column_type; if (!isKeyPossiblyWrappedByMonotonicFunctionsImpl(node, out_key_column_num, 
key_column_type, chain_not_tested_for_monotonicity)) @@ -1460,17 +1250,17 @@ bool KeyCondition::isKeyPossiblyWrappedByMonotonicFunctions( for (auto it = chain_not_tested_for_monotonicity.rbegin(); it != chain_not_tested_for_monotonicity.rend(); ++it) { auto function = *it; - auto func_builder = FunctionFactory::instance().tryGet(function.getFunctionName(), context); + auto func_builder = FunctionFactory::instance().tryGet(function.getFunctionName(), node.getTreeContext().getQueryContext()); if (!func_builder) return false; ColumnsWithTypeAndName arguments; ColumnWithTypeAndName const_arg; FunctionWithOptionalConstArg::Kind kind = FunctionWithOptionalConstArg::Kind::NO_CONST; - if (function.numArguments() == 2) + if (function.getArgumentsSize() == 2) { if (function.getArgumentAt(0).isConstant()) { - const_arg = function.getArgumentAt(0).getConstant(); + const_arg = function.getArgumentAt(0).getConstantColumn(); arguments.push_back(const_arg); arguments.push_back({ nullptr, key_column_type, "" }); kind = FunctionWithOptionalConstArg::Kind::LEFT_CONST; @@ -1478,7 +1268,7 @@ bool KeyCondition::isKeyPossiblyWrappedByMonotonicFunctions( else if (function.getArgumentAt(1).isConstant()) { arguments.push_back({ nullptr, key_column_type, "" }); - const_arg = function.getArgumentAt(1).getConstant(); + const_arg = function.getArgumentAt(1).getConstantColumn(); arguments.push_back(const_arg); kind = FunctionWithOptionalConstArg::Kind::RIGHT_CONST; } @@ -1504,10 +1294,10 @@ bool KeyCondition::isKeyPossiblyWrappedByMonotonicFunctions( } bool KeyCondition::isKeyPossiblyWrappedByMonotonicFunctionsImpl( - const Tree & node, + const RPNBuilderTreeNode & node, size_t & out_key_column_num, DataTypePtr & out_key_column_type, - std::vector & out_functions_chain) + std::vector & out_functions_chain) { /** By itself, the key column can be a functional expression. for example, `intHash32(UserID)`. * Therefore, use the full name of the expression for search. 
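The monotonic-function-chain analysis being moved onto RPNBuilderTreeNode relies on one property: if every function wrapping the key column is non-decreasing, a predicate on f(key) can be evaluated against a granule using only the granule's key endpoints. A self-contained numeric sketch, with an invented function and invented granule bounds:

#include <cmath>
#include <iostream>
#include <vector>

int main()
{
    /// Stand-in for a monotonic chain such as toStartOfHour(key): non-decreasing in key.
    auto f = [](double x) { return std::floor(x / 3600.0); };

    struct Granule { double key_min; double key_max; };
    std::vector<Granule> granules = {{0, 7199}, {7200, 14399}, {14400, 21599}};

    double constant = 3; /// predicate: f(key) >= 3, i.e. key >= 10800
    for (const auto & g : granules)
    {
        /// Because f is non-decreasing, f(key) <= f(key_max) for every key in the granule,
        /// so testing the right endpoint is enough to decide whether the granule may match.
        bool may_match = f(g.key_max) >= constant;
        std::cout << "[" << g.key_min << ", " << g.key_max << "] "
                  << (may_match ? "read" : "skip") << '\n';
    }
}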
@@ -1517,7 +1307,7 @@ bool KeyCondition::isKeyPossiblyWrappedByMonotonicFunctionsImpl( // Key columns should use canonical names for index analysis String name = node.getColumnName(); - if (array_joined_columns.contains(name)) + if (array_joined_column_names.contains(name)) return false; auto it = key_columns.find(name); @@ -1530,37 +1320,39 @@ bool KeyCondition::isKeyPossiblyWrappedByMonotonicFunctionsImpl( if (node.isFunction()) { - auto func = node.asFunction(); + auto function_node = node.toFunctionNode(); - size_t num_args = func.numArguments(); - if (num_args > 2 || num_args == 0) + size_t arguments_size = function_node.getArgumentsSize(); + if (arguments_size > 2 || arguments_size == 0) return false; - out_functions_chain.push_back(func); - bool ret = false; - if (num_args == 2) + out_functions_chain.push_back(function_node); + + bool result = false; + if (arguments_size == 2) { - if (func.getArgumentAt(0).isConstant()) + if (function_node.getArgumentAt(0).isConstant()) { - ret = isKeyPossiblyWrappedByMonotonicFunctionsImpl(func.getArgumentAt(1), out_key_column_num, out_key_column_type, out_functions_chain); + result = isKeyPossiblyWrappedByMonotonicFunctionsImpl(function_node.getArgumentAt(1), out_key_column_num, out_key_column_type, out_functions_chain); } - else if (func.getArgumentAt(1).isConstant()) + else if (function_node.getArgumentAt(1).isConstant()) { - ret = isKeyPossiblyWrappedByMonotonicFunctionsImpl(func.getArgumentAt(0), out_key_column_num, out_key_column_type, out_functions_chain); + result = isKeyPossiblyWrappedByMonotonicFunctionsImpl(function_node.getArgumentAt(0), out_key_column_num, out_key_column_type, out_functions_chain); } } else { - ret = isKeyPossiblyWrappedByMonotonicFunctionsImpl(func.getArgumentAt(0), out_key_column_num, out_key_column_type, out_functions_chain); + result = isKeyPossiblyWrappedByMonotonicFunctionsImpl(function_node.getArgumentAt(0), out_key_column_num, out_key_column_type, out_functions_chain); } - return ret; + + return result; } return false; } -static void castValueToType(const DataTypePtr & desired_type, Field & src_value, const DataTypePtr & src_type, const KeyCondition::Tree & node) +static void castValueToType(const DataTypePtr & desired_type, Field & src_value, const DataTypePtr & src_type, const String & node_column_name) { try { @@ -1570,13 +1362,13 @@ static void castValueToType(const DataTypePtr & desired_type, Field & src_value, { throw Exception("Key expression contains comparison between inconvertible types: " + desired_type->getName() + " and " + src_type->getName() + - " inside " + node.getColumnName(), + " inside " + node_column_name, ErrorCodes::BAD_TYPE_OF_FIELD); } } -bool KeyCondition::tryParseAtomFromAST(const Tree & node, ContextPtr context, Block & block_with_constants, RPNElement & out) +bool KeyCondition::extractAtomFromTree(const RPNBuilderTreeNode & node, RPNElement & out) { /** Functions < > = != <= >= in `notIn` isNull isNotNull, where one argument is a constant, and the other is one of columns of key, * or itself, wrapped in a chain of possibly-monotonic functions, @@ -1586,8 +1378,8 @@ bool KeyCondition::tryParseAtomFromAST(const Tree & node, ContextPtr context, Bl DataTypePtr const_type; if (node.isFunction()) { - auto func = node.asFunction(); - size_t num_args = func.numArguments(); + auto func = node.toFunctionNode(); + size_t num_args = func.getArgumentsSize(); DataTypePtr key_expr_type; /// Type of expression containing key column size_t key_column_num = -1; /// Number of a key column (inside 
key_column_names array) @@ -1599,7 +1391,7 @@ bool KeyCondition::tryParseAtomFromAST(const Tree & node, ContextPtr context, Bl if (num_args == 1) { - if (!(isKeyPossiblyWrappedByMonotonicFunctions(func.getArgumentAt(0), context, key_column_num, key_expr_type, chain))) + if (!(isKeyPossiblyWrappedByMonotonicFunctions(func.getArgumentAt(0), key_column_num, key_expr_type, chain))) return false; if (key_column_num == static_cast(-1)) @@ -1630,7 +1422,7 @@ bool KeyCondition::tryParseAtomFromAST(const Tree & node, ContextPtr context, Bl if (functionIsInOrGlobalInOperator(func_name)) { - if (tryPrepareSetIndex(func, context, out, key_column_num)) + if (tryPrepareSetIndex(func, out, key_column_num)) { key_arg_pos = 0; is_set_const = true; @@ -1638,7 +1430,7 @@ bool KeyCondition::tryParseAtomFromAST(const Tree & node, ContextPtr context, Bl else return false; } - else if (func.getArgumentAt(1).tryGetConstant(block_with_constants, const_value, const_type)) + else if (func.getArgumentAt(1).tryGetConstant(const_value, const_type)) { /// If the const operand is null, the atom will be always false if (const_value.isNull()) @@ -1647,7 +1439,7 @@ bool KeyCondition::tryParseAtomFromAST(const Tree & node, ContextPtr context, Bl return true; } - if (isKeyPossiblyWrappedByMonotonicFunctions(func.getArgumentAt(0), context, key_column_num, key_expr_type, chain)) + if (isKeyPossiblyWrappedByMonotonicFunctions(func.getArgumentAt(0), key_column_num, key_expr_type, chain)) { key_arg_pos = 0; } @@ -1668,7 +1460,7 @@ bool KeyCondition::tryParseAtomFromAST(const Tree & node, ContextPtr context, Bl else return false; } - else if (func.getArgumentAt(0).tryGetConstant(block_with_constants, const_value, const_type)) + else if (func.getArgumentAt(0).tryGetConstant(const_value, const_type)) { /// If the const operand is null, the atom will be always false if (const_value.isNull()) @@ -1677,7 +1469,7 @@ bool KeyCondition::tryParseAtomFromAST(const Tree & node, ContextPtr context, Bl return true; } - if (isKeyPossiblyWrappedByMonotonicFunctions(func.getArgumentAt(1), context, key_column_num, key_expr_type, chain)) + if (isKeyPossiblyWrappedByMonotonicFunctions(func.getArgumentAt(1), key_column_num, key_expr_type, chain)) { key_arg_pos = 1; } @@ -1718,7 +1510,7 @@ bool KeyCondition::tryParseAtomFromAST(const Tree & node, ContextPtr context, Bl else if (func_name == "in" || func_name == "notIn" || func_name == "like" || func_name == "notLike" || func_name == "ilike" || func_name == "notIlike" || - func_name == "startsWith") + func_name == "startsWith" || func_name == "match") { /// "const IN data_column" doesn't make sense (unlike "data_column IN const") return false; @@ -1757,7 +1549,7 @@ bool KeyCondition::tryParseAtomFromAST(const Tree & node, ContextPtr context, Bl if (!const_type->equals(*common_type)) { - castValueToType(common_type, const_value, const_type, node); + castValueToType(common_type, const_value, const_type, node.getColumnName()); // Need to set is_constant_transformed unless we're doing exact conversion if (!key_expr_type_not_null->equals(*common_type)) @@ -1802,7 +1594,7 @@ bool KeyCondition::tryParseAtomFromAST(const Tree & node, ContextPtr context, Bl return atom_it->second(out, const_value); } - else if (node.tryGetConstant(block_with_constants, const_value, const_type)) + else if (node.tryGetConstant(const_value, const_type)) { /// For cases where it says, for example, `WHERE 0 AND something` @@ -1825,32 +1617,6 @@ bool KeyCondition::tryParseAtomFromAST(const Tree & node, ContextPtr context, Bl return 
false; } -bool KeyCondition::tryParseLogicalOperatorFromAST(const FunctionTree & func, RPNElement & out) -{ - /// Functions AND, OR, NOT. - /// Also a special function `indexHint` - works as if instead of calling a function there are just parentheses - /// (or, the same thing - calling the function `and` from one argument). - - if (func.getFunctionName() == "not") - { - if (func.numArguments() != 1) - return false; - - out.function = RPNElement::FUNCTION_NOT; - } - else - { - if (func.getFunctionName() == "and" || func.getFunctionName() == "indexHint") - out.function = RPNElement::FUNCTION_AND; - else if (func.getFunctionName() == "or") - out.function = RPNElement::FUNCTION_OR; - else - return false; - } - - return true; -} - String KeyCondition::toString() const { String res; diff --git a/src/Storages/MergeTree/KeyCondition.h b/src/Storages/MergeTree/KeyCondition.h index d00a25a1077..fe1bffa9305 100644 --- a/src/Storages/MergeTree/KeyCondition.h +++ b/src/Storages/MergeTree/KeyCondition.h @@ -2,11 +2,16 @@ #include -#include #include -#include -#include +#include + +#include +#include +#include + +#include +#include namespace DB { @@ -205,45 +210,37 @@ public: class KeyCondition { public: - /// Does not take into account the SAMPLE section. all_columns - the set of all columns of the table. + /// Construct key condition from AST SELECT query WHERE, PREWHERE and additional filters KeyCondition( const ASTPtr & query, const ASTs & additional_filter_asts, - TreeRewriterResultPtr syntax_analyzer_result, + Block block_with_constants, PreparedSetsPtr prepared_sets_, ContextPtr context, const Names & key_column_names, const ExpressionActionsPtr & key_expr, + NameSet array_joined_column_names, bool single_point_ = false, bool strict_ = false); + /** Construct key condition from AST SELECT query WHERE, PREWHERE and additional filters. + * Select query, additional filters, prepared sets are initialized using query info. + */ KeyCondition( const SelectQueryInfo & query_info, ContextPtr context, const Names & key_column_names, const ExpressionActionsPtr & key_expr_, bool single_point_ = false, - bool strict_ = false) - : KeyCondition( - query_info.query, - query_info.filter_asts, - query_info.syntax_analyzer_result, - query_info.prepared_sets, - context, - key_column_names, - key_expr_, - single_point_, - strict_) - { - } + bool strict_ = false); + /// Construct key condition from ActionsDAG nodes KeyCondition( ActionDAGNodes dag_nodes, - TreeRewriterResultPtr syntax_analyzer_result, - PreparedSetsPtr prepared_sets_, ContextPtr context, const Names & key_column_names, const ExpressionActionsPtr & key_expr, + NameSet array_joined_column_names, bool single_point_ = false, bool strict_ = false); @@ -275,6 +272,7 @@ public: /// Checks that the index can not be used /// FUNCTION_UNKNOWN will be AND'ed (if any). bool alwaysUnknownOrTrue() const; + /// Checks that the index can not be used /// Does not allow any FUNCTION_UNKNOWN (will instantly return true). bool anyUnknownOrAlwaysTrue() const; @@ -313,10 +311,18 @@ public: * Returns false, if expression isn't constant. */ static bool getConstant( - const ASTPtr & expr, Block & block_with_constants, Field & out_value, DataTypePtr & out_type); + const ASTPtr & expr, + Block & block_with_constants, + Field & out_value, + DataTypePtr & out_type); + /** Calculate expressions, that depend only on constants. + * For index to work when something like "WHERE Date = toDate(now())" is written. 
+ */ static Block getBlockWithConstants( - const ASTPtr & query, const TreeRewriterResultPtr & syntax_analyzer_result, ContextPtr context); + const ASTPtr & query, + const TreeRewriterResultPtr & syntax_analyzer_result, + ContextPtr context); static std::optional applyMonotonicFunctionsChainToRange( Range key_range, @@ -373,14 +379,11 @@ private: using RPN = std::vector; using ColumnIndices = std::map; - using AtomMap = std::unordered_map; public: + using AtomMap = std::unordered_map; static const AtomMap atom_map; - class Tree; - class FunctionTree; - private: BoolMask checkInRange( size_t used_key_size, @@ -390,9 +393,7 @@ private: bool right_bounded, BoolMask initial_mask) const; - void traverseAST(const Tree & node, ContextPtr context, Block & block_with_constants); - bool tryParseAtomFromAST(const Tree & node, ContextPtr context, Block & block_with_constants, RPNElement & out); - static bool tryParseLogicalOperatorFromAST(const FunctionTree & func, RPNElement & out); + bool extractAtomFromTree(const RPNBuilderTreeNode & node, RPNElement & out); /** Is node the key column * or expression in which column of key is wrapped by chain of functions, @@ -401,17 +402,16 @@ private: * and fills chain of possibly-monotonic functions. */ bool isKeyPossiblyWrappedByMonotonicFunctions( - const Tree & node, - ContextPtr context, + const RPNBuilderTreeNode & node, size_t & out_key_column_num, DataTypePtr & out_key_res_column_type, MonotonicFunctionsChain & out_functions_chain); bool isKeyPossiblyWrappedByMonotonicFunctionsImpl( - const Tree & node, + const RPNBuilderTreeNode & node, size_t & out_key_column_num, DataTypePtr & out_key_column_type, - std::vector & out_functions_chain); + std::vector & out_functions_chain); bool transformConstantWithValidFunctions( const String & expr_name, @@ -422,21 +422,24 @@ private: std::function always_monotonic) const; bool canConstantBeWrappedByMonotonicFunctions( - const Tree & node, + const RPNBuilderTreeNode & node, size_t & out_key_column_num, DataTypePtr & out_key_column_type, Field & out_value, DataTypePtr & out_type); bool canConstantBeWrappedByFunctions( - const Tree & node, size_t & out_key_column_num, DataTypePtr & out_key_column_type, Field & out_value, DataTypePtr & out_type); + const RPNBuilderTreeNode & node, + size_t & out_key_column_num, + DataTypePtr & out_key_column_type, + Field & out_value, + DataTypePtr & out_type); /// If it's possible to make an RPNElement /// that will filter values (possibly tuples) by the content of 'prepared_set', /// do it and return true. bool tryPrepareSetIndex( - const FunctionTree & func, - ContextPtr context, + const RPNBuilderFunctionTreeNode & func, RPNElement & out, size_t & out_key_column_num); @@ -472,11 +475,12 @@ private: /// All intermediate columns are used to calculate key_expr. 
const NameSet key_subexpr_names; - NameSet array_joined_columns; - PreparedSetsPtr prepared_sets; + /// Array joined column names + NameSet array_joined_column_names; // If true, always allow key_expr to be wrapped by function bool single_point; + // If true, do not use always_monotonic information to transform constants bool strict; }; diff --git a/src/Storages/MergeTree/LoadedMergeTreeDataPartInfoForReader.h b/src/Storages/MergeTree/LoadedMergeTreeDataPartInfoForReader.h index a16aaa728ae..bc786ec0428 100644 --- a/src/Storages/MergeTree/LoadedMergeTreeDataPartInfoForReader.h +++ b/src/Storages/MergeTree/LoadedMergeTreeDataPartInfoForReader.h @@ -12,7 +12,8 @@ public: explicit LoadedMergeTreeDataPartInfoForReader(MergeTreeData::DataPartPtr data_part_) : IMergeTreeDataPartInfoForReader(data_part_->storage.getContext()) , data_part(data_part_) - {} + { + } bool isCompactPart() const override { return DB::isCompactPart(data_part); } @@ -22,7 +23,7 @@ public: bool isProjectionPart() const override { return data_part->isProjectionPart(); } - const DataPartStoragePtr & getDataPartStorage() const override { return data_part->data_part_storage; } + DataPartStoragePtr getDataPartStorage() const override { return data_part->getDataPartStoragePtr(); } const NamesAndTypesList & getColumns() const override { return data_part->getColumns(); } diff --git a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp index 18982c3bbf4..9a9b8a4a6bb 100644 --- a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp @@ -160,7 +160,9 @@ ReplicatedMergeMutateTaskBase::PrepareResult MergeFromLogEntryTask::prepare() for (auto & part_ptr : parts) { ttl_infos.update(part_ptr->ttl_infos); - max_volume_index = std::max(max_volume_index, part_ptr->data_part_storage->getVolumeIndex(*storage.getStoragePolicy())); + auto disk_name = part_ptr->getDataPartStorage().getDiskName(); + size_t volume_index = storage.getStoragePolicy()->getVolumeIndexByDiskName(disk_name); + max_volume_index = std::max(max_volume_index, volume_index); } /// It will live until the whole task is being destroyed @@ -294,12 +296,10 @@ ReplicatedMergeMutateTaskBase::PrepareResult MergeFromLogEntryTask::prepare() bool MergeFromLogEntryTask::finalize(ReplicatedMergeMutateTaskBase::PartLogWriter write_part_log) { part = merge_task->getFuture().get(); - auto builder = merge_task->getBuilder(); /// Task is not needed merge_task.reset(); - - storage.merger_mutator.renameMergedTemporaryPart(part, parts, NO_TRANSACTION_PTR, *transaction_ptr, builder); + storage.merger_mutator.renameMergedTemporaryPart(part, parts, NO_TRANSACTION_PTR, *transaction_ptr); try { diff --git a/src/Storages/MergeTree/MergeList.cpp b/src/Storages/MergeTree/MergeList.cpp index ebe826531d2..02e61a70eb6 100644 --- a/src/Storages/MergeTree/MergeList.cpp +++ b/src/Storages/MergeTree/MergeList.cpp @@ -65,7 +65,7 @@ MergeListElement::MergeListElement( for (const auto & source_part : future_part->parts) { source_part_names.emplace_back(source_part->name); - source_part_paths.emplace_back(source_part->data_part_storage->getFullPath()); + source_part_paths.emplace_back(source_part->getDataPartStorage().getFullPath()); total_size_bytes_compressed += source_part->getBytesOnDisk(); total_size_marks += source_part->getMarksCount(); diff --git a/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp b/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp index 0dcdd927e7b..cc5e87956a1 100644 --- 
a/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp +++ b/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp @@ -115,10 +115,9 @@ void MergePlainMergeTreeTask::prepare() void MergePlainMergeTreeTask::finish() { new_part = merge_task->getFuture().get(); - auto builder = merge_task->getBuilder(); MergeTreeData::Transaction transaction(storage, txn.get()); - storage.merger_mutator.renameMergedTemporaryPart(new_part, future_part->parts, txn, transaction, builder); + storage.merger_mutator.renameMergedTemporaryPart(new_part, future_part->parts, txn, transaction); transaction.commit(); write_part_log({}); diff --git a/src/Storages/MergeTree/MergePlainMergeTreeTask.h b/src/Storages/MergeTree/MergePlainMergeTreeTask.h index c08853f8e1b..d84db36bac2 100644 --- a/src/Storages/MergeTree/MergePlainMergeTreeTask.h +++ b/src/Storages/MergeTree/MergePlainMergeTreeTask.h @@ -66,7 +66,7 @@ private: StorageMetadataPtr metadata_snapshot; bool deduplicate; Names deduplicate_by_columns; - std::shared_ptr merge_mutate_entry{nullptr}; + MergeMutateSelectedEntryPtr merge_mutate_entry{nullptr}; TableLockHolder table_lock_holder; FutureMergedMutatedPartPtr future_part{nullptr}; MergeTreeData::MutableDataPartPtr new_part; diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index c247d2d2476..0b6fe23e961 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -1,3 +1,4 @@ +#include "Storages/MergeTree/IDataPartStorage.h" #include #include @@ -125,23 +126,26 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() ctx->disk = global_ctx->space_reservation->getDisk(); String local_tmp_part_basename = local_tmp_prefix + global_ctx->future_part->name + local_tmp_suffix; + MutableDataPartStoragePtr data_part_storage; - if (global_ctx->parent_path_storage_builder) + if (global_ctx->parent_part) { - global_ctx->data_part_storage_builder = global_ctx->parent_path_storage_builder->getProjection(local_tmp_part_basename); + data_part_storage = global_ctx->parent_part->getDataPartStorage().getProjection(local_tmp_part_basename); } else { auto local_single_disk_volume = std::make_shared("volume_" + global_ctx->future_part->name, ctx->disk, 0); - global_ctx->data_part_storage_builder = std::make_shared( + data_part_storage = std::make_shared( local_single_disk_volume, global_ctx->data->relative_data_path, local_tmp_part_basename); + + data_part_storage->beginTransaction(); } - if (global_ctx->data_part_storage_builder->exists()) - throw Exception("Directory " + global_ctx->data_part_storage_builder->getFullPath() + " already exists", ErrorCodes::DIRECTORY_ALREADY_EXISTS); + if (data_part_storage->exists()) + throw Exception("Directory " + data_part_storage->getFullPath() + " already exists", ErrorCodes::DIRECTORY_ALREADY_EXISTS); if (!global_ctx->parent_part) global_ctx->temporary_directory_lock = global_ctx->data->getTemporaryPartDirectoryHolder(local_tmp_part_basename); @@ -149,7 +153,7 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() global_ctx->all_column_names = global_ctx->metadata_snapshot->getColumns().getNamesOfPhysical(); global_ctx->storage_columns = global_ctx->metadata_snapshot->getColumns().getAllPhysical(); - auto object_columns = MergeTreeData::getObjectColumns(global_ctx->future_part->parts, global_ctx->metadata_snapshot->getColumns()); + auto object_columns = MergeTreeData::getConcreteObjectColumns(global_ctx->future_part->parts, global_ctx->metadata_snapshot->getColumns()); global_ctx->storage_snapshot = 
std::make_shared(*global_ctx->data, global_ctx->metadata_snapshot, object_columns); extendObjectColumns(global_ctx->storage_columns, object_columns, false); @@ -163,8 +167,6 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() global_ctx->merging_columns, global_ctx->merging_column_names); - auto data_part_storage = global_ctx->data_part_storage_builder->getStorage(); - global_ctx->new_data_part = global_ctx->data->createPart( global_ctx->future_part->name, global_ctx->future_part->type, @@ -302,7 +304,6 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() global_ctx->to = std::make_shared( global_ctx->new_data_part, - global_ctx->data_part_storage_builder, global_ctx->metadata_snapshot, global_ctx->merging_columns, MergeTreeIndexFactory::instance().getMany(global_ctx->metadata_snapshot->getSecondaryIndices()), @@ -501,7 +502,6 @@ void MergeTask::VerticalMergeStage::prepareVerticalMergeForOneColumn() const ctx->executor = std::make_unique(ctx->column_parts_pipeline); ctx->column_to = std::make_unique( - global_ctx->data_part_storage_builder, global_ctx->new_data_part, global_ctx->metadata_snapshot, ctx->executor->getHeader(), @@ -654,7 +654,6 @@ bool MergeTask::MergeProjectionsStage::mergeMinMaxIndexAndPrepareProjections() c global_ctx->deduplicate_by_columns, projection_merging_params, global_ctx->new_data_part.get(), - global_ctx->data_part_storage_builder.get(), ".proj", NO_TRANSACTION_PTR, global_ctx->data, diff --git a/src/Storages/MergeTree/MergeTask.h b/src/Storages/MergeTree/MergeTask.h index 43aba602052..6a29cdbb5ca 100644 --- a/src/Storages/MergeTree/MergeTask.h +++ b/src/Storages/MergeTree/MergeTask.h @@ -59,8 +59,7 @@ public: bool deduplicate_, Names deduplicate_by_columns_, MergeTreeData::MergingParams merging_params_, - const IMergeTreeDataPart * parent_part_, - const IDataPartStorageBuilder * parent_path_storage_builder_, + IMergeTreeDataPart * parent_part_, String suffix_, MergeTreeTransactionPtr txn, MergeTreeData * data_, @@ -82,7 +81,6 @@ public: global_ctx->deduplicate = std::move(deduplicate_); global_ctx->deduplicate_by_columns = std::move(deduplicate_by_columns_); global_ctx->parent_part = std::move(parent_part_); - global_ctx->parent_path_storage_builder = std::move(parent_path_storage_builder_); global_ctx->data = std::move(data_); global_ctx->mutator = std::move(mutator_); global_ctx->merges_blocker = std::move(merges_blocker_); @@ -102,11 +100,6 @@ public: return global_ctx->promise.get_future(); } - DataPartStorageBuilderPtr getBuilder() - { - return global_ctx->data_part_storage_builder; - } - bool execute(); private: @@ -141,8 +134,7 @@ private: StorageMetadataPtr metadata_snapshot{nullptr}; FutureMergedMutatedPartPtr future_part{nullptr}; /// This will be either nullptr or new_data_part, so raw pointer is ok. - const IMergeTreeDataPart * parent_part{nullptr}; - const IDataPartStorageBuilder * parent_path_storage_builder{nullptr}; + IMergeTreeDataPart * parent_part{nullptr}; ContextPtr context{nullptr}; time_t time_of_merge{0}; ReservationSharedPtr space_reservation{nullptr}; @@ -168,7 +160,6 @@ private: std::unique_ptr merging_executor; MergeTreeData::MutableDataPartPtr new_data_part{nullptr}; - DataPartStorageBuilderPtr data_part_storage_builder; /// If lightweight delete mask is present then some input rows are filtered out right after reading. 
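
The MergeTask hunks above drop the separately tracked IDataPartStorageBuilder: the temporary part storage itself now opens a transaction in prepare() (beginTransaction) and MergeTreeData::Transaction later commits whatever storages still hold one (see the hasActiveTransaction/commitTransaction loop further down in this patch). A rough standalone sketch of that ownership model, with invented class and variable names used purely for illustration:

    #include <iostream>
    #include <memory>
    #include <vector>

    // Toy stand-in for a data part storage with a built-in pending transaction (illustration only).
    class PartStorage
    {
    public:
        void beginTransaction() { active_transaction = true; }
        bool hasActiveTransaction() const { return active_transaction; }
        void commitTransaction()
        {
            // In the real code this is where renames/hard links recorded during the merge become visible.
            std::cout << "committing storage changes\n";
            active_transaction = false;
        }
    private:
        bool active_transaction = false;
    };

    int main()
    {
        // Merge prepare(): create the temporary part storage and immediately open a transaction on it.
        auto storage = std::make_shared<PartStorage>();
        storage->beginTransaction();

        // ... the merge writes data through `storage` ...

        // Commit step: commit whatever storages still hold an open transaction,
        // instead of committing a separately tracked list of builders.
        std::vector<std::shared_ptr<PartStorage>> precommitted = {storage};
        for (const auto & part_storage : precommitted)
            if (part_storage->hasActiveTransaction())
                part_storage->commitTransaction();
        return 0;
    }
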
std::shared_ptr> input_rows_filtered{std::make_shared>(0)}; diff --git a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp index 475407a402b..b63e08b733d 100644 --- a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp @@ -607,7 +607,7 @@ Block MergeTreeBaseSelectProcessor::transformHeader( if (!row_level_column.type->canBeUsedInBooleanContext()) { throw Exception("Invalid type for filter in PREWHERE: " + row_level_column.type->getName(), - ErrorCodes::LOGICAL_ERROR); + ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER); } block.erase(prewhere_info->row_level_column_name); @@ -620,7 +620,7 @@ Block MergeTreeBaseSelectProcessor::transformHeader( if (!prewhere_column.type->canBeUsedInBooleanContext()) { throw Exception("Invalid type for filter in PREWHERE: " + prewhere_column.type->getName(), - ErrorCodes::LOGICAL_ERROR); + ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER); } if (prewhere_info->remove_prewhere_column) @@ -628,13 +628,13 @@ Block MergeTreeBaseSelectProcessor::transformHeader( else { WhichDataType which(removeNullable(recursiveRemoveLowCardinality(prewhere_column.type))); - if (which.isInt() || which.isUInt()) + if (which.isNativeInt() || which.isNativeUInt()) prewhere_column.column = prewhere_column.type->createColumnConst(block.rows(), 1u)->convertToFullColumnIfConst(); else if (which.isFloat()) prewhere_column.column = prewhere_column.type->createColumnConst(block.rows(), 1.0f)->convertToFullColumnIfConst(); else - throw Exception("Illegal type " + prewhere_column.type->getName() + " of column for filter.", - ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER); + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER, "Illegal type {} of column for filter", prewhere_column.type->getName()); } } diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 0ba434137dd..977133a8ad8 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -943,8 +943,8 @@ Int64 MergeTreeData::getMaxBlockNumber() const } void MergeTreeData::loadDataPartsFromDisk( - DataPartsVector & broken_parts_to_detach, - DataPartsVector & duplicate_parts_to_remove, + MutableDataPartsVector & broken_parts_to_detach, + MutableDataPartsVector & duplicate_parts_to_remove, ThreadPool & pool, size_t num_parts, std::queue>> & parts_queue, @@ -1082,7 +1082,6 @@ void MergeTreeData::loadDataPartsFromDisk( if (size_of_part.has_value()) part_size_str = formatReadableSizeWithBinarySuffix(*size_of_part); - LOG_ERROR(log, "Detaching broken part {}{} (size: {}). " "If it happened after update, it is likely because of backward incompatibility. 
" @@ -1200,8 +1199,7 @@ void MergeTreeData::loadDataPartsFromDisk( void MergeTreeData::loadDataPartsFromWAL( - DataPartsVector & /* broken_parts_to_detach */, - DataPartsVector & duplicate_parts_to_remove, + MutableDataPartsVector & duplicate_parts_to_remove, MutableDataPartsVector & parts_from_wal) { for (auto & part : parts_from_wal) @@ -1215,7 +1213,7 @@ void MergeTreeData::loadDataPartsFromWAL( { if ((*it)->checksums.getTotalChecksumHex() == part->checksums.getTotalChecksumHex()) { - LOG_ERROR(log, "Remove duplicate part {}", part->data_part_storage->getFullPath()); + LOG_ERROR(log, "Remove duplicate part {}", part->getDataPartStorage().getFullPath()); duplicate_parts_to_remove.push_back(part); } else @@ -1329,8 +1327,8 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) auto part_lock = lockParts(); data_parts_indexes.clear(); - DataPartsVector broken_parts_to_detach; - DataPartsVector duplicate_parts_to_remove; + MutableDataPartsVector broken_parts_to_detach; + MutableDataPartsVector duplicate_parts_to_remove; if (num_parts > 0) loadDataPartsFromDisk( @@ -1384,7 +1382,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) parts_from_wal.insert( parts_from_wal.end(), std::make_move_iterator(disk_wal_parts.begin()), std::make_move_iterator(disk_wal_parts.end())); - loadDataPartsFromWAL(broken_parts_to_detach, duplicate_parts_to_remove, parts_from_wal); + loadDataPartsFromWAL(duplicate_parts_to_remove, parts_from_wal); num_parts += parts_from_wal.size(); } @@ -1397,11 +1395,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) } for (auto & part : broken_parts_to_detach) - { - auto builder = part->data_part_storage->getBuilder(); - part->renameToDetached("broken-on-start", builder); /// detached parts must not have '_' in prefixes - builder->commit(); - } + part->renameToDetached("broken-on-start"); /// detached parts must not have '_' in prefixes for (auto & part : duplicate_parts_to_remove) part->remove(); @@ -1689,6 +1683,15 @@ scope_guard MergeTreeData::getTemporaryPartDirectoryHolder(const String & part_d return [this, part_dir_name]() { temporary_parts.remove(part_dir_name); }; } +MergeTreeData::MutableDataPartPtr MergeTreeData::preparePartForRemoval(const DataPartPtr & part) +{ + auto state = part->getState(); + if (state != DataPartState::Deleting && state != DataPartState::DeleteOnDestroy) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Cannot remove part {}, because it has state: {}", part->name, magic_enum::enum_name(part->getState())); + + return std::const_pointer_cast(part); +} MergeTreeData::DataPartsVector MergeTreeData::grabOldParts(bool force) { @@ -1699,7 +1702,13 @@ MergeTreeData::DataPartsVector MergeTreeData::grabOldParts(bool force) if (!lock.try_lock()) return res; + /// Concurrent parts removal is disabled for "zero-copy replication" (a non-production feature), + /// because parts removal involves hard links and concurrent hard link operations don't work correctly + /// in the "zero-copy replication" (because it is a non-production feature). + /// Please don't use "zero-copy replication" (a non-production feature) in production. + /// It is not ready for production usage. Don't use it. 
bool need_remove_parts_in_order = supportsReplication() && getSettings()->allow_remote_fs_zero_copy_replication; + if (need_remove_parts_in_order) { bool has_zero_copy_disk = false; @@ -1858,7 +1867,7 @@ void MergeTreeData::flushAllInMemoryPartsIfNeeded() { if (auto part_in_memory = asInMemoryPart(part)) { - part_in_memory->flushToDisk(part_in_memory->data_part_storage->getPartDirectory(), metadata_snapshot); + part_in_memory->flushToDisk(part_in_memory->getDataPartStorage().getPartDirectory(), metadata_snapshot); } } } @@ -1914,9 +1923,19 @@ void MergeTreeData::clearPartsFromFilesystem(const DataPartsVector & parts, bool void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_to_remove, NameSet * part_names_succeed) { const auto settings = getSettings(); + bool has_zero_copy_parts = false; + if (supportsReplication() && settings->allow_remote_fs_zero_copy_replication) + { + has_zero_copy_parts = std::any_of( + parts_to_remove.begin(), parts_to_remove.end(), + [] (const auto & data_part) { return data_part->isStoredOnRemoteDiskWithZeroCopySupport(); } + ); + } + if (parts_to_remove.size() > 1 && settings->max_part_removal_threads > 1 - && parts_to_remove.size() > settings->concurrent_part_removal_threshold) + && parts_to_remove.size() > settings->concurrent_part_removal_threshold + && !has_zero_copy_parts) /// parts must be removed in order for zero-copy replication { /// Parallel parts removal. size_t num_threads = std::min(settings->max_part_removal_threads, parts_to_remove.size()); @@ -1932,7 +1951,7 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t if (thread_group) CurrentThread::attachToIfDetached(thread_group); - part->remove(); + preparePartForRemoval(part)->remove(); if (part_names_succeed) { std::lock_guard lock(part_names_mutex); @@ -1948,7 +1967,7 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t LOG_DEBUG(log, "Removing {} parts from filesystem: {}", parts_to_remove.size(), fmt::join(parts_to_remove, ", ")); for (const DataPartPtr & part : parts_to_remove) { - part->remove(); + preparePartForRemoval(part)->remove(); if (part_names_succeed) part_names_succeed->insert(part->name); } @@ -2128,11 +2147,14 @@ void MergeTreeData::rename(const String & new_table_path, const StorageID & new_ if (!getStorageID().hasUUID()) getContext()->dropCaches(); + /// TODO: remove const_cast for (const auto & part : data_parts_by_info) - part->data_part_storage->changeRootPath(relative_data_path, new_table_path); + { + auto & part_mutable = const_cast(*part); + part_mutable.getDataPartStorage().changeRootPath(relative_data_path, new_table_path); + } relative_data_path = new_table_path; - renameInMemory(new_table_id); } @@ -2150,7 +2172,12 @@ void MergeTreeData::dropAllData() auto lock = lockParts(); - DataPartsVector all_parts(data_parts_by_info.begin(), data_parts_by_info.end()); + DataPartsVector all_parts; + for (auto it = data_parts_by_info.begin(); it != data_parts_by_info.end(); ++it) + { + modifyPartState(it, DataPartState::Deleting); + all_parts.push_back(*it); + } { std::lock_guard wal_lock(write_ahead_log_mutex); @@ -2163,7 +2190,6 @@ void MergeTreeData::dropAllData() if (!getStorageID().hasUUID()) getContext()->dropCaches(); - /// Removing of each data part before recursive removal of directory is to speed-up removal, because there will be less number of syscalls. 
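
The preparePartForRemoval helper introduced above only const-casts a part to a mutable pointer after verifying it is already in a Deleting or DeleteOnDestroy state, which is what makes the preparePartForRemoval(part)->remove() call sites safe. A minimal standalone illustration of that pattern, using simplified types rather than the real ones:

    #include <memory>
    #include <stdexcept>
    #include <string>

    enum class PartState { Active, Deleting, DeleteOnDestroy };

    struct Part
    {
        std::string name;
        PartState state = PartState::Active;
        void remove() { /* delete files from disk */ }
    };

    using PartPtr = std::shared_ptr<const Part>;
    using MutablePartPtr = std::shared_ptr<Part>;

    // Parts are normally handled through const pointers; removal needs a mutable one,
    // and the cast is only allowed once the part has been moved into a "being deleted" state.
    MutablePartPtr preparePartForRemoval(const PartPtr & part)
    {
        if (part->state != PartState::Deleting && part->state != PartState::DeleteOnDestroy)
            throw std::logic_error("Cannot remove part " + part->name + ": it is not in a Deleting state");
        return std::const_pointer_cast<Part>(part);
    }

    int main()
    {
        auto part = std::make_shared<Part>(Part{"all_1_1_0", PartState::Deleting});
        PartPtr const_view = part;
        preparePartForRemoval(const_view)->remove();
        return 0;
    }
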
NameSet part_names_failed; try @@ -2173,6 +2199,7 @@ void MergeTreeData::dropAllData() LOG_TRACE(log, "dropAllData: removing all data parts from memory."); data_parts_indexes.clear(); + all_data_dropped = true; } catch (...) { @@ -2710,7 +2737,7 @@ MergeTreeDataPartType MergeTreeData::choosePartTypeOnDisk(size_t bytes_uncompres MergeTreeData::MutableDataPartPtr MergeTreeData::createPart(const String & name, MergeTreeDataPartType type, const MergeTreePartInfo & part_info, - const DataPartStoragePtr & data_part_storage, const IMergeTreeDataPart * parent_part) const + const MutableDataPartStoragePtr & data_part_storage, const IMergeTreeDataPart * parent_part) const { if (type == MergeTreeDataPartType::Compact) return std::make_shared(*this, name, part_info, data_part_storage, parent_part); @@ -2723,17 +2750,17 @@ MergeTreeData::MutableDataPartPtr MergeTreeData::createPart(const String & name, } MergeTreeData::MutableDataPartPtr MergeTreeData::createPart( - const String & name, const DataPartStoragePtr & data_part_storage, const IMergeTreeDataPart * parent_part) const + const String & name, const MutableDataPartStoragePtr & data_part_storage, const IMergeTreeDataPart * parent_part) const { return createPart(name, MergeTreePartInfo::fromPartName(name, format_version), data_part_storage, parent_part); } MergeTreeData::MutableDataPartPtr MergeTreeData::createPart( const String & name, const MergeTreePartInfo & part_info, - const DataPartStoragePtr & data_part_storage, const IMergeTreeDataPart * parent_part) const + const MutableDataPartStoragePtr & data_part_storage, const IMergeTreeDataPart * parent_part) const { MergeTreeDataPartType type; - auto mrk_ext = MergeTreeIndexGranularityInfo::getMarksExtensionFromFilesystem(data_part_storage); + auto mrk_ext = MergeTreeIndexGranularityInfo::getMarksExtensionFromFilesystem(*data_part_storage); if (mrk_ext) { @@ -2927,12 +2954,11 @@ MergeTreeData::DataPartsVector MergeTreeData::getActivePartsToReplace( bool MergeTreeData::renameTempPartAndAdd( MutableDataPartPtr & part, Transaction & out_transaction, - DataPartStorageBuilderPtr builder, DataPartsLock & lock) { DataPartsVector covered_parts; - if (!renameTempPartAndReplaceImpl(part, out_transaction, lock, builder, &covered_parts)) + if (!renameTempPartAndReplaceImpl(part, out_transaction, lock, &covered_parts)) return false; if (!covered_parts.empty()) @@ -2966,32 +2992,31 @@ void MergeTreeData::checkPartCanBeAddedToTable(MutableDataPartPtr & part, DataPa } } -void MergeTreeData::preparePartForCommit(MutableDataPartPtr & part, Transaction & out_transaction, DataPartStorageBuilderPtr builder) +void MergeTreeData::preparePartForCommit(MutableDataPartPtr & part, Transaction & out_transaction) { part->is_temp = false; part->setState(DataPartState::PreActive); assert([&]() { - String dir_name = fs::path(part->data_part_storage->getRelativePath()).filename(); + String dir_name = fs::path(part->getDataPartStorage().getRelativePath()).filename(); bool may_be_cleaned_up = dir_name.starts_with("tmp_") || dir_name.starts_with("tmp-fetch_"); return !may_be_cleaned_up || temporary_parts.contains(dir_name); }()); - part->renameTo(part->name, true, builder); + part->renameTo(part->name, true); data_parts_indexes.insert(part); - out_transaction.addPart(part, builder); + out_transaction.addPart(part); } bool MergeTreeData::renameTempPartAndReplaceImpl( MutableDataPartPtr & part, Transaction & out_transaction, DataPartsLock & lock, - DataPartStorageBuilderPtr builder, DataPartsVector * out_covered_parts) { - 
LOG_TRACE(log, "Renaming temporary part {} to {}.", part->data_part_storage->getPartDirectory(), part->name); + LOG_TRACE(log, "Renaming temporary part {} to {}.", part->getDataPartStorage().getPartDirectory(), part->name); if (&out_transaction.data != this) throw Exception("MergeTreeData::Transaction for one table cannot be used with another. It is a bug.", @@ -3013,7 +3038,7 @@ bool MergeTreeData::renameTempPartAndReplaceImpl( /// All checks are passed. Now we can rename the part on disk. /// So, we maintain invariant: if a non-temporary part in filesystem then it is in data_parts - preparePartForCommit(part, out_transaction, builder); + preparePartForCommit(part, out_transaction); if (out_covered_parts) { @@ -3029,21 +3054,19 @@ bool MergeTreeData::renameTempPartAndReplaceImpl( MergeTreeData::DataPartsVector MergeTreeData::renameTempPartAndReplaceUnlocked( MutableDataPartPtr & part, Transaction & out_transaction, - DataPartStorageBuilderPtr builder, DataPartsLock & lock) { DataPartsVector covered_parts; - renameTempPartAndReplaceImpl(part, out_transaction, lock, builder, &covered_parts); + renameTempPartAndReplaceImpl(part, out_transaction, lock, &covered_parts); return covered_parts; } MergeTreeData::DataPartsVector MergeTreeData::renameTempPartAndReplace( MutableDataPartPtr & part, - Transaction & out_transaction, - DataPartStorageBuilderPtr builder) + Transaction & out_transaction) { auto part_lock = lockParts(); - return renameTempPartAndReplaceUnlocked(part, out_transaction, builder, part_lock); + return renameTempPartAndReplaceUnlocked(part, out_transaction, part_lock); } void MergeTreeData::removePartsFromWorkingSet(MergeTreeTransaction * txn, const MergeTreeData::DataPartsVector & remove, bool clear_without_timeout, DataPartsLock & acquired_lock) @@ -3120,7 +3143,7 @@ void MergeTreeData::removePartsInRangeFromWorkingSet(MergeTreeTransaction * txn, removePartsInRangeFromWorkingSetAndGetPartsToRemoveFromZooKeeper(txn, drop_range, lock); } -MergeTreeData::DataPartsVector MergeTreeData::removePartsInRangeFromWorkingSetAndGetPartsToRemoveFromZooKeeper( +MergeTreeData::PartsToRemoveFromZooKeeper MergeTreeData::removePartsInRangeFromWorkingSetAndGetPartsToRemoveFromZooKeeper( MergeTreeTransaction * txn, const MergeTreePartInfo & drop_range, DataPartsLock & lock) { DataPartsVector parts_to_remove; @@ -3198,15 +3221,20 @@ MergeTreeData::DataPartsVector MergeTreeData::removePartsInRangeFromWorkingSetAn /// FIXME refactor removePartsFromWorkingSet(...), do not remove parts twice removePartsFromWorkingSet(txn, parts_to_remove, clear_without_timeout, lock); + /// Since we can return parts in Deleting state, we have to use a wrapper that restricts access to such parts. 
+ PartsToRemoveFromZooKeeper parts_to_remove_from_zookeeper; + for (auto & part : parts_to_remove) + parts_to_remove_from_zookeeper.emplace_back(std::move(part)); + for (auto & part : inactive_parts_to_remove_immediately) { if (!drop_range.contains(part->info)) continue; part->remove_time.store(0, std::memory_order_relaxed); - parts_to_remove.push_back(std::move(part)); + parts_to_remove_from_zookeeper.emplace_back(std::move(part), /* was_active */ false); } - return parts_to_remove; + return parts_to_remove_from_zookeeper; } void MergeTreeData::restoreAndActivatePart(const DataPartPtr & part, DataPartsLock * acquired_lock) @@ -3224,20 +3252,23 @@ void MergeTreeData::outdateBrokenPartAndCloneToDetached(const DataPartPtr & part { auto metadata_snapshot = getInMemoryMetadataPtr(); if (prefix.empty()) - LOG_INFO(log, "Cloning part {} to {} and making it obsolete.", part_to_detach->data_part_storage->getPartDirectory(), part_to_detach->name); + LOG_INFO(log, "Cloning part {} to {} and making it obsolete.", part_to_detach->getDataPartStorage().getPartDirectory(), part_to_detach->name); else - LOG_INFO(log, "Cloning part {} to {}_{} and making it obsolete.", part_to_detach->data_part_storage->getPartDirectory(), prefix, part_to_detach->name); + LOG_INFO(log, "Cloning part {} to {}_{} and making it obsolete.", part_to_detach->getDataPartStorage().getPartDirectory(), prefix, part_to_detach->name); part_to_detach->makeCloneInDetached(prefix, metadata_snapshot); - removePartsFromWorkingSet(NO_TRANSACTION_RAW, {part_to_detach}, true); + + DataPartsLock lock = lockParts(); + if (part_to_detach->getState() == DataPartState::Active) + removePartsFromWorkingSet(NO_TRANSACTION_RAW, {part_to_detach}, true, &lock); } void MergeTreeData::forcefullyMovePartToDetachedAndRemoveFromMemory(const MergeTreeData::DataPartPtr & part_to_detach, const String & prefix, bool restore_covered) { if (prefix.empty()) - LOG_INFO(log, "Renaming {} to {} and forgetting it.", part_to_detach->data_part_storage->getPartDirectory(), part_to_detach->name); + LOG_INFO(log, "Renaming {} to {} and forgetting it.", part_to_detach->getDataPartStorage().getPartDirectory(), part_to_detach->name); else - LOG_INFO(log, "Renaming {} to {}_{} and forgetting it.", part_to_detach->data_part_storage->getPartDirectory(), prefix, part_to_detach->name); + LOG_INFO(log, "Renaming {} to {}_{} and forgetting it.", part_to_detach->getDataPartStorage().getPartDirectory(), prefix, part_to_detach->name); auto lock = lockParts(); bool removed_active_part = false; @@ -3260,11 +3291,7 @@ void MergeTreeData::forcefullyMovePartToDetachedAndRemoveFromMemory(const MergeT } modifyPartState(it_part, DataPartState::Deleting); - - auto builder = part->data_part_storage->getBuilder(); - part->renameToDetached(prefix, builder); - builder->commit(); - + preparePartForRemoval(part)->renameToDetached(prefix); data_parts_indexes.erase(it_part); if (restore_covered && part->info.level == 0) @@ -3418,7 +3445,7 @@ void MergeTreeData::tryRemovePartImmediately(DataPartPtr && part) try { - part_to_delete->remove(); + preparePartForRemoval(part_to_delete)->remove(); } catch (...) 
{ @@ -3628,9 +3655,9 @@ void MergeTreeData::swapActivePart(MergeTreeData::DataPartPtr part_copy) /// when allow_remote_fs_zero_copy_replication turned on and off again original_active_part->force_keep_shared_data = false; - if (original_active_part->data_part_storage->supportZeroCopyReplication() && - part_copy->data_part_storage->supportZeroCopyReplication() && - original_active_part->data_part_storage->getUniqueId() == part_copy->data_part_storage->getUniqueId()) + if (original_active_part->getDataPartStorage().supportZeroCopyReplication() && + part_copy->getDataPartStorage().supportZeroCopyReplication() && + original_active_part->getDataPartStorage().getUniqueId() == part_copy->getDataPartStorage().getUniqueId()) { /// May be when several volumes use the same S3/HDFS storage original_active_part->force_keep_shared_data = true; @@ -3650,7 +3677,7 @@ void MergeTreeData::swapActivePart(MergeTreeData::DataPartPtr part_copy) /// All other locks are taken in StorageReplicatedMergeTree lockSharedData(*part_copy); - original_active_part->data_part_storage->writeDeleteOnDestroyMarker(log); + preparePartForRemoval(original_active_part)->writeDeleteOnDestroyMarker(); return; } } @@ -3784,9 +3811,9 @@ MergeTreeData::DataPartPtr MergeTreeData::getPartIfExists(const String & part_na static void loadPartAndFixMetadataImpl(MergeTreeData::MutableDataPartPtr part) { part->loadColumnsChecksumsIndexes(false, true); - part->modification_time = part->data_part_storage->getLastModified().epochTime(); - part->data_part_storage->removeDeleteOnDestroyMarker(); - part->data_part_storage->removeVersionMetadata(); + part->modification_time = part->getDataPartStorage().getLastModified().epochTime(); + part->removeDeleteOnDestroyMarker(); + part->removeVersionMetadata(); } void MergeTreeData::calculateColumnAndSecondaryIndexSizesImpl() @@ -3946,7 +3973,7 @@ void MergeTreeData::movePartitionToDisk(const ASTPtr & partition, const String & auto disk = getStoragePolicy()->getDiskByName(name); std::erase_if(parts, [&](auto part_ptr) { - return part_ptr->data_part_storage->getDiskName() == disk->getName(); + return part_ptr->getDataPartStorage().getDiskName() == disk->getName(); }); if (parts.empty()) @@ -3996,7 +4023,7 @@ void MergeTreeData::movePartitionToVolume(const ASTPtr & partition, const String { for (const auto & disk : volume->getDisks()) { - if (part_ptr->data_part_storage->getDiskName() == disk->getName()) + if (part_ptr->getDataPartStorage().getDiskName() == disk->getName()) { return true; } @@ -4193,7 +4220,7 @@ BackupEntries MergeTreeData::backupParts(const DataPartsVector & data_parts, con make_temporary_hard_links = false; hold_storage_and_part_ptrs = true; } - else if (supportsReplication() && part->data_part_storage->supportZeroCopyReplication() && getSettings()->allow_remote_fs_zero_copy_replication) + else if (supportsReplication() && part->getDataPartStorage().supportZeroCopyReplication() && getSettings()->allow_remote_fs_zero_copy_replication) { /// Hard links don't work correctly with zero copy replication. 
make_temporary_hard_links = false; @@ -4205,7 +4232,7 @@ BackupEntries MergeTreeData::backupParts(const DataPartsVector & data_parts, con table_lock = lockForShare(local_context->getCurrentQueryId(), local_context->getSettingsRef().lock_acquire_timeout); BackupEntries backup_entries_from_part; - part->data_part_storage->backup( + part->getDataPartStorage().backup( part->checksums, part->getFileNamesWithoutChecksums(), data_path_in_backup, @@ -4216,7 +4243,7 @@ BackupEntries MergeTreeData::backupParts(const DataPartsVector & data_parts, con auto projection_parts = part->getProjectionParts(); for (const auto & [projection_name, projection_part] : projection_parts) { - projection_part->data_part_storage->backup( + projection_part->getDataPartStorage().backup( projection_part->checksums, projection_part->getFileNamesWithoutChecksums(), fs::path{data_path_in_backup} / part->name, @@ -4892,22 +4919,16 @@ ReservationPtr MergeTreeData::reserveSpace(UInt64 expected_size, SpacePtr space) return checkAndReturnReservation(expected_size, std::move(reservation)); } -ReservationPtr MergeTreeData::reserveSpace(UInt64 expected_size, const DataPartStoragePtr & data_part_storage) +ReservationPtr MergeTreeData::reserveSpace(UInt64 expected_size, const IDataPartStorage & data_part_storage) { expected_size = std::max(RESERVATION_MIN_ESTIMATION_SIZE, expected_size); - return data_part_storage->reserve(expected_size); + return data_part_storage.reserve(expected_size); } -ReservationPtr MergeTreeData::reserveSpace(UInt64 expected_size, const DataPartStorageBuilderPtr & data_part_storage_builder) +ReservationPtr MergeTreeData::tryReserveSpace(UInt64 expected_size, const IDataPartStorage & data_part_storage) { expected_size = std::max(RESERVATION_MIN_ESTIMATION_SIZE, expected_size); - return data_part_storage_builder->reserve(expected_size); -} - -ReservationPtr MergeTreeData::tryReserveSpace(UInt64 expected_size, const DataPartStoragePtr & data_part_storage) -{ - expected_size = std::max(RESERVATION_MIN_ESTIMATION_SIZE, expected_size); - return data_part_storage->tryReserve(expected_size); + return data_part_storage.tryReserve(expected_size); } ReservationPtr MergeTreeData::tryReserveSpace(UInt64 expected_size, SpacePtr space) @@ -5044,7 +5065,7 @@ bool MergeTreeData::shouldPerformTTLMoveOnInsert(const SpacePtr & move_destinati if (move_destination->isDisk()) { auto disk = std::static_pointer_cast(move_destination); - if (auto volume = getStoragePolicy()->tryGetVolumeByDisk(disk)) + if (auto volume = getStoragePolicy()->tryGetVolumeByDiskName(disk->getName())) return volume->perform_ttl_move_on_insert; } return false; @@ -5056,11 +5077,11 @@ bool MergeTreeData::isPartInTTLDestination(const TTLDescription & ttl, const IMe if (ttl.destination_type == DataDestinationType::VOLUME) { for (const auto & disk : policy->getVolumeByName(ttl.destination_name)->getDisks()) - if (disk->getName() == part.data_part_storage->getDiskName()) + if (disk->getName() == part.getDataPartStorage().getDiskName()) return true; } else if (ttl.destination_type == DataDestinationType::DISK) - return policy->getDiskByName(ttl.destination_name)->getName() == part.data_part_storage->getDiskName(); + return policy->getDiskByName(ttl.destination_name)->getName() == part.getDataPartStorage().getDiskName(); return false; } @@ -5132,7 +5153,7 @@ void MergeTreeData::Transaction::rollbackPartsToTemporaryState() WriteBufferFromOwnString buf; buf << " Rollbacking parts state to temporary and removing from working set:"; for (const auto & part : 
precommitted_parts) - buf << " " << part->data_part_storage->getPartDirectory(); + buf << " " << part->getDataPartStorage().getPartDirectory(); buf << "."; LOG_DEBUG(data.log, "Undoing transaction.{}", buf.str()); @@ -5143,12 +5164,11 @@ void MergeTreeData::Transaction::rollbackPartsToTemporaryState() clear(); } -void MergeTreeData::Transaction::addPart(MutableDataPartPtr & part, DataPartStorageBuilderPtr builder) +void MergeTreeData::Transaction::addPart(MutableDataPartPtr & part) { precommitted_parts.insert(part); if (asInMemoryPart(part)) has_in_memory_parts = true; - part_builders.push_back(builder); } void MergeTreeData::Transaction::rollback() @@ -5158,13 +5178,31 @@ void MergeTreeData::Transaction::rollback() WriteBufferFromOwnString buf; buf << " Removing parts:"; for (const auto & part : precommitted_parts) - buf << " " << part->data_part_storage->getPartDirectory(); + buf << " " << part->getDataPartStorage().getPartDirectory(); buf << "."; LOG_DEBUG(data.log, "Undoing transaction.{}", buf.str()); - data.removePartsFromWorkingSet(txn, - DataPartsVector(precommitted_parts.begin(), precommitted_parts.end()), - /* clear_without_timeout = */ true); + auto lock = data.lockParts(); + + if (data.data_parts_indexes.empty()) + { + /// Table was dropped concurrently and all parts (including PreActive parts) were cleared, so there's nothing to rollback + if (!data.all_data_dropped) + { + Strings part_names; + for (const auto & part : precommitted_parts) + part_names.emplace_back(part->name); + throw Exception(ErrorCodes::LOGICAL_ERROR, "There are some PreActive parts ({}) to rollback, " + "but data parts set is empty and table {} was not dropped. It's a bug", + fmt::join(part_names, ", "), data.getStorageID().getNameForLogs()); + } + } + else + { + data.removePartsFromWorkingSet(txn, + DataPartsVector(precommitted_parts.begin(), precommitted_parts.end()), + /* clear_without_timeout = */ true, &lock); + } } clear(); @@ -5186,8 +5224,9 @@ MergeTreeData::DataPartsVector MergeTreeData::Transaction::commit(MergeTreeData: auto parts_lock = acquired_parts_lock ? MergeTreeData::DataPartsLock() : data.lockParts(); auto * owing_parts_lock = acquired_parts_lock ? 
acquired_parts_lock : &parts_lock; - for (auto & builder : part_builders) - builder->commit(); + for (const auto & part : precommitted_parts) + if (part->getDataPartStorage().hasActiveTransaction()) + part->getDataPartStorage().commitTransaction(); bool commit_to_wal = has_in_memory_parts && settings->in_memory_parts_enable_wal; if (txn || commit_to_wal) @@ -5196,7 +5235,7 @@ MergeTreeData::DataPartsVector MergeTreeData::Transaction::commit(MergeTreeData: if (commit_to_wal) wal = data.getWriteAheadLog(); - for (const DataPartPtr & part : precommitted_parts) + for (const auto & part : precommitted_parts) { if (txn) { @@ -5221,7 +5260,7 @@ MergeTreeData::DataPartsVector MergeTreeData::Transaction::commit(MergeTreeData: size_t reduce_rows = 0; size_t reduce_parts = 0; - for (const DataPartPtr & part : precommitted_parts) + for (const auto & part : precommitted_parts) { DataPartPtr covering_part; DataPartsVector covered_parts = data.getActivePartsToReplace(part->info, part->name, covering_part, *owing_parts_lock); @@ -5387,6 +5426,7 @@ static void selectBestProjection( auto projection_result_ptr = reader.estimateNumMarksToRead( projection_parts, + candidate.prewhere_info, candidate.required_columns, storage_snapshot->metadata, candidate.desc->metadata, @@ -5410,6 +5450,7 @@ static void selectBestProjection( { auto normal_result_ptr = reader.estimateNumMarksToRead( normal_parts, + query_info.prewhere_info, required_columns, storage_snapshot->metadata, storage_snapshot->metadata, @@ -5683,6 +5724,11 @@ std::optional MergeTreeData::getQueryProcessingStageWithAgg { const auto & metadata_snapshot = storage_snapshot->metadata; const auto & settings = query_context->getSettingsRef(); + + /// TODO: Analyzer syntax analyzer result + if (!query_info.syntax_analyzer_result) + return std::nullopt; + if (!settings.allow_experimental_projection_optimization || query_info.ignore_projections || query_info.is_projection_query || settings.aggregate_functions_null_for_empty /* projections don't work correctly with this setting */) return std::nullopt; @@ -5739,7 +5785,6 @@ std::optional MergeTreeData::getQueryProcessingStageWithAgg const auto & analysis_result = select.getAnalysisResult(); query_info.prepared_sets = select.getQueryAnalyzer()->getPreparedSets(); - query_info.prewhere_info = analysis_result.prewhere_info; const auto & before_where = analysis_result.before_where; const auto & where_column_name = analysis_result.where_column_name; @@ -6016,6 +6061,7 @@ std::optional MergeTreeData::getQueryProcessingStageWithAgg { auto normal_result_ptr = reader.estimateNumMarksToRead( normal_parts, + query_info.prewhere_info, analysis_result.required_columns, metadata_snapshot, metadata_snapshot, @@ -6048,6 +6094,7 @@ std::optional MergeTreeData::getQueryProcessingStageWithAgg { query_info.merge_tree_select_result_ptr = reader.estimateNumMarksToRead( parts, + query_info.prewhere_info, analysis_result.required_columns, metadata_snapshot, metadata_snapshot, @@ -6129,8 +6176,6 @@ std::optional MergeTreeData::getQueryProcessingStageWithAgg selected_candidate->aggregate_descriptions = select.getQueryAnalyzer()->aggregates(); } - /// Just in case, reset prewhere info calculated from projection. 
- query_info.prewhere_info.reset(); return *selected_candidate; } @@ -6208,7 +6253,7 @@ std::pair MergeTreeData::cloneAn bool does_storage_policy_allow_same_disk = false; for (const DiskPtr & disk : getStoragePolicy()->getDisks()) { - if (disk->getName() == src_part->data_part_storage->getDiskName()) + if (disk->getName() == src_part->getDataPartStorage().getDiskName()) { does_storage_policy_allow_same_disk = true; break; @@ -6218,7 +6263,7 @@ std::pair MergeTreeData::cloneAn throw Exception( ErrorCodes::BAD_ARGUMENTS, "Could not clone and load part {} because disk does not belong to storage policy", - quoteString(src_part->data_part_storage->getFullPath())); + quoteString(src_part->getDataPartStorage().getFullPath())); String dst_part_name = src_part->getNewName(dst_part_info); assert(!tmp_part_prefix.empty()); @@ -6226,15 +6271,14 @@ std::pair MergeTreeData::cloneAn auto temporary_directory_lock = getTemporaryPartDirectoryHolder(tmp_dst_part_name); /// Why it is needed if we only hardlink files? - auto reservation = src_part->data_part_storage->reserve(src_part->getBytesOnDisk()); - - auto src_part_storage = src_part->data_part_storage; + auto reservation = src_part->getDataPartStorage().reserve(src_part->getBytesOnDisk()); + auto src_part_storage = src_part->getDataPartStoragePtr(); /// If source part is in memory, flush it to disk and clone it already in on-disk format if (auto src_part_in_memory = asInMemoryPart(src_part)) { auto flushed_part_path = src_part_in_memory->getRelativePathForPrefix(tmp_part_prefix); - src_part_storage = src_part_in_memory->flushToDisk(flushed_part_path, metadata_snapshot); + src_part_storage = src_part_in_memory->flushToDisk(*flushed_part_path, metadata_snapshot); } String with_copy; @@ -6255,7 +6299,7 @@ std::pair MergeTreeData::cloneAn hardlinked_files->source_part_name = src_part->name; hardlinked_files->source_table_shared_id = src_part->storage.getTableSharedID(); - for (auto it = src_part->data_part_storage->iterate(); it->isValid(); it->next()) + for (auto it = src_part->getDataPartStorage().iterate(); it->isValid(); it->next()) { if (!files_to_copy_instead_of_hardlinks.contains(it->name()) && it->name() != IMergeTreeDataPart::DELETE_ON_DESTROY_MARKER_FILE_NAME @@ -6314,14 +6358,14 @@ Strings MergeTreeData::getDataPaths() const void MergeTreeData::reportBrokenPart(MergeTreeData::DataPartPtr & data_part) const { - if (data_part->data_part_storage && data_part->data_part_storage->isBroken()) + if (data_part->getDataPartStorage().isBroken()) { auto parts = getDataPartsForInternalUsage(); - LOG_WARNING(log, "Scanning parts to recover on broken disk {}@{}.", data_part->data_part_storage->getDiskName(), data_part->data_part_storage->getDiskPath()); + LOG_WARNING(log, "Scanning parts to recover on broken disk {}@{}.", data_part->getDataPartStorage().getDiskName(), data_part->getDataPartStorage().getDiskPath()); for (const auto & part : parts) { - if (part->data_part_storage && part->data_part_storage->getDiskName() == data_part->data_part_storage->getDiskName()) + if (part->getDataPartStorage().getDiskName() == data_part->getDataPartStorage().getDiskName()) broken_part_callback(part->name); } } @@ -6412,13 +6456,13 @@ PartitionCommandsResultInfo MergeTreeData::freezePartitionsByMatcher( LOG_DEBUG(log, "Freezing part {} snapshot will be placed at {}", part->name, backup_path); - auto data_part_storage = part->data_part_storage; + auto data_part_storage = part->getDataPartStoragePtr(); String src_part_path = data_part_storage->getRelativePath(); String 
backup_part_path = fs::path(backup_path) / relative_data_path; if (auto part_in_memory = asInMemoryPart(part)) { auto flushed_part_path = part_in_memory->getRelativePathForPrefix("tmp_freeze"); - data_part_storage = part_in_memory->flushToDisk(flushed_part_path, metadata_snapshot); + data_part_storage = part_in_memory->flushToDisk(*flushed_part_path, metadata_snapshot); } auto callback = [this, &part, &backup_part_path](const DiskPtr & disk) @@ -6426,12 +6470,12 @@ PartitionCommandsResultInfo MergeTreeData::freezePartitionsByMatcher( // Store metadata for replicated table. // Do nothing for non-replicated. - createAndStoreFreezeMetadata(disk, part, fs::path(backup_part_path) / part->data_part_storage->getPartDirectory()); + createAndStoreFreezeMetadata(disk, part, fs::path(backup_part_path) / part->getDataPartStorage().getPartDirectory()); }; auto new_storage = data_part_storage->freeze( backup_part_path, - part->data_part_storage->getPartDirectory(), + part->getDataPartStorage().getPartDirectory(), /*make_source_readonly*/ true, callback, /*copy_instead_of_hardlink*/ false, @@ -6553,8 +6597,8 @@ try if (result_part) { - part_log_elem.disk_name = result_part->data_part_storage->getDiskName(); - part_log_elem.path_on_disk = result_part->data_part_storage->getFullPath(); + part_log_elem.disk_name = result_part->getDataPartStorage().getDiskName(); + part_log_elem.path_on_disk = result_part->getDataPartStorage().getFullPath(); part_log_elem.bytes_compressed_on_disk = result_part->getBytesOnDisk(); part_log_elem.rows = result_part->rows_count; part_log_elem.part_type = result_part->getType(); @@ -6710,7 +6754,7 @@ bool MergeTreeData::moveParts(const CurrentlyMovingPartsTaggerPtr & moving_tagge for (const auto & moving_part : moving_tagger->parts_to_move) { Stopwatch stopwatch; - DataPartPtr cloned_part; + MutableDataPartPtr cloned_part; auto write_part_log = [&](const ExecutionStatus & execution_status) { @@ -6973,7 +7017,7 @@ ReservationPtr MergeTreeData::balancedReservation( if (part->isStoredOnDisk() && part->getBytesOnDisk() >= min_bytes_to_rebalance_partition_over_jbod && part_info.partition_id == part->info.partition_id) { - auto name = part->data_part_storage->getDiskName(); + auto name = part->getDataPartStorage().getDiskName(); auto it = disk_occupation.find(name); if (it != disk_occupation.end()) { @@ -7081,18 +7125,18 @@ ReservationPtr MergeTreeData::balancedReservation( return reserved_space; } -ColumnsDescription MergeTreeData::getObjectColumns( +ColumnsDescription MergeTreeData::getConcreteObjectColumns( const DataPartsVector & parts, const ColumnsDescription & storage_columns) { - return DB::getObjectColumns( + return DB::getConcreteObjectColumns( parts.begin(), parts.end(), storage_columns, [](const auto & part) -> const auto & { return part->getColumns(); }); } -ColumnsDescription MergeTreeData::getObjectColumns( +ColumnsDescription MergeTreeData::getConcreteObjectColumns( boost::iterator_range range, const ColumnsDescription & storage_columns) { - return DB::getObjectColumns( + return DB::getConcreteObjectColumns( range.begin(), range.end(), storage_columns, [](const auto & part) -> const auto & { return part->getColumns(); }); } @@ -7101,21 +7145,21 @@ void MergeTreeData::resetObjectColumnsFromActiveParts(const DataPartsLock & /*lo { auto metadata_snapshot = getInMemoryMetadataPtr(); const auto & columns = metadata_snapshot->getColumns(); - if (!hasObjectColumns(columns)) + if (!hasDynamicSubcolumns(columns)) return; auto range = getDataPartsStateRange(DataPartState::Active); 
- object_columns = getObjectColumns(range, columns); + object_columns = getConcreteObjectColumns(range, columns); } void MergeTreeData::updateObjectColumns(const DataPartPtr & part, const DataPartsLock & /*lock*/) { auto metadata_snapshot = getInMemoryMetadataPtr(); const auto & columns = metadata_snapshot->getColumns(); - if (!hasObjectColumns(columns)) + if (!hasDynamicSubcolumns(columns)) return; - DB::updateObjectColumns(object_columns, part->getColumns()); + DB::updateObjectColumns(object_columns, columns, part->getColumns()); } StorageSnapshotPtr MergeTreeData::getStorageSnapshot(const StorageMetadataPtr & metadata_snapshot, ContextPtr query_context) const diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index c4a5d66ccbe..8bd0fc1f280 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -214,6 +214,7 @@ public: }; using DataParts = std::set; + using MutableDataParts = std::set; using DataPartsVector = std::vector; using DataPartsLock = std::unique_lock; @@ -225,15 +226,15 @@ public: /// After this method setColumns must be called MutableDataPartPtr createPart(const String & name, MergeTreeDataPartType type, const MergeTreePartInfo & part_info, - const DataPartStoragePtr & data_part_storage, const IMergeTreeDataPart * parent_part = nullptr) const; + const MutableDataPartStoragePtr & data_part_storage, const IMergeTreeDataPart * parent_part = nullptr) const; /// Create part, that already exists on filesystem. /// After this methods 'loadColumnsChecksumsIndexes' must be called. MutableDataPartPtr createPart(const String & name, - const DataPartStoragePtr & data_part_storage, const IMergeTreeDataPart * parent_part = nullptr) const; + const MutableDataPartStoragePtr & data_part_storage, const IMergeTreeDataPart * parent_part = nullptr) const; MutableDataPartPtr createPart(const String & name, const MergeTreePartInfo & part_info, - const DataPartStoragePtr & data_part_storage, const IMergeTreeDataPart * parent_part = nullptr) const; + const MutableDataPartStoragePtr & data_part_storage, const IMergeTreeDataPart * parent_part = nullptr) const; /// Auxiliary object to add a set of parts into the working set in two steps: /// * First, as PreActive parts (the parts are ready, but not yet in the active set). 
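
The Transaction object described in the comment above adds new parts in two steps: they are first registered as PreActive, and only commit() turns them Active while retiring the parts they cover; rollback() drops them again. A simplified, self-contained sketch of that lifecycle (not the real MergeTreeData::Transaction, which also deals with the WAL and MergeTreeTransaction interplay):

    #include <iostream>
    #include <memory>
    #include <string>
    #include <vector>

    enum class State { PreActive, Active, Outdated };

    struct Part
    {
        std::string name;
        State state = State::PreActive;
    };
    using PartPtr = std::shared_ptr<Part>;

    struct Transaction
    {
        std::vector<PartPtr> precommitted;

        void addPart(PartPtr part) { precommitted.push_back(std::move(part)); }

        // commit: precommitted parts become Active, the parts they replace become Outdated.
        void commit(std::vector<PartPtr> & covered_parts)
        {
            for (auto & covered : covered_parts)
                covered->state = State::Outdated;
            for (auto & part : precommitted)
                part->state = State::Active;
            precommitted.clear();
        }

        // rollback: PreActive parts are simply dropped from the working set again.
        void rollback() { precommitted.clear(); }
    };

    int main()
    {
        auto old_part = std::make_shared<Part>(Part{"all_1_1_0", State::Active});
        auto merged = std::make_shared<Part>(Part{"all_1_1_1", State::PreActive});

        Transaction txn;
        txn.addPart(merged);

        std::vector<PartPtr> covered = {old_part};
        txn.commit(covered);

        std::cout << merged->name << (merged->state == State::Active ? " is active\n" : "\n");
        return 0;
    }
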
@@ -247,7 +248,7 @@ public: DataPartsVector commit(MergeTreeData::DataPartsLock * acquired_parts_lock = nullptr); - void addPart(MutableDataPartPtr & part, DataPartStorageBuilderPtr builder); + void addPart(MutableDataPartPtr & part); void rollback(); @@ -275,9 +276,8 @@ public: MergeTreeData & data; MergeTreeTransaction * txn; - DataParts precommitted_parts; - std::vector part_builders; - DataParts locked_parts; + MutableDataParts precommitted_parts; + MutableDataParts locked_parts; bool has_in_memory_parts = false; void clear(); @@ -414,9 +414,8 @@ public: SelectQueryInfo & info) const override; ReservationPtr reserveSpace(UInt64 expected_size, VolumePtr & volume) const; - static ReservationPtr tryReserveSpace(UInt64 expected_size, const DataPartStoragePtr & data_part_storage); - static ReservationPtr reserveSpace(UInt64 expected_size, const DataPartStoragePtr & data_part_storage); - static ReservationPtr reserveSpace(UInt64 expected_size, const DataPartStorageBuilderPtr & data_part_storage_builder); + static ReservationPtr tryReserveSpace(UInt64 expected_size, const IDataPartStorage & data_part_storage); + static ReservationPtr reserveSpace(UInt64 expected_size, const IDataPartStorage & data_part_storage); static bool partsContainSameProjections(const DataPartPtr & left, const DataPartPtr & right); @@ -555,21 +554,18 @@ public: bool renameTempPartAndAdd( MutableDataPartPtr & part, Transaction & transaction, - DataPartStorageBuilderPtr builder, DataPartsLock & lock); /// The same as renameTempPartAndAdd but the block range of the part can contain existing parts. /// Returns all parts covered by the added part (in ascending order). DataPartsVector renameTempPartAndReplace( MutableDataPartPtr & part, - Transaction & out_transaction, - DataPartStorageBuilderPtr builder); + Transaction & out_transaction); /// Unlocked version of previous one. Useful when added multiple parts with a single lock. DataPartsVector renameTempPartAndReplaceUnlocked( MutableDataPartPtr & part, Transaction & out_transaction, - DataPartStorageBuilderPtr builder, DataPartsLock & lock); /// Remove parts from working set immediately (without wait for background @@ -588,10 +584,33 @@ public: /// Used in REPLACE PARTITION command. void removePartsInRangeFromWorkingSet(MergeTreeTransaction * txn, const MergeTreePartInfo & drop_range, DataPartsLock & lock); + /// This wrapper is required to restrict access to parts in Deleting state + class PartToRemoveFromZooKeeper + { + DataPartPtr part; + bool was_active; + + public: + explicit PartToRemoveFromZooKeeper(DataPartPtr && part_, bool was_active_ = true) + : part(std::move(part_)), was_active(was_active_) + { + } + + /// It's safe to get name of any part + const String & getPartName() const { return part->name; } + + DataPartPtr getPartIfItWasActive() const + { + return was_active ? part : nullptr; + } + }; + + using PartsToRemoveFromZooKeeper = std::vector; + /// Same as above, but also returns list of parts to remove from ZooKeeper. /// It includes parts that have been just removed by these method /// and Outdated parts covered by drop_range that were removed earlier for any reason. 
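
The PartToRemoveFromZooKeeper wrapper defined above exposes the part name unconditionally but hands out the DataPartPtr only for parts that were still active, so callers cannot accidentally touch parts already in a Deleting state. A hypothetical caller-side sketch with simplified stand-in types (the loop and names are invented for illustration):

    #include <iostream>
    #include <memory>
    #include <string>
    #include <vector>

    struct Part { std::string name; };
    using PartPtr = std::shared_ptr<const Part>;

    // Simplified copy of the wrapper's interface: the pointer stays hidden unless the part was active.
    class PartToRemoveFromZooKeeper
    {
        PartPtr part;
        bool was_active;
    public:
        explicit PartToRemoveFromZooKeeper(PartPtr part_, bool was_active_ = true)
            : part(std::move(part_)), was_active(was_active_) {}

        const std::string & getPartName() const { return part->name; }
        PartPtr getPartIfItWasActive() const { return was_active ? part : nullptr; }
    };

    int main()
    {
        std::vector<PartToRemoveFromZooKeeper> to_remove;
        to_remove.emplace_back(std::make_shared<Part>(Part{"all_1_1_0"}), /* was_active */ true);
        to_remove.emplace_back(std::make_shared<Part>(Part{"all_2_2_0"}), /* was_active */ false);

        for (const auto & entry : to_remove)
        {
            // The name is always safe to use, e.g. for building the list of ZooKeeper log entries.
            std::cout << "remove from ZooKeeper: " << entry.getPartName() << '\n';

            // Anything that needs the part object itself must tolerate a null pointer,
            // because parts already in a Deleting state are not handed out.
            if (auto part = entry.getPartIfItWasActive())
                std::cout << "  part was active, can inspect it further\n";
        }
        return 0;
    }
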
- DataPartsVector removePartsInRangeFromWorkingSetAndGetPartsToRemoveFromZooKeeper( + PartsToRemoveFromZooKeeper removePartsInRangeFromWorkingSetAndGetPartsToRemoveFromZooKeeper( MergeTreeTransaction * txn, const MergeTreePartInfo & drop_range, DataPartsLock & lock); /// Restores Outdated part and adds it to working set @@ -644,6 +663,9 @@ public: /// Deletes the data directory and flushes the uncompressed blocks cache and the marks cache. void dropAllData(); + /// This flag is for hardening and assertions. + bool all_data_dropped = false; + /// Drop data directories if they are empty. It is safe to call this method if table creation was unsuccessful. void dropIfEmpty(); @@ -757,10 +779,10 @@ public: return column_sizes; } - const ColumnsDescription & getObjectColumns() const { return object_columns; } + const ColumnsDescription & getConcreteObjectColumns() const { return object_columns; } /// Creates description of columns of data type Object from the range of data parts. - static ColumnsDescription getObjectColumns( + static ColumnsDescription getConcreteObjectColumns( const DataPartsVector & parts, const ColumnsDescription & storage_columns); IndexSizeByName getSecondaryIndexSizes() const override @@ -979,7 +1001,7 @@ public: /// Fetch part only if some replica has it on shared storage like S3 /// Overridden in StorageReplicatedMergeTree - virtual DataPartStoragePtr tryToFetchIfShared(const IMergeTreeDataPart &, const DiskPtr &, const String &) { return nullptr; } + virtual MutableDataPartStoragePtr tryToFetchIfShared(const IMergeTreeDataPart &, const DiskPtr &, const String &) { return nullptr; } /// Check shared data usage on other replicas for detached/freezed part /// Remove local files and remote files if needed @@ -1129,7 +1151,7 @@ protected: } /// Creates description of columns of data type Object from the range of data parts. - static ColumnsDescription getObjectColumns( + static ColumnsDescription getConcreteObjectColumns( boost::iterator_range range, const ColumnsDescription & storage_columns); std::optional totalRowsByPartitionPredicateImpl( @@ -1264,13 +1286,12 @@ protected: static void incrementMergedPartsProfileEvent(MergeTreeDataPartType type); private: - /// Checking that candidate part doesn't break invariants: correct partition and doesn't exist already void checkPartCanBeAddedToTable(MutableDataPartPtr & part, DataPartsLock & lock) const; /// Preparing itself to be committed in memory: fill some fields inside part, add it to data_parts_indexes /// in precommitted state and to transaction - void preparePartForCommit(MutableDataPartPtr & part, Transaction & out_transaction, DataPartStorageBuilderPtr builder); + void preparePartForCommit(MutableDataPartPtr & part, Transaction & out_transaction); /// Low-level method for preparing parts for commit (in-memory). 
/// FIXME Merge MergeTreeTransaction and Transaction @@ -1278,7 +1299,6 @@ private: MutableDataPartPtr & part, Transaction & out_transaction, DataPartsLock & lock, - DataPartStorageBuilderPtr builder, DataPartsVector * out_covered_parts); /// RAII Wrapper for atomic work with currently moving parts @@ -1334,8 +1354,8 @@ private: virtual std::unique_ptr getDefaultSettings() const = 0; void loadDataPartsFromDisk( - DataPartsVector & broken_parts_to_detach, - DataPartsVector & duplicate_parts_to_remove, + MutableDataPartsVector & broken_parts_to_detach, + MutableDataPartsVector & duplicate_parts_to_remove, ThreadPool & pool, size_t num_parts, std::queue>> & parts_queue, @@ -1343,8 +1363,7 @@ private: const MergeTreeSettingsPtr & settings); void loadDataPartsFromWAL( - DataPartsVector & broken_parts_to_detach, - DataPartsVector & duplicate_parts_to_remove, + MutableDataPartsVector & duplicate_parts_to_remove, MutableDataPartsVector & parts_from_wal); /// Create zero-copy exclusive lock for part and disk. Useful for coordination of @@ -1356,6 +1375,8 @@ private: /// Otherwise, in non-parallel case will break and return. void clearPartsFromFilesystemImpl(const DataPartsVector & parts, NameSet * part_names_succeed); + static MutableDataPartPtr preparePartForRemoval(const DataPartPtr & part); + TemporaryParts temporary_parts; }; diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 1a5c94a2e26..fcc1b4cb3e2 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -214,6 +214,14 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge( /// Previous part only in boundaries of partition frame const MergeTreeData::DataPartPtr * prev_part = nullptr; + /// collect min_age for each partition while iterating parts + struct PartitionInfo + { + time_t min_age{std::numeric_limits::max()}; + }; + + std::unordered_map partitions_info; + size_t parts_selected_precondition = 0; for (const MergeTreeData::DataPartPtr & part : data_parts) { @@ -277,6 +285,9 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge( part_info.compression_codec_desc = part->default_codec->getFullCodecDesc(); part_info.shall_participate_in_merges = has_volumes_with_disabled_merges ? 
part->shallParticipateInMerges(storage_policy) : true; + auto & partition_info = partitions_info[partition_id]; + partition_info.min_age = std::min(partition_info.min_age, part_info.age); + ++parts_selected_precondition; parts_ranges.back().emplace_back(part_info); @@ -333,6 +344,8 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge( SimpleMergeSelector::Settings merge_settings; /// Override value from table settings merge_settings.max_parts_to_merge_at_once = data_settings->max_parts_to_merge_at_once; + if (!data_settings->min_age_to_force_merge_on_partition_only) + merge_settings.min_age_to_force_merge = data_settings->min_age_to_force_merge_seconds; if (aggressive) merge_settings.base = 1; @@ -346,6 +359,20 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge( if (parts_to_merge.empty()) { + if (data_settings->min_age_to_force_merge_on_partition_only && data_settings->min_age_to_force_merge_seconds) + { + auto best_partition_it = std::max_element( + partitions_info.begin(), + partitions_info.end(), + [](const auto & e1, const auto & e2) { return e1.second.min_age < e2.second.min_age; }); + + assert(best_partition_it != partitions_info.end()); + + if (static_cast(best_partition_it->second.min_age) >= data_settings->min_age_to_force_merge_seconds) + return selectAllPartsToMergeWithinPartition( + future_part, can_merge_callback, best_partition_it->first, true, metadata_snapshot, txn, out_disable_reason); + } + if (out_disable_reason) *out_disable_reason = "There is no need to merge parts according to merge selector algorithm"; return SelectPartsDecision::CANNOT_SELECT; @@ -482,8 +509,7 @@ MergeTaskPtr MergeTreeDataMergerMutator::mergePartsToTemporaryPart( const Names & deduplicate_by_columns, const MergeTreeData::MergingParams & merging_params, const MergeTreeTransactionPtr & txn, - const IMergeTreeDataPart * parent_part, - const IDataPartStorageBuilder * parent_path_storage_builder, + IMergeTreeDataPart * parent_part, const String & suffix) { return std::make_shared( @@ -498,7 +524,6 @@ MergeTaskPtr MergeTreeDataMergerMutator::mergePartsToTemporaryPart( deduplicate_by_columns, merging_params, parent_part, - parent_path_storage_builder, suffix, txn, &data, @@ -540,8 +565,7 @@ MergeTreeData::DataPartPtr MergeTreeDataMergerMutator::renameMergedTemporaryPart MergeTreeData::MutableDataPartPtr & new_data_part, const MergeTreeData::DataPartsVector & parts, const MergeTreeTransactionPtr & txn, - MergeTreeData::Transaction & out_transaction, - DataPartStorageBuilderPtr builder) + MergeTreeData::Transaction & out_transaction) { /// Some of source parts was possibly created in transaction, so non-transactional merge may break isolation. if (data.transactions_enabled.load(std::memory_order_relaxed) && !txn) @@ -549,7 +573,7 @@ MergeTreeData::DataPartPtr MergeTreeDataMergerMutator::renameMergedTemporaryPart "but transactions were enabled for this table"); /// Rename new part, add to the set and remove original parts. - auto replaced_parts = data.renameTempPartAndReplace(new_data_part, out_transaction, builder); + auto replaced_parts = data.renameTempPartAndReplace(new_data_part, out_transaction); /// Let's check that all original parts have been deleted and only them. 
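(Editor's note.) The selectPartsToMerge hunks above track, per partition, the minimum age of that partition's parts; when the normal selector picks nothing, they force-merge the whole partition whose youngest part is oldest, provided min_age_to_force_merge_on_partition_only is set and the age exceeds min_age_to_force_merge_seconds. Below is a self-contained restatement of that selection rule with illustrative names (choosePartitionToForceMerge is not part of the patch):

    #include <ctime>
    #include <map>
    #include <optional>
    #include <string>

    /// Returns the partition to force-merge, if any: the one whose youngest part is
    /// oldest, and only when even that youngest part exceeds the configured threshold.
    std::optional<std::string> choosePartitionToForceMerge(
        const std::map<std::string, time_t> & min_age_per_partition,
        time_t min_age_to_force_merge_seconds)
    {
        std::optional<std::string> best;
        time_t best_min_age = 0;
        for (const auto & [partition_id, min_age] : min_age_per_partition)
        {
            if (min_age >= min_age_to_force_merge_seconds && min_age > best_min_age)
            {
                best = partition_id;
                best_min_age = min_age;
            }
        }
        return best;
    }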
if (replaced_parts.size() != parts.size()) diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h index 14eb82c641c..5d98f526325 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h @@ -113,8 +113,7 @@ public: const Names & deduplicate_by_columns, const MergeTreeData::MergingParams & merging_params, const MergeTreeTransactionPtr & txn, - const IMergeTreeDataPart * parent_part = nullptr, - const IDataPartStorageBuilder * parent_path_storage_builder = nullptr, + IMergeTreeDataPart * parent_part = nullptr, const String & suffix = ""); /// Mutate a single data part with the specified commands. Will create and return a temporary part. @@ -133,8 +132,7 @@ public: MergeTreeData::MutableDataPartPtr & new_data_part, const MergeTreeData::DataPartsVector & parts, const MergeTreeTransactionPtr & txn, - MergeTreeData::Transaction & out_transaction, - DataPartStorageBuilderPtr builder); + MergeTreeData::Transaction & out_transaction); /// The approximate amount of disk space needed for merge or mutation. With a surplus. diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp index 9298e841072..a537b44d9ea 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp @@ -22,7 +22,7 @@ namespace ErrorCodes MergeTreeDataPartCompact::MergeTreeDataPartCompact( MergeTreeData & storage_, const String & name_, - const DataPartStoragePtr & data_part_storage_, + const MutableDataPartStoragePtr & data_part_storage_, const IMergeTreeDataPart * parent_part_) : IMergeTreeDataPart(storage_, name_, data_part_storage_, Type::Compact, parent_part_) { @@ -32,7 +32,7 @@ MergeTreeDataPartCompact::MergeTreeDataPartCompact( const MergeTreeData & storage_, const String & name_, const MergeTreePartInfo & info_, - const DataPartStoragePtr & data_part_storage_, + const MutableDataPartStoragePtr & data_part_storage_, const IMergeTreeDataPart * parent_part_) : IMergeTreeDataPart(storage_, name_, info_, data_part_storage_, Type::Compact, parent_part_) { @@ -58,13 +58,12 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartCompact::getReader( } IMergeTreeDataPart::MergeTreeWriterPtr MergeTreeDataPartCompact::getWriter( - DataPartStorageBuilderPtr data_part_storage_builder, const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot, const std::vector & indices_to_recalc, const CompressionCodecPtr & default_codec_, const MergeTreeWriterSettings & writer_settings, - const MergeTreeIndexGranularity & computed_index_granularity) const + const MergeTreeIndexGranularity & computed_index_granularity) { NamesAndTypesList ordered_columns_list; std::copy_if(columns_list.begin(), columns_list.end(), std::back_inserter(ordered_columns_list), @@ -75,7 +74,7 @@ IMergeTreeDataPart::MergeTreeWriterPtr MergeTreeDataPartCompact::getWriter( { return *getColumnPosition(lhs.name) < *getColumnPosition(rhs.name); }); return std::make_unique( - shared_from_this(), std::move(data_part_storage_builder), ordered_columns_list, metadata_snapshot, + shared_from_this(), ordered_columns_list, metadata_snapshot, indices_to_recalc, getMarksFileExtension(), default_codec_, writer_settings, computed_index_granularity); } @@ -97,21 +96,21 @@ void MergeTreeDataPartCompact::calculateEachColumnSizes(ColumnSizeByName & /*eac void MergeTreeDataPartCompact::loadIndexGranularityImpl( 
MergeTreeIndexGranularity & index_granularity_, const MergeTreeIndexGranularityInfo & index_granularity_info_, - size_t columns_count, const DataPartStoragePtr & data_part_storage_) + size_t columns_count, const IDataPartStorage & data_part_storage_) { if (!index_granularity_info_.mark_type.adaptive) throw Exception("MergeTreeDataPartCompact cannot be created with non-adaptive granulary.", ErrorCodes::NOT_IMPLEMENTED); auto marks_file_path = index_granularity_info_.getMarksFilePath("data"); - if (!data_part_storage_->exists(marks_file_path)) + if (!data_part_storage_.exists(marks_file_path)) throw Exception( ErrorCodes::NO_FILE_IN_DATA_PART, "Marks file '{}' doesn't exist", - std::string(fs::path(data_part_storage_->getFullPath()) / marks_file_path)); + std::string(fs::path(data_part_storage_.getFullPath()) / marks_file_path)); - size_t marks_file_size = data_part_storage_->getFileSize(marks_file_path); + size_t marks_file_size = data_part_storage_.getFileSize(marks_file_path); - std::unique_ptr buffer = data_part_storage_->readFile( + std::unique_ptr buffer = data_part_storage_.readFile( marks_file_path, ReadSettings().adjustBufferSize(marks_file_size), marks_file_size, std::nullopt); std::unique_ptr marks_reader; @@ -140,7 +139,7 @@ void MergeTreeDataPartCompact::loadIndexGranularity() if (columns.empty()) throw Exception("No columns in part " + name, ErrorCodes::NO_FILE_IN_DATA_PART); - loadIndexGranularityImpl(index_granularity, index_granularity_info, columns.size(), data_part_storage); + loadIndexGranularityImpl(index_granularity, index_granularity_info, columns.size(), getDataPartStorage()); } bool MergeTreeDataPartCompact::hasColumnFiles(const NameAndTypePair & column) const @@ -171,12 +170,12 @@ void MergeTreeDataPartCompact::checkConsistency(bool require_part_metadata) cons throw Exception( ErrorCodes::NO_FILE_IN_DATA_PART, "No marks file checksum for column in part {}", - data_part_storage->getFullPath()); + getDataPartStorage().getFullPath()); if (!checksums.files.contains(DATA_FILE_NAME_WITH_EXTENSION)) throw Exception( ErrorCodes::NO_FILE_IN_DATA_PART, "No data file checksum for in part {}", - data_part_storage->getFullPath()); + getDataPartStorage().getFullPath()); } } else @@ -184,33 +183,33 @@ void MergeTreeDataPartCompact::checkConsistency(bool require_part_metadata) cons { /// count.txt should be present even in non custom-partitioned parts std::string file_path = "count.txt"; - if (!data_part_storage->exists(file_path) || data_part_storage->getFileSize(file_path) == 0) + if (!getDataPartStorage().exists(file_path) || getDataPartStorage().getFileSize(file_path) == 0) throw Exception( ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART, "Part {} is broken: {} is empty", - data_part_storage->getRelativePath(), - std::string(fs::path(data_part_storage->getFullPath()) / file_path)); + getDataPartStorage().getRelativePath(), + std::string(fs::path(getDataPartStorage().getFullPath()) / file_path)); } /// Check that marks are nonempty and have the consistent size with columns number. 
- if (data_part_storage->exists(mrk_file_name)) + if (getDataPartStorage().exists(mrk_file_name)) { - UInt64 file_size = data_part_storage->getFileSize(mrk_file_name); + UInt64 file_size = getDataPartStorage().getFileSize(mrk_file_name); if (!file_size) throw Exception( ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART, "Part {} is broken: {} is empty.", - data_part_storage->getRelativePath(), - std::string(fs::path(data_part_storage->getFullPath()) / mrk_file_name)); + getDataPartStorage().getRelativePath(), + std::string(fs::path(getDataPartStorage().getFullPath()) / mrk_file_name)); UInt64 expected_file_size = index_granularity_info.getMarkSizeInBytes(columns.size()) * index_granularity.getMarksCount(); if (expected_file_size != file_size) throw Exception( ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART, "Part {} is broken: bad size of marks file '{}': {}, must be: {}", - data_part_storage->getRelativePath(), - std::string(fs::path(data_part_storage->getFullPath()) / mrk_file_name), + getDataPartStorage().getRelativePath(), + std::string(fs::path(getDataPartStorage().getFullPath()) / mrk_file_name), std::to_string(file_size), std::to_string(expected_file_size)); } } @@ -218,12 +217,12 @@ void MergeTreeDataPartCompact::checkConsistency(bool require_part_metadata) cons bool MergeTreeDataPartCompact::isStoredOnRemoteDisk() const { - return data_part_storage->isStoredOnRemoteDisk(); + return getDataPartStorage().isStoredOnRemoteDisk(); } bool MergeTreeDataPartCompact::isStoredOnRemoteDiskWithZeroCopySupport() const { - return data_part_storage->supportZeroCopyReplication(); + return getDataPartStorage().supportZeroCopyReplication(); } MergeTreeDataPartCompact::~MergeTreeDataPartCompact() diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.h b/src/Storages/MergeTree/MergeTreeDataPartCompact.h index d3ac71cb02a..e275c586cb9 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.h +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.h @@ -25,13 +25,13 @@ public: const MergeTreeData & storage_, const String & name_, const MergeTreePartInfo & info_, - const DataPartStoragePtr & data_part_storage_, + const MutableDataPartStoragePtr & data_part_storage_, const IMergeTreeDataPart * parent_part_ = nullptr); MergeTreeDataPartCompact( MergeTreeData & storage_, const String & name_, - const DataPartStoragePtr & data_part_storage_, + const MutableDataPartStoragePtr & data_part_storage_, const IMergeTreeDataPart * parent_part_ = nullptr); MergeTreeReaderPtr getReader( @@ -45,13 +45,12 @@ public: const ReadBufferFromFileBase::ProfileCallback & profile_callback) const override; MergeTreeWriterPtr getWriter( - DataPartStorageBuilderPtr data_part_storage_builder, const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot, const std::vector & indices_to_recalc, const CompressionCodecPtr & default_codec_, const MergeTreeWriterSettings & writer_settings, - const MergeTreeIndexGranularity & computed_index_granularity) const override; + const MergeTreeIndexGranularity & computed_index_granularity) override; bool isStoredOnDisk() const override { return true; } @@ -68,7 +67,7 @@ public: protected: static void loadIndexGranularityImpl( MergeTreeIndexGranularity & index_granularity_, const MergeTreeIndexGranularityInfo & index_granularity_info_, - size_t columns_count, const DataPartStoragePtr & data_part_storage_); + size_t columns_count, const IDataPartStorage & data_part_storage_); private: void checkConsistency(bool require_part_metadata) const override; diff --git 
a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp index c7c831c23ec..48b1b6bab60 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp @@ -1,10 +1,12 @@ -#include "MergeTreeDataPartInMemory.h" +#include #include #include #include #include #include +#include #include +#include #include #include #include @@ -21,7 +23,7 @@ namespace ErrorCodes MergeTreeDataPartInMemory::MergeTreeDataPartInMemory( MergeTreeData & storage_, const String & name_, - const DataPartStoragePtr & data_part_storage_, + const MutableDataPartStoragePtr & data_part_storage_, const IMergeTreeDataPart * parent_part_) : IMergeTreeDataPart(storage_, name_, data_part_storage_, Type::InMemory, parent_part_) { @@ -32,7 +34,7 @@ MergeTreeDataPartInMemory::MergeTreeDataPartInMemory( const MergeTreeData & storage_, const String & name_, const MergeTreePartInfo & info_, - const DataPartStoragePtr & data_part_storage_, + const MutableDataPartStoragePtr & data_part_storage_, const IMergeTreeDataPart * parent_part_) : IMergeTreeDataPart(storage_, name_, info_, data_part_storage_, Type::InMemory, parent_part_) { @@ -56,27 +58,33 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartInMemory::getReader( } IMergeTreeDataPart::MergeTreeWriterPtr MergeTreeDataPartInMemory::getWriter( - DataPartStorageBuilderPtr data_part_storage_builder_, const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot, const std::vector & /* indices_to_recalc */, const CompressionCodecPtr & /* default_codec */, const MergeTreeWriterSettings & writer_settings, - const MergeTreeIndexGranularity & /* computed_index_granularity */) const + const MergeTreeIndexGranularity & /* computed_index_granularity */) { - data_part_storage_builder = data_part_storage_builder_; - auto ptr = std::static_pointer_cast(shared_from_this()); + auto ptr = std::static_pointer_cast(shared_from_this()); return std::make_unique( ptr, columns_list, metadata_snapshot, writer_settings); } -DataPartStoragePtr MergeTreeDataPartInMemory::flushToDisk(const String & new_relative_path, const StorageMetadataPtr & metadata_snapshot) const +MutableDataPartStoragePtr MergeTreeDataPartInMemory::flushToDisk(const String & new_relative_path, const StorageMetadataPtr & metadata_snapshot) const { - auto current_full_path = data_part_storage_builder->getFullPath(); - data_part_storage_builder->setRelativePath(new_relative_path); + auto reservation = storage.reserveSpace(block.bytes(), getDataPartStorage()); + VolumePtr volume = storage.getStoragePolicy()->getVolume(0); + VolumePtr data_part_volume = createVolumeFromReservation(reservation, volume); + auto new_data_part_storage = std::make_shared( + data_part_volume, + storage.getRelativeDataPath(), + new_relative_path); + + new_data_part_storage->beginTransaction(); + + auto current_full_path = getDataPartStorage().getFullPath(); auto new_type = storage.choosePartTypeOnDisk(block.bytes(), rows_count); - auto new_data_part_storage = data_part_storage_builder->getStorage(); auto new_data_part = storage.createPart(name, new_type, info, new_data_part_storage); new_data_part->uuid = uuid; @@ -84,50 +92,50 @@ DataPartStoragePtr MergeTreeDataPartInMemory::flushToDisk(const String & new_rel new_data_part->partition.value = partition.value; new_data_part->minmax_idx = minmax_idx; - if (data_part_storage_builder->exists()) + if (new_data_part_storage->exists()) { throw Exception( 
ErrorCodes::DIRECTORY_ALREADY_EXISTS, "Could not flush part {}. Part in {} already exists", quoteString(current_full_path), - data_part_storage_builder->getFullPath()); + new_data_part_storage->getFullPath()); } - data_part_storage_builder->createDirectories(); + new_data_part_storage->createDirectories(); auto compression_codec = storage.getContext()->chooseCompressionCodec(0, 0); auto indices = MergeTreeIndexFactory::instance().getMany(metadata_snapshot->getSecondaryIndices()); - MergedBlockOutputStream out(new_data_part, data_part_storage_builder, metadata_snapshot, columns, indices, compression_codec, NO_TRANSACTION_PTR); + MergedBlockOutputStream out(new_data_part, metadata_snapshot, columns, indices, compression_codec, NO_TRANSACTION_PTR); out.write(block); const auto & projections = metadata_snapshot->getProjections(); for (const auto & [projection_name, projection] : projection_parts) { if (projections.has(projection_name)) { - auto projection_part_storage_builder = data_part_storage_builder->getProjection(projection_name + ".proj"); - if (projection_part_storage_builder->exists()) + auto projection_part_storage = new_data_part_storage->getProjection(projection_name + ".proj"); + if (projection_part_storage->exists()) { throw Exception( ErrorCodes::DIRECTORY_ALREADY_EXISTS, "Could not flush projection part {}. Projection part in {} already exists", projection_name, - projection_part_storage_builder->getFullPath()); + projection_part_storage->getFullPath()); } auto projection_part = asInMemoryPart(projection); auto projection_type = storage.choosePartTypeOnDisk(projection_part->block.bytes(), rows_count); MergeTreePartInfo projection_info("all", 0, 0, 0); auto projection_data_part - = storage.createPart(projection_name, projection_type, projection_info, projection_part_storage_builder->getStorage(), parent_part); + = storage.createPart(projection_name, projection_type, projection_info, projection_part_storage, parent_part); projection_data_part->is_temp = false; // clean up will be done on parent part projection_data_part->setColumns(projection->getColumns(), {}); - projection_part_storage_builder->createDirectories(); + projection_part_storage->createDirectories(); const auto & desc = projections.get(name); auto projection_compression_codec = storage.getContext()->chooseCompressionCodec(0, 0); auto projection_indices = MergeTreeIndexFactory::instance().getMany(desc.metadata->getSecondaryIndices()); MergedBlockOutputStream projection_out( - projection_data_part, projection_part_storage_builder, desc.metadata, projection_part->columns, projection_indices, + projection_data_part, desc.metadata, projection_part->columns, projection_indices, projection_compression_codec, NO_TRANSACTION_PTR); projection_out.write(projection_part->block); @@ -137,21 +145,19 @@ DataPartStoragePtr MergeTreeDataPartInMemory::flushToDisk(const String & new_rel } out.finalizePart(new_data_part, false); + new_data_part_storage->commitTransaction(); return new_data_part_storage; } void MergeTreeDataPartInMemory::makeCloneInDetached(const String & prefix, const StorageMetadataPtr & metadata_snapshot) const { - String detached_path = getRelativePathForDetachedPart(prefix); + String detached_path = *getRelativePathForDetachedPart(prefix, /* broken */ false); flushToDisk(detached_path, metadata_snapshot); } -void MergeTreeDataPartInMemory::renameTo(const String & new_relative_path, bool /* remove_new_dir_if_exists */, DataPartStorageBuilderPtr) const +void MergeTreeDataPartInMemory::renameTo(const String & 
new_relative_path, bool /* remove_new_dir_if_exists */) { - data_part_storage->setRelativePath(new_relative_path); - - if (data_part_storage_builder) - data_part_storage_builder->setRelativePath(new_relative_path); + getDataPartStorage().setRelativePath(new_relative_path); } void MergeTreeDataPartInMemory::calculateEachColumnSizes(ColumnSizeByName & each_columns_size, ColumnSize & total_size) const diff --git a/src/Storages/MergeTree/MergeTreeDataPartInMemory.h b/src/Storages/MergeTree/MergeTreeDataPartInMemory.h index d985c7f055e..e58701b04a1 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartInMemory.h +++ b/src/Storages/MergeTree/MergeTreeDataPartInMemory.h @@ -14,13 +14,13 @@ public: const MergeTreeData & storage_, const String & name_, const MergeTreePartInfo & info_, - const DataPartStoragePtr & data_part_storage_, + const MutableDataPartStoragePtr & data_part_storage_, const IMergeTreeDataPart * parent_part_ = nullptr); MergeTreeDataPartInMemory( MergeTreeData & storage_, const String & name_, - const DataPartStoragePtr & data_part_storage_, + const MutableDataPartStoragePtr & data_part_storage_, const IMergeTreeDataPart * parent_part_ = nullptr); MergeTreeReaderPtr getReader( @@ -34,29 +34,27 @@ public: const ReadBufferFromFileBase::ProfileCallback & profile_callback) const override; MergeTreeWriterPtr getWriter( - DataPartStorageBuilderPtr data_part_storage_builder_, const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot, const std::vector & indices_to_recalc, const CompressionCodecPtr & default_codec_, const MergeTreeWriterSettings & writer_settings, - const MergeTreeIndexGranularity & computed_index_granularity) const override; + const MergeTreeIndexGranularity & computed_index_granularity) override; bool isStoredOnDisk() const override { return false; } bool isStoredOnRemoteDisk() const override { return false; } bool isStoredOnRemoteDiskWithZeroCopySupport() const override { return false; } bool hasColumnFiles(const NameAndTypePair & column) const override { return !!getColumnPosition(column.getNameInStorage()); } String getFileNameForColumn(const NameAndTypePair & /* column */) const override { return ""; } - void renameTo(const String & new_relative_path, bool remove_new_dir_if_exists, DataPartStorageBuilderPtr) const override; + void renameTo(const String & new_relative_path, bool remove_new_dir_if_exists) override; void makeCloneInDetached(const String & prefix, const StorageMetadataPtr & metadata_snapshot) const override; - DataPartStoragePtr flushToDisk(const String & new_relative_path, const StorageMetadataPtr & metadata_snapshot) const; + MutableDataPartStoragePtr flushToDisk(const String & new_relative_path, const StorageMetadataPtr & metadata_snapshot) const; /// Returns hash of parts's block Checksum calculateBlockChecksum() const; mutable Block block; - mutable DataPartStorageBuilderPtr data_part_storage_builder; private: mutable std::condition_variable is_merged; @@ -66,6 +64,8 @@ private: }; using DataPartInMemoryPtr = std::shared_ptr; +using MutableDataPartInMemoryPtr = std::shared_ptr; + DataPartInMemoryPtr asInMemoryPart(const MergeTreeDataPartPtr & part); } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp index 170d1b1d703..2418960f992 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp @@ -21,7 +21,7 @@ namespace ErrorCodes MergeTreeDataPartWide::MergeTreeDataPartWide( MergeTreeData & storage_, const 
String & name_, - const DataPartStoragePtr & data_part_storage_, + const MutableDataPartStoragePtr & data_part_storage_, const IMergeTreeDataPart * parent_part_) : IMergeTreeDataPart(storage_, name_, data_part_storage_, Type::Wide, parent_part_) { @@ -31,7 +31,7 @@ MergeTreeDataPartWide::MergeTreeDataPartWide( const MergeTreeData & storage_, const String & name_, const MergeTreePartInfo & info_, - const DataPartStoragePtr & data_part_storage_, + const MutableDataPartStoragePtr & data_part_storage_, const IMergeTreeDataPart * parent_part_) : IMergeTreeDataPart(storage_, name_, info_, data_part_storage_, Type::Wide, parent_part_) { @@ -56,17 +56,16 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartWide::getReader( } IMergeTreeDataPart::MergeTreeWriterPtr MergeTreeDataPartWide::getWriter( - DataPartStorageBuilderPtr data_part_storage_builder, const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot, const std::vector & indices_to_recalc, const CompressionCodecPtr & default_codec_, const MergeTreeWriterSettings & writer_settings, - const MergeTreeIndexGranularity & computed_index_granularity) const + const MergeTreeIndexGranularity & computed_index_granularity) { return std::make_unique( - shared_from_this(), data_part_storage_builder, - columns_list, metadata_snapshot, indices_to_recalc, + shared_from_this(), columns_list, + metadata_snapshot, indices_to_recalc, getMarksFileExtension(), default_codec_, writer_settings, computed_index_granularity); } @@ -105,18 +104,18 @@ ColumnSize MergeTreeDataPartWide::getColumnSizeImpl( void MergeTreeDataPartWide::loadIndexGranularityImpl( MergeTreeIndexGranularity & index_granularity_, MergeTreeIndexGranularityInfo & index_granularity_info_, - const DataPartStoragePtr & data_part_storage_, const std::string & any_column_file_name) + const IDataPartStorage & data_part_storage_, const std::string & any_column_file_name) { index_granularity_info_.changeGranularityIfRequired(data_part_storage_); /// We can use any column, it doesn't matter std::string marks_file_path = index_granularity_info_.getMarksFilePath(any_column_file_name); - if (!data_part_storage_->exists(marks_file_path)) + if (!data_part_storage_.exists(marks_file_path)) throw Exception( ErrorCodes::NO_FILE_IN_DATA_PART, "Marks file '{}' doesn't exist", - std::string(fs::path(data_part_storage_->getFullPath()) / marks_file_path)); + std::string(fs::path(data_part_storage_.getFullPath()) / marks_file_path)); - size_t marks_file_size = data_part_storage_->getFileSize(marks_file_path); + size_t marks_file_size = data_part_storage_.getFileSize(marks_file_path); if (!index_granularity_info_.mark_type.adaptive && !index_granularity_info_.mark_type.compressed) { @@ -126,7 +125,7 @@ void MergeTreeDataPartWide::loadIndexGranularityImpl( } else { - auto marks_file = data_part_storage_->readFile(marks_file_path, ReadSettings().adjustBufferSize(marks_file_size), marks_file_size, std::nullopt); + auto marks_file = data_part_storage_.readFile(marks_file_path, ReadSettings().adjustBufferSize(marks_file_size), marks_file_size, std::nullopt); std::unique_ptr marks_reader; if (!index_granularity_info_.mark_type.compressed) @@ -163,18 +162,18 @@ void MergeTreeDataPartWide::loadIndexGranularity() if (columns.empty()) throw Exception("No columns in part " + name, ErrorCodes::NO_FILE_IN_DATA_PART); - loadIndexGranularityImpl(index_granularity, index_granularity_info, data_part_storage, getFileNameForColumn(columns.front())); + loadIndexGranularityImpl(index_granularity, 
index_granularity_info, getDataPartStorage(), getFileNameForColumn(columns.front())); } bool MergeTreeDataPartWide::isStoredOnRemoteDisk() const { - return data_part_storage->isStoredOnRemoteDisk(); + return getDataPartStorage().isStoredOnRemoteDisk(); } bool MergeTreeDataPartWide::isStoredOnRemoteDiskWithZeroCopySupport() const { - return data_part_storage->supportZeroCopyReplication(); + return getDataPartStorage().supportZeroCopyReplication(); } MergeTreeDataPartWide::~MergeTreeDataPartWide() @@ -203,13 +202,13 @@ void MergeTreeDataPartWide::checkConsistency(bool require_part_metadata) const throw Exception( ErrorCodes::NO_FILE_IN_DATA_PART, "No {} file checksum for column {} in part {} ", - mrk_file_name, name_type.name, data_part_storage->getFullPath()); + mrk_file_name, name_type.name, getDataPartStorage().getFullPath()); if (!checksums.files.contains(bin_file_name)) throw Exception( ErrorCodes::NO_FILE_IN_DATA_PART, "No {} file checksum for column {} in part ", - bin_file_name, name_type.name, data_part_storage->getFullPath()); + bin_file_name, name_type.name, getDataPartStorage().getFullPath()); }); } } @@ -225,23 +224,23 @@ void MergeTreeDataPartWide::checkConsistency(bool require_part_metadata) const auto file_path = ISerialization::getFileNameForStream(name_type, substream_path) + marks_file_extension; /// Missing file is Ok for case when new column was added. - if (data_part_storage->exists(file_path)) + if (getDataPartStorage().exists(file_path)) { - UInt64 file_size = data_part_storage->getFileSize(file_path); + UInt64 file_size = getDataPartStorage().getFileSize(file_path); if (!file_size) throw Exception( ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART, "Part {} is broken: {} is empty.", - data_part_storage->getFullPath(), - std::string(fs::path(data_part_storage->getFullPath()) / file_path)); + getDataPartStorage().getFullPath(), + std::string(fs::path(getDataPartStorage().getFullPath()) / file_path)); if (!marks_size) marks_size = file_size; else if (file_size != *marks_size) throw Exception( ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART, - "Part {} is broken: marks have different sizes.", data_part_storage->getFullPath()); + "Part {} is broken: marks have different sizes.", getDataPartStorage().getFullPath()); } }); } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.h b/src/Storages/MergeTree/MergeTreeDataPartWide.h index 52afa9e82d4..601bdff51a1 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.h @@ -1,5 +1,6 @@ #pragma once +#include "Storages/MergeTree/IDataPartStorage.h" #include namespace DB @@ -19,13 +20,13 @@ public: const MergeTreeData & storage_, const String & name_, const MergeTreePartInfo & info_, - const DataPartStoragePtr & data_part_storage_, + const MutableDataPartStoragePtr & data_part_storage_, const IMergeTreeDataPart * parent_part_ = nullptr); MergeTreeDataPartWide( MergeTreeData & storage_, const String & name_, - const DataPartStoragePtr & data_part_storage_, + const MutableDataPartStoragePtr & data_part_storage_, const IMergeTreeDataPart * parent_part_ = nullptr); MergeTreeReaderPtr getReader( @@ -39,13 +40,12 @@ public: const ReadBufferFromFileBase::ProfileCallback & profile_callback) const override; MergeTreeWriterPtr getWriter( - DataPartStorageBuilderPtr data_part_storage_builder, const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot, const std::vector & indices_to_recalc, const CompressionCodecPtr & default_codec_, const MergeTreeWriterSettings & 
writer_settings, - const MergeTreeIndexGranularity & computed_index_granularity) const override; + const MergeTreeIndexGranularity & computed_index_granularity) override; bool isStoredOnDisk() const override { return true; } @@ -64,7 +64,7 @@ public: protected: static void loadIndexGranularityImpl( MergeTreeIndexGranularity & index_granularity_, MergeTreeIndexGranularityInfo & index_granularity_info_, - const DataPartStoragePtr & data_part_storage_, const std::string & any_column_file_name); + const IDataPartStorage & data_part_storage_, const std::string & any_column_file_name); private: void checkConsistency(bool require_part_metadata) const override; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp index 457aad55023..020121e59d7 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp @@ -10,8 +10,7 @@ namespace ErrorCodes } MergeTreeDataPartWriterCompact::MergeTreeDataPartWriterCompact( - const MergeTreeData::DataPartPtr & data_part_, - DataPartStorageBuilderPtr data_part_storage_builder_, + const MergeTreeMutableDataPartPtr & data_part_, const NamesAndTypesList & columns_list_, const StorageMetadataPtr & metadata_snapshot_, const std::vector & indices_to_recalc_, @@ -19,16 +18,16 @@ MergeTreeDataPartWriterCompact::MergeTreeDataPartWriterCompact( const CompressionCodecPtr & default_codec_, const MergeTreeWriterSettings & settings_, const MergeTreeIndexGranularity & index_granularity_) - : MergeTreeDataPartWriterOnDisk(data_part_, std::move(data_part_storage_builder_), columns_list_, metadata_snapshot_, + : MergeTreeDataPartWriterOnDisk(data_part_, columns_list_, metadata_snapshot_, indices_to_recalc_, marks_file_extension_, default_codec_, settings_, index_granularity_) - , plain_file(data_part_storage_builder->writeFile( + , plain_file(data_part_->getDataPartStorage().writeFile( MergeTreeDataPartCompact::DATA_FILE_NAME_WITH_EXTENSION, settings.max_compress_block_size, settings_.query_write_settings)) , plain_hashing(*plain_file) { - marks_file = data_part_storage_builder->writeFile( + marks_file = data_part_->getDataPartStorage().writeFile( MergeTreeDataPartCompact::DATA_FILE_NAME + marks_file_extension_, 4096, settings_.query_write_settings); @@ -132,7 +131,7 @@ void writeColumnSingleGranule( serialize_settings.position_independent_encoding = true; //-V1048 serialize_settings.low_cardinality_max_dictionary_size = 0; //-V1048 - serialization->serializeBinaryBulkStatePrefix(serialize_settings, state); + serialization->serializeBinaryBulkStatePrefix(*column.column, serialize_settings, state); serialization->serializeBinaryBulkWithMultipleStreams(*column.column, from_row, number_of_rows, serialize_settings, state); serialization->serializeBinaryBulkStateSuffix(serialize_settings, state); } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h index 7b68f61925f..06f8122393f 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h @@ -11,8 +11,7 @@ class MergeTreeDataPartWriterCompact : public MergeTreeDataPartWriterOnDisk { public: MergeTreeDataPartWriterCompact( - const MergeTreeData::DataPartPtr & data_part, - DataPartStorageBuilderPtr data_part_storage_builder_, + const MergeTreeMutableDataPartPtr & data_part, const NamesAndTypesList & columns_list, const StorageMetadataPtr & 
metadata_snapshot_, const std::vector & indices_to_recalc, diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterInMemory.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterInMemory.cpp index e1145868ce2..8066a097499 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterInMemory.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterInMemory.cpp @@ -11,11 +11,11 @@ namespace ErrorCodes } MergeTreeDataPartWriterInMemory::MergeTreeDataPartWriterInMemory( - const DataPartInMemoryPtr & part_, + const MutableDataPartInMemoryPtr & part_, const NamesAndTypesList & columns_list_, const StorageMetadataPtr & metadata_snapshot_, const MergeTreeWriterSettings & settings_) - : IMergeTreeDataPartWriter(part_, nullptr, columns_list_, metadata_snapshot_, settings_) + : IMergeTreeDataPartWriter(part_, columns_list_, metadata_snapshot_, settings_) , part_in_memory(part_) {} void MergeTreeDataPartWriterInMemory::write( diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterInMemory.h b/src/Storages/MergeTree/MergeTreeDataPartWriterInMemory.h index 233ca81a697..9e1e868beac 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterInMemory.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterInMemory.h @@ -10,7 +10,7 @@ class MergeTreeDataPartWriterInMemory : public IMergeTreeDataPartWriter { public: MergeTreeDataPartWriterInMemory( - const DataPartInMemoryPtr & part_, + const MutableDataPartInMemoryPtr & part_, const NamesAndTypesList & columns_list_, const StorageMetadataPtr & metadata_snapshot, const MergeTreeWriterSettings & settings_); @@ -24,7 +24,7 @@ public: private: void calculateAndSerializePrimaryIndex(const Block & primary_index_block); - DataPartInMemoryPtr part_in_memory; + MutableDataPartInMemoryPtr part_in_memory; }; } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp index 1d2b095330e..d085bb29b20 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp @@ -48,7 +48,7 @@ void MergeTreeDataPartWriterOnDisk::Stream::sync() const MergeTreeDataPartWriterOnDisk::Stream::Stream( const String & escaped_column_name_, - const DataPartStorageBuilderPtr & data_part_storage_builder, + const MutableDataPartStoragePtr & data_part_storage, const String & data_path_, const std::string & data_file_extension_, const std::string & marks_path_, @@ -61,11 +61,11 @@ MergeTreeDataPartWriterOnDisk::Stream::Stream( escaped_column_name(escaped_column_name_), data_file_extension{data_file_extension_}, marks_file_extension{marks_file_extension_}, - plain_file(data_part_storage_builder->writeFile(data_path_ + data_file_extension, max_compress_block_size_, query_write_settings)), + plain_file(data_part_storage->writeFile(data_path_ + data_file_extension, max_compress_block_size_, query_write_settings)), plain_hashing(*plain_file), compressor(plain_hashing, compression_codec_, max_compress_block_size_), compressed_hashing(compressor), - marks_file(data_part_storage_builder->writeFile(marks_path_ + marks_file_extension, 4096, query_write_settings)), + marks_file(data_part_storage->writeFile(marks_path_ + marks_file_extension, 4096, query_write_settings)), marks_hashing(*marks_file), marks_compressor(marks_hashing, marks_compression_codec_, marks_compress_block_size_), marks_compressed_hashing(marks_compressor), @@ -96,8 +96,7 @@ void MergeTreeDataPartWriterOnDisk::Stream::addToChecksums(MergeTreeData::DataPa 
MergeTreeDataPartWriterOnDisk::MergeTreeDataPartWriterOnDisk( - const MergeTreeData::DataPartPtr & data_part_, - DataPartStorageBuilderPtr data_part_storage_builder_, + const MergeTreeMutableDataPartPtr & data_part_, const NamesAndTypesList & columns_list_, const StorageMetadataPtr & metadata_snapshot_, const MergeTreeIndices & indices_to_recalc_, @@ -105,8 +104,7 @@ MergeTreeDataPartWriterOnDisk::MergeTreeDataPartWriterOnDisk( const CompressionCodecPtr & default_codec_, const MergeTreeWriterSettings & settings_, const MergeTreeIndexGranularity & index_granularity_) - : IMergeTreeDataPartWriter(data_part_, std::move(data_part_storage_builder_), - columns_list_, metadata_snapshot_, settings_, index_granularity_) + : IMergeTreeDataPartWriter(data_part_, columns_list_, metadata_snapshot_, settings_, index_granularity_) , skip_indices(indices_to_recalc_) , marks_file_extension(marks_file_extension_) , default_codec(default_codec_) @@ -116,8 +114,8 @@ MergeTreeDataPartWriterOnDisk::MergeTreeDataPartWriterOnDisk( if (settings.blocks_are_granules_size && !index_granularity.empty()) throw Exception("Can't take information about index granularity from blocks, when non empty index_granularity array specified", ErrorCodes::LOGICAL_ERROR); - if (!data_part_storage_builder->exists()) - data_part_storage_builder->createDirectories(); + if (!data_part->getDataPartStorage().exists()) + data_part->getDataPartStorage().createDirectories(); if (settings.rewrite_primary_key) initPrimaryIndex(); @@ -178,7 +176,7 @@ void MergeTreeDataPartWriterOnDisk::initPrimaryIndex() if (metadata_snapshot->hasPrimaryKey()) { String index_name = "primary" + getIndexExtension(compress_primary_key); - index_file_stream = data_part_storage_builder->writeFile(index_name, DBMS_DEFAULT_BUFFER_SIZE, settings.query_write_settings); + index_file_stream = data_part->getDataPartStorage().writeFile(index_name, DBMS_DEFAULT_BUFFER_SIZE, settings.query_write_settings); index_file_hashing_stream = std::make_unique(*index_file_stream); if (compress_primary_key) @@ -204,7 +202,7 @@ void MergeTreeDataPartWriterOnDisk::initSkipIndices() skip_indices_streams.emplace_back( std::make_unique( stream_name, - data_part_storage_builder, + data_part->getDataPartStoragePtr(), stream_name, index_helper->getSerializedFileExtension(), stream_name, marks_file_extension, default_codec, settings.max_compress_block_size, diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h index 4b58224de78..ab1adfe7f59 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h @@ -50,7 +50,7 @@ public: { Stream( const String & escaped_column_name_, - const DataPartStorageBuilderPtr & data_part_storage_builder, + const MutableDataPartStoragePtr & data_part_storage, const String & data_path_, const std::string & data_file_extension_, const std::string & marks_path_, @@ -92,8 +92,7 @@ public: using StreamPtr = std::unique_ptr; MergeTreeDataPartWriterOnDisk( - const MergeTreeData::DataPartPtr & data_part_, - DataPartStorageBuilderPtr data_part_storage_builder_, + const MergeTreeMutableDataPartPtr & data_part_, const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot_, const std::vector & indices_to_recalc, diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp index 70654f521a1..62917bcb084 100644 --- 
a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp @@ -71,8 +71,7 @@ Granules getGranulesToWrite(const MergeTreeIndexGranularity & index_granularity, } MergeTreeDataPartWriterWide::MergeTreeDataPartWriterWide( - const MergeTreeData::DataPartPtr & data_part_, - DataPartStorageBuilderPtr data_part_storage_builder_, + const MergeTreeMutableDataPartPtr & data_part_, const NamesAndTypesList & columns_list_, const StorageMetadataPtr & metadata_snapshot_, const std::vector & indices_to_recalc_, @@ -80,7 +79,7 @@ MergeTreeDataPartWriterWide::MergeTreeDataPartWriterWide( const CompressionCodecPtr & default_codec_, const MergeTreeWriterSettings & settings_, const MergeTreeIndexGranularity & index_granularity_) - : MergeTreeDataPartWriterOnDisk(data_part_, std::move(data_part_storage_builder_), columns_list_, metadata_snapshot_, + : MergeTreeDataPartWriterOnDisk(data_part_, columns_list_, metadata_snapshot_, indices_to_recalc_, marks_file_extension_, default_codec_, settings_, index_granularity_) { @@ -117,7 +116,7 @@ void MergeTreeDataPartWriterWide::addStreams( column_streams[stream_name] = std::make_unique( stream_name, - data_part_storage_builder, + data_part->getDataPartStoragePtr(), stream_name, DATA_FILE_EXTENSION, stream_name, marks_file_extension, compression_codec, @@ -356,7 +355,7 @@ void MergeTreeDataPartWriterWide::writeColumn( { ISerialization::SerializeBinaryBulkSettings serialize_settings; serialize_settings.getter = createStreamGetter(name_and_type, offset_columns); - serialization->serializeBinaryBulkStatePrefix(serialize_settings, it->second); + serialization->serializeBinaryBulkStatePrefix(column, serialize_settings, it->second); } const auto & global_settings = storage.getContext()->getSettingsRef(); @@ -421,20 +420,18 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const NameAndTypePai String mrk_path = escaped_name + marks_file_extension; String bin_path = escaped_name + DATA_FILE_EXTENSION; - auto data_part_storage = data_part_storage_builder->getStorage(); - /// Some columns may be removed because of ttl. Skip them. 
- if (!data_part_storage->exists(mrk_path)) + if (!data_part->getDataPartStorage().exists(mrk_path)) return; - auto mrk_file_in = data_part_storage->readFile(mrk_path, {}, std::nullopt, std::nullopt); + auto mrk_file_in = data_part->getDataPartStorage().readFile(mrk_path, {}, std::nullopt, std::nullopt); std::unique_ptr mrk_in; if (data_part->index_granularity_info.mark_type.compressed) mrk_in = std::make_unique(std::move(mrk_file_in)); else mrk_in = std::move(mrk_file_in); - DB::CompressedReadBufferFromFile bin_in(data_part_storage->readFile(bin_path, {}, std::nullopt, std::nullopt)); + DB::CompressedReadBufferFromFile bin_in(data_part->getDataPartStorage().readFile(bin_path, {}, std::nullopt, std::nullopt)); bool must_be_last = false; UInt64 offset_in_compressed_file = 0; UInt64 offset_in_decompressed_block = 0; @@ -485,7 +482,7 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const NameAndTypePai if (index_granularity_rows != index_granularity.getMarkRows(mark_num)) throw Exception( ErrorCodes::LOGICAL_ERROR, "Incorrect mark rows for part {} for mark #{} (compressed offset {}, decompressed offset {}), in-memory {}, on disk {}, total marks {}", - data_part_storage_builder->getFullPath(), mark_num, offset_in_compressed_file, offset_in_decompressed_block, index_granularity.getMarkRows(mark_num), index_granularity_rows, index_granularity.getMarksCount()); + data_part->getDataPartStorage().getFullPath(), mark_num, offset_in_compressed_file, offset_in_decompressed_block, index_granularity.getMarkRows(mark_num), index_granularity_rows, index_granularity.getMarksCount()); auto column = type->createColumn(); diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h index 08815d9930a..633b5119474 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h @@ -18,8 +18,7 @@ class MergeTreeDataPartWriterWide : public MergeTreeDataPartWriterOnDisk { public: MergeTreeDataPartWriterWide( - const MergeTreeData::DataPartPtr & data_part, - DataPartStorageBuilderPtr data_part_storage_builder_, + const MergeTreeMutableDataPartPtr & data_part, const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot, const std::vector & indices_to_recalc, diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 91ecb3a37a0..674e02b16ec 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -107,14 +107,12 @@ static std::string toString(const RelativeSize & x) } /// Converts sample size to an approximate number of rows (ex. `SAMPLE 1000000`) to relative value (ex. `SAMPLE 0.1`). 
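(Editor's note.) The hunk below changes this helper to take the already parsed ASTSampleRatio::Rational instead of re-extracting it from the AST node; the arithmetic itself is unchanged. A simplified, double-based sketch of that arithmetic (the real code works on the RelativeSize rational type), just to make the comment's example concrete:

    #include <algorithm>

    // SAMPLE 1000000 over a table of roughly 10,000,000 rows -> relative sample of 0.1.
    double absoluteSampleToRelative(double absolute_sample_size, double approx_total_rows)
    {
        if (approx_total_rows == 0)
            return 1.0;                                   // mirrors the early return in the patch
        return std::min(1.0, absolute_sample_size / approx_total_rows);
    }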
-static RelativeSize convertAbsoluteSampleSizeToRelative(const ASTPtr & node, size_t approx_total_rows) +static RelativeSize convertAbsoluteSampleSizeToRelative(const ASTSampleRatio::Rational & ratio, size_t approx_total_rows) { if (approx_total_rows == 0) return 1; - const auto & node_sample = node->as(); - - auto absolute_sample_size = node_sample.ratio.numerator / node_sample.ratio.denominator; + auto absolute_sample_size = ratio.numerator / ratio.denominator; return std::min(RelativeSize(1), RelativeSize(absolute_sample_size) / RelativeSize(approx_total_rows)); } @@ -140,7 +138,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::read( const SelectQueryInfo & query_info, ContextPtr context, const UInt64 max_block_size, - const unsigned num_streams, + const size_t num_streams, QueryProcessingStage::Enum processed_stage, std::shared_ptr max_block_numbers_to_read, bool enable_parallel_reading) const @@ -467,7 +465,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::read( } MergeTreeDataSelectSamplingData MergeTreeDataSelectExecutor::getSampling( - const ASTSelectQuery & select, + const SelectQueryInfo & select_query_info, NamesAndTypesList available_real_columns, const MergeTreeData::DataPartsVector & parts, KeyCondition & key_condition, @@ -484,23 +482,42 @@ MergeTreeDataSelectSamplingData MergeTreeDataSelectExecutor::getSampling( RelativeSize relative_sample_size = 0; RelativeSize relative_sample_offset = 0; - auto select_sample_size = select.sampleSize(); - auto select_sample_offset = select.sampleOffset(); + bool final = false; + std::optional sample_size_ratio; + std::optional sample_offset_ratio; - if (select_sample_size) + if (select_query_info.table_expression_modifiers) { - relative_sample_size.assign( - select_sample_size->as().ratio.numerator, - select_sample_size->as().ratio.denominator); + const auto & table_expression_modifiers = *select_query_info.table_expression_modifiers; + final = table_expression_modifiers.hasFinal(); + sample_size_ratio = table_expression_modifiers.getSampleSizeRatio(); + sample_offset_ratio = table_expression_modifiers.getSampleOffsetRatio(); + } + else + { + auto & select = select_query_info.query->as(); + + final = select.final(); + auto select_sample_size = select.sampleSize(); + auto select_sample_offset = select.sampleOffset(); + + if (select_sample_size) + sample_size_ratio = select_sample_size->as().ratio; + + if (select_sample_offset) + sample_offset_ratio = select_sample_offset->as().ratio; + } + + if (sample_size_ratio) + { + relative_sample_size.assign(sample_size_ratio->numerator, sample_size_ratio->denominator); if (relative_sample_size < 0) throw Exception("Negative sample size", ErrorCodes::ARGUMENT_OUT_OF_BOUND); relative_sample_offset = 0; - if (select_sample_offset) - relative_sample_offset.assign( - select_sample_offset->as().ratio.numerator, - select_sample_offset->as().ratio.denominator); + if (sample_offset_ratio) + relative_sample_offset.assign(sample_offset_ratio->numerator, sample_offset_ratio->denominator); if (relative_sample_offset < 0) throw Exception("Negative sample offset", ErrorCodes::ARGUMENT_OUT_OF_BOUND); @@ -513,7 +530,7 @@ MergeTreeDataSelectSamplingData MergeTreeDataSelectExecutor::getSampling( if (relative_sample_size > 1) { - relative_sample_size = convertAbsoluteSampleSizeToRelative(select_sample_size, approx_total_rows); + relative_sample_size = convertAbsoluteSampleSizeToRelative(*sample_size_ratio, approx_total_rows); LOG_DEBUG(log, "Selected relative sample size: {}", toString(relative_sample_size)); } @@ -526,7 
+543,7 @@ MergeTreeDataSelectSamplingData MergeTreeDataSelectExecutor::getSampling( if (relative_sample_offset > 1) { - relative_sample_offset = convertAbsoluteSampleSizeToRelative(select_sample_offset, approx_total_rows); + relative_sample_offset = convertAbsoluteSampleSizeToRelative(*sample_offset_ratio, approx_total_rows); LOG_DEBUG(log, "Selected relative sample offset: {}", toString(relative_sample_offset)); } } @@ -660,7 +677,7 @@ MergeTreeDataSelectSamplingData MergeTreeDataSelectExecutor::getSampling( /// So, assume that we already have calculated column. ASTPtr sampling_key_ast = metadata_snapshot->getSamplingKeyAST(); - if (select.final()) + if (final) { sampling_key_ast = std::make_shared(sampling_key.column_names[0]); /// We do spoil available_real_columns here, but it is not used later. @@ -930,7 +947,7 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd Strings forced_indices; { Tokens tokens(indices.data(), &indices[indices.size()], settings.max_query_size); - IParser::Pos pos(tokens, settings.max_parser_depth); + IParser::Pos pos(tokens, static_cast(settings.max_parser_depth)); Expected expected; if (!parseIdentifiersOrStringLiterals(pos, expected, forced_indices)) throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, "Cannot parse force_data_skipping_indices ('{}')", indices); @@ -1061,6 +1078,10 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd auto current_rows_estimate = ranges.getRowsCount(); size_t prev_total_rows_estimate = total_rows.fetch_add(current_rows_estimate); size_t total_rows_estimate = current_rows_estimate + prev_total_rows_estimate; + if (query_info.limit > 0 && total_rows_estimate > query_info.limit) + { + total_rows_estimate = query_info.limit; + } limits.check(total_rows_estimate, 0, "rows (controlled by 'max_rows_to_read' setting)", ErrorCodes::TOO_MANY_ROWS); leaf_limits.check( total_rows_estimate, 0, "rows (controlled by 'max_rows_to_read_leaf' setting)", ErrorCodes::TOO_MANY_ROWS); @@ -1273,13 +1294,14 @@ static void selectColumnNames( MergeTreeDataSelectAnalysisResultPtr MergeTreeDataSelectExecutor::estimateNumMarksToRead( MergeTreeData::DataPartsVector parts, + const PrewhereInfoPtr & prewhere_info, const Names & column_names_to_return, const StorageMetadataPtr & metadata_snapshot_base, const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & query_info, const ActionDAGNodes & added_filter_nodes, ContextPtr context, - unsigned num_streams, + size_t num_streams, std::shared_ptr max_block_numbers_to_read) const { size_t total_parts = parts.size(); @@ -1297,7 +1319,7 @@ MergeTreeDataSelectAnalysisResultPtr MergeTreeDataSelectExecutor::estimateNumMar return ReadFromMergeTree::selectRangesToRead( std::move(parts), - query_info.prewhere_info, + prewhere_info, added_filter_nodes, metadata_snapshot_base, metadata_snapshot, @@ -1318,7 +1340,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts( const SelectQueryInfo & query_info, ContextPtr context, const UInt64 max_block_size, - const unsigned num_streams, + const size_t num_streams, std::shared_ptr max_block_numbers_to_read, MergeTreeDataSelectAnalysisResultPtr merge_tree_select_result_ptr, bool enable_parallel_reading) const @@ -1618,10 +1640,10 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex( UncompressedCache * uncompressed_cache, Poco::Logger * log) { - if (!index_helper->getDeserializedFormat(part->data_part_storage, index_helper->getFileName())) + if 
(!index_helper->getDeserializedFormat(part->getDataPartStorage(), index_helper->getFileName())) { LOG_DEBUG(log, "File for index {} does not exist ({}.*). Skipping it.", backQuote(index_helper->index.name), - (fs::path(part->data_part_storage->getFullPath()) / index_helper->getFileName()).string()); + (fs::path(part->getDataPartStorage().getFullPath()) / index_helper->getFileName()).string()); return ranges; } @@ -1736,7 +1758,7 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingMergedIndex( { for (const auto & index_helper : indices) { - if (!part->data_part_storage->exists(index_helper->getFileName() + ".idx")) + if (!part->getDataPartStorage().exists(index_helper->getFileName() + ".idx")) { LOG_DEBUG(log, "File for index {} does not exist. Skipping it.", backQuote(index_helper->index.name)); return ranges; diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h index bb44f260eec..e302663597d 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h @@ -33,7 +33,7 @@ public: const SelectQueryInfo & query_info, ContextPtr context, UInt64 max_block_size, - unsigned num_streams, + size_t num_streams, QueryProcessingStage::Enum processed_stage, std::shared_ptr max_block_numbers_to_read = nullptr, bool enable_parallel_reading = false) const; @@ -46,7 +46,7 @@ public: const SelectQueryInfo & query_info, ContextPtr context, UInt64 max_block_size, - unsigned num_streams, + size_t num_streams, std::shared_ptr max_block_numbers_to_read = nullptr, MergeTreeDataSelectAnalysisResultPtr merge_tree_select_result_ptr = nullptr, bool enable_parallel_reading = false) const; @@ -56,13 +56,14 @@ public: /// This method is used to select best projection for table. MergeTreeDataSelectAnalysisResultPtr estimateNumMarksToRead( MergeTreeData::DataPartsVector parts, + const PrewhereInfoPtr & prewhere_info, const Names & column_names, const StorageMetadataPtr & metadata_snapshot_base, const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & query_info, const ActionDAGNodes & added_filter_nodes, ContextPtr context, - unsigned num_streams, + size_t num_streams, std::shared_ptr max_block_numbers_to_read = nullptr) const; private: @@ -201,7 +202,7 @@ public: /// Also, calculate _sample_factor if needed. /// Also, update key condition with selected sampling range. 
static MergeTreeDataSelectSamplingData getSampling( - const ASTSelectQuery & select, + const SelectQueryInfo & select_query_info, NamesAndTypesList available_real_columns, const MergeTreeData::DataPartsVector & parts, KeyCondition & key_condition, diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 7b99819340e..815e62848a2 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -288,7 +288,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPart( auto columns = metadata_snapshot->getColumns().getAllPhysical().filter(block.getNames()); for (auto & column : columns) - if (isObject(column.type)) + if (column.type->hasDynamicSubcolumns()) column.type = block.getByName(column.name).type; static const String TMP_PREFIX = "tmp_insert_"; @@ -378,10 +378,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPart( data.relative_data_path, TMP_PREFIX + part_name); - auto data_part_storage_builder = std::make_shared( - data_part_volume, - data.relative_data_path, - TMP_PREFIX + part_name); + data_part_storage->beginTransaction(); auto new_data_part = data.createPart( part_name, @@ -408,15 +405,15 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPart( if (new_data_part->isStoredOnDisk()) { /// The name could be non-unique in case of stale files from previous runs. - String full_path = new_data_part->data_part_storage->getFullPath(); + String full_path = new_data_part->getDataPartStorage().getFullPath(); - if (new_data_part->data_part_storage->exists()) + if (new_data_part->getDataPartStorage().exists()) { LOG_WARNING(log, "Removing old temporary directory {}", full_path); - data_part_storage_builder->removeRecursive(); + data_part_storage->removeRecursive(); } - data_part_storage_builder->createDirectories(); + data_part_storage->createDirectories(); if (data.getSettings()->fsync_part_directory) { @@ -448,7 +445,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPart( auto compression_codec = data.getContext()->chooseCompressionCodec(0, 0); const auto & index_factory = MergeTreeIndexFactory::instance(); - auto out = std::make_unique(new_data_part, data_part_storage_builder, metadata_snapshot, columns, + auto out = std::make_unique(new_data_part, metadata_snapshot, columns, index_factory.getMany(metadata_snapshot->getSecondaryIndices()), compression_codec, context->getCurrentTransaction(), false, false, context->getWriteSettings()); @@ -459,9 +456,8 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPart( auto projection_block = projection.calculate(block, context); if (projection_block.rows()) { - auto proj_temp_part = writeProjectionPart(data, log, projection_block, projection, data_part_storage_builder, new_data_part.get()); + auto proj_temp_part = writeProjectionPart(data, log, projection_block, projection, new_data_part.get()); new_data_part->addProjectionPart(projection.name, std::move(proj_temp_part.part)); - proj_temp_part.builder->commit(); for (auto & stream : proj_temp_part.streams) temp_part.streams.emplace_back(std::move(stream)); } @@ -473,7 +469,6 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPart( nullptr, nullptr); temp_part.part = new_data_part; - temp_part.builder = data_part_storage_builder; temp_part.streams.emplace_back(TemporaryPart::Stream{.stream = std::move(out), .finalizer = std::move(finalizer)}); 
ProfileEvents::increment(ProfileEvents::MergeTreeDataWriterRows, block.rows()); @@ -485,11 +480,8 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPart( MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeProjectionPartImpl( const String & part_name, - MergeTreeDataPartType part_type, - const String & relative_path, - const DataPartStorageBuilderPtr & data_part_storage_builder, bool is_temp, - const IMergeTreeDataPart * parent_part, + IMergeTreeDataPart * parent_part, const MergeTreeData & data, Poco::Logger * log, Block block, @@ -498,7 +490,23 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeProjectionPartImpl( TemporaryPart temp_part; const StorageMetadataPtr & metadata_snapshot = projection.metadata; MergeTreePartInfo new_part_info("all", 0, 0, 0); - auto projection_part_storage = parent_part->data_part_storage->getProjection(relative_path); + + MergeTreeDataPartType part_type; + if (parent_part->getType() == MergeTreeDataPartType::InMemory) + { + part_type = MergeTreeDataPartType::InMemory; + } + else + { + /// Size of part would not be greater than block.bytes() + epsilon + size_t expected_size = block.bytes(); + // just check if there is enough space on parent volume + data.reserveSpace(expected_size, parent_part->getDataPartStorage()); + part_type = data.choosePartTypeOnDisk(expected_size, block.rows()); + } + + auto relative_path = part_name + (is_temp ? ".tmp_proj" : ".proj"); + auto projection_part_storage = parent_part->getDataPartStorage().getProjection(relative_path); auto new_data_part = data.createPart( part_name, part_type, @@ -506,7 +514,6 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeProjectionPartImpl( projection_part_storage, parent_part); - auto projection_part_storage_builder = data_part_storage_builder->getProjection(relative_path); new_data_part->is_temp = is_temp; NamesAndTypesList columns = metadata_snapshot->getColumns().getAllPhysical().filter(block.getNames()); @@ -522,10 +529,10 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeProjectionPartImpl( if (projection_part_storage->exists()) { LOG_WARNING(log, "Removing old temporary directory {}", projection_part_storage->getFullPath()); - projection_part_storage_builder->removeRecursive(); + projection_part_storage->removeRecursive(); } - projection_part_storage_builder->createDirectories(); + projection_part_storage->createDirectories(); } /// If we need to calculate some columns to sort. 
@@ -569,7 +576,6 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeProjectionPartImpl( auto out = std::make_unique( new_data_part, - projection_part_storage_builder, metadata_snapshot, columns, MergeTreeIndices{}, @@ -580,7 +586,6 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeProjectionPartImpl( out->writeWithPermutation(block, perm_ptr); auto finalizer = out->finalizePartAsync(new_data_part, false); temp_part.part = new_data_part; - temp_part.builder = projection_part_storage_builder; temp_part.streams.emplace_back(TemporaryPart::Stream{.stream = std::move(out), .finalizer = std::move(finalizer)}); ProfileEvents::increment(ProfileEvents::MergeTreeDataProjectionWriterRows, block.rows()); @@ -591,98 +596,40 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeProjectionPartImpl( } MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeProjectionPart( - MergeTreeData & data, + const MergeTreeData & data, Poco::Logger * log, Block block, const ProjectionDescription & projection, - const DataPartStorageBuilderPtr & data_part_storage_builder, - const IMergeTreeDataPart * parent_part) + IMergeTreeDataPart * parent_part) { - String part_name = projection.name; - MergeTreeDataPartType part_type; - if (parent_part->getType() == MergeTreeDataPartType::InMemory) - { - part_type = MergeTreeDataPartType::InMemory; - } - else - { - /// Size of part would not be greater than block.bytes() + epsilon - size_t expected_size = block.bytes(); - // just check if there is enough space on parent volume - data.reserveSpace(expected_size, data_part_storage_builder); - part_type = data.choosePartTypeOnDisk(expected_size, block.rows()); - } - return writeProjectionPartImpl( - part_name, - part_type, - part_name + ".proj" /* relative_path */, - data_part_storage_builder, + projection.name, false /* is_temp */, parent_part, data, log, - block, + std::move(block), projection); } /// This is used for projection materialization process which may contain multiple stages of /// projection part merges. 
MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempProjectionPart( - MergeTreeData & data, - Poco::Logger * log, - Block block, - const ProjectionDescription & projection, - const DataPartStorageBuilderPtr & data_part_storage_builder, - const IMergeTreeDataPart * parent_part, - size_t block_num) -{ - String part_name = fmt::format("{}_{}", projection.name, block_num); - MergeTreeDataPartType part_type; - if (parent_part->getType() == MergeTreeDataPartType::InMemory) - { - part_type = MergeTreeDataPartType::InMemory; - } - else - { - /// Size of part would not be greater than block.bytes() + epsilon - size_t expected_size = block.bytes(); - // just check if there is enough space on parent volume - data.reserveSpace(expected_size, data_part_storage_builder); - part_type = data.choosePartTypeOnDisk(expected_size, block.rows()); - } - - return writeProjectionPartImpl( - part_name, - part_type, - part_name + ".tmp_proj" /* relative_path */, - data_part_storage_builder, - true /* is_temp */, - parent_part, - data, - log, - block, - projection); -} - -MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeInMemoryProjectionPart( const MergeTreeData & data, Poco::Logger * log, Block block, const ProjectionDescription & projection, - const DataPartStorageBuilderPtr & data_part_storage_builder, - const IMergeTreeDataPart * parent_part) + IMergeTreeDataPart * parent_part, + size_t block_num) { + String part_name = fmt::format("{}_{}", projection.name, block_num); return writeProjectionPartImpl( - projection.name, - MergeTreeDataPartType::InMemory, - projection.name + ".proj" /* relative_path */, - data_part_storage_builder, - false /* is_temp */, + part_name, + true /* is_temp */, parent_part, data, log, - block, + std::move(block), projection); } diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.h b/src/Storages/MergeTree/MergeTreeDataWriter.h index 00438a29fa1..8c2bf66e8f8 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.h +++ b/src/Storages/MergeTree/MergeTreeDataWriter.h @@ -52,7 +52,6 @@ public: struct TemporaryPart { MergeTreeData::MutableDataPartPtr part; - DataPartStorageBuilderPtr builder; struct Stream { @@ -74,31 +73,20 @@ public: /// For insertion. static TemporaryPart writeProjectionPart( - MergeTreeData & data, - Poco::Logger * log, - Block block, - const ProjectionDescription & projection, - const DataPartStorageBuilderPtr & data_part_storage_builder, - const IMergeTreeDataPart * parent_part); - - /// For mutation: MATERIALIZE PROJECTION. - static TemporaryPart writeTempProjectionPart( - MergeTreeData & data, - Poco::Logger * log, - Block block, - const ProjectionDescription & projection, - const DataPartStorageBuilderPtr & data_part_storage_builder, - const IMergeTreeDataPart * parent_part, - size_t block_num); - - /// For WriteAheadLog AddPart. - static TemporaryPart writeInMemoryProjectionPart( const MergeTreeData & data, Poco::Logger * log, Block block, const ProjectionDescription & projection, - const DataPartStorageBuilderPtr & data_part_storage_builder, - const IMergeTreeDataPart * parent_part); + IMergeTreeDataPart * parent_part); + + /// For mutation: MATERIALIZE PROJECTION. 
+ static TemporaryPart writeTempProjectionPart( + const MergeTreeData & data, + Poco::Logger * log, + Block block, + const ProjectionDescription & projection, + IMergeTreeDataPart * parent_part, + size_t block_num); static Block mergeBlock( const Block & block, @@ -110,18 +98,14 @@ public: private: static TemporaryPart writeProjectionPartImpl( const String & part_name, - MergeTreeDataPartType part_type, - const String & relative_path, - const DataPartStorageBuilderPtr & data_part_storage_builder, bool is_temp, - const IMergeTreeDataPart * parent_part, + IMergeTreeDataPart * parent_part, const MergeTreeData & data, Poco::Logger * log, Block block, const ProjectionDescription & projection); MergeTreeData & data; - Poco::Logger * log; }; diff --git a/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp b/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp index 3b16998337e..052834358bb 100644 --- a/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp @@ -9,6 +9,7 @@ #include #include #include +#include namespace DB @@ -64,9 +65,11 @@ uint64_t AnnoyIndex::getNumOfDimensions() const namespace ErrorCodes { - extern const int LOGICAL_ERROR; - extern const int INCORRECT_QUERY; + extern const int ILLEGAL_COLUMN; extern const int INCORRECT_DATA; + extern const int INCORRECT_NUMBER_OF_COLUMNS; + extern const int INCORRECT_QUERY; + extern const int LOGICAL_ERROR; } MergeTreeIndexGranuleAnnoy::MergeTreeIndexGranuleAnnoy(const String & index_name_, const Block & index_sample_block_) @@ -113,7 +116,7 @@ MergeTreeIndexAggregatorAnnoy::MergeTreeIndexAggregatorAnnoy( MergeTreeIndexGranulePtr MergeTreeIndexAggregatorAnnoy::getGranuleAndReset() { // NOLINTNEXTLINE(*) - index->build(number_of_trees, /*number_of_threads=*/1); + index->build(static_cast(number_of_trees), /*number_of_threads=*/1); auto granule = std::make_shared(index_name, index_sample_block, index); index = nullptr; return granule; @@ -132,9 +135,7 @@ void MergeTreeIndexAggregatorAnnoy::update(const Block & block, size_t * pos, si return; if (index_sample_block.columns() > 1) - { throw Exception("Only one column is supported", ErrorCodes::LOGICAL_ERROR); - } auto index_column_name = index_sample_block.getByPosition(0).name; const auto & column_cut = block.getByName(index_column_name).column->cut(*pos, rows_read); @@ -144,27 +145,22 @@ void MergeTreeIndexAggregatorAnnoy::update(const Block & block, size_t * pos, si const auto & data = column_array->getData(); const auto & array = typeid_cast(data).getData(); if (array.empty()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Array have 0 rows, but {} expected", rows_read); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Array has 0 rows, {} rows expected", rows_read); const auto & offsets = column_array->getOffsets(); size_t num_rows = offsets.size(); - /// All sizes are the same + /// Check all sizes are the same size_t size = offsets[0]; for (size_t i = 0; i < num_rows - 1; ++i) - { if (offsets[i + 1] - offsets[i] != size) - { throw Exception(ErrorCodes::INCORRECT_DATA, "Arrays should have same length"); - } - } + index = std::make_shared(size); index->add_item(index->get_n_items(), array.data()); /// add all rows from 1 to num_rows - 1 (this is the same as the beginning of the last element) for (size_t current_row = 1; current_row < num_rows; ++current_row) - { index->add_item(index->get_n_items(), &array[offsets[current_row - 1]]); - } } else { @@ -181,19 +177,13 @@ void MergeTreeIndexAggregatorAnnoy::update(const Block & block, size_t * pos, si { const auto& 
pod_array = typeid_cast(column.get())->getData(); for (size_t i = 0; i < pod_array.size(); ++i) - { data[i].push_back(pod_array[i]); - } } assert(!data.empty()); if (!index) - { index = std::make_shared(data[0].size()); - } for (const auto& item : data) - { index->add_item(index->get_n_items(), item.data()); - } } *pos += rows_read; @@ -222,7 +212,7 @@ std::vector MergeTreeIndexConditionAnnoy::getUsefulRanges(MergeTreeIndex { UInt64 limit = condition.getLimit(); UInt64 index_granularity = condition.getIndexGranularity(); - std::optional comp_dist = condition.getQueryType() == ANN::ANNQueryInformation::Type::Where ? + std::optional comp_dist = condition.getQueryType() == ApproximateNearestNeighbour::ANNQueryInformation::Type::Where ? std::optional(condition.getComparisonDistanceForWhereQuery()) : std::nullopt; if (comp_dist && comp_dist.value() < 0) @@ -232,16 +222,13 @@ std::vector MergeTreeIndexConditionAnnoy::getUsefulRanges(MergeTreeIndex auto granule = std::dynamic_pointer_cast(idx_granule); if (granule == nullptr) - { throw Exception("Granule has the wrong type", ErrorCodes::LOGICAL_ERROR); - } + auto annoy = granule->index; if (condition.getNumOfDimensions() != annoy->getNumOfDimensions()) - { throw Exception("The dimension of the space in the request (" + toString(condition.getNumOfDimensions()) + ") " + "does not match with the dimension in the index (" + toString(annoy->getNumOfDimensions()) + ")", ErrorCodes::INCORRECT_QUERY); - } /// neighbors contain indexes of dots which were closest to target vector std::vector neighbors; @@ -268,23 +255,25 @@ std::vector MergeTreeIndexConditionAnnoy::getUsefulRanges(MergeTreeIndex for (size_t i = 0; i < neighbors.size(); ++i) { if (comp_dist && distances[i] > comp_dist) - { continue; - } granule_numbers.insert(neighbors[i] / index_granularity); } std::vector result_vector; result_vector.reserve(granule_numbers.size()); for (auto granule_number : granule_numbers) - { result_vector.push_back(granule_number); - } return result_vector; } +MergeTreeIndexAnnoy::MergeTreeIndexAnnoy(const IndexDescription & index_, uint64_t number_of_trees_) + : IMergeTreeIndex(index_) + , number_of_trees(number_of_trees_) +{ +} + MergeTreeIndexGranulePtr MergeTreeIndexAnnoy::createIndexGranule() const { return std::make_shared(index.name, index.sample_block); @@ -307,6 +296,40 @@ MergeTreeIndexPtr annoyIndexCreator(const IndexDescription & index) return std::make_shared(index, param); } +static void assertIndexColumnsType(const Block & header) +{ + DataTypePtr column_data_type_ptr = header.getDataTypes()[0]; + + if (const auto * array_type = typeid_cast(column_data_type_ptr.get())) + { + TypeIndex nested_type_index = array_type->getNestedType()->getTypeId(); + if (!WhichDataType(nested_type_index).isFloat32()) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Unexpected type {} of Annoy index. Only Array(Float32) and Tuple(Float32) are supported.", + column_data_type_ptr->getName()); + } + else if (const auto * tuple_type = typeid_cast(column_data_type_ptr.get())) + { + const DataTypes & nested_types = tuple_type->getElements(); + for (const auto & type : nested_types) + { + TypeIndex nested_type_index = type->getTypeId(); + if (!WhichDataType(nested_type_index).isFloat32()) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Unexpected type {} of Annoy index. Only Array(Float32) and Tuple(Float32) are supported.", + column_data_type_ptr->getName()); + } + } + else + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Unexpected type {} of Annoy index. 
Only Array(Float32) and Tuple(Float32) are supported.", + column_data_type_ptr->getName()); + +} + void annoyIndexValidator(const IndexDescription & index, bool /* attach */) { if (index.arguments.size() != 1) @@ -317,6 +340,11 @@ void annoyIndexValidator(const IndexDescription & index, bool /* attach */) { throw Exception("Annoy index argument must be UInt64.", ErrorCodes::INCORRECT_QUERY); } + + if (index.column_names.size() != 1 || index.data_types.size() != 1) + throw Exception("Annoy indexes must be created on a single column", ErrorCodes::INCORRECT_NUMBER_OF_COLUMNS); + + assertIndexColumnsType(index.sample_block); } } diff --git a/src/Storages/MergeTree/MergeTreeIndexAnnoy.h b/src/Storages/MergeTree/MergeTreeIndexAnnoy.h index 85bbb0a1bd2..6a844947bd2 100644 --- a/src/Storages/MergeTree/MergeTreeIndexAnnoy.h +++ b/src/Storages/MergeTree/MergeTreeIndexAnnoy.h @@ -10,8 +10,6 @@ namespace DB { -namespace ANN = ApproximateNearestNeighbour; - // auxiliary namespace for working with spotify-annoy library // mainly for serialization and deserialization of the index namespace ApproximateNearestNeighbour @@ -33,7 +31,7 @@ namespace ApproximateNearestNeighbour struct MergeTreeIndexGranuleAnnoy final : public IMergeTreeIndexGranule { - using AnnoyIndex = ANN::AnnoyIndex<>; + using AnnoyIndex = ApproximateNearestNeighbour::AnnoyIndex<>; using AnnoyIndexPtr = std::shared_ptr; MergeTreeIndexGranuleAnnoy(const String & index_name_, const Block & index_sample_block_); @@ -57,7 +55,7 @@ struct MergeTreeIndexGranuleAnnoy final : public IMergeTreeIndexGranule struct MergeTreeIndexAggregatorAnnoy final : IMergeTreeIndexAggregator { - using AnnoyIndex = ANN::AnnoyIndex<>; + using AnnoyIndex = ApproximateNearestNeighbour::AnnoyIndex<>; using AnnoyIndexPtr = std::shared_ptr; MergeTreeIndexAggregatorAnnoy(const String & index_name_, const Block & index_sample_block, uint64_t number_of_trees); @@ -74,7 +72,7 @@ struct MergeTreeIndexAggregatorAnnoy final : IMergeTreeIndexAggregator }; -class MergeTreeIndexConditionAnnoy final : public ANN::IMergeTreeIndexConditionAnn +class MergeTreeIndexConditionAnnoy final : public ApproximateNearestNeighbour::IMergeTreeIndexConditionAnn { public: MergeTreeIndexConditionAnnoy( @@ -91,18 +89,14 @@ public: ~MergeTreeIndexConditionAnnoy() override = default; private: - ANN::ANNCondition condition; + ApproximateNearestNeighbour::ANNCondition condition; }; class MergeTreeIndexAnnoy : public IMergeTreeIndex { public: - MergeTreeIndexAnnoy(const IndexDescription & index_, uint64_t number_of_trees_) - : IMergeTreeIndex(index_) - , number_of_trees(number_of_trees_) - {} - + MergeTreeIndexAnnoy(const IndexDescription & index_, uint64_t number_of_trees_); ~MergeTreeIndexAnnoy() override = default; MergeTreeIndexGranulePtr createIndexGranule() const override; diff --git a/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp b/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp index 3dd0568107e..be7118066bb 100644 --- a/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp @@ -6,11 +6,13 @@ #include #include #include +#include #include #include #include #include #include +#include #include #include #include @@ -28,19 +30,7 @@ namespace ErrorCodes namespace { -PreparedSetKey getPreparedSetKey(const ASTPtr & node, const DataTypePtr & data_type) -{ - /// If the data type is tuple, let's try unbox once - if (node->as() || node->as()) - return PreparedSetKey::forSubquery(*node); - - if (const 
auto * date_type_tuple = typeid_cast(&*data_type)) - return PreparedSetKey::forLiteral(*node, date_type_tuple->getElements()); - - return PreparedSetKey::forLiteral(*node, DataTypes(1, data_type)); -} - -ColumnWithTypeAndName getPreparedSetInfo(const SetPtr & prepared_set) +ColumnWithTypeAndName getPreparedSetInfo(const ConstSetPtr & prepared_set) { if (prepared_set->getDataTypes().size() == 1) return {prepared_set->getSetElements()[0], prepared_set->getElementsTypes()[0], "dummy"}; @@ -110,8 +100,22 @@ MergeTreeIndexConditionBloomFilter::MergeTreeIndexConditionBloomFilter( const SelectQueryInfo & info_, ContextPtr context_, const Block & header_, size_t hash_functions_) : WithContext(context_), header(header_), query_info(info_), hash_functions(hash_functions_) { - auto atom_from_ast = [this](auto & node, auto, auto & constants, auto & out) { return traverseAtomAST(node, constants, out); }; - rpn = std::move(RPNBuilder(info_, getContext(), atom_from_ast).extractRPN()); + ASTPtr filter_node = buildFilterNode(query_info.query); + + if (!filter_node) + { + rpn.push_back(RPNElement::FUNCTION_UNKNOWN); + return; + } + + auto block_with_constants = KeyCondition::getBlockWithConstants(query_info.query, query_info.syntax_analyzer_result, context_); + RPNBuilder builder( + filter_node, + context_, + std::move(block_with_constants), + query_info.prepared_sets, + [&](const RPNBuilderTreeNode & node, RPNElement & out) { return extractAtomFromTree(node, out); }); + rpn = std::move(builder).extractRPN(); } bool MergeTreeIndexConditionBloomFilter::alwaysUnknownOrTrue() const @@ -235,12 +239,13 @@ bool MergeTreeIndexConditionBloomFilter::mayBeTrueOnGranule(const MergeTreeIndex return rpn_stack[0].can_be_true; } -bool MergeTreeIndexConditionBloomFilter::traverseAtomAST(const ASTPtr & node, Block & block_with_constants, RPNElement & out) +bool MergeTreeIndexConditionBloomFilter::extractAtomFromTree(const RPNBuilderTreeNode & node, RPNElement & out) { { Field const_value; DataTypePtr const_type; - if (KeyCondition::getConstant(node, block_with_constants, const_value, const_type)) + + if (node.tryGetConstant(const_value, const_type)) { if (const_value.getType() == Field::Types::UInt64) { @@ -262,56 +267,62 @@ bool MergeTreeIndexConditionBloomFilter::traverseAtomAST(const ASTPtr & node, Bl } } - return traverseFunction(node, block_with_constants, out, nullptr); + return traverseFunction(node, out, nullptr /*parent*/); } -bool MergeTreeIndexConditionBloomFilter::traverseFunction(const ASTPtr & node, Block & block_with_constants, RPNElement & out, const ASTPtr & parent) +bool MergeTreeIndexConditionBloomFilter::traverseFunction(const RPNBuilderTreeNode & node, RPNElement & out, const RPNBuilderTreeNode * parent) { bool maybe_useful = false; - if (const auto * function = node->as()) + if (node.isFunction()) { - if (!function->arguments) - return false; + const auto function = node.toFunctionNode(); + auto arguments_size = function.getArgumentsSize(); + auto function_name = function.getFunctionName(); - const ASTs & arguments = function->arguments->children; - for (const auto & arg : arguments) + for (size_t i = 0; i < arguments_size; ++i) { - if (traverseFunction(arg, block_with_constants, out, node)) + auto argument = function.getArgumentAt(i); + if (traverseFunction(argument, out, &node)) maybe_useful = true; } - if (arguments.size() != 2) + if (arguments_size != 2) return false; - if (functionIsInOrGlobalInOperator(function->name)) - { - auto prepared_set = getPreparedSet(arguments[1]); + auto lhs_argument = 
function.getArgumentAt(0); + auto rhs_argument = function.getArgumentAt(1); - if (prepared_set) + if (functionIsInOrGlobalInOperator(function_name)) + { + ConstSetPtr prepared_set = rhs_argument.tryGetPreparedSet(); + + if (prepared_set && prepared_set->hasExplicitSetElements()) { - if (traverseASTIn(function->name, arguments[0], prepared_set, out)) + const auto prepared_info = getPreparedSetInfo(prepared_set); + if (traverseTreeIn(function_name, lhs_argument, prepared_set, prepared_info.type, prepared_info.column, out)) maybe_useful = true; } } - else if (function->name == "equals" || - function->name == "notEquals" || - function->name == "has" || - function->name == "mapContains" || - function->name == "indexOf" || - function->name == "hasAny" || - function->name == "hasAll") + else if (function_name == "equals" || + function_name == "notEquals" || + function_name == "has" || + function_name == "mapContains" || + function_name == "indexOf" || + function_name == "hasAny" || + function_name == "hasAll") { Field const_value; DataTypePtr const_type; - if (KeyCondition::getConstant(arguments[1], block_with_constants, const_value, const_type)) + + if (rhs_argument.tryGetConstant(const_value, const_type)) { - if (traverseASTEquals(function->name, arguments[0], const_type, const_value, out, parent)) + if (traverseTreeEquals(function_name, lhs_argument, const_type, const_value, out, parent)) maybe_useful = true; } - else if (KeyCondition::getConstant(arguments[0], block_with_constants, const_value, const_type)) + else if (lhs_argument.tryGetConstant(const_value, const_type)) { - if (traverseASTEquals(function->name, arguments[1], const_type, const_value, out, parent)) + if (traverseTreeEquals(function_name, rhs_argument, const_type, const_value, out, parent)) maybe_useful = true; } } @@ -320,28 +331,20 @@ bool MergeTreeIndexConditionBloomFilter::traverseFunction(const ASTPtr & node, B return maybe_useful; } -bool MergeTreeIndexConditionBloomFilter::traverseASTIn( +bool MergeTreeIndexConditionBloomFilter::traverseTreeIn( const String & function_name, - const ASTPtr & key_ast, - const SetPtr & prepared_set, - RPNElement & out) -{ - const auto prepared_info = getPreparedSetInfo(prepared_set); - return traverseASTIn(function_name, key_ast, prepared_set, prepared_info.type, prepared_info.column, out); -} - -bool MergeTreeIndexConditionBloomFilter::traverseASTIn( - const String & function_name, - const ASTPtr & key_ast, - const SetPtr & prepared_set, + const RPNBuilderTreeNode & key_node, + const ConstSetPtr & prepared_set, const DataTypePtr & type, const ColumnPtr & column, RPNElement & out) { - if (header.has(key_ast->getColumnName())) + auto key_node_column_name = key_node.getColumnName(); + + if (header.has(key_node_column_name)) { size_t row_size = column->size(); - size_t position = header.getPositionByName(key_ast->getColumnName()); + size_t position = header.getPositionByName(key_node_column_name); const DataTypePtr & index_type = header.getByPosition(position).type; const auto & converted_column = castColumn(ColumnWithTypeAndName{column, type, ""}, index_type); out.predicate.emplace_back(std::make_pair(position, BloomFilterHash::hashWithColumn(index_type, converted_column, 0, row_size))); @@ -355,30 +358,33 @@ bool MergeTreeIndexConditionBloomFilter::traverseASTIn( return true; } - if (const auto * function = key_ast->as()) + if (key_node.isFunction()) { + auto key_node_function = key_node.toFunctionNode(); + auto key_node_function_name = key_node_function.getFunctionName(); + size_t 
key_node_function_arguments_size = key_node_function.getArgumentsSize(); + WhichDataType which(type); - if (which.isTuple() && function->name == "tuple") + if (which.isTuple() && key_node_function_name == "tuple") { const auto & tuple_column = typeid_cast(column.get()); const auto & tuple_data_type = typeid_cast(type.get()); - const ASTs & arguments = typeid_cast(*function->arguments).children; - if (tuple_data_type->getElements().size() != arguments.size() || tuple_column->getColumns().size() != arguments.size()) + if (tuple_data_type->getElements().size() != key_node_function_arguments_size || tuple_column->getColumns().size() != key_node_function_arguments_size) throw Exception("Illegal types of arguments of function " + function_name, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); bool match_with_subtype = false; const auto & sub_columns = tuple_column->getColumns(); const auto & sub_data_types = tuple_data_type->getElements(); - for (size_t index = 0; index < arguments.size(); ++index) - match_with_subtype |= traverseASTIn(function_name, arguments[index], nullptr, sub_data_types[index], sub_columns[index], out); + for (size_t index = 0; index < key_node_function_arguments_size; ++index) + match_with_subtype |= traverseTreeIn(function_name, key_node_function.getArgumentAt(index), nullptr, sub_data_types[index], sub_columns[index], out); return match_with_subtype; } - if (function->name == "arrayElement") + if (key_node_function_name == "arrayElement") { /** Try to parse arrayElement for mapKeys index. * It is important to ignore keys like column_map['Key'] IN ('') because if key does not exists in map @@ -387,7 +393,6 @@ bool MergeTreeIndexConditionBloomFilter::traverseASTIn( * We cannot skip keys that does not exist in map if comparison is with default type value because * that way we skip necessary granules where map key does not exists. 
*/ - if (!prepared_set) return false; @@ -400,28 +405,26 @@ bool MergeTreeIndexConditionBloomFilter::traverseASTIn( if (set_contain_default_value) return false; - const auto * column_ast_identifier = function->arguments.get()->children[0].get()->as(); - if (!column_ast_identifier) - return false; - - const auto & col_name = column_ast_identifier->name(); - auto map_keys_index_column_name = fmt::format("mapKeys({})", col_name); - auto map_values_index_column_name = fmt::format("mapValues({})", col_name); + auto first_argument = key_node_function.getArgumentAt(0); + const auto column_name = first_argument.getColumnName(); + auto map_keys_index_column_name = fmt::format("mapKeys({})", column_name); + auto map_values_index_column_name = fmt::format("mapValues({})", column_name); if (header.has(map_keys_index_column_name)) { /// For mapKeys we serialize key argument with bloom filter - auto & argument = function->arguments.get()->children[1]; + auto second_argument = key_node_function.getArgumentAt(1); - if (const auto * literal = argument->as()) + Field constant_value; + DataTypePtr constant_type; + + if (second_argument.tryGetConstant(constant_value, constant_type)) { size_t position = header.getPositionByName(map_keys_index_column_name); const DataTypePtr & index_type = header.getByPosition(position).type; - - auto element_key = literal->value; const DataTypePtr actual_type = BloomFilter::getPrimitiveType(index_type); - out.predicate.emplace_back(std::make_pair(position, BloomFilterHash::hashWithField(actual_type.get(), element_key))); + out.predicate.emplace_back(std::make_pair(position, BloomFilterHash::hashWithField(actual_type.get(), constant_value))); } else { @@ -459,74 +462,97 @@ bool MergeTreeIndexConditionBloomFilter::traverseASTIn( } -static bool indexOfCanUseBloomFilter(const ASTPtr & parent) +static bool indexOfCanUseBloomFilter(const RPNBuilderTreeNode * parent) { if (!parent) return true; + if (!parent->isFunction()) + return false; + + auto function = parent->toFunctionNode(); + auto function_name = function.getFunctionName(); + /// `parent` is a function where `indexOf` is located. /// Example: `indexOf(arr, x) = 1`, parent is a function named `equals`. - if (const auto * function = parent->as()) + if (function_name == "and") { - if (function->name == "and") + return true; + } + else if (function_name == "equals" /// notEquals is not applicable + || function_name == "greater" || function_name == "greaterOrEquals" + || function_name == "less" || function_name == "lessOrEquals") + { + size_t function_arguments_size = function.getArgumentsSize(); + if (function_arguments_size != 2) + return false; + + /// We don't allow constant expressions like `indexOf(arr, x) = 1 + 0` but it's negligible. + + /// We should return true when the corresponding expression implies that the array contains the element. + /// Example: when `indexOf(arr, x)` > 10 is written, it means that arr definitely should contain the element + /// (at least at 11th position but it does not matter). + + bool reversed = false; + Field constant_value; + DataTypePtr constant_type; + + if (function.getArgumentAt(0).tryGetConstant(constant_value, constant_type)) { + reversed = true; + } + else if (function.getArgumentAt(1).tryGetConstant(constant_value, constant_type)) + { + } + else + { + return false; + } + + Field zero(0); + bool constant_equal_zero = applyVisitor(FieldVisitorAccurateEquals(), constant_value, zero); + + if (function_name == "equals" && !constant_equal_zero) + { + /// indexOf(...) 
= c, c != 0 return true; } - else if (function->name == "equals" /// notEquals is not applicable - || function->name == "greater" || function->name == "greaterOrEquals" - || function->name == "less" || function->name == "lessOrEquals") + else if (function_name == "notEquals" && constant_equal_zero) { - if (function->arguments->children.size() != 2) - return false; - - /// We don't allow constant expressions like `indexOf(arr, x) = 1 + 0` but it's negligible. - - /// We should return true when the corresponding expression implies that the array contains the element. - /// Example: when `indexOf(arr, x)` > 10 is written, it means that arr definitely should contain the element - /// (at least at 11th position but it does not matter). - - bool reversed = false; - const ASTLiteral * constant = nullptr; - - if (const ASTLiteral * left = function->arguments->children[0]->as()) - { - constant = left; - reversed = true; - } - else if (const ASTLiteral * right = function->arguments->children[1]->as()) - { - constant = right; - } - else - return false; - - Field zero(0); - return (function->name == "equals" /// indexOf(...) = c, c != 0 - && !applyVisitor(FieldVisitorAccurateEquals(), constant->value, zero)) - || (function->name == "notEquals" /// indexOf(...) != c, c = 0 - && applyVisitor(FieldVisitorAccurateEquals(), constant->value, zero)) - || (function->name == (reversed ? "less" : "greater") /// indexOf(...) > c, c >= 0 - && !applyVisitor(FieldVisitorAccurateLess(), constant->value, zero)) - || (function->name == (reversed ? "lessOrEquals" : "greaterOrEquals") /// indexOf(...) >= c, c > 0 - && applyVisitor(FieldVisitorAccurateLess(), zero, constant->value)); + /// indexOf(...) != c, c = 0 + return true; } + else if (function_name == (reversed ? "less" : "greater") && !applyVisitor(FieldVisitorAccurateLess(), constant_value, zero)) + { + /// indexOf(...) > c, c >= 0 + return true; + } + else if (function_name == (reversed ? "lessOrEquals" : "greaterOrEquals") && applyVisitor(FieldVisitorAccurateLess(), zero, constant_value)) + { + /// indexOf(...) 
>= c, c > 0 + return true; + } + + return false; } return false; } -bool MergeTreeIndexConditionBloomFilter::traverseASTEquals( +bool MergeTreeIndexConditionBloomFilter::traverseTreeEquals( const String & function_name, - const ASTPtr & key_ast, + const RPNBuilderTreeNode & key_node, const DataTypePtr & value_type, const Field & value_field, RPNElement & out, - const ASTPtr & parent) + const RPNBuilderTreeNode * parent) { - if (header.has(key_ast->getColumnName())) + auto key_column_name = key_node.getColumnName(); + + if (header.has(key_column_name)) { - size_t position = header.getPositionByName(key_ast->getColumnName()); + size_t position = header.getPositionByName(key_column_name); const DataTypePtr & index_type = header.getByPosition(position).type; const auto * array_type = typeid_cast(index_type.get()); @@ -602,13 +628,7 @@ bool MergeTreeIndexConditionBloomFilter::traverseASTEquals( if (function_name == "mapContains" || function_name == "has") { - const auto * key_ast_identifier = key_ast.get()->as(); - if (!key_ast_identifier) - return false; - - const auto & col_name = key_ast_identifier->name(); - auto map_keys_index_column_name = fmt::format("mapKeys({})", col_name); - + auto map_keys_index_column_name = fmt::format("mapKeys({})", key_column_name); if (!header.has(map_keys_index_column_name)) return false; @@ -629,29 +649,32 @@ bool MergeTreeIndexConditionBloomFilter::traverseASTEquals( return true; } - if (const auto * function = key_ast->as()) + if (key_node.isFunction()) { WhichDataType which(value_type); - if (which.isTuple() && function->name == "tuple") + auto key_node_function = key_node.toFunctionNode(); + auto key_node_function_name = key_node_function.getFunctionName(); + size_t key_node_function_arguments_size = key_node_function.getArgumentsSize(); + + if (which.isTuple() && key_node_function_name == "tuple") { const Tuple & tuple = value_field.get(); const auto * value_tuple_data_type = typeid_cast(value_type.get()); - const ASTs & arguments = typeid_cast(*function->arguments).children; - if (tuple.size() != arguments.size()) + if (tuple.size() != key_node_function_arguments_size) throw Exception("Illegal types of arguments of function " + function_name, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); bool match_with_subtype = false; const DataTypes & subtypes = value_tuple_data_type->getElements(); for (size_t index = 0; index < tuple.size(); ++index) - match_with_subtype |= traverseASTEquals(function_name, arguments[index], subtypes[index], tuple[index], out, key_ast); + match_with_subtype |= traverseTreeEquals(function_name, key_node_function.getArgumentAt(index), subtypes[index], tuple[index], out, &key_node); return match_with_subtype; } - if (function->name == "arrayElement" && (function_name == "equals" || function_name == "notEquals")) + if (key_node_function_name == "arrayElement" && (function_name == "equals" || function_name == "notEquals")) { /** Try to parse arrayElement for mapKeys index. 
* It is important to ignore keys like column_map['Key'] = '' because if key does not exists in map @@ -663,27 +686,22 @@ bool MergeTreeIndexConditionBloomFilter::traverseASTEquals( if (value_field == value_type->getDefault()) return false; - const auto * column_ast_identifier = function->arguments.get()->children[0].get()->as(); - if (!column_ast_identifier) - return false; + auto first_argument = key_node_function.getArgumentAt(0); + const auto column_name = first_argument.getColumnName(); - const auto & col_name = column_ast_identifier->name(); - - auto map_keys_index_column_name = fmt::format("mapKeys({})", col_name); - auto map_values_index_column_name = fmt::format("mapValues({})", col_name); + auto map_keys_index_column_name = fmt::format("mapKeys({})", column_name); + auto map_values_index_column_name = fmt::format("mapValues({})", column_name); size_t position = 0; Field const_value = value_field; + DataTypePtr const_type; if (header.has(map_keys_index_column_name)) { position = header.getPositionByName(map_keys_index_column_name); + auto second_argument = key_node_function.getArgumentAt(1); - auto & argument = function->arguments.get()->children[1]; - - if (const auto * literal = argument->as()) - const_value = literal->value; - else + if (!second_argument.tryGetConstant(const_value, const_type)) return false; } else if (header.has(map_values_index_column_name)) @@ -708,23 +726,4 @@ bool MergeTreeIndexConditionBloomFilter::traverseASTEquals( return false; } -SetPtr MergeTreeIndexConditionBloomFilter::getPreparedSet(const ASTPtr & node) -{ - if (header.has(node->getColumnName())) - { - const auto & column_and_type = header.getByName(node->getColumnName()); - auto set_key = getPreparedSetKey(node, column_and_type.type); - if (auto prepared_set = query_info.prepared_sets->get(set_key)) - return prepared_set; - } - else - { - for (const auto & set : query_info.prepared_sets->getByTreeHash(node->getTreeHash())) - if (set->hasExplicitSetElements()) - return set; - } - - return DB::SetPtr(); -} - } diff --git a/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h b/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h index 27fd701c67b..5d7ea371a83 100644 --- a/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h +++ b/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h @@ -62,35 +62,27 @@ private: const size_t hash_functions; std::vector rpn; - SetPtr getPreparedSet(const ASTPtr & node); - bool mayBeTrueOnGranule(const MergeTreeIndexGranuleBloomFilter * granule) const; - bool traverseAtomAST(const ASTPtr & node, Block & block_with_constants, RPNElement & out); + bool extractAtomFromTree(const RPNBuilderTreeNode & node, RPNElement & out); - bool traverseFunction(const ASTPtr & node, Block & block_with_constants, RPNElement & out, const ASTPtr & parent); + bool traverseFunction(const RPNBuilderTreeNode & node, RPNElement & out, const RPNBuilderTreeNode * parent); - bool traverseASTIn( + bool traverseTreeIn( const String & function_name, - const ASTPtr & key_ast, - const SetPtr & prepared_set, - RPNElement & out); - - bool traverseASTIn( - const String & function_name, - const ASTPtr & key_ast, - const SetPtr & prepared_set, + const RPNBuilderTreeNode & key_node, + const ConstSetPtr & prepared_set, const DataTypePtr & type, const ColumnPtr & column, RPNElement & out); - bool traverseASTEquals( + bool traverseTreeEquals( const String & function_name, - const ASTPtr & key_ast, + const RPNBuilderTreeNode & key_node, const DataTypePtr & value_type, const Field & 
value_field, RPNElement & out, - const ASTPtr & parent); + const RPNBuilderTreeNode * parent); }; } diff --git a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp index ff924290783..b96d40f5759 100644 --- a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp @@ -11,9 +11,11 @@ #include #include #include +#include #include #include #include +#include #include #include @@ -148,13 +150,22 @@ MergeTreeConditionFullText::MergeTreeConditionFullText( , token_extractor(token_extactor_) , prepared_sets(query_info.prepared_sets) { - rpn = std::move( - RPNBuilder( - query_info, context, - [this] (const ASTPtr & node, ContextPtr /* context */, Block & block_with_constants, RPNElement & out) -> bool - { - return this->traverseAtomAST(node, block_with_constants, out); - }).extractRPN()); + ASTPtr filter_node = buildFilterNode(query_info.query); + + if (!filter_node) + { + rpn.push_back(RPNElement::FUNCTION_UNKNOWN); + return; + } + + auto block_with_constants = KeyCondition::getBlockWithConstants(query_info.query, query_info.syntax_analyzer_result, context); + RPNBuilder builder( + filter_node, + context, + std::move(block_with_constants), + query_info.prepared_sets, + [&](const RPNBuilderTreeNode & node, RPNElement & out) { return extractAtomFromTree(node, out); }); + rpn = std::move(builder).extractRPN(); } bool MergeTreeConditionFullText::alwaysUnknownOrTrue() const @@ -306,13 +317,13 @@ bool MergeTreeConditionFullText::getKey(const std::string & key_column_name, siz return true; } -bool MergeTreeConditionFullText::traverseAtomAST(const ASTPtr & node, Block & block_with_constants, RPNElement & out) +bool MergeTreeConditionFullText::extractAtomFromTree(const RPNBuilderTreeNode & node, RPNElement & out) { { Field const_value; DataTypePtr const_type; - if (KeyCondition::getConstant(node, block_with_constants, const_value, const_type)) + if (node.tryGetConstant(const_value, const_type)) { /// Check constant like in KeyCondition if (const_value.getType() == Field::Types::UInt64 @@ -329,53 +340,56 @@ bool MergeTreeConditionFullText::traverseAtomAST(const ASTPtr & node, Block & bl } } - if (const auto * function = node->as()) + if (node.isFunction()) { - if (!function->arguments) + auto function_node = node.toFunctionNode(); + auto function_name = function_node.getFunctionName(); + + size_t arguments_size = function_node.getArgumentsSize(); + if (arguments_size != 2) return false; - const ASTs & arguments = function->arguments->children; + auto left_argument = function_node.getArgumentAt(0); + auto right_argument = function_node.getArgumentAt(1); - if (arguments.size() != 2) - return false; - - if (functionIsInOrGlobalInOperator(function->name)) + if (functionIsInOrGlobalInOperator(function_name)) { - if (tryPrepareSetBloomFilter(arguments, out)) + if (tryPrepareSetBloomFilter(left_argument, right_argument, out)) { - if (function->name == "notIn") + if (function_name == "notIn") { out.function = RPNElement::FUNCTION_NOT_IN; return true; } - else if (function->name == "in") + else if (function_name == "in") { out.function = RPNElement::FUNCTION_IN; return true; } } } - else if (function->name == "equals" || - function->name == "notEquals" || - function->name == "has" || - function->name == "mapContains" || - function->name == "like" || - function->name == "notLike" || - function->name == "hasToken" || - function->name == "startsWith" || - function->name == "endsWith" || - function->name == 
"multiSearchAny") + else if (function_name == "equals" || + function_name == "notEquals" || + function_name == "has" || + function_name == "mapContains" || + function_name == "like" || + function_name == "notLike" || + function_name == "hasToken" || + function_name == "startsWith" || + function_name == "endsWith" || + function_name == "multiSearchAny") { Field const_value; DataTypePtr const_type; - if (KeyCondition::getConstant(arguments[1], block_with_constants, const_value, const_type)) + + if (right_argument.tryGetConstant(const_value, const_type)) { - if (traverseASTEquals(function->name, arguments[0], const_type, const_value, out)) + if (traverseTreeEquals(function_name, left_argument, const_type, const_value, out)) return true; } - else if (KeyCondition::getConstant(arguments[0], block_with_constants, const_value, const_type) && (function->name == "equals" || function->name == "notEquals")) + else if (left_argument.tryGetConstant(const_value, const_type) && (function_name == "equals" || function_name == "notEquals")) { - if (traverseASTEquals(function->name, arguments[1], const_type, const_value, out)) + if (traverseTreeEquals(function_name, right_argument, const_type, const_value, out)) return true; } } @@ -384,9 +398,9 @@ bool MergeTreeConditionFullText::traverseAtomAST(const ASTPtr & node, Block & bl return false; } -bool MergeTreeConditionFullText::traverseASTEquals( +bool MergeTreeConditionFullText::traverseTreeEquals( const String & function_name, - const ASTPtr & key_ast, + const RPNBuilderTreeNode & key_node, const DataTypePtr & value_type, const Field & value_field, RPNElement & out) @@ -397,13 +411,17 @@ bool MergeTreeConditionFullText::traverseASTEquals( Field const_value = value_field; + auto column_name = key_node.getColumnName(); size_t key_column_num = 0; - bool key_exists = getKey(key_ast->getColumnName(), key_column_num); - bool map_key_exists = getKey(fmt::format("mapKeys({})", key_ast->getColumnName()), key_column_num); + bool key_exists = getKey(column_name, key_column_num); + bool map_key_exists = getKey(fmt::format("mapKeys({})", column_name), key_column_num); - if (const auto * function = key_ast->as()) + if (key_node.isFunction()) { - if (function->name == "arrayElement") + auto key_function_node = key_node.toFunctionNode(); + auto key_function_node_function_name = key_function_node.getFunctionName(); + + if (key_function_node_function_name == "arrayElement") { /** Try to parse arrayElement for mapKeys index. 
* It is important to ignore keys like column_map['Key'] = '' because if key does not exists in map @@ -415,11 +433,8 @@ bool MergeTreeConditionFullText::traverseASTEquals( if (value_field == value_type->getDefault()) return false; - const auto * column_ast_identifier = function->arguments.get()->children[0].get()->as(); - if (!column_ast_identifier) - return false; - - const auto & map_column_name = column_ast_identifier->name(); + auto first_argument = key_function_node.getArgumentAt(0); + const auto map_column_name = first_argument.getColumnName(); size_t map_keys_key_column_num = 0; auto map_keys_index_column_name = fmt::format("mapKeys({})", map_column_name); @@ -431,12 +446,11 @@ bool MergeTreeConditionFullText::traverseASTEquals( if (map_keys_exists) { - auto & argument = function->arguments.get()->children[1]; + auto second_argument = key_function_node.getArgumentAt(1); + DataTypePtr const_type; - if (const auto * literal = argument->as()) + if (second_argument.tryGetConstant(const_value, const_type)) { - auto element_key = literal->value; - const_value = element_key; key_column_num = map_keys_key_column_num; key_exists = true; } @@ -567,23 +581,24 @@ bool MergeTreeConditionFullText::traverseASTEquals( } bool MergeTreeConditionFullText::tryPrepareSetBloomFilter( - const ASTs & args, + const RPNBuilderTreeNode & left_argument, + const RPNBuilderTreeNode & right_argument, RPNElement & out) { - const ASTPtr & left_arg = args[0]; - const ASTPtr & right_arg = args[1]; - std::vector key_tuple_mapping; DataTypes data_types; - const auto * left_arg_tuple = typeid_cast(left_arg.get()); - if (left_arg_tuple && left_arg_tuple->name == "tuple") + auto left_argument_function_node_optional = left_argument.toFunctionNodeOrNull(); + + if (left_argument_function_node_optional && left_argument_function_node_optional->getFunctionName() == "tuple") { - const auto & tuple_elements = left_arg_tuple->arguments->children; - for (size_t i = 0; i < tuple_elements.size(); ++i) + const auto & left_argument_function_node = *left_argument_function_node_optional; + size_t left_argument_function_node_arguments_size = left_argument_function_node.getArgumentsSize(); + + for (size_t i = 0; i < left_argument_function_node_arguments_size; ++i) { size_t key = 0; - if (getKey(tuple_elements[i]->getColumnName(), key)) + if (getKey(left_argument_function_node.getArgumentAt(i).getColumnName(), key)) { key_tuple_mapping.emplace_back(i, key); data_types.push_back(index_data_types[key]); @@ -593,7 +608,7 @@ bool MergeTreeConditionFullText::tryPrepareSetBloomFilter( else { size_t key = 0; - if (getKey(left_arg->getColumnName(), key)) + if (getKey(left_argument.getColumnName(), key)) { key_tuple_mapping.emplace_back(0, key); data_types.push_back(index_data_types[key]); @@ -603,19 +618,10 @@ bool MergeTreeConditionFullText::tryPrepareSetBloomFilter( if (key_tuple_mapping.empty()) return false; - PreparedSetKey set_key; - if (typeid_cast(right_arg.get()) || typeid_cast(right_arg.get())) - set_key = PreparedSetKey::forSubquery(*right_arg); - else - set_key = PreparedSetKey::forLiteral(*right_arg, data_types); - - auto prepared_set = prepared_sets->get(set_key); + auto prepared_set = right_argument.tryGetPreparedSet(data_types); if (!prepared_set) return false; - if (!prepared_set->hasExplicitSetElements()) - return false; - for (const auto & data_type : prepared_set->getDataTypes()) if (data_type->getTypeId() != TypeIndex::String && data_type->getTypeId() != TypeIndex::FixedString) return false; diff --git 
a/src/Storages/MergeTree/MergeTreeIndexFullText.h b/src/Storages/MergeTree/MergeTreeIndexFullText.h index bb4f52a463e..ad487816aef 100644 --- a/src/Storages/MergeTree/MergeTreeIndexFullText.h +++ b/src/Storages/MergeTree/MergeTreeIndexFullText.h @@ -122,17 +122,17 @@ private: using RPN = std::vector; - bool traverseAtomAST(const ASTPtr & node, Block & block_with_constants, RPNElement & out); + bool extractAtomFromTree(const RPNBuilderTreeNode & node, RPNElement & out); - bool traverseASTEquals( + bool traverseTreeEquals( const String & function_name, - const ASTPtr & key_ast, + const RPNBuilderTreeNode & key_node, const DataTypePtr & value_type, const Field & value_field, RPNElement & out); bool getKey(const std::string & key_column_name, size_t & key_column_num); - bool tryPrepareSetBloomFilter(const ASTs & args, RPNElement & out); + bool tryPrepareSetBloomFilter(const RPNBuilderTreeNode & left_argument, const RPNBuilderTreeNode & right_argument, RPNElement & out); static bool createFunctionEqualsCondition( RPNElement & out, const Field & value, const BloomFilterParameters & params, TokenExtractorPtr token_extractor); diff --git a/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.cpp b/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.cpp index 9c154f786f7..11e1f9efcc2 100644 --- a/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.cpp @@ -89,10 +89,10 @@ std::string MarkType::getFileExtension() const } -std::optional MergeTreeIndexGranularityInfo::getMarksExtensionFromFilesystem(const DataPartStoragePtr & data_part_storage) +std::optional MergeTreeIndexGranularityInfo::getMarksExtensionFromFilesystem(const IDataPartStorage & data_part_storage) { - if (data_part_storage->exists()) - for (auto it = data_part_storage->iterate(); it->isValid(); it->next()) + if (data_part_storage.exists()) + for (auto it = data_part_storage.iterate(); it->isValid(); it->next()) if (it->isFile()) if (std::string ext = fs::path(it->name()).extension(); MarkType::isMarkFileExtension(ext)) return ext; @@ -110,7 +110,7 @@ MergeTreeIndexGranularityInfo::MergeTreeIndexGranularityInfo(const MergeTreeData fixed_index_granularity = storage.getSettings()->index_granularity; } -void MergeTreeIndexGranularityInfo::changeGranularityIfRequired(const DataPartStoragePtr & data_part_storage) +void MergeTreeIndexGranularityInfo::changeGranularityIfRequired(const IDataPartStorage & data_part_storage) { auto mrk_ext = getMarksExtensionFromFilesystem(data_part_storage); if (mrk_ext && !MarkType(*mrk_ext).adaptive) diff --git a/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.h b/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.h index 883fe3c899e..aed3081d3d0 100644 --- a/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.h +++ b/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.h @@ -48,7 +48,7 @@ public: MergeTreeIndexGranularityInfo(MergeTreeDataPartType type_, bool is_adaptive_, size_t index_granularity_, size_t index_granularity_bytes_); - void changeGranularityIfRequired(const DataPartStoragePtr & data_part_storage); + void changeGranularityIfRequired(const IDataPartStorage & data_part_storage); String getMarksFilePath(const String & path_prefix) const { @@ -57,7 +57,7 @@ public: size_t getMarkSizeInBytes(size_t columns_num = 1) const; - static std::optional getMarksExtensionFromFilesystem(const DataPartStoragePtr & data_part_storage); + static std::optional getMarksExtensionFromFilesystem(const IDataPartStorage & data_part_storage); }; 
constexpr inline auto getNonAdaptiveMrkSizeWide() { return sizeof(UInt64) * 2; } diff --git a/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp b/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp index b190ac2b2fd..43e655a4ee5 100644 --- a/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp @@ -211,11 +211,11 @@ bool MergeTreeIndexMinMax::mayBenefitFromIndexForIn(const ASTPtr & node) const return false; } -MergeTreeIndexFormat MergeTreeIndexMinMax::getDeserializedFormat(const DataPartStoragePtr & data_part_storage, const std::string & relative_path_prefix) const +MergeTreeIndexFormat MergeTreeIndexMinMax::getDeserializedFormat(const IDataPartStorage & data_part_storage, const std::string & relative_path_prefix) const { - if (data_part_storage->exists(relative_path_prefix + ".idx2")) + if (data_part_storage.exists(relative_path_prefix + ".idx2")) return {2, ".idx2"}; - else if (data_part_storage->exists(relative_path_prefix + ".idx")) + else if (data_part_storage.exists(relative_path_prefix + ".idx")) return {1, ".idx"}; return {0 /* unknown */, ""}; } diff --git a/src/Storages/MergeTree/MergeTreeIndexMinMax.h b/src/Storages/MergeTree/MergeTreeIndexMinMax.h index 0566a15d535..af420613855 100644 --- a/src/Storages/MergeTree/MergeTreeIndexMinMax.h +++ b/src/Storages/MergeTree/MergeTreeIndexMinMax.h @@ -83,7 +83,7 @@ public: bool mayBenefitFromIndexForIn(const ASTPtr & node) const override; const char* getSerializedFileExtension() const override { return ".idx2"; } - MergeTreeIndexFormat getDeserializedFormat(const DataPartStoragePtr & data_part_storage, const std::string & path_prefix) const override; /// NOLINT + MergeTreeIndexFormat getDeserializedFormat(const IDataPartStorage & data_part_storage, const std::string & path_prefix) const override; /// NOLINT }; } diff --git a/src/Storages/MergeTree/MergeTreeIndexReader.cpp b/src/Storages/MergeTree/MergeTreeIndexReader.cpp index 33106f7ab64..7d7024a8ac2 100644 --- a/src/Storages/MergeTree/MergeTreeIndexReader.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexReader.cpp @@ -20,7 +20,7 @@ std::unique_ptr makeIndexReader( auto * load_marks_threadpool = settings.read_settings.load_marks_asynchronously ? 
&context->getLoadMarksThreadpool() : nullptr; return std::make_unique( - part->data_part_storage, + part->getDataPartStoragePtr(), index->getFileName(), extension, marks_count, all_mark_ranges, std::move(settings), mark_cache, uncompressed_cache, @@ -44,7 +44,7 @@ MergeTreeIndexReader::MergeTreeIndexReader( MergeTreeReaderSettings settings) : index(index_) { - auto index_format = index->getDeserializedFormat(part_->data_part_storage, index->getFileName()); + auto index_format = index->getDeserializedFormat(part_->getDataPartStorage(), index->getFileName()); stream = makeIndexReader( index_format.extension, diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.cpp b/src/Storages/MergeTree/MergeTreeIndexSet.cpp index 3c31deda823..0e15f2c4cb6 100644 --- a/src/Storages/MergeTree/MergeTreeIndexSet.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexSet.cpp @@ -74,8 +74,9 @@ void MergeTreeIndexGranuleSet::serializeBinary(WriteBuffer & ostr) const auto serialization = type->getDefaultSerialization(); ISerialization::SerializeBinaryBulkStatePtr state; - serialization->serializeBinaryBulkStatePrefix(settings, state); - serialization->serializeBinaryBulkWithMultipleStreams(*block.getByPosition(i).column, 0, size(), settings, state); + const auto & column = *block.getByPosition(i).column; + serialization->serializeBinaryBulkStatePrefix(column, settings, state); + serialization->serializeBinaryBulkWithMultipleStreams(column, 0, size(), settings, state); serialization->serializeBinaryBulkStateSuffix(settings, state); } } diff --git a/src/Storages/MergeTree/MergeTreeIndexUtils.cpp b/src/Storages/MergeTree/MergeTreeIndexUtils.cpp new file mode 100644 index 00000000000..652f0c853d4 --- /dev/null +++ b/src/Storages/MergeTree/MergeTreeIndexUtils.cpp @@ -0,0 +1,47 @@ +#include + +#include +#include +#include + +namespace DB +{ + +ASTPtr buildFilterNode(const ASTPtr & select_query, ASTs additional_filters) +{ + auto & select_query_typed = select_query->as(); + + ASTs filters; + if (select_query_typed.where()) + filters.push_back(select_query_typed.where()); + + if (select_query_typed.prewhere()) + filters.push_back(select_query_typed.prewhere()); + + filters.insert(filters.end(), additional_filters.begin(), additional_filters.end()); + + if (filters.empty()) + return nullptr; + + ASTPtr filter_node; + + if (filters.size() == 1) + { + filter_node = filters.front(); + } + else + { + auto function = std::make_shared(); + + function->name = "and"; + function->arguments = std::make_shared(); + function->children.push_back(function->arguments); + function->arguments->children = std::move(filters); + + filter_node = std::move(function); + } + + return filter_node; +} + +} diff --git a/src/Storages/MergeTree/MergeTreeIndexUtils.h b/src/Storages/MergeTree/MergeTreeIndexUtils.h new file mode 100644 index 00000000000..6ba9725b564 --- /dev/null +++ b/src/Storages/MergeTree/MergeTreeIndexUtils.h @@ -0,0 +1,13 @@ +#pragma once + +#include + +namespace DB +{ + +/** Build AST filter node for index analysis from WHERE and PREWHERE sections of select query and additional filters. + * If select query does not have WHERE and PREWHERE and additional filters are empty null is returned. 
+ */ +ASTPtr buildFilterNode(const ASTPtr & select_query, ASTs additional_filters = {}); + +} diff --git a/src/Storages/MergeTree/MergeTreeIndices.h b/src/Storages/MergeTree/MergeTreeIndices.h index 14002534c94..6a671c31944 100644 --- a/src/Storages/MergeTree/MergeTreeIndices.h +++ b/src/Storages/MergeTree/MergeTreeIndices.h @@ -148,9 +148,9 @@ struct IMergeTreeIndex /// Returns extension for deserialization. /// /// Return pair. - virtual MergeTreeIndexFormat getDeserializedFormat(const DataPartStoragePtr & data_part_storage, const std::string & relative_path_prefix) const + virtual MergeTreeIndexFormat getDeserializedFormat(const IDataPartStorage & data_part_storage, const std::string & relative_path_prefix) const { - if (data_part_storage->exists(relative_path_prefix + ".idx")) + if (data_part_storage.exists(relative_path_prefix + ".idx")) return {1, ".idx"}; return {0 /*unknown*/, ""}; } diff --git a/src/Storages/MergeTree/MergeTreePartition.cpp b/src/Storages/MergeTree/MergeTreePartition.cpp index 4ea6ec11ecc..10f5cc95baf 100644 --- a/src/Storages/MergeTree/MergeTreePartition.cpp +++ b/src/Storages/MergeTree/MergeTreePartition.cpp @@ -382,20 +382,20 @@ void MergeTreePartition::load(const MergeTreeData & storage, const PartMetadataM partition_key_sample.getByPosition(i).type->getDefaultSerialization()->deserializeBinary(value[i], *file); } -std::unique_ptr MergeTreePartition::store(const MergeTreeData & storage, const DataPartStorageBuilderPtr & data_part_storage_builder, MergeTreeDataPartChecksums & checksums) const +std::unique_ptr MergeTreePartition::store(const MergeTreeData & storage, IDataPartStorage & data_part_storage, MergeTreeDataPartChecksums & checksums) const { auto metadata_snapshot = storage.getInMemoryMetadataPtr(); const auto & context = storage.getContext(); const auto & partition_key_sample = adjustPartitionKey(metadata_snapshot, storage.getContext()).sample_block; - return store(partition_key_sample, data_part_storage_builder, checksums, context->getWriteSettings()); + return store(partition_key_sample, data_part_storage, checksums, context->getWriteSettings()); } -std::unique_ptr MergeTreePartition::store(const Block & partition_key_sample, const DataPartStorageBuilderPtr & data_part_storage_builder, MergeTreeDataPartChecksums & checksums, const WriteSettings & settings) const +std::unique_ptr MergeTreePartition::store(const Block & partition_key_sample, IDataPartStorage & data_part_storage, MergeTreeDataPartChecksums & checksums, const WriteSettings & settings) const { if (!partition_key_sample) return nullptr; - auto out = data_part_storage_builder->writeFile("partition.dat", DBMS_DEFAULT_BUFFER_SIZE, settings); + auto out = data_part_storage.writeFile("partition.dat", DBMS_DEFAULT_BUFFER_SIZE, settings); HashingWriteBuffer out_hashing(*out); for (size_t i = 0; i < value.size(); ++i) { diff --git a/src/Storages/MergeTree/MergeTreePartition.h b/src/Storages/MergeTree/MergeTreePartition.h index 6394641dfa3..78b141f26ec 100644 --- a/src/Storages/MergeTree/MergeTreePartition.h +++ b/src/Storages/MergeTree/MergeTreePartition.h @@ -15,10 +15,10 @@ class MergeTreeData; struct FormatSettings; struct MergeTreeDataPartChecksums; struct StorageInMemoryMetadata; -class IDataPartStorageBuilder; +class IDataPartStorage; using StorageMetadataPtr = std::shared_ptr; -using DataPartStorageBuilderPtr = std::shared_ptr; +using MutableDataPartStoragePtr = std::shared_ptr; /// This class represents a partition value of a single part and encapsulates its loading/storing logic. 
struct MergeTreePartition @@ -44,8 +44,8 @@ public: /// Store functions return write buffer with written but not finalized data. /// User must call finish() for returned object. - [[nodiscard]] std::unique_ptr store(const MergeTreeData & storage, const DataPartStorageBuilderPtr & data_part_storage_builder, MergeTreeDataPartChecksums & checksums) const; - [[nodiscard]] std::unique_ptr store(const Block & partition_key_sample, const DataPartStorageBuilderPtr & data_part_storage_builder, MergeTreeDataPartChecksums & checksums, const WriteSettings & settings) const; + [[nodiscard]] std::unique_ptr store(const MergeTreeData & storage, IDataPartStorage & data_part_storage, MergeTreeDataPartChecksums & checksums) const; + [[nodiscard]] std::unique_ptr store(const Block & partition_key_sample, IDataPartStorage & data_part_storage, MergeTreeDataPartChecksums & checksums, const WriteSettings & settings) const; void assign(const MergeTreePartition & other) { value = other.value; } diff --git a/src/Storages/MergeTree/MergeTreePartsMover.cpp b/src/Storages/MergeTree/MergeTreePartsMover.cpp index afeeacbe5d6..b618b068769 100644 --- a/src/Storages/MergeTree/MergeTreePartsMover.cpp +++ b/src/Storages/MergeTree/MergeTreePartsMover.cpp @@ -100,7 +100,6 @@ bool MergeTreePartsMover::selectPartsForMove( return false; std::unordered_map need_to_move; - std::unordered_set need_to_move_disks; const auto policy = data->getStoragePolicy(); const auto & volumes = policy->getVolumes(); @@ -115,10 +114,7 @@ bool MergeTreePartsMover::selectPartsForMove( UInt64 unreserved_space = disk->getUnreservedSpace(); if (unreserved_space < required_maximum_available_space && !disk->isBroken()) - { need_to_move.emplace(disk, required_maximum_available_space - unreserved_space); - need_to_move_disks.emplace(disk); - } } } } @@ -140,8 +136,16 @@ bool MergeTreePartsMover::selectPartsForMove( auto ttl_entry = selectTTLDescriptionForTTLInfos(metadata_snapshot->getMoveTTLs(), part->ttl_infos.moves_ttl, time_of_move, true); auto to_insert = need_to_move.end(); - if (auto disk_it = part->data_part_storage->isStoredOnDisk(need_to_move_disks); disk_it != need_to_move_disks.end()) - to_insert = need_to_move.find(*disk_it); + auto part_disk_name = part->getDataPartStorage().getDiskName(); + + for (auto it = need_to_move.begin(); it != need_to_move.end(); ++it) + { + if (it->first->getName() == part_disk_name) + { + to_insert = it; + break; + } + } ReservationPtr reservation; if (ttl_entry) @@ -158,9 +162,8 @@ bool MergeTreePartsMover::selectPartsForMove( /// In order to not over-move, we need to "release" required space on this disk, /// possibly to zero. 
if (to_insert != need_to_move.end()) - { to_insert->second.decreaseRequiredSizeAndRemoveRedundantParts(part->getBytesOnDisk()); - } + ++parts_to_move_by_ttl_rules; parts_to_move_total_size_bytes += part->getBytesOnDisk(); } @@ -173,7 +176,7 @@ bool MergeTreePartsMover::selectPartsForMove( for (auto && move : need_to_move) { - auto min_volume_index = policy->getVolumeIndexByDisk(move.first) + 1; + auto min_volume_index = policy->getVolumeIndexByDiskName(move.first->getName()) + 1; for (auto && part : move.second.getAccumulatedParts()) { auto reservation = policy->reserve(part->getBytesOnDisk(), min_volume_index); @@ -199,7 +202,7 @@ bool MergeTreePartsMover::selectPartsForMove( return false; } -MergeTreeData::DataPartPtr MergeTreePartsMover::clonePart(const MergeTreeMoveEntry & moving_part) const +MergeTreeMutableDataPartPtr MergeTreePartsMover::clonePart(const MergeTreeMoveEntry & moving_part) const { if (moves_blocker.isCancelled()) throw Exception("Cancelled moving parts.", ErrorCodes::ABORTED); @@ -207,16 +210,15 @@ MergeTreeData::DataPartPtr MergeTreePartsMover::clonePart(const MergeTreeMoveEnt auto settings = data->getSettings(); auto part = moving_part.part; auto disk = moving_part.reserved_space->getDisk(); - LOG_DEBUG(log, "Cloning part {} from '{}' to '{}'", part->name, part->data_part_storage->getDiskName(), disk->getName()); - - DataPartStoragePtr cloned_part_storage; + LOG_DEBUG(log, "Cloning part {} from '{}' to '{}'", part->name, part->getDataPartStorage().getDiskName(), disk->getName()); + MutableDataPartStoragePtr cloned_part_storage; if (disk->supportZeroCopyReplication() && settings->allow_remote_fs_zero_copy_replication) { /// Try zero-copy replication and fallback to default copy if it's not possible moving_part.part->assertOnDisk(); String path_to_clone = fs::path(data->getRelativeDataPath()) / MergeTreeData::MOVING_DIR_NAME / ""; - String relative_path = part->data_part_storage->getPartDirectory(); + String relative_path = part->getDataPartStorage().getPartDirectory(); if (disk->exists(path_to_clone + relative_path)) { LOG_WARNING(log, "Path {} already exists. 
Will remove it and clone again.", fullPath(disk, path_to_clone + relative_path)); @@ -230,7 +232,7 @@ MergeTreeData::DataPartPtr MergeTreePartsMover::clonePart(const MergeTreeMoveEnt if (!cloned_part_storage) { LOG_INFO(log, "Part {} was not fetched, we are the first who move it to another disk, so we will copy it", part->name); - cloned_part_storage = part->data_part_storage->clone(path_to_clone, part->data_part_storage->getPartDirectory(), disk, log); + cloned_part_storage = part->getDataPartStorage().clonePart(path_to_clone, part->getDataPartStorage().getPartDirectory(), disk, log); } } else @@ -238,18 +240,17 @@ MergeTreeData::DataPartPtr MergeTreePartsMover::clonePart(const MergeTreeMoveEnt cloned_part_storage = part->makeCloneOnDisk(disk, MergeTreeData::MOVING_DIR_NAME); } - MergeTreeData::MutableDataPartPtr cloned_part = data->createPart(part->name, cloned_part_storage); - LOG_TRACE(log, "Part {} was cloned to {}", part->name, cloned_part->data_part_storage->getFullPath()); + auto cloned_part = data->createPart(part->name, cloned_part_storage); + LOG_TRACE(log, "Part {} was cloned to {}", part->name, cloned_part->getDataPartStorage().getFullPath()); cloned_part->loadColumnsChecksumsIndexes(true, true); cloned_part->loadVersionMetadata(); - cloned_part->modification_time = cloned_part->data_part_storage->getLastModified().epochTime(); + cloned_part->modification_time = cloned_part->getDataPartStorage().getLastModified().epochTime(); return cloned_part; - } -void MergeTreePartsMover::swapClonedPart(const MergeTreeData::DataPartPtr & cloned_part) const +void MergeTreePartsMover::swapClonedPart(const MergeTreeMutableDataPartPtr & cloned_part) const { if (moves_blocker.isCancelled()) throw Exception("Cancelled moving parts.", ErrorCodes::ABORTED); @@ -259,20 +260,17 @@ void MergeTreePartsMover::swapClonedPart(const MergeTreeData::DataPartPtr & clon /// It's ok, because we don't block moving parts for merges or mutations if (!active_part || active_part->name != cloned_part->name) { - LOG_INFO(log, "Failed to swap {}. Active part doesn't exist. Possible it was merged or mutated. Will remove copy on path '{}'.", cloned_part->name, cloned_part->data_part_storage->getFullPath()); + LOG_INFO(log, "Failed to swap {}. Active part doesn't exist. Possible it was merged or mutated. Will remove copy on path '{}'.", cloned_part->name, cloned_part->getDataPartStorage().getFullPath()); return; } - auto builder = cloned_part->data_part_storage->getBuilder(); /// Don't remove new directory but throw an error because it may contain part which is currently in use. - cloned_part->renameTo(active_part->name, false, builder); - - builder->commit(); + cloned_part->renameTo(active_part->name, false); /// TODO what happen if server goes down here? data->swapActivePart(cloned_part); - LOG_TRACE(log, "Part {} was moved to {}", cloned_part->name, cloned_part->data_part_storage->getFullPath()); + LOG_TRACE(log, "Part {} was moved to {}", cloned_part->name, cloned_part->getDataPartStorage().getFullPath()); } } diff --git a/src/Storages/MergeTree/MergeTreePartsMover.h b/src/Storages/MergeTree/MergeTreePartsMover.h index 6ad658c2cb3..0266b2daa46 100644 --- a/src/Storages/MergeTree/MergeTreePartsMover.h +++ b/src/Storages/MergeTree/MergeTreePartsMover.h @@ -50,14 +50,14 @@ public: const std::lock_guard & moving_parts_lock); /// Copies part to selected reservation in detached folder. Throws exception if part already exists. 
- MergeTreeDataPartPtr clonePart(const MergeTreeMoveEntry & moving_part) const; + MergeTreeMutableDataPartPtr clonePart(const MergeTreeMoveEntry & moving_part) const; /// Replaces cloned part from detached directory into active data parts set. /// Replacing part changes state to DeleteOnDestroy and will be removed from disk after destructor of ///IMergeTreeDataPart called. If replacing part doesn't exists or not active (committed) than /// cloned part will be removed and log message will be reported. It may happen in case of concurrent /// merge or mutation. - void swapClonedPart(const MergeTreeDataPartPtr & cloned_parts) const; + void swapClonedPart(const MergeTreeMutableDataPartPtr & cloned_parts) const; /// Can stop background moves and moves from queries ActionBlocker moves_blocker; diff --git a/src/Storages/MergeTree/MergeTreeReadPool.cpp b/src/Storages/MergeTree/MergeTreeReadPool.cpp index 3f51673a6b1..ca9cde0ae61 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPool.cpp @@ -263,7 +263,7 @@ void MergeTreeReadPool::fillPerThreadInfo( { PartInfo part_info{parts[i], per_part_sum_marks[i], i}; if (parts[i].data_part->isStoredOnDisk()) - parts_per_disk[parts[i].data_part->data_part_storage->getDiskName()].push_back(std::move(part_info)); + parts_per_disk[parts[i].data_part->getDataPartStorage().getDiskName()].push_back(std::move(part_info)); else parts_per_disk[""].push_back(std::move(part_info)); } diff --git a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp index 4801c9a4058..b0488d29f8e 100644 --- a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp @@ -59,13 +59,15 @@ MergeTreeReaderCompact::MergeTreeReaderCompact( throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Cannot read to empty buffer."); const String path = MergeTreeDataPartCompact::DATA_FILE_NAME_WITH_EXTENSION; + auto data_part_storage = data_part_info_for_read->getDataPartStorage(); + if (uncompressed_cache) { auto buffer = std::make_unique( - std::string(fs::path(data_part_info_for_read->getDataPartStorage()->getFullPath()) / path), - [this, path]() + std::string(fs::path(data_part_storage->getFullPath()) / path), + [this, path, data_part_storage]() { - return data_part_info_for_read->getDataPartStorage()->readFile( + return data_part_storage->readFile( path, settings.read_settings, std::nullopt, std::nullopt); @@ -87,7 +89,7 @@ MergeTreeReaderCompact::MergeTreeReaderCompact( { auto buffer = std::make_unique( - data_part_info_for_read->getDataPartStorage()->readFile( + data_part_storage->readFile( path, settings.read_settings, std::nullopt, std::nullopt), diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp index 59cbae3f914..2490eb77772 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp @@ -38,14 +38,6 @@ MergeTreeSelectProcessor::MergeTreeSelectProcessor( has_limit_below_one_block(has_limit_below_one_block_), total_rows(data_part->index_granularity.getRowsCountInRanges(all_mark_ranges)) { - /// Actually it means that parallel reading from replicas enabled - /// and we have to collaborate with initiator. - /// In this case we won't set approximate rows, because it will be accounted multiple times. 
- /// Also do not count amount of read rows if we read in order of sorting key, - /// because we don't know actual amount of read rows in case when limit is set. - if (!extension_.has_value() && !reader_settings.read_in_order) - addTotalRowsApprox(total_rows); - ordered_names = header_without_virtual_columns.getNames(); } diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index a0db39a97f1..844c1ddbfe5 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -62,6 +62,8 @@ struct Settings; M(UInt64, merge_tree_clear_old_temporary_directories_interval_seconds, 60, "The period of executing the clear old temporary directories operation in background.", 0) \ M(UInt64, merge_tree_clear_old_parts_interval_seconds, 1, "The period of executing the clear old parts operation in background.", 0) \ M(UInt64, merge_tree_clear_old_broken_detached_parts_ttl_timeout_seconds, 1ULL * 3600 * 24 * 30, "Remove old broken detached parts in the background if they remained intouched for a specified by this setting period of time.", 0) \ + M(UInt64, min_age_to_force_merge_seconds, 0, "If all parts in a certain range are older than this value, range will be always eligible for merging. Set to 0 to disable.", 0) \ + M(Bool, min_age_to_force_merge_on_partition_only, false, "Whether min_age_to_force_merge_seconds should be applied only on the entire partition and not on subset.", false) \ M(UInt64, merge_tree_enable_clear_old_broken_detached, false, "Enable clearing old broken detached parts operation in background.", 0) \ M(Bool, remove_rolled_back_parts_immediately, 1, "Setting for an incomplete experimental feature.", 0) \ \ diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index 5d00db861a8..13a72c24c59 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -1,8 +1,8 @@ #include #include #include -#include #include +#include namespace ProfileEvents { @@ -56,8 +56,9 @@ struct MergeTreeSink::DelayedChunk void MergeTreeSink::consume(Chunk chunk) { auto block = getHeader().cloneWithColumns(chunk.detachColumns()); + if (!storage_snapshot->object_columns.empty()) + convertDynamicColumnsToTuples(block, storage_snapshot); - deduceTypesOfObjectColumns(storage_snapshot, block); auto part_blocks = storage.writer.splitBlockIntoParts(block, max_parts_per_block, metadata_snapshot, context); using DelayedPartitions = std::vector; @@ -81,7 +82,7 @@ void MergeTreeSink::consume(Chunk chunk) if (!temp_part.part) continue; - if (!support_parallel_write && temp_part.part->data_part_storage->supportParallelWrite()) + if (!support_parallel_write && temp_part.part->getDataPartStorage().supportParallelWrite()) support_parallel_write = true; if (storage.getDeduplicationLog()) @@ -160,7 +161,7 @@ void MergeTreeSink::finishDelayedChunk() } } - added = storage.renameTempPartAndAdd(part, transaction, partition.temp_part.builder, lock); + added = storage.renameTempPartAndAdd(part, transaction, lock); transaction.commit(&lock); } diff --git a/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp b/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp index 4735eae8fdd..b3625ba8e93 100644 --- a/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp +++ b/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp @@ -150,7 +150,6 @@ MergeTreeData::MutableDataPartsVector MergeTreeWriteAheadLog::restore( while (!in->eof()) { MergeTreeData::MutableDataPartPtr part; - 
DataPartStorageBuilderPtr data_part_storage_builder; UInt8 version; String part_name; Block block; @@ -177,7 +176,6 @@ MergeTreeData::MutableDataPartsVector MergeTreeWriteAheadLog::restore( { auto single_disk_volume = std::make_shared("volume_" + part_name, disk, 0); auto data_part_storage = std::make_shared(single_disk_volume, storage.getRelativeDataPath(), part_name); - data_part_storage_builder = std::make_shared(single_disk_volume, storage.getRelativeDataPath(), part_name); part = storage.createPart( part_name, @@ -222,7 +220,6 @@ MergeTreeData::MutableDataPartsVector MergeTreeWriteAheadLog::restore( { MergedBlockOutputStream part_out( part, - data_part_storage_builder, metadata_snapshot, block.getNamesAndTypesList(), {}, @@ -240,11 +237,12 @@ MergeTreeData::MutableDataPartsVector MergeTreeWriteAheadLog::restore( for (const auto & projection : metadata_snapshot->getProjections()) { auto projection_block = projection.calculate(block, context); - auto temp_part = MergeTreeDataWriter::writeInMemoryProjectionPart(storage, log, projection_block, projection, data_part_storage_builder, part.get()); + auto temp_part = MergeTreeDataWriter::writeProjectionPart(storage, log, projection_block, projection, part.get()); temp_part.finalize(); if (projection_block.rows()) part->addProjectionPart(projection.name, std::move(temp_part.part)); } + part_out.finalizePart(part, false); min_block_number = std::min(min_block_number, part->info.min_block); diff --git a/src/Storages/MergeTree/MergeType.cpp b/src/Storages/MergeTree/MergeType.cpp index 4b03f5ab57c..045114578d0 100644 --- a/src/Storages/MergeTree/MergeType.cpp +++ b/src/Storages/MergeTree/MergeType.cpp @@ -10,7 +10,7 @@ namespace ErrorCodes extern const int NOT_IMPLEMENTED; } -MergeType checkAndGetMergeType(UInt64 merge_type) +MergeType checkAndGetMergeType(UInt32 merge_type) { if (auto maybe_merge_type = magic_enum::enum_cast(merge_type)) return *maybe_merge_type; diff --git a/src/Storages/MergeTree/MergeType.h b/src/Storages/MergeTree/MergeType.h index fad1ba33e3e..ce9a40c5931 100644 --- a/src/Storages/MergeTree/MergeType.h +++ b/src/Storages/MergeTree/MergeType.h @@ -22,7 +22,7 @@ enum class MergeType }; /// Check parsed merge_type from raw int and get enum value. 
-MergeType checkAndGetMergeType(UInt64 merge_type); +MergeType checkAndGetMergeType(UInt32 merge_type); /// Check this merge assigned with TTL bool isTTLMergeType(MergeType merge_type); diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index 269a78977ad..991a8d359a8 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -14,8 +14,7 @@ namespace ErrorCodes MergedBlockOutputStream::MergedBlockOutputStream( - const MergeTreeDataPartPtr & data_part, - DataPartStorageBuilderPtr data_part_storage_builder_, + const MergeTreeMutableDataPartPtr & data_part, const StorageMetadataPtr & metadata_snapshot_, const NamesAndTypesList & columns_list_, const MergeTreeIndices & skip_indices, @@ -24,7 +23,7 @@ MergedBlockOutputStream::MergedBlockOutputStream( bool reset_columns_, bool blocks_are_granules_size, const WriteSettings & write_settings_) - : IMergedBlockOutputStream(std::move(data_part_storage_builder_), data_part, metadata_snapshot_, columns_list_, reset_columns_) + : IMergedBlockOutputStream(data_part, metadata_snapshot_, columns_list_, reset_columns_) , columns_list(columns_list_) , default_codec(default_codec_) , write_settings(write_settings_) @@ -38,7 +37,7 @@ MergedBlockOutputStream::MergedBlockOutputStream( blocks_are_granules_size); if (data_part->isStoredOnDisk()) - data_part_storage_builder->createDirectories(); + data_part_storage->createDirectories(); /// We should write version metadata on part creation to distinguish it from parts that were created without transaction. TransactionID tid = txn ? txn->tid : Tx::PrehistoricTID; @@ -47,7 +46,7 @@ MergedBlockOutputStream::MergedBlockOutputStream( data_part->version.setCreationTID(tid, nullptr); data_part->storeVersionMetadata(); - writer = data_part->getWriter(data_part_storage_builder, columns_list, metadata_snapshot, skip_indices, default_codec, writer_settings, {}); + writer = data_part->getWriter(columns_list, metadata_snapshot, skip_indices, default_codec, writer_settings, {}); } /// If data is pre-sorted. 
@@ -68,17 +67,17 @@ struct MergedBlockOutputStream::Finalizer::Impl { IMergeTreeDataPartWriter & writer; MergeTreeData::MutableDataPartPtr part; - DataPartStorageBuilderPtr data_part_storage_builder; NameSet files_to_remove_after_finish; std::vector> written_files; bool sync; - Impl(IMergeTreeDataPartWriter & writer_, MergeTreeData::MutableDataPartPtr part_, DataPartStorageBuilderPtr data_part_storage_builder_, const NameSet & files_to_remove_after_finish_, bool sync_) + Impl(IMergeTreeDataPartWriter & writer_, MergeTreeData::MutableDataPartPtr part_, const NameSet & files_to_remove_after_finish_, bool sync_) : writer(writer_) , part(std::move(part_)) - , data_part_storage_builder(std::move(data_part_storage_builder_)) , files_to_remove_after_finish(files_to_remove_after_finish_) - , sync(sync_) {} + , sync(sync_) + { + } void finish(); }; @@ -95,7 +94,7 @@ void MergedBlockOutputStream::Finalizer::Impl::finish() writer.finish(sync); for (const auto & file_name : files_to_remove_after_finish) - data_part_storage_builder->removeFile(file_name); + part->getDataPartStorage().removeFile(file_name); for (auto & file : written_files) { @@ -122,19 +121,19 @@ MergedBlockOutputStream::Finalizer & MergedBlockOutputStream::Finalizer::operato MergedBlockOutputStream::Finalizer::Finalizer(std::unique_ptr impl_) : impl(std::move(impl_)) {} void MergedBlockOutputStream::finalizePart( - MergeTreeData::MutableDataPartPtr & new_part, - bool sync, - const NamesAndTypesList * total_columns_list, - MergeTreeData::DataPart::Checksums * additional_column_checksums) + const MergeTreeMutableDataPartPtr & new_part, + bool sync, + const NamesAndTypesList * total_columns_list, + MergeTreeData::DataPart::Checksums * additional_column_checksums) { finalizePartAsync(new_part, sync, total_columns_list, additional_column_checksums).finish(); } MergedBlockOutputStream::Finalizer MergedBlockOutputStream::finalizePartAsync( - MergeTreeData::MutableDataPartPtr & new_part, - bool sync, - const NamesAndTypesList * total_columns_list, - MergeTreeData::DataPart::Checksums * additional_column_checksums) + const MergeTreeMutableDataPartPtr & new_part, + bool sync, + const NamesAndTypesList * total_columns_list, + MergeTreeData::DataPart::Checksums * additional_column_checksums) { /// Finish write and get checksums. 
MergeTreeData::DataPart::Checksums checksums; @@ -165,7 +164,7 @@ MergedBlockOutputStream::Finalizer MergedBlockOutputStream::finalizePartAsync( new_part->setColumns(part_columns, serialization_infos); } - auto finalizer = std::make_unique(*writer, new_part, data_part_storage_builder, files_to_remove_after_sync, sync); + auto finalizer = std::make_unique(*writer, new_part, files_to_remove_after_sync, sync); if (new_part->isStoredOnDisk()) finalizer->written_files = finalizePartOnDisk(new_part, checksums); @@ -184,7 +183,7 @@ MergedBlockOutputStream::Finalizer MergedBlockOutputStream::finalizePartAsync( } MergedBlockOutputStream::WrittenFiles MergedBlockOutputStream::finalizePartOnDisk( - const MergeTreeData::DataPartPtr & new_part, + const MergeTreeMutableDataPartPtr & new_part, MergeTreeData::DataPart::Checksums & checksums) { WrittenFiles written_files; @@ -192,7 +191,7 @@ MergedBlockOutputStream::WrittenFiles MergedBlockOutputStream::finalizePartOnDis { if (storage.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING || isCompactPart(new_part)) { - auto count_out = data_part_storage_builder->writeFile("count.txt", 4096, write_settings); + auto count_out = new_part->getDataPartStorage().writeFile("count.txt", 4096, write_settings); HashingWriteBuffer count_out_hashing(*count_out); writeIntText(rows_count, count_out_hashing); count_out_hashing.next(); @@ -206,7 +205,7 @@ MergedBlockOutputStream::WrittenFiles MergedBlockOutputStream::finalizePartOnDis { if (new_part->uuid != UUIDHelpers::Nil) { - auto out = data_part_storage_builder->writeFile(IMergeTreeDataPart::UUID_FILE_NAME, 4096, write_settings); + auto out = new_part->getDataPartStorage().writeFile(IMergeTreeDataPart::UUID_FILE_NAME, 4096, write_settings); HashingWriteBuffer out_hashing(*out); writeUUIDText(new_part->uuid, out_hashing); checksums.files[IMergeTreeDataPart::UUID_FILE_NAME].file_size = out_hashing.count(); @@ -217,12 +216,12 @@ MergedBlockOutputStream::WrittenFiles MergedBlockOutputStream::finalizePartOnDis if (storage.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) { - if (auto file = new_part->partition.store(storage, data_part_storage_builder, checksums)) + if (auto file = new_part->partition.store(storage, new_part->getDataPartStorage(), checksums)) written_files.emplace_back(std::move(file)); if (new_part->minmax_idx->initialized) { - auto files = new_part->minmax_idx->store(storage, data_part_storage_builder, checksums); + auto files = new_part->minmax_idx->store(storage, new_part->getDataPartStorage(), checksums); for (auto & file : files) written_files.emplace_back(std::move(file)); } @@ -232,7 +231,7 @@ MergedBlockOutputStream::WrittenFiles MergedBlockOutputStream::finalizePartOnDis } { - auto count_out = data_part_storage_builder->writeFile("count.txt", 4096, write_settings); + auto count_out = new_part->getDataPartStorage().writeFile("count.txt", 4096, write_settings); HashingWriteBuffer count_out_hashing(*count_out); writeIntText(rows_count, count_out_hashing); count_out_hashing.next(); @@ -246,7 +245,7 @@ MergedBlockOutputStream::WrittenFiles MergedBlockOutputStream::finalizePartOnDis if (!new_part->ttl_infos.empty()) { /// Write a file with ttl infos in json format. 
- auto out = data_part_storage_builder->writeFile("ttl.txt", 4096, write_settings); + auto out = new_part->getDataPartStorage().writeFile("ttl.txt", 4096, write_settings); HashingWriteBuffer out_hashing(*out); new_part->ttl_infos.write(out_hashing); checksums.files["ttl.txt"].file_size = out_hashing.count(); @@ -257,7 +256,7 @@ MergedBlockOutputStream::WrittenFiles MergedBlockOutputStream::finalizePartOnDis if (!new_part->getSerializationInfos().empty()) { - auto out = data_part_storage_builder->writeFile(IMergeTreeDataPart::SERIALIZATION_FILE_NAME, 4096, write_settings); + auto out = new_part->getDataPartStorage().writeFile(IMergeTreeDataPart::SERIALIZATION_FILE_NAME, 4096, write_settings); HashingWriteBuffer out_hashing(*out); new_part->getSerializationInfos().writeJSON(out_hashing); checksums.files[IMergeTreeDataPart::SERIALIZATION_FILE_NAME].file_size = out_hashing.count(); @@ -268,7 +267,7 @@ MergedBlockOutputStream::WrittenFiles MergedBlockOutputStream::finalizePartOnDis { /// Write a file with a description of columns. - auto out = data_part_storage_builder->writeFile("columns.txt", 4096, write_settings); + auto out = new_part->getDataPartStorage().writeFile("columns.txt", 4096, write_settings); new_part->getColumns().writeText(*out); out->preFinalize(); written_files.emplace_back(std::move(out)); @@ -276,7 +275,7 @@ MergedBlockOutputStream::WrittenFiles MergedBlockOutputStream::finalizePartOnDis if (default_codec != nullptr) { - auto out = data_part_storage_builder->writeFile(IMergeTreeDataPart::DEFAULT_COMPRESSION_CODEC_FILE_NAME, 4096, write_settings); + auto out = new_part->getDataPartStorage().writeFile(IMergeTreeDataPart::DEFAULT_COMPRESSION_CODEC_FILE_NAME, 4096, write_settings); DB::writeText(queryToString(default_codec->getFullCodecDesc()), *out); out->preFinalize(); written_files.emplace_back(std::move(out)); @@ -289,7 +288,7 @@ MergedBlockOutputStream::WrittenFiles MergedBlockOutputStream::finalizePartOnDis { /// Write file with checksums. - auto out = data_part_storage_builder->writeFile("checksums.txt", 4096, write_settings); + auto out = new_part->getDataPartStorage().writeFile("checksums.txt", 4096, write_settings); checksums.write(*out); out->preFinalize(); written_files.emplace_back(std::move(out)); diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.h b/src/Storages/MergeTree/MergedBlockOutputStream.h index 92dcd8dd272..ad1bb584788 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.h +++ b/src/Storages/MergeTree/MergedBlockOutputStream.h @@ -15,8 +15,7 @@ class MergedBlockOutputStream final : public IMergedBlockOutputStream { public: MergedBlockOutputStream( - const MergeTreeDataPartPtr & data_part, - DataPartStorageBuilderPtr data_part_storage_builder_, + const MergeTreeMutableDataPartPtr & data_part, const StorageMetadataPtr & metadata_snapshot_, const NamesAndTypesList & columns_list_, const MergeTreeIndices & skip_indices, @@ -55,16 +54,16 @@ public: /// Finalize writing part and fill inner structures /// If part is new and contains projections, they should be added before invoking this method. 
Finalizer finalizePartAsync( - MergeTreeData::MutableDataPartPtr & new_part, - bool sync, - const NamesAndTypesList * total_columns_list = nullptr, - MergeTreeData::DataPart::Checksums * additional_column_checksums = nullptr); + const MergeTreeMutableDataPartPtr & new_part, + bool sync, + const NamesAndTypesList * total_columns_list = nullptr, + MergeTreeData::DataPart::Checksums * additional_column_checksums = nullptr); void finalizePart( - MergeTreeData::MutableDataPartPtr & new_part, - bool sync, - const NamesAndTypesList * total_columns_list = nullptr, - MergeTreeData::DataPart::Checksums * additional_column_checksums = nullptr); + const MergeTreeMutableDataPartPtr & new_part, + bool sync, + const NamesAndTypesList * total_columns_list = nullptr, + MergeTreeData::DataPart::Checksums * additional_column_checksums = nullptr); private: /** If `permutation` is given, it rearranges the values in the columns when writing. @@ -74,8 +73,8 @@ private: using WrittenFiles = std::vector>; WrittenFiles finalizePartOnDisk( - const MergeTreeData::DataPartPtr & new_part, - MergeTreeData::DataPart::Checksums & checksums); + const MergeTreeMutableDataPartPtr & new_part, + MergeTreeData::DataPart::Checksums & checksums); NamesAndTypesList columns_list; IMergeTreeDataPart::MinMaxIndex minmax_idx; diff --git a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp index dd75cddd380..e4a5a0bc3ba 100644 --- a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp +++ b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp @@ -11,8 +11,7 @@ namespace ErrorCodes } MergedColumnOnlyOutputStream::MergedColumnOnlyOutputStream( - DataPartStorageBuilderPtr data_part_storage_builder_, - const MergeTreeDataPartPtr & data_part, + const MergeTreeMutableDataPartPtr & data_part, const StorageMetadataPtr & metadata_snapshot_, const Block & header_, CompressionCodecPtr default_codec, @@ -20,7 +19,7 @@ MergedColumnOnlyOutputStream::MergedColumnOnlyOutputStream( WrittenOffsetColumns * offset_columns_, const MergeTreeIndexGranularity & index_granularity, const MergeTreeIndexGranularityInfo * index_granularity_info) - : IMergedBlockOutputStream(std::move(data_part_storage_builder_), data_part, metadata_snapshot_, header_.getNamesAndTypesList(), /*reset_columns=*/ true) + : IMergedBlockOutputStream(data_part, metadata_snapshot_, header_.getNamesAndTypesList(), /*reset_columns=*/ true) , header(header_) { const auto & global_settings = data_part->storage.getContext()->getSettings(); @@ -34,7 +33,6 @@ MergedColumnOnlyOutputStream::MergedColumnOnlyOutputStream( /* rewrite_primary_key = */ false); writer = data_part->getWriter( - data_part_storage_builder, header.getNamesAndTypesList(), metadata_snapshot_, indices_to_recalc, @@ -81,7 +79,7 @@ MergedColumnOnlyOutputStream::fillChecksums( for (const String & removed_file : removed_files) { - data_part_storage_builder->removeFileIfExists(removed_file); + new_part->getDataPartStorage().removeFileIfExists(removed_file); if (all_checksums.files.contains(removed_file)) all_checksums.files.erase(removed_file); diff --git a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.h b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.h index 1fd1c752226..f382b0fef60 100644 --- a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.h +++ b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.h @@ -14,8 +14,7 @@ public: /// Pass empty 'already_written_offset_columns' first time then and pass the same object to subsequent instances of 
MergedColumnOnlyOutputStream /// if you want to serialize elements of Nested data structure in different instances of MergedColumnOnlyOutputStream. MergedColumnOnlyOutputStream( - DataPartStorageBuilderPtr data_part_storage_builder_, - const MergeTreeDataPartPtr & data_part, + const MergeTreeMutableDataPartPtr & data_part, const StorageMetadataPtr & metadata_snapshot_, const Block & header_, CompressionCodecPtr default_codec_, diff --git a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp index 549c4e7373f..9e3cbb0640b 100644 --- a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp @@ -92,7 +92,7 @@ ReplicatedMergeMutateTaskBase::PrepareResult MutateFromLogEntryTask::prepare() /// Once we mutate part, we must reserve space on the same disk, because mutations can possibly create hardlinks. /// Can throw an exception. - reserved_space = storage.reserveSpace(estimated_space_for_result, source_part->data_part_storage); + reserved_space = storage.reserveSpace(estimated_space_for_result, source_part->getDataPartStorage()); table_lock_holder = storage.lockForShare( RWLockImpl::NO_QUERY, storage_settings_ptr->lock_acquire_timeout_for_background_operations); @@ -193,12 +193,7 @@ ReplicatedMergeMutateTaskBase::PrepareResult MutateFromLogEntryTask::prepare() bool MutateFromLogEntryTask::finalize(ReplicatedMergeMutateTaskBase::PartLogWriter write_part_log) { new_part = mutate_task->getFuture().get(); - auto builder = mutate_task->getBuilder(); - - if (!builder) - builder = new_part->data_part_storage->getBuilder(); - - storage.renameTempPartAndReplace(new_part, *transaction_ptr, builder); + storage.renameTempPartAndReplace(new_part, *transaction_ptr); try { diff --git a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp index 0cf10ee1935..b1714076a46 100644 --- a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp +++ b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp @@ -83,14 +83,9 @@ bool MutatePlainMergeTreeTask::executeStep() new_part = mutate_task->getFuture().get(); - auto builder = mutate_task->getBuilder(); - if (!builder) - builder = new_part->data_part_storage->getBuilder(); - - MergeTreeData::Transaction transaction(storage, merge_mutate_entry->txn.get()); /// FIXME Transactions: it's too optimistic, better to lock parts before starting transaction - storage.renameTempPartAndReplace(new_part, transaction, builder); + storage.renameTempPartAndReplace(new_part, transaction); transaction.commit(); storage.updateMutationEntriesErrors(future_part, true, ""); diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 9f3c3100349..e5ba771a198 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -220,8 +220,11 @@ getColumnsForNewDataPart( if (!isWidePart(source_part)) return {updated_header.getNamesAndTypesList(), new_serialization_infos}; - Names source_column_names = source_part->getColumns().getNames(); - NameSet source_columns_name_set(source_column_names.begin(), source_column_names.end()); + const auto & source_columns = source_part->getColumns(); + std::unordered_map source_columns_name_to_type; + for (const auto & it : source_columns) + source_columns_name_to_type[it.name] = it.type; + for (auto it = storage_columns.begin(); it != storage_columns.end();) { if (updated_header.has(it->name)) @@ -233,14 +236,25 @@ getColumnsForNewDataPart( } else { 
- if (!source_columns_name_set.contains(it->name)) + auto source_col = source_columns_name_to_type.find(it->name); + if (source_col == source_columns_name_to_type.end()) { /// Source part doesn't have column but some other column /// was renamed to it's name. auto renamed_it = renamed_columns_to_from.find(it->name); - if (renamed_it != renamed_columns_to_from.end() - && source_columns_name_set.contains(renamed_it->second)) - ++it; + if (renamed_it != renamed_columns_to_from.end()) + { + source_col = source_columns_name_to_type.find(renamed_it->second); + if (source_col == source_columns_name_to_type.end()) + it = storage_columns.erase(it); + else + { + /// Take a type from source part column. + /// It may differ from column type in storage. + it->type = source_col->second; + ++it; + } + } else it = storage_columns.erase(it); } @@ -262,7 +276,12 @@ getColumnsForNewDataPart( if (!renamed_columns_to_from.contains(it->name) && (was_renamed || was_removed)) it = storage_columns.erase(it); else + { + /// Take a type from source part column. + /// It may differ from column type in storage. + it->type = source_col->second; ++it; + } } } } @@ -600,7 +619,6 @@ static NameToNameVector collectFilesForRenames( /// Initialize and write to disk new part fields like checksums, columns, etc. void finalizeMutatedPart( const MergeTreeDataPartPtr & source_part, - const DataPartStorageBuilderPtr & data_part_storage_builder, MergeTreeData::MutableDataPartPtr new_data_part, ExecuteTTLType execute_ttl_type, const CompressionCodecPtr & codec, @@ -608,7 +626,7 @@ void finalizeMutatedPart( { if (new_data_part->uuid != UUIDHelpers::Nil) { - auto out = data_part_storage_builder->writeFile(IMergeTreeDataPart::UUID_FILE_NAME, 4096, context->getWriteSettings()); + auto out = new_data_part->getDataPartStorage().writeFile(IMergeTreeDataPart::UUID_FILE_NAME, 4096, context->getWriteSettings()); HashingWriteBuffer out_hashing(*out); writeUUIDText(new_data_part->uuid, out_hashing); new_data_part->checksums.files[IMergeTreeDataPart::UUID_FILE_NAME].file_size = out_hashing.count(); @@ -618,7 +636,7 @@ void finalizeMutatedPart( if (execute_ttl_type != ExecuteTTLType::NONE) { /// Write a file with ttl infos in json format. - auto out_ttl = data_part_storage_builder->writeFile("ttl.txt", 4096, context->getWriteSettings()); + auto out_ttl = new_data_part->getDataPartStorage().writeFile("ttl.txt", 4096, context->getWriteSettings()); HashingWriteBuffer out_hashing(*out_ttl); new_data_part->ttl_infos.write(out_hashing); new_data_part->checksums.files["ttl.txt"].file_size = out_hashing.count(); @@ -627,7 +645,7 @@ void finalizeMutatedPart( if (!new_data_part->getSerializationInfos().empty()) { - auto out = data_part_storage_builder->writeFile(IMergeTreeDataPart::SERIALIZATION_FILE_NAME, 4096, context->getWriteSettings()); + auto out = new_data_part->getDataPartStorage().writeFile(IMergeTreeDataPart::SERIALIZATION_FILE_NAME, 4096, context->getWriteSettings()); HashingWriteBuffer out_hashing(*out); new_data_part->getSerializationInfos().writeJSON(out_hashing); new_data_part->checksums.files[IMergeTreeDataPart::SERIALIZATION_FILE_NAME].file_size = out_hashing.count(); @@ -636,18 +654,18 @@ void finalizeMutatedPart( { /// Write file with checksums. 
- auto out_checksums = data_part_storage_builder->writeFile("checksums.txt", 4096, context->getWriteSettings()); + auto out_checksums = new_data_part->getDataPartStorage().writeFile("checksums.txt", 4096, context->getWriteSettings()); new_data_part->checksums.write(*out_checksums); } /// close fd { - auto out = data_part_storage_builder->writeFile(IMergeTreeDataPart::DEFAULT_COMPRESSION_CODEC_FILE_NAME, 4096, context->getWriteSettings()); + auto out = new_data_part->getDataPartStorage().writeFile(IMergeTreeDataPart::DEFAULT_COMPRESSION_CODEC_FILE_NAME, 4096, context->getWriteSettings()); DB::writeText(queryToString(codec->getFullCodecDesc()), *out); } /// close fd { /// Write a file with a description of columns. - auto out_columns = data_part_storage_builder->writeFile("columns.txt", 4096, context->getWriteSettings()); + auto out_columns = new_data_part->getDataPartStorage().writeFile("columns.txt", 4096, context->getWriteSettings()); new_data_part->getColumns().writeText(*out_columns); } /// close fd @@ -715,8 +733,6 @@ struct MutationContext = MutationsInterpreter::MutationKind::MutationKindEnum::MUTATE_UNKNOWN; MergeTreeData::MutableDataPartPtr new_data_part; - DataPartStorageBuilderPtr data_part_storage_builder; - IMergedBlockOutputStreamPtr out{nullptr}; String mrk_extension; @@ -797,11 +813,9 @@ public: if (next_level_parts.empty()) { LOG_DEBUG(log, "Merged a projection part in level {}", current_level); - auto builder = selected_parts[0]->data_part_storage->getBuilder(); - selected_parts[0]->renameTo(projection.name + ".proj", true, builder); + selected_parts[0]->renameTo(projection.name + ".proj", true); selected_parts[0]->name = projection.name; selected_parts[0]->is_temp = false; - builder->commit(); ctx->new_data_part->addProjectionPart(name, std::move(selected_parts[0])); /// Task is finished @@ -846,7 +860,6 @@ public: projection_merging_params, NO_TRANSACTION_PTR, ctx->new_data_part.get(), - ctx->data_part_storage_builder.get(), ".tmp_proj"); next_level_parts.push_back(executeHere(tmp_part_merge_task)); @@ -1006,8 +1019,7 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() if (projection_block) { auto tmp_part = MergeTreeDataWriter::writeTempProjectionPart( - *ctx->data, ctx->log, projection_block, projection, ctx->data_part_storage_builder, ctx->new_data_part.get(), ++block_num); - tmp_part.builder->commit(); + *ctx->data, ctx->log, projection_block, projection, ctx->new_data_part.get(), ++block_num); tmp_part.finalize(); projection_parts[projection.name].emplace_back(std::move(tmp_part.part)); } @@ -1029,8 +1041,7 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() if (projection_block) { auto temp_part = MergeTreeDataWriter::writeTempProjectionPart( - *ctx->data, ctx->log, projection_block, projection, ctx->data_part_storage_builder, ctx->new_data_part.get(), ++block_num); - temp_part.builder->commit(); + *ctx->data, ctx->log, projection_block, projection, ctx->new_data_part.get(), ++block_num); temp_part.finalize(); projection_parts[projection.name].emplace_back(std::move(temp_part.part)); } @@ -1130,7 +1141,7 @@ private: void prepare() { - ctx->data_part_storage_builder->createDirectories(); + ctx->new_data_part->getDataPartStorage().createDirectories(); /// Note: this is done before creating input streams, because otherwise data.data_parts_mutex /// (which is locked in data.getTotalActiveSizeInBytes()) @@ -1165,7 +1176,6 @@ private: ctx->out = std::make_shared( ctx->new_data_part, - ctx->data_part_storage_builder, ctx->metadata_snapshot, 
ctx->new_data_part->getColumns(), skip_part_indices, @@ -1261,7 +1271,7 @@ private: if (ctx->execute_ttl_type != ExecuteTTLType::NONE) ctx->files_to_skip.insert("ttl.txt"); - ctx->data_part_storage_builder->createDirectories(); + ctx->new_data_part->getDataPartStorage().createDirectories(); /// We should write version metadata on part creation to distinguish it from parts that were created without transaction. TransactionID tid = ctx->txn ? ctx->txn->tid : Tx::PrehistoricTID; @@ -1272,7 +1282,7 @@ private: NameSet hardlinked_files; /// Create hardlinks for unchanged files - for (auto it = ctx->source_part->data_part_storage->iterate(); it->isValid(); it->next()) + for (auto it = ctx->source_part->getDataPartStorage().iterate(); it->isValid(); it->next()) { if (ctx->files_to_skip.contains(it->name())) continue; @@ -1298,22 +1308,22 @@ private: if (it->isFile()) { - ctx->data_part_storage_builder->createHardLinkFrom( - *ctx->source_part->data_part_storage, it->name(), destination); + ctx->new_data_part->getDataPartStorage().createHardLinkFrom( + ctx->source_part->getDataPartStorage(), it->name(), destination); hardlinked_files.insert(it->name()); } else if (!endsWith(it->name(), ".tmp_proj")) // ignore projection tmp merge dir { // it's a projection part directory - ctx->data_part_storage_builder->createProjection(destination); + ctx->new_data_part->getDataPartStorage().createProjection(destination); - auto projection_data_part_storage = ctx->source_part->data_part_storage->getProjection(destination); - auto projection_data_part_storage_builder = ctx->data_part_storage_builder->getProjection(destination); + auto projection_data_part_storage_src = ctx->source_part->getDataPartStorage().getProjection(destination); + auto projection_data_part_storage_dst = ctx->new_data_part->getDataPartStorage().getProjection(destination); - for (auto p_it = projection_data_part_storage->iterate(); p_it->isValid(); p_it->next()) + for (auto p_it = projection_data_part_storage_src->iterate(); p_it->isValid(); p_it->next()) { - projection_data_part_storage_builder->createHardLinkFrom( - *projection_data_part_storage, p_it->name(), p_it->name()); + projection_data_part_storage_dst->createHardLinkFrom( + *projection_data_part_storage_src, p_it->name(), p_it->name()); hardlinked_files.insert(p_it->name()); } } @@ -1343,7 +1353,6 @@ private: builder.addTransform(std::make_shared(builder.getHeader(), *ctx->data, ctx->metadata_snapshot, ctx->new_data_part, ctx->time_of_mutation, true)); ctx->out = std::make_shared( - ctx->data_part_storage_builder, ctx->new_data_part, ctx->metadata_snapshot, ctx->updated_header, @@ -1395,7 +1404,7 @@ private: } } - MutationHelpers::finalizeMutatedPart(ctx->source_part, ctx->data_part_storage_builder, ctx->new_data_part, ctx->execute_ttl_type, ctx->compression_codec, ctx->context); + MutationHelpers::finalizeMutatedPart(ctx->source_part, ctx->new_data_part, ctx->execute_ttl_type, ctx->compression_codec, ctx->context); } @@ -1565,10 +1574,7 @@ bool MutateTask::prepare() ctx->data->getRelativeDataPath(), tmp_part_dir_name); - ctx->data_part_storage_builder = std::make_shared( - single_disk_volume, - ctx->data->getRelativeDataPath(), - tmp_part_dir_name); + data_part_storage->beginTransaction(); ctx->new_data_part = ctx->data->createPart( ctx->future_part->name, ctx->future_part->type, ctx->future_part->part_info, data_part_storage); @@ -1671,9 +1677,4 @@ const MergeTreeData::HardlinkedFiles & MutateTask::getHardlinkedFiles() const return ctx->hardlinked_files; } 
-DataPartStorageBuilderPtr MutateTask::getBuilder() const -{ - return ctx->data_part_storage_builder; -} - } diff --git a/src/Storages/MergeTree/MutateTask.h b/src/Storages/MergeTree/MutateTask.h index 1f2e8a6fd20..3df30670d7f 100644 --- a/src/Storages/MergeTree/MutateTask.h +++ b/src/Storages/MergeTree/MutateTask.h @@ -46,7 +46,7 @@ public: const MergeTreeData::HardlinkedFiles & getHardlinkedFiles() const; - DataPartStorageBuilderPtr getBuilder() const; + MutableDataPartStoragePtr getBuilder() const; private: diff --git a/src/Storages/MergeTree/PartMetadataManagerOrdinary.cpp b/src/Storages/MergeTree/PartMetadataManagerOrdinary.cpp index 7eb868f7754..30823d593a2 100644 --- a/src/Storages/MergeTree/PartMetadataManagerOrdinary.cpp +++ b/src/Storages/MergeTree/PartMetadataManagerOrdinary.cpp @@ -8,20 +8,10 @@ namespace DB { -static std::unique_ptr openForReading(const DataPartStoragePtr & data_part_storage, const String & path) -{ - size_t file_size = data_part_storage->getFileSize(path); - return data_part_storage->readFile(path, ReadSettings().adjustBufferSize(file_size), file_size, std::nullopt); -} - -PartMetadataManagerOrdinary::PartMetadataManagerOrdinary(const IMergeTreeDataPart * part_) : IPartMetadataManager(part_) -{ -} - - std::unique_ptr PartMetadataManagerOrdinary::read(const String & file_name) const { - auto res = openForReading(part->data_part_storage, file_name); + size_t file_size = part->getDataPartStorage().getFileSize(file_name); + auto res = part->getDataPartStorage().readFile(file_name, ReadSettings().adjustBufferSize(file_size), file_size, std::nullopt); if (isCompressedFromFileName(file_name)) return std::make_unique(std::move(res)); @@ -31,7 +21,7 @@ std::unique_ptr PartMetadataManagerOrdinary::read(const String & fil bool PartMetadataManagerOrdinary::exists(const String & file_name) const { - return part->data_part_storage->exists(file_name); + return part->getDataPartStorage().exists(file_name); } diff --git a/src/Storages/MergeTree/PartMetadataManagerOrdinary.h b/src/Storages/MergeTree/PartMetadataManagerOrdinary.h index d86d5c54c00..428b6d4710a 100644 --- a/src/Storages/MergeTree/PartMetadataManagerOrdinary.h +++ b/src/Storages/MergeTree/PartMetadataManagerOrdinary.h @@ -8,7 +8,7 @@ namespace DB class PartMetadataManagerOrdinary : public IPartMetadataManager { public: - explicit PartMetadataManagerOrdinary(const IMergeTreeDataPart * part_); + explicit PartMetadataManagerOrdinary(const IMergeTreeDataPart * part_) : IPartMetadataManager(part_) {} ~PartMetadataManagerOrdinary() override = default; diff --git a/src/Storages/MergeTree/PartMetadataManagerWithCache.cpp b/src/Storages/MergeTree/PartMetadataManagerWithCache.cpp index ee0970984f9..90fd25bc4e7 100644 --- a/src/Storages/MergeTree/PartMetadataManagerWithCache.cpp +++ b/src/Storages/MergeTree/PartMetadataManagerWithCache.cpp @@ -31,24 +31,24 @@ PartMetadataManagerWithCache::PartMetadataManagerWithCache(const IMergeTreeDataP String PartMetadataManagerWithCache::getKeyFromFilePath(const String & file_path) const { - return part->data_part_storage->getDiskName() + ":" + file_path; + return part->getDataPartStorage().getDiskName() + ":" + file_path; } String PartMetadataManagerWithCache::getFilePathFromKey(const String & key) const { - return key.substr(part->data_part_storage->getDiskName().size() + 1); + return key.substr(part->getDataPartStorage().getDiskName().size() + 1); } std::unique_ptr PartMetadataManagerWithCache::read(const String & file_name) const { - String file_path = 
fs::path(part->data_part_storage->getRelativePath()) / file_name; + String file_path = fs::path(part->getDataPartStorage().getRelativePath()) / file_name; String key = getKeyFromFilePath(file_path); String value; auto status = cache->get(key, value); if (!status.ok()) { ProfileEvents::increment(ProfileEvents::MergeTreeMetadataCacheMiss); - auto in = part->data_part_storage->readFile(file_name, {}, std::nullopt, std::nullopt); + auto in = part->getDataPartStorage().readFile(file_name, {}, std::nullopt, std::nullopt); std::unique_ptr reader; if (!isCompressedFromFileName(file_name)) reader = std::move(in); @@ -67,7 +67,7 @@ std::unique_ptr PartMetadataManagerWithCache::read(const String & fi bool PartMetadataManagerWithCache::exists(const String & file_name) const { - String file_path = fs::path(part->data_part_storage->getRelativePath()) / file_name; + String file_path = fs::path(part->getDataPartStorage().getRelativePath()) / file_name; String key = getKeyFromFilePath(file_path); String value; auto status = cache->get(key, value); @@ -79,7 +79,7 @@ bool PartMetadataManagerWithCache::exists(const String & file_name) const else { ProfileEvents::increment(ProfileEvents::MergeTreeMetadataCacheMiss); - return part->data_part_storage->exists(file_name); + return part->getDataPartStorage().exists(file_name); } } @@ -91,7 +91,7 @@ void PartMetadataManagerWithCache::deleteAll(bool include_projection) String value; for (const auto & file_name : file_names) { - String file_path = fs::path(part->data_part_storage->getRelativePath()) / file_name; + String file_path = fs::path(part->getDataPartStorage().getRelativePath()) / file_name; String key = getKeyFromFilePath(file_path); auto status = cache->del(key); if (!status.ok()) @@ -119,10 +119,10 @@ void PartMetadataManagerWithCache::updateAll(bool include_projection) String read_value; for (const auto & file_name : file_names) { - String file_path = fs::path(part->data_part_storage->getRelativePath()) / file_name; - if (!part->data_part_storage->exists(file_name)) + String file_path = fs::path(part->getDataPartStorage().getRelativePath()) / file_name; + if (!part->getDataPartStorage().exists(file_name)) continue; - auto in = part->data_part_storage->readFile(file_name, {}, std::nullopt, std::nullopt); + auto in = part->getDataPartStorage().readFile(file_name, {}, std::nullopt, std::nullopt); readStringUntilEOF(value, *in); String key = getKeyFromFilePath(file_path); @@ -159,7 +159,7 @@ void PartMetadataManagerWithCache::assertAllDeleted(bool include_projection) con file_name = fs::path(file_path).filename(); /// Metadata file belongs to current part - if (fs::path(part->data_part_storage->getRelativePath()) / file_name == file_path) + if (fs::path(part->getDataPartStorage().getRelativePath()) / file_name == file_path) throw Exception( ErrorCodes::LOGICAL_ERROR, "Data part {} with type {} with meta file {} still in cache", @@ -173,7 +173,7 @@ void PartMetadataManagerWithCache::assertAllDeleted(bool include_projection) con const auto & projection_parts = part->getProjectionParts(); for (const auto & [projection_name, projection_part] : projection_parts) { - if (fs::path(part->data_part_storage->getRelativePath()) / (projection_name + ".proj") / file_name == file_path) + if (fs::path(part->getDataPartStorage().getRelativePath()) / (projection_name + ".proj") / file_name == file_path) { throw Exception( ErrorCodes::LOGICAL_ERROR, @@ -190,7 +190,7 @@ void PartMetadataManagerWithCache::assertAllDeleted(bool include_projection) con void 
PartMetadataManagerWithCache::getKeysAndCheckSums(Strings & keys, std::vector & checksums) const { - String prefix = getKeyFromFilePath(fs::path(part->data_part_storage->getRelativePath()) / ""); + String prefix = getKeyFromFilePath(fs::path(part->getDataPartStorage().getRelativePath()) / ""); Strings values; cache->getByPrefix(prefix, keys, values); size_t size = keys.size(); @@ -225,7 +225,7 @@ std::unordered_map PartMetadataManagerWit results.emplace(file_name, cache_checksums[i]); /// File belongs to normal part - if (fs::path(part->data_part_storage->getRelativePath()) / file_name == file_path) + if (fs::path(part->getDataPartStorage().getRelativePath()) / file_name == file_path) { auto disk_checksum = part->getActualChecksumByFile(file_name); if (disk_checksum != cache_checksums[i]) diff --git a/src/Storages/MergeTree/RPNBuilder.cpp b/src/Storages/MergeTree/RPNBuilder.cpp new file mode 100644 index 00000000000..d7ea68e7d64 --- /dev/null +++ b/src/Storages/MergeTree/RPNBuilder.cpp @@ -0,0 +1,417 @@ +#include + +#include + +#include +#include +#include +#include + +#include +#include + +#include +#include + +#include + +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +namespace +{ + +void appendColumnNameWithoutAlias(const ActionsDAG::Node & node, WriteBuffer & out, bool legacy = false) +{ + switch (node.type) + { + case ActionsDAG::ActionType::INPUT: + writeString(node.result_name, out); + break; + case ActionsDAG::ActionType::COLUMN: + { + /// If it was created from ASTLiteral, then result_name can be an alias. + /// We need to convert value back to string here. + if (const auto * column_const = typeid_cast(node.column.get())) + writeString(applyVisitor(FieldVisitorToString(), column_const->getField()), out); + /// It may be possible that column is ColumnSet + else + writeString(node.result_name, out); + break; + } + case ActionsDAG::ActionType::ALIAS: + appendColumnNameWithoutAlias(*node.children.front(), out, legacy); + break; + case ActionsDAG::ActionType::ARRAY_JOIN: + writeCString("arrayJoin(", out); + appendColumnNameWithoutAlias(*node.children.front(), out, legacy); + writeChar(')', out); + break; + case ActionsDAG::ActionType::FUNCTION: + { + auto name = node.function_base->getName(); + if (legacy && name == "modulo") + writeCString("moduloLegacy", out); + else + writeString(name, out); + + writeChar('(', out); + bool first = true; + for (const auto * arg : node.children) + { + if (!first) + writeCString(", ", out); + first = false; + + appendColumnNameWithoutAlias(*arg, out, legacy); + } + writeChar(')', out); + } + } +} + +String getColumnNameWithoutAlias(const ActionsDAG::Node & node, bool legacy = false) +{ + WriteBufferFromOwnString out; + appendColumnNameWithoutAlias(node, out, legacy); + return std::move(out.str()); +} + +} + +RPNBuilderTreeContext::RPNBuilderTreeContext(ContextPtr query_context_) + : query_context(std::move(query_context_)) +{} + +RPNBuilderTreeContext::RPNBuilderTreeContext(ContextPtr query_context_, Block block_with_constants_, PreparedSetsPtr prepared_sets_) + : query_context(std::move(query_context_)) + , block_with_constants(std::move(block_with_constants_)) + , prepared_sets(std::move(prepared_sets_)) +{} + +RPNBuilderTreeNode::RPNBuilderTreeNode(const ActionsDAG::Node * dag_node_, RPNBuilderTreeContext & tree_context_) + : dag_node(dag_node_) + , tree_context(tree_context_) +{ + assert(dag_node); +} + +RPNBuilderTreeNode::RPNBuilderTreeNode(const IAST * ast_node_, RPNBuilderTreeContext &
tree_context_) + : ast_node(ast_node_) + , tree_context(tree_context_) +{ + assert(ast_node); +} + +std::string RPNBuilderTreeNode::getColumnName() const +{ + if (ast_node) + return ast_node->getColumnNameWithoutAlias(); + else + return getColumnNameWithoutAlias(*dag_node); +} + +std::string RPNBuilderTreeNode::getColumnNameWithModuloLegacy() const +{ + if (ast_node) + { + auto adjusted_ast = ast_node->clone(); + KeyDescription::moduloToModuloLegacyRecursive(adjusted_ast); + return adjusted_ast->getColumnNameWithoutAlias(); + } + else + { + return getColumnNameWithoutAlias(*dag_node, true /*legacy*/); + } +} + +bool RPNBuilderTreeNode::isFunction() const +{ + if (ast_node) + return typeid_cast(ast_node); + else + return dag_node->type == ActionsDAG::ActionType::FUNCTION; +} + +bool RPNBuilderTreeNode::isConstant() const +{ + if (ast_node) + { + bool is_literal = typeid_cast(ast_node); + if (is_literal) + return true; + + String column_name = ast_node->getColumnName(); + const auto & block_with_constants = tree_context.getBlockWithConstants(); + + if (block_with_constants.has(column_name) && isColumnConst(*block_with_constants.getByName(column_name).column)) + return true; + + return false; + } + else + { + return dag_node->column && isColumnConst(*dag_node->column); + } +} + +ColumnWithTypeAndName RPNBuilderTreeNode::getConstantColumn() const +{ + if (!isConstant()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "RPNBuilderTree node is not a constant"); + + ColumnWithTypeAndName result; + + if (ast_node) + { + const auto * literal = assert_cast(ast_node); + if (literal) + { + result.type = applyVisitor(FieldToDataType(), literal->value); + result.column = result.type->createColumnConst(0, literal->value); + + return result; + } + + String column_name = ast_node->getColumnName(); + const auto & block_with_constants = tree_context.getBlockWithConstants(); + + return block_with_constants.getByName(column_name); + } + else + { + result.type = dag_node->result_type; + result.column = dag_node->column; + } + + return result; +} + +bool RPNBuilderTreeNode::tryGetConstant(Field & output_value, DataTypePtr & output_type) const +{ + if (ast_node) + { + // Constant expr should use alias names if any + String column_name = ast_node->getColumnName(); + const auto & block_with_constants = tree_context.getBlockWithConstants(); + + if (const auto * literal = ast_node->as()) + { + /// By default block_with_constants has only one column named "_dummy". + /// If block contains only constants it's may not be preprocessed by + // ExpressionAnalyzer, so try to look up in the default column. + if (!block_with_constants.has(column_name)) + column_name = "_dummy"; + + /// Simple literal + output_value = literal->value; + output_type = block_with_constants.getByName(column_name).type; + + /// If constant is not Null, we can assume it's type is not Nullable as well. 
+ if (!output_value.isNull()) + output_type = removeNullable(output_type); + + return true; + } + else if (block_with_constants.has(column_name) && + isColumnConst(*block_with_constants.getByName(column_name).column)) + { + /// An expression which is dependent on constants only + const auto & constant_column = block_with_constants.getByName(column_name); + output_value = (*constant_column.column)[0]; + output_type = constant_column.type; + + if (!output_value.isNull()) + output_type = removeNullable(output_type); + + return true; + } + } + else + { + if (dag_node->column && isColumnConst(*dag_node->column)) + { + output_value = (*dag_node->column)[0]; + output_type = dag_node->result_type; + + if (!output_value.isNull()) + output_type = removeNullable(output_type); + + return true; + } + } + + return false; +} + +namespace +{ + +ConstSetPtr tryGetSetFromDAGNode(const ActionsDAG::Node * dag_node) +{ + if (!dag_node->column) + return {}; + + const IColumn * column = dag_node->column.get(); + if (const auto * column_const = typeid_cast(column)) + column = &column_const->getDataColumn(); + + if (const auto * column_set = typeid_cast(column)) + { + auto set = column_set->getData(); + + if (set->isCreated()) + return set; + } + + return {}; +} + +} + +ConstSetPtr RPNBuilderTreeNode::tryGetPreparedSet() const +{ + const auto & prepared_sets = getTreeContext().getPreparedSets(); + + if (ast_node && prepared_sets) + { + auto prepared_sets_with_same_hash = prepared_sets->getByTreeHash(ast_node->getTreeHash()); + for (auto & set : prepared_sets_with_same_hash) + if (set->isCreated()) + return set; + } + else if (dag_node) + { + return tryGetSetFromDAGNode(dag_node); + } + + return {}; +} + +ConstSetPtr RPNBuilderTreeNode::tryGetPreparedSet(const DataTypes & data_types) const +{ + const auto & prepared_sets = getTreeContext().getPreparedSets(); + + if (prepared_sets && ast_node) + { + if (ast_node->as() || ast_node->as()) + return prepared_sets->get(PreparedSetKey::forSubquery(*ast_node)); + + return prepared_sets->get(PreparedSetKey::forLiteral(*ast_node, data_types)); + } + else if (dag_node) + { + return tryGetSetFromDAGNode(dag_node); + } + + return nullptr; +} + +ConstSetPtr RPNBuilderTreeNode::tryGetPreparedSet( + const std::vector & indexes_mapping, + const DataTypes & data_types) const +{ + const auto & prepared_sets = getTreeContext().getPreparedSets(); + + if (prepared_sets && ast_node) + { + if (ast_node->as() || ast_node->as()) + return prepared_sets->get(PreparedSetKey::forSubquery(*ast_node)); + + /// We have `PreparedSetKey::forLiteral` but it is useless here as we don't have enough information + /// about types in left argument of the IN operator. Instead, we manually iterate through all the sets + /// and find the one for the right arg based on the AST structure (getTreeHash), after that we check + /// that the types it was prepared with are compatible with the types of the primary key. 
+ auto types_match = [&indexes_mapping, &data_types](const SetPtr & candidate_set) + { + assert(indexes_mapping.size() == data_types.size()); + + for (size_t i = 0; i < indexes_mapping.size(); ++i) + { + if (!candidate_set->areTypesEqual(indexes_mapping[i].tuple_index, data_types[i])) + return false; + } + + return true; + }; + + auto tree_hash = ast_node->getTreeHash(); + for (const auto & set : prepared_sets->getByTreeHash(tree_hash)) + { + if (types_match(set)) + return set; + } + } + else if (dag_node->column) + { + return tryGetSetFromDAGNode(dag_node); + } + + return nullptr; +} + +RPNBuilderFunctionTreeNode RPNBuilderTreeNode::toFunctionNode() const +{ + if (!isFunction()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "RPNBuilderTree node is not a function"); + + if (this->ast_node) + return RPNBuilderFunctionTreeNode(this->ast_node, tree_context); + else + return RPNBuilderFunctionTreeNode(this->dag_node, tree_context); +} + +std::optional RPNBuilderTreeNode::toFunctionNodeOrNull() const +{ + if (!isFunction()) + return {}; + + if (this->ast_node) + return RPNBuilderFunctionTreeNode(this->ast_node, tree_context); + else + return RPNBuilderFunctionTreeNode(this->dag_node, tree_context); +} + +std::string RPNBuilderFunctionTreeNode::getFunctionName() const +{ + if (ast_node) + return assert_cast(ast_node)->name; + else + return dag_node->function_base->getName(); +} + +size_t RPNBuilderFunctionTreeNode::getArgumentsSize() const +{ + if (ast_node) + { + const auto * ast_function = assert_cast(ast_node); + return ast_function->arguments ? ast_function->arguments->children.size() : 0; + } + else + { + return dag_node->children.size(); + } +} + +RPNBuilderTreeNode RPNBuilderFunctionTreeNode::getArgumentAt(size_t index) const +{ + if (ast_node) + { + const auto * ast_function = assert_cast(ast_node); + return RPNBuilderTreeNode(ast_function->arguments->children[index].get(), tree_context); + } + else + { + return RPNBuilderTreeNode(dag_node->children[index], tree_context); + } +} + +} diff --git a/src/Storages/MergeTree/RPNBuilder.h b/src/Storages/MergeTree/RPNBuilder.h index 27b616dc301..132d3aa44e8 100644 --- a/src/Storages/MergeTree/RPNBuilder.h +++ b/src/Storages/MergeTree/RPNBuilder.h @@ -1,111 +1,266 @@ #pragma once #include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include namespace DB { -/// Builds reverse polish notation -template -class RPNBuilder : WithContext +/** Context of RPNBuilderTree. + * + * For AST tree context, precalculated block with constants and prepared sets are required for index analysis. + * For DAG tree precalculated block with constants and prepared sets are not required, because constants and sets already + * calculated inside COLUMN actions dag node. 
+ */ +class RPNBuilderTreeContext { public: - using RPN = std::vector; - using AtomFromASTFunc = std::function< - bool(const ASTPtr & node, ContextPtr context, Block & block_with_constants, RPNElement & out)>; + /// Construct RPNBuilderTreeContext for ActionsDAG tree + explicit RPNBuilderTreeContext(ContextPtr query_context_); - RPNBuilder(const SelectQueryInfo & query_info, ContextPtr context_, const AtomFromASTFunc & atom_from_ast_) - : WithContext(context_), atom_from_ast(atom_from_ast_) + /// Construct RPNBuilderTreeContext for AST tree + explicit RPNBuilderTreeContext(ContextPtr query_context_, Block block_with_constants_, PreparedSetsPtr prepared_sets_); + + /// Get query context + const ContextPtr & getQueryContext() const { - /** Evaluation of expressions that depend only on constants. - * For the index to be used, if it is written, for example `WHERE Date = toDate(now())`. - */ - block_with_constants = KeyCondition::getBlockWithConstants(query_info.query, query_info.syntax_analyzer_result, getContext()); - - /// Transform WHERE section to Reverse Polish notation - const ASTSelectQuery & select = typeid_cast(*query_info.query); - if (select.where()) - { - traverseAST(select.where()); - - if (select.prewhere()) - { - traverseAST(select.prewhere()); - rpn.emplace_back(RPNElement::FUNCTION_AND); - } - } - else if (select.prewhere()) - { - traverseAST(select.prewhere()); - } - else - { - rpn.emplace_back(RPNElement::FUNCTION_UNKNOWN); - } + return query_context; } - RPN && extractRPN() { return std::move(rpn); } + /** Get block with constants. + * Valid only for AST tree. + */ + const Block & getBlockWithConstants() const + { + return block_with_constants; + } + + /** Get prepared sets. + * Valid only for AST tree. + */ + const PreparedSetsPtr & getPreparedSets() const + { + return prepared_sets; + } private: - void traverseAST(const ASTPtr & node) + /// Valid for both AST and ActionDAG tree + ContextPtr query_context; + + /// Valid only for AST tree + Block block_with_constants; + + /// Valid only for AST tree + PreparedSetsPtr prepared_sets; +}; + +class RPNBuilderFunctionTreeNode; + +/** RPNBuilderTreeNode is wrapper around DAG or AST node. + * It defines unified interface for index analysis. + */ +class RPNBuilderTreeNode +{ +public: + /// Construct RPNBuilderTreeNode with non null dag node and tree context + explicit RPNBuilderTreeNode(const ActionsDAG::Node * dag_node_, RPNBuilderTreeContext & tree_context_); + + /// Construct RPNBuilderTreeNode with non null ast node and tree context + explicit RPNBuilderTreeNode(const IAST * ast_node_, RPNBuilderTreeContext & tree_context_); + + /// Get column name + std::string getColumnName() const; + + /** Get column name. + * Function `modulo` is replaced with `moduloLegacy`. + */ + std::string getColumnNameWithModuloLegacy() const; + + /// Is node function + bool isFunction() const; + + /// Is node constant + bool isConstant() const; + + /** Get constant as constant column. + * Node must be constant before calling these method, otherwise logical exception is thrown. + */ + ColumnWithTypeAndName getConstantColumn() const; + + /** Try get constant from node. If node is constant returns true, and constant value and constant type output parameters are set. + * Otherwise false is returned. 
+ */ + bool tryGetConstant(Field & output_value, DataTypePtr & output_type) const; + + /// Try to get a prepared set from the node + ConstSetPtr tryGetPreparedSet() const; + + /// Try to get a prepared set from the node that matches the data types + ConstSetPtr tryGetPreparedSet(const DataTypes & data_types) const; + + /// Try to get a prepared set from the node that matches the indexes mapping and data types + ConstSetPtr tryGetPreparedSet( + const std::vector & indexes_mapping, + const DataTypes & data_types) const; + + /** Convert node to function node. + * Node must be a function before calling this method, otherwise an exception is thrown. + */ + RPNBuilderFunctionTreeNode toFunctionNode() const; + + /// Convert node to function node or null optional + std::optional toFunctionNodeOrNull() const; + + /// Get tree context + const RPNBuilderTreeContext & getTreeContext() const + { + return tree_context; + } + + /// Get tree context + RPNBuilderTreeContext & getTreeContext() + { + return tree_context; + } + +protected: + const IAST * ast_node = nullptr; + const ActionsDAG::Node * dag_node = nullptr; + RPNBuilderTreeContext & tree_context; +}; + +/** RPNBuilderFunctionTreeNode is a wrapper around RPNBuilderTreeNode with function type. + * It provides additional functionality that is specific to functions. + */ +class RPNBuilderFunctionTreeNode : public RPNBuilderTreeNode +{ +public: + using RPNBuilderTreeNode::RPNBuilderTreeNode; + + /// Get function name + std::string getFunctionName() const; + + /// Get function arguments size + size_t getArgumentsSize() const; + + /// Get function argument at index + RPNBuilderTreeNode getArgumentAt(size_t index) const; +}; + +/** RPNBuilder builds a stack of reverse polish notation elements (RPNElements) required for index analysis. + * + * The RPNBuilder client must provide an RPNElement type that has the following interface: + * + * struct RPNElementInterface + * { + * enum Function + * { + * FUNCTION_UNKNOWN, /// Can take any value. + * /// Operators of the logical expression. + * FUNCTION_NOT, + * FUNCTION_AND, + * FUNCTION_OR, + * ... + * }; + * + * RPNElementInterface(); + * + * Function function = FUNCTION_UNKNOWN; + * + * } + * + * RPNBuilder takes care of building the stack of RPNElements with `NOT`, `AND`, `OR` types. + * In addition, the client must provide an ExtractAtomFromTreeFunction that returns true and fills the RPNElement output parameter + * if it can convert an RPNBuilderTree node to an RPNElement, and false otherwise.
+ */ +template +class RPNBuilder +{ +public: + using RPNElements = std::vector; + using ExtractAtomFromTreeFunction = std::function; + + explicit RPNBuilder(const ActionsDAG::Node * filter_actions_dag_node, + ContextPtr query_context_, + const ExtractAtomFromTreeFunction & extract_atom_from_tree_function_) + : tree_context(std::move(query_context_)) + , extract_atom_from_tree_function(extract_atom_from_tree_function_) + { + traverseTree(RPNBuilderTreeNode(filter_actions_dag_node, tree_context)); + } + + RPNBuilder(const ASTPtr & filter_node, + ContextPtr query_context_, + Block block_with_constants_, + PreparedSetsPtr prepared_sets_, + const ExtractAtomFromTreeFunction & extract_atom_from_tree_function_) + : tree_context(std::move(query_context_), std::move(block_with_constants_), std::move(prepared_sets_)) + , extract_atom_from_tree_function(extract_atom_from_tree_function_) + { + traverseTree(RPNBuilderTreeNode(filter_node.get(), tree_context)); + } + + RPNElements && extractRPN() && { return std::move(rpn_elements); } + +private: + void traverseTree(const RPNBuilderTreeNode & node) { RPNElement element; - if (ASTFunction * func = typeid_cast(&*node)) + if (node.isFunction()) { - if (operatorFromAST(func, element)) + auto function_node = node.toFunctionNode(); + + if (extractLogicalOperatorFromTree(function_node, element)) { - auto & args = typeid_cast(*func->arguments).children; - for (size_t i = 0, size = args.size(); i < size; ++i) + size_t arguments_size = function_node.getArgumentsSize(); + + for (size_t argument_index = 0; argument_index < arguments_size; ++argument_index) { - traverseAST(args[i]); + auto function_node_argument = function_node.getArgumentAt(argument_index); + traverseTree(function_node_argument); /** The first part of the condition is for the correct support of `and` and `or` functions of arbitrary arity * - in this case `n - 1` elements are added (where `n` is the number of arguments). */ - if (i != 0 || element.function == RPNElement::FUNCTION_NOT) - rpn.emplace_back(std::move(element)); + if (argument_index != 0 || element.function == RPNElement::FUNCTION_NOT) + rpn_elements.emplace_back(std::move(element)); } return; } } - if (!atom_from_ast(node, getContext(), block_with_constants, element)) - { + if (!extract_atom_from_tree_function(node, element)) element.function = RPNElement::FUNCTION_UNKNOWN; - } - rpn.emplace_back(std::move(element)); + rpn_elements.emplace_back(std::move(element)); } - bool operatorFromAST(const ASTFunction * func, RPNElement & out) + bool extractLogicalOperatorFromTree(const RPNBuilderFunctionTreeNode & function_node, RPNElement & out) { - /// Functions AND, OR, NOT. - /// Also a special function `indexHint` - works as if instead of calling a function there are just parentheses - /// (or, the same thing - calling the function `and` from one argument). - const ASTs & args = typeid_cast(*func->arguments).children; + /** Functions AND, OR, NOT. + * Also a special function `indexHint` - works as if instead of calling a function there are just parentheses + * (or, the same thing - calling the function `and` from one argument). 
+ */ - if (func->name == "not") + auto function_name = function_node.getFunctionName(); + if (function_name == "not") { - if (args.size() != 1) + if (function_node.getArgumentsSize() != 1) return false; out.function = RPNElement::FUNCTION_NOT; } else { - if (func->name == "and" || func->name == "indexHint") + if (function_name == "and" || function_name == "indexHint") out.function = RPNElement::FUNCTION_AND; - else if (func->name == "or") + else if (function_name == "or") out.function = RPNElement::FUNCTION_OR; else return false; @@ -114,10 +269,9 @@ private: return true; } - const AtomFromASTFunc & atom_from_ast; - Block block_with_constants; - RPN rpn; + RPNBuilderTreeContext tree_context; + const ExtractAtomFromTreeFunction & extract_atom_from_tree_function; + RPNElements rpn_elements; }; - } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp index 3936ee61b70..7993840f1d9 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp @@ -419,14 +419,14 @@ void ReplicatedMergeTreeCleanupThread::getBlocksSortedByTime(zkutil::ZooKeeper & LOG_TRACE(log, "Checking {} blocks ({} are not cached){}", stat.numChildren, not_cached_blocks, " to clear old ones from ZooKeeper."); } - zkutil::AsyncResponses exists_futures; + std::vector exists_paths; for (const String & block : blocks) { auto it = cached_block_stats.find(block); if (it == cached_block_stats.end()) { /// New block. Fetch its stat asynchronously. - exists_futures.emplace_back(block, zookeeper.asyncExists(storage.zookeeper_path + "/blocks/" + block)); + exists_paths.emplace_back(storage.zookeeper_path + "/blocks/" + block); } else { @@ -436,14 +436,18 @@ void ReplicatedMergeTreeCleanupThread::getBlocksSortedByTime(zkutil::ZooKeeper & } } + auto exists_size = exists_paths.size(); + auto exists_results = zookeeper.exists(exists_paths); + /// Put fetched stats into the cache - for (auto & elem : exists_futures) + for (size_t i = 0; i < exists_size; ++i) { - auto status = elem.second.get(); + auto status = exists_results[i]; if (status.error != Coordination::Error::ZNONODE) { - cached_block_stats.emplace(elem.first, std::make_pair(status.stat.ctime, status.stat.version)); - timed_blocks.emplace_back(elem.first, status.stat.ctime, status.stat.version); + auto node_name = fs::path(exists_paths[i]).filename(); + cached_block_stats.emplace(node_name, std::make_pair(status.stat.ctime, status.stat.version)); + timed_blocks.emplace_back(node_name, status.stat.ctime, status.stat.version); } } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp index e7882ce4952..d7e3c3b1955 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp @@ -240,7 +240,7 @@ void ReplicatedMergeTreeLogEntryData::readText(ReadBuffer & in) if (checkString("merge_type: ", in)) { - UInt64 value; + UInt32 value; in >> value; merge_type = checkAndGetMergeType(value); } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeMergeStrategyPicker.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeMergeStrategyPicker.cpp index ce33ac8c467..626295d7255 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeMergeStrategyPicker.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeMergeStrategyPicker.cpp @@ -91,8 +91,8 @@ std::optional 
ReplicatedMergeTreeMergeStrategyPicker::pickReplicaToExecu void ReplicatedMergeTreeMergeStrategyPicker::refreshState() { const auto settings = storage.getSettings(); - auto threshold = settings->execute_merges_on_single_replica_time_threshold.totalSeconds(); - auto threshold_init = 0; + time_t threshold = settings->execute_merges_on_single_replica_time_threshold.totalSeconds(); + time_t threshold_init = 0; if (settings->allow_remote_fs_zero_copy_replication) threshold_init = settings->remote_fs_execute_merges_on_single_replica_time_threshold.totalSeconds(); @@ -127,7 +127,7 @@ void ReplicatedMergeTreeMergeStrategyPicker::refreshState() active_replicas_tmp.push_back(replica); if (replica == storage.replica_name) { - current_replica_index_tmp = active_replicas_tmp.size() - 1; + current_replica_index_tmp = static_cast(active_replicas_tmp.size() - 1); } } } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 0305ce440f9..d6d937ce66f 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -41,7 +41,7 @@ ReplicatedMergeTreeQueue::ReplicatedMergeTreeQueue(StorageReplicatedMergeTree & void ReplicatedMergeTreeQueue::clear() { auto locks = lockQueue(); - assert(future_parts.empty()); + chassert(future_parts.empty()); current_parts.clear(); virtual_parts.clear(); queue.clear(); @@ -62,6 +62,7 @@ void ReplicatedMergeTreeQueue::setBrokenPartsToEnqueueFetchesOnLoading(Strings & void ReplicatedMergeTreeQueue::initialize(zkutil::ZooKeeperPtr zookeeper) { + clear(); std::lock_guard lock(state_mutex); LOG_TRACE(log, "Initializing parts in queue"); @@ -153,17 +154,19 @@ bool ReplicatedMergeTreeQueue::load(zkutil::ZooKeeperPtr zookeeper) ::sort(children.begin(), children.end()); - zkutil::AsyncResponses futures; - futures.reserve(children.size()); + auto children_num = children.size(); + std::vector paths; + paths.reserve(children_num); for (const String & child : children) - futures.emplace_back(child, zookeeper->asyncGet(fs::path(queue_path) / child)); + paths.emplace_back(fs::path(queue_path) / child); - for (auto & future : futures) + auto results = zookeeper->get(paths); + for (size_t i = 0; i < children_num; ++i) { - Coordination::GetResponse res = future.second.get(); + auto res = results[i]; LogEntryPtr entry = LogEntry::parse(res.data, res.stat); - entry->znode_name = future.first; + entry->znode_name = children[i]; std::lock_guard lock(state_mutex); @@ -641,11 +644,11 @@ int32_t ReplicatedMergeTreeQueue::pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper LOG_DEBUG(log, "Pulling {} entries to queue: {} - {}", (end - begin), *begin, *last); - zkutil::AsyncResponses futures; - futures.reserve(end - begin); + Strings get_paths; + get_paths.reserve(end - begin); for (auto it = begin; it != end; ++it) - futures.emplace_back(*it, zookeeper->asyncGet(fs::path(zookeeper_path) / "log" / *it)); + get_paths.emplace_back(fs::path(zookeeper_path) / "log" / *it); /// Simultaneously add all new entries to the queue and move the pointer to the log. 
@@ -655,9 +658,11 @@ int32_t ReplicatedMergeTreeQueue::pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper std::optional min_unprocessed_insert_time_changed; - for (auto & future : futures) + auto get_results = zookeeper->get(get_paths); + auto get_num = get_results.size(); + for (size_t i = 0; i < get_num; ++i) { - Coordination::GetResponse res = future.second.get(); + auto res = get_results[i]; copied_entries.emplace_back(LogEntry::parse(res.data, res.stat)); @@ -1804,9 +1809,9 @@ ReplicatedMergeTreeQueue::Status ReplicatedMergeTreeQueue::getStatus() const Status res; - res.future_parts = future_parts.size(); - res.queue_size = queue.size(); - res.last_queue_update = last_queue_update; + res.future_parts = static_cast(future_parts.size()); + res.queue_size = static_cast(queue.size()); + res.last_queue_update = static_cast(last_queue_update); res.inserts_in_queue = 0; res.merges_in_queue = 0; @@ -1819,7 +1824,7 @@ ReplicatedMergeTreeQueue::Status ReplicatedMergeTreeQueue::getStatus() const for (const LogEntryPtr & entry : queue) { if (entry->create_time && (!res.queue_oldest_time || entry->create_time < res.queue_oldest_time)) - res.queue_oldest_time = entry->create_time; + res.queue_oldest_time = static_cast(entry->create_time); if (entry->type == LogEntry::GET_PART || entry->type == LogEntry::ATTACH_PART) { @@ -1827,7 +1832,7 @@ ReplicatedMergeTreeQueue::Status ReplicatedMergeTreeQueue::getStatus() const if (entry->create_time && (!res.inserts_oldest_time || entry->create_time < res.inserts_oldest_time)) { - res.inserts_oldest_time = entry->create_time; + res.inserts_oldest_time = static_cast(entry->create_time); res.oldest_part_to_get = entry->new_part_name; } } @@ -1838,7 +1843,7 @@ ReplicatedMergeTreeQueue::Status ReplicatedMergeTreeQueue::getStatus() const if (entry->create_time && (!res.merges_oldest_time || entry->create_time < res.merges_oldest_time)) { - res.merges_oldest_time = entry->create_time; + res.merges_oldest_time = static_cast(entry->create_time); res.oldest_part_to_merge_to = entry->new_part_name; } } @@ -1849,7 +1854,7 @@ ReplicatedMergeTreeQueue::Status ReplicatedMergeTreeQueue::getStatus() const if (entry->create_time && (!res.part_mutations_oldest_time || entry->create_time < res.part_mutations_oldest_time)) { - res.part_mutations_oldest_time = entry->create_time; + res.part_mutations_oldest_time = static_cast(entry->create_time); res.oldest_part_to_mutate_to = entry->new_part_name; } } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h index 6d1a3efb01d..2ebdd604af2 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h @@ -425,6 +425,7 @@ public: struct Status { + /// TODO: consider using UInt64 here UInt32 future_parts; UInt32 queue_size; UInt32 inserts_in_queue; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp index e2b23d75746..10ec4702b53 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include @@ -26,19 +27,12 @@ namespace DB namespace ErrorCodes { extern const int REPLICA_IS_ALREADY_ACTIVE; - extern const int REPLICA_STATUS_CHANGED; - -} - -namespace -{ - constexpr auto retry_period_ms = 1000; } /// Used to check whether it's us who set node `is_active`, or not. 
static String generateActiveNodeIdentifier() { - return "pid: " + toString(getpid()) + ", random: " + toString(randomSeed()); + return Field(ServerUUID::get()).dump(); } ReplicatedMergeTreeRestartingThread::ReplicatedMergeTreeRestartingThread(StorageReplicatedMergeTree & storage_) @@ -58,27 +52,34 @@ void ReplicatedMergeTreeRestartingThread::run() if (need_stop) return; - size_t reschedule_period_ms = check_period_ms; + /// In case of any exceptions we want to rerun this task as fast as possible, but we also don't want to keep retrying immediately + /// in a tight loop (as fast as tasks can be processed), so we'll retry between 100 and 10000 ms + const size_t backoff_ms = 100 * ((consecutive_check_failures + 1) * (consecutive_check_failures + 2)) / 2; + const size_t next_failure_retry_ms = std::min(size_t{10000}, backoff_ms); try { bool replica_is_active = runImpl(); - if (!replica_is_active) - reschedule_period_ms = retry_period_ms; - } - catch (const Exception & e) - { - /// We couldn't activate table let's set it into readonly mode - partialShutdown(); - tryLogCurrentException(log, __PRETTY_FUNCTION__); - - if (e.code() == ErrorCodes::REPLICA_STATUS_CHANGED) - reschedule_period_ms = 0; + if (replica_is_active) + { + consecutive_check_failures = 0; + task->scheduleAfter(check_period_ms); + } + else + { + consecutive_check_failures++; + task->scheduleAfter(next_failure_retry_ms); + } } catch (...) { + consecutive_check_failures++; + task->scheduleAfter(next_failure_retry_ms); + + /// We couldn't activate the table, so let's set it into readonly mode if necessary + /// We do this after scheduling the task in case it throws partialShutdown(); - tryLogCurrentException(log, __PRETTY_FUNCTION__); + tryLogCurrentException(log, "Failed to restart the table. Will try again"); } if (first_time) @@ -92,14 +93,6 @@ void ReplicatedMergeTreeRestartingThread::run() storage.startup_event.set(); first_time = false; } - - if (need_stop) - return; - - if (reschedule_period_ms) - task->scheduleAfter(reschedule_period_ms); - else - task->schedule(); } bool ReplicatedMergeTreeRestartingThread::runImpl() @@ -132,8 +125,8 @@ bool ReplicatedMergeTreeRestartingThread::runImpl() } catch (const Coordination::Exception &) { - /// The exception when you try to zookeeper_init usually happens if DNS does not work. We will try to do it again. - tryLogCurrentException(log, __PRETTY_FUNCTION__); + /// The exception when you try to zookeeper_init usually happens if DNS does not work or the connection with ZK fails + tryLogCurrentException(log, "Failed to establish a new ZK connection.
Will try again"); assert(storage.is_readonly); return false; } @@ -158,12 +151,15 @@ bool ReplicatedMergeTreeRestartingThread::runImpl() storage.cleanup_thread.start(); storage.part_check_thread.start(); + LOG_DEBUG(log, "Table started successfully"); + return true; } bool ReplicatedMergeTreeRestartingThread::tryStartup() { + LOG_DEBUG(log, "Trying to start replica up"); try { removeFailedQuorumParts(); @@ -177,9 +173,7 @@ bool ReplicatedMergeTreeRestartingThread::tryStartup() try { storage.queue.initialize(zookeeper); - storage.queue.load(zookeeper); - storage.queue.createLogEntriesToFetchBrokenParts(); /// pullLogsToQueue() after we mark replica 'is_active' (and after we repair if it was lost); @@ -302,7 +296,7 @@ void ReplicatedMergeTreeRestartingThread::activateReplica() ReplicatedMergeTreeAddress address = storage.getReplicatedMergeTreeAddress(); String is_active_path = fs::path(storage.replica_path) / "is_active"; - zookeeper->waitForEphemeralToDisappearIfAny(is_active_path); + zookeeper->handleEphemeralNodeExistence(is_active_path, active_node_identifier); /// Simultaneously declare that this replica is active, and update the host. Coordination::Requests ops; @@ -348,7 +342,6 @@ void ReplicatedMergeTreeRestartingThread::partialShutdown(bool part_of_full_shut storage.replica_is_active_node = nullptr; LOG_TRACE(log, "Waiting for threads to finish"); - storage.merge_selecting_task->deactivate(); storage.queue_updating_task->deactivate(); storage.mutations_updating_task->deactivate(); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h index 3d443a236ed..bb4b0c0fdd2 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h @@ -41,6 +41,7 @@ private: BackgroundSchedulePool::TaskHolder task; Int64 check_period_ms; /// The frequency of checking expiration of session in ZK. + UInt32 consecutive_check_failures = 0; /// How many consecutive checks have failed bool first_time = true; /// Activate replica for the first time. 
void run(); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index b9bd027cde2..dbc2bd98e20 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -1,10 +1,10 @@ #include #include #include -#include #include #include #include +#include #include #include @@ -99,19 +99,22 @@ size_t ReplicatedMergeTreeSink::checkQuorumPrecondition(zkutil::ZooKeeperPtr & z quorum_info.status_path = storage.zookeeper_path + "/quorum/status"; Strings replicas = zookeeper->getChildren(fs::path(storage.zookeeper_path) / "replicas"); - std::vector> replicas_status_futures; - replicas_status_futures.reserve(replicas.size()); + + Strings exists_paths; for (const auto & replica : replicas) if (replica != storage.replica_name) - replicas_status_futures.emplace_back(zookeeper->asyncExists(fs::path(storage.zookeeper_path) / "replicas" / replica / "is_active")); + exists_paths.emplace_back(fs::path(storage.zookeeper_path) / "replicas" / replica / "is_active"); - std::future is_active_future = zookeeper->asyncTryGet(storage.replica_path + "/is_active"); - std::future host_future = zookeeper->asyncTryGet(storage.replica_path + "/host"); + auto exists_result = zookeeper->exists(exists_paths); + auto get_results = zookeeper->get(Strings{storage.replica_path + "/is_active", storage.replica_path + "/host"}); size_t active_replicas = 1; /// Assume current replica is active (will check below) - for (auto & status : replicas_status_futures) - if (status.get().error == Coordination::Error::ZOK) + for (size_t i = 0; i < exists_paths.size(); ++i) + { + auto status = exists_result[i]; + if (status.error == Coordination::Error::ZOK) ++active_replicas; + } size_t replicas_number = replicas.size(); size_t quorum_size = getQuorumSize(replicas_number); @@ -135,8 +138,8 @@ size_t ReplicatedMergeTreeSink::checkQuorumPrecondition(zkutil::ZooKeeperPtr & z /// Both checks are implicitly made also later (otherwise there would be a race condition). 
- auto is_active = is_active_future.get(); - auto host = host_future.get(); + auto is_active = get_results[0]; + auto host = get_results[1]; if (is_active.error == Coordination::Error::ZNONODE || host.error == Coordination::Error::ZNONODE) throw Exception("Replica is not active right now", ErrorCodes::READONLY); @@ -162,7 +165,9 @@ void ReplicatedMergeTreeSink::consume(Chunk chunk) */ size_t replicas_num = checkQuorumPrecondition(zookeeper); - deduceTypesOfObjectColumns(storage_snapshot, block); + if (!storage_snapshot->object_columns.empty()) + convertDynamicColumnsToTuples(block, storage_snapshot); + auto part_blocks = storage.writer.splitBlockIntoParts(block, max_parts_per_block, metadata_snapshot, context); using DelayedPartitions = std::vector; @@ -265,7 +270,7 @@ void ReplicatedMergeTreeSink::finishDelayedChunk(zkutil::ZooKeeperPtr & zookeepe try { - commitPart(zookeeper, part, partition.block_id, partition.temp_part.builder, delayed_chunk->replicas_num); + commitPart(zookeeper, part, partition.block_id, delayed_chunk->replicas_num); last_block_is_duplicate = last_block_is_duplicate || part->is_duplicate; @@ -298,7 +303,7 @@ void ReplicatedMergeTreeSink::writeExistingPart(MergeTreeData::MutableDataPartPt try { part->version.setCreationTID(Tx::PrehistoricTID, nullptr); - commitPart(zookeeper, part, "", part->data_part_storage->getBuilder(), replicas_num); + commitPart(zookeeper, part, "", replicas_num); PartLog::addNewPart(storage.getContext(), part, watch.elapsed()); } catch (...) @@ -312,13 +317,17 @@ void ReplicatedMergeTreeSink::commitPart( zkutil::ZooKeeperPtr & zookeeper, MergeTreeData::MutableDataPartPtr & part, const String & block_id, - DataPartStorageBuilderPtr builder, size_t replicas_num) { - metadata_snapshot->check(part->getColumns()); + /// It is possible that we alter a part with different types of source columns. + /// In this case, if the column was not altered, the result type will be different from what we have in metadata. + /// For now, consider it ok. See 02461_alter_update_respect_part_column_type_bug for an example. + /// + /// metadata_snapshot->check(part->getColumns()); + assertSessionIsNotExpired(zookeeper); - String temporary_part_relative_path = part->data_part_storage->getPartDirectory(); + String temporary_part_relative_path = part->getDataPartStorage().getPartDirectory(); /// There is one case when we need to retry transaction in a loop. /// But don't do it too many times - just as defensive measure. @@ -491,7 +500,7 @@ void ReplicatedMergeTreeSink::commitPart( try { auto lock = storage.lockParts(); - renamed = storage.renameTempPartAndAdd(part, transaction, builder, lock); + renamed = storage.renameTempPartAndAdd(part, transaction, lock); } catch (const Exception & e) { @@ -555,8 +564,7 @@ void ReplicatedMergeTreeSink::commitPart( transaction.rollbackPartsToTemporaryState(); part->is_temp = true; - part->renameTo(temporary_part_relative_path, false, builder); - builder->commit(); + part->renameTo(temporary_part_relative_path, false); /// If this part appeared on another replica then it's better to try to write it locally one more time. If it's our part /// then it will be ignored on the next iteration.
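A recurring pattern in this change set is replacing per-node asynchronous ZooKeeper calls (asyncExists/asyncGet futures) with a single batched exists()/get() over a vector of paths, where the i-th result corresponds to the i-th path. The snippet below is only a sketch of that shape, not code from the patch: the helper name countActiveReplicas and its parameters are illustrative, and it assumes a connected zkutil::ZooKeeper and the multi-path exists() overload used in the hunks above.

/// Sketch: count active replicas with one batched exists() round trip instead of N async futures.
size_t countActiveReplicas(zkutil::ZooKeeper & zookeeper, const String & zookeeper_path, const Strings & replicas, const String & current_replica)
{
    Strings exists_paths;
    for (const String & replica : replicas)
        if (replica != current_replica)
            exists_paths.emplace_back(fs::path(zookeeper_path) / "replicas" / replica / "is_active");

    /// One request for all paths; exists_results[i] corresponds to exists_paths[i].
    auto exists_results = zookeeper.exists(exists_paths);

    size_t active_replicas = 1; /// assume the current replica is active, as checkQuorumPrecondition does
    for (size_t i = 0; i < exists_paths.size(); ++i)
        if (exists_results[i].error == Coordination::Error::ZOK)
            ++active_replicas;

    return active_replicas;
}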
diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h index ab729e6edec..da87ddc0d63 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h @@ -79,7 +79,6 @@ private: zkutil::ZooKeeperPtr & zookeeper, MergeTreeData::MutableDataPartPtr & part, const String & block_id, - DataPartStorageBuilderPtr part_builder, size_t replicas_num); /// Wait for quorum to be satisfied on path (quorum_path) form part (part_name) diff --git a/src/Storages/MergeTree/SimpleMergeSelector.cpp b/src/Storages/MergeTree/SimpleMergeSelector.cpp index 3b71e2720c8..f9ed6aedc60 100644 --- a/src/Storages/MergeTree/SimpleMergeSelector.cpp +++ b/src/Storages/MergeTree/SimpleMergeSelector.cpp @@ -102,6 +102,9 @@ bool allow( double max_size_to_lower_base_log, const SimpleMergeSelector::Settings & settings) { + if (settings.min_age_to_force_merge && min_age >= settings.min_age_to_force_merge) + return true; + // std::cerr << "sum_size: " << sum_size << "\n"; /// Map size to 0..1 using logarithmic scale diff --git a/src/Storages/MergeTree/SimpleMergeSelector.h b/src/Storages/MergeTree/SimpleMergeSelector.h index 11ffe8b672a..c20eaa6e8de 100644 --- a/src/Storages/MergeTree/SimpleMergeSelector.h +++ b/src/Storages/MergeTree/SimpleMergeSelector.h @@ -141,6 +141,11 @@ public: double heuristic_to_align_parts_max_absolute_difference_in_powers_of_two = 0.5; double heuristic_to_align_parts_max_score_adjustment = 0.75; + /** If it's not 0, all part ranges that have min_age larger than min_age_to_force_merge + * will be considered for merging + */ + size_t min_age_to_force_merge = 0; + /** Heuristic: * From right side of range, remove all parts, that size is less than specified ratio of sum_size. 
*/ diff --git a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h index fd313a10bc8..7bad9947a88 100644 --- a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h +++ b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h @@ -47,10 +47,10 @@ public: const StorageMetadataPtr & metadata_snapshot, ContextPtr /*query_context*/) const override { const auto & storage_columns = metadata_snapshot->getColumns(); - if (!hasObjectColumns(storage_columns)) + if (!hasDynamicSubcolumns(storage_columns)) return std::make_shared(*this, metadata_snapshot); - auto object_columns = getObjectColumns( + auto object_columns = getConcreteObjectColumns( parts.begin(), parts.end(), storage_columns, [](const auto & part) -> const auto & { return part->getColumns(); }); @@ -65,7 +65,7 @@ public: ContextPtr context, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, - unsigned num_streams) override + size_t num_streams) override { query_plan = std::move(*MergeTreeDataSelectExecutor(storage) .readFromParts( diff --git a/src/Storages/MergeTree/checkDataPart.cpp b/src/Storages/MergeTree/checkDataPart.cpp index d5a838668d2..6f9f16b6155 100644 --- a/src/Storages/MergeTree/checkDataPart.cpp +++ b/src/Storages/MergeTree/checkDataPart.cpp @@ -1,3 +1,4 @@ +#include "Storages/MergeTree/IDataPartStorage.h" #include #include @@ -46,7 +47,7 @@ bool isNotEnoughMemoryErrorCode(int code) IMergeTreeDataPart::Checksums checkDataPart( MergeTreeData::DataPartPtr data_part, - const DataPartStoragePtr & data_part_storage, + const IDataPartStorage & data_part_storage, const NamesAndTypesList & columns_list, const MergeTreeDataPartType & part_type, const NameSet & files_without_checksums, @@ -64,13 +65,13 @@ IMergeTreeDataPart::Checksums checkDataPart( NamesAndTypesList columns_txt; { - auto buf = data_part_storage->readFile("columns.txt", {}, std::nullopt, std::nullopt); + auto buf = data_part_storage.readFile("columns.txt", {}, std::nullopt, std::nullopt); columns_txt.readText(*buf); assertEOF(*buf); } if (columns_txt != columns_list) - throw Exception("Columns doesn't match in part " + data_part_storage->getFullPath() + throw Exception("Columns doesn't match in part " + data_part_storage.getFullPath() + ". Expected: " + columns_list.toString() + ". Found: " + columns_txt.toString(), ErrorCodes::CORRUPTED_DATA); @@ -78,9 +79,9 @@ IMergeTreeDataPart::Checksums checkDataPart( IMergeTreeDataPart::Checksums checksums_data; /// This function calculates checksum for both compressed and decompressed contents of compressed file. 
- auto checksum_compressed_file = [](const DataPartStoragePtr & data_part_storage_, const String & file_path) + auto checksum_compressed_file = [](const IDataPartStorage & data_part_storage_, const String & file_path) { - auto file_buf = data_part_storage_->readFile(file_path, {}, std::nullopt, std::nullopt); + auto file_buf = data_part_storage_.readFile(file_path, {}, std::nullopt, std::nullopt); HashingReadBuffer compressed_hashing_buf(*file_buf); CompressedReadBuffer uncompressing_buf(compressed_hashing_buf); HashingReadBuffer uncompressed_hashing_buf(uncompressing_buf); @@ -96,9 +97,9 @@ IMergeTreeDataPart::Checksums checkDataPart( auto ratio_of_defaults = data_part->storage.getSettings()->ratio_of_defaults_for_sparse_serialization; SerializationInfoByName serialization_infos(columns_txt, SerializationInfo::Settings{ratio_of_defaults, false}); - if (data_part_storage->exists(IMergeTreeDataPart::SERIALIZATION_FILE_NAME)) + if (data_part_storage.exists(IMergeTreeDataPart::SERIALIZATION_FILE_NAME)) { - auto serialization_file = data_part_storage->readFile(IMergeTreeDataPart::SERIALIZATION_FILE_NAME, {}, std::nullopt, std::nullopt); + auto serialization_file = data_part_storage.readFile(IMergeTreeDataPart::SERIALIZATION_FILE_NAME, {}, std::nullopt, std::nullopt); serialization_infos.readJSON(*serialization_file); } @@ -111,98 +112,17 @@ IMergeTreeDataPart::Checksums checkDataPart( }; /// This function calculates only checksum of file content (compressed or uncompressed). - /// It also calculates checksum of projections. auto checksum_file = [&](const String & file_name) { - if (data_part_storage->isDirectory(file_name) && endsWith(file_name, ".proj")) - { - auto projection_name = file_name.substr(0, file_name.size() - sizeof(".proj") + 1); - auto pit = data_part->getProjectionParts().find(projection_name); - if (pit == data_part->getProjectionParts().end()) - { - if (require_checksums) - throw Exception("Unexpected file " + file_name + " in data part", ErrorCodes::UNEXPECTED_FILE_IN_DATA_PART); - else - return; - } - - const auto & projection = pit->second; - IMergeTreeDataPart::Checksums projection_checksums_data; - - auto projection_part_storage = data_part_storage->getProjection(file_name); - - if (projection->getType() == MergeTreeDataPartType::Compact) - { - auto file_buf = projection_part_storage->readFile(MergeTreeDataPartCompact::DATA_FILE_NAME_WITH_EXTENSION, {}, std::nullopt, std::nullopt); - HashingReadBuffer hashing_buf(*file_buf); - hashing_buf.ignoreAll(); - projection_checksums_data.files[MergeTreeDataPartCompact::DATA_FILE_NAME_WITH_EXTENSION] - = IMergeTreeDataPart::Checksums::Checksum(hashing_buf.count(), hashing_buf.getHash()); - } - else - { - const NamesAndTypesList & projection_columns_list = projection->getColumns(); - for (const auto & projection_column : projection_columns_list) - { - get_serialization(projection_column)->enumerateStreams( - [&](const ISerialization::SubstreamPath & substream_path) - { - String projection_file_name = ISerialization::getFileNameForStream(projection_column, substream_path) + ".bin"; - projection_checksums_data.files[projection_file_name] = checksum_compressed_file(projection_part_storage, projection_file_name); - }); - } - } - - IMergeTreeDataPart::Checksums projection_checksums_txt; - - if (require_checksums || projection_part_storage->exists("checksums.txt")) - { - auto buf = projection_part_storage->readFile("checksums.txt", {}, std::nullopt, std::nullopt); - projection_checksums_txt.read(*buf); - assertEOF(*buf); - } - - const 
auto & projection_checksum_files_txt = projection_checksums_txt.files; - for (auto projection_it = projection_part_storage->iterate(); projection_it->isValid(); projection_it->next()) - { - const String & projection_file_name = projection_it->name(); - auto projection_checksum_it = projection_checksums_data.files.find(projection_file_name); - - /// Skip files that we already calculated. Also skip metadata files that are not checksummed. - if (projection_checksum_it == projection_checksums_data.files.end() && !files_without_checksums.contains(projection_file_name)) - { - auto projection_txt_checksum_it = projection_checksum_files_txt.find(file_name); - if (projection_txt_checksum_it == projection_checksum_files_txt.end() - || projection_txt_checksum_it->second.uncompressed_size == 0) - { - auto projection_file_buf = projection_part_storage->readFile(projection_file_name, {}, std::nullopt, std::nullopt); - HashingReadBuffer projection_hashing_buf(*projection_file_buf); - projection_hashing_buf.ignoreAll(); - projection_checksums_data.files[projection_file_name] = IMergeTreeDataPart::Checksums::Checksum( - projection_hashing_buf.count(), projection_hashing_buf.getHash()); - } - else - { - projection_checksums_data.files[projection_file_name] = checksum_compressed_file(projection_part_storage, projection_file_name); - } - } - } - checksums_data.files[file_name] = IMergeTreeDataPart::Checksums::Checksum( - projection_checksums_data.getTotalSizeOnDisk(), projection_checksums_data.getTotalChecksumUInt128()); - - if (require_checksums || !projection_checksums_txt.files.empty()) - projection_checksums_txt.checkEqual(projection_checksums_data, false); - } - else - { - auto file_buf = data_part_storage->readFile(file_name, {}, std::nullopt, std::nullopt); - HashingReadBuffer hashing_buf(*file_buf); - hashing_buf.ignoreAll(); - checksums_data.files[file_name] = IMergeTreeDataPart::Checksums::Checksum(hashing_buf.count(), hashing_buf.getHash()); - } + auto file_buf = data_part_storage.readFile(file_name, {}, std::nullopt, std::nullopt); + HashingReadBuffer hashing_buf(*file_buf); + hashing_buf.ignoreAll(); + checksums_data.files[file_name] = IMergeTreeDataPart::Checksums::Checksum(hashing_buf.count(), hashing_buf.getHash()); }; - bool check_uncompressed = true; + /// Do not check uncompressed for projections. But why? + bool check_uncompressed = !data_part->isProjectionPart(); + /// First calculate checksums for columns data if (part_type == MergeTreeDataPartType::Compact) { @@ -224,23 +144,32 @@ IMergeTreeDataPart::Checksums checkDataPart( } else { - throw Exception("Unknown type in part " + data_part_storage->getFullPath(), ErrorCodes::UNKNOWN_PART_TYPE); + throw Exception("Unknown type in part " + data_part_storage.getFullPath(), ErrorCodes::UNKNOWN_PART_TYPE); } /// Checksums from the rest files listed in checksums.txt. May be absent. If present, they are subsequently compared with the actual data checksums. 
IMergeTreeDataPart::Checksums checksums_txt; - if (require_checksums || data_part_storage->exists("checksums.txt")) + if (require_checksums || data_part_storage.exists("checksums.txt")) { - auto buf = data_part_storage->readFile("checksums.txt", {}, std::nullopt, std::nullopt); + auto buf = data_part_storage.readFile("checksums.txt", {}, std::nullopt, std::nullopt); checksums_txt.read(*buf); assertEOF(*buf); } + NameSet projections_on_disk; const auto & checksum_files_txt = checksums_txt.files; - for (auto it = data_part_storage->iterate(); it->isValid(); it->next()) + for (auto it = data_part_storage.iterate(); it->isValid(); it->next()) { - const String & file_name = it->name(); + auto file_name = it->name(); + + /// We will check projections later. + if (data_part_storage.isDirectory(file_name) && endsWith(file_name, ".proj")) + { + projections_on_disk.insert(file_name); + continue; + } + auto checksum_it = checksums_data.files.find(file_name); /// Skip files that we already calculated. Also skip metadata files that are not checksummed. @@ -259,11 +188,38 @@ IMergeTreeDataPart::Checksums checkDataPart( } } + for (const auto & [name, projection] : data_part->getProjectionParts()) + { + if (is_cancelled()) + return {}; + + auto projection_file = name + ".proj"; + auto projection_checksums = checkDataPart( + projection, *data_part_storage.getProjection(projection_file), + projection->getColumns(), projection->getType(), + projection->getFileNamesWithoutChecksums(), + require_checksums, is_cancelled); + + checksums_data.files[projection_file] = IMergeTreeDataPart::Checksums::Checksum( + projection_checksums.getTotalSizeOnDisk(), + projection_checksums.getTotalChecksumUInt128()); + + projections_on_disk.erase(projection_file); + } + + if (require_checksums && !projections_on_disk.empty()) + { + throw Exception(ErrorCodes::UNEXPECTED_FILE_IN_DATA_PART, + "Found unexpected projection directories: {}", + fmt::join(projections_on_disk, ",")); + } + if (is_cancelled()) return {}; if (require_checksums || !checksums_txt.files.empty()) checksums_txt.checkEqual(checksums_data, check_uncompressed); + return checksums_data; } @@ -285,7 +241,7 @@ IMergeTreeDataPart::Checksums checkDataPart( return checkDataPart( data_part, - data_part->data_part_storage, + data_part->getDataPartStorage(), data_part->getColumns(), data_part->getType(), data_part->getFileNamesWithoutChecksums(), diff --git a/src/Storages/MySQL/MySQLHelpers.cpp b/src/Storages/MySQL/MySQLHelpers.cpp index 94c07d2670f..127bdb96eaf 100644 --- a/src/Storages/MySQL/MySQLHelpers.cpp +++ b/src/Storages/MySQL/MySQLHelpers.cpp @@ -23,7 +23,7 @@ createMySQLPoolWithFailover(const StorageMySQLConfiguration & configuration, con return mysqlxx::PoolWithFailover( configuration.database, configuration.addresses, configuration.username, configuration.password, MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_START_CONNECTIONS, - mysql_settings.connection_pool_size, + static_cast(mysql_settings.connection_pool_size), mysql_settings.connection_max_tries, mysql_settings.connection_wait_timeout, mysql_settings.connect_timeout, diff --git a/src/Storages/NATS/NATSConnection.cpp b/src/Storages/NATS/NATSConnection.cpp index d33138419e2..70b3599aa09 100644 --- a/src/Storages/NATS/NATSConnection.cpp +++ b/src/Storages/NATS/NATSConnection.cpp @@ -111,7 +111,7 @@ void NATSConnectionManager::connectImpl() { servers[i] = configuration.servers[i].c_str(); } - natsOptions_SetServers(options, servers, configuration.servers.size()); + natsOptions_SetServers(options, servers, 
static_cast(configuration.servers.size())); } natsOptions_SetMaxReconnect(options, configuration.max_reconnect); natsOptions_SetReconnectWait(options, configuration.reconnect_wait); diff --git a/src/Storages/NATS/StorageNATS.cpp b/src/Storages/NATS/StorageNATS.cpp index 4a3ba973e67..dea2553700b 100644 --- a/src/Storages/NATS/StorageNATS.cpp +++ b/src/Storages/NATS/StorageNATS.cpp @@ -60,7 +60,7 @@ StorageNATS::StorageNATS( , schema_name(getContext()->getMacros()->expand(nats_settings->nats_schema)) , num_consumers(nats_settings->nats_num_consumers.value) , log(&Poco::Logger::get("StorageNATS (" + table_id_.table_name + ")")) - , semaphore(0, num_consumers) + , semaphore(0, static_cast(num_consumers)) , queue_size(std::max(QUEUE_SIZE, static_cast(getMaxBlockSize()))) , is_attach(is_attach_) { @@ -289,7 +289,7 @@ void StorageNATS::read( ContextPtr local_context, QueryProcessingStage::Enum /* processed_stage */, size_t /* max_block_size */, - unsigned /* num_streams */) + size_t /* num_streams */) { if (!consumers_ready) throw Exception("NATS consumers setup not finished. Connection might be lost", ErrorCodes::CANNOT_CONNECT_NATS); diff --git a/src/Storages/NATS/StorageNATS.h b/src/Storages/NATS/StorageNATS.h index 185b39250c8..a5a050d566f 100644 --- a/src/Storages/NATS/StorageNATS.h +++ b/src/Storages/NATS/StorageNATS.h @@ -47,7 +47,7 @@ public: ContextPtr local_context, QueryProcessingStage::Enum /* processed_stage */, size_t /* max_block_size */, - unsigned /* num_streams */) override; + size_t /* num_streams */) override; SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr context) override; diff --git a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp index cc80d567d1d..6d12960824a 100644 --- a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp +++ b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp @@ -279,7 +279,7 @@ void StorageMaterializedPostgreSQL::read( ContextPtr context_, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) + size_t num_streams) { auto nested_table = getNested(); diff --git a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h index d8e9e98c662..af0adb10f9f 100644 --- a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h +++ b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h @@ -98,7 +98,7 @@ public: ContextPtr context_, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; /// This method is called only from MateriaizePostgreSQL database engine, because it needs to maintain /// an invariant: a table exists only if its nested table exists. 
This atomic variable is set to _true_ diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index 70838daec24..57f5ddd86e6 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -87,7 +87,7 @@ StorageRabbitMQ::StorageRabbitMQ( , use_user_setup(rabbitmq_settings->rabbitmq_queue_consume.value) , hash_exchange(num_consumers > 1 || num_queues > 1) , log(&Poco::Logger::get("StorageRabbitMQ (" + table_id_.table_name + ")")) - , semaphore(0, num_consumers) + , semaphore(0, static_cast(num_consumers)) , unique_strbase(getRandomName()) , queue_size(std::max(QUEUE_SIZE, static_cast(getMaxBlockSize()))) , milliseconds_to_wait(RESCHEDULE_MS) @@ -674,7 +674,7 @@ void StorageRabbitMQ::read( ContextPtr local_context, QueryProcessingStage::Enum /* processed_stage */, size_t /* max_block_size */, - unsigned /* num_streams */) + size_t /* num_streams */) { if (!rabbit_is_ready) throw Exception("RabbitMQ setup not finished. Connection might be lost", ErrorCodes::CANNOT_CONNECT_RABBITMQ); diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.h b/src/Storages/RabbitMQ/StorageRabbitMQ.h index 455b2fe8f09..a1250f50829 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.h +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.h @@ -50,7 +50,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; SinkToStoragePtr write( const ASTPtr & query, diff --git a/src/Storages/ReadFinalForExternalReplicaStorage.cpp b/src/Storages/ReadFinalForExternalReplicaStorage.cpp index 3ec7a074fd4..28053c84e20 100644 --- a/src/Storages/ReadFinalForExternalReplicaStorage.cpp +++ b/src/Storages/ReadFinalForExternalReplicaStorage.cpp @@ -35,7 +35,7 @@ void readFinalFromNestedStorage( ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned int num_streams) + size_t num_streams) { NameSet column_names_set = NameSet(column_names.begin(), column_names.end()); auto lock = nested_storage->lockForShare(context->getCurrentQueryId(), context->getSettingsRef().lock_acquire_timeout); @@ -59,7 +59,8 @@ void readFinalFromNestedStorage( } auto nested_snapshot = nested_storage->getStorageSnapshot(nested_metadata, context); - nested_storage->read(query_plan, require_columns_name, nested_snapshot, query_info, context, processed_stage, max_block_size, num_streams); + nested_storage->read( + query_plan, require_columns_name, nested_snapshot, query_info, context, processed_stage, max_block_size, num_streams); if (!query_plan.isInitialized()) { diff --git a/src/Storages/ReadFinalForExternalReplicaStorage.h b/src/Storages/ReadFinalForExternalReplicaStorage.h index 178164b6643..f8d1264ccb3 100644 --- a/src/Storages/ReadFinalForExternalReplicaStorage.h +++ b/src/Storages/ReadFinalForExternalReplicaStorage.h @@ -21,7 +21,7 @@ void readFinalFromNestedStorage( ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned int num_streams); + size_t num_streams); } diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp index 20b1de51a30..46ddb650eee 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp @@ -403,7 +403,7 @@ Pipe StorageEmbeddedRocksDB::read( ContextPtr context_, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, - unsigned num_streams) + size_t 
num_streams) { storage_snapshot->check(column_names); @@ -467,7 +467,7 @@ static StoragePtr create(const StorageFactory::Arguments & args) String rocksdb_dir; bool read_only{false}; if (!engine_args.empty()) - ttl = checkAndGetLiteralArgument(engine_args[0], "ttl"); + ttl = static_cast(checkAndGetLiteralArgument(engine_args[0], "ttl")); if (engine_args.size() > 1) rocksdb_dir = checkAndGetLiteralArgument(engine_args[1], "rocksdb_dir"); if (engine_args.size() > 2) diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.h b/src/Storages/RocksDB/StorageEmbeddedRocksDB.h index 03848510e66..ca0ab7a1840 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.h +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.h @@ -46,7 +46,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, ContextPtr context) override; void truncate(const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, ContextPtr, TableExclusiveLockHolder &) override; diff --git a/src/Storages/SelectQueryInfo.h b/src/Storages/SelectQueryInfo.h index f2835ab4dbf..329bb650171 100644 --- a/src/Storages/SelectQueryInfo.h +++ b/src/Storages/SelectQueryInfo.h @@ -7,6 +7,9 @@ #include #include #include +#include +#include +#include #include @@ -177,6 +180,15 @@ struct SelectQueryInfo ASTPtr view_query; /// Optimized VIEW query ASTPtr original_query; /// Unmodified query for projection analysis + /// Planner context + PlannerContextPtr planner_context; + + /// Storage table expression + QueryTreeNodePtr table_expression; + + /// Table expression modifiers for storage + std::optional table_expression_modifiers; + std::shared_ptr storage_limits; /// Cluster for the query. @@ -220,6 +232,9 @@ struct SelectQueryInfo Block minmax_count_projection_block; MergeTreeDataSelectAnalysisResultPtr merge_tree_select_result_ptr; + // If limit is not 0, that means it's a trivial limit query. + UInt64 limit = 0; + InputOrderInfoPtr getInputOrderInfo() const { return input_order_info ? input_order_info : (projection ? 
projection->input_order_info : nullptr); diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index 3fc00a79bbe..65b4dce3ad2 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -26,6 +26,8 @@ #include #include #include +#include +#include #include #include #include @@ -226,7 +228,7 @@ void StorageBuffer::read( ContextPtr local_context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) + size_t num_streams) { const auto & metadata_snapshot = storage_snapshot->metadata; @@ -334,6 +336,14 @@ void StorageBuffer::read( pipes_from_buffers.emplace_back(std::make_shared(column_names, buf, storage_snapshot)); pipe_from_buffers = Pipe::unitePipes(std::move(pipes_from_buffers)); + if (query_info.getInputOrderInfo()) + { + /// Each buffer has one block, and it not guaranteed that rows in each block are sorted by order keys + pipe_from_buffers.addSimpleTransform([&](const Block & header) + { + return std::make_shared(header, query_info.getInputOrderInfo()->sort_description_for_merging, 0); + }); + } } if (pipe_from_buffers.empty()) diff --git a/src/Storages/StorageBuffer.h b/src/Storages/StorageBuffer.h index 580742c0c84..387165171b9 100644 --- a/src/Storages/StorageBuffer.h +++ b/src/Storages/StorageBuffer.h @@ -82,7 +82,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; bool supportsParallelInsert() const override { return true; } diff --git a/src/Storages/StorageDictionary.cpp b/src/Storages/StorageDictionary.cpp index 2839ac03a5b..a76c4dffb5b 100644 --- a/src/Storages/StorageDictionary.cpp +++ b/src/Storages/StorageDictionary.cpp @@ -169,13 +169,19 @@ Pipe StorageDictionary::read( ContextPtr local_context, QueryProcessingStage::Enum /*processed_stage*/, const size_t max_block_size, - const unsigned threads) + const size_t threads) { auto registered_dictionary_name = location == Location::SameDatabaseAndNameAsDictionary ? getStorageID().getInternalDictionaryName() : dictionary_name; auto dictionary = getContext()->getExternalDictionariesLoader().getDictionary(registered_dictionary_name, local_context); return dictionary->read(column_names, max_block_size, threads); } +std::shared_ptr StorageDictionary::getDictionary() const +{ + auto registered_dictionary_name = location == Location::SameDatabaseAndNameAsDictionary ? 
getStorageID().getInternalDictionaryName() : dictionary_name; + return getContext()->getExternalDictionariesLoader().getDictionary(registered_dictionary_name, getContext()); +} + void StorageDictionary::shutdown() { removeDictionaryConfigurationFromRepository(); diff --git a/src/Storages/StorageDictionary.h b/src/Storages/StorageDictionary.h index f81503910ca..b3442ec2f99 100644 --- a/src/Storages/StorageDictionary.h +++ b/src/Storages/StorageDictionary.h @@ -8,8 +8,10 @@ namespace DB { + struct DictionaryStructure; class TableFunctionDictionary; +class IDictionary; class StorageDictionary final : public IStorage, public WithContext { @@ -69,7 +71,9 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned threads) override; + size_t threads) override; + + std::shared_ptr getDictionary() const; static NamesAndTypesList getNamesAndTypes(const DictionaryStructure & dictionary_structure); static String generateNamesAndTypesDescription(const NamesAndTypesList & list); diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 4eb6697dd6e..f7f68eba30f 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -564,6 +564,10 @@ std::optional StorageDistributed::getOptimizedQueryP return {}; } + /// TODO: Analyzer syntax analyzer result + if (!query_info.syntax_analyzer_result) + return {}; + // GROUP BY const ASTPtr group_by = select.groupBy(); if (!query_info.syntax_analyzer_result->aggregates.empty() || group_by) @@ -594,7 +598,7 @@ std::optional StorageDistributed::getOptimizedQueryP static bool requiresObjectColumns(const ColumnsDescription & all_columns, ASTPtr query) { - if (!hasObjectColumns(all_columns)) + if (!hasDynamicSubcolumns(all_columns)) return false; if (!query) @@ -609,7 +613,7 @@ static bool requiresObjectColumns(const ColumnsDescription & all_columns, ASTPtr auto name_in_storage = Nested::splitName(required_column).first; auto column_in_storage = all_columns.tryGetPhysical(name_in_storage); - if (column_in_storage && isObject(column_in_storage->type)) + if (column_in_storage && column_in_storage->type->hasDynamicSubcolumns()) return true; } @@ -636,7 +640,7 @@ StorageSnapshotPtr StorageDistributed::getStorageSnapshotForQuery( metadata_snapshot->getColumns(), getContext()); - auto object_columns = DB::getObjectColumns( + auto object_columns = DB::getConcreteObjectColumns( snapshot_data->objects_by_shard.begin(), snapshot_data->objects_by_shard.end(), metadata_snapshot->getColumns(), @@ -653,7 +657,7 @@ void StorageDistributed::read( ContextPtr local_context, QueryProcessingStage::Enum processed_stage, const size_t /*max_block_size*/, - const unsigned /*num_streams*/) + const size_t /*num_streams*/) { const auto * select_query = query_info.query->as(); if (select_query->final() && local_context->getSettingsRef().allow_experimental_parallel_reading_from_replicas) diff --git a/src/Storages/StorageDistributed.h b/src/Storages/StorageDistributed.h index 7cb25ae46ab..334f44a90f9 100644 --- a/src/Storages/StorageDistributed.h +++ b/src/Storages/StorageDistributed.h @@ -112,7 +112,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t /*max_block_size*/, - unsigned /*num_streams*/) override; + size_t /*num_streams*/) override; bool supportsParallelInsert() const override { return true; } std::optional totalBytes(const Settings &) const override; diff --git a/src/Storages/StorageExecutable.cpp b/src/Storages/StorageExecutable.cpp index 
2931e62b7ef..cd3cc4d48ac 100644 --- a/src/Storages/StorageExecutable.cpp +++ b/src/Storages/StorageExecutable.cpp @@ -111,7 +111,7 @@ void StorageExecutable::read( ContextPtr context, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, - unsigned /*threads*/) + size_t /*threads*/) { auto & script_name = settings.script_name; diff --git a/src/Storages/StorageExecutable.h b/src/Storages/StorageExecutable.h index 2638474082a..2393920fa3c 100644 --- a/src/Storages/StorageExecutable.h +++ b/src/Storages/StorageExecutable.h @@ -41,7 +41,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned threads) override; + size_t threads) override; private: ExecutableSettings settings; diff --git a/src/Storages/StorageExternalDistributed.cpp b/src/Storages/StorageExternalDistributed.cpp index dcb7a90b2f6..7d1eef1e47c 100644 --- a/src/Storages/StorageExternalDistributed.cpp +++ b/src/Storages/StorageExternalDistributed.cpp @@ -181,7 +181,7 @@ void StorageExternalDistributed::read( ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) + size_t num_streams) { std::vector> plans; for (const auto & shard : shards) diff --git a/src/Storages/StorageExternalDistributed.h b/src/Storages/StorageExternalDistributed.h index 52a2a7a4106..a1bdb41dded 100644 --- a/src/Storages/StorageExternalDistributed.h +++ b/src/Storages/StorageExternalDistributed.h @@ -55,7 +55,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; private: using Shards = std::unordered_set; diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 29f2d0667d9..6e032a47943 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -81,7 +81,8 @@ void listFilesWithRegexpMatchingImpl( const std::string & path_for_ls, const std::string & for_match, size_t & total_bytes_to_read, - std::vector & result) + std::vector & result, + bool recursive = false) { const size_t first_glob = for_match.find_first_of("*?{"); @@ -89,10 +90,17 @@ void listFilesWithRegexpMatchingImpl( const std::string suffix_with_globs = for_match.substr(end_of_path_without_globs); /// begin with '/' const size_t next_slash = suffix_with_globs.find('/', 1); - auto regexp = makeRegexpPatternFromGlobs(suffix_with_globs.substr(0, next_slash)); + const std::string current_glob = suffix_with_globs.substr(0, next_slash); + auto regexp = makeRegexpPatternFromGlobs(current_glob); + re2::RE2 matcher(regexp); + bool skip_regex = current_glob == "/*" ? 
true : false; + if (!recursive) + recursive = current_glob == "/**" ; + const std::string prefix_without_globs = path_for_ls + for_match.substr(1, end_of_path_without_globs); + if (!fs::exists(prefix_without_globs)) return; @@ -107,15 +115,21 @@ void listFilesWithRegexpMatchingImpl( /// Condition is_directory means what kind of path is it in current iteration of ls if (!it->is_directory() && !looking_for_directory) { - if (re2::RE2::FullMatch(file_name, matcher)) + if (skip_regex || re2::RE2::FullMatch(file_name, matcher)) { total_bytes_to_read += it->file_size(); result.push_back(it->path().string()); } } - else if (it->is_directory() && looking_for_directory) + else if (it->is_directory()) { - if (re2::RE2::FullMatch(file_name, matcher)) + if (recursive) + { + listFilesWithRegexpMatchingImpl(fs::path(full_path).append(it->path().string()) / "" , + looking_for_directory ? suffix_with_globs.substr(next_slash) : current_glob , + total_bytes_to_read, result, recursive); + } + else if (looking_for_directory && re2::RE2::FullMatch(file_name, matcher)) { /// Recursion depth is limited by pattern. '*' works only for depth = 1, for depth = 2 pattern path is '*/*'. So we do not need additional check. listFilesWithRegexpMatchingImpl(fs::path(full_path) / "", suffix_with_globs.substr(next_slash), total_bytes_to_read, result); @@ -209,7 +223,7 @@ std::unique_ptr createReadBuffer( in.setProgressCallback(context); } - auto zstd_window_log_max = context->getSettingsRef().zstd_window_log_max; + int zstd_window_log_max = static_cast(context->getSettingsRef().zstd_window_log_max); return wrapReadBufferWithCompressionMethod(std::move(nested_buffer), method, zstd_window_log_max); } @@ -645,7 +659,7 @@ Pipe StorageFile::read( ContextPtr context, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, - unsigned num_streams) + size_t num_streams) { if (use_table_fd) { diff --git a/src/Storages/StorageFile.h b/src/Storages/StorageFile.h index e60e5f6b371..03b3aacb67f 100644 --- a/src/Storages/StorageFile.h +++ b/src/Storages/StorageFile.h @@ -48,7 +48,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; SinkToStoragePtr write( const ASTPtr & query, diff --git a/src/Storages/StorageGenerateRandom.cpp b/src/Storages/StorageGenerateRandom.cpp index 9cac1e57297..c00e82598b2 100644 --- a/src/Storages/StorageGenerateRandom.cpp +++ b/src/Storages/StorageGenerateRandom.cpp @@ -494,7 +494,7 @@ Pipe StorageGenerateRandom::read( ContextPtr context, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, - unsigned num_streams) + size_t num_streams) { storage_snapshot->check(column_names); diff --git a/src/Storages/StorageGenerateRandom.h b/src/Storages/StorageGenerateRandom.h index 8dc3e490ae7..6b050c07e52 100644 --- a/src/Storages/StorageGenerateRandom.h +++ b/src/Storages/StorageGenerateRandom.h @@ -28,7 +28,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; bool supportsTransactions() const override { return true; } private: diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index 66dcc938aef..a80f21834db 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -526,7 +526,7 @@ void StorageInMemoryMetadata::check(const NamesAndTypesList & provided_columns) const auto * 
available_type = it->getMapped(); - if (!isObject(*available_type) + if (!available_type->hasDynamicSubcolumns() && !column.type->equals(*available_type) && !isCompatibleEnumTypes(available_type, column.type.get())) throw Exception( @@ -575,7 +575,7 @@ void StorageInMemoryMetadata::check(const NamesAndTypesList & provided_columns, const auto * provided_column_type = it->getMapped(); const auto * available_column_type = jt->getMapped(); - if (!isObject(*provided_column_type) + if (!provided_column_type->hasDynamicSubcolumns() && !provided_column_type->equals(*available_column_type) && !isCompatibleEnumTypes(available_column_type, provided_column_type)) throw Exception( @@ -619,7 +619,7 @@ void StorageInMemoryMetadata::check(const Block & block, bool need_all) const listOfColumns(available_columns)); const auto * available_type = it->getMapped(); - if (!isObject(*available_type) + if (!available_type->hasDynamicSubcolumns() && !column.type->equals(*available_type) && !isCompatibleEnumTypes(available_type, column.type.get())) throw Exception( diff --git a/src/Storages/StorageInput.cpp b/src/Storages/StorageInput.cpp index 4729d0a5bf8..18e8442c1b5 100644 --- a/src/Storages/StorageInput.cpp +++ b/src/Storages/StorageInput.cpp @@ -57,7 +57,7 @@ Pipe StorageInput::read( ContextPtr context, QueryProcessingStage::Enum /*processed_stage*/, size_t /*max_block_size*/, - unsigned /*num_streams*/) + size_t /*num_streams*/) { Pipes pipes; auto query_context = context->getQueryContext(); diff --git a/src/Storages/StorageInput.h b/src/Storages/StorageInput.h index 991a4f35b7b..da4669aaf37 100644 --- a/src/Storages/StorageInput.h +++ b/src/Storages/StorageInput.h @@ -25,7 +25,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; private: Pipe pipe; diff --git a/src/Storages/StorageJoin.cpp b/src/Storages/StorageJoin.cpp index 2e3e1d443ae..e4f786cd23b 100644 --- a/src/Storages/StorageJoin.cpp +++ b/src/Storages/StorageJoin.cpp @@ -585,7 +585,7 @@ Pipe StorageJoin::read( ContextPtr context, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, - unsigned /*num_streams*/) + size_t /*num_streams*/) { storage_snapshot->check(column_names); diff --git a/src/Storages/StorageJoin.h b/src/Storages/StorageJoin.h index 390af09422c..43515f800d9 100644 --- a/src/Storages/StorageJoin.h +++ b/src/Storages/StorageJoin.h @@ -68,7 +68,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; std::optional totalRows(const Settings & settings) const override; std::optional totalBytes(const Settings & settings) const override; diff --git a/src/Storages/StorageKeeperMap.cpp b/src/Storages/StorageKeeperMap.cpp index e62874490f8..21be205c0f6 100644 --- a/src/Storages/StorageKeeperMap.cpp +++ b/src/Storages/StorageKeeperMap.cpp @@ -408,7 +408,7 @@ Pipe StorageKeeperMap::read( ContextPtr context_, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, - unsigned num_streams) + size_t num_streams) { checkTable(); storage_snapshot->check(column_names); @@ -682,24 +682,20 @@ Chunk StorageKeeperMap::getBySerializedKeys(const std::span k auto client = getClient(); - std::vector> values; - values.reserve(keys.size()); + Strings full_key_paths; + full_key_paths.reserve(keys.size()); for (const auto & key : keys) { - const auto full_path = fullPathForKey(key); - 
values.emplace_back(client->asyncTryGet(full_path)); + full_key_paths.emplace_back(fullPathForKey(key)); } - auto wait_until = std::chrono::system_clock::now() + std::chrono::milliseconds(Coordination::DEFAULT_OPERATION_TIMEOUT_MS); + auto values = client->tryGet(full_key_paths); for (size_t i = 0; i < keys.size(); ++i) { - auto & value = values[i]; - if (value.wait_until(wait_until) != std::future_status::ready) - throw DB::Exception(ErrorCodes::KEEPER_EXCEPTION, "Failed to fetch values: timeout"); + auto response = values[i]; - auto response = value.get(); Coordination::Error code = response.error; if (code == Coordination::Error::ZOK) diff --git a/src/Storages/StorageKeeperMap.h b/src/Storages/StorageKeeperMap.h index 87861362e42..45b32434f15 100644 --- a/src/Storages/StorageKeeperMap.h +++ b/src/Storages/StorageKeeperMap.h @@ -39,7 +39,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr context) override; diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index c6bc55fd620..8ed33220507 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -462,7 +462,7 @@ void LogSink::writeData(const NameAndTypePair & name_and_type, const IColumn & c settings.getter = createStreamGetter(name_and_type); if (!serialize_states.contains(name)) - serialization->serializeBinaryBulkStatePrefix(settings, serialize_states[name]); + serialization->serializeBinaryBulkStatePrefix(column, settings, serialize_states[name]); if (storage.use_marks_file) { @@ -782,7 +782,7 @@ Pipe StorageLog::read( ContextPtr local_context, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, - unsigned num_streams) + size_t num_streams) { storage_snapshot->check(column_names); diff --git a/src/Storages/StorageLog.h b/src/Storages/StorageLog.h index 2e677dd3161..a2b1356f240 100644 --- a/src/Storages/StorageLog.h +++ b/src/Storages/StorageLog.h @@ -53,7 +53,7 @@ public: ContextPtr local_context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context) override; diff --git a/src/Storages/StorageMaterializedMySQL.cpp b/src/Storages/StorageMaterializedMySQL.cpp index bb69f211a9e..0dc0b1bff0b 100644 --- a/src/Storages/StorageMaterializedMySQL.cpp +++ b/src/Storages/StorageMaterializedMySQL.cpp @@ -40,7 +40,7 @@ void StorageMaterializedMySQL::read( ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned int num_streams) + size_t num_streams) { if (const auto * db = typeid_cast(database)) db->rethrowExceptionIfNeeded(); diff --git a/src/Storages/StorageMaterializedMySQL.h b/src/Storages/StorageMaterializedMySQL.h index a66b7eba804..cbb59e508e8 100644 --- a/src/Storages/StorageMaterializedMySQL.h +++ b/src/Storages/StorageMaterializedMySQL.h @@ -24,8 +24,13 @@ public: bool needRewriteQueryWithFinal(const Names & column_names) const override; void read( - QueryPlan & query_plan, const Names & column_names, const StorageSnapshotPtr & metadata_snapshot, SelectQueryInfo & query_info, - ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, unsigned num_streams) override; + QueryPlan & query_plan, + const Names & 
column_names, + const StorageSnapshotPtr & metadata_snapshot, + SelectQueryInfo & query_info, + ContextPtr context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, size_t num_streams) override; SinkToStoragePtr write(const ASTPtr &, const StorageMetadataPtr &, ContextPtr) override { throwNotAllowed(); } diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index b01415f9590..e256e087728 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -150,7 +150,7 @@ void StorageMaterializedView::read( ContextPtr local_context, QueryProcessingStage::Enum processed_stage, const size_t max_block_size, - const unsigned num_streams) + const size_t num_streams) { auto storage = getTargetTable(); auto lock = storage->lockForShare(local_context->getCurrentQueryId(), local_context->getSettingsRef().lock_acquire_timeout); diff --git a/src/Storages/StorageMaterializedView.h b/src/Storages/StorageMaterializedView.h index 1d8808b302e..af2dedf8164 100644 --- a/src/Storages/StorageMaterializedView.h +++ b/src/Storages/StorageMaterializedView.h @@ -91,7 +91,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; Strings getDataPaths() const override; diff --git a/src/Storages/StorageMemory.cpp b/src/Storages/StorageMemory.cpp index e4dbfe15095..881cbc18b10 100644 --- a/src/Storages/StorageMemory.cpp +++ b/src/Storages/StorageMemory.cpp @@ -146,7 +146,7 @@ public: auto extended_storage_columns = storage_snapshot->getColumns( GetColumnsOptions(GetColumnsOptions::AllPhysical).withExtendedObjects()); - convertObjectsToTuples(block, extended_storage_columns); + convertDynamicColumnsToTuples(block, storage_snapshot); } if (storage.compress) @@ -212,10 +212,10 @@ StorageSnapshotPtr StorageMemory::getStorageSnapshot(const StorageMetadataPtr & auto snapshot_data = std::make_unique(); snapshot_data->blocks = data.get(); - if (!hasObjectColumns(metadata_snapshot->getColumns())) + if (!hasDynamicSubcolumns(metadata_snapshot->getColumns())) return std::make_shared(*this, metadata_snapshot, ColumnsDescription{}, std::move(snapshot_data)); - auto object_columns = getObjectColumns( + auto object_columns = getConcreteObjectColumns( snapshot_data->blocks->begin(), snapshot_data->blocks->end(), metadata_snapshot->getColumns(), @@ -231,7 +231,7 @@ Pipe StorageMemory::read( ContextPtr /*context*/, QueryProcessingStage::Enum /*processed_stage*/, size_t /*max_block_size*/, - unsigned num_streams) + size_t num_streams) { storage_snapshot->check(column_names); diff --git a/src/Storages/StorageMemory.h b/src/Storages/StorageMemory.h index 3889acb952b..c739088dbe4 100644 --- a/src/Storages/StorageMemory.h +++ b/src/Storages/StorageMemory.h @@ -51,7 +51,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; bool supportsParallelInsert() const override { return true; } bool supportsSubcolumns() const override { return true; } diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 9891340a0d0..7fb21b7e053 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -225,11 +225,15 @@ SelectQueryInfo getModifiedQueryInfo( SelectQueryInfo modified_query_info = query_info; modified_query_info.query = query_info.query->clone(); - /// Original query could contain JOIN but 
we need only the first joined table and its columns. - auto & modified_select = modified_query_info.query->as(); - TreeRewriterResult new_analyzer_res = *modified_query_info.syntax_analyzer_result; - removeJoin(modified_select, new_analyzer_res, modified_context); - modified_query_info.syntax_analyzer_result = std::make_shared(std::move(new_analyzer_res)); + /// TODO: Analyzer syntax analyzer result + if (modified_query_info.syntax_analyzer_result) + { + /// Original query could contain JOIN but we need only the first joined table and its columns. + auto & modified_select = modified_query_info.query->as(); + TreeRewriterResult new_analyzer_res = *modified_query_info.syntax_analyzer_result; + removeJoin(modified_select, new_analyzer_res, modified_context); + modified_query_info.syntax_analyzer_result = std::make_shared(std::move(new_analyzer_res)); + } if (!is_merge_engine) { @@ -249,7 +253,7 @@ void StorageMerge::read( ContextPtr local_context, QueryProcessingStage::Enum processed_stage, const size_t max_block_size, - unsigned num_streams) + size_t num_streams) { /** Just in case, turn off optimization "transfer to PREWHERE", * since there is no certainty that it works when one of table is MergeTree and other is not. @@ -513,7 +517,13 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( SelectQueryOptions(processed_stage).analyze()).buildQueryPipeline()); } - if (!modified_select.final() && storage->needRewriteQueryWithFinal(real_column_names)) + bool final = false; + if (modified_query_info.table_expression_modifiers) + final = modified_query_info.table_expression_modifiers->hasFinal(); + else + final = modified_select.final(); + + if (!final && storage->needRewriteQueryWithFinal(real_column_names)) { /// NOTE: It may not work correctly in some cases, because query was analyzed without final. /// However, it's needed for MaterializedMySQL and it's unlikely that someone will use it with Merge tables. 
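
The hunk above changes how ReadFromMerge::createSources decides whether the underlying table must be read with FINAL: when the query carries analyzer metadata, the flag now comes from the table expression modifiers stored in SelectQueryInfo, and only otherwise from the SELECT AST. Below is a minimal standalone sketch of that decision, not part of the diff: the structs are toy stand-ins for the real SelectQueryInfo, TableExpressionModifiers and ASTSelectQuery types, and resolveFinal is a hypothetical helper name used purely for illustration.

#include <optional>

// Toy stand-ins for the ClickHouse types touched in the hunk above,
// reduced to the members the new branch actually reads.
struct TableExpressionModifiersSketch
{
    bool final_flag = false;
    bool hasFinal() const { return final_flag; }
};

struct SelectQueryInfoSketch
{
    std::optional<TableExpressionModifiersSketch> table_expression_modifiers;
};

struct SelectASTSketch
{
    bool final_flag = false;
    bool final() const { return final_flag; }
};

// Mirrors the new logic in ReadFromMerge::createSources: prefer the analyzer's
// table expression modifiers when they are present, otherwise fall back to the
// old AST-based FINAL flag.
bool resolveFinal(const SelectQueryInfoSketch & query_info, const SelectASTSketch & select)
{
    if (query_info.table_expression_modifiers)
        return query_info.table_expression_modifiers->hasFinal();
    return select.final();
}

int main()
{
    SelectQueryInfoSketch query_info;
    SelectASTSketch select;
    select.final_flag = true;

    // Without analyzer modifiers the AST flag still decides.
    bool final_from_ast = resolveFinal(query_info, select);        // true

    // Once modifiers are set, they take precedence over the AST.
    TableExpressionModifiersSketch modifiers;
    modifiers.final_flag = false;
    query_info.table_expression_modifiers = modifiers;
    bool final_from_modifiers = resolveFinal(query_info, select);  // false

    return (final_from_ast && !final_from_modifiers) ? 0 : 1;
}
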
diff --git a/src/Storages/StorageMerge.h b/src/Storages/StorageMerge.h index 6bf68660803..33406321100 100644 --- a/src/Storages/StorageMerge.h +++ b/src/Storages/StorageMerge.h @@ -61,7 +61,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; void checkAlterIsPossible(const AlterCommands & commands, ContextPtr context) const override; diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index aea853b6c39..a450a9ef3a9 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -1,4 +1,5 @@ #include "StorageMergeTree.h" +#include "Storages/MergeTree/IMergeTreeDataPart.h" #include @@ -220,7 +221,7 @@ void StorageMergeTree::read( ContextPtr local_context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) + size_t num_streams) { /// If true, then we will ask initiator if we can read chosen ranges bool enable_parallel_reading = local_context->getClientInfo().collaborate_with_initiator; @@ -378,7 +379,9 @@ CurrentlyMergingPartsTagger::CurrentlyMergingPartsTagger( /// if we mutate part, than we should reserve space on the same disk, because mutations possible can create hardlinks if (is_mutation) - reserved_space = storage.tryReserveSpace(total_size, future_part->parts[0]->data_part_storage); + { + reserved_space = storage.tryReserveSpace(total_size, future_part->parts[0]->getDataPartStorage()); + } else { IMergeTreeDataPart::TTLInfos ttl_infos; @@ -386,7 +389,9 @@ CurrentlyMergingPartsTagger::CurrentlyMergingPartsTagger( for (auto & part_ptr : future_part->parts) { ttl_infos.update(part_ptr->ttl_infos); - max_volume_index = std::max(max_volume_index, part_ptr->data_part_storage->getVolumeIndex(*storage.getStoragePolicy())); + auto disk_name = part_ptr->getDataPartStorage().getDiskName(); + size_t volume_index = storage.getStoragePolicy()->getVolumeIndexByDiskName(disk_name); + max_volume_index = std::max(max_volume_index, volume_index); } reserved_space = storage.balancedReservation( @@ -798,7 +803,7 @@ void StorageMergeTree::loadMutations() increment.value = std::max(increment.value.load(), current_mutations_by_version.rbegin()->first); } -std::shared_ptr StorageMergeTree::selectPartsToMerge( +MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge( const StorageMetadataPtr & metadata_snapshot, bool aggressive, const String & partition_id, @@ -943,7 +948,7 @@ bool StorageMergeTree::merge( SelectPartsDecision select_decision; - std::shared_ptr merge_mutate_entry; + MergeMutateSelectedEntryPtr merge_mutate_entry; { std::unique_lock lock(currently_processing_in_background_mutex); @@ -989,18 +994,10 @@ bool StorageMergeTree::partIsAssignedToBackgroundOperation(const DataPartPtr & p return currently_merging_mutating_parts.contains(part); } -std::shared_ptr StorageMergeTree::selectPartsToMutate( +MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMutate( const StorageMetadataPtr & metadata_snapshot, String * /* disable_reason */, TableLockHolder & /* table_lock_holder */, std::unique_lock & /*currently_processing_in_background_mutex_lock*/) { - size_t max_ast_elements = getContext()->getSettingsRef().max_expanded_ast_elements; - - auto future_part = std::make_shared(); - if (storage_settings.get()->assign_part_uuids) - future_part->uuid = UUIDHelpers::generateV4(); - - CurrentlyMergingPartsTaggerPtr tagger; - if (current_mutations_by_version.empty()) return {}; @@ -1014,6 
+1011,14 @@ std::shared_ptr StorageMergeTree::selectPartsToMutate( return {}; } + size_t max_ast_elements = getContext()->getSettingsRef().max_expanded_ast_elements; + + auto future_part = std::make_shared(); + if (storage_settings.get()->assign_part_uuids) + future_part->uuid = UUIDHelpers::generateV4(); + + CurrentlyMergingPartsTaggerPtr tagger; + auto mutations_end_it = current_mutations_by_version.end(); for (const auto & part : getDataPartsVectorForInternalUsage()) { @@ -1132,7 +1137,7 @@ bool StorageMergeTree::scheduleDataProcessingJob(BackgroundJobsAssignee & assign assert(!isStaticStorage()); auto metadata_snapshot = getInMemoryMetadataPtr(); - std::shared_ptr merge_entry, mutate_entry; + MergeMutateSelectedEntryPtr merge_entry, mutate_entry; auto share_lock = lockForShare(RWLockImpl::NO_QUERY, getSettings()->lock_acquire_timeout_for_background_operations); @@ -1152,7 +1157,8 @@ bool StorageMergeTree::scheduleDataProcessingJob(BackgroundJobsAssignee & assign return false; merge_entry = selectPartsToMerge(metadata_snapshot, false, {}, false, nullptr, share_lock, lock, txn); - if (!merge_entry) + + if (!merge_entry && !current_mutations_by_version.empty()) mutate_entry = selectPartsToMutate(metadata_snapshot, nullptr, share_lock, lock); has_mutations = !current_mutations_by_version.empty(); @@ -1473,7 +1479,7 @@ void StorageMergeTree::dropPartsImpl(DataPartsVector && parts_to_remove, bool de /// NOTE: no race with background cleanup until we hold pointers to parts for (const auto & part : parts_to_remove) { - LOG_INFO(log, "Detaching {}", part->data_part_storage->getPartDirectory()); + LOG_INFO(log, "Detaching {}", part->getDataPartStorage().getPartDirectory()); part->makeCloneInDetached("", metadata_snapshot); } } @@ -1518,9 +1524,8 @@ PartitionCommandsResultInfo StorageMergeTree::attachPartition( MergeTreeData::Transaction transaction(*this, local_context->getCurrentTransaction().get()); { auto lock = lockParts(); - auto builder = loaded_parts[i]->data_part_storage->getBuilder(); fillNewPartName(loaded_parts[i], lock); - renameTempPartAndAdd(loaded_parts[i], transaction, builder, lock); + renameTempPartAndAdd(loaded_parts[i], transaction, lock); transaction.commit(&lock); } @@ -1603,9 +1608,7 @@ void StorageMergeTree::replacePartitionFrom(const StoragePtr & source_table, con for (auto part : dst_parts) { fillNewPartName(part, data_parts_lock); - - auto builder = part->data_part_storage->getBuilder(); - renameTempPartAndReplaceUnlocked(part, transaction, builder, data_parts_lock); + renameTempPartAndReplaceUnlocked(part, transaction, data_parts_lock); } /// Populate transaction transaction.commit(&data_parts_lock); @@ -1684,9 +1687,8 @@ void StorageMergeTree::movePartitionToTable(const StoragePtr & dest_table, const for (auto & part : dst_parts) { - auto builder = part->data_part_storage->getBuilder(); dest_table_storage->fillNewPartName(part, dest_data_parts_lock); - dest_table_storage->renameTempPartAndReplaceUnlocked(part, transaction, builder, dest_data_parts_lock); + dest_table_storage->renameTempPartAndReplaceUnlocked(part, transaction, dest_data_parts_lock); } @@ -1740,16 +1742,16 @@ CheckResults StorageMergeTree::checkData(const ASTPtr & query, ContextPtr local_ for (auto & part : data_parts) { /// If the checksums file is not present, calculate the checksums and write them to disk. 
- String checksums_path = "checksums.txt"; - String tmp_checksums_path = "checksums.txt.tmp"; - if (part->isStoredOnDisk() && !part->data_part_storage->exists(checksums_path)) + static constexpr auto checksums_path = "checksums.txt"; + if (part->isStoredOnDisk() && !part->getDataPartStorage().exists(checksums_path)) { try { auto calculated_checksums = checkDataPart(part, false); calculated_checksums.checkEqual(part->checksums, true); - part->data_part_storage->writeChecksums(part->checksums, local_context->getWriteSettings()); + auto & part_mutable = const_cast(*part); + part_mutable.writeChecksums(part->checksums, local_context->getWriteSettings()); part->checkMetadata(); results.emplace_back(part->name, true, "Checksums recounted and written to disk."); @@ -1809,17 +1811,15 @@ BackupEntries StorageMergeTree::backupMutations(UInt64 version, const String & d void StorageMergeTree::attachRestoredParts(MutableDataPartsVector && parts) { - for (auto part : parts) { /// It's important to create it outside of lock scope because /// otherwise it can lock parts in destructor and deadlock is possible. MergeTreeData::Transaction transaction(*this, NO_TRANSACTION_RAW); - auto builder = part->data_part_storage->getBuilder(); { auto lock = lockParts(); fillNewPartName(part, lock); - renameTempPartAndAdd(part, transaction, builder, lock); + renameTempPartAndAdd(part, transaction, lock); transaction.commit(&lock); } } diff --git a/src/Storages/StorageMergeTree.h b/src/Storages/StorageMergeTree.h index b36850f9f4a..745546b96f6 100644 --- a/src/Storages/StorageMergeTree.h +++ b/src/Storages/StorageMergeTree.h @@ -66,7 +66,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; std::optional totalRows(const Settings &) const override; std::optional totalRowsByPartitionPredicate(const SelectQueryInfo &, ContextPtr) const override; @@ -187,7 +187,7 @@ private: friend struct CurrentlyMergingPartsTagger; - std::shared_ptr selectPartsToMerge( + MergeMutateSelectedEntryPtr selectPartsToMerge( const StorageMetadataPtr & metadata_snapshot, bool aggressive, const String & partition_id, @@ -200,7 +200,7 @@ private: SelectPartsDecision * select_decision_out = nullptr); - std::shared_ptr selectPartsToMutate( + MergeMutateSelectedEntryPtr selectPartsToMutate( const StorageMetadataPtr & metadata_snapshot, String * disable_reason, TableLockHolder & table_lock_holder, std::unique_lock & currently_processing_in_background_mutex_lock); diff --git a/src/Storages/StorageMongoDB.cpp b/src/Storages/StorageMongoDB.cpp index dce45b2431a..3ae9c974770 100644 --- a/src/Storages/StorageMongoDB.cpp +++ b/src/Storages/StorageMongoDB.cpp @@ -150,7 +150,7 @@ Pipe StorageMongoDB::read( ContextPtr /*context*/, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, - unsigned) + size_t /*num_streams*/) { connectIfNotConnected(); diff --git a/src/Storages/StorageMongoDB.h b/src/Storages/StorageMongoDB.h index 0e00b80432b..04fb759133a 100644 --- a/src/Storages/StorageMongoDB.h +++ b/src/Storages/StorageMongoDB.h @@ -37,7 +37,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; SinkToStoragePtr write( const ASTPtr & query, diff --git a/src/Storages/StorageMySQL.cpp b/src/Storages/StorageMySQL.cpp index 4ccd31ab981..20eb59c7262 100644 --- a/src/Storages/StorageMySQL.cpp +++ b/src/Storages/StorageMySQL.cpp @@ 
-78,7 +78,7 @@ Pipe StorageMySQL::read( ContextPtr context_, QueryProcessingStage::Enum /*processed_stage*/, size_t /*max_block_size*/, - unsigned) + size_t /*num_streams*/) { storage_snapshot->check(column_names_); String query = transformQueryForExternalDatabase( diff --git a/src/Storages/StorageMySQL.h b/src/Storages/StorageMySQL.h index e3c0712c179..bf9a24c9bfe 100644 --- a/src/Storages/StorageMySQL.h +++ b/src/Storages/StorageMySQL.h @@ -46,7 +46,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, ContextPtr context) override; diff --git a/src/Storages/StorageNull.h b/src/Storages/StorageNull.h index 0011b5c94ad..2270731c0e3 100644 --- a/src/Storages/StorageNull.h +++ b/src/Storages/StorageNull.h @@ -35,8 +35,8 @@ public: SelectQueryInfo &, ContextPtr /*context*/, QueryProcessingStage::Enum /*processing_stage*/, - size_t, - unsigned) override + size_t /*max_block_size*/, + size_t /*num_streams*/) override { return Pipe( std::make_shared(storage_snapshot->getSampleBlockForColumns(column_names))); diff --git a/src/Storages/StoragePostgreSQL.cpp b/src/Storages/StoragePostgreSQL.cpp index e0c6dbf5463..6cf4e458438 100644 --- a/src/Storages/StoragePostgreSQL.cpp +++ b/src/Storages/StoragePostgreSQL.cpp @@ -81,7 +81,7 @@ Pipe StoragePostgreSQL::read( ContextPtr context_, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size_, - unsigned) + size_t /*num_streams*/) { storage_snapshot->check(column_names_); diff --git a/src/Storages/StoragePostgreSQL.h b/src/Storages/StoragePostgreSQL.h index 0755e33269e..97c62daa50f 100644 --- a/src/Storages/StoragePostgreSQL.h +++ b/src/Storages/StoragePostgreSQL.h @@ -38,7 +38,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, ContextPtr context) override; diff --git a/src/Storages/StorageProxy.h b/src/Storages/StorageProxy.h index 0fabff59db4..2afd9e8a63b 100644 --- a/src/Storages/StorageProxy.h +++ b/src/Storages/StorageProxy.h @@ -50,7 +50,7 @@ public: ContextPtr context, QueryProcessingStage::Enum & processed_stage, size_t max_block_size, - unsigned num_streams) override + size_t num_streams) override { return getNested()->watch(column_names, query_info, context, processed_stage, max_block_size, num_streams); } @@ -63,7 +63,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override + size_t num_streams) override { return getNested()->read(query_plan, column_names, storage_snapshot, query_info, context, processed_stage, max_block_size, num_streams); } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index b55c59a3d6e..3c0fbb162bc 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -1,5 +1,6 @@ #include +#include #include #include "Common/hex.h" #include @@ -179,6 +180,7 @@ namespace ActionLocks static const auto QUEUE_UPDATE_ERROR_SLEEP_MS = 1 * 1000; static const auto MUTATIONS_FINALIZING_SLEEP_MS = 1 * 1000; static const auto MUTATIONS_FINALIZING_IDLE_SLEEP_MS = 5 * 1000; +const String StorageReplicatedMergeTree::default_zookeeper_name = 
"default"; void StorageReplicatedMergeTree::setZooKeeper() { @@ -285,21 +287,32 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( , replicated_fetches_throttler(std::make_shared(getSettings()->max_replicated_fetches_network_bandwidth, getContext()->getReplicatedFetchesThrottler())) , replicated_sends_throttler(std::make_shared(getSettings()->max_replicated_sends_network_bandwidth, getContext()->getReplicatedSendsThrottler())) { + /// We create and deactivate all tasks for consistency. + /// They all will be scheduled and activated by the restarting thread. queue_updating_task = getContext()->getSchedulePool().createTask( getStorageID().getFullTableName() + " (StorageReplicatedMergeTree::queueUpdatingTask)", [this]{ queueUpdatingTask(); }); + queue_updating_task->deactivate(); + mutations_updating_task = getContext()->getSchedulePool().createTask( getStorageID().getFullTableName() + " (StorageReplicatedMergeTree::mutationsUpdatingTask)", [this]{ mutationsUpdatingTask(); }); + mutations_updating_task->deactivate(); + merge_selecting_task = getContext()->getSchedulePool().createTask( getStorageID().getFullTableName() + " (StorageReplicatedMergeTree::mergeSelectingTask)", [this] { mergeSelectingTask(); }); - /// Will be activated if we win leader election. + /// Will be activated if we will achieve leader state. merge_selecting_task->deactivate(); mutations_finalizing_task = getContext()->getSchedulePool().createTask( getStorageID().getFullTableName() + " (StorageReplicatedMergeTree::mutationsFinalizingTask)", [this] { mutationsFinalizingTask(); }); + /// This task can be scheduled by different parts of code even when storage is readonly. + /// This can lead to redundant exceptions during startup. + /// Will be activated by restarting thread. + mutations_finalizing_task->deactivate(); + bool has_zookeeper = getContext()->hasZooKeeper() || getContext()->hasAuxiliaryZooKeeper(zookeeper_name); if (has_zookeeper) { @@ -1443,6 +1456,7 @@ MergeTreeData::MutableDataPartPtr StorageReplicatedMergeTree::attachPartHelperFo const String part_new_name = actual_part_info.getPartName(); for (const DiskPtr & disk : getStoragePolicy()->getDisks()) + { for (const auto it = disk->iterateDirectory(fs::path(relative_data_path) / "detached/"); it->isValid(); it->next()) { const auto part_info = MergeTreePartInfo::tryParsePartName(it->name(), format_version); @@ -1479,6 +1493,7 @@ MergeTreeData::MutableDataPartPtr StorageReplicatedMergeTree::attachPartHelperFo return part; } } + } return {}; } @@ -1529,8 +1544,7 @@ bool StorageReplicatedMergeTree::executeLogEntry(LogEntry & entry) Transaction transaction(*this, NO_TRANSACTION_RAW); part->version.setCreationTID(Tx::PrehistoricTID, nullptr); - auto builder = part->data_part_storage->getBuilder(); - renameTempPartAndReplace(part, transaction, builder); + renameTempPartAndReplace(part, transaction); checkPartChecksumsAndCommit(transaction, part); writePartLog(PartLogElement::Type::NEW_PART, {}, 0 /** log entry is fake so we don't measure the time */, @@ -1769,7 +1783,7 @@ bool StorageReplicatedMergeTree::executeFetch(LogEntry & entry, bool need_to_che } -DataPartStoragePtr StorageReplicatedMergeTree::executeFetchShared( +MutableDataPartStoragePtr StorageReplicatedMergeTree::executeFetchShared( const String & source_replica, const String & new_part_name, const DiskPtr & disk, @@ -1813,7 +1827,7 @@ void StorageReplicatedMergeTree::executeDropRange(const LogEntry & entry) /// Therefore, we use all data parts. 
auto metadata_snapshot = getInMemoryMetadataPtr(); - DataPartsVector parts_to_remove; + PartsToRemoveFromZooKeeper parts_to_remove; { auto data_parts_lock = lockParts(); parts_to_remove = removePartsInRangeFromWorkingSetAndGetPartsToRemoveFromZooKeeper(NO_TRANSACTION_RAW, drop_range_info, data_parts_lock); @@ -1835,8 +1849,11 @@ void StorageReplicatedMergeTree::executeDropRange(const LogEntry & entry) /// If DETACH clone parts to detached/ directory for (const auto & part : parts_to_remove) { - LOG_INFO(log, "Detaching {}", part->data_part_storage->getPartDirectory()); - part->makeCloneInDetached("", metadata_snapshot); + if (auto part_to_detach = part.getPartIfItWasActive()) + { + LOG_INFO(log, "Detaching {}", part_to_detach->getDataPartStorage().getPartDirectory()); + part_to_detach->makeCloneInDetached("", metadata_snapshot); + } } } @@ -1927,7 +1944,7 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) PartDescriptions all_parts; PartDescriptions parts_to_add; - DataPartsVector parts_to_remove; + PartsToRemoveFromZooKeeper parts_to_remove; auto table_lock_holder_dst_table = lockForShare( RWLockImpl::NO_QUERY, getSettings()->lock_acquire_timeout_for_background_operations); @@ -1958,7 +1975,7 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) String parts_to_remove_str; for (const auto & part : parts_to_remove) { - parts_to_remove_str += part->name; + parts_to_remove_str += part.getPartName(); parts_to_remove_str += " "; } LOG_TRACE(log, "Replacing {} parts {}with empty set", parts_to_remove.size(), parts_to_remove_str); @@ -2214,8 +2231,7 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) Coordination::Requests ops; for (PartDescriptionPtr & part_desc : final_parts) { - auto builder = part_desc->res_part->data_part_storage->getBuilder(); - renameTempPartAndReplace(part_desc->res_part, transaction, builder); + renameTempPartAndReplace(part_desc->res_part, transaction); getCommitPartOps(ops, part_desc->res_part); lockSharedData(*part_desc->res_part, false, part_desc->hardlinked_files); @@ -2235,7 +2251,7 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) String parts_to_remove_str; for (const auto & part : parts_to_remove) { - parts_to_remove_str += part->name; + parts_to_remove_str += part.getPartName(); parts_to_remove_str += " "; } LOG_TRACE(log, "Replacing {} parts {}with {} parts {}", parts_to_remove.size(), parts_to_remove_str, @@ -2312,9 +2328,7 @@ void StorageReplicatedMergeTree::executeClonePartFromShard(const LogEntry & entr part = get_part(); // The fetched part is valuable and should not be cleaned like a temp part. 
part->is_temp = false; - auto builder = part->data_part_storage->getBuilder(); - part->renameTo("detached/" + entry.new_part_name, true, builder); - builder->commit(); + part->renameTo("detached/" + entry.new_part_name, true); LOG_INFO(log, "Cloned part {} to detached directory", part->name); } @@ -2408,6 +2422,7 @@ void StorageReplicatedMergeTree::cloneReplica(const String & source_replica, Coo std::vector source_queue; ActiveDataPartSet get_part_set{format_version}; ActiveDataPartSet drop_range_set{format_version}; + std::unordered_set exact_part_names; { std::vector queue_get_futures; @@ -2445,14 +2460,22 @@ void StorageReplicatedMergeTree::cloneReplica(const String & source_replica, Coo info.parsed_entry->znode_name = source_queue_names[i]; if (info.parsed_entry->type == LogEntry::DROP_RANGE) + { drop_range_set.add(info.parsed_entry->new_part_name); - - if (info.parsed_entry->type == LogEntry::GET_PART) + } + else if (info.parsed_entry->type == LogEntry::GET_PART) { String maybe_covering_drop_range = drop_range_set.getContainingPart(info.parsed_entry->new_part_name); if (maybe_covering_drop_range.empty()) get_part_set.add(info.parsed_entry->new_part_name); } + else + { + /// We should keep local parts if they present in the queue of source replica. + /// There's a chance that we are the only replica that has these parts. + Strings entry_virtual_parts = info.parsed_entry->getVirtualPartNames(format_version); + std::move(entry_virtual_parts.begin(), entry_virtual_parts.end(), std::inserter(exact_part_names, exact_part_names.end())); + } } } @@ -2472,11 +2495,17 @@ void StorageReplicatedMergeTree::cloneReplica(const String & source_replica, Coo for (const auto & part : local_parts_in_zk) { - if (get_part_set.getContainingPart(part).empty()) - { - parts_to_remove_from_zk.emplace_back(part); - LOG_WARNING(log, "Source replica does not have part {}. Removing it from ZooKeeper.", part); - } + /// We look for exact match (and not for any covering part) + /// because our part might be dropped and covering part might be merged though gap. + /// (avoid resurrection of data that was removed a long time ago) + if (get_part_set.getContainingPart(part) == part) + continue; + + if (exact_part_names.contains(part)) + continue; + + parts_to_remove_from_zk.emplace_back(part); + LOG_WARNING(log, "Source replica does not have part {}. Removing it from ZooKeeper.", part); } { @@ -2498,11 +2527,14 @@ void StorageReplicatedMergeTree::cloneReplica(const String & source_replica, Coo for (const auto & part : local_active_parts) { - if (get_part_set.getContainingPart(part->name).empty()) - { - parts_to_remove_from_working_set.emplace_back(part); - LOG_WARNING(log, "Source replica does not have part {}. Removing it from working set.", part->name); - } + if (get_part_set.getContainingPart(part->name) == part->name) + continue; + + if (exact_part_names.contains(part->name)) + continue; + + parts_to_remove_from_working_set.emplace_back(part); + LOG_WARNING(log, "Source replica does not have part {}. 
Removing it from working set.", part->name); } if (getSettings()->detach_old_local_parts_when_cloning_replica) @@ -2511,7 +2543,7 @@ void StorageReplicatedMergeTree::cloneReplica(const String & source_replica, Coo for (const auto & part : parts_to_remove_from_working_set) { - LOG_INFO(log, "Detaching {}", part->data_part_storage->getPartDirectory()); + LOG_INFO(log, "Detaching {}", part->getDataPartStorage().getPartDirectory()); part->makeCloneInDetached("clone", metadata_snapshot); } } @@ -3206,16 +3238,17 @@ StorageReplicatedMergeTree::CreateMergeEntryResult StorageReplicatedMergeTree::c int32_t log_version, MergeType merge_type) { - std::vector> exists_futures; - exists_futures.reserve(parts.size()); + Strings exists_paths; + exists_paths.reserve(parts.size()); for (const auto & part : parts) - exists_futures.emplace_back(zookeeper->asyncExists(fs::path(replica_path) / "parts" / part->name)); + exists_paths.emplace_back(fs::path(replica_path) / "parts" / part->name); + auto exists_results = zookeeper->exists(exists_paths); bool all_in_zk = true; for (size_t i = 0; i < parts.size(); ++i) { /// If there is no information about part in ZK, we will not merge it. - if (exists_futures[i].get().error == Coordination::Error::ZNONODE) + if (exists_results[i].error == Coordination::Error::ZNONODE) { all_in_zk = false; @@ -3862,7 +3895,7 @@ bool StorageReplicatedMergeTree::fetchPart(const String & part_name, const Stora auto source_part = getActiveContainingPart(covered_part_info); /// Fetch for zero-copy replication is cheap and straightforward, so we don't use local clone here - if (source_part && (!settings_ptr->allow_remote_fs_zero_copy_replication || !source_part->data_part_storage->supportZeroCopyReplication())) + if (source_part && (!settings_ptr->allow_remote_fs_zero_copy_replication || !source_part->getDataPartStorage().supportZeroCopyReplication())) { auto source_part_header = ReplicatedMergeTreePartHeader::fromColumnsAndChecksums( source_part->getColumns(), source_part->checksums); @@ -3960,11 +3993,10 @@ bool StorageReplicatedMergeTree::fetchPart(const String & part_name, const Stora { part = get_part(); - auto builder = part->data_part_storage->getBuilder(); if (!to_detached) { Transaction transaction(*this, NO_TRANSACTION_RAW); - renameTempPartAndReplace(part, transaction, builder); + renameTempPartAndReplace(part, transaction); replaced_parts = checkPartChecksumsAndCommit(transaction, part, hardlinked_files); @@ -4006,8 +4038,7 @@ bool StorageReplicatedMergeTree::fetchPart(const String & part_name, const Stora { // The fetched part is valuable and should not be cleaned like a temp part. 
part->is_temp = false; - part->renameTo(fs::path("detached") / part_name, true, builder); - builder->commit(); + part->renameTo(fs::path("detached") / part_name, true); } } catch (const Exception & e) @@ -4041,7 +4072,7 @@ bool StorageReplicatedMergeTree::fetchPart(const String & part_name, const Stora } -DataPartStoragePtr StorageReplicatedMergeTree::fetchExistsPart( +MutableDataPartStoragePtr StorageReplicatedMergeTree::fetchExistsPart( const String & part_name, const StorageMetadataPtr & metadata_snapshot, const String & source_replica_path, @@ -4116,14 +4147,11 @@ DataPartStoragePtr StorageReplicatedMergeTree::fetchExistsPart( { part = get_part(); - if (part->data_part_storage->getDiskName() != replaced_disk->getName()) - throw Exception("Part " + part->name + " fetched on wrong disk " + part->data_part_storage->getDiskName(), ErrorCodes::LOGICAL_ERROR); + if (part->getDataPartStorage().getDiskName() != replaced_disk->getName()) + throw Exception("Part " + part->name + " fetched on wrong disk " + part->getDataPartStorage().getDiskName(), ErrorCodes::LOGICAL_ERROR); auto replaced_path = fs::path(replaced_part_path); - auto builder = part->data_part_storage->getBuilder(); - builder->rename(replaced_path.parent_path(), replaced_path.filename(), nullptr, true, false); - part->data_part_storage->onRename(replaced_path.parent_path(), replaced_path.filename()); - builder->commit(); + part->getDataPartStorage().rename(replaced_path.parent_path(), replaced_path.filename(), nullptr, true, false); } catch (const Exception & e) { @@ -4146,8 +4174,7 @@ DataPartStoragePtr StorageReplicatedMergeTree::fetchExistsPart( ProfileEvents::increment(ProfileEvents::ReplicatedPartFetches); LOG_DEBUG(log, "Fetched part {} from {}", part_name, source_replica_path); - - return part->data_part_storage; + return part->getDataPartStoragePtr(); } void StorageReplicatedMergeTree::startup() @@ -4339,7 +4366,7 @@ void StorageReplicatedMergeTree::read( ContextPtr local_context, QueryProcessingStage::Enum processed_stage, const size_t max_block_size, - const unsigned num_streams) + const size_t num_streams) { /// If true, then we will ask initiator if we can read chosen ranges const bool enable_parallel_reading = local_context->getClientInfo().collaborate_with_initiator; @@ -5544,7 +5571,8 @@ void StorageReplicatedMergeTree::getStatus(Status & res, bool with_zk_fields) res.queue = queue.getStatus(); res.absolute_delay = getAbsoluteDelay(); /// NOTE: may be slightly inconsistent with queue status. 
- res.parts_to_check = part_check_thread.size(); + /// NOTE: consider convert to UInt64 + res.parts_to_check = static_cast(part_check_thread.size()); res.zookeeper_path = zookeeper_path; res.replica_name = replica_name; @@ -6205,11 +6233,11 @@ void StorageReplicatedMergeTree::clearOldPartsAndRemoveFromZK() } -void StorageReplicatedMergeTree::removePartsFromZooKeeperWithRetries(DataPartsVector & parts, size_t max_retries) +void StorageReplicatedMergeTree::removePartsFromZooKeeperWithRetries(PartsToRemoveFromZooKeeper & parts, size_t max_retries) { Strings part_names_to_remove; for (const auto & part : parts) - part_names_to_remove.emplace_back(part->name); + part_names_to_remove.emplace_back(part.getPartName()); return removePartsFromZooKeeperWithRetries(part_names_to_remove, max_retries); } @@ -6228,19 +6256,20 @@ void StorageReplicatedMergeTree::removePartsFromZooKeeperWithRetries(const Strin auto zookeeper = getZooKeeper(); - std::vector> exists_futures; - exists_futures.reserve(part_names.size()); + Strings exists_paths; + exists_paths.reserve(part_names.size()); for (const String & part_name : part_names) { - String part_path = fs::path(replica_path) / "parts" / part_name; - exists_futures.emplace_back(zookeeper->asyncExists(part_path)); + exists_paths.emplace_back(fs::path(replica_path) / "parts" / part_name); } + auto exists_results = zookeeper->exists(exists_paths); + std::vector> remove_futures; remove_futures.reserve(part_names.size()); for (size_t i = 0; i < part_names.size(); ++i) { - Coordination::ExistsResponse exists_resp = exists_futures[i].get(); + Coordination::ExistsResponse exists_resp = exists_results[i]; if (exists_resp.error == Coordination::Error::ZOK) { Coordination::Requests ops; @@ -6286,9 +6315,9 @@ void StorageReplicatedMergeTree::removePartsFromZooKeeperWithRetries(const Strin void StorageReplicatedMergeTree::removePartsFromZooKeeper( zkutil::ZooKeeperPtr & zookeeper, const Strings & part_names, NameSet * parts_should_be_retried) { - std::vector> exists_futures; + Strings exists_paths; std::vector> remove_futures; - exists_futures.reserve(part_names.size()); + exists_paths.reserve(part_names.size()); remove_futures.reserve(part_names.size()); try { @@ -6296,13 +6325,14 @@ void StorageReplicatedMergeTree::removePartsFromZooKeeper( /// if zk session will be dropped for (const String & part_name : part_names) { - String part_path = fs::path(replica_path) / "parts" / part_name; - exists_futures.emplace_back(zookeeper->asyncExists(part_path)); + exists_paths.emplace_back(fs::path(replica_path) / "parts" / part_name); } + auto exists_results = zookeeper->exists(exists_paths); + for (size_t i = 0; i < part_names.size(); ++i) { - Coordination::ExistsResponse exists_resp = exists_futures[i].get(); + auto exists_resp = exists_results[i]; if (exists_resp.error == Coordination::Error::ZOK) { Coordination::Requests ops; @@ -6534,7 +6564,7 @@ void StorageReplicatedMergeTree::replacePartitionFrom( if (replace) clearBlocksInPartition(*zookeeper, drop_range.partition_id, drop_range.max_block, drop_range.max_block); - DataPartsVector parts_to_remove; + PartsToRemoveFromZooKeeper parts_to_remove; Coordination::Responses op_results; try @@ -6560,10 +6590,7 @@ void StorageReplicatedMergeTree::replacePartitionFrom( { auto data_parts_lock = lockParts(); for (auto & part : dst_parts) - { - auto builder = part->data_part_storage->getBuilder(); - renameTempPartAndReplaceUnlocked(part, transaction, builder, data_parts_lock); - } + renameTempPartAndReplaceUnlocked(part, transaction, 
data_parts_lock); } for (size_t i = 0; i < dst_parts.size(); ++i) @@ -6773,7 +6800,7 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta clearBlocksInPartition(*zookeeper, drop_range.partition_id, drop_range.max_block, drop_range.max_block); - DataPartsVector parts_to_remove; + PartsToRemoveFromZooKeeper parts_to_remove; Coordination::Responses op_results; try @@ -6799,10 +6826,7 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta auto dest_data_parts_lock = dest_table_storage->lockParts(); for (auto & part : dst_parts) - { - auto builder = part->data_part_storage->getBuilder(); - dest_table_storage->renameTempPartAndReplaceUnlocked(part, transaction, builder, dest_data_parts_lock); - } + dest_table_storage->renameTempPartAndReplaceUnlocked(part, transaction, dest_data_parts_lock); for (size_t i = 0; i < dst_parts.size(); ++i) dest_table_storage->lockSharedData(*dst_parts[i], false, hardlinked_files_for_parts[i]); @@ -7391,7 +7415,7 @@ void StorageReplicatedMergeTree::checkBrokenDisks() for (auto & part : *parts) { - if (part->data_part_storage && part->data_part_storage->getDiskName() == disk_ptr->getName()) + if (part->getDataPartStorage().getDiskName() == disk_ptr->getName()) broken_part_callback(part->name); } continue; @@ -7554,10 +7578,10 @@ void StorageReplicatedMergeTree::lockSharedData(const IMergeTreeDataPart & part, { auto settings = getSettings(); - if (!part.data_part_storage || !part.isStoredOnDisk() || !settings->allow_remote_fs_zero_copy_replication) + if (!part.isStoredOnDisk() || !settings->allow_remote_fs_zero_copy_replication) return; - if (!part.data_part_storage->supportZeroCopyReplication()) + if (!part.getDataPartStorage().supportZeroCopyReplication()) return; zkutil::ZooKeeperPtr zookeeper = tryGetZooKeeper(); @@ -7568,7 +7592,7 @@ void StorageReplicatedMergeTree::lockSharedData(const IMergeTreeDataPart & part, boost::replace_all(id, "/", "_"); Strings zc_zookeeper_paths = getZeroCopyPartPath( - *getSettings(), part.data_part_storage->getDiskType(), getTableSharedID(), + *getSettings(), part.getDataPartStorage().getDiskType(), getTableSharedID(), part.name, zookeeper_path); String path_to_set_hardlinked_files; @@ -7577,7 +7601,7 @@ void StorageReplicatedMergeTree::lockSharedData(const IMergeTreeDataPart & part, if (hardlinked_files.has_value() && !hardlinked_files->hardlinks_from_source_part.empty()) { path_to_set_hardlinked_files = getZeroCopyPartPath( - *getSettings(), part.data_part_storage->getDiskType(), hardlinked_files->source_table_shared_id, + *getSettings(), part.getDataPartStorage().getDiskType(), hardlinked_files->source_table_shared_id, hardlinked_files->source_part_name, zookeeper_path)[0]; hardlinks = hardlinked_files->hardlinks_from_source_part; @@ -7601,25 +7625,22 @@ std::pair StorageReplicatedMergeTree::unlockSharedData(const IMer if (!settings->allow_remote_fs_zero_copy_replication) return std::make_pair(true, NameSet{}); - if (!part.data_part_storage) - LOG_WARNING(log, "Datapart storage for part {} (temp: {}) is not initialzied", part.name, part.is_temp); - - if (!part.data_part_storage || !part.isStoredOnDisk()) + if (!part.isStoredOnDisk()) { LOG_TRACE(log, "Part {} is not stored on disk, blobs can be removed", part.name); return std::make_pair(true, NameSet{}); } - if (!part.data_part_storage || !part.data_part_storage->supportZeroCopyReplication()) + if (!part.getDataPartStorage().supportZeroCopyReplication()) { LOG_TRACE(log, "Part {} is not stored on zero-copy replicated 
disk, blobs can be removed", part.name); return std::make_pair(true, NameSet{}); } /// If part is temporary refcount file may be absent - if (part.data_part_storage->exists(IMergeTreeDataPart::FILE_FOR_REFERENCES_CHECK)) + if (part.getDataPartStorage().exists(IMergeTreeDataPart::FILE_FOR_REFERENCES_CHECK)) { - auto ref_count = part.data_part_storage->getRefCount(IMergeTreeDataPart::FILE_FOR_REFERENCES_CHECK); + auto ref_count = part.getDataPartStorage().getRefCount(IMergeTreeDataPart::FILE_FOR_REFERENCES_CHECK); if (ref_count > 0) /// Keep part shard info for frozen backups { LOG_TRACE(log, "Part {} has more than zero local references ({}), blobs cannot be removed", part.name, ref_count); @@ -7657,7 +7678,7 @@ std::pair StorageReplicatedMergeTree::unlockSharedData(const IMer return unlockSharedDataByID( part.getUniqueId(), getTableSharedID(), part.name, replica_name, - part.data_part_storage->getDiskType(), zookeeper, *getSettings(), log, zookeeper_path, format_version); + part.getDataPartStorage().getDiskType(), zookeeper, *getSettings(), log, zookeeper_path, format_version); } namespace @@ -7673,14 +7694,14 @@ namespace /// But sometimes we need an opposite. When we deleting all_0_0_0_1 it can be non replicated to other replicas, so we are the only owner of this part. /// In this case when we will drop all_0_0_0_1 we will drop blobs for all_0_0_0. But it will lead to dataloss. For such case we need to check that other replicas /// still need parent part. -NameSet getParentLockedBlobs(zkutil::ZooKeeperPtr zookeeper_ptr, const std::string & zero_copy_part_path_prefix, const std::string & part_info_str, MergeTreeDataFormatVersion format_version, Poco::Logger * log) +std::pair getParentLockedBlobs(zkutil::ZooKeeperPtr zookeeper_ptr, const std::string & zero_copy_part_path_prefix, const std::string & part_info_str, MergeTreeDataFormatVersion format_version, Poco::Logger * log) { NameSet files_not_to_remove; MergeTreePartInfo part_info = MergeTreePartInfo::fromPartName(part_info_str, format_version); /// No mutations -- no hardlinks -- no issues if (part_info.mutation == 0) - return files_not_to_remove; + return {false, files_not_to_remove}; /// Getting all zero copy parts Strings parts_str; @@ -7725,17 +7746,17 @@ NameSet getParentLockedBlobs(zkutil::ZooKeeperPtr zookeeper_ptr, const std::stri LOG_TRACE(log, "Found files not to remove from parent part {}: [{}]", part_candidate_info_str, fmt::join(files_not_to_remove, ", ")); } - break; + return {true, files_not_to_remove}; } } - return files_not_to_remove; + return {false, files_not_to_remove}; } } std::pair StorageReplicatedMergeTree::unlockSharedDataByID( String part_id, const String & table_uuid, const String & part_name, - const String & replica_name_, std::string disk_type, zkutil::ZooKeeperPtr zookeeper_ptr, const MergeTreeSettings & settings, + const String & replica_name_, const std::string & disk_type, zkutil::ZooKeeperPtr zookeeper_ptr, const MergeTreeSettings & settings, Poco::Logger * logger, const String & zookeeper_path_old, MergeTreeDataFormatVersion data_format_version) { boost::replace_all(part_id, "/", "_"); @@ -7754,7 +7775,7 @@ std::pair StorageReplicatedMergeTree::unlockSharedDataByID( if (!files_not_to_remove_str.empty()) boost::split(files_not_to_remove, files_not_to_remove_str, boost::is_any_of("\n ")); - auto parent_not_to_remove = getParentLockedBlobs(zookeeper_ptr, fs::path(zc_zookeeper_path).parent_path(), part_name, data_format_version, logger); + auto [has_parent, parent_not_to_remove] = 
getParentLockedBlobs(zookeeper_ptr, fs::path(zc_zookeeper_path).parent_path(), part_name, data_format_version, logger); files_not_to_remove.insert(parent_not_to_remove.begin(), parent_not_to_remove.end()); String zookeeper_part_uniq_node = fs::path(zc_zookeeper_path) / part_id; @@ -7764,9 +7785,23 @@ std::pair StorageReplicatedMergeTree::unlockSharedDataByID( LOG_TRACE(logger, "Remove zookeeper lock {} for part {}", zookeeper_part_replica_node, part_name); - if (auto ec = zookeeper_ptr->tryRemove(zookeeper_part_replica_node); ec != Coordination::Error::ZOK && ec != Coordination::Error::ZNONODE) + if (auto ec = zookeeper_ptr->tryRemove(zookeeper_part_replica_node); ec != Coordination::Error::ZOK) { - throw zkutil::KeeperException(ec, zookeeper_part_replica_node); + /// Very complex case. It means that lock already doesn't exist when we tried to remove it. + /// So we don't know are we owner of this part or not. Maybe we just mutated it, renamed on disk and failed to lock in ZK. + /// But during mutation we can have hardlinks to another part. So it's not Ok to remove blobs of this part if it was mutated. + if (ec == Coordination::Error::ZNONODE) + { + if (has_parent) + { + LOG_INFO(logger, "Lock on path {} for part {} doesn't exist, refuse to remove blobs", zookeeper_part_replica_node, part_name); + return {false, {}}; + } + } + else + { + throw zkutil::KeeperException(ec, zookeeper_part_replica_node); + } } /// Check, maybe we were the last replica and can remove part forever @@ -7842,7 +7877,7 @@ std::pair StorageReplicatedMergeTree::unlockSharedDataByID( } -DataPartStoragePtr StorageReplicatedMergeTree::tryToFetchIfShared( +MutableDataPartStoragePtr StorageReplicatedMergeTree::tryToFetchIfShared( const IMergeTreeDataPart & part, const DiskPtr & disk, const String & path) @@ -7942,7 +7977,7 @@ String StorageReplicatedMergeTree::getSharedDataReplica( Strings StorageReplicatedMergeTree::getZeroCopyPartPath( - const MergeTreeSettings & settings, std::string disk_type, const String & table_uuid, + const MergeTreeSettings & settings, const std::string & disk_type, const String & table_uuid, const String & part_name, const String & zookeeper_path_old) { Strings res; @@ -7950,11 +7985,11 @@ Strings StorageReplicatedMergeTree::getZeroCopyPartPath( String zero_copy = fmt::format("zero_copy_{}", disk_type); String new_path = fs::path(settings.remote_fs_zero_copy_zookeeper_path.toString()) / zero_copy / table_uuid / part_name; - res.push_back(new_path); + res.push_back(std::move(new_path)); if (settings.remote_fs_zero_copy_path_compatible_mode && !zookeeper_path_old.empty()) { /// Compatibility mode for cluster with old and new versions String old_path = fs::path(zookeeper_path_old) / zero_copy / "shared" / part_name; - res.push_back(old_path); + res.push_back(std::move(old_path)); } return res; @@ -8077,15 +8112,13 @@ bool StorageReplicatedMergeTree::createEmptyPartInsteadOfLost(zkutil::ZooKeeperP minmax_idx->update(block, getMinMaxColumnsNames(metadata_snapshot->getPartitionKey())); auto new_volume = createVolumeFromReservation(reservation, volume); + auto data_part_storage = std::make_shared( new_volume, relative_data_path, TMP_PREFIX + lost_part_name); - DataPartStorageBuilderPtr data_part_storage_builder = std::make_shared( - new_volume, - relative_data_path, - TMP_PREFIX + lost_part_name); + data_part_storage->beginTransaction(); auto new_data_part = createPart( lost_part_name, @@ -8128,16 +8161,16 @@ bool StorageReplicatedMergeTree::createEmptyPartInsteadOfLost(zkutil::ZooKeeperP if 
(new_data_part->isStoredOnDisk()) { /// The name could be non-unique in case of stale files from previous runs. - if (data_part_storage_builder->exists()) + if (data_part_storage->exists()) { - LOG_WARNING(log, "Removing old temporary directory {}", new_data_part->data_part_storage->getFullPath()); - data_part_storage_builder->removeRecursive(); + LOG_WARNING(log, "Removing old temporary directory {}", new_data_part->getDataPartStorage().getFullPath()); + data_part_storage->removeRecursive(); } - data_part_storage_builder->createDirectories(); + data_part_storage->createDirectories(); if (getSettings()->fsync_part_directory) - sync_guard = data_part_storage_builder->getDirectorySyncGuard(); + sync_guard = data_part_storage->getDirectorySyncGuard(); } /// This effectively chooses minimal compression method: @@ -8145,7 +8178,7 @@ bool StorageReplicatedMergeTree::createEmptyPartInsteadOfLost(zkutil::ZooKeeperP auto compression_codec = getContext()->chooseCompressionCodec(0, 0); const auto & index_factory = MergeTreeIndexFactory::instance(); - MergedBlockOutputStream out(new_data_part, data_part_storage_builder, metadata_snapshot, columns, + MergedBlockOutputStream out(new_data_part, metadata_snapshot, columns, index_factory.getMany(metadata_snapshot->getSecondaryIndices()), compression_codec, NO_TRANSACTION_PTR); bool sync_on_insert = settings->fsync_after_insert; @@ -8159,7 +8192,7 @@ bool StorageReplicatedMergeTree::createEmptyPartInsteadOfLost(zkutil::ZooKeeperP try { MergeTreeData::Transaction transaction(*this, NO_TRANSACTION_RAW); - auto replaced_parts = renameTempPartAndReplace(new_data_part, transaction, data_part_storage_builder); + auto replaced_parts = renameTempPartAndReplace(new_data_part, transaction); if (!replaced_parts.empty()) { @@ -8347,7 +8380,7 @@ bool StorageReplicatedMergeTree::removeDetachedPart(DiskPtr disk, const String & if (disk->supportZeroCopyReplication()) { String table_id = getTableSharedID(); - return removeSharedDetachedPart(disk, path, part_name, table_id, zookeeper_name, replica_name, zookeeper_path, getContext(), current_zookeeper); + return removeSharedDetachedPart(disk, path, part_name, table_id, replica_name, zookeeper_path, getContext(), current_zookeeper); } disk->removeRecursive(path); @@ -8357,7 +8390,7 @@ bool StorageReplicatedMergeTree::removeDetachedPart(DiskPtr disk, const String & bool StorageReplicatedMergeTree::removeSharedDetachedPart(DiskPtr disk, const String & path, const String & part_name, const String & table_uuid, - const String &, const String & detached_replica_name, const String & detached_zookeeper_path, ContextPtr local_context, const zkutil::ZooKeeperPtr & zookeeper) + const String & detached_replica_name, const String & detached_zookeeper_path, const ContextPtr & local_context, const zkutil::ZooKeeperPtr & zookeeper) { bool keep_shared = false; diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index e10ffcce22c..323b1ce02bf 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -131,7 +131,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; std::optional totalRows(const Settings & settings) const override; std::optional totalRowsByPartitionPredicate(const SelectQueryInfo & query_info, ContextPtr context) const override; @@ -263,7 +263,7 @@ public: bool canExecuteFetch(const ReplicatedMergeTreeLogEntry & entry, String 
& disable_reason) const; /// Fetch part only when it stored on shared storage like S3 - DataPartStoragePtr executeFetchShared(const String & source_replica, const String & new_part_name, const DiskPtr & disk, const String & path); + MutableDataPartStoragePtr executeFetchShared(const String & source_replica, const String & new_part_name, const DiskPtr & disk, const String & path); /// Lock part in zookeeper for use shared data in several nodes void lockSharedData(const IMergeTreeDataPart & part, bool replace_existing_lock, std::optional hardlinked_files) const override; @@ -279,16 +279,16 @@ public: /// Return true if data unlocked /// Return false if data is still used by another node static std::pair unlockSharedDataByID(String part_id, const String & table_uuid, const String & part_name, const String & replica_name_, - std::string disk_type, zkutil::ZooKeeperPtr zookeeper_, const MergeTreeSettings & settings, Poco::Logger * logger, + const std::string & disk_type, zkutil::ZooKeeperPtr zookeeper_, const MergeTreeSettings & settings, Poco::Logger * logger, const String & zookeeper_path_old, MergeTreeDataFormatVersion data_format_version); /// Fetch part only if some replica has it on shared storage like S3 - DataPartStoragePtr tryToFetchIfShared(const IMergeTreeDataPart & part, const DiskPtr & disk, const String & path) override; + MutableDataPartStoragePtr tryToFetchIfShared(const IMergeTreeDataPart & part, const DiskPtr & disk, const String & path) override; /// Get best replica having this partition on a same type remote disk String getSharedDataReplica(const IMergeTreeDataPart & part, DataSourceType data_source_type) const; - inline String getReplicaName() const { return replica_name; } + inline const String & getReplicaName() const { return replica_name; } /// Restores table metadata if ZooKeeper lost it. /// Used only on restarted readonly replicas (not checked). All active (Active) parts are moved to detached/ @@ -310,9 +310,9 @@ public: bool createEmptyPartInsteadOfLost(zkutil::ZooKeeperPtr zookeeper, const String & lost_part_name); // Return default or custom zookeeper name for table - String getZooKeeperName() const { return zookeeper_name; } + const String & getZooKeeperName() const { return zookeeper_name; } - String getZooKeeperPath() const { return zookeeper_path; } + const String & getZooKeeperPath() const { return zookeeper_path; } // Return table id, common for different replicas String getTableSharedID() const override; @@ -320,13 +320,13 @@ public: /// Returns the same as getTableSharedID(), but extracts it from a create query. static std::optional tryGetTableSharedIDFromCreateQuery(const IAST & create_query, const ContextPtr & global_context); - static String getDefaultZooKeeperName() { return default_zookeeper_name; } + static const String & getDefaultZooKeeperName() { return default_zookeeper_name; } /// Check if there are new broken disks and enqueue part recovery tasks. void checkBrokenDisks(); static bool removeSharedDetachedPart(DiskPtr disk, const String & path, const String & part_name, const String & table_uuid, - const String & zookeeper_name, const String & replica_name, const String & zookeeper_path, ContextPtr local_context, const zkutil::ZooKeeperPtr & zookeeper); + const String & replica_name, const String & zookeeper_path, const ContextPtr & local_context, const zkutil::ZooKeeperPtr & zookeeper); bool canUseZeroCopyReplication() const; private: @@ -381,11 +381,11 @@ private: /// If false - ZooKeeper is available, but there is no table metadata. 
It's safe to drop table in this case. std::optional has_metadata_in_zookeeper; - static constexpr auto default_zookeeper_name = "default"; - String zookeeper_name; - String zookeeper_path; - String replica_name; - String replica_path; + static const String default_zookeeper_name; + const String zookeeper_name; + const String zookeeper_path; + const String replica_name; + const String replica_path; /** /replicas/me/is_active. */ @@ -549,7 +549,7 @@ private: /// Remove parts from ZooKeeper, throw exception if unable to do so after max_retries. void removePartsFromZooKeeperWithRetries(const Strings & part_names, size_t max_retries = 5); - void removePartsFromZooKeeperWithRetries(DataPartsVector & parts, size_t max_retries = 5); + void removePartsFromZooKeeperWithRetries(PartsToRemoveFromZooKeeper & parts, size_t max_retries = 5); /// Removes a part from ZooKeeper and adds a task to the queue to download it. It is supposed to do this with broken parts. void removePartAndEnqueueFetch(const String & part_name); @@ -682,7 +682,7 @@ private: * Used for replace local part on the same s3-shared part in hybrid storage. * Returns false if part is already fetching right now. */ - DataPartStoragePtr fetchExistsPart( + MutableDataPartStoragePtr fetchExistsPart( const String & part_name, const StorageMetadataPtr & metadata_snapshot, const String & replica_path, @@ -829,7 +829,7 @@ private: PartitionBlockNumbersHolder allocateBlockNumbersInAffectedPartitions( const MutationCommands & commands, ContextPtr query_context, const zkutil::ZooKeeperPtr & zookeeper) const; - static Strings getZeroCopyPartPath(const MergeTreeSettings & settings, std::string disk_type, const String & table_uuid, + static Strings getZeroCopyPartPath(const MergeTreeSettings & settings, const std::string & disk_type, const String & table_uuid, const String & part_name, const String & zookeeper_path_old); static void createZeroCopyLockNode( diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 675dd548088..d759c339dea 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -139,7 +139,9 @@ public: request.SetBucket(globbed_uri.bucket); request.SetPrefix(key_prefix); + matcher = std::make_unique(makeRegexpPatternFromGlobs(globbed_uri.key)); + recursive = globbed_uri.key == "/**" ? 
true : false; fillInternalBufferAssumeLocked(); } @@ -197,7 +199,7 @@ private: for (const auto & row : result_batch) { const String & key = row.GetKey(); - if (re2::RE2::FullMatch(key, *matcher)) + if (recursive || re2::RE2::FullMatch(key, *matcher)) { String path = fs::path(globbed_uri.bucket) / key; if (object_infos) @@ -224,7 +226,7 @@ private: for (const auto & row : result_batch) { String key = row.GetKey(); - if (re2::RE2::FullMatch(key, *matcher)) + if (recursive || re2::RE2::FullMatch(key, *matcher)) buffer.emplace_back(std::move(key)); } } @@ -252,6 +254,7 @@ private: Aws::S3::Model::ListObjectsV2Request request; Aws::S3::Model::ListObjectsV2Outcome outcome; std::unique_ptr matcher; + bool recursive{false}; bool is_finished{false}; std::unordered_map * object_infos; Strings * read_keys; @@ -361,39 +364,6 @@ String StorageS3Source::KeysIterator::next() return pimpl->next(); } -class StorageS3Source::ReadTasksIterator::Impl -{ -public: - explicit Impl(const std::vector & read_tasks_, const ReadTaskCallback & new_read_tasks_callback_) - : read_tasks(read_tasks_), new_read_tasks_callback(new_read_tasks_callback_) - { - } - - String next() - { - size_t current_index = index.fetch_add(1, std::memory_order_relaxed); - if (current_index >= read_tasks.size()) - return new_read_tasks_callback(); - return read_tasks[current_index]; - } - -private: - std::atomic_size_t index = 0; - std::vector read_tasks; - ReadTaskCallback new_read_tasks_callback; -}; - -StorageS3Source::ReadTasksIterator::ReadTasksIterator( - const std::vector & read_tasks_, const ReadTaskCallback & new_read_tasks_callback_) - : pimpl(std::make_shared(read_tasks_, new_read_tasks_callback_)) -{ -} - -String StorageS3Source::ReadTasksIterator::next() -{ - return pimpl->next(); -} - Block StorageS3Source::getHeader(Block sample_block, const std::vector & requested_virtual_columns) { for (const auto & virtual_column : requested_virtual_columns) @@ -457,8 +427,9 @@ bool StorageS3Source::initialize() file_path = fs::path(bucket) / current_key; - auto zstd_window_log_max = getContext()->getSettingsRef().zstd_window_log_max; - read_buf = wrapReadBufferWithCompressionMethod(createS3ReadBuffer(current_key), chooseCompressionMethod(current_key, compression_hint), zstd_window_log_max); + int zstd_window_log_max = static_cast(getContext()->getSettingsRef().zstd_window_log_max); + read_buf = wrapReadBufferWithCompressionMethod( + createS3ReadBuffer(current_key), chooseCompressionMethod(current_key, compression_hint), zstd_window_log_max); auto input_format = getContext()->getInputFormat(format, *read_buf, sample_block, max_block_size, format_settings); QueryPipelineBuilder builder; @@ -802,8 +773,7 @@ StorageS3::StorageS3( distributed_processing_, is_key_with_globs, format_settings, - context_, - &read_tasks_used_in_schema_inference); + context_); storage_metadata.setColumns(columns); } else @@ -831,19 +801,14 @@ std::shared_ptr StorageS3::createFileIterator( ContextPtr local_context, ASTPtr query, const Block & virtual_block, - const std::vector & read_tasks, std::unordered_map * object_infos, Strings * read_keys) { if (distributed_processing) { return std::make_shared( - [read_tasks_iterator = std::make_shared(read_tasks, local_context->getReadTaskCallback()), read_keys]() -> String - { - auto key = read_tasks_iterator->next(); - if (read_keys) - read_keys->push_back(key); - return key; + [callback = local_context->getReadTaskCallback()]() -> String { + return callback(); }); } else if (is_key_with_globs) @@ -875,7 +840,7 @@ Pipe 
StorageS3::read( ContextPtr local_context, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, - unsigned num_streams) + size_t num_streams) { bool has_wildcards = s3_configuration.uri.bucket.find(PARTITION_ID_WILDCARD) != String::npos || keys.back().find(PARTITION_ID_WILDCARD) != String::npos; @@ -903,7 +868,6 @@ Pipe StorageS3::read( local_context, query_info.query, virtual_block, - read_tasks_used_in_schema_inference, &object_infos); ColumnsDescription columns_description; @@ -1079,12 +1043,12 @@ void StorageS3::updateS3Configuration(ContextPtr ctx, StorageS3::S3Configuration S3::PocoHTTPClientConfiguration client_configuration = S3::ClientFactory::instance().createClientConfiguration( upd.auth_settings.region, ctx->getRemoteHostFilter(), - ctx->getGlobalContext()->getSettingsRef().s3_max_redirects, + static_cast(ctx->getGlobalContext()->getSettingsRef().s3_max_redirects), ctx->getGlobalContext()->getSettingsRef().enable_s3_requests_logging, /* for_disk_s3 = */ false); client_configuration.endpointOverride = upd.uri.endpoint; - client_configuration.maxConnections = upd.rw_settings.max_connections; + client_configuration.maxConnections = static_cast(upd.rw_settings.max_connections); auto credentials = Aws::Auth::AWSCredentials(upd.auth_settings.access_key_id, upd.auth_settings.secret_access_key); auto headers = upd.auth_settings.headers; @@ -1147,6 +1111,14 @@ StorageS3Configuration StorageS3::getConfiguration(ASTs & engine_args, ContextPt } else { + /// Supported signatures: + /// + /// S3('url') + /// S3('url', 'format') + /// S3('url', 'format', 'compression') + /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format') + /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression') + if (engine_args.empty() || engine_args.size() > 5) throw Exception( "Storage S3 requires 1 to 5 arguments: url, [access_key_id, secret_access_key], name of used format and [compression_method].", @@ -1201,7 +1173,7 @@ ColumnsDescription StorageS3::getTableStructureFromData( return getTableStructureFromDataImpl( configuration.format, s3_configuration, configuration.compression_method, distributed_processing, - s3_configuration.uri.key.find_first_of("*?{") != std::string::npos, format_settings, ctx, nullptr, object_infos); + s3_configuration.uri.key.find_first_of("*?{") != std::string::npos, format_settings, ctx, object_infos); } ColumnsDescription StorageS3::getTableStructureFromDataImpl( @@ -1212,13 +1184,12 @@ ColumnsDescription StorageS3::getTableStructureFromDataImpl( bool is_key_with_globs, const std::optional & format_settings, ContextPtr ctx, - std::vector * read_keys_in_distributed_processing, std::unordered_map * object_infos) { std::vector read_keys; auto file_iterator - = createFileIterator(s3_configuration, {s3_configuration.uri.key}, is_key_with_globs, distributed_processing, ctx, nullptr, {}, {}, object_infos, &read_keys); + = createFileIterator(s3_configuration, {s3_configuration.uri.key}, is_key_with_globs, distributed_processing, ctx, nullptr, {}, object_infos, &read_keys); std::optional columns_from_cache; size_t prev_read_keys_size = read_keys.size(); @@ -1254,7 +1225,7 @@ ColumnsDescription StorageS3::getTableStructureFromDataImpl( } first = false; - const auto zstd_window_log_max = ctx->getSettingsRef().zstd_window_log_max; + int zstd_window_log_max = static_cast(ctx->getSettingsRef().zstd_window_log_max); return wrapReadBufferWithCompressionMethod( std::make_unique( s3_configuration.client, s3_configuration.uri.bucket, key, 
s3_configuration.uri.version_id, s3_configuration.rw_settings.max_single_read_retries, ctx->getReadSettings()), @@ -1271,9 +1242,6 @@ ColumnsDescription StorageS3::getTableStructureFromDataImpl( if (ctx->getSettingsRef().schema_inference_use_cache_for_s3) addColumnsToCache(read_keys, s3_configuration, columns, format, format_settings, ctx); - if (distributed_processing && read_keys_in_distributed_processing) - *read_keys_in_distributed_processing = std::move(read_keys); - return columns; } diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index a983a59d98c..23947a32092 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -66,18 +66,6 @@ public: std::shared_ptr pimpl; }; - class ReadTasksIterator - { - public: - ReadTasksIterator(const std::vector & read_tasks_, const ReadTaskCallback & new_read_tasks_callback_); - String next(); - - private: - class Impl; - /// shared_ptr to have copy constructor - std::shared_ptr pimpl; - }; - using IteratorWrapper = std::function; static Block getHeader(Block sample_block, const std::vector & requested_virtual_columns); @@ -171,7 +159,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, ContextPtr context) override; @@ -197,7 +185,7 @@ public: const S3::URI uri; std::shared_ptr client; - S3Settings::AuthSettings auth_settings; + S3::AuthSettings auth_settings; S3Settings::ReadWriteSettings rw_settings; /// If s3 configuration was passed from ast, then it is static. @@ -209,7 +197,7 @@ public: S3Configuration( const String & url_, - const S3Settings::AuthSettings & auth_settings_, + const S3::AuthSettings & auth_settings_, const S3Settings::ReadWriteSettings & rw_settings_, const HeaderCollection & headers_from_ast_) : uri(S3::URI(url_)) @@ -238,8 +226,6 @@ private: ASTPtr partition_by; bool is_key_with_globs = false; - std::vector read_tasks_used_in_schema_inference; - std::unordered_map object_infos; static void updateS3Configuration(ContextPtr, S3Configuration &); @@ -252,7 +238,6 @@ private: ContextPtr local_context, ASTPtr query, const Block & virtual_block, - const std::vector & read_tasks = {}, std::unordered_map * object_infos = nullptr, Strings * read_keys = nullptr); @@ -264,7 +249,6 @@ private: bool is_key_with_globs, const std::optional & format_settings, ContextPtr ctx, - std::vector * read_keys_in_distributed_processing = nullptr, std::unordered_map * object_infos = nullptr); bool supportsSubsetOfColumns() const override; diff --git a/src/Storages/StorageS3Cluster.cpp b/src/Storages/StorageS3Cluster.cpp index df927069bb0..3b8c8b1cb92 100644 --- a/src/Storages/StorageS3Cluster.cpp +++ b/src/Storages/StorageS3Cluster.cpp @@ -5,46 +5,40 @@ #if USE_AWS_S3 #include "Common/Exception.h" -#include #include "Client/Connection.h" #include "Core/QueryProcessingStage.h" -#include -#include -#include #include -#include #include #include #include #include #include #include -#include #include #include #include -#include "Processors/ISource.h" #include #include #include #include +#include #include #include #include +#include +#include #include #include #include #include -#include #include #include -#include -#include namespace DB { + StorageS3Cluster::StorageS3Cluster( const StorageS3ClusterConfiguration & configuration_, const StorageID & table_id_, @@ -72,6 +66,7 @@ StorageS3Cluster::StorageS3Cluster( auto columns = 
StorageS3::getTableStructureFromDataImpl(format_name, s3_configuration, compression_method, /*distributed_processing_*/false, is_key_with_globs, /*format_settings=*/std::nullopt, context_); storage_metadata.setColumns(columns); + add_columns_structure_to_query = true; } else storage_metadata.setColumns(columns_); @@ -97,7 +92,7 @@ Pipe StorageS3Cluster::read( ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t /*max_block_size*/, - unsigned /*num_streams*/) + size_t /*num_streams*/) { StorageS3::updateS3Configuration(context, s3_configuration); @@ -117,6 +112,11 @@ Pipe StorageS3Cluster::read( const bool add_agg_info = processed_stage == QueryProcessingStage::WithMergeableState; + ASTPtr query_to_send = query_info.original_query->clone(); + if (add_columns_structure_to_query) + addColumnsStructureToQueryWithClusterEngine( + query_to_send, StorageDictionary::generateNamesAndTypesDescription(storage_snapshot->metadata->getColumns().getAll()), 5, getName()); + for (const auto & replicas : cluster->getShardsAddresses()) { /// There will be only one replica, because we consider each replica as a shard @@ -135,7 +135,7 @@ Pipe StorageS3Cluster::read( /// So, task_identifier is passed as constructor argument. It is more obvious. auto remote_query_executor = std::make_shared( connection, - queryToString(query_info.original_query), + queryToString(query_to_send), header, context, /*throttler=*/nullptr, diff --git a/src/Storages/StorageS3Cluster.h b/src/Storages/StorageS3Cluster.h index d2cf1b917a1..3a3942f4222 100644 --- a/src/Storages/StorageS3Cluster.h +++ b/src/Storages/StorageS3Cluster.h @@ -30,7 +30,7 @@ public: std::string getName() const override { return "S3Cluster"; } Pipe read(const Names &, const StorageSnapshotPtr &, SelectQueryInfo &, - ContextPtr, QueryProcessingStage::Enum, size_t /*max_block_size*/, unsigned /*num_streams*/) override; + ContextPtr, QueryProcessingStage::Enum, size_t /*max_block_size*/, size_t /*num_streams*/) override; QueryProcessingStage::Enum getQueryProcessingStage(ContextPtr, QueryProcessingStage::Enum, const StorageSnapshotPtr &, SelectQueryInfo &) const override; @@ -46,6 +46,7 @@ private: String compression_method; NamesAndTypesList virtual_columns; Block virtual_block; + bool add_columns_structure_to_query = false; }; diff --git a/src/Storages/StorageS3Settings.cpp b/src/Storages/StorageS3Settings.cpp index 4ab3375e188..68e15d10f52 100644 --- a/src/Storages/StorageS3Settings.cpp +++ b/src/Storages/StorageS3Settings.cpp @@ -1,19 +1,27 @@ #include +#include + #include #include #include - +#include #include namespace DB { -namespace ErrorCodes + +namespace { - extern const int INVALID_CONFIG_PARAMETER; + /// An object up to 5 GB can be copied in a single atomic operation. + constexpr UInt64 DEFAULT_MAX_SINGLE_OPERATION_COPY_SIZE = 5_GiB; + + /// The maximum size of an uploaded part. + constexpr UInt64 DEFAULT_MAX_UPLOAD_PART_SIZE = 5_GiB; } + void StorageS3Settings::loadFromConfig(const String & config_elem, const Poco::Util::AbstractConfiguration & config, const Settings & settings) { std::lock_guard lock(mutex); @@ -46,48 +54,17 @@ void StorageS3Settings::loadFromConfig(const String & config_elem, const Poco::U if (config.has(config_elem + "." 
+ key + ".endpoint")) { auto endpoint = get_string_for_key(key, "endpoint", false); - auto access_key_id = get_string_for_key(key, "access_key_id"); - auto secret_access_key = get_string_for_key(key, "secret_access_key"); - auto region = get_string_for_key(key, "region"); - auto server_side_encryption_customer_key_base64 = get_string_for_key(key, "server_side_encryption_customer_key_base64"); - std::optional use_environment_credentials; - if (config.has(config_elem + "." + key + ".use_environment_credentials")) - use_environment_credentials = config.getBool(config_elem + "." + key + ".use_environment_credentials"); - - std::optional use_insecure_imds_request; - if (config.has(config_elem + "." + key + ".use_insecure_imds_request")) - use_insecure_imds_request = config.getBool(config_elem + "." + key + ".use_insecure_imds_request"); - - HeaderCollection headers; - Poco::Util::AbstractConfiguration::Keys subconfig_keys; - config.keys(config_elem + "." + key, subconfig_keys); - for (const String & subkey : subconfig_keys) - { - if (subkey.starts_with("header")) - { - auto header_str = config.getString(config_elem + "." + key + "." + subkey); - auto delimiter = header_str.find(':'); - if (delimiter == String::npos) - throw Exception("Malformed s3 header value", ErrorCodes::INVALID_CONFIG_PARAMETER); - headers.emplace_back(HttpHeader{header_str.substr(0, delimiter), header_str.substr(delimiter + 1, String::npos)}); - } - } - - S3Settings::AuthSettings auth_settings{ - std::move(access_key_id), std::move(secret_access_key), - std::move(region), - std::move(server_side_encryption_customer_key_base64), - std::move(headers), - use_environment_credentials, - use_insecure_imds_request}; + auto auth_settings = S3::AuthSettings::loadFromConfig(config_elem + "." + key, config); S3Settings::ReadWriteSettings rw_settings; rw_settings.max_single_read_retries = get_uint_for_key(key, "max_single_read_retries", true, settings.s3_max_single_read_retries); rw_settings.min_upload_part_size = get_uint_for_key(key, "min_upload_part_size", true, settings.s3_min_upload_part_size); + rw_settings.max_upload_part_size = get_uint_for_key(key, "max_upload_part_size", true, DEFAULT_MAX_UPLOAD_PART_SIZE); rw_settings.upload_part_size_multiply_factor = get_uint_for_key(key, "upload_part_size_multiply_factor", true, settings.s3_upload_part_size_multiply_factor); rw_settings.upload_part_size_multiply_parts_count_threshold = get_uint_for_key(key, "upload_part_size_multiply_parts_count_threshold", true, settings.s3_upload_part_size_multiply_parts_count_threshold); rw_settings.max_single_part_upload_size = get_uint_for_key(key, "max_single_part_upload_size", true, settings.s3_max_single_part_upload_size); + rw_settings.max_single_operation_copy_size = get_uint_for_key(key, "max_single_operation_copy_size", true, DEFAULT_MAX_SINGLE_OPERATION_COPY_SIZE); rw_settings.max_connections = get_uint_for_key(key, "max_connections", true, settings.s3_max_connections); rw_settings.check_objects_after_upload = get_bool_for_key(key, "check_objects_after_upload", true, false); @@ -130,12 +107,16 @@ void S3Settings::ReadWriteSettings::updateFromSettingsIfEmpty(const Settings & s max_single_read_retries = settings.s3_max_single_read_retries; if (!min_upload_part_size) min_upload_part_size = settings.s3_min_upload_part_size; + if (!max_upload_part_size) + max_upload_part_size = DEFAULT_MAX_UPLOAD_PART_SIZE; if (!upload_part_size_multiply_factor) upload_part_size_multiply_factor = settings.s3_upload_part_size_multiply_factor; if 
(!upload_part_size_multiply_parts_count_threshold) upload_part_size_multiply_parts_count_threshold = settings.s3_upload_part_size_multiply_parts_count_threshold; if (!max_single_part_upload_size) max_single_part_upload_size = settings.s3_max_single_part_upload_size; + if (!max_single_operation_copy_size) + max_single_operation_copy_size = DEFAULT_MAX_SINGLE_OPERATION_COPY_SIZE; if (!max_connections) max_connections = settings.s3_max_connections; if (!max_unexpected_write_error_retries) diff --git a/src/Storages/StorageS3Settings.h b/src/Storages/StorageS3Settings.h index 80ef4f52deb..bd90ba569d8 100644 --- a/src/Storages/StorageS3Settings.h +++ b/src/Storages/StorageS3Settings.h @@ -9,6 +9,8 @@ #include #include +#include + namespace Poco::Util { class AbstractConfiguration; @@ -21,53 +23,15 @@ struct Settings; struct S3Settings { - struct AuthSettings - { - String access_key_id; - String secret_access_key; - String region; - String server_side_encryption_customer_key_base64; - - HeaderCollection headers; - - std::optional use_environment_credentials; - std::optional use_insecure_imds_request; - - inline bool operator==(const AuthSettings & other) const - { - return access_key_id == other.access_key_id && secret_access_key == other.secret_access_key - && region == other.region - && server_side_encryption_customer_key_base64 == other.server_side_encryption_customer_key_base64 - && headers == other.headers - && use_environment_credentials == other.use_environment_credentials - && use_insecure_imds_request == other.use_insecure_imds_request; - } - - void updateFrom(const AuthSettings & from) - { - /// Update with check for emptyness only parameters which - /// can be passed not only from config, but via ast. - - if (!from.access_key_id.empty()) - access_key_id = from.access_key_id; - if (!from.secret_access_key.empty()) - secret_access_key = from.secret_access_key; - - headers = from.headers; - region = from.region; - server_side_encryption_customer_key_base64 = from.server_side_encryption_customer_key_base64; - use_environment_credentials = from.use_environment_credentials; - use_insecure_imds_request = from.use_insecure_imds_request; - } - }; - struct ReadWriteSettings { size_t max_single_read_retries = 0; size_t min_upload_part_size = 0; + size_t max_upload_part_size = 0; size_t upload_part_size_multiply_factor = 0; size_t upload_part_size_multiply_parts_count_threshold = 0; size_t max_single_part_upload_size = 0; + size_t max_single_operation_copy_size = 0; size_t max_connections = 0; bool check_objects_after_upload = false; size_t max_unexpected_write_error_retries = 0; @@ -79,9 +43,11 @@ struct S3Settings { return max_single_read_retries == other.max_single_read_retries && min_upload_part_size == other.min_upload_part_size + && max_upload_part_size == other.max_upload_part_size && upload_part_size_multiply_factor == other.upload_part_size_multiply_factor && upload_part_size_multiply_parts_count_threshold == other.upload_part_size_multiply_parts_count_threshold && max_single_part_upload_size == other.max_single_part_upload_size + && max_single_operation_copy_size == other.max_single_operation_copy_size && max_connections == other.max_connections && check_objects_after_upload == other.check_objects_after_upload && max_unexpected_write_error_retries == other.max_unexpected_write_error_retries; @@ -90,7 +56,7 @@ struct S3Settings void updateFromSettingsIfEmpty(const Settings & settings); }; - AuthSettings auth_settings; + S3::AuthSettings auth_settings; ReadWriteSettings rw_settings; inline 
bool operator==(const S3Settings & other) const diff --git a/src/Storages/StorageSQLite.cpp b/src/Storages/StorageSQLite.cpp index a86ed7646b3..92f954ebb9d 100644 --- a/src/Storages/StorageSQLite.cpp +++ b/src/Storages/StorageSQLite.cpp @@ -57,7 +57,7 @@ Pipe StorageSQLite::read( ContextPtr context_, QueryProcessingStage::Enum, size_t max_block_size, - unsigned int) + size_t /*num_streams*/) { if (!sqlite_db) sqlite_db = openSQLiteDB(database_path, getContext(), /* throw_on_error */true); diff --git a/src/Storages/StorageSQLite.h b/src/Storages/StorageSQLite.h index b0f209b5bc3..a021c00f627 100644 --- a/src/Storages/StorageSQLite.h +++ b/src/Storages/StorageSQLite.h @@ -38,7 +38,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, ContextPtr context) override; diff --git a/src/Storages/StorageSnapshot.cpp b/src/Storages/StorageSnapshot.cpp index a99fec8c154..48851f0974d 100644 --- a/src/Storages/StorageSnapshot.cpp +++ b/src/Storages/StorageSnapshot.cpp @@ -76,7 +76,7 @@ std::optional StorageSnapshot::tryGetColumn(const GetColumnsOpt { const auto & columns = getMetadataForQuery()->getColumns(); auto column = columns.tryGetColumn(options, column_name); - if (column && (!isObject(column->type) || !options.with_extended_objects)) + if (column && (!column->type->hasDynamicSubcolumns() || !options.with_extended_objects)) return column; if (options.with_extended_objects) diff --git a/src/Storages/StorageStripeLog.cpp b/src/Storages/StorageStripeLog.cpp index 0ecbdb0db10..92d53ffc1ac 100644 --- a/src/Storages/StorageStripeLog.cpp +++ b/src/Storages/StorageStripeLog.cpp @@ -349,7 +349,7 @@ Pipe StorageStripeLog::read( ContextPtr local_context, QueryProcessingStage::Enum /*processed_stage*/, const size_t /*max_block_size*/, - unsigned num_streams) + size_t num_streams) { storage_snapshot->check(column_names); diff --git a/src/Storages/StorageStripeLog.h b/src/Storages/StorageStripeLog.h index efdf18c0f7b..3f1b4ed0ad5 100644 --- a/src/Storages/StorageStripeLog.h +++ b/src/Storages/StorageStripeLog.h @@ -47,7 +47,7 @@ public: ContextPtr local_context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context) override; diff --git a/src/Storages/StorageTableFunction.h b/src/Storages/StorageTableFunction.h index 2a4bfdf304b..b105e50a54f 100644 --- a/src/Storages/StorageTableFunction.h +++ b/src/Storages/StorageTableFunction.h @@ -101,7 +101,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override + size_t num_streams) override { String cnames; for (const auto & c : column_names) diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index f1c924a3448..0f01dc4288c 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -266,6 +266,7 @@ namespace setCredentials(credentials, request_uri); const auto settings = context->getSettings(); + int zstd_window_log_max = static_cast(settings.zstd_window_log_max); try { if (download_threads > 1) @@ -279,7 +280,7 @@ namespace timeouts, credentials, settings.max_http_get_redirects, - DBMS_DEFAULT_BUFFER_SIZE, + settings.max_read_buffer_size, read_settings, headers, 
ReadWriteBufferFromHTTP::Range{0, std::nullopt}, @@ -340,7 +341,7 @@ namespace timeouts, credentials, settings.max_http_get_redirects, - DBMS_DEFAULT_BUFFER_SIZE, + settings.max_read_buffer_size, read_settings, headers, &context->getRemoteHostFilter(), @@ -354,7 +355,7 @@ namespace threadPoolCallbackRunner(IOThreadPool::get(), "URLParallelRead"), download_threads), compression_method, - settings.zstd_window_log_max); + zstd_window_log_max); } } catch (const Poco::Exception & e) @@ -377,7 +378,7 @@ namespace timeouts, credentials, settings.max_http_get_redirects, - DBMS_DEFAULT_BUFFER_SIZE, + settings.max_read_buffer_size, read_settings, headers, ReadWriteBufferFromHTTP::Range{}, @@ -386,7 +387,7 @@ namespace /* use_external_buffer */ false, /* skip_url_not_found_error */ skip_url_not_found_error), compression_method, - settings.zstd_window_log_max); + zstd_window_log_max); } catch (...) { @@ -641,7 +642,7 @@ Pipe IStorageURLBase::read( ContextPtr local_context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) + size_t num_streams) { auto params = getReadURIParams(column_names, storage_snapshot, query_info, local_context, processed_stage, max_block_size); @@ -730,7 +731,7 @@ Pipe StorageURLWithFailover::read( ContextPtr local_context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned /*num_streams*/) + size_t /*num_streams*/) { ColumnsDescription columns_description; Block block_for_format; @@ -862,6 +863,8 @@ std::optional IStorageURLBase::getLastModificationTime( const Poco::Net::HTTPBasicCredentials & credentials, const ContextPtr & context) { + auto settings = context->getSettingsRef(); + try { ReadWriteBufferFromHTTP buf( @@ -870,8 +873,8 @@ std::optional IStorageURLBase::getLastModificationTime( {}, ConnectionTimeouts::getHTTPTimeouts(context), credentials, - context->getSettingsRef().max_http_get_redirects, - DBMS_DEFAULT_BUFFER_SIZE, + settings.max_http_get_redirects, + settings.max_read_buffer_size, context->getReadSettings(), headers, ReadWriteBufferFromHTTP::Range{}, diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index 63c803f2d26..bf8858b8b66 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -35,7 +35,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, ContextPtr context) override; @@ -206,7 +206,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; struct Configuration { diff --git a/src/Storages/StorageValues.cpp b/src/Storages/StorageValues.cpp index 2a3e1743983..300b11b7346 100644 --- a/src/Storages/StorageValues.cpp +++ b/src/Storages/StorageValues.cpp @@ -27,7 +27,7 @@ Pipe StorageValues::read( ContextPtr /*context*/, QueryProcessingStage::Enum /*processed_stage*/, size_t /*max_block_size*/, - unsigned /*num_streams*/) + size_t /*num_streams*/) { storage_snapshot->check(column_names); diff --git a/src/Storages/StorageValues.h b/src/Storages/StorageValues.h index bf7bf0466e4..55222903797 100644 --- a/src/Storages/StorageValues.h +++ b/src/Storages/StorageValues.h @@ -23,7 +23,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; /// Why 
we may have virtual columns in the storage from a single block? /// Because it used as tmp storage for pushing blocks into views, and some diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index adaf1c4e404..a55d7ad3c09 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -111,7 +111,7 @@ void StorageView::read( ContextPtr context, QueryProcessingStage::Enum /*processed_stage*/, const size_t /*max_block_size*/, - const unsigned /*num_streams*/) + const size_t /*num_streams*/) { ASTPtr current_inner_query = storage_snapshot->metadata->getSelectQuery().inner_query; diff --git a/src/Storages/StorageView.h b/src/Storages/StorageView.h index 31c96addd08..593ac820ad4 100644 --- a/src/Storages/StorageView.h +++ b/src/Storages/StorageView.h @@ -32,7 +32,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; static void replaceWithSubquery(ASTSelectQuery & select_query, ASTPtr & view_name, const StorageMetadataPtr & metadata_snapshot) { diff --git a/src/Storages/StorageXDBC.cpp b/src/Storages/StorageXDBC.cpp index aacbb5fa302..5f57d37278b 100644 --- a/src/Storages/StorageXDBC.cpp +++ b/src/Storages/StorageXDBC.cpp @@ -106,7 +106,7 @@ Pipe StorageXDBC::read( ContextPtr local_context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) + size_t num_streams) { storage_snapshot->check(column_names); diff --git a/src/Storages/StorageXDBC.h b/src/Storages/StorageXDBC.h index a2bb9c15baf..aa313e024ca 100644 --- a/src/Storages/StorageXDBC.h +++ b/src/Storages/StorageXDBC.h @@ -26,7 +26,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; StorageXDBC( const StorageID & table_id_, diff --git a/src/Storages/System/IStorageSystemOneBlock.h b/src/Storages/System/IStorageSystemOneBlock.h index 2cfe2de05db..63b9a443f95 100644 --- a/src/Storages/System/IStorageSystemOneBlock.h +++ b/src/Storages/System/IStorageSystemOneBlock.h @@ -45,7 +45,7 @@ public: ContextPtr context, QueryProcessingStage::Enum /*processed_stage*/, size_t /*max_block_size*/, - unsigned /*num_streams*/) override + size_t /*num_streams*/) override { storage_snapshot->check(column_names); diff --git a/src/Storages/System/StorageSystemBackups.cpp b/src/Storages/System/StorageSystemBackups.cpp index e7146711c4a..52a26fe0cd6 100644 --- a/src/Storages/System/StorageSystemBackups.cpp +++ b/src/Storages/System/StorageSystemBackups.cpp @@ -51,8 +51,8 @@ void StorageSystemBackups::fillData(MutableColumns & res_columns, ContextPtr con column_uncompressed_size.insertValue(info.uncompressed_size); column_compressed_size.insertValue(info.compressed_size); column_error.insertData(info.error_message.data(), info.error_message.size()); - column_start_time.insertValue(std::chrono::system_clock::to_time_t(info.start_time)); - column_end_time.insertValue(std::chrono::system_clock::to_time_t(info.end_time)); + column_start_time.insertValue(static_cast(std::chrono::system_clock::to_time_t(info.start_time))); + column_end_time.insertValue(static_cast(std::chrono::system_clock::to_time_t(info.end_time))); }; for (const auto & entry : context->getBackupsWorker().getAllInfos()) diff --git a/src/Storages/System/StorageSystemColumns.cpp b/src/Storages/System/StorageSystemColumns.cpp index 20cab9fdc47..18e7d269795 100644 --- 
a/src/Storages/System/StorageSystemColumns.cpp +++ b/src/Storages/System/StorageSystemColumns.cpp @@ -296,7 +296,7 @@ Pipe StorageSystemColumns::read( ContextPtr context, QueryProcessingStage::Enum /*processed_stage*/, const size_t max_block_size, - const unsigned /*num_streams*/) + const size_t /*num_streams*/) { storage_snapshot->check(column_names); diff --git a/src/Storages/System/StorageSystemColumns.h b/src/Storages/System/StorageSystemColumns.h index 542e4ce9661..7b4b5dd8fb3 100644 --- a/src/Storages/System/StorageSystemColumns.h +++ b/src/Storages/System/StorageSystemColumns.h @@ -24,7 +24,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; bool isSystemStorage() const override { return true; } }; diff --git a/src/Storages/System/StorageSystemContributors.generated.cpp b/src/Storages/System/StorageSystemContributors.generated.cpp index 25eae3b83b6..e1f4f7b82bf 100644 --- a/src/Storages/System/StorageSystemContributors.generated.cpp +++ b/src/Storages/System/StorageSystemContributors.generated.cpp @@ -12,6 +12,7 @@ const char * auto_contributors[] { "821008736@qq.com", "ANDREI STAROVEROV", "Aaron Katz", + "Adam Rutkowski", "Adri Fernandez", "Ahmed Dardery", "Aimiyoo", @@ -76,11 +77,15 @@ const char * auto_contributors[] { "Alexey Elymanov", "Alexey Gusev", "Alexey Ilyukhov", + "Alexey Ivanov", "Alexey Milovidov", "Alexey Tronov", "Alexey Vasiliev", "Alexey Zatelepin", "Alexsey Shestakov", + "AlfVII", + "Alfonso Martinez", + "Alfred Xu", "Ali Demirci", "Aliaksandr Pliutau", "Aliaksandr Shylau", @@ -196,6 +201,7 @@ const char * auto_contributors[] { "Brian Hunter", "Bulat Gaifullin", "Carbyn", + "Carlos Rodríguez Hernández", "Caspian", "Chao Ma", "Chao Wang", @@ -222,6 +228,7 @@ const char * auto_contributors[] { "DIAOZHAFENG", "Dale McDiarmid", "Dale Mcdiarmid", + "Dalitso Banda", "Dan Roscigno", "DanRoscigno", "Daniel Bershatsky", @@ -267,6 +274,7 @@ const char * auto_contributors[] { "Dmitry S..ky / skype: dvska-at-skype", "Dmitry Ukolov", "Doge", + "Dom Del Nano", "Dongdong Yang", "DoomzD", "Dr. 
Strange Looker", @@ -276,6 +284,7 @@ const char * auto_contributors[] { "Egor Savin", "Ekaterina", "Eldar Zaitov", + "Elena", "Elena Baskakova", "Elghazal Ahmed", "Elizaveta Mironyuk", @@ -342,6 +351,7 @@ const char * auto_contributors[] { "Grigory Pervakov", "GruffGemini", "Guillaume Tassery", + "Guo Wangyang", "Guo Wei (William)", "Haavard Kvaalen", "Habibullah Oladepo", @@ -349,6 +359,7 @@ const char * auto_contributors[] { "Hakob Saghatelyan", "Hamoon", "Han Fei", + "Han Shukai", "Harry Lee", "Harry-Lee", "HarryLeeIBM", @@ -404,6 +415,7 @@ const char * auto_contributors[] { "Jack Song", "JackyWoo", "Jacob Hayes", + "Jacob Herrington", "Jake Liu", "Jakub Kuklis", "James Maidment", @@ -419,6 +431,7 @@ const char * auto_contributors[] { "Jiading Guo", "Jiang Tao", "Jianmei Zhang", + "Jiebin Sun", "Jochen Schalanda", "John", "John Hummel", @@ -432,6 +445,7 @@ const char * auto_contributors[] { "Julian Gilyadov", "Julian Zhou", "Julio Jimenez", + "Jus", "Justin Hilliard", "Kang Liu", "Karl Pietrzak", @@ -652,6 +666,7 @@ const char * auto_contributors[] { "OuO", "PHO", "Pablo Alegre", + "Pablo Marcos", "Paramtamtam", "Patrick Zippenfenig", "Paul Loyd", @@ -681,6 +696,7 @@ const char * auto_contributors[] { "Prashant Shahi", "Pxl", "Pysaoke", + "Quanfa Fu", "Quid37", "Rafael Acevedo", "Rafael David Tinoco", @@ -693,6 +709,7 @@ const char * auto_contributors[] { "RedClusive", "RegulusZ", "Reilee", + "Reinaldy Rafli", "Reto Kromer", "Ri", "Rich Raposa", @@ -726,6 +743,7 @@ const char * auto_contributors[] { "Sachin", "Safronov Michail", "SaltTan", + "Salvatore Mesoraca", "Sami Kerola", "Samuel Chou", "San", @@ -927,6 +945,7 @@ const char * auto_contributors[] { "ZhiYong Wang", "Zhichang Yu", "Zhichun Wu", + "Zhiguo Zhou", "Zhipeng", "Zijie Lu", "Zoran Pandovski", @@ -950,6 +969,7 @@ const char * auto_contributors[] { "alexander goryanets", "alexander kozhikhov", "alexey-milovidov", + "alexeyerm", "alexeypavlenko", "alfredlu", "amesaru", @@ -1131,6 +1151,7 @@ const char * auto_contributors[] { "jennyma", "jetgm", "jewisliu", + "jferroal", "jiahui-97", "jianmei zhang", "jinjunzh", @@ -1236,6 +1257,7 @@ const char * auto_contributors[] { "mo-avatar", "morty", "moscas", + "mosinnik", "mreddy017", "msaf1980", "msirm", @@ -1321,6 +1343,7 @@ const char * auto_contributors[] { "simon-says", "snyk-bot", "songenjie", + "sperlingxx", "spff", "spongedc", "spume", @@ -1422,6 +1445,7 @@ const char * auto_contributors[] { "zhongyuankai", "zhoubintao", "zhukai", + "zimv", "zkun", "zlx19950903", "zombee0", diff --git a/src/Storages/System/StorageSystemDataSkippingIndices.cpp b/src/Storages/System/StorageSystemDataSkippingIndices.cpp index e725f8a03c6..be04261cc4e 100644 --- a/src/Storages/System/StorageSystemDataSkippingIndices.cpp +++ b/src/Storages/System/StorageSystemDataSkippingIndices.cpp @@ -171,7 +171,7 @@ Pipe StorageSystemDataSkippingIndices::read( ContextPtr context, QueryProcessingStage::Enum /* processed_stage */, size_t max_block_size, - unsigned int /* num_streams */) + size_t /* num_streams */) { storage_snapshot->check(column_names); diff --git a/src/Storages/System/StorageSystemDataSkippingIndices.h b/src/Storages/System/StorageSystemDataSkippingIndices.h index 046855edd5e..8a1e8c159b4 100644 --- a/src/Storages/System/StorageSystemDataSkippingIndices.h +++ b/src/Storages/System/StorageSystemDataSkippingIndices.h @@ -21,7 +21,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; bool 
isSystemStorage() const override { return true; } }; diff --git a/src/Storages/System/StorageSystemDetachedParts.cpp b/src/Storages/System/StorageSystemDetachedParts.cpp index 574ce4f44c2..d094fefddcb 100644 --- a/src/Storages/System/StorageSystemDetachedParts.cpp +++ b/src/Storages/System/StorageSystemDetachedParts.cpp @@ -36,7 +36,7 @@ Pipe StorageSystemDetachedParts::read( ContextPtr context, QueryProcessingStage::Enum /*processed_stage*/, const size_t /*max_block_size*/, - const unsigned /*num_streams*/) + const size_t /*num_streams*/) { StoragesInfoStream stream(query_info, context); diff --git a/src/Storages/System/StorageSystemDetachedParts.h b/src/Storages/System/StorageSystemDetachedParts.h index 23f27816138..20ac69f0eea 100644 --- a/src/Storages/System/StorageSystemDetachedParts.h +++ b/src/Storages/System/StorageSystemDetachedParts.h @@ -27,7 +27,7 @@ protected: ContextPtr context, QueryProcessingStage::Enum /*processed_stage*/, size_t /*max_block_size*/, - unsigned /*num_streams*/) override; + size_t /*num_streams*/) override; }; } diff --git a/src/Storages/System/StorageSystemDisks.cpp b/src/Storages/System/StorageSystemDisks.cpp index 6b50b00dc30..86b5eafdf72 100644 --- a/src/Storages/System/StorageSystemDisks.cpp +++ b/src/Storages/System/StorageSystemDisks.cpp @@ -37,7 +37,7 @@ Pipe StorageSystemDisks::read( ContextPtr context, QueryProcessingStage::Enum /*processed_stage*/, const size_t /*max_block_size*/, - const unsigned /*num_streams*/) + const size_t /*num_streams*/) { storage_snapshot->check(column_names); diff --git a/src/Storages/System/StorageSystemDisks.h b/src/Storages/System/StorageSystemDisks.h index cd1dc1a8bbf..06cc7e8d4e2 100644 --- a/src/Storages/System/StorageSystemDisks.h +++ b/src/Storages/System/StorageSystemDisks.h @@ -27,7 +27,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; bool isSystemStorage() const override { return true; } }; diff --git a/src/Storages/System/StorageSystemErrors.cpp b/src/Storages/System/StorageSystemErrors.cpp index 4c8c8e60d69..bbe67bc0d21 100644 --- a/src/Storages/System/StorageSystemErrors.cpp +++ b/src/Storages/System/StorageSystemErrors.cpp @@ -51,7 +51,7 @@ void StorageSystemErrors::fillData(MutableColumns & res_columns, ContextPtr cont for (size_t i = 0, end = ErrorCodes::end(); i < end; ++i) { const auto & error = ErrorCodes::values[i].get(); - std::string_view name = ErrorCodes::getName(i); + std::string_view name = ErrorCodes::getName(static_cast(i)); if (name.empty()) continue; diff --git a/src/Storages/System/StorageSystemFunctions.cpp b/src/Storages/System/StorageSystemFunctions.cpp index db6b51cb4f1..a0a406a974c 100644 --- a/src/Storages/System/StorageSystemFunctions.cpp +++ b/src/Storages/System/StorageSystemFunctions.cpp @@ -6,18 +6,9 @@ #include #include #include -#include -#include +#include +#include #include -#include -#include -#include -#include -#include -#include -#include - -namespace fs = std::filesystem; namespace DB @@ -30,11 +21,6 @@ enum class FunctionOrigin : Int8 EXECUTABLE_USER_DEFINED = 2 }; -namespace ErrorCodes -{ - extern const int CANNOT_RESTORE_TABLE; -} - namespace { template @@ -134,63 +120,12 @@ void StorageSystemFunctions::fillData(MutableColumns & res_columns, ContextPtr c void StorageSystemFunctions::backupData(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional & /* partitions */) { - const auto & 
user_defined_sql_functions_factory = UserDefinedSQLFunctionFactory::instance(); - const auto & user_defined_sql_functions_names = user_defined_sql_functions_factory.getAllRegisteredNames(); - fs::path data_path_in_backup_fs{data_path_in_backup}; - for (const auto & function_name : user_defined_sql_functions_names) - { - auto ast = user_defined_sql_functions_factory.tryGet(function_name); - if (!ast) - continue; - backup_entries_collector.addBackupEntry( - data_path_in_backup_fs / (escapeForFileName(function_name) + ".sql"), - std::make_shared(queryToString(ast))); - } + UserDefinedSQLFunctionFactory::instance().backup(backup_entries_collector, data_path_in_backup); } void StorageSystemFunctions::restoreDataFromBackup(RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & /* partitions */) { - auto backup = restorer.getBackup(); - fs::path data_path_in_backup_fs{data_path_in_backup}; - - Strings filenames = backup->listFiles(data_path_in_backup); - for (const auto & filename : filenames) - { - if (!filename.ends_with(".sql")) - { - throw Exception(ErrorCodes::CANNOT_RESTORE_TABLE, "Cannot restore table {}: File name {} doesn't have the extension .sql", - getStorageID().getFullTableName(), String{data_path_in_backup_fs / filename}); - } - } - - auto & user_defined_sql_functions_factory = UserDefinedSQLFunctionFactory::instance(); - const auto & restore_settings = restorer.getRestoreSettings(); - auto context = restorer.getContext(); - - for (const auto & filename : filenames) - { - String escaped_function_name = filename.substr(0, filename.length() - strlen(".sql")); - String function_name = unescapeForFileName(escaped_function_name); - - String filepath = data_path_in_backup_fs / filename; - auto function_def_entry = backup->readFile(filepath); - auto function_def_in = function_def_entry->getReadBuffer(); - String function_def; - readStringUntilEOF(function_def, *function_def_in); - - ParserCreateFunctionQuery parser; - ASTPtr ast = parseQuery( - parser, - function_def.data(), - function_def.data() + function_def.size(), - "in file " + filepath + " from backup " + backup->getName(), - 0, - context->getSettingsRef().max_parser_depth); - - bool replace = (restore_settings.create_function == RestoreUDFCreationMode::kReplace); - bool if_not_exists = (restore_settings.create_function == RestoreUDFCreationMode::kCreateIfNotExists); - user_defined_sql_functions_factory.registerFunction(context, function_name, ast, replace, if_not_exists, true); - } + UserDefinedSQLFunctionFactory::instance().restore(restorer, data_path_in_backup); } } diff --git a/src/Storages/System/StorageSystemNumbers.cpp b/src/Storages/System/StorageSystemNumbers.cpp index 523ec25b89c..70c0c64305d 100644 --- a/src/Storages/System/StorageSystemNumbers.cpp +++ b/src/Storages/System/StorageSystemNumbers.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include @@ -125,11 +126,11 @@ StorageSystemNumbers::StorageSystemNumbers(const StorageID & table_id, bool mult Pipe StorageSystemNumbers::read( const Names & column_names, const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo &, + SelectQueryInfo & query_info, ContextPtr /*context*/, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, - unsigned num_streams) + size_t num_streams) { storage_snapshot->check(column_names); @@ -154,7 +155,12 @@ Pipe StorageSystemNumbers::read( auto source = std::make_shared(state, max_block_size, max_counter); if (i == 0) - source->addTotalRowsApprox(*limit); + { + auto 
rows_appr = *limit; + if (query_info.limit > 0 && query_info.limit < rows_appr) + rows_appr = query_info.limit; + source->addTotalRowsApprox(rows_appr); + } pipe.addSource(std::move(source)); } @@ -167,7 +173,12 @@ Pipe StorageSystemNumbers::read( auto source = std::make_shared(max_block_size, offset + i * max_block_size, num_streams * max_block_size); if (limit && i == 0) - source->addTotalRowsApprox(*limit); + { + auto rows_appr = *limit; + if (query_info.limit > 0 && query_info.limit < rows_appr) + rows_appr = query_info.limit; + source->addTotalRowsApprox(rows_appr); + } pipe.addSource(std::move(source)); } diff --git a/src/Storages/System/StorageSystemNumbers.h b/src/Storages/System/StorageSystemNumbers.h index 6bb89c0525e..acddac681ef 100644 --- a/src/Storages/System/StorageSystemNumbers.h +++ b/src/Storages/System/StorageSystemNumbers.h @@ -38,7 +38,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; bool hasEvenlyDistributedRead() const override { return true; } bool isSystemStorage() const override { return true; } diff --git a/src/Storages/System/StorageSystemOne.cpp b/src/Storages/System/StorageSystemOne.cpp index f262c981b83..3091ffdb51a 100644 --- a/src/Storages/System/StorageSystemOne.cpp +++ b/src/Storages/System/StorageSystemOne.cpp @@ -27,7 +27,7 @@ Pipe StorageSystemOne::read( ContextPtr /*context*/, QueryProcessingStage::Enum /*processed_stage*/, const size_t /*max_block_size*/, - const unsigned /*num_streams*/) + const size_t /*num_streams*/) { storage_snapshot->check(column_names); diff --git a/src/Storages/System/StorageSystemOne.h b/src/Storages/System/StorageSystemOne.h index 35dba59a99e..d8a26f1def4 100644 --- a/src/Storages/System/StorageSystemOne.h +++ b/src/Storages/System/StorageSystemOne.h @@ -28,7 +28,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; bool isSystemStorage() const override { return true; } diff --git a/src/Storages/System/StorageSystemParts.cpp b/src/Storages/System/StorageSystemParts.cpp index d788efd8860..fa1c26b623d 100644 --- a/src/Storages/System/StorageSystemParts.cpp +++ b/src/Storages/System/StorageSystemParts.cpp @@ -198,9 +198,9 @@ void StorageSystemParts::processNextStorage( if (part->isStoredOnDisk()) { if (columns_mask[src_index++]) - columns[res_index++]->insert(part->data_part_storage->getDiskName()); + columns[res_index++]->insert(part->getDataPartStorage().getDiskName()); if (columns_mask[src_index++]) - columns[res_index++]->insert(part->data_part_storage->getFullPath()); + columns[res_index++]->insert(part->getDataPartStorage().getFullPath()); } else { diff --git a/src/Storages/System/StorageSystemPartsBase.cpp b/src/Storages/System/StorageSystemPartsBase.cpp index bcfd670ece9..a0c022f5540 100644 --- a/src/Storages/System/StorageSystemPartsBase.cpp +++ b/src/Storages/System/StorageSystemPartsBase.cpp @@ -247,7 +247,7 @@ Pipe StorageSystemPartsBase::read( ContextPtr context, QueryProcessingStage::Enum /*processed_stage*/, const size_t /*max_block_size*/, - const unsigned /*num_streams*/) + const size_t /*num_streams*/) { bool has_state_column = hasStateColumn(column_names, storage_snapshot); diff --git a/src/Storages/System/StorageSystemPartsBase.h b/src/Storages/System/StorageSystemPartsBase.h index 8db96700e1a..cb6265d82df 100644 --- a/src/Storages/System/StorageSystemPartsBase.h +++ 
b/src/Storages/System/StorageSystemPartsBase.h @@ -63,7 +63,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; NamesAndTypesList getVirtuals() const override; diff --git a/src/Storages/System/StorageSystemPartsColumns.cpp b/src/Storages/System/StorageSystemPartsColumns.cpp index cc6e69b160f..cd51c767eae 100644 --- a/src/Storages/System/StorageSystemPartsColumns.cpp +++ b/src/Storages/System/StorageSystemPartsColumns.cpp @@ -190,9 +190,9 @@ void StorageSystemPartsColumns::processNextStorage( if (columns_mask[src_index++]) columns[res_index++]->insert(info.engine); if (columns_mask[src_index++]) - columns[res_index++]->insert(part->data_part_storage->getDiskName()); + columns[res_index++]->insert(part->getDataPartStorage().getDiskName()); if (columns_mask[src_index++]) - columns[res_index++]->insert(part->data_part_storage->getFullPath()); + columns[res_index++]->insert(part->getDataPartStorage().getFullPath()); if (columns_mask[src_index++]) columns[res_index++]->insert(column.name); diff --git a/src/Storages/System/StorageSystemProjectionParts.cpp b/src/Storages/System/StorageSystemProjectionParts.cpp index 3934e7c9623..37c62ba5eb0 100644 --- a/src/Storages/System/StorageSystemProjectionParts.cpp +++ b/src/Storages/System/StorageSystemProjectionParts.cpp @@ -200,9 +200,9 @@ void StorageSystemProjectionParts::processNextStorage( if (part->isStoredOnDisk()) { if (columns_mask[src_index++]) - columns[res_index++]->insert(part->data_part_storage->getDiskName()); + columns[res_index++]->insert(part->getDataPartStorage().getDiskName()); if (columns_mask[src_index++]) - columns[res_index++]->insert(part->data_part_storage->getFullPath()); + columns[res_index++]->insert(part->getDataPartStorage().getFullPath()); } else { diff --git a/src/Storages/System/StorageSystemProjectionPartsColumns.cpp b/src/Storages/System/StorageSystemProjectionPartsColumns.cpp index 0847010faaa..a5968597885 100644 --- a/src/Storages/System/StorageSystemProjectionPartsColumns.cpp +++ b/src/Storages/System/StorageSystemProjectionPartsColumns.cpp @@ -211,9 +211,9 @@ void StorageSystemProjectionPartsColumns::processNextStorage( if (columns_mask[src_index++]) columns[res_index++]->insert(info.engine); if (columns_mask[src_index++]) - columns[res_index++]->insert(part->data_part_storage->getDiskName()); + columns[res_index++]->insert(part->getDataPartStorage().getDiskName()); if (columns_mask[src_index++]) - columns[res_index++]->insert(part->data_part_storage->getFullPath()); + columns[res_index++]->insert(part->getDataPartStorage().getFullPath()); if (columns_mask[src_index++]) columns[res_index++]->insert(column.name); diff --git a/src/Storages/System/StorageSystemQuotaLimits.cpp b/src/Storages/System/StorageSystemQuotaLimits.cpp index 0261d3d2cd9..6cc269130a0 100644 --- a/src/Storages/System/StorageSystemQuotaLimits.cpp +++ b/src/Storages/System/StorageSystemQuotaLimits.cpp @@ -90,7 +90,7 @@ void StorageSystemQuotaLimits::fillData(MutableColumns & res_columns, ContextPtr auto add_row = [&](const String & quota_name, const Quota::Limits & limits) { column_quota_name.insertData(quota_name.data(), quota_name.length()); - column_duration.push_back(limits.duration.count()); + column_duration.push_back(static_cast(limits.duration.count())); column_is_randomized_interval.push_back(limits.randomize_interval); for (auto quota_type : collections::range(QuotaType::MAX)) diff --git 
a/src/Storages/System/StorageSystemQuotaUsage.cpp b/src/Storages/System/StorageSystemQuotaUsage.cpp index 6ba47a86dbf..5d047dc0359 100644 --- a/src/Storages/System/StorageSystemQuotaUsage.cpp +++ b/src/Storages/System/StorageSystemQuotaUsage.cpp @@ -162,8 +162,8 @@ void StorageSystemQuotaUsage::fillDataImpl( time_t end_time = std::chrono::system_clock::to_time_t(interval->end_of_interval); UInt32 duration = static_cast(std::chrono::duration_cast(interval->duration).count()); time_t start_time = end_time - duration; - column_start_time.getData().push_back(start_time); - column_end_time.getData().push_back(end_time); + column_start_time.getData().push_back(static_cast(start_time)); + column_end_time.getData().push_back(static_cast(end_time)); column_duration.getData().push_back(duration); column_start_time_null_map.push_back(false); column_end_time_null_map.push_back(false); diff --git a/src/Storages/System/StorageSystemQuotas.cpp b/src/Storages/System/StorageSystemQuotas.cpp index 17863fa7326..439883e038a 100644 --- a/src/Storages/System/StorageSystemQuotas.cpp +++ b/src/Storages/System/StorageSystemQuotas.cpp @@ -96,7 +96,10 @@ void StorageSystemQuotas::fillData(MutableColumns & res_columns, ContextPtr cont column_key_types_offsets.push_back(column_key_types.size()); for (const auto & limits : all_limits) - column_durations.push_back(std::chrono::duration_cast(limits.duration).count()); + { + column_durations.push_back( + static_cast(std::chrono::duration_cast(limits.duration).count())); + } column_durations_offsets.push_back(column_durations.size()); auto apply_to_ast = apply_to.toASTWithNames(access_control); diff --git a/src/Storages/System/StorageSystemRemoteDataPaths.cpp b/src/Storages/System/StorageSystemRemoteDataPaths.cpp index de7e1911e44..20076603522 100644 --- a/src/Storages/System/StorageSystemRemoteDataPaths.cpp +++ b/src/Storages/System/StorageSystemRemoteDataPaths.cpp @@ -38,7 +38,7 @@ Pipe StorageSystemRemoteDataPaths::read( ContextPtr context, QueryProcessingStage::Enum /*processed_stage*/, const size_t /*max_block_size*/, - const unsigned /*num_streams*/) + const size_t /*num_streams*/) { storage_snapshot->check(column_names); diff --git a/src/Storages/System/StorageSystemRemoteDataPaths.h b/src/Storages/System/StorageSystemRemoteDataPaths.h index f868ae60795..7e883d144ef 100644 --- a/src/Storages/System/StorageSystemRemoteDataPaths.h +++ b/src/Storages/System/StorageSystemRemoteDataPaths.h @@ -21,7 +21,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; }; } diff --git a/src/Storages/System/StorageSystemReplicas.cpp b/src/Storages/System/StorageSystemReplicas.cpp index e018ccc0733..0f7877a6e41 100644 --- a/src/Storages/System/StorageSystemReplicas.cpp +++ b/src/Storages/System/StorageSystemReplicas.cpp @@ -66,7 +66,7 @@ Pipe StorageSystemReplicas::read( ContextPtr context, QueryProcessingStage::Enum /*processed_stage*/, const size_t /*max_block_size*/, - const unsigned /*num_streams*/) + const size_t /*num_streams*/) { storage_snapshot->check(column_names); diff --git a/src/Storages/System/StorageSystemReplicas.h b/src/Storages/System/StorageSystemReplicas.h index fc7f8f15861..e9c29dec0fd 100644 --- a/src/Storages/System/StorageSystemReplicas.h +++ b/src/Storages/System/StorageSystemReplicas.h @@ -25,7 +25,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) 
override; bool isSystemStorage() const override { return true; } }; diff --git a/src/Storages/System/StorageSystemStackTrace.cpp b/src/Storages/System/StorageSystemStackTrace.cpp index 549ce193137..df3d8b74e6e 100644 --- a/src/Storages/System/StorageSystemStackTrace.cpp +++ b/src/Storages/System/StorageSystemStackTrace.cpp @@ -258,7 +258,7 @@ Pipe StorageSystemStackTrace::read( ContextPtr context, QueryProcessingStage::Enum /*processed_stage*/, const size_t /*max_block_size*/, - const unsigned /*num_streams*/) + const size_t /*num_streams*/) { storage_snapshot->check(column_names); @@ -324,7 +324,7 @@ Pipe StorageSystemStackTrace::read( sigval sig_value{}; sig_value.sival_int = sequence_num.load(std::memory_order_acquire); - if (0 != ::sigqueue(tid, sig, sig_value)) + if (0 != ::sigqueue(static_cast(tid), sig, sig_value)) { /// The thread may has been already finished. if (ESRCH == errno) diff --git a/src/Storages/System/StorageSystemStackTrace.h b/src/Storages/System/StorageSystemStackTrace.h index dd613882e49..9133a86aa55 100644 --- a/src/Storages/System/StorageSystemStackTrace.h +++ b/src/Storages/System/StorageSystemStackTrace.h @@ -33,7 +33,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; bool isSystemStorage() const override { return true; } diff --git a/src/Storages/System/StorageSystemStoragePolicies.cpp b/src/Storages/System/StorageSystemStoragePolicies.cpp index 832c430e2be..b42bd7859dd 100644 --- a/src/Storages/System/StorageSystemStoragePolicies.cpp +++ b/src/Storages/System/StorageSystemStoragePolicies.cpp @@ -44,7 +44,7 @@ Pipe StorageSystemStoragePolicies::read( ContextPtr context, QueryProcessingStage::Enum /*processed_stage*/, const size_t /*max_block_size*/, - const unsigned /*num_streams*/) + const size_t /*num_streams*/) { storage_snapshot->check(column_names); diff --git a/src/Storages/System/StorageSystemStoragePolicies.h b/src/Storages/System/StorageSystemStoragePolicies.h index 3340a4b5e62..afc729c8368 100644 --- a/src/Storages/System/StorageSystemStoragePolicies.h +++ b/src/Storages/System/StorageSystemStoragePolicies.h @@ -27,7 +27,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; bool isSystemStorage() const override { return true; } }; diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp index e36b22a979e..83f922850a3 100644 --- a/src/Storages/System/StorageSystemTables.cpp +++ b/src/Storages/System/StorageSystemTables.cpp @@ -574,7 +574,7 @@ Pipe StorageSystemTables::read( ContextPtr context, QueryProcessingStage::Enum /*processed_stage*/, const size_t max_block_size, - const unsigned /*num_streams*/) + const size_t /*num_streams*/) { storage_snapshot->check(column_names); diff --git a/src/Storages/System/StorageSystemTables.h b/src/Storages/System/StorageSystemTables.h index 11ac75aab08..60b6144f122 100644 --- a/src/Storages/System/StorageSystemTables.h +++ b/src/Storages/System/StorageSystemTables.h @@ -25,7 +25,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; bool isSystemStorage() const override { return true; } }; diff --git a/src/Storages/System/StorageSystemZeros.cpp b/src/Storages/System/StorageSystemZeros.cpp index 9e5836fa358..6c2ddd8d3dd 100644 --- 
a/src/Storages/System/StorageSystemZeros.cpp +++ b/src/Storages/System/StorageSystemZeros.cpp @@ -97,7 +97,7 @@ Pipe StorageSystemZeros::read( ContextPtr /*context*/, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, - unsigned num_streams) + size_t num_streams) { storage_snapshot->check(column_names); diff --git a/src/Storages/System/StorageSystemZeros.h b/src/Storages/System/StorageSystemZeros.h index 5461feacb6b..64443a3cfd6 100644 --- a/src/Storages/System/StorageSystemZeros.h +++ b/src/Storages/System/StorageSystemZeros.h @@ -29,7 +29,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; bool hasEvenlyDistributedRead() const override { return true; } bool isSystemStorage() const override { return true; } diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index d34066de769..c0bc5ad8da9 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -1018,7 +1018,8 @@ void StorageWindowView::threadFuncFireProc() return; std::lock_guard lock(fire_signal_mutex); - UInt32 timestamp_now = std::time(nullptr); + /// TODO: consider using time_t instead (for every timestamp in this class) + UInt32 timestamp_now = static_cast(std::time(nullptr)); while (next_fire_signal <= timestamp_now) { @@ -1078,7 +1079,7 @@ void StorageWindowView::read( ContextPtr local_context, QueryProcessingStage::Enum processed_stage, const size_t max_block_size, - const unsigned num_streams) + const size_t num_streams) { if (target_table_id.empty()) return; @@ -1118,7 +1119,7 @@ Pipe StorageWindowView::watch( ContextPtr local_context, QueryProcessingStage::Enum & processed_stage, size_t /*max_block_size*/, - const unsigned /*num_streams*/) + const size_t /*num_streams*/) { ASTWatchQuery & query = typeid_cast(*query_info.query); @@ -1189,7 +1190,7 @@ StorageWindowView::StorageWindowView( target_table_id = has_inner_target_table ? 
StorageID(table_id_.database_name, generateTargetTableName(table_id_)) : query.to_table_id; if (is_proctime) - next_fire_signal = getWindowUpperBound(std::time(nullptr)); + next_fire_signal = getWindowUpperBound(static_cast(std::time(nullptr))); std::exchange(has_inner_table, true); if (!attach_) diff --git a/src/Storages/WindowView/StorageWindowView.h b/src/Storages/WindowView/StorageWindowView.h index 96c034b9590..6da34389e4d 100644 --- a/src/Storages/WindowView/StorageWindowView.h +++ b/src/Storages/WindowView/StorageWindowView.h @@ -150,7 +150,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; Pipe watch( const Names & column_names, @@ -158,7 +158,7 @@ public: ContextPtr context, QueryProcessingStage::Enum & processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; std::pair getNewBlocks(UInt32 watermark); diff --git a/src/Storages/addColumnsStructureToQueryWithClusterEngine.cpp b/src/Storages/addColumnsStructureToQueryWithClusterEngine.cpp new file mode 100644 index 00000000000..31f49fa5490 --- /dev/null +++ b/src/Storages/addColumnsStructureToQueryWithClusterEngine.cpp @@ -0,0 +1,51 @@ +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +static ASTExpressionList * extractTableFunctionArgumentsFromSelectQuery(ASTPtr & query) +{ + auto * select_query = query->as(); + if (!select_query || !select_query->tables()) + return nullptr; + + auto * tables = select_query->tables()->as(); + auto * table_expression = tables->children[0]->as()->table_expression->as(); + if (!table_expression->table_function) + return nullptr; + + auto * table_function = table_expression->table_function->as(); + return table_function->arguments->as(); +} + +void addColumnsStructureToQueryWithClusterEngine(ASTPtr & query, const String & structure, size_t max_arguments, const String & function_name) +{ + ASTExpressionList * expression_list = extractTableFunctionArgumentsFromSelectQuery(query); + if (!expression_list) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected SELECT query from table function {}, got '{}'", function_name, queryToString(query)); + auto structure_literal = std::make_shared(structure); + + if (expression_list->children.size() < 2 || expression_list->children.size() > max_arguments) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected 2 to {} arguments in {} table functions, got {}", function_name, max_arguments, expression_list->children.size()); + + if (expression_list->children.size() == 2 || expression_list->children.size() == max_arguments - 1) + { + auto format_literal = std::make_shared("auto"); + expression_list->children.push_back(format_literal); + } + + expression_list->children.push_back(structure_literal); +} + +} diff --git a/src/Storages/addColumnsStructureToQueryWithClusterEngine.h b/src/Storages/addColumnsStructureToQueryWithClusterEngine.h new file mode 100644 index 00000000000..f39f3a31630 --- /dev/null +++ b/src/Storages/addColumnsStructureToQueryWithClusterEngine.h @@ -0,0 +1,11 @@ +#pragma once + +#include + +namespace DB +{ + +/// Add structure argument for queries with s3Cluster/hdfsCluster table function. 
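+/// Expects a SELECT query over a *Cluster table function with between 2 and max_arguments arguments.
+/// When the argument count indicates the format is missing (2 arguments, or max_arguments - 1 of them),
+/// a literal 'auto' format is appended first so the structure literal always takes the last position.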
+void addColumnsStructureToQueryWithClusterEngine(ASTPtr & query, const String & structure, size_t max_arguments, const String & function_name); + +} diff --git a/src/Storages/examples/merge_selector.cpp b/src/Storages/examples/merge_selector.cpp index 9433e38c648..a3b0d8a29ef 100644 --- a/src/Storages/examples/merge_selector.cpp +++ b/src/Storages/examples/merge_selector.cpp @@ -66,7 +66,7 @@ int main(int, char **) size_t sum_merged_size = 0; size_t start_index = 0; - size_t max_level = 0; + unsigned max_level = 0; bool in_range = false; for (size_t i = 0, size = parts.size(); i < size; ++i) diff --git a/src/Storages/examples/merge_selector2.cpp b/src/Storages/examples/merge_selector2.cpp index d9d08a84bcf..029da26fad6 100644 --- a/src/Storages/examples/merge_selector2.cpp +++ b/src/Storages/examples/merge_selector2.cpp @@ -72,7 +72,7 @@ int main(int, char **) size_t sum_merged_size = 0; size_t start_index = 0; - size_t max_level = 0; + unsigned max_level = 0; bool in_range = false; for (size_t i = 0, size = parts.size(); i < size; ++i) diff --git a/src/Storages/getStructureOfRemoteTable.cpp b/src/Storages/getStructureOfRemoteTable.cpp index 3d104ada0b6..7bd5e629c39 100644 --- a/src/Storages/getStructureOfRemoteTable.cpp +++ b/src/Storages/getStructureOfRemoteTable.cpp @@ -58,7 +58,7 @@ ColumnsDescription getStructureOfRemoteTableInShard( } ColumnsDescription res; - auto new_context = ClusterProxy::updateSettingsForCluster(cluster, context, context->getSettingsRef()); + auto new_context = ClusterProxy::updateSettingsForCluster(cluster, context, context->getSettingsRef(), table_id); /// Expect only needed columns from the result of DESC TABLE. NOTE 'comment' column is ignored for compatibility reasons. Block sample_block @@ -169,7 +169,7 @@ ColumnsDescriptionByShardNum getExtendedObjectsOfRemoteTables( const auto & shards_info = cluster.getShardsInfo(); auto query = "DESC TABLE " + remote_table_id.getFullTableName(); - auto new_context = ClusterProxy::updateSettingsForCluster(cluster, context, context->getSettingsRef()); + auto new_context = ClusterProxy::updateSettingsForCluster(cluster, context, context->getSettingsRef(), remote_table_id); new_context->setSetting("describe_extend_object_types", true); /// Expect only needed columns from the result of DESC TABLE. 
@@ -200,7 +200,7 @@ ColumnsDescriptionByShardNum getExtendedObjectsOfRemoteTables( auto type_name = type_col[i].get(); auto storage_column = storage_columns.tryGetPhysical(name); - if (storage_column && isObject(storage_column->type)) + if (storage_column && storage_column->type->hasDynamicSubcolumns()) res.add(ColumnDescription(std::move(name), DataTypeFactory::instance().get(type_name))); } } diff --git a/src/Storages/transformQueryForExternalDatabase.cpp b/src/Storages/transformQueryForExternalDatabase.cpp index c42fb7fa965..51b11680f82 100644 --- a/src/Storages/transformQueryForExternalDatabase.cpp +++ b/src/Storages/transformQueryForExternalDatabase.cpp @@ -22,6 +22,7 @@ namespace ErrorCodes { extern const int LOGICAL_ERROR; extern const int INCORRECT_QUERY; + extern const int UNSUPPORTED_METHOD; } namespace @@ -251,6 +252,11 @@ String transformQueryForExternalDatabase( ContextPtr context) { auto clone_query = query_info.query->clone(); + + /// TODO: Analyzer syntax analyzer result + if (!query_info.syntax_analyzer_result) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "transform query for external database is unsupported"); + const Names used_columns = query_info.syntax_analyzer_result->requiredSourceColumns(); bool strict = context->getSettingsRef().external_table_strict_query; diff --git a/src/TableFunctions/TableFunctionFile.cpp b/src/TableFunctions/TableFunctionFile.cpp index fd474f037b3..4ecf29a05bd 100644 --- a/src/TableFunctions/TableFunctionFile.cpp +++ b/src/TableFunctions/TableFunctionFile.cpp @@ -43,7 +43,8 @@ void TableFunctionFile::parseFirstArguments(const ASTPtr & arg, const ContextPtr } else if (type == Field::Types::Int64 || type == Field::Types::UInt64) { - fd = (type == Field::Types::Int64) ? literal->value.get() : literal->value.get(); + fd = static_cast( + (type == Field::Types::Int64) ? 
literal->value.get() : literal->value.get()); if (fd < 0) throw Exception("File descriptor must be non-negative", ErrorCodes::BAD_ARGUMENTS); } diff --git a/src/TableFunctions/TableFunctionHDFSCluster.cpp b/src/TableFunctions/TableFunctionHDFSCluster.cpp index 26fcb514cca..73b77f770b2 100644 --- a/src/TableFunctions/TableFunctionHDFSCluster.cpp +++ b/src/TableFunctions/TableFunctionHDFSCluster.cpp @@ -48,7 +48,7 @@ void TableFunctionHDFSCluster::parseArguments(const ASTPtr & ast_function, Conte const auto message = fmt::format( "The signature of table function {} shall be the following:\n" \ " - cluster, uri\n",\ - " - cluster, format\n",\ + " - cluster, uri, format\n",\ " - cluster, uri, format, structure\n",\ " - cluster, uri, format, structure, compression_method", getName()); diff --git a/src/TableFunctions/TableFunctionRemote.cpp b/src/TableFunctions/TableFunctionRemote.cpp index 097a239ccae..2c62e29810f 100644 --- a/src/TableFunctions/TableFunctionRemote.cpp +++ b/src/TableFunctions/TableFunctionRemote.cpp @@ -94,6 +94,30 @@ void TableFunctionRemote::parseArguments(const ASTPtr & ast_function, ContextPtr } else { + /// Supported signatures: + /// + /// remote('addresses_expr', db.table) + /// remote('addresses_expr', 'db', 'table') + /// remote('addresses_expr', db.table, 'user') + /// remote('addresses_expr', 'db', 'table', 'user') + /// remote('addresses_expr', db.table, 'user', 'password') + /// remote('addresses_expr', 'db', 'table', 'user', 'password') + /// remote('addresses_expr', db.table, sharding_key) + /// remote('addresses_expr', 'db', 'table', sharding_key) + /// remote('addresses_expr', db.table, 'user', sharding_key) + /// remote('addresses_expr', 'db', 'table', 'user', sharding_key) + /// remote('addresses_expr', db.table, 'user', 'password', sharding_key) + /// remote('addresses_expr', 'db', 'table', 'user', 'password', sharding_key) + /// + /// remoteSecure() - same as remote() + /// + /// cluster('cluster_name', db.table) + /// cluster('cluster_name', 'db', 'table') + /// cluster('cluster_name', db.table, sharding_key) + /// cluster('cluster_name', 'db', 'table', sharding_key) + /// + /// clusterAllReplicas() - same as cluster() + if (args.size() < 2 || args.size() > max_args) throw Exception(help_message, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); @@ -318,7 +342,6 @@ TableFunctionRemote::TableFunctionRemote(const std::string & name_, bool secure_ is_cluster_function ? 
" [, sharding_key]" : " [, username[, password], sharding_key]"); } - void registerTableFunctionRemote(TableFunctionFactory & factory) { factory.registerFunction("remote", [] () -> TableFunctionPtr { return std::make_shared("remote"); }); diff --git a/src/TableFunctions/TableFunctionS3.cpp b/src/TableFunctions/TableFunctionS3.cpp index b8e4fcb67fa..4c0b5352545 100644 --- a/src/TableFunctions/TableFunctionS3.cpp +++ b/src/TableFunctions/TableFunctionS3.cpp @@ -64,7 +64,7 @@ void TableFunctionS3::parseArgumentsImpl(const String & error_message, ASTs & ar if (args.size() == 4) { auto second_arg = checkAndGetLiteralArgument(args[1], "format/access_key_id"); - if (FormatFactory::instance().getAllFormats().contains(second_arg)) + if (second_arg == "auto" || FormatFactory::instance().getAllFormats().contains(second_arg)) args_to_idx = {{"format", 1}, {"structure", 2}, {"compression_method", 3}}; else @@ -77,7 +77,7 @@ void TableFunctionS3::parseArgumentsImpl(const String & error_message, ASTs & ar { auto second_arg = checkAndGetLiteralArgument(args[1], "format/access_key_id"); - if (FormatFactory::instance().getAllFormats().contains(second_arg)) + if (second_arg == "auto" || FormatFactory::instance().getAllFormats().contains(second_arg)) args_to_idx = {{"format", 1}, {"structure", 2}}; else args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}}; diff --git a/tests/.rgignore b/tests/.rgignore new file mode 100644 index 00000000000..26cb6f9025d --- /dev/null +++ b/tests/.rgignore @@ -0,0 +1 @@ +data_json diff --git a/tests/ci/ast_fuzzer_check.py b/tests/ci/ast_fuzzer_check.py index 8f94ef4a915..04dbe78adc4 100644 --- a/tests/ci/ast_fuzzer_check.py +++ b/tests/ci/ast_fuzzer_check.py @@ -69,7 +69,7 @@ if __name__ == "__main__": logging.info("Check is already finished according to github status, exiting") sys.exit(0) - docker_image = get_image_with_version(temp_path, IMAGE_NAME) + docker_image = get_image_with_version(reports_path, IMAGE_NAME) build_name = get_build_name_for_check(check_name) print(build_name) diff --git a/tests/ci/bugfix_validate_check.py b/tests/ci/bugfix_validate_check.py index 4e6001aaa74..e5f37f2940b 100644 --- a/tests/ci/bugfix_validate_check.py +++ b/tests/ci/bugfix_validate_check.py @@ -3,14 +3,21 @@ import argparse import csv import itertools +import logging import os -import sys + +from github import Github + +from s3_helper import S3Helper +from get_robot_token import get_best_robot_token +from pr_info import PRInfo +from upload_result_helper import upload_results +from commit_status_helper import post_commit_status def parse_args(): parser = argparse.ArgumentParser() - parser.add_argument("report1") - parser.add_argument("report2") + parser.add_argument("status", nargs="+", help="Path to status file") return parser.parse_args() @@ -26,20 +33,63 @@ def post_commit_status_from_file(file_path): return res[0] -def process_results(file_path): +def process_result(file_path): + test_results = [] state, report_url, description = post_commit_status_from_file(file_path) prefix = os.path.basename(os.path.dirname(file_path)) - print( - f"::notice:: bugfix check: {prefix} - {state}: {description} Report url: {report_url}" - ) - return state == "success" + is_ok = state == "success" + if is_ok and report_url == "null": + return is_ok, None + + status = f'OK: Bug reproduced (Report' + if not is_ok: + status = f'Bug is not reproduced (Report)' + test_results.append([f"{prefix}: {description}", status]) + return is_ok, test_results + + +def process_all_results(file_paths): + any_ok 
= False + all_results = [] + for status_path in file_paths: + is_ok, test_results = process_result(status_path) + any_ok = any_ok or is_ok + if test_results is not None: + all_results.extend(test_results) + + return any_ok, all_results def main(args): - is_ok = False - is_ok = process_results(args.report1) or is_ok - is_ok = process_results(args.report2) or is_ok - sys.exit(0 if is_ok else 1) + logging.basicConfig(level=logging.INFO) + + check_name_with_group = "Bugfix validate check" + + is_ok, test_results = process_all_results(args.status) + + if not test_results: + logging.info("No results to upload") + return + + pr_info = PRInfo() + report_url = upload_results( + S3Helper(), + pr_info.number, + pr_info.sha, + test_results, + [], + check_name_with_group, + ) + + gh = Github(get_best_robot_token(), per_page=100) + post_commit_status( + gh, + pr_info.sha, + check_name_with_group, + "" if is_ok else "Changed tests doesn't reproduce the bug", + "success" if is_ok else "error", + report_url, + ) if __name__ == "__main__": diff --git a/tests/ci/cancel_and_rerun_workflow_lambda/app.py b/tests/ci/cancel_and_rerun_workflow_lambda/app.py index 813ee9d1ab7..21a5ce517f6 100644 --- a/tests/ci/cancel_and_rerun_workflow_lambda/app.py +++ b/tests/ci/cancel_and_rerun_workflow_lambda/app.py @@ -15,7 +15,7 @@ import boto3 # type: ignore NEED_RERUN_OR_CANCELL_WORKFLOWS = { "PullRequestCI", "DocsCheck", - "DocsRelease", + "DocsReleaseChecks", "BackportPR", } diff --git a/tests/ci/cancel_and_rerun_workflow_lambda/requirements.txt b/tests/ci/cancel_and_rerun_workflow_lambda/requirements.txt index c0dcf4a4dde..e607f1a9f39 100644 --- a/tests/ci/cancel_and_rerun_workflow_lambda/requirements.txt +++ b/tests/ci/cancel_and_rerun_workflow_lambda/requirements.txt @@ -1,3 +1,3 @@ requests PyJWT -cryptography +cryptography==37.0.4 diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 5e69046915e..93322b69669 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -355,6 +355,12 @@ CI_CONFIG = { "required_build": "package_aarch64", "test_grep_exclude_filter": "", }, + "SQLancer (release)": { + "required_build": "package_release", + }, + "SQLancer (debug)": { + "required_build": "package_debug", + }, }, } # type: dict diff --git a/tests/ci/clickhouse_helper.py b/tests/ci/clickhouse_helper.py index a81334860d1..c82d9da05e9 100644 --- a/tests/ci/clickhouse_helper.py +++ b/tests/ci/clickhouse_helper.py @@ -37,12 +37,8 @@ class ClickHouseHelper: url, params=params, data=json_str, headers=auth ) except Exception as e: - logging.warning( - "Received exception while sending data to %s on %s attempt: %s", - url, - i, - e, - ) + error = f"Received exception while sending data to {url} on {i} attempt: {e}" + logging.warning(error) continue logging.info("Response content '%s'", response.content) diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py index 388f93f34ec..f7d3288c316 100644 --- a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -210,7 +210,10 @@ if __name__ == "__main__": run_changed_tests = flaky_check or validate_bugix_check gh = Github(get_best_robot_token(), per_page=100) - pr_info = PRInfo(need_changed_files=run_changed_tests) + # For validate_bugix_check we need up to date information about labels, so pr_event_from_api is used + pr_info = PRInfo( + need_changed_files=run_changed_tests, pr_event_from_api=validate_bugix_check + ) atexit.register(update_mergeable_check, gh, pr_info, check_name) @@ -221,11 +224,11 @@ if __name__ == 
"__main__": if args.post_commit_status == "file": post_commit_status_to_file( os.path.join(temp_path, "post_commit_status.tsv"), - "Skipped (no pr-bugfix)", + f"Skipped (no pr-bugfix in {pr_info.labels})", "success", "null", ) - logging.info("Skipping '%s' (no pr-bugfix)", check_name) + logging.info("Skipping '%s' (no pr-bugfix in %s)", check_name, pr_info.labels) sys.exit(0) if "RUN_BY_HASH_NUM" in os.environ: @@ -320,7 +323,7 @@ if __name__ == "__main__": state, description, test_results, additional_logs = process_results( result_path, server_log_path ) - state = override_status(state, check_name, validate_bugix_check) + state = override_status(state, check_name, invert=validate_bugix_check) ch_helper = ClickHouseHelper() mark_flaky_tests(ch_helper, check_name, test_results) diff --git a/tests/ci/integration_test_check.py b/tests/ci/integration_test_check.py index 3709a7271d7..cba428cbcf5 100644 --- a/tests/ci/integration_test_check.py +++ b/tests/ci/integration_test_check.py @@ -167,17 +167,22 @@ if __name__ == "__main__": os.makedirs(temp_path) is_flaky_check = "flaky" in check_name - pr_info = PRInfo(need_changed_files=is_flaky_check or validate_bugix_check) + + # For validate_bugix_check we need up to date information about labels, so pr_event_from_api is used + pr_info = PRInfo( + need_changed_files=is_flaky_check or validate_bugix_check, + pr_event_from_api=validate_bugix_check, + ) if validate_bugix_check and "pr-bugfix" not in pr_info.labels: if args.post_commit_status == "file": post_commit_status_to_file( os.path.join(temp_path, "post_commit_status.tsv"), - "Skipped (no pr-bugfix)", + f"Skipped (no pr-bugfix in {pr_info.labels})", "success", "null", ) - logging.info("Skipping '%s' (no pr-bugfix)", check_name) + logging.info("Skipping '%s' (no pr-bugfix in '%s')", check_name, pr_info.labels) sys.exit(0) gh = Github(get_best_robot_token(), per_page=100) @@ -244,7 +249,7 @@ if __name__ == "__main__": subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) state, description, test_results, additional_logs = process_results(result_path) - state = override_status(state, check_name, validate_bugix_check) + state = override_status(state, check_name, invert=validate_bugix_check) ch_helper = ClickHouseHelper() mark_flaky_tests(ch_helper, check_name, test_results) diff --git a/tests/ci/metrics_lambda/requirements.txt b/tests/ci/metrics_lambda/requirements.txt index c0dcf4a4dde..e607f1a9f39 100644 --- a/tests/ci/metrics_lambda/requirements.txt +++ b/tests/ci/metrics_lambda/requirements.txt @@ -1,3 +1,3 @@ requests PyJWT -cryptography +cryptography==37.0.4 diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py index dc016a7eed9..5f725a61b3e 100644 --- a/tests/ci/pr_info.py +++ b/tests/ci/pr_info.py @@ -87,7 +87,7 @@ class PRInfo: self.body = "" self.diff_urls = [] self.release_pr = 0 - ref = github_event.get("ref", "refs/head/master") + ref = github_event.get("ref", "refs/heads/master") if ref and ref.startswith("refs/heads/"): ref = ref[11:] diff --git a/tests/ci/sqlancer_check.py b/tests/ci/sqlancer_check.py new file mode 100644 index 00000000000..51c95e50746 --- /dev/null +++ b/tests/ci/sqlancer_check.py @@ -0,0 +1,193 @@ +#!/usr/bin/env python3 + +import logging +import subprocess +import os +import sys + +from github import Github + +from env_helper import ( + GITHUB_REPOSITORY, + GITHUB_RUN_URL, + REPORTS_PATH, + REPO_COPY, + TEMP_PATH, +) +from s3_helper import S3Helper +from get_robot_token import get_best_robot_token +from pr_info import PRInfo +from 
build_download_helper import get_build_name_for_check, read_build_urls +from docker_pull_helper import get_image_with_version +from commit_status_helper import post_commit_status +from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse +from upload_result_helper import upload_results +from stopwatch import Stopwatch +from rerun_helper import RerunHelper + +IMAGE_NAME = "clickhouse/sqlancer-test" + + +def get_run_command(download_url, workspace_path, image): + return ( + f"docker run " + # For sysctl + "--privileged " + "--network=host " + f"--volume={workspace_path}:/workspace " + "--cap-add syslog --cap-add sys_admin --cap-add=SYS_PTRACE " + f'-e BINARY_URL_TO_DOWNLOAD="{download_url}" ' + f"{image}" + ) + + +def get_commit(gh, commit_sha): + repo = gh.get_repo(GITHUB_REPOSITORY) + commit = repo.get_commit(commit_sha) + return commit + + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + + stopwatch = Stopwatch() + + temp_path = TEMP_PATH + repo_path = REPO_COPY + reports_path = REPORTS_PATH + + check_name = sys.argv[1] + + if not os.path.exists(temp_path): + os.makedirs(temp_path) + + pr_info = PRInfo() + + gh = Github(get_best_robot_token(), per_page=100) + + rerun_helper = RerunHelper(gh, pr_info, check_name) + if rerun_helper.is_already_finished_by_status(): + logging.info("Check is already finished according to github status, exiting") + sys.exit(0) + + docker_image = get_image_with_version(reports_path, IMAGE_NAME) + + build_name = get_build_name_for_check(check_name) + print(build_name) + urls = read_build_urls(build_name, reports_path) + if not urls: + raise Exception("No build URLs found") + + for url in urls: + if url.endswith("/clickhouse"): + build_url = url + break + else: + raise Exception("Cannot find binary clickhouse among build results") + + logging.info("Got build url %s", build_url) + + workspace_path = os.path.join(temp_path, "workspace") + if not os.path.exists(workspace_path): + os.makedirs(workspace_path) + + run_command = get_run_command(build_url, workspace_path, docker_image) + logging.info("Going to run %s", run_command) + + run_log_path = os.path.join(workspace_path, "runlog.log") + with open(run_log_path, "w", encoding="utf-8") as log: + with subprocess.Popen( + run_command, shell=True, stderr=log, stdout=log + ) as process: + retcode = process.wait() + if retcode == 0: + logging.info("Run successfully") + else: + logging.info("Run failed") + + subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) + + check_name_lower = ( + check_name.lower().replace("(", "").replace(")", "").replace(" ", "") + ) + s3_prefix = f"{pr_info.number}/{pr_info.sha}/{check_name_lower}/" + + tests = [ + "TLPGroupBy", + "TLPHaving", + "TLPWhere", + "TLPDistinct", + "TLPAggregate", + "NoREC", + ] + + paths = [ + run_log_path, + os.path.join(workspace_path, "clickhouse-server.log"), + os.path.join(workspace_path, "stderr.log"), + os.path.join(workspace_path, "stdout.log"), + ] + for t in tests: + err_name = f"{t}.err" + log_name = f"{t}.out" + paths.append(os.path.join(workspace_path, err_name)) + paths.append(os.path.join(workspace_path, log_name)) + + s3_helper = S3Helper() + report_url = GITHUB_RUN_URL + + status = "success" + test_results = [] + # Try to get status message saved by the SQLancer + try: + # with open( + # os.path.join(workspace_path, "status.txt"), "r", encoding="utf-8" + # ) as status_f: + # status = status_f.readline().rstrip("\n") + if os.path.exists(os.path.join(workspace_path, 
"server_crashed.log")): + test_results.append("Server crashed", "FAIL") + with open( + os.path.join(workspace_path, "summary.tsv"), "r", encoding="utf-8" + ) as summary_f: + for line in summary_f: + l = line.split("\t") + test_results.append((l[0], l[1])) + + with open( + os.path.join(workspace_path, "description.txt"), "r", encoding="utf-8" + ) as desc_f: + description = desc_f.readline().rstrip("\n")[:140] + except: + # status = "failure" + description = "Task failed: $?=" + str(retcode) + + report_url = upload_results( + s3_helper, + pr_info.number, + pr_info.sha, + test_results, + paths, + check_name, + False, + ) + + post_commit_status(gh, pr_info.sha, check_name, description, status, report_url) + + print(f"::notice:: {check_name} Report url: {report_url}") + + ch_helper = ClickHouseHelper() + + prepared_events = prepare_tests_results_for_clickhouse( + pr_info, + test_results, + status, + stopwatch.duration_seconds, + stopwatch.start_time_str, + report_url, + check_name, + ) + + ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) + + print(f"::notice Result: '{status}', '{description}', '{report_url}'") + post_commit_status(gh, pr_info.sha, check_name, description, status, report_url) diff --git a/tests/ci/termination_lambda/requirements.txt b/tests/ci/termination_lambda/requirements.txt index c0dcf4a4dde..e607f1a9f39 100644 --- a/tests/ci/termination_lambda/requirements.txt +++ b/tests/ci/termination_lambda/requirements.txt @@ -1,3 +1,3 @@ requests PyJWT -cryptography +cryptography==37.0.4 diff --git a/tests/ci/token_lambda/requirements.txt b/tests/ci/token_lambda/requirements.txt index c0dcf4a4dde..e607f1a9f39 100644 --- a/tests/ci/token_lambda/requirements.txt +++ b/tests/ci/token_lambda/requirements.txt @@ -1,3 +1,3 @@ requests PyJWT -cryptography +cryptography==37.0.4 diff --git a/tests/ci/upload_result_helper.py b/tests/ci/upload_result_helper.py index 0fde4408176..e145df02f80 100644 --- a/tests/ci/upload_result_helper.py +++ b/tests/ci/upload_result_helper.py @@ -14,6 +14,8 @@ from report import ReportColorTheme, create_test_html_report def process_logs( s3_client, additional_logs, s3_path_prefix, test_results, with_raw_logs ): + logging.info("Upload files to s3 %s", additional_logs) + processed_logs = {} # Firstly convert paths of logs from test_results to urls to s3. for test_result in test_results: diff --git a/tests/ci/version_helper.py b/tests/ci/version_helper.py index 966858c0747..162bab6a50a 100755 --- a/tests/ci/version_helper.py +++ b/tests/ci/version_helper.py @@ -344,7 +344,7 @@ def main(): update_contributors() return - version = get_version_from_repo(args.version_path) + version = get_version_from_repo(args.version_path, Git(True)) if args.update: version = version.update(args.update) diff --git a/tests/ci/workflow_approve_rerun_lambda/app.py b/tests/ci/workflow_approve_rerun_lambda/app.py index 39bd9cfb283..f2b785840d8 100644 --- a/tests/ci/workflow_approve_rerun_lambda/app.py +++ b/tests/ci/workflow_approve_rerun_lambda/app.py @@ -61,11 +61,11 @@ TRUSTED_WORKFLOW_IDS = { NEED_RERUN_WORKFLOWS = { "BackportPR", - "Docs", - "DocsRelease", + "DocsCheck", + "DocsReleaseChecks", "MasterCI", "PullRequestCI", - "ReleaseCI", + "ReleaseBranchCI", } # Individual trusted contirbutors who are not in any trusted organization. 
diff --git a/tests/ci/workflow_approve_rerun_lambda/requirements.txt b/tests/ci/workflow_approve_rerun_lambda/requirements.txt index c0dcf4a4dde..e607f1a9f39 100644 --- a/tests/ci/workflow_approve_rerun_lambda/requirements.txt +++ b/tests/ci/workflow_approve_rerun_lambda/requirements.txt @@ -1,3 +1,3 @@ requests PyJWT -cryptography +cryptography==37.0.4 diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 12f85a5adbf..20e63412d91 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -987,7 +987,7 @@ class TestCase: and (proc.stderr is None) and (proc.stdout is None or "Exception" not in proc.stdout) ) - need_drop_database = not maybe_passed + need_drop_database = maybe_passed debug_log = "" if os.path.exists(self.testcase_args.debug_log_file): @@ -2055,7 +2055,7 @@ if __name__ == "__main__": parser.add_argument( "--no-drop-if-fail", action="store_true", - help="Do not drop database for test if test has failed", + help="Do not drop database for test if test has failed (does not work if reference file mismatch)", ) parser.add_argument( "--hide-db-name", diff --git a/tests/config/config.d/storage_conf.xml b/tests/config/config.d/storage_conf.xml index a2a7f5cc750..8226d801cef 100644 --- a/tests/config/config.d/storage_conf.xml +++ b/tests/config/config.d/storage_conf.xml @@ -93,6 +93,15 @@ 22548578304 0 + + cache + s3_disk_6 + s3_cache_6/ + 22548578304 + 0 + 1 + 100 + cache s3_disk_6 @@ -183,6 +192,13 @@ + + +
+                        <disk>s3_cache_6</disk>
+                    </main>
+                </volumes>
+            </s3_cache_6>
diff --git a/tests/fuzz/all.dict b/tests/fuzz/all.dict index a147878da9b..7977cb9ed21 100644 --- a/tests/fuzz/all.dict +++ b/tests/fuzz/all.dict @@ -447,6 +447,7 @@ "FORMAT" "formatDateTime" "formatReadableQuantity" +"formatReadableDecimalSize" "formatReadableSize" "formatReadableTimeDelta" "formatRow" diff --git a/tests/fuzz/dictionaries/functions.dict b/tests/fuzz/dictionaries/functions.dict index b90697f0c3d..e4f347babf8 100644 --- a/tests/fuzz/dictionaries/functions.dict +++ b/tests/fuzz/dictionaries/functions.dict @@ -399,6 +399,7 @@ "demangle" "toNullable" "concat" +"formatReadableDecimalSize" "formatReadableSize" "shardCount" "fromModifiedJulianDayOrNull" diff --git a/tests/integration/helpers/client.py b/tests/integration/helpers/client.py index a4407d5b442..ab1cc65e9a9 100644 --- a/tests/integration/helpers/client.py +++ b/tests/integration/helpers/client.py @@ -8,7 +8,14 @@ DEFAULT_QUERY_TIMEOUT = 600 class Client: - def __init__(self, host, port=9000, command="/usr/bin/clickhouse-client"): + def __init__( + self, + host, + port=9000, + command="/usr/bin/clickhouse-client", + secure=False, + config=None, + ): self.host = host self.port = port self.command = [command] @@ -16,6 +23,11 @@ class Client: if os.path.basename(command) == "clickhouse": self.command.append("client") + if secure: + self.command.append("--secure") + if config is not None: + self.command += ["--config-file", config] + self.command += ["--host", self.host, "--port", str(self.port), "--stacktrace"] def stacktraces_on_timeout_decorator(func): diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index c987ca292c1..666833013c8 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -2678,7 +2678,9 @@ class ClickHouseCluster: # Check server logs for Fatal messages and sanitizer failures. # NOTE: we cannot do this via docker since in case of Fatal message container may already die. for name, instance in self.instances.items(): - if instance.contains_in_log(SANITIZER_SIGN, from_host=True): + if instance.contains_in_log( + SANITIZER_SIGN, from_host=True, filename="stderr.log" + ): sanitizer_assert_instance = instance.grep_in_log( SANITIZER_SIGN, from_host=True, filename="stderr.log" ) diff --git a/tests/integration/test_attach_backup_from_s3_plain/__init__.py b/tests/integration/test_attach_backup_from_s3_plain/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_attach_backup_from_s3_plain/configs/disk_s3.xml b/tests/integration/test_attach_backup_from_s3_plain/configs/disk_s3.xml new file mode 100644 index 00000000000..67278694d39 --- /dev/null +++ b/tests/integration/test_attach_backup_from_s3_plain/configs/disk_s3.xml @@ -0,0 +1,34 @@ + + + + + + s3_plain + http://minio1:9001/root/data/disks/disk_s3_plain/ + minio + minio123 + 33554432 + + + s3_plain + + http://minio1:9001/root/data/disks/disk_s3_plain/backup/ + minio + minio123 + 33554432 + + + + + +
+                        <disk>attach_disk_s3_plain</disk>
+                    </main>
+                </volumes>
+            </attach_policy_s3_plain>
+        </policies>
+    </storage_configuration>
+    <backups>
+        <allowed_disk>backup_disk_s3_plain</allowed_disk>
+    </backups>
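+    <!-- attach_policy_s3_plain is the storage_policy referenced by test.py; backup_disk_s3_plain is the
+         disk used by BACKUP TABLE ... TO Disk('backup_disk_s3_plain', 'backup') in the same test. -->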
diff --git a/tests/integration/test_attach_backup_from_s3_plain/test.py b/tests/integration/test_attach_backup_from_s3_plain/test.py new file mode 100644 index 00000000000..35d53d5b8bd --- /dev/null +++ b/tests/integration/test_attach_backup_from_s3_plain/test.py @@ -0,0 +1,40 @@ +# pylint: disable=global-statement +# pylint: disable=line-too-long + +import pytest +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance( + "node", + main_configs=["configs/disk_s3.xml"], + with_minio=True, +) + + +@pytest.fixture(scope="module", autouse=True) +def start_cluster(): + try: + cluster.start() + yield + finally: + cluster.shutdown() + + +def test_attach_backup(): + node.query( + f""" + -- BACKUP writes Ordinary like structure + set allow_deprecated_database_ordinary=1; + create database ordinary engine=Ordinary; + + create table ordinary.test_backup_attach engine=MergeTree() order by tuple() as select * from numbers(100); + -- NOTE: name of backup ("backup") is significant. + backup table ordinary.test_backup_attach TO Disk('backup_disk_s3_plain', 'backup'); + + drop table ordinary.test_backup_attach; + attach table ordinary.test_backup_attach (number UInt64) engine=MergeTree() order by tuple() settings storage_policy='attach_policy_s3_plain'; + """ + ) + + assert int(node.query("select count() from ordinary.test_backup_attach")) == 100 diff --git a/tests/integration/test_backup_restore_s3/__init__.py b/tests/integration/test_backup_restore_s3/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_backup_restore_s3/configs/disk_s3.xml b/tests/integration/test_backup_restore_s3/configs/disk_s3.xml new file mode 100644 index 00000000000..c1fd059bc67 --- /dev/null +++ b/tests/integration/test_backup_restore_s3/configs/disk_s3.xml @@ -0,0 +1,47 @@ + + + + + + s3 + http://minio1:9001/root/data/disks/disk_s3/ + minio + minio123 + + + s3 + http://minio1:9001/root2/data/disks/disk_s3/ + minio + minio123 + + + s3_plain + http://minio1:9001/root/data/disks/disk_s3_plain/ + minio + minio123 + 33554432 + + + + + +
+ disk_s3 +
+
+
+ + +
+ disk_s3_other_bucket +
+
+
+
+
+ + default + disk_s3 + disk_s3_plain + +
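The backup/restore tests added below verify a round trip by comparing the row count and a per-row hash of the original and restored tables inside a single query (`throwIf` makes the server fail the query if they differ). A minimal sketch of the same verification idea expressed as a client-side check, assuming a `node` object exposing the `query()` helper used throughout these tests (illustration only, not part of the patch):

    def verify_backup_round_trip(node, source_table, restored_table):
        # Row count plus a sum of per-row hashes catches missing rows and corrupted values.
        fingerprint = "SELECT count(), sum(sipHash64(*)) FROM {table}"
        original = node.query(fingerprint.format(table=source_table)).strip()
        restored = node.query(fingerprint.format(table=restored_table)).strip()
        assert original == restored, (
            f"Data mismatch after BACKUP/RESTORE: {original!r} != {restored!r}"
        )

    # Hypothetical usage mirroring check_backup_and_restore below:
    # verify_backup_round_trip(node, "data", "data_restored")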
diff --git a/tests/integration/test_backup_restore_s3/configs/named_collection_s3_backups.xml b/tests/integration/test_backup_restore_s3/configs/named_collection_s3_backups.xml new file mode 100644 index 00000000000..7a9d5effede --- /dev/null +++ b/tests/integration/test_backup_restore_s3/configs/named_collection_s3_backups.xml @@ -0,0 +1,9 @@ + + + + http://minio1:9001/root/data/backups + minio + minio123 + + + \ No newline at end of file diff --git a/tests/integration/test_backup_restore_s3/configs/s3_settings.xml b/tests/integration/test_backup_restore_s3/configs/s3_settings.xml new file mode 100644 index 00000000000..2aef4db55c8 --- /dev/null +++ b/tests/integration/test_backup_restore_s3/configs/s3_settings.xml @@ -0,0 +1,12 @@ + + + + http://minio1:9001/root/data/backups/multipart_upload_copy/ + + 1 + 5242880 + 3 + 2 + + + diff --git a/tests/integration/test_backup_restore_s3/test.py b/tests/integration/test_backup_restore_s3/test.py new file mode 100644 index 00000000000..7ddb1459ab9 --- /dev/null +++ b/tests/integration/test_backup_restore_s3/test.py @@ -0,0 +1,133 @@ +import pytest +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance( + "node", + main_configs=[ + "configs/disk_s3.xml", + "configs/named_collection_s3_backups.xml", + "configs/s3_settings.xml", + ], + with_minio=True, +) + + +@pytest.fixture(scope="module", autouse=True) +def start_cluster(): + try: + cluster.start() + yield + finally: + cluster.shutdown() + + +backup_id_counter = 0 + + +def new_backup_name(): + global backup_id_counter + backup_id_counter += 1 + return f"backup{backup_id_counter}" + + +def check_backup_and_restore(storage_policy, backup_destination, size=1000): + node.query( + f""" + DROP TABLE IF EXISTS data NO DELAY; + CREATE TABLE data (key Int, value String, array Array(String)) Engine=MergeTree() ORDER BY tuple() SETTINGS storage_policy='{storage_policy}'; + INSERT INTO data SELECT * FROM generateRandom('key Int, value String, array Array(String)') LIMIT {size}; + BACKUP TABLE data TO {backup_destination}; + RESTORE TABLE data AS data_restored FROM {backup_destination}; + SELECT throwIf( + (SELECT count(), sum(sipHash64(*)) FROM data) != + (SELECT count(), sum(sipHash64(*)) FROM data_restored), + 'Data does not matched after BACKUP/RESTORE' + ); + DROP TABLE data NO DELAY; + DROP TABLE data_restored NO DELAY; + """ + ) + + +@pytest.mark.parametrize( + "storage_policy, to_disk", + [ + pytest.param( + "default", + "default", + id="from_local_to_local", + ), + pytest.param( + "policy_s3", + "default", + id="from_s3_to_local", + ), + pytest.param( + "default", + "disk_s3", + id="from_local_to_s3", + ), + pytest.param( + "policy_s3", + "disk_s3_plain", + id="from_s3_to_s3_plain", + ), + pytest.param( + "default", + "disk_s3_plain", + id="from_local_to_s3_plain", + ), + ], +) +def test_backup_to_disk(storage_policy, to_disk): + backup_name = new_backup_name() + backup_destination = f"Disk('{to_disk}', '{backup_name}')" + check_backup_and_restore(storage_policy, backup_destination) + + +def test_backup_to_s3(): + storage_policy = "default" + backup_name = new_backup_name() + backup_destination = ( + f"S3('http://minio1:9001/root/data/backups/{backup_name}', 'minio', 'minio123')" + ) + check_backup_and_restore(storage_policy, backup_destination) + + +def test_backup_to_s3_named_collection(): + storage_policy = "default" + backup_name = new_backup_name() + backup_destination = f"S3(named_collection_s3_backups, '{backup_name}')" + 
check_backup_and_restore(storage_policy, backup_destination) + + +def test_backup_to_s3_native_copy(): + storage_policy = "policy_s3" + backup_name = new_backup_name() + backup_destination = ( + f"S3('http://minio1:9001/root/data/backups/{backup_name}', 'minio', 'minio123')" + ) + check_backup_and_restore(storage_policy, backup_destination) + assert node.contains_in_log("using native copy") + assert node.contains_in_log("single-operation copy") + + +def test_backup_to_s3_native_copy_other_bucket(): + storage_policy = "policy_s3_other_bucket" + backup_name = new_backup_name() + backup_destination = ( + f"S3('http://minio1:9001/root/data/backups/{backup_name}', 'minio', 'minio123')" + ) + check_backup_and_restore(storage_policy, backup_destination) + assert node.contains_in_log("using native copy") + assert node.contains_in_log("single-operation copy") + + +def test_backup_to_s3_native_copy_multipart_upload(): + storage_policy = "policy_s3" + backup_name = new_backup_name() + backup_destination = f"S3('http://minio1:9001/root/data/backups/multipart_upload_copy/{backup_name}', 'minio', 'minio123')" + check_backup_and_restore(storage_policy, backup_destination, size=1000000) + assert node.contains_in_log("using native copy") + assert node.contains_in_log("multipart upload copy") diff --git a/tests/integration/test_backward_compatibility/test_functions.py b/tests/integration/test_backward_compatibility/test_functions.py new file mode 100644 index 00000000000..fe1c0ea7108 --- /dev/null +++ b/tests/integration/test_backward_compatibility/test_functions.py @@ -0,0 +1,224 @@ +# pylint: disable=unused-argument +# pylint: disable=line-too-long +# pylint: disable=cell-var-from-loop +# pylint: disable=redefined-outer-name + +import logging +import pytest +from helpers.cluster import ClickHouseCluster +from helpers.client import QueryRuntimeException + +cluster = ClickHouseCluster(__file__) +upstream = cluster.add_instance("upstream") +backward = cluster.add_instance( + "backward", + image="clickhouse/clickhouse-server", + tag="22.9", + with_installed_binary=True, +) + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + yield cluster + + finally: + cluster.shutdown() + + +def test_aggregate_states(start_cluster): + """ + This test goes through all aggregate functions that: + - take only one argument + - support a string as that argument + + and does a simple check by creating the aggregate state from one string. + + This does not cover everything (functions with a different number of + arguments, other argument types, or states built from multiple values - + e.g. uniqCombined, although uniqCombined is effectively checked via + uniqHLL12), but it is at least something. + + String was chosen as the argument type because it is the most likely to + exercise some hash function internally.
+ """ + + aggregate_functions = backward.query( + """ + SELECT if(NOT empty(alias_to), alias_to, name) + FROM system.functions + WHERE is_aggregate = 1 + """ + ) + aggregate_functions = aggregate_functions.strip().split("\n") + aggregate_functions = map(lambda x: x.strip(), aggregate_functions) + + aggregate_functions = list(aggregate_functions) + logging.info("Got %s aggregate functions", len(aggregate_functions)) + + skipped = 0 + failed = 0 + passed = 0 + + def get_aggregate_state_hex(node, function_name): + return node.query( + f"select hex(initializeAggregation('{function_name}State', 'foo'))" + ).strip() + + for aggregate_function in aggregate_functions: + logging.info("Checking %s", aggregate_function) + + try: + backward_state = get_aggregate_state_hex(backward, aggregate_function) + except QueryRuntimeException as e: + error_message = str(e) + allowed_errors = [ + "NUMBER_OF_ARGUMENTS_DOESNT_MATCH", + "ILLEGAL_TYPE_OF_ARGUMENT", + # sequenceNextNode() and friends + "UNKNOWN_AGGREGATE_FUNCTION", + # Function X takes exactly one parameter: + # The function 'X' can only be used as a window function + "BAD_ARGUMENTS", + # aggThrow + "AGGREGATE_FUNCTION_THROW", + ] + if any(map(lambda x: x in error_message, allowed_errors)): + logging.info("Skipping %s", aggregate_function) + skipped += 1 + continue + logging.exception("Failed %s", aggregate_function) + failed += 1 + continue + + upstream_state = get_aggregate_state_hex(upstream, aggregate_function) + if upstream_state != backward_state: + logging.info( + "Failed %s, %s (backward) != %s (upstream)", + aggregate_function, + backward_state, + upstream_state, + ) + failed += 1 + else: + logging.info("OK %s", aggregate_function) + passed += 1 + + logging.info( + "Aggregate functions: %s, Failed: %s, skipped: %s, passed: %s", + len(aggregate_functions), + failed, + skipped, + passed, + ) + assert failed == 0 + assert passed > 0 + assert failed + passed + skipped == len(aggregate_functions) + + +def test_string_functions(start_cluster): + functions = backward.query( + """ + SELECT if(NOT empty(alias_to), alias_to, name) + FROM system.functions + WHERE is_aggregate = 0 + """ + ) + functions = functions.strip().split("\n") + functions = map(lambda x: x.strip(), functions) + + excludes = [ + "rand", + "rand64", + "randConstant", + "generateUUIDv4", + # Syntax error otherwise + "position", + "substring", + "CAST", + # NOTE: no need to ignore now()/now64() since they will fail because they don't accept any argument + ] + functions = filter(lambda x: x not in excludes, functions) + + functions = list(functions) + logging.info("Got %s functions", len(functions)) + + skipped = 0 + failed = 0 + passed = 0 + + def get_function_value(node, function_name, value="foo"): + return node.query(f"select {function_name}('{value}')").strip() + + for function in functions: + logging.info("Checking %s", function) + + try: + backward_value = get_function_value(backward, function) + except QueryRuntimeException as e: + error_message = str(e) + allowed_errors = [ + # Messages + "Cannot load time zone ", + "No macro ", + "Should start with ", # POINT/POLYGON/... 
+ "Cannot read input: expected a digit but got something else:", + # ErrorCodes + "NUMBER_OF_ARGUMENTS_DOESNT_MATCH", + "ILLEGAL_TYPE_OF_ARGUMENT", + "TOO_FEW_ARGUMENTS_FOR_FUNCTION", + "DICTIONARIES_WAS_NOT_LOADED", + "CANNOT_PARSE_UUID", + "CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING", + "ILLEGAL_COLUMN", + "TYPE_MISMATCH", + "SUPPORT_IS_DISABLED", + "CANNOT_PARSE_DATE", + "UNKNOWN_SETTING", + "CANNOT_PARSE_BOOL", + "FILE_DOESNT_EXIST", + "NOT_IMPLEMENTED", + "BAD_GET", + "UNKNOWN_TYPE", + # addressToSymbol + "FUNCTION_NOT_ALLOWED", + # Date functions + "CANNOT_PARSE_TEXT", + "CANNOT_PARSE_DATETIME", + # Function X takes exactly one parameter: + # The function 'X' can only be used as a window function + "BAD_ARGUMENTS", + ] + if any(map(lambda x: x in error_message, allowed_errors)): + logging.info("Skipping %s", function) + skipped += 1 + continue + logging.exception("Failed %s", function) + failed += 1 + continue + + upstream_value = get_function_value(upstream, function) + if upstream_value != backward_value: + logging.info( + "Failed %s, %s (backward) != %s (upstream)", + function, + backward_value, + upstream_value, + ) + failed += 1 + else: + logging.info("OK %s", function) + passed += 1 + + logging.info( + "Functions: %s, failed: %s, skipped: %s, passed: %s", + len(functions), + failed, + skipped, + passed, + ) + assert failed == 0 + assert passed > 0 + assert failed + passed + skipped == len(functions) diff --git a/tests/integration/test_composable_protocols/__init__.py b/tests/integration/test_composable_protocols/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_composable_protocols/configs/client.xml b/tests/integration/test_composable_protocols/configs/client.xml new file mode 100644 index 00000000000..15d83a7b1ab --- /dev/null +++ b/tests/integration/test_composable_protocols/configs/client.xml @@ -0,0 +1,10 @@ + + + + none + + AcceptCertificateHandler + + + + diff --git a/tests/integration/test_composable_protocols/configs/config.xml b/tests/integration/test_composable_protocols/configs/config.xml new file mode 100644 index 00000000000..35673c3e7e5 --- /dev/null +++ b/tests/integration/test_composable_protocols/configs/config.xml @@ -0,0 +1,63 @@ + + + + + + /etc/clickhouse-server/config.d/server.crt + /etc/clickhouse-server/config.d/server.key + none + true + true + sslv2,sslv3 + true + + + + 0.0.0.0 + + + + tcp + 0.0.0.0 + 9000 + native protocol (tcp) + + + tls + tcp + 9440 + secure native protocol (tcp_secure) + + + tcp + 0.0.0.0 + 9001 + native protocol endpoint (tcp) + + + proxy1 + tcp + 9100 + native protocol with PROXYv1 (tcp_proxy) + + + http + 8123 + http protocol + + + tls + http + 0.0.0.0 + 8443 + https protocol + + + https + 8444 + https protocol endpoint + + + + + diff --git a/tests/integration/test_composable_protocols/configs/server.crt b/tests/integration/test_composable_protocols/configs/server.crt new file mode 100644 index 00000000000..6f4deca038f --- /dev/null +++ b/tests/integration/test_composable_protocols/configs/server.crt @@ -0,0 +1,18 @@ +-----BEGIN CERTIFICATE----- +MIIC+zCCAeOgAwIBAgIJAIhI9ozZJ+TWMA0GCSqGSIb3DQEBCwUAMBQxEjAQBgNV +BAMMCWxvY2FsaG9zdDAeFw0xOTA0MjIwNDMyNTJaFw0yMDA0MjEwNDMyNTJaMBQx +EjAQBgNVBAMMCWxvY2FsaG9zdDCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoC +ggEBAK+wVUEdqF2uXvN0MJBgnAHyXi6JTi4p/F6igsrCjSNjJWzHH0vQmK8ujfcF +CkifW88i+W5eHctuEtQqNHK+t9x9YiZtXrj6m/XkOXs20mYgENSmbbbHbriTPnZB +zZrq6UqMlwIHNNAa+I3NMORQxVRaI0ybXnGVO5elr70xHpk03xL0JWKHpEqYp4db 
+2aBQgF6y3Ww4khxjIYqpUYXWXGFnVIRU7FKVEAM1xyKqvQzXjQ5sVM/wyHknveEF +3b/X4ggN+KNl5KOc0cWDh1/XaatJAPaUUPqZcq76tynLbP64Xm3dxHcj+gtRkO67 +ef6MSg6l63m3XQP6Qb+MIkd06OsCAwEAAaNQME4wHQYDVR0OBBYEFDmODTO8QLDN +ykR3x0LIOnjNhrKhMB8GA1UdIwQYMBaAFDmODTO8QLDNykR3x0LIOnjNhrKhMAwG +A1UdEwQFMAMBAf8wDQYJKoZIhvcNAQELBQADggEBAAwaiJc7uqEpnH3aukbftDwX +m8GfEnj1HVdgg+9GGNq+9rvUYBF6gdPmjRCX9dO0cclLFx8jc2org0rTSq9WoOhX +E6qL4Eqrmc5SE3Y9jZM0h6GRD4oXK014FmtZ3T6ddZU3dQLj3BS2r1XrvmubTvGN +ZuTJNY8nx8Hh6H5XINmsEjUF9E5hog+PwCE03xt2adIdYL+gsbxASeNYyeUFpZv5 +zcXR3VoakBWnAaOVgCHq2qh96QAnL7ZKzFkGf/MdwV10KU3dmb+ICbQUUdf9Gc17 +aaDCIRws312F433FdXBkGs2UkB7ZZme9dfn6O1QbeTNvex2VLMqYx/CTkfFbOQA= +-----END CERTIFICATE----- diff --git a/tests/integration/test_composable_protocols/configs/server.key b/tests/integration/test_composable_protocols/configs/server.key new file mode 100644 index 00000000000..6eddb3295db --- /dev/null +++ b/tests/integration/test_composable_protocols/configs/server.key @@ -0,0 +1,28 @@ +-----BEGIN PRIVATE KEY----- +MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQCvsFVBHahdrl7z +dDCQYJwB8l4uiU4uKfxeooLKwo0jYyVsxx9L0JivLo33BQpIn1vPIvluXh3LbhLU +KjRyvrfcfWImbV64+pv15Dl7NtJmIBDUpm22x264kz52Qc2a6ulKjJcCBzTQGviN +zTDkUMVUWiNMm15xlTuXpa+9MR6ZNN8S9CVih6RKmKeHW9mgUIBest1sOJIcYyGK +qVGF1lxhZ1SEVOxSlRADNcciqr0M140ObFTP8Mh5J73hBd2/1+IIDfijZeSjnNHF +g4df12mrSQD2lFD6mXKu+rcpy2z+uF5t3cR3I/oLUZDuu3n+jEoOpet5t10D+kG/ +jCJHdOjrAgMBAAECggEARF66zrxb6RkSmmt8+rKeA6PuQu3sHsr4C1vyyjUr97l9 +tvdGlpp20LWtSZQMjHZ3pARYTTsTHTeY3DgQcRcHNicVKx8k3ZepWeeW9vw+pL+V +zSt3RsoVrH6gsCSrfr4sS3aqzX9AbjwQvh48CJ3mLQ1m70kHV+xbZIh1+4pB/hyP +1wKyUE18ZkOptXvO/TtoHzLQCecpkXtWzmry1Eh2isvXA+NMrAtLibGsyM1mtm7i +5ozevzHabvvCDBEe+KgZdONgVhhhvm2eOd+/s4w3rw4ETud4fI/ZAJyWXhiIKFnA +VJbElWruSAoVBW7p2bsF5PbmVzvo8vXL+VylxYD+AQKBgQDhLoRKTVhNkn/QjKxq +sdOh+QZra0LzjVpAmkQzu7wZMSHEz9qePQciDQQrYKrmRF1vNcIRCVUTqWYheJ/1 +lKRrCGa0ab6k96zkWMqLHD5u+UeJV7r1dJIx08ME9kNJ+x/XtB8klRIji16NiQUS +qc6p8z0M2AnbJzsRfWZRH8FeYwKBgQDHu8dzdtVGI7MtxfPOE/bfajiopDg8BdTC +pdug2T8XofRHRq7Q+0vYjTAZFT/slib91Pk6VvvPdo9VBZiL4omv4dAq6mOOdX/c +U14mJe1X5GCrr8ExZ8BfNJ3t/6sV1fcxyJwAw7iBguqxA2JqdM/wFk10K8XqvzVn +CD6O9yGt2QKBgFX1BMi8N538809vs41S7l9hCQNOQZNo/O+2M5yv6ECRkbtoQKKw +1x03bMUGNJaLuELweXE5Z8GGo5bZTe5X3F+DKHlr+DtO1C+ieUaa9HY2MAmMdLCn +2/qrREGLo+oEs4YKmuzC/taUp/ZNPKOAMISNdluFyFVg51pozPrgrVbTAoGBAKkE +LBl3O67o0t0vH8sJdeVFG8EJhlS0koBMnfgVHqC++dm+5HwPyvTrNQJkyv1HaqNt +r6FArkG3ED9gRuBIyT6+lctbIPgSUip9mbQqcBfqOCvQxGksZMur2ODncz09HLtS +CUFUXjOqNzOnq4ZuZu/Bz7U4vXiSaXxQq6+LTUKxAoGAFZU/qrI06XxnrE9A1X0W +l7DSkpZaDcu11NrZ473yONih/xOZNh4SSBpX8a7F6Pmh9BdtGqphML8NFPvQKcfP +b9H2iid2tc292uyrUEb5uTMmv61zoTwtitqLzO0+tS6PT3fXobX+eyeEWKzPBljL +HFtxG5CCXpkdnWRmaJnhTzA= +-----END PRIVATE KEY----- diff --git a/tests/integration/test_composable_protocols/configs/users.xml b/tests/integration/test_composable_protocols/configs/users.xml new file mode 100644 index 00000000000..da8425b3695 --- /dev/null +++ b/tests/integration/test_composable_protocols/configs/users.xml @@ -0,0 +1,16 @@ + + + + + + + + + + + ::/0 + + default + + + diff --git a/tests/integration/test_composable_protocols/test.py b/tests/integration/test_composable_protocols/test.py new file mode 100644 index 00000000000..d861af929c3 --- /dev/null +++ b/tests/integration/test_composable_protocols/test.py @@ -0,0 +1,94 @@ +import ssl +import pytest +import os.path as p +import os +from helpers.cluster import ClickHouseCluster +from helpers.client import Client +import urllib.request, urllib.parse +import subprocess +import socket + +SCRIPT_DIR = 
os.path.dirname(os.path.realpath(__file__)) + +cluster = ClickHouseCluster(__file__) +server = cluster.add_instance( + "server", + base_config_dir="configs", + main_configs=["configs/server.crt", "configs/server.key"], +) + + +@pytest.fixture(scope="module", autouse=True) +def setup_nodes(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def execute_query_https(host, port, query): + url = f"https://{host}:{port}/?query={urllib.parse.quote(query)}" + + ctx = ssl.create_default_context() + ctx.check_hostname = False + ctx.verify_mode = ssl.CERT_NONE + + request = urllib.request.Request(url) + response = urllib.request.urlopen(request, context=ctx).read() + return response.decode("utf-8") + + +def execute_query_http(host, port, query): + url = f"http://{host}:{port}/?query={urllib.parse.quote(query)}" + + request = urllib.request.Request(url) + response = urllib.request.urlopen(request).read() + return response.decode("utf-8") + + +def netcat(hostname, port, content): + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + s.connect((hostname, port)) + s.sendall(content) + s.shutdown(socket.SHUT_WR) + data = [] + while 1: + d = s.recv(1024) + if len(d) == 0: + break + data.append(d) + s.close() + return b"".join(data) + + +def test_connections(): + + client = Client(server.ip_address, 9000, command=cluster.client_bin_path) + assert client.query("SELECT 1") == "1\n" + + client = Client( + server.ip_address, + 9440, + command=cluster.client_bin_path, + secure=True, + config=f"{SCRIPT_DIR}/configs/client.xml", + ) + assert client.query("SELECT 1") == "1\n" + + client = Client(server.ip_address, 9001, command=cluster.client_bin_path) + assert client.query("SELECT 1") == "1\n" + + assert execute_query_http(server.ip_address, 8123, "SELECT 1") == "1\n" + + assert execute_query_https(server.ip_address, 8443, "SELECT 1") == "1\n" + + assert execute_query_https(server.ip_address, 8444, "SELECT 1") == "1\n" + + data = "PROXY TCP4 255.255.255.255 255.255.255.255 65535 65535\r\n\0\021ClickHouse client\024\r\253\251\003\0\007default\0\004\001\0\001\0\0\t0.0.0.0:0\001\tmilovidov\021milovidov-desktop\vClickHouse \024\r\253\251\003\0\001\0\0\0\002\001\025SELECT 'Hello, world'\002\0\247\203\254l\325\\z|\265\254F\275\333\206\342\024\202\024\0\0\0\n\0\0\0\240\01\0\02\377\377\377\377\0\0\0" + assert ( + netcat(server.ip_address, 9100, bytearray(data, "latin-1")).find( + bytearray("Hello, world", "latin-1") + ) + >= 0 + ) diff --git a/tests/integration/test_disk_over_web_server/test.py b/tests/integration/test_disk_over_web_server/test.py index ea6e407a18f..2ccc17db4f4 100644 --- a/tests/integration/test_disk_over_web_server/test.py +++ b/tests/integration/test_disk_over_web_server/test.py @@ -129,6 +129,9 @@ def test_incorrect_usage(cluster): result = node2.query_and_get_error("TRUNCATE TABLE test0") assert "Table is read-only" in result + result = node2.query_and_get_error("OPTIMIZE TABLE test0 FINAL") + assert "Only read-only operations are supported" in result + node2.query("DROP TABLE test0 SYNC") diff --git a/tests/integration/test_disks_app_func/test.py b/tests/integration/test_disks_app_func/test.py index d87f387e122..de9b23abd5e 100644 --- a/tests/integration/test_disks_app_func/test.py +++ b/tests/integration/test_disks_app_func/test.py @@ -37,7 +37,7 @@ def test_disks_app_func_ld(started_cluster): source = cluster.instances["disks_app_test"] out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--send-logs", "list-disks"] + ["/usr/bin/clickhouse", "disks", 
"--save-logs", "list-disks"] ) disks = out.split("\n") @@ -51,7 +51,7 @@ def test_disks_app_func_ls(started_cluster): init_data(source) out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--send-logs", "--disk", "test1", "list", "."] + ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test1", "list", "."] ) files = out.split("\n") @@ -62,7 +62,7 @@ def test_disks_app_func_ls(started_cluster): [ "/usr/bin/clickhouse", "disks", - "--send-logs", + "--save-logs", "--disk", "test1", "list", @@ -89,7 +89,7 @@ def test_disks_app_func_cp(started_cluster): [ "/usr/bin/clickhouse", "disks", - "--send-logs", + "--save-logs", "--disk", "test1", "write", @@ -114,7 +114,7 @@ def test_disks_app_func_cp(started_cluster): ) out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--send-logs", "--disk", "test2", "list", "."] + ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test2", "list", "."] ) assert "path1" in out @@ -123,7 +123,7 @@ def test_disks_app_func_cp(started_cluster): [ "/usr/bin/clickhouse", "disks", - "--send-logs", + "--save-logs", "--disk", "test2", "remove", @@ -135,7 +135,7 @@ def test_disks_app_func_cp(started_cluster): [ "/usr/bin/clickhouse", "disks", - "--send-logs", + "--save-logs", "--disk", "test1", "remove", @@ -146,13 +146,13 @@ def test_disks_app_func_cp(started_cluster): # alesapin: Why we need list one more time? # kssenii: it is an assertion that the file is indeed deleted out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--send-logs", "--disk", "test2", "list", "."] + ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test2", "list", "."] ) assert "path1" not in out out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--send-logs", "--disk", "test1", "list", "."] + ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test1", "list", "."] ) assert "path1" not in out @@ -174,7 +174,7 @@ def test_disks_app_func_ln(started_cluster): ) out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--send-logs", "list", "data/default/"] + ["/usr/bin/clickhouse", "disks", "--save-logs", "list", "data/default/"] ) files = out.split("\n") @@ -196,7 +196,7 @@ def test_disks_app_func_rm(started_cluster): [ "/usr/bin/clickhouse", "disks", - "--send-logs", + "--save-logs", "--disk", "test2", "write", @@ -207,7 +207,7 @@ def test_disks_app_func_rm(started_cluster): ) out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--send-logs", "--disk", "test2", "list", "."] + ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test2", "list", "."] ) assert "path3" in out @@ -216,7 +216,7 @@ def test_disks_app_func_rm(started_cluster): [ "/usr/bin/clickhouse", "disks", - "--send-logs", + "--save-logs", "--disk", "test2", "remove", @@ -225,7 +225,7 @@ def test_disks_app_func_rm(started_cluster): ) out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--send-logs", "--disk", "test2", "list", "."] + ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test2", "list", "."] ) assert "path3" not in out @@ -237,7 +237,7 @@ def test_disks_app_func_mv(started_cluster): init_data(source) out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--send-logs", "--disk", "test1", "list", "."] + ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test1", "list", "."] ) files = out.split("\n") @@ -257,7 +257,7 @@ def test_disks_app_func_mv(started_cluster): ) out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--send-logs", "--disk", 
"test1", "list", "."] + ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test1", "list", "."] ) files = out.split("\n") @@ -277,7 +277,7 @@ def test_disks_app_func_read_write(started_cluster): [ "/usr/bin/clickhouse", "disks", - "--send-logs", + "--save-logs", "--disk", "test1", "write", @@ -291,7 +291,7 @@ def test_disks_app_func_read_write(started_cluster): [ "/usr/bin/clickhouse", "disks", - "--send-logs", + "--save-logs", "--disk", "test1", "read", diff --git a/tests/integration/test_failed_async_inserts/__init__.py b/tests/integration/test_failed_async_inserts/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_failed_async_inserts/configs/config.xml b/tests/integration/test_failed_async_inserts/configs/config.xml new file mode 100644 index 00000000000..038c0792b44 --- /dev/null +++ b/tests/integration/test_failed_async_inserts/configs/config.xml @@ -0,0 +1,3 @@ + + 1000 + diff --git a/tests/integration/test_failed_async_inserts/test.py b/tests/integration/test_failed_async_inserts/test.py new file mode 100644 index 00000000000..6d66ac97006 --- /dev/null +++ b/tests/integration/test_failed_async_inserts/test.py @@ -0,0 +1,54 @@ +import logging +from time import sleep + +import pytest +from helpers.cluster import ClickHouseCluster + + +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance( + "node", main_configs=["configs/config.xml"], with_zookeeper=True +) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def test_failed_async_inserts(started_cluster): + node = started_cluster.instances["node"] + + node.query( + "CREATE TABLE async_insert_30_10_2022 (id UInt32, s String) ENGINE = Memory" + ) + node.query( + "INSERT INTO async_insert_30_10_2022 SETTINGS async_insert = 1 VALUES ()", + ignore_error=True, + ) + node.query( + "INSERT INTO async_insert_30_10_2022 SETTINGS async_insert = 1 VALUES ([1,2,3], 1)", + ignore_error=True, + ) + node.query( + 'INSERT INTO async_insert_30_10_2022 SETTINGS async_insert = 1 FORMAT JSONEachRow {"id" : 1} {"x"}', + ignore_error=True, + ) + node.query( + "INSERT INTO async_insert_30_10_2022 SETTINGS async_insert = 1 VALUES (throwIf(4),'')", + ignore_error=True, + ) + + select_query = ( + "SELECT value FROM system.events WHERE event == 'FailedAsyncInsertQuery'" + ) + + assert node.query(select_query) == "4\n" + + node.query("DROP TABLE IF EXISTS async_insert_30_10_2022 NO DELAY") diff --git a/tests/integration/test_grpc_protocol/test.py b/tests/integration/test_grpc_protocol/test.py index 52c583973d0..a3f2650eac7 100644 --- a/tests/integration/test_grpc_protocol/test.py +++ b/tests/integration/test_grpc_protocol/test.py @@ -387,7 +387,7 @@ progress { , stats { rows: 8 blocks: 4 - allocated_bytes: 324 + allocated_bytes: 1092 applied_limit: true rows_before_limit: 8 } diff --git a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/__init__.py b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/config.xml b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/config.xml new file mode 100644 index 00000000000..42a1f962705 --- /dev/null +++ b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/config.xml @@ -0,0 +1,4 @@ + + 1 + 
250 + diff --git a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/host_regexp.xml b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/host_regexp.xml new file mode 100644 index 00000000000..7a2141e6c7e --- /dev/null +++ b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/host_regexp.xml @@ -0,0 +1,11 @@ + + + + + + test1\.example\.com$ + + default + + + \ No newline at end of file diff --git a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/listen_host.xml b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/listen_host.xml new file mode 100644 index 00000000000..58ef55cd3f3 --- /dev/null +++ b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/listen_host.xml @@ -0,0 +1,5 @@ + + :: + 0.0.0.0 + 1 + diff --git a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/coredns_config/Corefile b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/coredns_config/Corefile new file mode 100644 index 00000000000..0dd198441dc --- /dev/null +++ b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/coredns_config/Corefile @@ -0,0 +1,8 @@ +. { + hosts /example.com { + reload "200ms" + fallthrough + } + forward . 127.0.0.11 + log +} diff --git a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/coredns_config/example.com b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/coredns_config/example.com new file mode 100644 index 00000000000..9beb415c290 --- /dev/null +++ b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/coredns_config/example.com @@ -0,0 +1 @@ +filled in runtime, but needs to exist in order to be volume mapped in docker \ No newline at end of file diff --git a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/scripts/stress_test.py b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/scripts/stress_test.py new file mode 100644 index 00000000000..b8bafb3d0c1 --- /dev/null +++ b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/scripts/stress_test.py @@ -0,0 +1,63 @@ +import pycurl +import threading +from io import BytesIO +import sys + +client_ip = sys.argv[1] +server_ip = sys.argv[2] + +mutex = threading.Lock() +success_counter = 0 +number_of_threads = 100 +number_of_iterations = 100 + + +def perform_request(): + + buffer = BytesIO() + crl = pycurl.Curl() + crl.setopt(pycurl.INTERFACE, client_ip) + crl.setopt(crl.WRITEDATA, buffer) + crl.setopt(crl.URL, f"http://{server_ip}:8123/?query=select+1&user=test_dns") + + crl.perform() + + # End curl session + crl.close() + + str_response = buffer.getvalue().decode("iso-8859-1") + expected_response = "1\n" + + mutex.acquire() + + global success_counter + + if str_response == expected_response: + success_counter += 1 + + mutex.release() + + +def perform_multiple_requests(n): + for request_number in range(n): + perform_request() + + +threads = [] + + +for i in range(number_of_threads): + thread = threading.Thread( + target=perform_multiple_requests, args=(number_of_iterations,) + ) + thread.start() + threads.append(thread) + +for thread in threads: + thread.join() + + +if success_counter == number_of_threads * number_of_iterations: + exit(0) + +exit(1) diff --git a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/test.py b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/test.py new file mode 100644 index 
00000000000..62f47579612 --- /dev/null +++ b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/test.py @@ -0,0 +1,71 @@ +import pytest +from helpers.cluster import ClickHouseCluster, get_docker_compose_path, run_and_check +from time import sleep +import os + +DOCKER_COMPOSE_PATH = get_docker_compose_path() +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) + +cluster = ClickHouseCluster(__file__) + +ch_server = cluster.add_instance( + "clickhouse-server", + with_coredns=True, + main_configs=["configs/config.xml", "configs/listen_host.xml"], + user_configs=["configs/host_regexp.xml"], +) + +client = cluster.add_instance( + "clickhouse-client", +) + + +@pytest.fixture(scope="module") +def started_cluster(): + global cluster + try: + cluster.start() + yield cluster + + finally: + cluster.shutdown() + + +def setup_dns_server(ip): + domains_string = "test3.example.com test2.example.com test1.example.com" + example_file_path = f'{ch_server.env_variables["COREDNS_CONFIG_DIR"]}/example.com' + run_and_check(f"echo '{ip} {domains_string}' > {example_file_path}", shell=True) + + +def setup_ch_server(dns_server_ip): + ch_server.exec_in_container( + (["bash", "-c", f"echo 'nameserver {dns_server_ip}' > /etc/resolv.conf"]) + ) + ch_server.exec_in_container( + (["bash", "-c", "echo 'options ndots:0' >> /etc/resolv.conf"]) + ) + ch_server.query("SYSTEM DROP DNS CACHE") + + +def build_endpoint_v4(ip): + return f"'http://{ip}:8123/?query=SELECT+1&user=test_dns'" + + +def build_endpoint_v6(ip): + return build_endpoint_v4(f"[{ip}]") + + +def test_host_regexp_multiple_ptr_v4(started_cluster): + server_ip = cluster.get_instance_ip("clickhouse-server") + client_ip = cluster.get_instance_ip("clickhouse-client") + dns_server_ip = cluster.get_instance_ip(cluster.coredns_host) + + setup_dns_server(client_ip) + setup_ch_server(dns_server_ip) + + current_dir = os.path.dirname(__file__) + client.copy_file_to_container( + os.path.join(current_dir, "scripts", "stress_test.py"), "stress_test.py" + ) + + client.exec_in_container(["python3", f"stress_test.py", client_ip, server_ip]) diff --git a/tests/integration/test_keeper_four_word_command/test.py b/tests/integration/test_keeper_four_word_command/test.py index 30abc7422c4..4559904f8b7 100644 --- a/tests/integration/test_keeper_four_word_command/test.py +++ b/tests/integration/test_keeper_four_word_command/test.py @@ -596,3 +596,48 @@ def test_cmd_wchp(started_cluster): assert "/test_4lw_normal_node_1" in list_data finally: destroy_zk_client(zk) + + +def test_cmd_csnp(started_cluster): + zk = None + try: + wait_nodes() + zk = get_fake_zk(node1.name, timeout=30.0) + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="csnp") + try: + int(data) + assert True + except ValueError: + assert False + finally: + destroy_zk_client(zk) + + +def test_cmd_lgif(started_cluster): + zk = None + try: + wait_nodes() + clear_znodes() + + zk = get_fake_zk(node1.name, timeout=30.0) + do_some_action(zk, create_cnt=100) + + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="lgif") + print(data) + reader = csv.reader(data.split("\n"), delimiter="\t") + result = {} + + for row in reader: + if len(row) != 0: + result[row[0]] = row[1] + + assert int(result["first_log_idx"]) == 1 + assert int(result["first_log_term"]) == 1 + assert int(result["last_log_idx"]) >= 1 + assert int(result["last_log_term"]) == 1 + assert int(result["last_committed_log_idx"]) >= 1 + assert int(result["leader_committed_log_idx"]) >= 1 + assert int(result["target_committed_log_idx"]) >= 1 + 
assert int(result["last_snapshot_idx"]) >= 1 + finally: + destroy_zk_client(zk) diff --git a/tests/integration/test_keeper_s3_snapshot/__init__.py b/tests/integration/test_keeper_s3_snapshot/__init__.py new file mode 100644 index 00000000000..e5a0d9b4834 --- /dev/null +++ b/tests/integration/test_keeper_s3_snapshot/__init__.py @@ -0,0 +1 @@ +#!/usr/bin/env python3 diff --git a/tests/integration/test_keeper_s3_snapshot/configs/keeper_config1.xml b/tests/integration/test_keeper_s3_snapshot/configs/keeper_config1.xml new file mode 100644 index 00000000000..8459ea3e068 --- /dev/null +++ b/tests/integration/test_keeper_s3_snapshot/configs/keeper_config1.xml @@ -0,0 +1,42 @@ + + + + http://minio1:9001/snapshots/ + minio + minio123 + + 9181 + 1 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + * + + + 5000 + 10000 + 5000 + 50 + trace + + + + + 1 + node1 + 9234 + + + 2 + node2 + 9234 + true + + + 3 + node3 + 9234 + true + + + + diff --git a/tests/integration/test_keeper_s3_snapshot/configs/keeper_config2.xml b/tests/integration/test_keeper_s3_snapshot/configs/keeper_config2.xml new file mode 100644 index 00000000000..dfe73628f66 --- /dev/null +++ b/tests/integration/test_keeper_s3_snapshot/configs/keeper_config2.xml @@ -0,0 +1,42 @@ + + + + http://minio1:9001/snapshots/ + minio + minio123 + + 9181 + 2 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + * + + + 5000 + 10000 + 5000 + 75 + trace + + + + + 1 + node1 + 9234 + + + 2 + node2 + 9234 + true + + + 3 + node3 + 9234 + true + + + + diff --git a/tests/integration/test_keeper_s3_snapshot/configs/keeper_config3.xml b/tests/integration/test_keeper_s3_snapshot/configs/keeper_config3.xml new file mode 100644 index 00000000000..948d9527718 --- /dev/null +++ b/tests/integration/test_keeper_s3_snapshot/configs/keeper_config3.xml @@ -0,0 +1,42 @@ + + + + http://minio1:9001/snapshots/ + minio + minio123 + + 9181 + 3 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + * + + + 5000 + 10000 + 5000 + 75 + trace + + + + + 1 + node1 + 9234 + + + 2 + node2 + 9234 + true + + + 3 + node3 + 9234 + true + + + + diff --git a/tests/integration/test_keeper_s3_snapshot/test.py b/tests/integration/test_keeper_s3_snapshot/test.py new file mode 100644 index 00000000000..3e19bc4822c --- /dev/null +++ b/tests/integration/test_keeper_s3_snapshot/test.py @@ -0,0 +1,120 @@ +import pytest +from helpers.cluster import ClickHouseCluster +from time import sleep + +from kazoo.client import KazooClient + +# from kazoo.protocol.serialization import Connect, read_buffer, write_buffer + +cluster = ClickHouseCluster(__file__) +node1 = cluster.add_instance( + "node1", + main_configs=["configs/keeper_config1.xml"], + stay_alive=True, + with_minio=True, +) +node2 = cluster.add_instance( + "node2", + main_configs=["configs/keeper_config2.xml"], + stay_alive=True, + with_minio=True, +) +node3 = cluster.add_instance( + "node3", + main_configs=["configs/keeper_config3.xml"], + stay_alive=True, + with_minio=True, +) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + cluster.minio_client.make_bucket("snapshots") + + yield cluster + + finally: + cluster.shutdown() + + +def get_fake_zk(nodename, timeout=30.0): + _fake_zk_instance = KazooClient( + hosts=cluster.get_instance_ip(nodename) + ":9181", timeout=timeout + ) + _fake_zk_instance.start() + return _fake_zk_instance + + +def destroy_zk_client(zk): + try: + if zk: + zk.stop() + zk.close() + except: + 
pass + + +def wait_node(node): + for _ in range(100): + zk = None + try: + zk = get_fake_zk(node.name, timeout=30.0) + zk.sync("/") + print("node", node.name, "ready") + break + except Exception as ex: + sleep(0.2) + print("Waiting until", node.name, "will be ready, exception", ex) + finally: + destroy_zk_client(zk) + else: + raise Exception("Can't wait node", node.name, "to become ready") + + +def test_s3_upload(started_cluster): + node1_zk = get_fake_zk(node1.name) + + # we defined in configs snapshot_distance as 50 + # so after 50 requests we should generate a snapshot + for _ in range(210): + node1_zk.create("/test", sequence=True) + + def get_saved_snapshots(): + return [ + obj.object_name + for obj in list(cluster.minio_client.list_objects("snapshots")) + ] + + saved_snapshots = get_saved_snapshots() + assert set(saved_snapshots) == set( + [ + "snapshot_50.bin.zstd", + "snapshot_100.bin.zstd", + "snapshot_150.bin.zstd", + "snapshot_200.bin.zstd", + ] + ) + + destroy_zk_client(node1_zk) + node1.stop_clickhouse(kill=True) + + # wait for new leader to be picked and that it continues + # uploading snapshots + wait_node(node2) + node2_zk = get_fake_zk(node2.name) + for _ in range(200): + node2_zk.create("/test", sequence=True) + + saved_snapshots = get_saved_snapshots() + + assert len(saved_snapshots) > 4 + + success_upload_message = "Successfully uploaded" + assert node2.contains_in_log(success_upload_message) or node3.contains_in_log( + success_upload_message + ) + + destroy_zk_client(node2_zk) diff --git a/tests/integration/test_mask_queries_in_logs/test.py b/tests/integration/test_mask_queries_in_logs/test.py deleted file mode 100644 index 4a4d3ee4ed0..00000000000 --- a/tests/integration/test_mask_queries_in_logs/test.py +++ /dev/null @@ -1,75 +0,0 @@ -import pytest -from helpers.cluster import ClickHouseCluster - -cluster = ClickHouseCluster(__file__) -node = cluster.add_instance("node") - - -@pytest.fixture(scope="module", autouse=True) -def started_cluster(): - try: - cluster.start() - yield cluster - - finally: - cluster.shutdown() - - -def check_logs(must_contain, must_not_contain): - node.query("SYSTEM FLUSH LOGS") - - for str in must_contain: - assert node.contains_in_log(str) - assert ( - int( - node.query( - f"SELECT COUNT() FROM system.query_log WHERE query LIKE '%{str}%'" - ).strip() - ) - >= 1 - ) - - for str in must_not_contain: - assert not node.contains_in_log(str) - assert ( - int( - node.query( - f"SELECT COUNT() FROM system.query_log WHERE query LIKE '%{str}%'" - ).strip() - ) - == 0 - ) - - -# Passwords in CREATE/ALTER queries must be hidden in logs. 
-def test_create_alter_user(): - node.query("CREATE USER u1 IDENTIFIED BY 'qwe123' SETTINGS custom_a = 'a'") - node.query("ALTER USER u1 IDENTIFIED BY '123qwe' SETTINGS custom_b = 'b'") - node.query( - "CREATE USER u2 IDENTIFIED WITH plaintext_password BY 'plainpasswd' SETTINGS custom_c = 'c'" - ) - - assert ( - node.query("SHOW CREATE USER u1") - == "CREATE USER u1 IDENTIFIED WITH sha256_password SETTINGS custom_b = \\'b\\'\n" - ) - assert ( - node.query("SHOW CREATE USER u2") - == "CREATE USER u2 IDENTIFIED WITH plaintext_password SETTINGS custom_c = \\'c\\'\n" - ) - - check_logs( - must_contain=[ - "CREATE USER u1 IDENTIFIED WITH sha256_password", - "ALTER USER u1 IDENTIFIED WITH sha256_password", - "CREATE USER u2 IDENTIFIED WITH plaintext_password", - ], - must_not_contain=[ - "qwe123", - "123qwe", - "plainpasswd", - "IDENTIFIED WITH sha256_password BY", - "IDENTIFIED WITH sha256_hash BY", - "IDENTIFIED WITH plaintext_password BY", - ], - ) diff --git a/tests/integration/test_mask_sensitive_info_in_logs/__init__.py b/tests/integration/test_mask_sensitive_info_in_logs/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_mask_sensitive_info_in_logs/test.py b/tests/integration/test_mask_sensitive_info_in_logs/test.py new file mode 100644 index 00000000000..48f11fbf7a1 --- /dev/null +++ b/tests/integration/test_mask_sensitive_info_in_logs/test.py @@ -0,0 +1,340 @@ +import pytest +import random, string +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance("node", with_zookeeper=True) + + +@pytest.fixture(scope="module", autouse=True) +def started_cluster(): + try: + cluster.start() + yield cluster + + finally: + cluster.shutdown() + + +def check_logs(must_contain=[], must_not_contain=[]): + node.query("SYSTEM FLUSH LOGS") + + for str in must_contain: + escaped_str = str.replace("`", "\\`").replace("[", "\\[").replace("]", "\\]") + assert node.contains_in_log(escaped_str) + + for str in must_not_contain: + escaped_str = str.replace("`", "\\`").replace("[", "\\[").replace("]", "\\]") + assert not node.contains_in_log(escaped_str) + + for str in must_contain: + escaped_str = str.replace("'", "\\'") + assert system_query_log_contains_search_pattern(escaped_str) + + for str in must_not_contain: + escaped_str = str.replace("'", "\\'") + assert not system_query_log_contains_search_pattern(escaped_str) + + +# Returns true if "system.query_log" has a query matching a specified pattern. +def system_query_log_contains_search_pattern(search_pattern): + return ( + int( + node.query( + f"SELECT COUNT() FROM system.query_log WHERE query LIKE '%{search_pattern}%'" + ).strip() + ) + >= 1 + ) + + +# Generates a random string. +def new_password(len=16): + return "".join( + random.choice(string.ascii_uppercase + string.digits) for _ in range(len) + ) + + +# Passwords in CREATE/ALTER queries must be hidden in logs. 
+def test_create_alter_user(): + password = new_password() + + node.query(f"CREATE USER u1 IDENTIFIED BY '{password}' SETTINGS custom_a = 'a'") + node.query( + f"ALTER USER u1 IDENTIFIED BY '{password}{password}' SETTINGS custom_b = 'b'" + ) + node.query( + f"CREATE USER u2 IDENTIFIED WITH plaintext_password BY '{password}' SETTINGS custom_c = 'c'" + ) + + assert ( + node.query("SHOW CREATE USER u1") + == "CREATE USER u1 IDENTIFIED WITH sha256_password SETTINGS custom_b = \\'b\\'\n" + ) + assert ( + node.query("SHOW CREATE USER u2") + == "CREATE USER u2 IDENTIFIED WITH plaintext_password SETTINGS custom_c = \\'c\\'\n" + ) + + check_logs( + must_contain=[ + "CREATE USER u1 IDENTIFIED WITH sha256_password", + "ALTER USER u1 IDENTIFIED WITH sha256_password", + "CREATE USER u2 IDENTIFIED WITH plaintext_password", + ], + must_not_contain=[ + password, + "IDENTIFIED WITH sha256_password BY", + "IDENTIFIED WITH sha256_hash BY", + "IDENTIFIED WITH plaintext_password BY", + ], + ) + + node.query("DROP USER u1, u2") + + +def test_create_table(): + password = new_password() + + table_engines = [ + f"MySQL('mysql57:3306', 'mysql_db', 'mysql_table', 'mysql_user', '{password}')", + f"PostgreSQL('postgres1:5432', 'postgres_db', 'postgres_table', 'postgres_user', '{password}')", + f"MongoDB('mongo1:27017', 'mongo_db', 'mongo_col', 'mongo_user', '{password}')", + f"S3('http://minio1:9001/root/data/test1.csv')", + f"S3('http://minio1:9001/root/data/test2.csv', 'CSV')", + f"S3('http://minio1:9001/root/data/test3.csv.gz', 'CSV', 'gzip')", + f"S3('http://minio1:9001/root/data/test4.csv', 'minio', '{password}', 'CSV')", + f"S3('http://minio1:9001/root/data/test5.csv.gz', 'minio', '{password}', 'CSV', 'gzip')", + ] + + for i, table_engine in enumerate(table_engines): + node.query(f"CREATE TABLE table{i} (x int) ENGINE = {table_engine}") + + check_logs( + must_contain=[ + "CREATE TABLE table0 (`x` int) ENGINE = MySQL('mysql57:3306', 'mysql_db', 'mysql_table', 'mysql_user', '[HIDDEN]')", + "CREATE TABLE table1 (`x` int) ENGINE = PostgreSQL('postgres1:5432', 'postgres_db', 'postgres_table', 'postgres_user', '[HIDDEN]')", + "CREATE TABLE table2 (`x` int) ENGINE = MongoDB('mongo1:27017', 'mongo_db', 'mongo_col', 'mongo_user', '[HIDDEN]')", + "CREATE TABLE table3 (x int) ENGINE = S3('http://minio1:9001/root/data/test1.csv')", + "CREATE TABLE table4 (x int) ENGINE = S3('http://minio1:9001/root/data/test2.csv', 'CSV')", + "CREATE TABLE table5 (x int) ENGINE = S3('http://minio1:9001/root/data/test3.csv.gz', 'CSV', 'gzip')", + "CREATE TABLE table6 (`x` int) ENGINE = S3('http://minio1:9001/root/data/test4.csv', 'minio', '[HIDDEN]', 'CSV')", + "CREATE TABLE table7 (`x` int) ENGINE = S3('http://minio1:9001/root/data/test5.csv.gz', 'minio', '[HIDDEN]', 'CSV', 'gzip')", + ], + must_not_contain=[password], + ) + + for i in range(0, len(table_engines)): + node.query(f"DROP TABLE table{i}") + + +def test_create_database(): + password = new_password() + + database_engines = [ + f"MySQL('localhost:3306', 'mysql_db', 'mysql_user', '{password}') SETTINGS connect_timeout=1, connection_max_tries=1", + # f"PostgreSQL('localhost:5432', 'postgres_db', 'postgres_user', '{password}')", + ] + + for i, database_engine in enumerate(database_engines): + # query_and_get_answer_with_error() is used here because we don't want to stop on error "Cannot connect to MySQL server". + # We test logging here and not actual work with MySQL server. 
+ node.query_and_get_answer_with_error( + f"CREATE DATABASE database{i} ENGINE = {database_engine}" + ) + + check_logs( + must_contain=[ + "CREATE DATABASE database0 ENGINE = MySQL('localhost:3306', 'mysql_db', 'mysql_user', '[HIDDEN]')", + # "CREATE DATABASE database1 ENGINE = PostgreSQL('localhost:5432', 'postgres_db', 'postgres_user', '[HIDDEN]')", + ], + must_not_contain=[password], + ) + + for i in range(0, len(database_engines)): + node.query(f"DROP DATABASE IF EXISTS database{i}") + + +def test_table_functions(): + password = new_password() + + table_functions = [ + f"mysql('mysql57:3306', 'mysql_db', 'mysql_table', 'mysql_user', '{password}')", + f"postgresql('postgres1:5432', 'postgres_db', 'postgres_table', 'postgres_user', '{password}')", + f"mongodb('mongo1:27017', 'mongo_db', 'mongo_col', 'mongo_user', '{password}', 'x int')", + f"s3('http://minio1:9001/root/data/test1.csv')", + f"s3('http://minio1:9001/root/data/test2.csv', 'CSV')", + f"s3('http://minio1:9001/root/data/test3.csv', 'minio', '{password}')", + f"s3('http://minio1:9001/root/data/test4.csv', 'CSV', 'x int')", + f"s3('http://minio1:9001/root/data/test5.csv.gz', 'CSV', 'x int', 'gzip')", + f"s3('http://minio1:9001/root/data/test6.csv', 'minio', '{password}', 'CSV')", + f"s3('http://minio1:9001/root/data/test7.csv', 'minio', '{password}', 'CSV', 'x int')", + f"s3('http://minio1:9001/root/data/test8.csv.gz', 'minio', '{password}', 'CSV', 'x int', 'gzip')", + f"s3Cluster('test_shard_localhost', 'http://minio1:9001/root/data/test1.csv', 'minio', '{password}')", + f"s3Cluster('test_shard_localhost', 'http://minio1:9001/root/data/test2.csv', 'CSV', 'x int')", + f"s3Cluster('test_shard_localhost', 'http://minio1:9001/root/data/test3.csv', 'minio', '{password}', 'CSV')", + f"remote('127.{{2..11}}', default.remote_table)", + f"remote('127.{{2..11}}', default.remote_table, rand())", + f"remote('127.{{2..11}}', default.remote_table, 'remote_user')", + f"remote('127.{{2..11}}', default.remote_table, 'remote_user', '{password}')", + f"remote('127.{{2..11}}', default.remote_table, 'remote_user', rand())", + f"remote('127.{{2..11}}', default.remote_table, 'remote_user', '{password}', rand())", + f"remote('127.{{2..11}}', 'default.remote_table', 'remote_user', '{password}', rand())", + f"remote('127.{{2..11}}', 'default', 'remote_table', 'remote_user', '{password}', rand())", + f"remote('127.{{2..11}}', numbers(10), 'remote_user', '{password}', rand())", + f"remoteSecure('127.{{2..11}}', 'default', 'remote_table', 'remote_user', '{password}')", + f"remoteSecure('127.{{2..11}}', 'default', 'remote_table', 'remote_user', rand())", + ] + + for i, table_function in enumerate(table_functions): + node.query(f"CREATE TABLE tablefunc{i} (x int) AS {table_function}") + + check_logs( + must_contain=[ + "CREATE TABLE tablefunc0 (`x` int) AS mysql('mysql57:3306', 'mysql_db', 'mysql_table', 'mysql_user', '[HIDDEN]')", + "CREATE TABLE tablefunc1 (`x` int) AS postgresql('postgres1:5432', 'postgres_db', 'postgres_table', 'postgres_user', '[HIDDEN]')", + "CREATE TABLE tablefunc2 (`x` int) AS mongodb('mongo1:27017', 'mongo_db', 'mongo_col', 'mongo_user', '[HIDDEN]', 'x int')", + "CREATE TABLE tablefunc3 (x int) AS s3('http://minio1:9001/root/data/test1.csv')", + "CREATE TABLE tablefunc4 (x int) AS s3('http://minio1:9001/root/data/test2.csv', 'CSV')", + "CREATE TABLE tablefunc5 (`x` int) AS s3('http://minio1:9001/root/data/test3.csv', 'minio', '[HIDDEN]')", + "CREATE TABLE tablefunc6 (x int) AS s3('http://minio1:9001/root/data/test4.csv', 'CSV', 'x 
int')", + "CREATE TABLE tablefunc7 (x int) AS s3('http://minio1:9001/root/data/test5.csv.gz', 'CSV', 'x int', 'gzip')", + "CREATE TABLE tablefunc8 (`x` int) AS s3('http://minio1:9001/root/data/test6.csv', 'minio', '[HIDDEN]', 'CSV')", + "CREATE TABLE tablefunc9 (`x` int) AS s3('http://minio1:9001/root/data/test7.csv', 'minio', '[HIDDEN]', 'CSV', 'x int')", + "CREATE TABLE tablefunc10 (`x` int) AS s3('http://minio1:9001/root/data/test8.csv.gz', 'minio', '[HIDDEN]', 'CSV', 'x int', 'gzip')", + "CREATE TABLE tablefunc11 (`x` int) AS s3Cluster('test_shard_localhost', 'http://minio1:9001/root/data/test1.csv', 'minio', '[HIDDEN]')", + "CREATE TABLE tablefunc12 (x int) AS s3Cluster('test_shard_localhost', 'http://minio1:9001/root/data/test2.csv', 'CSV', 'x int')", + "CREATE TABLE tablefunc13 (`x` int) AS s3Cluster('test_shard_localhost', 'http://minio1:9001/root/data/test3.csv', 'minio', '[HIDDEN]', 'CSV')", + "CREATE TABLE tablefunc14 (x int) AS remote('127.{2..11}', default.remote_table)", + "CREATE TABLE tablefunc15 (x int) AS remote('127.{2..11}', default.remote_table, rand())", + "CREATE TABLE tablefunc16 (x int) AS remote('127.{2..11}', default.remote_table, 'remote_user')", + "CREATE TABLE tablefunc17 (`x` int) AS remote('127.{2..11}', default.remote_table, 'remote_user', '[HIDDEN]')", + "CREATE TABLE tablefunc18 (x int) AS remote('127.{2..11}', default.remote_table, 'remote_user', rand())", + "CREATE TABLE tablefunc19 (`x` int) AS remote('127.{2..11}', default.remote_table, 'remote_user', '[HIDDEN]', rand())", + "CREATE TABLE tablefunc20 (`x` int) AS remote('127.{2..11}', 'default.remote_table', 'remote_user', '[HIDDEN]', rand())", + "CREATE TABLE tablefunc21 (`x` int) AS remote('127.{2..11}', 'default', 'remote_table', 'remote_user', '[HIDDEN]', rand())", + "CREATE TABLE tablefunc22 (`x` int) AS remote('127.{2..11}', numbers(10), 'remote_user', '[HIDDEN]', rand())", + "CREATE TABLE tablefunc23 (`x` int) AS remoteSecure('127.{2..11}', 'default', 'remote_table', 'remote_user', '[HIDDEN]')", + "CREATE TABLE tablefunc24 (x int) AS remoteSecure('127.{2..11}', 'default', 'remote_table', 'remote_user', rand())", + ], + must_not_contain=[password], + ) + + for i in range(0, len(table_functions)): + node.query(f"DROP TABLE tablefunc{i}") + + +def test_encryption_functions(): + plaintext = new_password() + cipher = new_password() + key = new_password(32) + iv8 = new_password(8) + iv16 = new_password(16) + add = new_password() + + encryption_functions = [ + f"encrypt('aes-256-ofb', '{plaintext}', '{key}')", + f"encrypt('aes-256-ofb', '{plaintext}', '{key}', '{iv16}')", + f"encrypt('aes-256-gcm', '{plaintext}', '{key}', '{iv8}')", + f"encrypt('aes-256-gcm', '{plaintext}', '{key}', '{iv8}', '{add}')", + f"decrypt('aes-256-ofb', '{cipher}', '{key}', '{iv16}')", + f"aes_encrypt_mysql('aes-256-ofb', '{plaintext}', '{key}', '{iv16}')", + f"aes_decrypt_mysql('aes-256-ofb', '{cipher}', '{key}', '{iv16}')", + f"tryDecrypt('aes-256-ofb', '{cipher}', '{key}', '{iv16}')", + ] + + for encryption_function in encryption_functions: + node.query(f"SELECT {encryption_function}") + + check_logs( + must_contain=[ + "SELECT encrypt('aes-256-ofb', '[HIDDEN]')", + "SELECT encrypt('aes-256-gcm', '[HIDDEN]')", + "SELECT decrypt('aes-256-ofb', '[HIDDEN]')", + "SELECT aes_encrypt_mysql('aes-256-ofb', '[HIDDEN]')", + "SELECT aes_decrypt_mysql('aes-256-ofb', '[HIDDEN]')", + "SELECT tryDecrypt('aes-256-ofb', '[HIDDEN]')", + ], + must_not_contain=[plaintext, cipher, key, iv8, iv16, add], + ) + + +def test_create_dictionary(): + 
password = new_password() + + node.query( + f"CREATE DICTIONARY dict1 (n int DEFAULT 0, m int DEFAULT 1) PRIMARY KEY n " + f"SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'user1' TABLE 'test' PASSWORD '{password}' DB 'default')) " + f"LIFETIME(MIN 0 MAX 10) LAYOUT(FLAT())" + ) + + check_logs( + must_contain=[ + "CREATE DICTIONARY dict1 (`n` int DEFAULT 0, `m` int DEFAULT 1) PRIMARY KEY n " + "SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'user1' TABLE 'test' PASSWORD '[HIDDEN]' DB 'default')) " + "LIFETIME(MIN 0 MAX 10) LAYOUT(FLAT())" + ], + must_not_contain=[password], + ) + + node.query("DROP DICTIONARY dict1") + + +def test_backup_to_s3(): + node.query("CREATE TABLE temptbl (x int) ENGINE=Log") + password = new_password() + + queries = [ + f"BACKUP TABLE temptbl TO S3('http://minio1:9001/root/data/backups/backup1', 'minio', '{password}')", + f"RESTORE TABLE temptbl AS temptbl2 FROM S3('http://minio1:9001/root/data/backups/backup1', 'minio', '{password}')", + ] + + for query in queries: + # query_and_get_answer_with_error() is used here because we don't want to stop on error "Cannot connect to AWS". + # We test logging here and not actual work with AWS server. + node.query_and_get_answer_with_error(query) + + check_logs( + must_contain=[ + "BACKUP TABLE temptbl TO S3('http://minio1:9001/root/data/backups/backup1', 'minio', '[HIDDEN]')", + "RESTORE TABLE temptbl AS temptbl2 FROM S3('http://minio1:9001/root/data/backups/backup1', 'minio', '[HIDDEN]')", + ], + must_not_contain=[password], + ) + + node.query("DROP TABLE IF EXISTS temptbl") + node.query("DROP TABLE IF EXISTS temptbl2") + + +def test_on_cluster(): + password = new_password() + + node.query( + f"CREATE TABLE table_oncl ON CLUSTER 'test_shard_localhost' (x int) ENGINE = MySQL('mysql57:3307', 'mysql_db', 'mysql_table', 'mysql_user', '{password}')" + ) + + check_logs( + must_contain=[ + "CREATE TABLE table_oncl ON CLUSTER test_shard_localhost (`x` int) ENGINE = MySQL('mysql57:3307', 'mysql_db', 'mysql_table', 'mysql_user', '[HIDDEN]')", + ], + must_not_contain=[password], + ) + + # Check logs of DDLWorker during executing of this query. 
+ assert node.contains_in_log( + "DDLWorker: Processing task .*CREATE TABLE default\\.table_oncl UUID '[0-9a-fA-F-]*' (\\`x\\` Int32) ENGINE = MySQL('mysql57:3307', 'mysql_db', 'mysql_table', 'mysql_user', '\\[HIDDEN\\]')" + ) + assert node.contains_in_log( + "DDLWorker: Executing query: .*CREATE TABLE default\\.table_oncl UUID '[0-9a-fA-F-]*' (\\`x\\` Int32) ENGINE = MySQL('mysql57:3307', 'mysql_db', 'mysql_table', 'mysql_user', '\\[HIDDEN\\]')" + ) + assert node.contains_in_log( + "executeQuery: .*CREATE TABLE default\\.table_oncl UUID '[0-9a-fA-F-]*' (\\`x\\` Int32) ENGINE = MySQL('mysql57:3307', 'mysql_db', 'mysql_table', 'mysql_user', '\\[HIDDEN\\]')" + ) + assert node.contains_in_log( + "DDLWorker: Executed query: .*CREATE TABLE default\\.table_oncl UUID '[0-9a-fA-F-]*' (\\`x\\` Int32) ENGINE = MySQL('mysql57:3307', 'mysql_db', 'mysql_table', 'mysql_user', '\\[HIDDEN\\]')" + ) + assert system_query_log_contains_search_pattern( + "%CREATE TABLE default.table_oncl UUID \\'%\\' (`x` Int32) ENGINE = MySQL(\\'mysql57:3307\\', \\'mysql_db\\', \\'mysql_table\\', \\'mysql_user\\', \\'[HIDDEN]\\')" + ) + + node.query(f"DROP TABLE table_oncl") diff --git a/tests/integration/test_partition/configs/testkeeper.xml b/tests/integration/test_partition/configs/testkeeper.xml new file mode 100644 index 00000000000..5200b789a9b --- /dev/null +++ b/tests/integration/test_partition/configs/testkeeper.xml @@ -0,0 +1,6 @@ + + + + testkeeper + + \ No newline at end of file diff --git a/tests/integration/test_partition/test.py b/tests/integration/test_partition/test.py index f3df66631a5..320209b5d7e 100644 --- a/tests/integration/test_partition/test.py +++ b/tests/integration/test_partition/test.py @@ -2,9 +2,15 @@ import pytest import logging from helpers.cluster import ClickHouseCluster from helpers.test_tools import TSV +from helpers.test_tools import assert_eq_with_retry cluster = ClickHouseCluster(__file__) -instance = cluster.add_instance("instance") +instance = cluster.add_instance( + "instance", + main_configs=[ + "configs/testkeeper.xml", + ], +) q = instance.query path_to_data = "/var/lib/clickhouse/" @@ -478,3 +484,86 @@ def test_detached_part_dir_exists(started_cluster): == "all_1_1_0\nall_1_1_0_try1\nall_2_2_0\nall_2_2_0_try1\n" ) q("drop table detached_part_dir_exists") + + +def test_make_clone_in_detached(started_cluster): + q( + "create table clone_in_detached (n int, m String) engine=ReplicatedMergeTree('/clone_in_detached', '1') order by n" + ) + + path = path_to_data + "data/default/clone_in_detached/" + + # broken part already detached + q("insert into clone_in_detached values (42, '¯\_(ツ)_/¯')") + instance.exec_in_container(["rm", path + "all_0_0_0/data.bin"]) + instance.exec_in_container( + ["cp", "-r", path + "all_0_0_0", path + "detached/broken_all_0_0_0"] + ) + assert_eq_with_retry(instance, "select * from clone_in_detached", "\n") + assert ["broken_all_0_0_0",] == sorted( + instance.exec_in_container(["ls", path + "detached/"]).strip().split("\n") + ) + + # there's a directory with the same name, but different content + q("insert into clone_in_detached values (43, '¯\_(ツ)_/¯')") + instance.exec_in_container(["rm", path + "all_1_1_0/data.bin"]) + instance.exec_in_container( + ["cp", "-r", path + "all_1_1_0", path + "detached/broken_all_1_1_0"] + ) + instance.exec_in_container(["rm", path + "detached/broken_all_1_1_0/primary.idx"]) + instance.exec_in_container( + ["cp", "-r", path + "all_1_1_0", path + "detached/broken_all_1_1_0_try0"] + ) + instance.exec_in_container( + [ + "bash", + 
"-c", + "echo 'broken' > {}".format( + path + "detached/broken_all_1_1_0_try0/checksums.txt" + ), + ] + ) + assert_eq_with_retry(instance, "select * from clone_in_detached", "\n") + assert [ + "broken_all_0_0_0", + "broken_all_1_1_0", + "broken_all_1_1_0_try0", + "broken_all_1_1_0_try1", + ] == sorted( + instance.exec_in_container(["ls", path + "detached/"]).strip().split("\n") + ) + + # there are directories with the same name, but different content, and part already detached + q("insert into clone_in_detached values (44, '¯\_(ツ)_/¯')") + instance.exec_in_container(["rm", path + "all_2_2_0/data.bin"]) + instance.exec_in_container( + ["cp", "-r", path + "all_2_2_0", path + "detached/broken_all_2_2_0"] + ) + instance.exec_in_container(["rm", path + "detached/broken_all_2_2_0/primary.idx"]) + instance.exec_in_container( + ["cp", "-r", path + "all_2_2_0", path + "detached/broken_all_2_2_0_try0"] + ) + instance.exec_in_container( + [ + "bash", + "-c", + "echo 'broken' > {}".format( + path + "detached/broken_all_2_2_0_try0/checksums.txt" + ), + ] + ) + instance.exec_in_container( + ["cp", "-r", path + "all_2_2_0", path + "detached/broken_all_2_2_0_try1"] + ) + assert_eq_with_retry(instance, "select * from clone_in_detached", "\n") + assert [ + "broken_all_0_0_0", + "broken_all_1_1_0", + "broken_all_1_1_0_try0", + "broken_all_1_1_0_try1", + "broken_all_2_2_0", + "broken_all_2_2_0_try0", + "broken_all_2_2_0_try1", + ] == sorted( + instance.exec_in_container(["ls", path + "detached/"]).strip().split("\n") + ) diff --git a/tests/integration/test_read_only_table/__init__.py b/tests/integration/test_read_only_table/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_read_only_table/test.py b/tests/integration/test_read_only_table/test.py new file mode 100644 index 00000000000..28abbf6601e --- /dev/null +++ b/tests/integration/test_read_only_table/test.py @@ -0,0 +1,89 @@ +import time +import re +import logging + +import pytest +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import assert_eq_with_retry + +NUM_TABLES = 10 + + +def fill_nodes(nodes): + for table_id in range(NUM_TABLES): + for node in nodes: + node.query( + f""" + CREATE TABLE test_table_{table_id}(a UInt64) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/replicated/{table_id}', '{node.name}') ORDER BY tuple(); + """ + ) + + +cluster = ClickHouseCluster(__file__) +node1 = cluster.add_instance("node1", with_zookeeper=True) +node2 = cluster.add_instance("node2", with_zookeeper=True) +node3 = cluster.add_instance("node3", with_zookeeper=True) +nodes = [node1, node2, node3] + + +def sync_replicas(table): + for node in nodes: + node.query(f"SYSTEM SYNC REPLICA {table}") + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + + fill_nodes(nodes) + + yield cluster + + except Exception as ex: + print(ex) + + finally: + cluster.shutdown() + + +def test_restart_zookeeper(start_cluster): + + for table_id in range(NUM_TABLES): + node1.query( + f"INSERT INTO test_table_{table_id} VALUES (1), (2), (3), (4), (5);" + ) + + logging.info("Inserted test data and initialized all tables") + + def get_zookeeper_which_node_connected_to(node): + line = str( + node.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep 2181 | grep ESTABLISHED", + ], + privileged=True, + user="root", + ) + ).strip() + + pattern = re.compile(r"zoo[0-9]+", re.IGNORECASE) + result = pattern.findall(line) + assert ( + len(result) == 1 + ), "ClickHouse 
must be connected only to one Zookeeper at a time" + return result[0] + + node1_zk = get_zookeeper_which_node_connected_to(node1) + + # ClickHouse should +- immediately reconnect to another zookeeper node + cluster.stop_zookeeper_nodes([node1_zk]) + time.sleep(5) + + for table_id in range(NUM_TABLES): + node1.query( + f"INSERT INTO test_table_{table_id} VALUES (6), (7), (8), (9), (10);" + ) diff --git a/tests/integration/test_replicated_merge_tree_hdfs_zero_copy/test.py b/tests/integration/test_replicated_merge_tree_hdfs_zero_copy/test.py index 7d65bed3901..1f81421f93c 100644 --- a/tests/integration/test_replicated_merge_tree_hdfs_zero_copy/test.py +++ b/tests/integration/test_replicated_merge_tree_hdfs_zero_copy/test.py @@ -1,8 +1,14 @@ +import pytest + +# FIXME This test is too flaky +# https://github.com/ClickHouse/ClickHouse/issues/42561 + +pytestmark = pytest.mark.skip + import logging from string import Template import time -import pytest from helpers.cluster import ClickHouseCluster from helpers.test_tools import assert_eq_with_retry diff --git a/tests/integration/test_replicated_merge_tree_with_auxiliary_zookeepers/test.py b/tests/integration/test_replicated_merge_tree_with_auxiliary_zookeepers/test.py index c46e6840153..cf76d47157a 100644 --- a/tests/integration/test_replicated_merge_tree_with_auxiliary_zookeepers/test.py +++ b/tests/integration/test_replicated_merge_tree_with_auxiliary_zookeepers/test.py @@ -11,11 +11,13 @@ node1 = cluster.add_instance( "node1", main_configs=["configs/zookeeper_config.xml", "configs/remote_servers.xml"], with_zookeeper=True, + use_keeper=False, ) node2 = cluster.add_instance( "node2", main_configs=["configs/zookeeper_config.xml", "configs/remote_servers.xml"], with_zookeeper=True, + use_keeper=False, ) diff --git a/tests/integration/test_row_policy/test.py b/tests/integration/test_row_policy/test.py index 2e696be4988..1933823f5d2 100644 --- a/tests/integration/test_row_policy/test.py +++ b/tests/integration/test_row_policy/test.py @@ -867,3 +867,30 @@ def test_policy_on_distributed_table_via_role(): assert node.query( "SELECT * FROM dist_tbl SETTINGS prefer_localhost_replica=0", user="user1" ) == TSV([[0], [2], [4], [6], [8], [0], [2], [4], [6], [8]]) + + +def test_row_policy_filter_with_subquery(): + copy_policy_xml("no_filters.xml") + assert node.query("SHOW POLICIES") == "" + + node.query("DROP ROW POLICY IF EXISTS filter_1 ON table1") + node.query("DROP TABLE IF EXISTS table_1") + node.query("DROP TABLE IF EXISTS table_2") + + node.query( + "CREATE TABLE table_1 (x int, y int) ENGINE = MergeTree ORDER BY tuple()" + ) + node.query("INSERT INTO table_1 SELECT number, number * number FROM numbers(10)") + + node.query("CREATE TABLE table_2 (a int) ENGINE=MergeTree ORDER BY tuple()") + node.query("INSERT INTO table_2 VALUES (3), (5)") + + node.query( + "CREATE ROW POLICY filter_1 ON table_1 USING x IN (SELECT a FROM table_2) TO ALL" + ) + + assert node.query("SELECT * FROM table_1") == TSV([[3, 9], [5, 25]]) + + node.query("DROP ROW POLICY filter_1 ON table_1") + node.query("DROP TABLE table_1") + node.query("DROP TABLE table_2") diff --git a/tests/integration/test_s3_zero_copy_ttl/test_ttl_move_memory_usage.py b/tests/integration/test_s3_zero_copy_ttl/test_ttl_move_memory_usage.py new file mode 100644 index 00000000000..a1e10cde031 --- /dev/null +++ b/tests/integration/test_s3_zero_copy_ttl/test_ttl_move_memory_usage.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python3 +import time + +import pytest +from helpers.cluster import ClickHouseCluster + + 
+single_node_cluster = ClickHouseCluster(__file__)
+small_node = single_node_cluster.add_instance(
+    "small_node", main_configs=["configs/s3.xml"], with_minio=True
+)
+
+
+@pytest.fixture(scope="module")
+def started_single_node_cluster():
+    try:
+        single_node_cluster.start()
+
+        yield single_node_cluster
+    finally:
+        single_node_cluster.shutdown()
+
+
+def test_move_and_s3_memory_usage(started_single_node_cluster):
+    if small_node.is_built_with_sanitizer() or small_node.is_debug_build():
+        pytest.skip("Disabled for debug and sanitizers. Too slow.")
+
+    small_node.query(
+        "CREATE TABLE s3_test_with_ttl (x UInt32, a String codec(NONE), b String codec(NONE), c String codec(NONE), d String codec(NONE), e String codec(NONE)) engine = MergeTree order by x partition by x SETTINGS storage_policy='s3_and_default'"
+    )
+
+    for _ in range(10):
+        small_node.query(
+            "insert into s3_test_with_ttl select 0, repeat('a', 100), repeat('b', 100), repeat('c', 100), repeat('d', 100), repeat('e', 100) from zeros(400000) settings max_block_size = 8192, max_insert_block_size=10000000, min_insert_block_size_rows=10000000"
+        )
+
+    # After this we should have 5 columns, each about 10 * 100 * 400000 ~ 400 MB; ~2G of data in the partition in total
+    small_node.query("optimize table s3_test_with_ttl final")
+
+    small_node.query("system flush logs")
+    # We will take the memory usage from metric_log.
+    # It is easier than specifying a total memory limit (insert queries can hit this limit).
+    small_node.query("truncate table system.metric_log")
+
+    small_node.query(
+        "alter table s3_test_with_ttl move partition 0 to volume 'external'",
+        settings={"send_logs_level": "error"},
+    )
+    small_node.query("system flush logs")
+    max_usage = small_node.query(
+        "select max(CurrentMetric_MemoryTracking) from system.metric_log"
+    )
+    # The 3G limit is a big one. However, we can hit it anyway with parallel s3 writes enabled.
+    # Also, the actual value can be bigger because of memory drift.
+    # Increase it a little bit if the test fails.
+ assert int(max_usage) < 3e9 + res = small_node.query( + "select * from system.errors where last_error_message like '%Memory limit%' limit 1" + ) + assert res == "" diff --git a/tests/integration/test_storage_nats/test.py b/tests/integration/test_storage_nats/test.py index 63dde8922a6..77db3008524 100644 --- a/tests/integration/test_storage_nats/test.py +++ b/tests/integration/test_storage_nats/test.py @@ -1,3 +1,10 @@ +import pytest + +# FIXME This test is too flaky +# https://github.com/ClickHouse/ClickHouse/issues/39185 + +pytestmark = pytest.mark.skip + import json import os.path as p import random @@ -9,7 +16,6 @@ from random import randrange import math import asyncio -import pytest from google.protobuf.internal.encoder import _VarintBytes from helpers.client import QueryRuntimeException from helpers.cluster import ClickHouseCluster, check_nats_is_available, nats_connect_ssl diff --git a/tests/integration/test_storage_postgresql/test.py b/tests/integration/test_storage_postgresql/test.py index a3ebbe97451..7cc350e0be2 100644 --- a/tests/integration/test_storage_postgresql/test.py +++ b/tests/integration/test_storage_postgresql/test.py @@ -693,6 +693,19 @@ def test_auto_close_connection(started_cluster): assert count == 2 +def test_datetime(started_cluster): + cursor = started_cluster.postgres_conn.cursor() + cursor.execute("drop table if exists test") + cursor.execute("create table test (u timestamp)") + + node1.query("drop database if exists pg") + node1.query("create database pg engine = PostgreSQL(postgres1)") + assert "DateTime64(6)" in node1.query("show create table pg.test") + node1.query("detach table pg.test") + node1.query("attach table pg.test") + assert "DateTime64(6)" in node1.query("show create table pg.test") + + if __name__ == "__main__": cluster.start() input("Cluster created, press any key to destroy...") diff --git a/tests/performance/line_as_string_parsing.xml b/tests/performance/line_as_string_parsing.xml new file mode 100644 index 00000000000..d9fa1d4fa6e --- /dev/null +++ b/tests/performance/line_as_string_parsing.xml @@ -0,0 +1,9 @@ + + +INSERT INTO FUNCTION file(test_line_as_string.tsv) SELECT randomString(1000) FROM numbers(1000000) SETTINGS engine_file_truncate_on_insert=1 + +SELECT * FROM file(test_line_as_string.tsv, LineAsString) FORMAT Null + +INSERT INTO FUNCTION file(test_line_as_string.tsv) SELECT * FROM numbers(0) SETTINGS engine_file_truncate_on_insert=1 + + diff --git a/tests/performance/url_hits.xml b/tests/performance/url_hits.xml index 4a07c38b83f..46b39f3a6e9 100644 --- a/tests/performance/url_hits.xml +++ b/tests/performance/url_hits.xml @@ -13,10 +13,14 @@ protocol domain + domainRFC domainWithoutWWW + domainWithoutWWWRFC topLevelDomain firstSignificantSubdomain + firstSignificantSubdomainRFC cutToFirstSignificantSubdomain + cutToFirstSignificantSubdomainRFC path pathFull queryString diff --git a/tests/queries/0_stateless/00232_format_readable_decimal_size.reference b/tests/queries/0_stateless/00232_format_readable_decimal_size.reference new file mode 100644 index 00000000000..2f2a0f39bab --- /dev/null +++ b/tests/queries/0_stateless/00232_format_readable_decimal_size.reference @@ -0,0 +1,70 @@ +1.00 B 1.00 B 1.00 B +2.72 B 2.00 B 2.00 B +7.39 B 7.00 B 7.00 B +20.09 B 20.00 B 20.00 B +54.60 B 54.00 B 54.00 B +148.41 B 148.00 B 148.00 B +403.43 B 403.00 B 403.00 B +1.10 KB 1.10 KB 1.10 KB +2.98 KB 2.98 KB 2.98 KB +8.10 KB 8.10 KB 8.10 KB +22.03 KB 22.03 KB 22.03 KB +59.87 KB 59.87 KB 59.87 KB +162.75 KB 162.75 KB 162.75 KB +442.41 KB 442.41 
KB 442.41 KB +1.20 MB 1.20 MB 1.20 MB +3.27 MB 3.27 MB 3.27 MB +8.89 MB 8.89 MB 8.89 MB +24.15 MB 24.15 MB 24.15 MB +65.66 MB 65.66 MB 65.66 MB +178.48 MB 178.48 MB 178.48 MB +485.17 MB 485.17 MB 485.17 MB +1.32 GB 1.32 GB 1.32 GB +3.58 GB 3.58 GB 2.15 GB +9.74 GB 9.74 GB 2.15 GB +26.49 GB 26.49 GB 2.15 GB +72.00 GB 72.00 GB 2.15 GB +195.73 GB 195.73 GB 2.15 GB +532.05 GB 532.05 GB 2.15 GB +1.45 TB 1.45 TB 2.15 GB +3.93 TB 3.93 TB 2.15 GB +10.69 TB 10.69 TB 2.15 GB +29.05 TB 29.05 TB 2.15 GB +78.96 TB 78.96 TB 2.15 GB +214.64 TB 214.64 TB 2.15 GB +583.46 TB 583.46 TB 2.15 GB +1.59 PB 1.59 PB 2.15 GB +4.31 PB 4.31 PB 2.15 GB +11.72 PB 11.72 PB 2.15 GB +31.86 PB 31.86 PB 2.15 GB +86.59 PB 86.59 PB 2.15 GB +235.39 PB 235.39 PB 2.15 GB +639.84 PB 639.84 PB 2.15 GB +1.74 EB 1.74 EB 2.15 GB +4.73 EB 4.73 EB 2.15 GB +12.85 EB 12.85 EB 2.15 GB +34.93 EB 18.45 EB 2.15 GB +94.96 EB 18.45 EB 2.15 GB +258.13 EB 18.45 EB 2.15 GB +701.67 EB 18.45 EB 2.15 GB +1.91 ZB 18.45 EB 2.15 GB +5.18 ZB 18.45 EB 2.15 GB +14.09 ZB 18.45 EB 2.15 GB +38.31 ZB 18.45 EB 2.15 GB +104.14 ZB 18.45 EB 2.15 GB +283.08 ZB 18.45 EB 2.15 GB +769.48 ZB 18.45 EB 2.15 GB +2.09 YB 18.45 EB 2.15 GB +5.69 YB 18.45 EB 2.15 GB +15.46 YB 18.45 EB 2.15 GB +42.01 YB 18.45 EB 2.15 GB +114.20 YB 18.45 EB 2.15 GB +310.43 YB 18.45 EB 2.15 GB +843.84 YB 18.45 EB 2.15 GB +2293.78 YB 18.45 EB 2.15 GB +6235.15 YB 18.45 EB 2.15 GB +16948.89 YB 18.45 EB 2.15 GB +46071.87 YB 18.45 EB 2.15 GB +125236.32 YB 18.45 EB 2.15 GB +340427.60 YB 18.45 EB 2.15 GB +925378.17 YB 18.45 EB 2.15 GB diff --git a/tests/queries/0_stateless/00232_format_readable_decimal_size.sql b/tests/queries/0_stateless/00232_format_readable_decimal_size.sql new file mode 100644 index 00000000000..f8e1409ae05 --- /dev/null +++ b/tests/queries/0_stateless/00232_format_readable_decimal_size.sql @@ -0,0 +1,4 @@ +WITH round(exp(number), 6) AS x, x > 0xFFFFFFFFFFFFFFFF ? 0xFFFFFFFFFFFFFFFF : toUInt64(x) AS y, x > 0x7FFFFFFF ? 
0x7FFFFFFF : toInt32(x) AS z +SELECT formatReadableDecimalSize(x), formatReadableDecimalSize(y), formatReadableDecimalSize(z) +FROM system.numbers +LIMIT 70; diff --git a/tests/queries/0_stateless/00396_uuid.reference b/tests/queries/0_stateless/00396_uuid.reference index d70322ec4c1..588f11cb466 100644 --- a/tests/queries/0_stateless/00396_uuid.reference +++ b/tests/queries/0_stateless/00396_uuid.reference @@ -6,3 +6,8 @@ 01234567-89ab-cdef-0123-456789abcdef 01234567-89ab-cdef-0123-456789abcdef 01234567-89ab-cdef-0123-456789abcdef 3f1ed72e-f7fe-4459-9cbe-95fe9298f845 1 +-- UUID variants -- +00112233445566778899AABBCCDDEEFF +33221100554477668899AABBCCDDEEFF +00112233-4455-6677-8899-aabbccddeeff +00112233-4455-6677-8899-aabbccddeeff diff --git a/tests/queries/0_stateless/00396_uuid.sql b/tests/queries/0_stateless/00396_uuid.sql index 9d8b48bddb0..4ad659e2464 100644 --- a/tests/queries/0_stateless/00396_uuid.sql +++ b/tests/queries/0_stateless/00396_uuid.sql @@ -11,3 +11,9 @@ with generateUUIDv4() as uuid, identity(lower(hex(reverse(reinterpretAsString(uuid))))) as str, reinterpretAsUUID(reverse(unhex(str))) as uuid2 select uuid = uuid2; + +select '-- UUID variants --'; +select hex(UUIDStringToNum('00112233-4455-6677-8899-aabbccddeeff', 1)); +select hex(UUIDStringToNum('00112233-4455-6677-8899-aabbccddeeff', 2)); +select UUIDNumToString(UUIDStringToNum('00112233-4455-6677-8899-aabbccddeeff', 1), 1); +select UUIDNumToString(UUIDStringToNum('00112233-4455-6677-8899-aabbccddeeff', 2), 2); diff --git a/tests/queries/0_stateless/00398_url_functions.reference b/tests/queries/0_stateless/00398_url_functions.reference index feba95fb1b3..39d740e55cd 100644 --- a/tests/queries/0_stateless/00398_url_functions.reference +++ b/tests/queries/0_stateless/00398_url_functions.reference @@ -8,6 +8,32 @@ http ====HOST==== www.example.com + + + + + + + + +www.example.com +127.0.0.1 +www.example.com +www.example.com +www.example.com +example.com +example.com +example.com +www.example.com +example.com +example.com +example.com +example.com +example.com +example.com + + + www.example.com 127.0.0.1 www.example.com @@ -98,8 +124,25 @@ example.com example.com com +example.com +example.com +example.com +example.com +example.com +example.com +example.com +example.com +example.com +com + ====CUT TO FIRST SIGNIFICANT SUBDOMAIN WITH WWW==== +www.com +example.com +example.com +example.com +example.com + www.com example.com example.com diff --git a/tests/queries/0_stateless/00398_url_functions.sql b/tests/queries/0_stateless/00398_url_functions.sql.j2 similarity index 72% rename from tests/queries/0_stateless/00398_url_functions.sql rename to tests/queries/0_stateless/00398_url_functions.sql.j2 index 66fe591bb58..dd7da2ce6ad 100644 --- a/tests/queries/0_stateless/00398_url_functions.sql +++ b/tests/queries/0_stateless/00398_url_functions.sql.j2 @@ -7,16 +7,28 @@ SELECT protocol('http://127.0.0.1:443/') AS Scheme; SELECT protocol('//127.0.0.1:443/') AS Scheme; SELECT '====HOST===='; -SELECT domain('http://paul@www.example.com:80/') AS Host; -SELECT domain('http:/paul/example/com') AS Host; -SELECT domain('http://www.example.com?q=4') AS Host; -SELECT domain('http://127.0.0.1:443/') AS Host; -SELECT domain('//www.example.com') AS Host; -SELECT domain('//paul@www.example.com') AS Host; -SELECT domain('www.example.com') as Host; -SELECT domain('example.com') as Host; -SELECT domainWithoutWWW('//paul@www.example.com') AS Host; -SELECT domainWithoutWWW('http://paul@www.example.com:80/') AS Host; +{% for suffix in ['', 'RFC'] -%} 
+ +SELECT domain{{ suffix }}('http://paul@www.example.com:80/') AS Host; +SELECT domain{{ suffix }}('user:password@example.com:8080') AS Host; +SELECT domain{{ suffix }}('http://user:password@example.com:8080') AS Host; +SELECT domain{{ suffix }}('http://user:password@example.com:8080/path?query=value#fragment') AS Host; +SELECT domain{{ suffix }}('newuser:@example.com') AS Host; +SELECT domain{{ suffix }}('http://:pass@example.com') AS Host; +SELECT domain{{ suffix }}(':newpass@example.com') AS Host; +SELECT domain{{ suffix }}('http://user:pass@example@.com') AS Host; +SELECT domain{{ suffix }}('http://user:pass:example.com') AS Host; +SELECT domain{{ suffix }}('http:/paul/example/com') AS Host; +SELECT domain{{ suffix }}('http://www.example.com?q=4') AS Host; +SELECT domain{{ suffix }}('http://127.0.0.1:443/') AS Host; +SELECT domain{{ suffix }}('//www.example.com') AS Host; +SELECT domain{{ suffix }}('//paul@www.example.com') AS Host; +SELECT domain{{ suffix }}('www.example.com') as Host; +SELECT domain{{ suffix }}('example.com') as Host; +SELECT domainWithoutWWW{{ suffix }}('//paul@www.example.com') AS Host; +SELECT domainWithoutWWW{{ suffix }}('http://paul@www.example.com:80/') AS Host; + +{% endfor %} SELECT '====NETLOC===='; SELECT netloc('http://paul@www.example.com:80/') AS Netloc; @@ -95,25 +107,31 @@ SELECT decodeURLComponent(encodeURLComponent('http://paul@127.0.0.1/?query=hello SELECT decodeURLFormComponent(encodeURLFormComponent('http://paul@127.0.0.1/?query=hello world foo+bar#a=b')); SELECT '====CUT TO FIRST SIGNIFICANT SUBDOMAIN===='; -SELECT cutToFirstSignificantSubdomain('http://www.example.com'); -SELECT cutToFirstSignificantSubdomain('http://www.example.com:1234'); -SELECT cutToFirstSignificantSubdomain('http://www.example.com/a/b/c'); -SELECT cutToFirstSignificantSubdomain('http://www.example.com/a/b/c?a=b'); -SELECT cutToFirstSignificantSubdomain('http://www.example.com/a/b/c?a=b#d=f'); -SELECT cutToFirstSignificantSubdomain('http://paul@www.example.com/a/b/c?a=b#d=f'); -SELECT cutToFirstSignificantSubdomain('//paul@www.example.com/a/b/c?a=b#d=f'); -SELECT cutToFirstSignificantSubdomain('www.example.com'); -SELECT cutToFirstSignificantSubdomain('example.com'); -SELECT cutToFirstSignificantSubdomain('www.com'); -SELECT cutToFirstSignificantSubdomain('com'); + +{% for suffix in ['', 'RFC'] -%} +SELECT cutToFirstSignificantSubdomain{{ suffix }}('http://www.example.com'); +SELECT cutToFirstSignificantSubdomain{{ suffix }}('http://www.example.com:1234'); +SELECT cutToFirstSignificantSubdomain{{ suffix }}('http://www.example.com/a/b/c'); +SELECT cutToFirstSignificantSubdomain{{ suffix }}('http://www.example.com/a/b/c?a=b'); +SELECT cutToFirstSignificantSubdomain{{ suffix }}('http://www.example.com/a/b/c?a=b#d=f'); +SELECT cutToFirstSignificantSubdomain{{ suffix }}('http://paul@www.example.com/a/b/c?a=b#d=f'); +SELECT cutToFirstSignificantSubdomain{{ suffix }}('//paul@www.example.com/a/b/c?a=b#d=f'); +SELECT cutToFirstSignificantSubdomain{{ suffix }}('www.example.com'); +SELECT cutToFirstSignificantSubdomain{{ suffix }}('example.com'); +SELECT cutToFirstSignificantSubdomain{{ suffix }}('www.com'); +SELECT cutToFirstSignificantSubdomain{{ suffix }}('com'); +{% endfor %} SELECT '====CUT TO FIRST SIGNIFICANT SUBDOMAIN WITH WWW===='; -SELECT cutToFirstSignificantSubdomainWithWWW('http://com'); -SELECT cutToFirstSignificantSubdomainWithWWW('http://www.com'); -SELECT cutToFirstSignificantSubdomainWithWWW('http://www.example.com'); -SELECT 
cutToFirstSignificantSubdomainWithWWW('http://www.foo.example.com'); -SELECT cutToFirstSignificantSubdomainWithWWW('http://www.example.com:1'); -SELECT cutToFirstSignificantSubdomainWithWWW('http://www.example.com/'); + +{% for suffix in ['', 'RFC'] -%} +SELECT cutToFirstSignificantSubdomainWithWWW{{ suffix }}('http://com'); +SELECT cutToFirstSignificantSubdomainWithWWW{{ suffix }}('http://www.com'); +SELECT cutToFirstSignificantSubdomainWithWWW{{ suffix }}('http://www.example.com'); +SELECT cutToFirstSignificantSubdomainWithWWW{{ suffix }}('http://www.foo.example.com'); +SELECT cutToFirstSignificantSubdomainWithWWW{{ suffix }}('http://www.example.com:1'); +SELECT cutToFirstSignificantSubdomainWithWWW{{ suffix }}('http://www.example.com/'); +{% endfor %} SELECT '====CUT WWW===='; SELECT cutWWW('http://www.example.com'); diff --git a/tests/queries/0_stateless/00463_long_sessions_in_http_interface.reference b/tests/queries/0_stateless/00463_long_sessions_in_http_interface.reference index 53cdf1e9393..a14d334a483 100644 --- a/tests/queries/0_stateless/00463_long_sessions_in_http_interface.reference +++ b/tests/queries/0_stateless/00463_long_sessions_in_http_interface.reference @@ -1 +1,28 @@ -PASSED +Using non-existent session with the 'session_check' flag will throw exception: +1 +Using non-existent session without the 'session_check' flag will create a new session: +1 +1 +The 'session_timeout' parameter is checked for validity and for the maximum value: +1 +1 +1 +Valid cases are accepted: +1 +1 +1 +Sessions are local per user: +1 +Hello +World +And cannot be accessed for a non-existent user: +1 +The temporary tables created in a session are not accessible without entering this session: +1 +A session successfully expire after a timeout: +111 +A session successfully expire after a timeout and the session's temporary table shadows the permanent table: +HelloWorld +A session cannot be used by concurrent connections: +1 +1 diff --git a/tests/queries/0_stateless/00463_long_sessions_in_http_interface.sh b/tests/queries/0_stateless/00463_long_sessions_in_http_interface.sh index e9f486fbb73..89da84a5bdd 100755 --- a/tests/queries/0_stateless/00463_long_sessions_in_http_interface.sh +++ b/tests/queries/0_stateless/00463_long_sessions_in_http_interface.sh @@ -1,113 +1,87 @@ #!/usr/bin/env bash # Tags: long, no-parallel +# shellcheck disable=SC2015 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -request() { - local url="$1" - local select="$2" - ${CLICKHOUSE_CURL} --silent "$url" --data "$select" -} +echo "Using non-existent session with the 'session_check' flag will throw exception:" +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=nonexistent&session_check=1" --data-binary "SELECT 1" | grep -c -F 'Session not found' -create_temporary_table() { - local url="$1" - request "$url" "CREATE TEMPORARY TABLE temp (x String)" - request "$url" "INSERT INTO temp VALUES ('Hello'), ('World')" -} +echo "Using non-existent session without the 'session_check' flag will create a new session:" +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=${CLICKHOUSE_DATABASE}_1" --data-binary "SELECT 1" +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=${CLICKHOUSE_DATABASE}_1&session_check=0" --data-binary "SELECT 1" +echo "The 'session_timeout' parameter is checked for validity and for the maximum value:" +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=${CLICKHOUSE_DATABASE}_2&session_timeout=string" --data-binary "SELECT 1" | grep -c -F 'Invalid session timeout' +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=${CLICKHOUSE_DATABASE}_2&session_timeout=3601" --data-binary "SELECT 1" | grep -c -F 'Maximum session timeout' +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=${CLICKHOUSE_DATABASE}_2&session_timeout=-1" --data-binary "SELECT 1" | grep -c -F 'Invalid session timeout' -check() { - local url="$1" - local select="$2" - local output="$3" - local expected_result="$4" - local message="$5" - result=$(request "$url" "$select" | grep --count "$output") - if [ "$result" -ne "$expected_result" ]; then - echo "FAILED: $message" - exit 1 - fi -} +echo "Valid cases are accepted:" +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=${CLICKHOUSE_DATABASE}_3&session_timeout=0" --data-binary "SELECT 1" +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=${CLICKHOUSE_DATABASE}_4&session_timeout=3600" --data-binary "SELECT 1" +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=${CLICKHOUSE_DATABASE}_5&session_timeout=60" --data-binary "SELECT 1" +echo "Sessions are local per user:" +${CLICKHOUSE_CLIENT} --multiquery --query "DROP USER IF EXISTS test_00463; CREATE USER test_00463; GRANT ALL ON *.* TO test_00463;" -address=${CLICKHOUSE_HOST} -port=${CLICKHOUSE_PORT_HTTP} -url="${CLICKHOUSE_PORT_HTTP_PROTO}://$address:$port/" -session="?session_id=test_$$" # use PID for session ID -select="SELECT * FROM system.settings WHERE name = 'max_rows_to_read'" -select_from_temporary_table="SELECT * FROM temp ORDER BY x" -select_from_non_existent_table="SELECT * FROM no_such_table ORDER BY x" +${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&session_id=${CLICKHOUSE_DATABASE}_6&session_timeout=600" --data-binary "CREATE TEMPORARY TABLE t (s String)" +${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&session_id=${CLICKHOUSE_DATABASE}_6" --data-binary "INSERT INTO t VALUES ('Hello')" +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&user=test_00463&session_id=${CLICKHOUSE_DATABASE}_6&session_check=1" --data-binary "SELECT 1" | grep -c -F 'Session not found' +${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&user=test_00463&session_id=${CLICKHOUSE_DATABASE}_6&session_timeout=600" --data-binary "CREATE TEMPORARY TABLE t (s String)" +${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&user=test_00463&session_id=${CLICKHOUSE_DATABASE}_6" --data-binary "INSERT INTO t VALUES ('World')" -check "$url?session_id=no_such_session_$$&session_check=1" "$select" "Exception.*Session not 
found" 1 "session_check=1 does not work." -check "$url$session&session_check=0" "$select" "Exception" 0 "session_check=0 does not work." +${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&session_id=${CLICKHOUSE_DATABASE}_6" --data-binary "SELECT * FROM t" +${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&user=test_00463&session_id=${CLICKHOUSE_DATABASE}_6" --data-binary "SELECT * FROM t" -request "$url""$session" "SET max_rows_to_read=7777777" +${CLICKHOUSE_CLIENT} --multiquery --query "DROP USER test_00463"; -check "$url$session&session_timeout=string" "$select" "Exception.*Invalid session timeout" 1 "Non-numeric value accepted as a timeout." -check "$url$session&session_timeout=3601" "$select" "Exception.*Maximum session timeout*" 1 "More then 3600 seconds accepted as a timeout." -check "$url$session&session_timeout=-1" "$select" "Exception.*Invalid session timeout" 1 "Negative timeout accepted." -check "$url$session&session_timeout=0" "$select" "Exception" 0 "Zero timeout not accepted." -check "$url$session&session_timeout=3600" "$select" "Exception" 0 "3600 second timeout not accepted." -check "$url$session&session_timeout=60" "$select" "Exception" 0 "60 second timeout not accepted." +echo "And cannot be accessed for a non-existent user:" +${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&user=test_00463&session_id=${CLICKHOUSE_DATABASE}_6" --data-binary "SELECT * FROM t" | grep -c -F 'Exception' -check "$url""$session" "$select" "7777777" 1 "Failed to reuse session." -# Workaround here -# TODO: move the test to integration test or add readonly user to test environment -if [[ -z $(request "$url?user=readonly" "SELECT ''") ]]; then - # We have readonly user - check "$url$session&user=readonly&session_check=1" "$select" "Exception.*Session not found" 1 "Session is accessable for another user." -else - check "$url$session&user=readonly&session_check=1" "$select" "Exception.*Unknown user*" 1 "Session is accessable for unknown user." -fi +echo "The temporary tables created in a session are not accessible without entering this session:" +${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}" --data-binary "SELECT * FROM t" | grep -c -F 'Exception' -create_temporary_table "$url""$session" -check "$url""$session" "$select_from_temporary_table" "Hello" 1 "Failed to reuse a temporary table for session." - -check "$url?session_id=another_session_$$" "$select_from_temporary_table" "Exception.*Table .* doesn't exist." 1 "Temporary table is visible for another table." - - -( ( -cat </dev/null 2>/dev/null) & -sleep 1 -check "$url""$session" "$select" "Exception.*Session is locked" 1 "Double access to the same session." - - -session="?session_id=test_timeout_$$" - -create_temporary_table "$url$session&session_timeout=1" -check "$url$session&session_timeout=1" "$select_from_temporary_table" "Hello" 1 "Failed to reuse a temporary table for session." -sleep 3 -check "$url$session&session_check=1" "$select" "Exception.*Session not found" 1 "Session did not expire on time." - -create_temporary_table "$url$session&session_timeout=2" -for _ in $(seq 1 3); do - check "$url$session&session_timeout=2" "$select_from_temporary_table" "Hello" 1 "Session expired too early." - sleep 1 +echo "A session successfully expire after a timeout:" +# An infinite loop is required to make the test reliable. 
We will check that the timeout corresponds to the observed time at least once +while true +do + ( + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=${CLICKHOUSE_DATABASE}_7&session_timeout=1" --data-binary "SELECT 1" + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=${CLICKHOUSE_DATABASE}_7&session_check=1" --data-binary "SELECT 1" + sleep 3 + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=${CLICKHOUSE_DATABASE}_7&session_check=1" --data-binary "SELECT 1" | grep -c -F 'Session not found' + ) | tr -d '\n' | grep -F '111' && break || sleep 1 done -sleep 3 -check "$url$session&session_check=1" "$select" "Exception.*Session not found" 1 "Session did not expire on time." -create_temporary_table "$url$session&session_timeout=2" -for _ in $(seq 1 5); do - check "$url$session&session_timeout=2" "$select_from_non_existent_table" "Exception.*Table .* doesn't exist." 1 "Session expired too early." - sleep 1 +echo "A session successfully expire after a timeout and the session's temporary table shadows the permanent table:" +# An infinite loop is required to make the test reliable. We will check that the timeout corresponds to the observed time at least once +${CLICKHOUSE_CLIENT} --multiquery --query "DROP TABLE IF EXISTS t; CREATE TABLE t (s String) ENGINE = Memory; INSERT INTO t VALUES ('World');" +while true +do + ( + ${CLICKHOUSE_CURL} -X POST -sS "${CLICKHOUSE_URL}&session_id=${CLICKHOUSE_DATABASE}_8&session_timeout=1" --data-binary "CREATE TEMPORARY TABLE t (s String)" + ${CLICKHOUSE_CURL} -X POST -sS "${CLICKHOUSE_URL}&session_id=${CLICKHOUSE_DATABASE}_8" --data-binary "INSERT INTO t VALUES ('Hello')" + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=${CLICKHOUSE_DATABASE}_8" --data-binary "SELECT * FROM t" + sleep 3 + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=${CLICKHOUSE_DATABASE}_8" --data-binary "SELECT * FROM t" + ) | tr -d '\n' | grep -F 'HelloWorld' && break || sleep 1 done -check "$url$session&session_timeout=2" "$select_from_temporary_table" "Hello" 1 "Session expired too early. Failed to update timeout in case of exceptions." -sleep 4 -check "$url$session&session_check=1" "$select" "Exception.*Session not found" 1 "Session did not expire on time." +${CLICKHOUSE_CLIENT} --multiquery --query "DROP TABLE t" +echo "A session cannot be used by concurrent connections:" -echo "PASSED" +${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&session_id=${CLICKHOUSE_DATABASE}_9&query_id=${CLICKHOUSE_DATABASE}_9" --data-binary "SELECT count() FROM system.numbers" >/dev/null & + +# An infinite loop is required to make the test reliable. 
We will ensure that at least once the query on the line above has started before this check +while true +do + ${CLICKHOUSE_CLIENT} --query "SELECT count() > 0 FROM system.processes WHERE query_id = '${CLICKHOUSE_DATABASE}_9'" | grep -F '1' && break || sleep 1 +done + +${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&session_id=${CLICKHOUSE_DATABASE}_9" --data-binary "SELECT 1" | grep -c -F 'Session is locked' +${CLICKHOUSE_CLIENT} --multiquery --query "KILL QUERY WHERE query_id = '${CLICKHOUSE_DATABASE}_9' SYNC FORMAT Null"; +wait diff --git a/tests/queries/0_stateless/00653_running_difference.reference b/tests/queries/0_stateless/00653_running_difference.reference index 7511beb2418..e2833e0bb3e 100644 --- a/tests/queries/0_stateless/00653_running_difference.reference +++ b/tests/queries/0_stateless/00653_running_difference.reference @@ -19,3 +19,27 @@ \N \N 2 +--Date Difference-- +\N +\N +0 +364 +\N +\N +14466 +- +\N +\N +0 +11101 +22017 +\N +105432 +- +\N +\N +0 +3149094509 +\N +\N +1130059331 diff --git a/tests/queries/0_stateless/00653_running_difference.sql b/tests/queries/0_stateless/00653_running_difference.sql index fd4dfb219fd..f2b4a7300b2 100644 --- a/tests/queries/0_stateless/00653_running_difference.sql +++ b/tests/queries/0_stateless/00653_running_difference.sql @@ -5,4 +5,9 @@ select '-'; select runningDifference(x) from (select arrayJoin([Null, 1]) as x); select '-'; select runningDifference(x) from (select arrayJoin([Null, Null, 1, 3, Null, Null, 5]) as x); - +select '--Date Difference--'; +select runningDifference(x) from (select arrayJoin([Null, Null, toDate('1970-1-1'), toDate('1970-12-31'), Null, Null, toDate('2010-8-9')]) as x); +select '-'; +select runningDifference(x) from (select arrayJoin([Null, Null, toDate32('1900-1-1'), toDate32('1930-5-25'), toDate('1990-9-4'), Null, toDate32('2279-5-4')]) as x); +select '-'; +select runningDifference(x) from (select arrayJoin([Null, Null, toDateTime('1970-06-28 23:48:12', 'Asia/Istanbul'), toDateTime('2070-04-12 21:16:41', 'Asia/Istanbul'), Null, Null, toDateTime('2106-02-03 06:38:52', 'Asia/Istanbul')]) as x); diff --git a/tests/queries/0_stateless/00700_to_decimal_or_something.reference b/tests/queries/0_stateless/00700_to_decimal_or_something.reference index 89ded7bd6d4..dec36ed5df5 100644 --- a/tests/queries/0_stateless/00700_to_decimal_or_something.reference +++ b/tests/queries/0_stateless/00700_to_decimal_or_something.reference @@ -1,5 +1,5 @@ 1.1 1.1 1.1 -0 +1 0 0.42 0 0.42 0 0.42 @@ -13,7 +13,7 @@ 0 ---- 1.1 1.1 1.1 -\N +1 \N -0.42 \N -0.42 \N -0.42 diff --git a/tests/queries/0_stateless/00705_drop_create_merge_tree.reference b/tests/queries/0_stateless/00705_drop_create_merge_tree.reference index 8b137891791..e69de29bb2d 100644 --- a/tests/queries/0_stateless/00705_drop_create_merge_tree.reference +++ b/tests/queries/0_stateless/00705_drop_create_merge_tree.reference @@ -1 +0,0 @@ - diff --git a/tests/queries/0_stateless/00705_drop_create_merge_tree.sh b/tests/queries/0_stateless/00705_drop_create_merge_tree.sh index 146d6e54c0b..d7754091290 100755 --- a/tests/queries/0_stateless/00705_drop_create_merge_tree.sh +++ b/tests/queries/0_stateless/00705_drop_create_merge_tree.sh @@ -1,39 +1,12 @@ #!/usr/bin/env bash # Tags: no-fasttest -set -e - CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -function stress() -{ - # We set up a signal handler to make sure to wait for all queries to be finished before ending - CONTINUE=true - handle_interruption() - { - CONTINUE=false - } - trap handle_interruption INT - - while $CONTINUE; do - ${CLICKHOUSE_CLIENT} --query "CREATE TABLE IF NOT EXISTS table (x UInt8) ENGINE = MergeTree ORDER BY tuple()" 2>/dev/null - ${CLICKHOUSE_CLIENT} --query "DROP TABLE table" 2>/dev/null - done - - trap - INT -} - -# https://stackoverflow.com/questions/9954794/execute-a-shell-function-with-timeout -export -f stress - -for _ in {1..5}; do - # Ten seconds are just barely enough to reproduce the issue in most of runs. - timeout -s INT 10 bash -c stress & -done - +yes 'CREATE TABLE IF NOT EXISTS table (x UInt8) ENGINE = MergeTree ORDER BY tuple();' | head -n 1000 | $CLICKHOUSE_CLIENT --ignore-error -nm 2>/dev/null & +yes 'DROP TABLE table;' | head -n 1000 | $CLICKHOUSE_CLIENT --ignore-error -nm 2>/dev/null & wait -echo -${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS table"; +${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS table" diff --git a/tests/queries/0_stateless/00718_format_datetime.reference b/tests/queries/0_stateless/00718_format_datetime.reference index 4f12a46d7c0..bc98dd59d5f 100644 --- a/tests/queries/0_stateless/00718_format_datetime.reference +++ b/tests/queries/0_stateless/00718_format_datetime.reference @@ -1,33 +1,34 @@ -20 +20 20 +02 02 +01/02/18 01/02/18 + 2 2 +2018-01-02 2018-01-02 +22 00 02 -01/02/18 - 2 -2018-01-02 -22 -02 -10 +10 12 11 12 -001 -366 -01 -33 -\n -AM +001 001 +366 366 +01 01 +33 00 +\n \n +AM AM AM PM -22:33 -44 -\t -22:33:44 -1 7 -01 01 53 52 -1 0 -18 -2018 -% -no formatting pattern +22:33 00:00 +44 00 +\t \t +22:33:44 00:00:00 +1 7 1 7 +01 01 53 52 01 01 53 52 +1 0 1 0 +18 18 +2018 2018 +% % +no formatting pattern no formatting pattern 2018-01-01 00:00:00 +1927-01-01 00:00:00 2018-01-01 01:00:00 2018-01-01 04:00:00 +0000 -1100 diff --git a/tests/queries/0_stateless/00718_format_datetime.sql b/tests/queries/0_stateless/00718_format_datetime.sql index 7ed1f0abea4..deb5fb96c6c 100644 --- a/tests/queries/0_stateless/00718_format_datetime.sql +++ b/tests/queries/0_stateless/00718_format_datetime.sql @@ -8,38 +8,44 @@ SELECT formatDateTime(now(), 'unescaped %'); -- { serverError 36 } SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%U'); -- { serverError 48 } SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%W'); -- { serverError 48 } -SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%C'); -SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%d'); -SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%D'); -SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%e'); -SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%F'); -SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%H'); +SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%C'), formatDateTime(toDate32('2018-01-02'), '%C'); +SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%d'), formatDateTime(toDate32('2018-01-02'), '%d'); +SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%D'), formatDateTime(toDate32('2018-01-02'), '%D'); +SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%e'), formatDateTime(toDate32('2018-01-02'), '%e'); +SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%F'), formatDateTime(toDate32('2018-01-02'), '%F'); +SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%H'), formatDateTime(toDate32('2018-01-02'), '%H'); 
SELECT formatDateTime(toDateTime('2018-01-02 02:33:44'), '%H'); -SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%I'); +SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%I'), formatDateTime(toDate32('2018-01-02'), '%I'); SELECT formatDateTime(toDateTime('2018-01-02 11:33:44'), '%I'); SELECT formatDateTime(toDateTime('2018-01-02 00:33:44'), '%I'); -SELECT formatDateTime(toDateTime('2018-01-01 00:33:44'), '%j'); -SELECT formatDateTime(toDateTime('2000-12-31 00:33:44'), '%j'); -SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%m'); -SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%M'); -SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%n'); -SELECT formatDateTime(toDateTime('2018-01-02 00:33:44'), '%p'); +SELECT formatDateTime(toDateTime('2018-01-01 00:33:44'), '%j'), formatDateTime(toDate32('2018-01-01'), '%j'); +SELECT formatDateTime(toDateTime('2000-12-31 00:33:44'), '%j'), formatDateTime(toDate32('2000-12-31'), '%j'); +SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%m'), formatDateTime(toDate32('2018-01-02'), '%m'); +SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%M'), formatDateTime(toDate32('2018-01-02'), '%M'); +SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%n'), formatDateTime(toDate32('2018-01-02'), '%n'); +SELECT formatDateTime(toDateTime('2018-01-02 00:33:44'), '%p'), formatDateTime(toDateTime('2018-01-02'), '%p'); SELECT formatDateTime(toDateTime('2018-01-02 11:33:44'), '%p'); SELECT formatDateTime(toDateTime('2018-01-02 12:33:44'), '%p'); -SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%R'); -SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%S'); -SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%t'); -SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%T'); -SELECT formatDateTime(toDateTime('2018-01-01 22:33:44'), '%u'), formatDateTime(toDateTime('2018-01-07 22:33:44'), '%u'); +SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%R'), formatDateTime(toDate32('2018-01-02'), '%R'); +SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%S'), formatDateTime(toDate32('2018-01-02'), '%S'); +SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%t'), formatDateTime(toDate32('2018-01-02'), '%t'); +SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%T'), formatDateTime(toDate32('2018-01-02'), '%T'); +SELECT formatDateTime(toDateTime('2018-01-01 22:33:44'), '%u'), formatDateTime(toDateTime('2018-01-07 22:33:44'), '%u'), + formatDateTime(toDate32('2018-01-01'), '%u'), formatDateTime(toDate32('2018-01-07'), '%u'); SELECT formatDateTime(toDateTime('1996-01-01 22:33:44'), '%V'), formatDateTime(toDateTime('1996-12-31 22:33:44'), '%V'), - formatDateTime(toDateTime('1999-01-01 22:33:44'), '%V'), formatDateTime(toDateTime('1999-12-31 22:33:44'), '%V'); -SELECT formatDateTime(toDateTime('2018-01-01 22:33:44'), '%w'), formatDateTime(toDateTime('2018-01-07 22:33:44'), '%w'); -SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%y'); -SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%Y'); -SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%%'); -SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), 'no formatting pattern'); + formatDateTime(toDateTime('1999-01-01 22:33:44'), '%V'), formatDateTime(toDateTime('1999-12-31 22:33:44'), '%V'), + formatDateTime(toDate32('1996-01-01'), '%V'), formatDateTime(toDate32('1996-12-31'), '%V'), + formatDateTime(toDate32('1999-01-01'), '%V'), formatDateTime(toDate32('1999-12-31'), '%V'); +SELECT 
formatDateTime(toDateTime('2018-01-01 22:33:44'), '%w'), formatDateTime(toDateTime('2018-01-07 22:33:44'), '%w'), + formatDateTime(toDate32('2018-01-01'), '%w'), formatDateTime(toDate32('2018-01-07'), '%w'); +SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%y'), formatDateTime(toDate32('2018-01-02'), '%y'); +SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%Y'), formatDateTime(toDate32('2018-01-02'), '%Y'); +SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%%'), formatDateTime(toDate32('2018-01-02'), '%%'); +SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), 'no formatting pattern'), formatDateTime(toDate32('2018-01-02'), 'no formatting pattern'); SELECT formatDateTime(toDate('2018-01-01'), '%F %T'); +SELECT formatDateTime(toDate32('1927-01-01'), '%F %T'); + SELECT formatDateTime(toDateTime('2018-01-01 01:00:00', 'UTC'), '%F %T', 'UTC'), formatDateTime(toDateTime('2018-01-01 01:00:00', 'UTC'), '%F %T', 'Asia/Istanbul'); diff --git a/tests/queries/0_stateless/00732_base64_functions.reference b/tests/queries/0_stateless/00732_base64_functions.reference index b22ae4e7e24..5dc1ba03b89 100644 --- a/tests/queries/0_stateless/00732_base64_functions.reference +++ b/tests/queries/0_stateless/00732_base64_functions.reference @@ -14,3 +14,5 @@ fooba foobar 1 1 +Zm9v +foo diff --git a/tests/queries/0_stateless/00732_base64_functions.sql b/tests/queries/0_stateless/00732_base64_functions.sql index 4ed86e20913..adba0cdebbd 100644 --- a/tests/queries/0_stateless/00732_base64_functions.sql +++ b/tests/queries/0_stateless/00732_base64_functions.sql @@ -14,3 +14,6 @@ SELECT base64Decode(val, 'excess argument') FROM (select arrayJoin(['', 'Zg==', SELECT tryBase64Decode('Zm9vYmF=Zm9v', 'excess argument'); -- { serverError 42 } SELECT base64Decode('Zm9vYmF=Zm9v'); -- { serverError 117 } + +select base64Encode(toFixedString('foo', 3)); +select base64Decode(toFixedString('Zm9v', 4)); diff --git a/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.reference b/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.reference index dba46e48e43..58f8b7abfb3 100644 --- a/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.reference +++ b/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.reference @@ -35,7 +35,7 @@ Check total_bytes/total_rows for StripeLog 113 1 Check total_bytes/total_rows for Memory 0 0 -64 1 +256 1 Check total_bytes/total_rows for Buffer 0 0 256 50 diff --git a/tests/queries/0_stateless/00900_long_parquet.reference b/tests/queries/0_stateless/00900_long_parquet.reference index 4dfc726145e..bbdad7243bd 100644 --- a/tests/queries/0_stateless/00900_long_parquet.reference +++ b/tests/queries/0_stateless/00900_long_parquet.reference @@ -44,12 +44,12 @@ converted: diff: dest: 79 81 82 83 84 85 86 87 88 89 str01\0\0\0\0\0\0\0\0\0\0 fstr1\0\0\0\0\0\0\0\0\0\0 2003-03-04 2004-05-06 00:00:00 2004-05-06 07:08:09.012000000 -80 81 82 83 84 85 86 87 88 89 str02 fstr2\0\0\0\0\0\0\0\0\0\0 2149-06-06 2006-08-09 10:11:12 2006-08-09 10:11:12.345000000 +80 81 82 83 84 85 86 87 88 89 str02 fstr2\0\0\0\0\0\0\0\0\0\0 2005-03-04 2006-08-09 10:11:12 2006-08-09 10:11:12.345000000 min: --128 0 0 0 0 0 0 0 -1 -1 string-1\0\0\0\0\0\0\0 fixedstring-1\0\0 2003-04-05 2149-06-06 2003-02-03 04:05:06.789000000 --108 108 8 92 -8 108 -40 -116 -1 -1 string-0\0\0\0\0\0\0\0 fixedstring\0\0\0\0 2001-02-03 2149-06-06 2002-02-03 04:05:06.789000000 +-128 0 0 0 0 0 0 0 -1 -1 string-1\0\0\0\0\0\0\0 fixedstring-1\0\0 2003-04-05 2003-02-03 
2003-02-03 04:05:06.789000000 +-108 108 8 92 -8 108 -40 -116 -1 -1 string-0\0\0\0\0\0\0\0 fixedstring\0\0\0\0 2001-02-03 2002-02-03 2002-02-03 04:05:06.789000000 79 81 82 83 84 85 86 87 88 89 str01\0\0\0\0\0\0\0\0\0\0 fstr1\0\0\0\0\0\0\0\0\0\0 2003-03-04 2004-05-06 2004-05-06 07:08:09.012000000 -127 -1 -1 -1 -1 -1 -1 -1 -1 -1 string-2\0\0\0\0\0\0\0 fixedstring-2\0\0 2004-06-07 2149-06-06 2004-02-03 04:05:06.789000000 +127 -1 -1 -1 -1 -1 -1 -1 -1 -1 string-2\0\0\0\0\0\0\0 fixedstring-2\0\0 2004-06-07 2004-02-03 2004-02-03 04:05:06.789000000 max: -128 0 -32768 0 -2147483648 0 -9223372036854775808 0 -1 -1 string-1 fixedstring-1\0\0 2003-04-05 00:00:00 2003-02-03 04:05:06 2003-02-03 04:05:06.789000000 -108 108 -1016 1116 -1032 1132 -1064 1164 -1 -1 string-0 fixedstring\0\0\0\0 2001-02-03 00:00:00 2002-02-03 04:05:06 2002-02-03 04:05:06.789000000 diff --git a/tests/queries/0_stateless/00918_json_functions.reference b/tests/queries/0_stateless/00918_json_functions.reference index 8e6fc3914e0..fc03457c677 100644 --- a/tests/queries/0_stateless/00918_json_functions.reference +++ b/tests/queries/0_stateless/00918_json_functions.reference @@ -61,11 +61,47 @@ Friday (1,'417ddc5d-e556-4d27-95dd-a34d84e46a50') hello (3333.6,'test') +(3333.6,'test') +(3333.6333333333,'test') (3333.6333333333,'test') 123456.1234 Decimal(20, 4) +123456.1234 Decimal(20, 4) +123456789012345.12 Decimal(30, 4) +(1234567890.1234567890123456789,'test') Tuple(a Decimal(35, 20), b LowCardinality(String)) +(1234567890.12345678901234567890123456789,'test') Tuple(a Decimal(45, 30), b LowCardinality(String)) 123456789012345.1136 123456789012345.1136 1234567890.12345677879616925706 (1234567890.12345677879616925706,'test') 1234567890.123456695758468374595199311875 (1234567890.123456695758468374595199311875,'test') +-1234567890 Int32 +1234567890 UInt32 +-1234567890123456789 Int64 +1234567890123456789 UInt64 +-1234567890123456789 Int128 +1234567890123456789 UInt128 +-1234567890123456789 Int256 +1234567890123456789 UInt256 +-123456789 Int32 +123456789 UInt32 +-123456789012 Int64 +123456789012 UInt64 +-123456789012 Int128 +123456789012 UInt128 +-123456789012 Int256 +123456789012 UInt256 +-123456789 Int32 +123456789 UInt32 +-1234567890123456789 Int64 +1234567890123456789 UInt64 +-12345678901234567890123456789012345678 Int128 +12345678901234567890123456789012345678 UInt128 +-11345678901234567890123456789012345678901234567890123456789012345678901234567 Int256 +11345678901234567890123456789012345678901234567890123456789012345678901234567 UInt256 +0 Int32 +0 UInt32 +0 Int64 +0 UInt64 +false Bool +true Bool --JSONExtractKeysAndValues-- [('a','hello'),('b','[-100,200,300]')] [('b',[-100,200,300])] @@ -217,3 +253,4 @@ e u v --show error: type should be const string +--show error: index type should be integer diff --git a/tests/queries/0_stateless/00918_json_functions.sql b/tests/queries/0_stateless/00918_json_functions.sql index 87682587c8e..3105994ce20 100644 --- a/tests/queries/0_stateless/00918_json_functions.sql +++ b/tests/queries/0_stateless/00918_json_functions.sql @@ -72,11 +72,47 @@ SELECT JSONExtract('{"a":123456, "b":3.55}', 'Tuple(a LowCardinality(Int32), b D SELECT JSONExtract('{"a":1, "b":"417ddc5d-e556-4d27-95dd-a34d84e46a50"}', 'Tuple(a Int8, b UUID)'); SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'a', 'LowCardinality(String)'); SELECT JSONExtract('{"a":3333.6333333333333333333333, "b":"test"}', 'Tuple(a Decimal(10,1), b LowCardinality(String))'); +SELECT JSONExtract('{"a":"3333.6333333333333333333333", 
"b":"test"}', 'Tuple(a Decimal(10,1), b LowCardinality(String))'); SELECT JSONExtract('{"a":3333.6333333333333333333333, "b":"test"}', 'Tuple(a Decimal(20,10), b LowCardinality(String))'); +SELECT JSONExtract('{"a":"3333.6333333333333333333333", "b":"test"}', 'Tuple(a Decimal(20,10), b LowCardinality(String))'); SELECT JSONExtract('{"a":123456.123456}', 'a', 'Decimal(20, 4)') as a, toTypeName(a); +SELECT JSONExtract('{"a":"123456.123456"}', 'a', 'Decimal(20, 4)') as a, toTypeName(a); +SELECT JSONExtract('{"a":"123456789012345.12"}', 'a', 'Decimal(30, 4)') as a, toTypeName(a); +SELECT JSONExtract('{"a":"1234567890.12345678901234567890", "b":"test"}', 'Tuple(a Decimal(35,20), b LowCardinality(String))') as a, toTypeName(a); +SELECT JSONExtract('{"a":"1234567890.123456789012345678901234567890", "b":"test"}', 'Tuple(a Decimal(45,30), b LowCardinality(String))') as a, toTypeName(a); SELECT toDecimal64(123456789012345.12, 4), JSONExtract('{"a":123456789012345.12}', 'a', 'Decimal(30, 4)'); SELECT toDecimal128(1234567890.12345678901234567890, 20), JSONExtract('{"a":1234567890.12345678901234567890, "b":"test"}', 'Tuple(a Decimal(35,20), b LowCardinality(String))'); SELECT toDecimal256(1234567890.123456789012345678901234567890, 30), JSONExtract('{"a":1234567890.12345678901234567890, "b":"test"}', 'Tuple(a Decimal(45,30), b LowCardinality(String))'); +SELECT JSONExtract('{"a":-1234567890}', 'a', 'Int32') as a, toTypeName(a); +SELECT JSONExtract('{"a":1234567890}', 'a', 'UInt32') as a, toTypeName(a); +SELECT JSONExtract('{"a":-1234567890123456789}', 'a', 'Int64') as a, toTypeName(a); +SELECT JSONExtract('{"a":1234567890123456789}', 'a', 'UInt64') as a, toTypeName(a); +SELECT JSONExtract('{"a":-1234567890123456789}', 'a', 'Int128') as a, toTypeName(a); +SELECT JSONExtract('{"a":1234567890123456789}', 'a', 'UInt128') as a, toTypeName(a); +SELECT JSONExtract('{"a":-1234567890123456789}', 'a', 'Int256') as a, toTypeName(a); +SELECT JSONExtract('{"a":1234567890123456789}', 'a', 'UInt256') as a, toTypeName(a); +SELECT JSONExtract('{"a":-123456789.345}', 'a', 'Int32') as a, toTypeName(a); +SELECT JSONExtract('{"a":123456789.345}', 'a', 'UInt32') as a, toTypeName(a); +SELECT JSONExtract('{"a":-123456789012.345}', 'a', 'Int64') as a, toTypeName(a); +SELECT JSONExtract('{"a":123456789012.345}', 'a', 'UInt64') as a, toTypeName(a); +SELECT JSONExtract('{"a":-123456789012.345}', 'a', 'Int128') as a, toTypeName(a); +SELECT JSONExtract('{"a":123456789012.345}', 'a', 'UInt128') as a, toTypeName(a); +SELECT JSONExtract('{"a":-123456789012.345}', 'a', 'Int256') as a, toTypeName(a); +SELECT JSONExtract('{"a":123456789012.345}', 'a', 'UInt256') as a, toTypeName(a); +SELECT JSONExtract('{"a":"-123456789"}', 'a', 'Int32') as a, toTypeName(a); +SELECT JSONExtract('{"a":"123456789"}', 'a', 'UInt32') as a, toTypeName(a); +SELECT JSONExtract('{"a":"-1234567890123456789"}', 'a', 'Int64') as a, toTypeName(a); +SELECT JSONExtract('{"a":"1234567890123456789"}', 'a', 'UInt64') as a, toTypeName(a); +SELECT JSONExtract('{"a":"-12345678901234567890123456789012345678"}', 'a', 'Int128') as a, toTypeName(a); +SELECT JSONExtract('{"a":"12345678901234567890123456789012345678"}', 'a', 'UInt128') as a, toTypeName(a); +SELECT JSONExtract('{"a":"-11345678901234567890123456789012345678901234567890123456789012345678901234567"}', 'a', 'Int256') as a, toTypeName(a); +SELECT JSONExtract('{"a":"11345678901234567890123456789012345678901234567890123456789012345678901234567"}', 'a', 'UInt256') as a, toTypeName(a); +SELECT 
JSONExtract('{"a":"-1234567899999"}', 'a', 'Int32') as a, toTypeName(a); +SELECT JSONExtract('{"a":"1234567899999"}', 'a', 'UInt32') as a, toTypeName(a); +SELECT JSONExtract('{"a":"-1234567890123456789999"}', 'a', 'Int64') as a, toTypeName(a); +SELECT JSONExtract('{"a":"1234567890123456789999"}', 'a', 'UInt64') as a, toTypeName(a); +SELECT JSONExtract('{"a":0}', 'a', 'Bool') as a, toTypeName(a); +SELECT JSONExtract('{"a":1}', 'a', 'Bool') as a, toTypeName(a); SELECT '--JSONExtractKeysAndValues--'; SELECT JSONExtractKeysAndValues('{"a": "hello", "b": [-100, 200.0, 300]}', 'String'); @@ -244,3 +280,6 @@ SELECT JSONExtractString(json, 's') FROM (SELECT arrayJoin(['{"s":"u"}', '{"s":" SELECT '--show error: type should be const string'; SELECT JSONExtractKeysAndValues([], JSONLength('^?V{LSwp')); -- { serverError 44 } WITH '{"i": 1, "f": 1.2}' AS json SELECT JSONExtract(json, 'i', JSONType(json, 'i')); -- { serverError 44 } + +SELECT '--show error: index type should be integer'; +SELECT JSONExtract('[]', JSONExtract('0', 'UInt256'), 'UInt256'); -- { serverError 43 } diff --git a/tests/queries/0_stateless/00938_template_input_format.reference b/tests/queries/0_stateless/00938_template_input_format.reference index e1f77d9a581..ec8cd7a21f0 100644 --- a/tests/queries/0_stateless/00938_template_input_format.reference +++ b/tests/queries/0_stateless/00938_template_input_format.reference @@ -31,3 +31,5 @@ cv bn m","qwe,rty",456,"2016-01-02" "zx\cv\bn m","qwe,rty","as""df'gh","",789,"2016-01-04" "","zx cv bn m","qwe,rty","as""df'gh",9876543210,"2016-01-03" +1 +1 diff --git a/tests/queries/0_stateless/00938_template_input_format.sh b/tests/queries/0_stateless/00938_template_input_format.sh index e99f59614da..be75edcdb61 100755 --- a/tests/queries/0_stateless/00938_template_input_format.sh +++ b/tests/queries/0_stateless/00938_template_input_format.sh @@ -83,3 +83,13 @@ $CLICKHOUSE_CLIENT --query="DROP TABLE template1"; $CLICKHOUSE_CLIENT --query="DROP TABLE template2"; rm "$CURDIR"/00938_template_input_format_resultset.tmp "$CURDIR"/00938_template_input_format_row.tmp +echo -ne '\${a:Escaped},\${b:Escaped}\n' > "$CURDIR"/00938_template_input_format_row.tmp +echo -ne "a,b\nc,d\n" | $CLICKHOUSE_LOCAL --structure "a String, b String" --input-format Template \ + --format_template_row "$CURDIR"/00938_template_input_format_row.tmp --format_template_rows_between_delimiter '' \ + -q 'select * from table' 2>&1| grep -Fac "'Escaped' serialization requires delimiter" +echo -ne '\${a:Escaped},\${:Escaped}\n' > "$CURDIR"/00938_template_input_format_row.tmp +echo -ne "a,b\nc,d\n" | $CLICKHOUSE_LOCAL --structure "a String" --input-format Template \ + --format_template_row "$CURDIR"/00938_template_input_format_row.tmp --format_template_rows_between_delimiter '' \ + -q 'select * from table' 2>&1| grep -Fac "'Escaped' serialization requires delimiter" +rm "$CURDIR"/00938_template_input_format_row.tmp + diff --git a/tests/queries/0_stateless/00941_to_custom_week.sql b/tests/queries/0_stateless/00941_to_custom_week.sql index 04ff08d4117..4dd5d209306 100644 --- a/tests/queries/0_stateless/00941_to_custom_week.sql +++ b/tests/queries/0_stateless/00941_to_custom_week.sql @@ -49,3 +49,4 @@ SELECT toStartOfWeek(x, 3) AS w3, toStartOfWeek(x_t, 3) AS wt3 FROM numbers(10); + diff --git a/tests/queries/0_stateless/01014_format_custom_separated.reference b/tests/queries/0_stateless/01014_format_custom_separated.reference index d46a6fdf5b1..626d6ed66b8 100644 --- a/tests/queries/0_stateless/01014_format_custom_separated.reference +++ 
b/tests/queries/0_stateless/01014_format_custom_separated.reference @@ -8,3 +8,4 @@ 1,"2019-09-25","world" 2,"2019-09-26","custom" 3,"2019-09-27","separated" +1 diff --git a/tests/queries/0_stateless/01014_format_custom_separated.sh b/tests/queries/0_stateless/01014_format_custom_separated.sh index 4e88419d125..655607c8c9b 100755 --- a/tests/queries/0_stateless/01014_format_custom_separated.sh +++ b/tests/queries/0_stateless/01014_format_custom_separated.sh @@ -34,3 +34,8 @@ FORMAT CustomSeparated" $CLICKHOUSE_CLIENT --query="SELECT * FROM custom_separated ORDER BY n FORMAT CSV" $CLICKHOUSE_CLIENT --query="DROP TABLE custom_separated" + +echo -ne "a,b\nc,d\n" | $CLICKHOUSE_LOCAL --structure "a String, b String" \ + --input-format CustomSeparated --format_custom_escaping_rule=Escaped \ + --format_custom_field_delimiter=',' --format_custom_row_after_delimiter=$'\n' -q 'select * from table' \ + 2>&1| grep -Fac "'Escaped' serialization requires delimiter" diff --git a/tests/queries/0_stateless/01186_conversion_to_nullable.reference b/tests/queries/0_stateless/01186_conversion_to_nullable.reference index 86fa0afff20..e4c1fd7c40b 100644 --- a/tests/queries/0_stateless/01186_conversion_to_nullable.reference +++ b/tests/queries/0_stateless/01186_conversion_to_nullable.reference @@ -26,7 +26,7 @@ \N 42 \N -\N +3.14 42 \N 3.14159 diff --git a/tests/queries/0_stateless/01284_port.reference b/tests/queries/0_stateless/01284_port.reference index 7e776595065..5b7b58bc7e4 100644 --- a/tests/queries/0_stateless/01284_port.reference +++ b/tests/queries/0_stateless/01284_port.reference @@ -22,3 +22,27 @@ ipv6 0 host-no-dot 0 +ipv4 +0 +80 +80 +80 +80 +hostname +0 +80 +80 +80 +80 +default-port +80 +80 +ipv6 +0 +0 +0 +0 +0 +0 +host-no-dot +0 diff --git a/tests/queries/0_stateless/01284_port.sql b/tests/queries/0_stateless/01284_port.sql deleted file mode 100644 index 9c31a5d42ad..00000000000 --- a/tests/queries/0_stateless/01284_port.sql +++ /dev/null @@ -1,34 +0,0 @@ -select 'ipv4'; -select port('http://127.0.0.1/'); -select port('http://127.0.0.1:80'); -select port('http://127.0.0.1:80/'); -select port('//127.0.0.1:80/'); -select port('127.0.0.1:80'); -select 'hostname'; -select port('http://foobar.com/'); -select port('http://foobar.com:80'); -select port('http://foobar.com:80/'); -select port('//foobar.com:80/'); -select port('foobar.com:80'); - -select 'default-port'; -select port('http://127.0.0.1/', toUInt16(80)); -select port('http://foobar.com/', toUInt16(80)); - --- unsupported -/* ILLEGAL_TYPE_OF_ARGUMENT */ select port(toFixedString('', 1)); -- { serverError 43; } -/* ILLEGAL_TYPE_OF_ARGUMENT */ select port('', 1); -- { serverError 43; } -/* NUMBER_OF_ARGUMENTS_DOESNT_MATCH */ select port('', 1, 1); -- { serverError 42; } - --- --- Known limitations of domain() (getURLHost()) --- -select 'ipv6'; -select port('http://[2001:db8::8a2e:370:7334]/'); -select port('http://[2001:db8::8a2e:370:7334]:80'); -select port('http://[2001:db8::8a2e:370:7334]:80/'); -select port('//[2001:db8::8a2e:370:7334]:80/'); -select port('[2001:db8::8a2e:370:7334]:80'); -select port('2001:db8::8a2e:370:7334:80'); -select 'host-no-dot'; -select port('//foobar:80/'); diff --git a/tests/queries/0_stateless/01284_port.sql.j2 b/tests/queries/0_stateless/01284_port.sql.j2 new file mode 100644 index 00000000000..6f78b3b8e3b --- /dev/null +++ b/tests/queries/0_stateless/01284_port.sql.j2 @@ -0,0 +1,39 @@ +{% for suffix in ['', 'RFC'] -%} + +select 'ipv4'; +select port{{ suffix }}('http://127.0.0.1/'); +select port{{ suffix 
}}('http://127.0.0.1:80'); +select port{{ suffix }}('http://127.0.0.1:80/'); +select port{{ suffix }}('//127.0.0.1:80/'); +select port{{ suffix }}('127.0.0.1:80'); + +select 'hostname'; +select port{{ suffix }}('http://foobar.com/'); +select port{{ suffix }}('http://foobar.com:80'); +select port{{ suffix }}('http://foobar.com:80/'); +select port{{ suffix }}('//foobar.com:80/'); +select port{{ suffix }}('foobar.com:80'); + +select 'default-port'; +select port{{ suffix }}('http://127.0.0.1/', toUInt16(80)); +select port{{ suffix }}('http://foobar.com/', toUInt16(80)); + +-- unsupported +/* ILLEGAL_TYPE_OF_ARGUMENT */ select port(toFixedString('', 1)); -- { serverError 43; } +/* ILLEGAL_TYPE_OF_ARGUMENT */ select port{{ suffix }}('', 1); -- { serverError 43; } +/* NUMBER_OF_ARGUMENTS_DOESNT_MATCH */ select port{{ suffix }}('', 1, 1); -- { serverError 42; } + +-- +-- Known limitations of domain() (getURLHost()) +-- +select 'ipv6'; +select port{{ suffix }}('http://[2001:db8::8a2e:370:7334]/'); +select port{{ suffix }}('http://[2001:db8::8a2e:370:7334]:80'); +select port{{ suffix }}('http://[2001:db8::8a2e:370:7334]:80/'); +select port{{ suffix }}('//[2001:db8::8a2e:370:7334]:80/'); +select port{{ suffix }}('[2001:db8::8a2e:370:7334]:80'); +select port{{ suffix }}('2001:db8::8a2e:370:7334:80'); +select 'host-no-dot'; +select port{{ suffix }}('//foobar:80/'); + +{%- endfor %} diff --git a/tests/queries/0_stateless/01288_shard_max_network_bandwidth.sql b/tests/queries/0_stateless/01288_shard_max_network_bandwidth.sql index 969bb0a126c..d2daf48a1cb 100644 --- a/tests/queries/0_stateless/01288_shard_max_network_bandwidth.sql +++ b/tests/queries/0_stateless/01288_shard_max_network_bandwidth.sql @@ -1,7 +1,7 @@ -- Tags: shard --- Limit to 10 MB/sec -SET max_network_bandwidth = 10000000; +-- Limit to 100 KB/sec +SET max_network_bandwidth = 100000; -- Lower max_block_size, so we can start throttling sooner. Otherwise query will be executed too quickly. SET max_block_size = 100; @@ -11,7 +11,7 @@ CREATE TEMPORARY TABLE times (t DateTime); -- rand64 is uncompressable data. Each number will take 8 bytes of bandwidth. -- This query should execute in no less than 1.6 seconds if throttled. 
INSERT INTO times SELECT now(); -SELECT sum(ignore(*)) FROM (SELECT rand64() FROM remote('127.0.0.{2,3}', numbers(2000000))); +SELECT sum(ignore(*)) FROM (SELECT rand64() FROM remote('127.0.0.{2,3}', numbers(20000))); INSERT INTO times SELECT now(); SELECT max(t) - min(t) >= 1 FROM times; diff --git a/tests/queries/0_stateless/01411_from_unixtime.reference b/tests/queries/0_stateless/01411_from_unixtime.reference index 1bc7519e668..17086e8c58b 100644 --- a/tests/queries/0_stateless/01411_from_unixtime.reference +++ b/tests/queries/0_stateless/01411_from_unixtime.reference @@ -5,25 +5,25 @@ 11 1970-01-15 1970-01-15 06:52:36 -20 +20 20 +02 02 +01/02/18 01/02/18 + 2 2 +2018-01-02 2018-01-02 +22 00 02 -01/02/18 - 2 -2018-01-02 -22 -02 -10 +10 12 11 12 -001 -366 -01 -33 -\n -AM +001 001 +366 366 +01 01 +33 00 +\n \n +AM AM AM PM -22:33 -44 -\t -22:33:44 +22:33 00:00 +44 00 +\t \t +22:33:44 00:00:00 diff --git a/tests/queries/0_stateless/01411_from_unixtime.sql b/tests/queries/0_stateless/01411_from_unixtime.sql index ec7b4d65b57..9a6655768e0 100644 --- a/tests/queries/0_stateless/01411_from_unixtime.sql +++ b/tests/queries/0_stateless/01411_from_unixtime.sql @@ -5,25 +5,25 @@ SELECT FROM_UNIXTIME(5345345, '%C', 'UTC'); SELECT FROM_UNIXTIME(645123, '%H', 'UTC'); SELECT FROM_UNIXTIME(1232456, '%Y-%m-%d', 'UTC'); SELECT FROM_UNIXTIME(1234356, '%Y-%m-%d %R:%S', 'UTC'); -SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%C'); -SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%d'); -SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%D'); -SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%e'); -SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%F'); -SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%H'); +SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%C'), FROM_UNIXTIME(toDate32('2018-01-02'), '%C'); +SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%d'), FROM_UNIXTIME(toDate32('2018-01-02'), '%d'); +SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%D'), FROM_UNIXTIME(toDate32('2018-01-02'), '%D'); +SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%e'), FROM_UNIXTIME(toDate32('2018-01-02'), '%e'); +SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%F'), FROM_UNIXTIME(toDate32('2018-01-02'), '%F'); +SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%H'), FROM_UNIXTIME(toDate32('2018-01-02'), '%H'); SELECT FROM_UNIXTIME(toDateTime('2018-01-02 02:33:44'), '%H'); -SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%I'); +SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%I'), FROM_UNIXTIME(toDate32('2018-01-02'), '%I'); SELECT FROM_UNIXTIME(toDateTime('2018-01-02 11:33:44'), '%I'); SELECT FROM_UNIXTIME(toDateTime('2018-01-02 00:33:44'), '%I'); -SELECT FROM_UNIXTIME(toDateTime('2018-01-01 00:33:44'), '%j'); -SELECT FROM_UNIXTIME(toDateTime('2000-12-31 00:33:44'), '%j'); -SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%m'); -SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%M'); -SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%n'); -SELECT FROM_UNIXTIME(toDateTime('2018-01-02 00:33:44'), '%p'); +SELECT FROM_UNIXTIME(toDateTime('2018-01-01 00:33:44'), '%j'), FROM_UNIXTIME(toDate32('2018-01-01'), '%j'); +SELECT FROM_UNIXTIME(toDateTime('2000-12-31 00:33:44'), '%j'), FROM_UNIXTIME(toDate32('2000-12-31'), '%j'); +SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%m'), FROM_UNIXTIME(toDate32('2018-01-02'), '%m'); +SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), 
'%M'), FROM_UNIXTIME(toDate32('2018-01-02'), '%M'); +SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%n'), FROM_UNIXTIME(toDate32('2018-01-02'), '%n'); +SELECT FROM_UNIXTIME(toDateTime('2018-01-02 00:33:44'), '%p'), FROM_UNIXTIME(toDate32('2018-01-02'), '%p'); SELECT FROM_UNIXTIME(toDateTime('2018-01-02 11:33:44'), '%p'); SELECT FROM_UNIXTIME(toDateTime('2018-01-02 12:33:44'), '%p'); -SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%R'); -SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%S'); -SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%t'); -SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%T'); +SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%R'), FROM_UNIXTIME(toDate32('2018-01-02'), '%R'); +SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%S'), FROM_UNIXTIME(toDate32('2018-01-02'), '%S'); +SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%t'), FROM_UNIXTIME(toDate32('2018-01-02'), '%t'); +SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%T'), FROM_UNIXTIME(toDate32('2018-01-02'), '%T'); diff --git a/tests/queries/0_stateless/01440_to_date_monotonicity.reference b/tests/queries/0_stateless/01440_to_date_monotonicity.reference index dd8545b721d..2dbec540fbb 100644 --- a/tests/queries/0_stateless/01440_to_date_monotonicity.reference +++ b/tests/queries/0_stateless/01440_to_date_monotonicity.reference @@ -1,4 +1,4 @@ 0 -1970-01-01 2120-07-26 1970-04-11 1970-01-01 2149-06-06 +1970-01-01 2106-02-07 1970-04-11 1970-01-01 2149-06-06 1970-01-01 02:00:00 2106-02-07 09:28:15 1970-01-01 02:16:40 2000-01-01 13:12:12 diff --git a/tests/queries/0_stateless/01601_custom_tld.reference b/tests/queries/0_stateless/01601_custom_tld.reference index 981067606a2..7ef6eb7d3a2 100644 --- a/tests/queries/0_stateless/01601_custom_tld.reference +++ b/tests/queries/0_stateless/01601_custom_tld.reference @@ -89,3 +89,92 @@ select cutToFirstSignificantSubdomainCustom('city.kawasaki.jp', 'public_suffix_l city.kawasaki.jp select cutToFirstSignificantSubdomainCustom('some.city.kawasaki.jp', 'public_suffix_list'); city.kawasaki.jp +select '-- no-tld'; +-- no-tld +-- even if there is no TLD, 2-nd level by default anyway +-- FIXME: make this behavior optional (so that TLD for host never changed, either empty or something real) +select cutToFirstSignificantSubdomainRFC('there-is-no-such-domain'); + +select cutToFirstSignificantSubdomainRFC('foo.there-is-no-such-domain'); +foo.there-is-no-such-domain +select cutToFirstSignificantSubdomainRFC('bar.foo.there-is-no-such-domain'); +foo.there-is-no-such-domain +select cutToFirstSignificantSubdomainCustomRFC('there-is-no-such-domain', 'public_suffix_list'); + +select cutToFirstSignificantSubdomainCustomRFC('foo.there-is-no-such-domain', 'public_suffix_list'); +foo.there-is-no-such-domain +select cutToFirstSignificantSubdomainCustomRFC('bar.foo.there-is-no-such-domain', 'public_suffix_list'); +foo.there-is-no-such-domain +select firstSignificantSubdomainCustomRFC('bar.foo.there-is-no-such-domain', 'public_suffix_list'); +foo +select '-- generic'; +-- generic +select firstSignificantSubdomainCustomRFC('foo.kernel.biz.ss', 'public_suffix_list'); -- kernel +kernel +select cutToFirstSignificantSubdomainCustomRFC('foo.kernel.biz.ss', 'public_suffix_list'); -- kernel.biz.ss +kernel.biz.ss +select '-- difference'; +-- difference +-- biz.ss is not in the default TLD list, hence: +select cutToFirstSignificantSubdomainRFC('foo.kernel.biz.ss'); -- biz.ss +biz.ss +select 
cutToFirstSignificantSubdomainCustomRFC('foo.kernel.biz.ss', 'public_suffix_list'); -- kernel.biz.ss +kernel.biz.ss +select '-- 3+level'; +-- 3+level +select cutToFirstSignificantSubdomainCustomRFC('xx.blogspot.co.at', 'public_suffix_list'); -- xx.blogspot.co.at +xx.blogspot.co.at +select firstSignificantSubdomainCustomRFC('xx.blogspot.co.at', 'public_suffix_list'); -- blogspot +blogspot +select cutToFirstSignificantSubdomainCustomRFC('foo.bar.xx.blogspot.co.at', 'public_suffix_list'); -- xx.blogspot.co.at +xx.blogspot.co.at +select firstSignificantSubdomainCustomRFC('foo.bar.xx.blogspot.co.at', 'public_suffix_list'); -- blogspot +blogspot +select '-- url'; +-- url +select cutToFirstSignificantSubdomainCustomRFC('http://foobar.com', 'public_suffix_list'); +foobar.com +select cutToFirstSignificantSubdomainCustomRFC('http://foobar.com/foo', 'public_suffix_list'); +foobar.com +select cutToFirstSignificantSubdomainCustomRFC('http://bar.foobar.com/foo', 'public_suffix_list'); +foobar.com +select cutToFirstSignificantSubdomainCustomRFC('http://xx.blogspot.co.at', 'public_suffix_list'); +xx.blogspot.co.at +select '-- www'; +-- www +select cutToFirstSignificantSubdomainCustomWithWWWRFC('http://www.foo', 'public_suffix_list'); +www.foo +select cutToFirstSignificantSubdomainCustomRFC('http://www.foo', 'public_suffix_list'); +foo +select '-- vector'; +-- vector +select cutToFirstSignificantSubdomainCustomRFC('http://xx.blogspot.co.at/' || toString(number), 'public_suffix_list') from numbers(1); +xx.blogspot.co.at +select cutToFirstSignificantSubdomainCustomRFC('there-is-no-such-domain' || toString(number), 'public_suffix_list') from numbers(1); + +select '-- no new line'; +-- no new line +select cutToFirstSignificantSubdomainCustomRFC('foo.bar', 'no_new_line_list'); +foo.bar +select cutToFirstSignificantSubdomainCustomRFC('a.foo.bar', 'no_new_line_list'); +a.foo.bar +select cutToFirstSignificantSubdomainCustomRFC('a.foo.baz', 'no_new_line_list'); +foo.baz +select '-- asterisk'; +-- asterisk +select cutToFirstSignificantSubdomainCustomRFC('foo.something.sheffield.sch.uk', 'public_suffix_list'); +something.sheffield.sch.uk +select cutToFirstSignificantSubdomainCustomRFC('something.sheffield.sch.uk', 'public_suffix_list'); +something.sheffield.sch.uk +select cutToFirstSignificantSubdomainCustomRFC('sheffield.sch.uk', 'public_suffix_list'); +sheffield.sch.uk +select '-- exclamation mark'; +-- exclamation mark +select cutToFirstSignificantSubdomainCustomRFC('foo.kawasaki.jp', 'public_suffix_list'); +foo.kawasaki.jp +select cutToFirstSignificantSubdomainCustomRFC('foo.foo.kawasaki.jp', 'public_suffix_list'); +foo.foo.kawasaki.jp +select cutToFirstSignificantSubdomainCustomRFC('city.kawasaki.jp', 'public_suffix_list'); +city.kawasaki.jp +select cutToFirstSignificantSubdomainCustomRFC('some.city.kawasaki.jp', 'public_suffix_list'); +city.kawasaki.jp diff --git a/tests/queries/0_stateless/01601_custom_tld.sql b/tests/queries/0_stateless/01601_custom_tld.sql deleted file mode 100644 index 69ae209af2c..00000000000 --- a/tests/queries/0_stateless/01601_custom_tld.sql +++ /dev/null @@ -1,57 +0,0 @@ --- { echo } - -select '-- no-tld'; --- even if there is no TLD, 2-nd level by default anyway --- FIXME: make this behavior optional (so that TLD for host never changed, either empty or something real) -select cutToFirstSignificantSubdomain('there-is-no-such-domain'); -select cutToFirstSignificantSubdomain('foo.there-is-no-such-domain'); -select cutToFirstSignificantSubdomain('bar.foo.there-is-no-such-domain'); -select 
cutToFirstSignificantSubdomainCustom('there-is-no-such-domain', 'public_suffix_list'); -select cutToFirstSignificantSubdomainCustom('foo.there-is-no-such-domain', 'public_suffix_list'); -select cutToFirstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list'); -select firstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list'); - -select '-- generic'; -select firstSignificantSubdomainCustom('foo.kernel.biz.ss', 'public_suffix_list'); -- kernel -select cutToFirstSignificantSubdomainCustom('foo.kernel.biz.ss', 'public_suffix_list'); -- kernel.biz.ss - -select '-- difference'; --- biz.ss is not in the default TLD list, hence: -select cutToFirstSignificantSubdomain('foo.kernel.biz.ss'); -- biz.ss -select cutToFirstSignificantSubdomainCustom('foo.kernel.biz.ss', 'public_suffix_list'); -- kernel.biz.ss - -select '-- 3+level'; -select cutToFirstSignificantSubdomainCustom('xx.blogspot.co.at', 'public_suffix_list'); -- xx.blogspot.co.at -select firstSignificantSubdomainCustom('xx.blogspot.co.at', 'public_suffix_list'); -- blogspot -select cutToFirstSignificantSubdomainCustom('foo.bar.xx.blogspot.co.at', 'public_suffix_list'); -- xx.blogspot.co.at -select firstSignificantSubdomainCustom('foo.bar.xx.blogspot.co.at', 'public_suffix_list'); -- blogspot - -select '-- url'; -select cutToFirstSignificantSubdomainCustom('http://foobar.com', 'public_suffix_list'); -select cutToFirstSignificantSubdomainCustom('http://foobar.com/foo', 'public_suffix_list'); -select cutToFirstSignificantSubdomainCustom('http://bar.foobar.com/foo', 'public_suffix_list'); -select cutToFirstSignificantSubdomainCustom('http://xx.blogspot.co.at', 'public_suffix_list'); - -select '-- www'; -select cutToFirstSignificantSubdomainCustomWithWWW('http://www.foo', 'public_suffix_list'); -select cutToFirstSignificantSubdomainCustom('http://www.foo', 'public_suffix_list'); - -select '-- vector'; -select cutToFirstSignificantSubdomainCustom('http://xx.blogspot.co.at/' || toString(number), 'public_suffix_list') from numbers(1); -select cutToFirstSignificantSubdomainCustom('there-is-no-such-domain' || toString(number), 'public_suffix_list') from numbers(1); - -select '-- no new line'; -select cutToFirstSignificantSubdomainCustom('foo.bar', 'no_new_line_list'); -select cutToFirstSignificantSubdomainCustom('a.foo.bar', 'no_new_line_list'); -select cutToFirstSignificantSubdomainCustom('a.foo.baz', 'no_new_line_list'); - -select '-- asterisk'; -select cutToFirstSignificantSubdomainCustom('foo.something.sheffield.sch.uk', 'public_suffix_list'); -select cutToFirstSignificantSubdomainCustom('something.sheffield.sch.uk', 'public_suffix_list'); -select cutToFirstSignificantSubdomainCustom('sheffield.sch.uk', 'public_suffix_list'); - -select '-- exclamation mark'; -select cutToFirstSignificantSubdomainCustom('foo.kawasaki.jp', 'public_suffix_list'); -select cutToFirstSignificantSubdomainCustom('foo.foo.kawasaki.jp', 'public_suffix_list'); -select cutToFirstSignificantSubdomainCustom('city.kawasaki.jp', 'public_suffix_list'); -select cutToFirstSignificantSubdomainCustom('some.city.kawasaki.jp', 'public_suffix_list'); diff --git a/tests/queries/0_stateless/01601_custom_tld.sql.j2 b/tests/queries/0_stateless/01601_custom_tld.sql.j2 new file mode 100644 index 00000000000..1e0982ea1b7 --- /dev/null +++ b/tests/queries/0_stateless/01601_custom_tld.sql.j2 @@ -0,0 +1,61 @@ +-- { echo } + +{% for suffix in ['', 'RFC'] -%} + +select '-- no-tld'; +-- even if there is no TLD, 2-nd level by default anyway +-- 
FIXME: make this behavior optional (so that TLD for host never changed, either empty or something real) +select cutToFirstSignificantSubdomain{{ suffix }}('there-is-no-such-domain'); +select cutToFirstSignificantSubdomain{{ suffix }}('foo.there-is-no-such-domain'); +select cutToFirstSignificantSubdomain{{ suffix }}('bar.foo.there-is-no-such-domain'); +select cutToFirstSignificantSubdomainCustom{{ suffix }}('there-is-no-such-domain', 'public_suffix_list'); +select cutToFirstSignificantSubdomainCustom{{ suffix }}('foo.there-is-no-such-domain', 'public_suffix_list'); +select cutToFirstSignificantSubdomainCustom{{ suffix }}('bar.foo.there-is-no-such-domain', 'public_suffix_list'); +select firstSignificantSubdomainCustom{{ suffix }}('bar.foo.there-is-no-such-domain', 'public_suffix_list'); + +select '-- generic'; +select firstSignificantSubdomainCustom{{ suffix }}('foo.kernel.biz.ss', 'public_suffix_list'); -- kernel +select cutToFirstSignificantSubdomainCustom{{ suffix }}('foo.kernel.biz.ss', 'public_suffix_list'); -- kernel.biz.ss + +select '-- difference'; +-- biz.ss is not in the default TLD list, hence: +select cutToFirstSignificantSubdomain{{ suffix }}('foo.kernel.biz.ss'); -- biz.ss +select cutToFirstSignificantSubdomainCustom{{ suffix }}('foo.kernel.biz.ss', 'public_suffix_list'); -- kernel.biz.ss + +select '-- 3+level'; +select cutToFirstSignificantSubdomainCustom{{ suffix }}('xx.blogspot.co.at', 'public_suffix_list'); -- xx.blogspot.co.at +select firstSignificantSubdomainCustom{{ suffix }}('xx.blogspot.co.at', 'public_suffix_list'); -- blogspot +select cutToFirstSignificantSubdomainCustom{{ suffix }}('foo.bar.xx.blogspot.co.at', 'public_suffix_list'); -- xx.blogspot.co.at +select firstSignificantSubdomainCustom{{ suffix }}('foo.bar.xx.blogspot.co.at', 'public_suffix_list'); -- blogspot + +select '-- url'; +select cutToFirstSignificantSubdomainCustom{{ suffix }}('http://foobar.com', 'public_suffix_list'); +select cutToFirstSignificantSubdomainCustom{{ suffix }}('http://foobar.com/foo', 'public_suffix_list'); +select cutToFirstSignificantSubdomainCustom{{ suffix }}('http://bar.foobar.com/foo', 'public_suffix_list'); +select cutToFirstSignificantSubdomainCustom{{ suffix }}('http://xx.blogspot.co.at', 'public_suffix_list'); + +select '-- www'; +select cutToFirstSignificantSubdomainCustomWithWWW{{ suffix }}('http://www.foo', 'public_suffix_list'); +select cutToFirstSignificantSubdomainCustom{{ suffix }}('http://www.foo', 'public_suffix_list'); + +select '-- vector'; +select cutToFirstSignificantSubdomainCustom{{ suffix }}('http://xx.blogspot.co.at/' || toString(number), 'public_suffix_list') from numbers(1); +select cutToFirstSignificantSubdomainCustom{{ suffix }}('there-is-no-such-domain' || toString(number), 'public_suffix_list') from numbers(1); + +select '-- no new line'; +select cutToFirstSignificantSubdomainCustom{{ suffix }}('foo.bar', 'no_new_line_list'); +select cutToFirstSignificantSubdomainCustom{{ suffix }}('a.foo.bar', 'no_new_line_list'); +select cutToFirstSignificantSubdomainCustom{{ suffix }}('a.foo.baz', 'no_new_line_list'); + +select '-- asterisk'; +select cutToFirstSignificantSubdomainCustom{{ suffix }}('foo.something.sheffield.sch.uk', 'public_suffix_list'); +select cutToFirstSignificantSubdomainCustom{{ suffix }}('something.sheffield.sch.uk', 'public_suffix_list'); +select cutToFirstSignificantSubdomainCustom{{ suffix }}('sheffield.sch.uk', 'public_suffix_list'); + +select '-- exclamation mark'; +select cutToFirstSignificantSubdomainCustom{{ suffix 
}}('foo.kawasaki.jp', 'public_suffix_list'); +select cutToFirstSignificantSubdomainCustom{{ suffix }}('foo.foo.kawasaki.jp', 'public_suffix_list'); +select cutToFirstSignificantSubdomainCustom{{ suffix }}('city.kawasaki.jp', 'public_suffix_list'); +select cutToFirstSignificantSubdomainCustom{{ suffix }}('some.city.kawasaki.jp', 'public_suffix_list'); + +{% endfor %} diff --git a/tests/queries/0_stateless/01685_json_extract_double_as_float.reference b/tests/queries/0_stateless/01685_json_extract_double_as_float.reference index f3f4206b425..a24f6569f44 100644 --- a/tests/queries/0_stateless/01685_json_extract_double_as_float.reference +++ b/tests/queries/0_stateless/01685_json_extract_double_as_float.reference @@ -1,7 +1,7 @@ 1.1 1.1 1.1 1.1 0.01 0.01 0.01 0.01 -0 -\N +1 +1 -1e300 -inf 0 diff --git a/tests/queries/0_stateless/01710_projection_in_index.reference b/tests/queries/0_stateless/01710_projection_in_index.reference index 73c1df53be4..4be49ff0513 100644 --- a/tests/queries/0_stateless/01710_projection_in_index.reference +++ b/tests/queries/0_stateless/01710_projection_in_index.reference @@ -1,2 +1,3 @@ 1 1 1 2 2 2 +1 diff --git a/tests/queries/0_stateless/01710_projection_in_index.sql b/tests/queries/0_stateless/01710_projection_in_index.sql index 2669d69dc9f..87f5e79e37e 100644 --- a/tests/queries/0_stateless/01710_projection_in_index.sql +++ b/tests/queries/0_stateless/01710_projection_in_index.sql @@ -9,3 +9,13 @@ set allow_experimental_projection_optimization = 1, max_rows_to_read = 3; select * from t where i < 5 and j in (1, 2); drop table t; + +drop table if exists test; + +create table test (name String, time Int64) engine MergeTree order by time; + +insert into test values ('hello world', 1662336000241); + +select count() from (select fromUnixTimestamp64Milli(time, 'UTC') time_fmt, name from test where time_fmt > '2022-09-05 00:00:00'); + +drop table test; diff --git a/tests/queries/0_stateless/01744_fuse_sum_count_aggregate.sql b/tests/queries/0_stateless/01744_fuse_sum_count_aggregate.sql index 5b6ed440ba4..375662eb405 100644 --- a/tests/queries/0_stateless/01744_fuse_sum_count_aggregate.sql +++ b/tests/queries/0_stateless/01744_fuse_sum_count_aggregate.sql @@ -10,4 +10,5 @@ EXPLAIN SYNTAX SELECT sum(a), sum(b), count(b) from fuse_tbl; SELECT '---------NOT trigger fuse--------'; SELECT sum(a), avg(b) from fuse_tbl; EXPLAIN SYNTAX SELECT sum(a), avg(b) from fuse_tbl; + DROP TABLE fuse_tbl; diff --git a/tests/queries/0_stateless/01810_max_part_removal_threads_long.sh b/tests/queries/0_stateless/01810_max_part_removal_threads_long.sh index b1f30a41924..85662438f33 100755 --- a/tests/queries/0_stateless/01810_max_part_removal_threads_long.sh +++ b/tests/queries/0_stateless/01810_max_part_removal_threads_long.sh @@ -1,5 +1,6 @@ #!/usr/bin/env bash -# Tags: long +# Tags: long, no-s3-storage +# Because parallel parts removal disabled for s3 storage # NOTE: this done as not .sql since we need to Ordinary database # (to account threads in query_log for DROP TABLE query) diff --git a/tests/queries/0_stateless/01811_datename.reference b/tests/queries/0_stateless/01811_datename.reference index 2968fde301a..29bf05750e7 100644 --- a/tests/queries/0_stateless/01811_datename.reference +++ b/tests/queries/0_stateless/01811_datename.reference @@ -1,10 +1,10 @@ -2021 2021 2021 -2 2 2 -April April April -104 104 104 -14 14 14 -15 15 15 -Wednesday Wednesday Wednesday +2021 2021 2021 2021 +2 2 2 2 +April April April April +104 104 104 104 +14 14 14 14 +15 15 15 15 +Wednesday Wednesday Wednesday 
Wednesday 11 11 22 22 33 33 diff --git a/tests/queries/0_stateless/01811_datename.sql b/tests/queries/0_stateless/01811_datename.sql index b757d9ae018..fe9f5d20238 100644 --- a/tests/queries/0_stateless/01811_datename.sql +++ b/tests/queries/0_stateless/01811_datename.sql @@ -1,44 +1,51 @@ WITH toDate('2021-04-14') AS date_value, + toDate32('2021-04-14') AS date_32_value, toDateTime('2021-04-14 11:22:33') AS date_time_value, toDateTime64('2021-04-14 11:22:33', 3) AS date_time_64_value -SELECT dateName('year', date_value), dateName('year', date_time_value), dateName('year', date_time_64_value); +SELECT dateName('year', date_value), dateName('year', date_32_value), dateName('year', date_time_value), dateName('year', date_time_64_value); WITH toDate('2021-04-14') AS date_value, + toDate32('2021-04-14') AS date_32_value, toDateTime('2021-04-14 11:22:33') AS date_time_value, toDateTime64('2021-04-14 11:22:33', 3) AS date_time_64_value -SELECT dateName('quarter', date_value), dateName('quarter', date_time_value), dateName('quarter', date_time_64_value); +SELECT dateName('quarter', date_value), dateName('quarter', date_32_value), dateName('quarter', date_time_value), dateName('quarter', date_time_64_value); WITH toDate('2021-04-14') AS date_value, + toDate32('2021-04-14') AS date_32_value, toDateTime('2021-04-14 11:22:33') AS date_time_value, toDateTime64('2021-04-14 11:22:33', 3) AS date_time_64_value -SELECT dateName('month', date_value), dateName('month', date_time_value), dateName('month', date_time_64_value); +SELECT dateName('month', date_value), dateName('month', date_32_value), dateName('month', date_time_value), dateName('month', date_time_64_value); WITH toDate('2021-04-14') AS date_value, + toDate32('2021-04-14') AS date_32_value, toDateTime('2021-04-14 11:22:33') AS date_time_value, toDateTime64('2021-04-14 11:22:33', 3) AS date_time_64_value -SELECT dateName('dayofyear', date_value), dateName('dayofyear', date_time_value), dateName('dayofyear', date_time_64_value); +SELECT dateName('dayofyear', date_value), dateName('dayofyear', date_32_value), dateName('dayofyear', date_time_value), dateName('dayofyear', date_time_64_value); WITH toDate('2021-04-14') AS date_value, + toDate32('2021-04-14') AS date_32_value, toDateTime('2021-04-14 11:22:33') AS date_time_value, toDateTime64('2021-04-14 11:22:33', 3) AS date_time_64_value -SELECT dateName('day', date_value), dateName('day', date_time_value), dateName('day', date_time_64_value); +SELECT dateName('day', date_value), dateName('day', date_32_value), dateName('day', date_time_value), dateName('day', date_time_64_value); WITH toDate('2021-04-14') AS date_value, + toDate32('2021-04-14') AS date_32_value, toDateTime('2021-04-14 11:22:33') AS date_time_value, toDateTime64('2021-04-14 11:22:33', 3) AS date_time_64_value -SELECT dateName('week', date_value), dateName('week', date_time_value), dateName('week', date_time_64_value); +SELECT dateName('week', date_value), dateName('week', date_32_value), dateName('week', date_time_value), dateName('week', date_time_64_value); WITH toDate('2021-04-14') AS date_value, + toDate32('2021-04-14') AS date_32_value, toDateTime('2021-04-14 11:22:33') AS date_time_value, toDateTime64('2021-04-14 11:22:33', 3) AS date_time_64_value -SELECT dateName('weekday', date_value), dateName('weekday', date_time_value), dateName('weekday', date_time_64_value); +SELECT dateName('weekday', date_value), dateName('weekday', date_32_value), dateName('weekday', date_time_value), dateName('weekday', date_time_64_value); WITH 
toDateTime('2021-04-14 11:22:33') AS date_time_value, diff --git a/tests/queries/0_stateless/01825_type_json_in_array.reference b/tests/queries/0_stateless/01825_type_json_in_array.reference new file mode 100644 index 00000000000..c36a22e6951 --- /dev/null +++ b/tests/queries/0_stateless/01825_type_json_in_array.reference @@ -0,0 +1,23 @@ +{"id":1,"arr":[{"k1":1,"k2":{"k3":2,"k4":3,"k5":""}},{"k1":2,"k2":{"k3":0,"k4":0,"k5":"foo"}}]} +{"id":2,"arr":[{"k1":3,"k2":{"k3":4,"k4":5,"k5":""}}]} +1 [1,2] [2,0] [3,0] ['','foo'] +2 [3] [4] [5] [''] +{"arr":{"k1":1,"k2":{"k3":2,"k4":3,"k5":""}}} +{"arr":{"k1":2,"k2":{"k3":0,"k4":0,"k5":"foo"}}} +{"arr":{"k1":3,"k2":{"k3":4,"k4":5,"k5":""}}} +Array(Tuple(k1 Int8, k2 Tuple(k3 Int8, k4 Int8, k5 String))) +{"id":1,"arr":[{"k1":[{"k2":"aaa","k3":"bbb","k4":0},{"k2":"ccc","k3":"","k4":0}],"k5":{"k6":""}}]} +{"id":2,"arr":[{"k1":[{"k2":"","k3":"ddd","k4":10},{"k2":"","k3":"","k4":20}],"k5":{"k6":"foo"}}]} +1 [['aaa','ccc']] [['bbb','']] [[0,0]] [''] +2 [['','']] [['ddd','']] [[10,20]] ['foo'] +{"k1":{"k2":"","k3":"","k4":20}} +{"k1":{"k2":"","k3":"ddd","k4":10}} +{"k1":{"k2":"aaa","k3":"bbb","k4":0}} +{"k1":{"k2":"ccc","k3":"","k4":0}} +Tuple(k2 String, k3 String, k4 Int8) +{"arr":[{"x":1}]} +{"arr":{"x":{"y":1},"t":{"y":2}}} +{"arr":[1,{"y":1}]} +{"arr":[2,{"y":2}]} +{"arr":[{"x":"aaa","y":[1,2,3]}]} +{"arr":[{"x":1}]} diff --git a/tests/queries/0_stateless/01825_type_json_in_array.sql b/tests/queries/0_stateless/01825_type_json_in_array.sql new file mode 100644 index 00000000000..e5c20d7ba6b --- /dev/null +++ b/tests/queries/0_stateless/01825_type_json_in_array.sql @@ -0,0 +1,35 @@ +-- Tags: no-fasttest + +SET allow_experimental_object_type = 1; +DROP TABLE IF EXISTS t_json_array; + +CREATE TABLE t_json_array (id UInt32, arr Array(JSON)) ENGINE = MergeTree ORDER BY id; + +INSERT INTO t_json_array FORMAT JSONEachRow {"id": 1, "arr": [{"k1": 1, "k2": {"k3": 2, "k4": 3}}, {"k1": 2, "k2": {"k5": "foo"}}]} +INSERT INTO t_json_array FORMAT JSONEachRow {"id": 2, "arr": [{"k1": 3, "k2": {"k3": 4, "k4": 5}}]} + +SET output_format_json_named_tuples_as_objects = 1; + +SELECT * FROM t_json_array ORDER BY id FORMAT JSONEachRow; +SELECT id, arr.k1, arr.k2.k3, arr.k2.k4, arr.k2.k5 FROM t_json_array ORDER BY id; +SELECT arr FROM t_json_array ARRAY JOIN arr ORDER BY arr.k1 FORMAT JSONEachRow; +SELECT toTypeName(arr) FROM t_json_array LIMIT 1; + +TRUNCATE TABLE t_json_array; + +INSERT INTO t_json_array FORMAT JSONEachRow {"id": 1, "arr": [{"k1": [{"k2": "aaa", "k3": "bbb"}, {"k2": "ccc"}]}]} +INSERT INTO t_json_array FORMAT JSONEachRow {"id": 2, "arr": [{"k1": [{"k3": "ddd", "k4": 10}, {"k4": 20}], "k5": {"k6": "foo"}}]} + +SELECT * FROM t_json_array ORDER BY id FORMAT JSONEachRow; +SELECT id, arr.k1.k2, arr.k1.k3, arr.k1.k4, arr.k5.k6 FROM t_json_array ORDER BY id; + +SELECT arrayJoin(arrayJoin(arr.k1)) AS k1 FROM t_json_array ORDER BY k1 FORMAT JSONEachRow; +SELECT toTypeName(arrayJoin(arrayJoin(arr.k1))) AS arr FROM t_json_array LIMIT 1; + +DROP TABLE t_json_array; + +SELECT * FROM values('arr Array(JSON)', '[\'{"x" : 1}\']') FORMAT JSONEachRow; +SELECT * FROM values('arr Map(String, JSON)', '{\'x\' : \'{"y" : 1}\', \'t\' : \'{"y" : 2}\'}') FORMAT JSONEachRow; +SELECT * FROM values('arr Tuple(Int32, JSON)', '(1, \'{"y" : 1}\')', '(2, \'{"y" : 2}\')') FORMAT JSONEachRow; +SELECT * FROM format(JSONEachRow, '{"arr" : [{"x" : "aaa", "y" : [1,2,3]}]}') FORMAT JSONEachRow; +SELECT * FROM values('arr Array(JSON)', '[\'{"x" : 1}\']') FORMAT JSONEachRow; diff --git 
a/tests/queries/0_stateless/01825_type_json_in_other_types.reference b/tests/queries/0_stateless/01825_type_json_in_other_types.reference new file mode 100644 index 00000000000..b94885a65ab --- /dev/null +++ b/tests/queries/0_stateless/01825_type_json_in_other_types.reference @@ -0,0 +1,17 @@ +Tuple(String, Map(String, Array(Tuple(k1 Nested(k2 Int8, k3 Int8, k5 String), k4 String))), Tuple(k1 String, k2 Tuple(k3 String, k4 String))) +============= +{"id":1,"data":["foo",{"aa":[{"k1":[{"k2":1,"k3":2,"k5":""},{"k2":0,"k3":3,"k5":""}],"k4":""},{"k1":[{"k2":4,"k3":0,"k5":""},{"k2":0,"k3":5,"k5":""},{"k2":6,"k3":0,"k5":""}],"k4":"qqq"}],"bb":[{"k1":[],"k4":"www"},{"k1":[{"k2":7,"k3":8,"k5":""},{"k2":9,"k3":10,"k5":""},{"k2":11,"k3":12,"k5":""}],"k4":""}]},{"k1":"aa","k2":{"k3":"bb","k4":"c"}}]} +{"id":2,"data":["bar",{"aa":[{"k1":[{"k2":13,"k3":14,"k5":""},{"k2":15,"k3":16,"k5":""}],"k4":"www"}]},{"k1":"","k2":{"k3":"","k4":""}}]} +{"id":3,"data":["some",{"aa":[{"k1":[{"k2":0,"k3":20,"k5":"some"}],"k4":""}]},{"k1":"eee","k2":{"k3":"","k4":""}}]} +============= +{"aa":[{"k1":[{"k2":1,"k3":2,"k5":""},{"k2":0,"k3":3,"k5":""}],"k4":""},{"k1":[{"k2":4,"k3":0,"k5":""},{"k2":0,"k3":5,"k5":""},{"k2":6,"k3":0,"k5":""}],"k4":"qqq"}],"bb":[{"k1":[],"k4":"www"},{"k1":[{"k2":7,"k3":8,"k5":""},{"k2":9,"k3":10,"k5":""},{"k2":11,"k3":12,"k5":""}],"k4":""}]} +{"aa":[{"k1":[{"k2":13,"k3":14,"k5":""},{"k2":15,"k3":16,"k5":""}],"k4":"www"}],"bb":[]} +{"aa":[{"k1":[{"k2":0,"k3":20,"k5":"some"}],"k4":""}],"bb":[]} +============= +{"k1":[[{"k2":1,"k3":2,"k5":""},{"k2":0,"k3":3,"k5":""}],[{"k2":4,"k3":0,"k5":""},{"k2":0,"k3":5,"k5":""},{"k2":6,"k3":0,"k5":""}]],"k4":["","qqq"]} +{"k1":[[{"k2":13,"k3":14,"k5":""},{"k2":15,"k3":16,"k5":""}]],"k4":["www"]} +{"k1":[[{"k2":0,"k3":20,"k5":"some"}]],"k4":[""]} +============= +{"obj":{"k1":"aa","k2":{"k3":"bb","k4":"c"}}} +{"obj":{"k1":"","k2":{"k3":"","k4":""}}} +{"obj":{"k1":"eee","k2":{"k3":"","k4":""}}} diff --git a/tests/queries/0_stateless/01825_type_json_in_other_types.sh b/tests/queries/0_stateless/01825_type_json_in_other_types.sh new file mode 100755 index 00000000000..e9cf0bcaca1 --- /dev/null +++ b/tests/queries/0_stateless/01825_type_json_in_other_types.sh @@ -0,0 +1,91 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} -q "SET allow_experimental_object_type = 1" +${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS t_json_nested" + +${CLICKHOUSE_CLIENT} -q " + CREATE TABLE t_json_nested + ( + id UInt32, + data Tuple(String, Map(String, Array(JSON)), JSON) + ) + ENGINE = MergeTree ORDER BY id" --allow_experimental_object_type 1 + +cat < 0 + if retry_range_request: + code = HttpProcessor.responses_to_get.pop() + if code not in HttpProcessor.responses: + self.send_response(int(code)) + else: + self.send_response(206 if HttpProcessor.allow_range else 200) + self.send_header("Content-type", "application/json") if HttpProcessor.allow_range: @@ -169,7 +177,7 @@ class HttpProcessor(BaseHTTPRequestHandler): self.send_head() def do_GET(self): - result = self.send_head() + result = self.send_head(True) if result == None: return @@ -211,26 +219,36 @@ def start_server(): ##################################################################### -def test_select(download_buffer_size): +def test_select(settings): global HTTP_SERVER_URL_STR - query = f"SELECT * FROM url('{HTTP_SERVER_URL_STR}','JSONAsString') SETTINGS max_download_buffer_size={download_buffer_size};" + query = f"SELECT * FROM url('{HTTP_SERVER_URL_STR}','JSONAsString') SETTINGS {','.join((k+'='+repr(v) for k, v in settings.items()))};" check_answers(query, EXPECTED_ANSWER) -def run_test(allow_range, download_buffer_size=20): +def run_test(allow_range, settings, check_retries=False): HttpProcessor.range_used = False HttpProcessor.get_call_num = 0 HttpProcessor.allow_range = allow_range + if check_retries: + HttpProcessor.responses_to_get = ["500", "200", "206"] + retries_num = len(HttpProcessor.responses_to_get) t, httpd = start_server() t.start() - test_select(download_buffer_size) + test_select(settings) + download_buffer_size = settings["max_download_buffer_size"] expected_get_call_num = (PAYLOAD_LEN - 1) // download_buffer_size + 1 if allow_range: if not HttpProcessor.range_used: raise Exception("HTTP Range was not used when supported") + if check_retries and len(HttpProcessor.responses_to_get) > 0: + raise Exception("Expected to get http response 500, which had to be retried, but 200 ok returned and then retried") + + if retries_num > 0: + expected_get_call_num += retries_num - 1 + if expected_get_call_num != HttpProcessor.get_call_num: raise Exception( f"Invalid amount of GET calls with Range. Expected {expected_get_call_num}, actual {HttpProcessor.get_call_num}" @@ -245,9 +263,23 @@ def run_test(allow_range, download_buffer_size=20): def main(): - run_test(allow_range=False) - run_test(allow_range=True, download_buffer_size=20) - run_test(allow_range=True, download_buffer_size=10) + settings = {"max_download_buffer_size" : 20} + + # Test Accept-Ranges=False + run_test(allow_range=False, settings=settings) + # Test Accept-Ranges=True, parallel download is used + run_test(allow_range=True, settings=settings) + + # Test Accept-Ranges=True, parallel download is used + settings = {"max_download_buffer_size" : 10} + run_test(allow_range=True, settings=settings) + + # Test Accept-Ranges=True, parallel download is not used, + # first get request 500 response, + # second get request 200ok response, + # third get request (retry) 206 response. 
+ settings["max_download_threads"] = 2 + run_test(allow_range=True, settings=settings, check_retries=True) if __name__ == "__main__": diff --git a/tests/queries/0_stateless/02233_HTTP_ranged.reference b/tests/queries/0_stateless/02233_HTTP_ranged.reference index 17f0fff172a..6164e96afc5 100644 --- a/tests/queries/0_stateless/02233_HTTP_ranged.reference +++ b/tests/queries/0_stateless/02233_HTTP_ranged.reference @@ -1,3 +1,4 @@ PASSED PASSED PASSED +PASSED diff --git a/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.reference b/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.reference new file mode 100644 index 00000000000..de9ac10f641 --- /dev/null +++ b/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.reference @@ -0,0 +1,16 @@ +-- { echo } + +SYSTEM DROP FILESYSTEM CACHE; +SET enable_filesystem_cache_on_write_operations=0; +DROP TABLE IF EXISTS test; +CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_6', min_bytes_for_wide_part = 10485760; +INSERT INTO test SELECT number, toString(number) FROM numbers(100); +SELECT * FROM test FORMAT Null; +SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size; +0 79 80 +SYSTEM DROP FILESYSTEM CACHE; +SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; +SELECT * FROM test FORMAT Null; +SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; +SYSTEM DROP FILESYSTEM CACHE; +SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; diff --git a/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.sql b/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.sql new file mode 100644 index 00000000000..d3b3d3d7f4c --- /dev/null +++ b/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.sql @@ -0,0 +1,19 @@ +-- Tags: no-parallel, no-fasttest, no-s3-storage, no-random-settings + +-- { echo } + +SYSTEM DROP FILESYSTEM CACHE; +SET enable_filesystem_cache_on_write_operations=0; + +DROP TABLE IF EXISTS test; +CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_6', min_bytes_for_wide_part = 10485760; +INSERT INTO test SELECT number, toString(number) FROM numbers(100); + +SELECT * FROM test FORMAT Null; +SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size; +SYSTEM DROP FILESYSTEM CACHE; +SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; +SELECT * FROM test FORMAT Null; +SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; +SYSTEM DROP FILESYSTEM CACHE; +SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; diff --git a/tests/queries/0_stateless/02240_system_remote_filesystem_query_cache.reference b/tests/queries/0_stateless/02240_filesystem_query_cache.reference similarity index 100% rename from tests/queries/0_stateless/02240_system_remote_filesystem_query_cache.reference rename to tests/queries/0_stateless/02240_filesystem_query_cache.reference diff --git a/tests/queries/0_stateless/02240_system_remote_filesystem_query_cache.sql b/tests/queries/0_stateless/02240_filesystem_query_cache.sql similarity index 100% rename from 
tests/queries/0_stateless/02240_system_remote_filesystem_query_cache.sql rename to tests/queries/0_stateless/02240_filesystem_query_cache.sql diff --git a/tests/queries/0_stateless/02267_file_globs_schema_inference.reference b/tests/queries/0_stateless/02267_file_globs_schema_inference.reference index 98da2074df6..ad94d5181ef 100644 --- a/tests/queries/0_stateless/02267_file_globs_schema_inference.reference +++ b/tests/queries/0_stateless/02267_file_globs_schema_inference.reference @@ -1,2 +1,3 @@ 1 \N +OK diff --git a/tests/queries/0_stateless/02267_file_globs_schema_inference.sh b/tests/queries/0_stateless/02267_file_globs_schema_inference.sh new file mode 100755 index 00000000000..701e18a0259 --- /dev/null +++ b/tests/queries/0_stateless/02267_file_globs_schema_inference.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "insert into function file('${CLICKHOUSE_TEST_UNIQUE_NAME}_data2.jsonl') select NULL as x SETTINGS engine_file_truncate_on_insert = 1"; +$CLICKHOUSE_CLIENT -q "insert into function file('${CLICKHOUSE_TEST_UNIQUE_NAME}_data3.jsonl') select * from numbers(0) SETTINGS engine_file_truncate_on_insert = 1"; +$CLICKHOUSE_CLIENT -q "insert into function file('${CLICKHOUSE_TEST_UNIQUE_NAME}_data4.jsonl') select 1 as x SETTINGS engine_file_truncate_on_insert = 1"; + +$CLICKHOUSE_CLIENT -q "select * from file('${CLICKHOUSE_TEST_UNIQUE_NAME}_data*.jsonl') order by x"; + +$CLICKHOUSE_CLIENT -q "insert into function file('${CLICKHOUSE_TEST_UNIQUE_NAME}_data4.jsonl', 'TSV') select 1 as x"; +$CLICKHOUSE_CLIENT -q "insert into function file('${CLICKHOUSE_TEST_UNIQUE_NAME}_data1.jsonl', 'TSV') select [1,2,3] as x SETTINGS engine_file_truncate_on_insert = 1"; + +$CLICKHOUSE_CLIENT -q "select * from file('${CLICKHOUSE_TEST_UNIQUE_NAME}_data*.jsonl') settings schema_inference_use_cache_for_file=0" 2>&1 | grep -F -q "INCORRECT_DATA" && echo "OK" || echo "FAIL"; + diff --git a/tests/queries/0_stateless/02267_file_globs_schema_inference.sql b/tests/queries/0_stateless/02267_file_globs_schema_inference.sql deleted file mode 100644 index 6862d6f0602..00000000000 --- a/tests/queries/0_stateless/02267_file_globs_schema_inference.sql +++ /dev/null @@ -1,11 +0,0 @@ --- Tags: no-fasttest, no-parallel - -insert into function file('02267_data2.jsonl') select NULL as x; -insert into function file('02267_data3.jsonl') select * from numbers(0); -insert into function file('02267_data4.jsonl') select 1 as x; -select * from file('02267_data*.jsonl') order by x; - -insert into function file('02267_data1.jsonl', 'TSV') select 1 as x; -insert into function file('02267_data1.jsonl', 'TSV') select [1,2,3] as x; - -select * from file('02267_data*.jsonl') settings schema_inference_use_cache_for_file=0; --{serverError INCORRECT_DATA} diff --git a/tests/queries/0_stateless/02294_anova_cmp.python b/tests/queries/0_stateless/02294_anova_cmp.python new file mode 100644 index 00000000000..7597b3712d1 --- /dev/null +++ b/tests/queries/0_stateless/02294_anova_cmp.python @@ -0,0 +1,86 @@ +#!/usr/bin/env python3 +import os +import sys +from statistics import variance +from scipy import stats +import pandas as pd +import numpy as np + +CURDIR = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, os.path.join(CURDIR, 'helpers')) + +from pure_http_client import ClickHouseClient + + +# one-way ANOVA (F-test) comparing the means of several samples +def 
scipy_anova(rvs): + return stats.f_oneway(*rvs) + + +def test_and_check(rvs, n_groups, f_stat, p_value, precision=1e-2): + client = ClickHouseClient() + client.query("DROP TABLE IF EXISTS anova;") + client.query("CREATE TABLE anova (left Float64, right UInt64) ENGINE = Memory;") + for group in range(n_groups): + client.query(f'''INSERT INTO anova VALUES {", ".join([f'({i},{group})' for i in rvs[group]])};''') + + real = client.query_return_df( + '''SELECT roundBankers(a.1, 16) as f_stat, roundBankers(a.2, 16) as p_value FROM (SELECT anova(left, right) as a FROM anova) FORMAT TabSeparatedWithNames;''') + + real_f_stat = real['f_stat'][0] + real_p_value = real['p_value'][0] + assert(abs(real_f_stat - np.float64(f_stat)) < precision), f"clickhouse_f_stat {real_f_stat}, py_f_stat {f_stat}" + assert(abs(real_p_value - np.float64(p_value)) < precision), f"clickhouse_p_value {real_p_value}, py_p_value {p_value}" + client.query("DROP TABLE IF EXISTS anova;") + + +def test_anova(): + n_groups = 3 + rvs = [] + loc = 0 + scale = 5 + size = 500 + for _ in range(n_groups): + rvs.append(np.round(stats.norm.rvs(loc=loc, scale=scale, size=size), 2)) + loc += 5 + f_stat, p_value = scipy_anova(rvs) + test_and_check(rvs, n_groups, f_stat, p_value) + + n_groups = 6 + rvs = [] + loc = 0 + scale = 5 + size = 500 + for _ in range(n_groups): + rvs.append(np.round(stats.norm.rvs(loc=loc, scale=scale, size=size), 2)) + f_stat, p_value = scipy_anova(rvs) + test_and_check(rvs, n_groups, f_stat, p_value) + + n_groups = 10 + rvs = [] + loc = 1 + scale = 2 + size = 100 + for _ in range(n_groups): + rvs.append(np.round(stats.norm.rvs(loc=loc, scale=scale, size=size), 2)) + loc += 1 + scale += 2 + size += 100 + f_stat, p_value = scipy_anova(rvs) + test_and_check(rvs, n_groups, f_stat, p_value) + + n_groups = 20 + rvs = [] + loc = 0 + scale = 10 + size = 1100 + for _ in range(n_groups): + rvs.append(np.round(stats.norm.rvs(loc=loc, scale=scale, size=size), 2)) + size -= 50 + f_stat, p_value = scipy_anova(rvs) + test_and_check(rvs, n_groups, f_stat, p_value) + + +if __name__ == "__main__": + test_anova() + print("Ok.") diff --git a/tests/queries/0_stateless/02294_anova_cmp.reference b/tests/queries/0_stateless/02294_anova_cmp.reference new file mode 100644 index 00000000000..587579af915 --- /dev/null +++ b/tests/queries/0_stateless/02294_anova_cmp.reference @@ -0,0 +1 @@ +Ok. diff --git a/tests/queries/0_stateless/02294_anova_cmp.sh b/tests/queries/0_stateless/02294_anova_cmp.sh new file mode 100755 index 00000000000..3dc9ef09b99 --- /dev/null +++ b/tests/queries/0_stateless/02294_anova_cmp.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# We should have correct env vars from shell_config.sh to run this test + +python3 "$CURDIR"/02294_anova_cmp.python diff --git a/tests/queries/0_stateless/02310_clickhouse_client_INSERT_progress_profile_events.expect b/tests/queries/0_stateless/02310_clickhouse_client_INSERT_progress_profile_events.expect new file mode 100755 index 00000000000..20333ae7960 --- /dev/null +++ b/tests/queries/0_stateless/02310_clickhouse_client_INSERT_progress_profile_events.expect @@ -0,0 +1,32 @@ +#!/usr/bin/expect -f +# Tags: long + +# This is the regression for the concurrent access in ProgressIndication, +# so it is important to read enough rows here (10e6). 
+# +# Initially there was 100e6, but under thread fuzzer 10min may be not enough sometimes, +# but I believe that CI will catch possible issues even with less rows anyway. + +set basedir [file dirname $argv0] +set basename [file tail $argv0] +exp_internal -f $env(CLICKHOUSE_TMP)/$basename.debuglog 0 + +log_user 0 +set timeout 60 +match_max 100000 +set stty_init "rows 25 cols 120" + +expect_after { + eof { exp_continue } + timeout { exit 1 } +} + +spawn bash +send "source $basedir/../shell_config.sh\r" + +send "yes | head -n10000000 | \$CLICKHOUSE_CLIENT --query \"insert into function null('foo String') format TSV\" >/dev/null\r" +expect "Progress: " +send "\3" + +send "exit\r" +expect eof diff --git a/tests/queries/0_stateless/02310_clickhouse_client_INSERT_progress_profile_events.reference b/tests/queries/0_stateless/02310_clickhouse_client_INSERT_progress_profile_events.reference index 64ab61e6765..e69de29bb2d 100644 --- a/tests/queries/0_stateless/02310_clickhouse_client_INSERT_progress_profile_events.reference +++ b/tests/queries/0_stateless/02310_clickhouse_client_INSERT_progress_profile_events.reference @@ -1,2 +0,0 @@ -0 ---progress produce some rows diff --git a/tests/queries/0_stateless/02310_clickhouse_client_INSERT_progress_profile_events.sh b/tests/queries/0_stateless/02310_clickhouse_client_INSERT_progress_profile_events.sh deleted file mode 100755 index 6c37d870652..00000000000 --- a/tests/queries/0_stateless/02310_clickhouse_client_INSERT_progress_profile_events.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/usr/bin/env bash -# Tags: long - -# This is the regression for the concurrent access in ProgressIndication, -# so it is important to read enough rows here (10e6). -# -# Initially there was 100e6, but under thread fuzzer 10min may be not enough sometimes, -# but I believe that CI will catch possible issues even with less rows anyway. - -CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CUR_DIR"/../shell_config.sh - -tmp_file_progress="$(mktemp "$CUR_DIR/$CLICKHOUSE_TEST_UNIQUE_NAME.XXXXXX.progress")" -trap 'rm $tmp_file_progress' EXIT - -yes | head -n10000000 | $CLICKHOUSE_CLIENT -q "insert into function null('foo String') format TSV" --progress 2> "$tmp_file_progress" -echo $? -test -s "$tmp_file_progress" && echo "--progress produce some rows" || echo "FAIL: no rows with --progress" diff --git a/tests/queries/0_stateless/02310_clickhouse_local_INSERT_progress_profile_events.expect b/tests/queries/0_stateless/02310_clickhouse_local_INSERT_progress_profile_events.expect new file mode 100755 index 00000000000..5c95e17aefd --- /dev/null +++ b/tests/queries/0_stateless/02310_clickhouse_local_INSERT_progress_profile_events.expect @@ -0,0 +1,32 @@ +#!/usr/bin/expect -f +# Tags: long + +# This is the regression for the concurrent access in ProgressIndication, +# so it is important to read enough rows here (10e6). +# +# Initially there was 100e6, but under thread fuzzer 10min may be not enough sometimes, +# but I believe that CI will catch possible issues even with less rows anyway. 
+ +set basedir [file dirname $argv0] +set basename [file tail $argv0] +exp_internal -f $env(CLICKHOUSE_TMP)/$basename.debuglog 0 + +log_user 0 +set timeout 60 +match_max 100000 +set stty_init "rows 25 cols 120" + +expect_after { + eof { exp_continue } + timeout { exit 1 } +} + +spawn bash +send "source $basedir/../shell_config.sh\r" + +send "yes | head -n10000000 | \$CLICKHOUSE_LOCAL --query \"insert into function null('foo String') format TSV\" >/dev/null\r" +expect "Progress: " +send "\3" + +send "exit\r" +expect eof diff --git a/tests/queries/0_stateless/02310_clickhouse_local_INSERT_progress_profile_events.reference b/tests/queries/0_stateless/02310_clickhouse_local_INSERT_progress_profile_events.reference index 64ab61e6765..e69de29bb2d 100644 --- a/tests/queries/0_stateless/02310_clickhouse_local_INSERT_progress_profile_events.reference +++ b/tests/queries/0_stateless/02310_clickhouse_local_INSERT_progress_profile_events.reference @@ -1,2 +0,0 @@ -0 ---progress produce some rows diff --git a/tests/queries/0_stateless/02310_clickhouse_local_INSERT_progress_profile_events.sh b/tests/queries/0_stateless/02310_clickhouse_local_INSERT_progress_profile_events.sh deleted file mode 100755 index 00a8b7a2a90..00000000000 --- a/tests/queries/0_stateless/02310_clickhouse_local_INSERT_progress_profile_events.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/usr/bin/env bash -# Tags: long - -# This is the regression for the concurrent access in ProgressIndication, -# so it is important to read enough rows here (10e6). -# -# Initially there was 100e6, but under thread fuzzer 10min may be not enough sometimes, -# but I believe that CI will catch possible issues even with less rows anyway. - -CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CUR_DIR"/../shell_config.sh - -tmp_file_progress="$(mktemp "$CUR_DIR/$CLICKHOUSE_TEST_UNIQUE_NAME.XXXXXX.progress")" -trap 'rm $tmp_file_progress' EXIT - -yes | head -n10000000 | $CLICKHOUSE_LOCAL -q "insert into function null('foo String') format TSV" --progress 2> "$tmp_file_progress" -echo $? 
-test -s "$tmp_file_progress" && echo "--progress produce some rows" || echo "FAIL: no rows with --progress" diff --git a/tests/queries/0_stateless/02337_analyzer_columns_basic.reference b/tests/queries/0_stateless/02337_analyzer_columns_basic.reference new file mode 100644 index 00000000000..1482c79b602 --- /dev/null +++ b/tests/queries/0_stateless/02337_analyzer_columns_basic.reference @@ -0,0 +1,46 @@ +Empty from section +dummy UInt8 +0 +-- +dummy UInt8 +0 +-- +dummy UInt8 +0 +Table access without table name qualification +id UInt64 +0 +-- +value String +Value +-- +id UInt64 +value String +0 Value +Table access with table name qualification +id UInt64 +0 +-- +value String +Value +-- +id UInt64 +value String +0 Value +-- +id UInt64 +value String +0 Value +Table access with database and table name qualification +-- +id UInt64 +value String +0 Value +-- +id UInt64 +value String +0 Value +-- +id UInt64 +value String +0 Value diff --git a/tests/queries/0_stateless/02337_analyzer_columns_basic.sql b/tests/queries/0_stateless/02337_analyzer_columns_basic.sql new file mode 100644 index 00000000000..76f9f8b25e4 --- /dev/null +++ b/tests/queries/0_stateless/02337_analyzer_columns_basic.sql @@ -0,0 +1,101 @@ +-- Tags: no-parallel + +SET allow_experimental_analyzer = 1; + +-- Empty from section + +SELECT 'Empty from section'; + +DESCRIBE (SELECT dummy); +SELECT dummy; + +SELECT '--'; + +DESCRIBE (SELECT one.dummy); +SELECT one.dummy; + +SELECT '--'; + +DESCRIBE (SELECT system.one.dummy); +SELECT system.one.dummy; + +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + id UInt64, + value String +) ENGINE=TinyLog; + +INSERT INTO test_table VALUES (0, 'Value'); + +SELECT 'Table access without table name qualification'; + +SELECT test_id FROM test_table; -- { serverError 47 } +SELECT test_id FROM test_unknown_table; -- { serverError 60 } + +DESCRIBE (SELECT id FROM test_table); +SELECT id FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT value FROM test_table); +SELECT value FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT id, value FROM test_table); +SELECT id, value FROM test_table; + +SELECT 'Table access with table name qualification'; + +DESCRIBE (SELECT test_table.id FROM test_table); +SELECT test_table.id FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT test_table.value FROM test_table); +SELECT test_table.value FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT test_table.id, test_table.value FROM test_table); +SELECT test_table.id, test_table.value FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT test.id, test.value FROM test_table AS test); +SELECT test.id, test.value FROM test_table AS test; + +DROP TABLE test_table; + +SELECT 'Table access with database and table name qualification'; + +DROP DATABASE IF EXISTS 02337_db; +CREATE DATABASE 02337_db; + +DROP TABLE IF EXISTS 02337_db.test_table; +CREATE TABLE 02337_db.test_table +( + id UInt64, + value String +) ENGINE=TinyLog; + +INSERT INTO 02337_db.test_table VALUES (0, 'Value'); + +SELECT '--'; + +DESCRIBE (SELECT test_table.id, test_table.value FROM 02337_db.test_table); +SELECT test_table.id, test_table.value FROM 02337_db.test_table; + +SELECT '--'; + +DESCRIBE (SELECT 02337_db.test_table.id, 02337_db.test_table.value FROM 02337_db.test_table); +SELECT 02337_db.test_table.id, 02337_db.test_table.value FROM 02337_db.test_table; + +SELECT '--'; + +DESCRIBE (SELECT test_table.id, test_table.value FROM 02337_db.test_table AS test_table); +SELECT test_table.id, test_table.value FROM 02337_db.test_table AS test_table; 
+ +DROP TABLE 02337_db.test_table; +DROP DATABASE 02337_db; diff --git a/tests/queries/0_stateless/02338_analyzer_constants_basic.reference b/tests/queries/0_stateless/02338_analyzer_constants_basic.reference new file mode 100644 index 00000000000..f3a69e4d835 --- /dev/null +++ b/tests/queries/0_stateless/02338_analyzer_constants_basic.reference @@ -0,0 +1,35 @@ +1 UInt8 +1 +-- +\'test\' String +test +-- +1 UInt8 +\'test\' String +1 test +-- +1 UInt8 +\'test\' String +[1, 2, 3] Array(UInt8) +1 test [1,2,3] +-- +1 UInt8 +\'test\' String +[1, 2, 3] Array(UInt8) +[\'1\', \'2\', \'3\'] Array(String) +1 test [1,2,3] ['1','2','3'] +-- +NULL Nullable(Nothing) +\N +-- +(1, 1) Tuple(UInt8, UInt8) +(1,1) +-- +array((1, 1)) Array(Tuple(UInt8, UInt8)) +[(1,1)] +NULL Nullable(Nothing) +1 UInt8 +\'test\' String +[1, 2, 3] Array(UInt8) +array((1, 1), (1, 1)) Array(Tuple(UInt8, UInt8)) +\N 1 test [1,2,3] [(1,1),(1,1)] diff --git a/tests/queries/0_stateless/02338_analyzer_constants_basic.sql b/tests/queries/0_stateless/02338_analyzer_constants_basic.sql new file mode 100644 index 00000000000..6d6249538a4 --- /dev/null +++ b/tests/queries/0_stateless/02338_analyzer_constants_basic.sql @@ -0,0 +1,42 @@ +SET allow_experimental_analyzer = 1; + +DESCRIBE (SELECT 1); +SELECT 1; + +SELECT '--'; + +DESCRIBE (SELECT 'test'); +SELECT 'test'; + +SELECT '--'; + +DESCRIBE (SELECT 1, 'test'); +SELECT 1, 'test'; + +SELECT '--'; + +DESCRIBE (SELECT 1, 'test', [1, 2, 3]); +SELECT 1, 'test', [1, 2, 3]; + +SELECT '--'; + +DESCRIBE (SELECT 1, 'test', [1, 2, 3], ['1', '2', '3']); +SELECT 1, 'test', [1, 2, 3], ['1', '2', '3']; + +SELECT '--'; + +DESCRIBE (SELECT NULL); +SELECT NULL; + +SELECT '--'; + +DESCRIBE (SELECT (1, 1)); +SELECT (1, 1); + +SELECT '--'; + +DESCRIBE (SELECT [(1, 1)]); +SELECT [(1, 1)]; + +DESCRIBE (SELECT NULL, 1, 'test', [1, 2, 3], [(1, 1), (1, 1)]); +SELECT NULL, 1, 'test', [1, 2, 3], [(1, 1), (1, 1)]; diff --git a/tests/queries/0_stateless/02339_analyzer_matcher_basic.reference b/tests/queries/0_stateless/02339_analyzer_matcher_basic.reference new file mode 100644 index 00000000000..3ca0d303793 --- /dev/null +++ b/tests/queries/0_stateless/02339_analyzer_matcher_basic.reference @@ -0,0 +1,98 @@ +Matchers without FROM section +dummy UInt8 +0 +-- +dummy UInt8 +0 +-- +dummy UInt8 +0 +Unqualified matchers +id UInt64 +value String +0 Value +-- +id UInt64 +0 +-- +id UInt64 +value String +0 Value +-- +id UInt64 +value String +0 Value +Table qualified matchers +id UInt64 +value String +0 Value +-- +id UInt64 +0 +-- +id UInt64 +value String +0 Value +-- +id UInt64 +value String +0 Value +Database and table qualified matchers +APPLY transformer +-- +toString(id) String +toString(value) String +0 Value +-- +toString(id) String +toString(value) String +0 Value +-- +length(toString(id)) UInt64 +length(toString(value)) UInt64 +1 5 +-- +length(toString(id)) UInt64 +length(toString(value)) UInt64 +1 5 +-- +id UInt64 +value String +0 Value +EXCEPT transformer +-- +value String +Value +-- +value String +Value +-- +toString(value) String +Value +-- +toString(value) String +Value +REPLACE transformer +-- +5 UInt8 +value String +5 Value +-- +5 UInt8 +value String +5 Value +-- +5 UInt8 +6 UInt8 +5 6 +-- +5 UInt8 +6 UInt8 +5 6 +Combine EXCEPT, REPLACE, APPLY transformers +-- +toString(6) String +6 +-- +toString(6) String +6 diff --git a/tests/queries/0_stateless/02339_analyzer_matcher_basic.sql b/tests/queries/0_stateless/02339_analyzer_matcher_basic.sql new file mode 100644 index 00000000000..9d7c486b28a --- /dev/null +++ 
b/tests/queries/0_stateless/02339_analyzer_matcher_basic.sql @@ -0,0 +1,188 @@ +-- Tags: no-parallel + +SET allow_experimental_analyzer = 1; + +SELECT 'Matchers without FROM section'; + +DESCRIBE (SELECT *); +SELECT *; + +SELECT '--'; + +DESCRIBE (SELECT COLUMNS(dummy)); +SELECT COLUMNS(dummy); + +SELECT '--'; + +DESCRIBE (SELECT COLUMNS('d')); +SELECT COLUMNS('d'); + +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + id UInt64, + value String +) ENGINE=TinyLog; + +INSERT INTO test_table VALUES (0, 'Value'); + +SELECT 'Unqualified matchers'; + +DESCRIBE (SELECT * FROM test_table); +SELECT * FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT COLUMNS(id) FROM test_table); +SELECT COLUMNS(id) FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT COLUMNS(id), COLUMNS(value) FROM test_table); +SELECT COLUMNS(id), COLUMNS(value) FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT COLUMNS('i'), COLUMNS('v') FROM test_table); +SELECT COLUMNS('i'), COLUMNS('v') FROM test_table; + +SELECT 'Table qualified matchers'; + +DESCRIBE (SELECT test_table.* FROM test_table); +SELECT test_table.* FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT test_table.COLUMNS(id) FROM test_table); +SELECT test_table.COLUMNS(id) FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT test_table.COLUMNS(id), test_table.COLUMNS(value) FROM test_table); +SELECT test_table.COLUMNS(id), test_table.COLUMNS(value) FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT test_table.COLUMNS('i'), test_table.COLUMNS('v') FROM test_table); +SELECT test_table.COLUMNS('i'), test_table.COLUMNS('v') FROM test_table; + +SELECT 'Database and table qualified matchers'; + +DROP DATABASE IF EXISTS 02339_db; +CREATE DATABASE 02339_db; + +DROP TABLE IF EXISTS 02339_db.test_table; +CREATE TABLE 02339_db.test_table +( + id UInt64, + value String +) ENGINE=TinyLog; + +INSERT INTO 02339_db.test_table VALUES (0, 'Value'); + +-- TODO: Qualified COLUMNS where identifier has more than 2 parts are not supported on parser level + +-- SELECT '--'; + +-- DESCRIBE (SELECT 02339_db.test_table.* FROM 02339_db.test_table); +-- SELECT 02339_db.test_table.* FROM 02339_db.test_table; + +-- SELECT '--'; + +-- DESCRIBE (SELECT 02339_db.test_table.COLUMNS(id) FROM 02339_db.test_table); +-- SELECT 02339_db.test_table.COLUMNS(id) FROM 02339_db.test_table; + +-- SELECT '--'; + +-- DESCRIBE (SELECT 02339_db.test_table.COLUMNS(id), 02339_db.test_table.COLUMNS(value) FROM 02339_db.test_table); +-- SELECT 02339_db.test_table.COLUMNS(id), 02339_db.test_table.COLUMNS(value) FROM 02339_db.test_table; + +-- SELECT '--'; + +-- DESCRIBE (SELECT 02339_db.test_table.COLUMNS('i'), 02339_db.test_table.COLUMNS('v') FROM 02339_db.test_table); +-- SELECT 02339_db.test_table.COLUMNS('i'), 02339_db.test_table.COLUMNS('v') FROM 02339_db.test_table; + +DROP TABLE 02339_db.test_table; +DROP DATABASE 02339_db; + +SELECT 'APPLY transformer'; + +SELECT '--'; + +DESCRIBE (SELECT * APPLY toString FROM test_table); +SELECT * APPLY toString FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT * APPLY (x -> toString(x)) FROM test_table); +SELECT * APPLY (x -> toString(x)) FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT * APPLY (x -> toString(x)) APPLY (x -> length(x)) FROM test_table); +SELECT * APPLY (x -> toString(x)) APPLY (x -> length(x)) FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT * APPLY (x -> toString(x)) APPLY length FROM test_table); +SELECT * APPLY (x -> toString(x)) APPLY length FROM test_table; + +SELECT '--'; +DESCRIBE (SELECT * FROM test_table); +SELECT * 
FROM test_table; + +SELECT 'EXCEPT transformer'; + +SELECT '--'; + +DESCRIBE (SELECT * EXCEPT (id) FROM test_table); +SELECT * EXCEPT (id) FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT COLUMNS(id, value) EXCEPT (id) FROM test_table); +SELECT COLUMNS(id, value) EXCEPT (id) FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT * EXCEPT (id) APPLY toString FROM test_table); +SELECT * EXCEPT (id) APPLY toString FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT COLUMNS(id, value) EXCEPT (id) APPLY toString FROM test_table); +SELECT COLUMNS(id, value) EXCEPT (id) APPLY toString FROM test_table; + +SELECT 'REPLACE transformer'; + +SELECT '--'; + +DESCRIBE (SELECT * REPLACE (5 AS id) FROM test_table); +SELECT * REPLACE (5 AS id) FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT COLUMNS(id, value) REPLACE (5 AS id) FROM test_table); +SELECT COLUMNS(id, value) REPLACE (5 AS id) FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT * REPLACE (5 AS id, 6 as value) FROM test_table); +SELECT * REPLACE (5 AS id, 6 as value) FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT COLUMNS(id, value) REPLACE (5 AS id, 6 as value) FROM test_table); +SELECT COLUMNS(id, value) REPLACE (5 AS id, 6 as value) FROM test_table; + +SELECT 'Combine EXCEPT, REPLACE, APPLY transformers'; + +SELECT '--'; + +DESCRIBE (SELECT * EXCEPT id REPLACE (5 AS id, 6 as value) APPLY toString FROM test_table); +SELECT * EXCEPT id REPLACE (5 AS id, 6 as value) APPLY toString FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT COLUMNS(id, value) EXCEPT id REPLACE (5 AS id, 6 as value) APPLY toString FROM test_table); +SELECT COLUMNS(id, value) EXCEPT id REPLACE (5 AS id, 6 as value) APPLY toString FROM test_table; diff --git a/tests/queries/0_stateless/02340_analyzer_functions.reference b/tests/queries/0_stateless/02340_analyzer_functions.reference new file mode 100644 index 00000000000..fe086c69e91 --- /dev/null +++ b/tests/queries/0_stateless/02340_analyzer_functions.reference @@ -0,0 +1,11 @@ +plus(1, 1) UInt16 +2 +-- +plus(dummy, dummy) UInt16 +0 +-- +plus(id, length(value)) UInt64 +5 +-- +concat(concat(toString(id), \'_\'), value) String +0_Value diff --git a/tests/queries/0_stateless/02340_analyzer_functions.sql b/tests/queries/0_stateless/02340_analyzer_functions.sql new file mode 100644 index 00000000000..101a5bfcc86 --- /dev/null +++ b/tests/queries/0_stateless/02340_analyzer_functions.sql @@ -0,0 +1,28 @@ +SET allow_experimental_analyzer = 1; + +DESCRIBE (SELECT 1 + 1); +SELECT 1 + 1; + +SELECT '--'; + +DESCRIBE (SELECT dummy + dummy); +SELECT dummy + dummy; + +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + id UInt64, + value String +) ENGINE=TinyLog; + +INSERT INTO test_table VALUES (0, 'Value'); + +SELECT '--'; + +DESCRIBE (SELECT id + length(value) FROM test_table); +SELECT id + length(value) FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT concat(concat(toString(id), '_'), (value)) FROM test_table); +SELECT concat(concat(toString(id), '_'), (value)) FROM test_table; diff --git a/tests/queries/0_stateless/02341_analyzer_aliases_basics.reference b/tests/queries/0_stateless/02341_analyzer_aliases_basics.reference new file mode 100644 index 00000000000..3733d6b6084 --- /dev/null +++ b/tests/queries/0_stateless/02341_analyzer_aliases_basics.reference @@ -0,0 +1,19 @@ +Aliases to constants +1 1 +4 2 1 3 4 +1 +1 1 +1 1 2 +1 2 1 +3 6 +Aliases to columns +0 0 0 +0 Value 0 Value +0 Value +Alias conflict with identifier inside expression +0 +1 +3 +Alias setting prefer_column_name_to_alias +0 +Value diff 
--git a/tests/queries/0_stateless/02341_analyzer_aliases_basics.sql b/tests/queries/0_stateless/02341_analyzer_aliases_basics.sql new file mode 100644 index 00000000000..52a1cd1dae8 --- /dev/null +++ b/tests/queries/0_stateless/02341_analyzer_aliases_basics.sql @@ -0,0 +1,50 @@ +SET allow_experimental_analyzer = 1; + +SELECT 'Aliases to constants'; + +SELECT 1 as a, a; +SELECT (c + 1) as d, (a + 1) as b, 1 AS a, (b + 1) as c, d; + +WITH 1 as a SELECT a; +WITH a as b SELECT 1 as a, b; + +SELECT 1 AS x, x, x + 1; +SELECT x, x + 1, 1 AS x; +SELECT x, 1 + (2 + (3 AS x)); + +SELECT a AS b, b AS a; -- { serverError 174 } + +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + id UInt64, + value String +) ENGINE=TinyLog; + +INSERT INTO test_table VALUES (0, 'Value'); + +SELECT 'Aliases to columns'; + +SELECT id_alias_2, id AS id_alias, id_alias as id_alias_2 FROM test_table; +SELECT id_1, value_1, id as id_1, value as value_1 FROM test_table; + +WITH value_1 as value_2, id_1 as id_2, id AS id_1, value AS value_1 SELECT id_2, value_2 FROM test_table; + +SELECT (id + b) AS id, id as b FROM test_table; -- { serverError 174 } +SELECT (1 + b + 1 + id) AS id, b as c, id as b FROM test_table; -- { serverError 174 } + +SELECT 'Alias conflict with identifier inside expression'; + +SELECT id AS id FROM test_table; +SELECT (id + 1) AS id FROM test_table; +SELECT (id + 1 + 1 + 1 + id) AS id FROM test_table; + +SELECT 'Alias setting prefer_column_name_to_alias'; + +WITH id AS value SELECT value FROM test_table; + +SET prefer_column_name_to_alias = 1; +WITH id AS value SELECT value FROM test_table; +SET prefer_column_name_to_alias = 0; + +DROP TABLE test_table; diff --git a/tests/queries/0_stateless/02342_analyzer_compound_types.reference b/tests/queries/0_stateless/02342_analyzer_compound_types.reference new file mode 100644 index 00000000000..51e0bbe6e92 --- /dev/null +++ b/tests/queries/0_stateless/02342_analyzer_compound_types.reference @@ -0,0 +1,106 @@ +Constant tuple +(1,'Value') 1 Value +(1,'Value') 1 Value +(1,'Value') 1 +(1,'Value') Value +(1,'Value') 1 +(1,'Value') Value +Tuple +-- +id UInt64 +value Tuple(value_0_level_0 Tuple(value_0_level_1 String, value_1_level_1 String), value_1_level_0 String) +0 (('value_0_level_1','value_1_level_1'),'value_1_level_0') +-- +id UInt64 +value Tuple(value_0_level_0 Tuple(value_0_level_1 String, value_1_level_1 String), value_1_level_0 String) +0 (('value_0_level_1','value_1_level_1'),'value_1_level_0') +-- +value.value_0_level_0 Tuple(value_0_level_1 String, value_1_level_1 String) +value.value_1_level_0 String +('value_0_level_1','value_1_level_1') value_1_level_0 +-- +alias_value Tuple(value_0_level_0 Tuple(value_0_level_1 String, value_1_level_1 String), value_1_level_0 String) +alias_value.value_0_level_0 Tuple(value_0_level_1 String, value_1_level_1 String) +alias_value.value_1_level_0 String +(('value_0_level_1','value_1_level_1'),'value_1_level_0') ('value_0_level_1','value_1_level_1') value_1_level_0 +-- +alias_value Tuple(value_0_level_0 Tuple(value_0_level_1 String, value_1_level_1 String), value_1_level_0 String) +alias_value.value_0_level_0 Tuple(value_0_level_1 String, value_1_level_1 String) +alias_value.value_1_level_0 String +(('value_0_level_1','value_1_level_1'),'value_1_level_0') ('value_0_level_1','value_1_level_1') value_1_level_0 +-- +alias_value Tuple(value_0_level_0 Tuple(value_0_level_1 String, value_1_level_1 String), value_1_level_0 String) +toString(alias_value.value_0_level_0) String +toString(alias_value.value_1_level_0) 
String +(('value_0_level_1','value_1_level_1'),'value_1_level_0') (\'value_0_level_1\',\'value_1_level_1\') value_1_level_0 +-- +value.value_0_level_0 Tuple(value_0_level_1 String, value_1_level_1 String) +value.value_1_level_0 String +('value_0_level_1','value_1_level_1') value_1_level_0 +-- +toString(value.value_0_level_0) String +toString(value.value_1_level_0) String +(\'value_0_level_1\',\'value_1_level_1\') value_1_level_0 +-- +value.value_0_level_0.value_0_level_1 String +value.value_0_level_0.value_1_level_1 String +value_0_level_1 value_1_level_1 +-- +alias_value Tuple(value_0_level_1 String, value_1_level_1 String) +alias_value.value_0_level_1 String +alias_value.value_1_level_1 String +('value_0_level_1','value_1_level_1') value_0_level_1 value_1_level_1 +-- +alias_value Tuple(value_0_level_1 String, value_1_level_1 String) +alias_value.value_0_level_1 String +alias_value.value_1_level_1 String +('value_0_level_1','value_1_level_1') value_0_level_1 value_1_level_1 +-- +alias_value Tuple(value_0_level_1 String, value_1_level_1 String) +toString(alias_value.value_0_level_1) String +toString(alias_value.value_1_level_1) String +('value_0_level_1','value_1_level_1') value_0_level_1 value_1_level_1 +-- +value.value_0_level_0.value_0_level_1 String +value.value_0_level_0.value_1_level_1 String +value_0_level_1 value_1_level_1 +-- +toString(value.value_0_level_0.value_0_level_1) String +toString(value.value_0_level_0.value_1_level_1) String +value_0_level_1 value_1_level_1 +Nested +id UInt64 +value.value_0_level_0 Array(Nested(value_0_level_1 String, value_1_level_1 String)) +value.value_1_level_0 Array(String) +0 [[('value_0_level_1','value_1_level_1')]] ['value_1_level_0'] +-- +value.value_0_level_0 Array(Nested(value_0_level_1 String, value_1_level_1 String)) +value.value_1_level_0 Array(String) +[[('value_0_level_1','value_1_level_1')]] ['value_1_level_0'] +-- +value.value_0_level_0.value_0_level_1 Array(Array(String)) +value.value_0_level_0.value_1_level_1 Array(Array(String)) +[['value_0_level_1']] [['value_1_level_1']] +-- +value_alias Array(Nested(value_0_level_1 String, value_1_level_1 String)) +value_alias.value_0_level_1 Array(Array(String)) +value_alias.value_1_level_1 Array(Array(String)) +[[('value_0_level_1','value_1_level_1')]] [['value_0_level_1']] [['value_1_level_1']] +-- +value_alias Array(Nested(value_0_level_1 String, value_1_level_1 String)) +value_alias.value_0_level_1 Array(Array(String)) +value_alias.value_1_level_1 Array(Array(String)) +[[('value_0_level_1','value_1_level_1')]] [['value_0_level_1']] [['value_1_level_1']] +-- +value_alias Array(Nested(value_0_level_1 String, value_1_level_1 String)) +toString(value_alias.value_0_level_1) String +toString(value_alias.value_1_level_1) String +[[('value_0_level_1','value_1_level_1')]] [[\'value_0_level_1\']] [[\'value_1_level_1\']] +-- +value.value_0_level_0.value_0_level_1 Array(Array(String)) +value.value_0_level_0.value_1_level_1 Array(Array(String)) +[['value_0_level_1']] [['value_1_level_1']] +-- +toString(value.value_0_level_0.value_0_level_1) String +toString(value.value_0_level_0.value_1_level_1) String +[[\'value_0_level_1\']] [[\'value_1_level_1\']] diff --git a/tests/queries/0_stateless/02342_analyzer_compound_types.sql b/tests/queries/0_stateless/02342_analyzer_compound_types.sql new file mode 100644 index 00000000000..0fd96928496 --- /dev/null +++ b/tests/queries/0_stateless/02342_analyzer_compound_types.sql @@ -0,0 +1,195 @@ +SET allow_experimental_analyzer = 1; + +SELECT 'Constant tuple'; + +SELECT 
cast((1, 'Value'), 'Tuple (id UInt64, value String)') AS value, value.id, value.value; +SELECT cast((1, 'Value'), 'Tuple (id UInt64, value String)') AS value, value.* APPLY toString; +SELECT cast((1, 'Value'), 'Tuple (id UInt64, value String)') AS value, value.COLUMNS(id) APPLY toString; +SELECT cast((1, 'Value'), 'Tuple (id UInt64, value String)') AS value, value.COLUMNS(value) APPLY toString; +SELECT cast((1, 'Value'), 'Tuple (id UInt64, value String)') AS value, value.COLUMNS('i') APPLY toString; +SELECT cast((1, 'Value'), 'Tuple (id UInt64, value String)') AS value, value.COLUMNS('v') APPLY toString; + +SELECT 'Tuple'; + +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + id UInt64, + value Tuple(value_0_level_0 Tuple(value_0_level_1 String, value_1_level_1 String), value_1_level_0 String) +) ENGINE=MergeTree ORDER BY id; + +INSERT INTO test_table VALUES (0, (('value_0_level_1', 'value_1_level_1'), 'value_1_level_0')); + +SELECT '--'; + +DESCRIBE (SELECT * FROM test_table); +SELECT * FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT id, value FROM test_table); +SELECT id, value FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT value.value_0_level_0, value.value_1_level_0 FROM test_table); +SELECT value.value_0_level_0, value.value_1_level_0 FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT value AS alias_value, alias_value.value_0_level_0, alias_value.value_1_level_0 FROM test_table); +SELECT value AS alias_value, alias_value.value_0_level_0, alias_value.value_1_level_0 FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT value AS alias_value, alias_value.* FROM test_table); +SELECT value AS alias_value, alias_value.* FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT value AS alias_value, alias_value.* APPLY toString FROM test_table); +SELECT value AS alias_value, alias_value.* APPLY toString FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT value.* FROM test_table); +SELECT value.* FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT value.* APPLY toString FROM test_table); +SELECT value.* APPLY toString FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT value.value_0_level_0.value_0_level_1, value.value_0_level_0.value_1_level_1 FROM test_table); +SELECT value.value_0_level_0.value_0_level_1, value.value_0_level_0.value_1_level_1 FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT value.value_0_level_0 AS alias_value, alias_value.value_0_level_1, alias_value.value_1_level_1 FROM test_table); +SELECT value.value_0_level_0 AS alias_value, alias_value.value_0_level_1, alias_value.value_1_level_1 FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT value.value_0_level_0 AS alias_value, alias_value.* FROM test_table); +SELECT value.value_0_level_0 AS alias_value, alias_value.* FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT value.value_0_level_0 AS alias_value, alias_value.* APPLY toString FROM test_table); +SELECT value.value_0_level_0 AS alias_value, alias_value.* APPLY toString FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT value.value_0_level_0.* FROM test_table); +SELECT value.value_0_level_0.* FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT value.value_0_level_0.* APPLY toString FROM test_table); +SELECT value.value_0_level_0.* APPLY toString FROM test_table; + +DROP TABLE test_table; + +-- SELECT 'Array of tuples'; + +-- DROP TABLE IF EXISTS test_table; +-- CREATE TABLE test_table +-- ( +-- id UInt64, +-- value Array(Tuple(value_0_level_0 Tuple(value_0_level_1 String, value_1_level_1 String), value_1_level_0 String)) +-- ) 
ENGINE=MergeTree ORDER BY id; + +-- INSERT INTO test_table VALUES (0, [('value_0_level_1', 'value_1_level_1')], ['value_1_level_0']); + +-- DESCRIBE (SELECT * FROM test_table); +-- SELECT * FROM test_table; + +-- SELECT '--'; + +-- DESCRIBE (SELECT value.value_0_level_0, value.value_1_level_0 FROM test_table); +-- SELECT value.value_0_level_0, value.value_1_level_0 FROM test_table; + +-- SELECT '--'; + +-- DESCRIBE (SELECT value.value_0_level_0.value_0_level_1, value.value_0_level_0.value_1_level_1 FROM test_table); +-- SELECT value.value_0_level_0.value_0_level_1, value.value_0_level_0.value_1_level_1 FROM test_table; + +-- SELECT '--'; + +-- DESCRIBE (SELECT value.value_0_level_0 AS alias_value, alias_value.value_0_level_1, alias_value.value_1_level_1 FROM test_table); +-- SELECT value.value_0_level_0 AS alias_value, alias_value.value_0_level_1, alias_value.value_1_level_1 FROM test_table; + +-- SELECT '--'; + +-- DESCRIBE (SELECT value.value_0_level_0 AS alias_value, alias_value.* FROM test_table); +-- SELECT value.value_0_level_0 AS alias_value, alias_value.* FROM test_table; + +-- SELECT '--'; + +-- DESCRIBE (SELECT value.value_0_level_0 AS alias_value, alias_value.* APPLY toString FROM test_table); +-- SELECT value.value_0_level_0 AS alias_value, alias_value.* APPLY toString FROM test_table; + +-- SELECT '--'; + +-- DESCRIBE (SELECT value.value_0_level_0.* FROM test_table); +-- SELECT value.value_0_level_0.* FROM test_table; + +-- SELECT '--'; + +-- DESCRIBE (SELECT value.value_0_level_0.* APPLY toString FROM test_table); +-- SELECT value.value_0_level_0.* APPLY toString FROM test_table; + +-- DROP TABLE test_table; + +SELECT 'Nested'; + +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + id UInt64, + value Nested (value_0_level_0 Nested(value_0_level_1 String, value_1_level_1 String), value_1_level_0 String) +) ENGINE=MergeTree ORDER BY id; + +INSERT INTO test_table VALUES (0, [[('value_0_level_1', 'value_1_level_1')]], ['value_1_level_0']); + +DESCRIBE (SELECT * FROM test_table); +SELECT * FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT value.value_0_level_0, value.value_1_level_0 FROM test_table); +SELECT value.value_0_level_0, value.value_1_level_0 FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT value.value_0_level_0.value_0_level_1, value.value_0_level_0.value_1_level_1 FROM test_table); +SELECT value.value_0_level_0.value_0_level_1, value.value_0_level_0.value_1_level_1 FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT value.value_0_level_0 AS value_alias, value_alias.value_0_level_1, value_alias.value_1_level_1 FROM test_table); +SELECT value.value_0_level_0 AS value_alias, value_alias.value_0_level_1, value_alias.value_1_level_1 FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT value.value_0_level_0 AS value_alias, value_alias.* FROM test_table); +SELECT value.value_0_level_0 AS value_alias, value_alias.* FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT value.value_0_level_0 AS value_alias, value_alias.* APPLY toString FROM test_table); +SELECT value.value_0_level_0 AS value_alias, value_alias.* APPLY toString FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT value.value_0_level_0.* FROM test_table); +SELECT value.value_0_level_0.* FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT value.value_0_level_0.* APPLY toString FROM test_table); +SELECT value.value_0_level_0.* APPLY toString FROM test_table; + +DROP TABLE test_table; diff --git a/tests/queries/0_stateless/02343_analyzer_column_transformers_strict.reference 
b/tests/queries/0_stateless/02343_analyzer_column_transformers_strict.reference new file mode 100644 index 00000000000..4904e950431 --- /dev/null +++ b/tests/queries/0_stateless/02343_analyzer_column_transformers_strict.reference @@ -0,0 +1,2 @@ +Value +1 2 diff --git a/tests/queries/0_stateless/02343_analyzer_column_transformers_strict.sql b/tests/queries/0_stateless/02343_analyzer_column_transformers_strict.sql new file mode 100644 index 00000000000..98ee7bc8f58 --- /dev/null +++ b/tests/queries/0_stateless/02343_analyzer_column_transformers_strict.sql @@ -0,0 +1,18 @@ +SET allow_experimental_analyzer = 1; + +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + id UInt64, + value String +) ENGINE=TinyLog; + +INSERT INTO test_table VALUES (0, 'Value'); + +SELECT * EXCEPT (id) FROM test_table; +SELECT * EXCEPT STRICT (id, value1) FROM test_table; -- { serverError 36 } + +SELECT * REPLACE STRICT (1 AS id, 2 AS value) FROM test_table; +SELECT * REPLACE STRICT (1 AS id, 2 AS value_1) FROM test_table; -- { serverError 36 } + +DROP TABLE IF EXISTS test_table; diff --git a/tests/queries/0_stateless/02343_analyzer_lambdas.reference b/tests/queries/0_stateless/02343_analyzer_lambdas.reference new file mode 100644 index 00000000000..8d29481c255 --- /dev/null +++ b/tests/queries/0_stateless/02343_analyzer_lambdas.reference @@ -0,0 +1,29 @@ +Standalone lambdas +2 +1 \N [1,2,3] +1 \N [1,2,3] +1 +0 Value +Lambda as function parameter +[2,3,4] +[2,3,4] +['1','2','3'] ['1','2','3'] +['1','2','3'] ['1','2','3'] +[0,0,0] +[1,2,3] +['1_0','2_0','3_0'] +Lambda compound argument +(1,'Value') 1_Value +value_0_level_0_value_1_level_0 +Lambda matcher +0 +0 Value +[1,1,1] +[2,2,2] +0 1 1 +0 2 2 +Lambda untuple +(1,'Value') 1 Value +Lambda carrying +2 1 +1 0 diff --git a/tests/queries/0_stateless/02343_analyzer_lambdas.sql b/tests/queries/0_stateless/02343_analyzer_lambdas.sql new file mode 100644 index 00000000000..b90f7b32b57 --- /dev/null +++ b/tests/queries/0_stateless/02343_analyzer_lambdas.sql @@ -0,0 +1,69 @@ +SET allow_experimental_analyzer = 1; + +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + id UInt64, + value String +) ENGINE=TinyLog; + +INSERT INTO test_table VALUES (0, 'Value'); + +SELECT 'Standalone lambdas'; + +WITH x -> x + 1 AS lambda SELECT lambda(1); +WITH x -> toString(x) AS lambda SELECT lambda(1), lambda(NULL), lambda([1,2,3]); +WITH x -> toString(x) AS lambda_1, lambda_1 AS lambda_2, lambda_2 AS lambda_3 SELECT lambda_1(1), lambda_2(NULL), lambda_3([1,2,3]); + +WITH x -> x + 1 AS lambda SELECT lambda(id) FROM test_table; +WITH x -> toString(x) AS lambda SELECT lambda(id), lambda(value) FROM test_table; + +SELECT 'Lambda as function parameter'; + +SELECT arrayMap(x -> x + 1, [1,2,3]); +WITH x -> x + 1 AS lambda SELECT arrayMap(lambda, [1,2,3]); +SELECT arrayMap((x -> toString(x)) as lambda, [1,2,3]), arrayMap(lambda, ['1','2','3']); +WITH x -> toString(x) AS lambda_1 SELECT arrayMap(lambda_1 AS lambda_2, [1,2,3]), arrayMap(lambda_2, ['1', '2', '3']); + +SELECT arrayMap(x -> id, [1,2,3]) FROM test_table; +SELECT arrayMap(x -> x + id, [1,2,3]) FROM test_table; +SELECT arrayMap((x -> concat(concat(toString(x), '_'), toString(id))) as lambda, [1,2,3]) FROM test_table; + +SELECT 'Lambda compound argument'; + +DROP TABLE IF EXISTS test_table_tuple; +CREATE TABLE test_table_tuple +( + id UInt64, + value Tuple(value_0_level_0 String, value_1_level_0 String) +) ENGINE=TinyLog; + +INSERT INTO test_table_tuple VALUES (0, ('value_0_level_0', 'value_1_level_0')); + +WITH x -> 
concat(concat(toString(x.id), '_'), x.value) AS lambda SELECT cast((1, 'Value'), 'Tuple (id UInt64, value String)') AS value, lambda(value); +WITH x -> concat(concat(x.value_0_level_0, '_'), x.value_1_level_0) AS lambda SELECT lambda(value) FROM test_table_tuple; + +SELECT 'Lambda matcher'; + +WITH x -> * AS lambda SELECT lambda(1); +WITH x -> * AS lambda SELECT lambda(1) FROM test_table; + +WITH cast(tuple(1), 'Tuple (value UInt64)') AS compound_value SELECT arrayMap(x -> compound_value.*, [1,2,3]); +WITH cast(tuple(1, 1), 'Tuple (value_1 UInt64, value_2 UInt64)') AS compound_value SELECT arrayMap(x -> compound_value.*, [1,2,3]); -- { serverError 1 } +WITH cast(tuple(1, 1), 'Tuple (value_1 UInt64, value_2 UInt64)') AS compound_value SELECT arrayMap(x -> plus(compound_value.*), [1,2,3]); + +WITH cast(tuple(1), 'Tuple (value UInt64)') AS compound_value SELECT id, test_table.* APPLY x -> compound_value.* FROM test_table; +WITH cast(tuple(1, 1), 'Tuple (value_1 UInt64, value_2 UInt64)') AS compound_value SELECT id, test_table.* APPLY x -> compound_value.* FROM test_table; -- { serverError 1 } +WITH cast(tuple(1, 1), 'Tuple (value_1 UInt64, value_2 UInt64)') AS compound_value SELECT id, test_table.* APPLY x -> plus(compound_value.*) FROM test_table; + +SELECT 'Lambda untuple'; + +WITH x -> untuple(x) AS lambda SELECT cast((1, 'Value'), 'Tuple (id UInt64, value String)') AS value, lambda(value); + +SELECT 'Lambda carrying'; + +WITH (functor, x) -> functor(x) AS lambda, x -> x + 1 AS functor_1, x -> toString(x) AS functor_2 SELECT lambda(functor_1, 1), lambda(functor_2, 1); +WITH (functor, x) -> functor(x) AS lambda, x -> x + 1 AS functor_1, x -> toString(x) AS functor_2 SELECT lambda(functor_1, id), lambda(functor_2, id) FROM test_table; + +DROP TABLE test_table_tuple; +DROP TABLE test_table; diff --git a/tests/queries/0_stateless/02343_analyzer_lambdas_issue_28083.reference b/tests/queries/0_stateless/02343_analyzer_lambdas_issue_28083.reference new file mode 100644 index 00000000000..a2ed8e55d62 --- /dev/null +++ b/tests/queries/0_stateless/02343_analyzer_lambdas_issue_28083.reference @@ -0,0 +1,2 @@ +n [('n',1)] +[('n',1)] diff --git a/tests/queries/0_stateless/02343_analyzer_lambdas_issue_28083.sql b/tests/queries/0_stateless/02343_analyzer_lambdas_issue_28083.sql new file mode 100644 index 00000000000..3b780e1dec3 --- /dev/null +++ b/tests/queries/0_stateless/02343_analyzer_lambdas_issue_28083.sql @@ -0,0 +1,17 @@ +SET allow_experimental_analyzer = 1; + +select so, + r +from + (select [('y',0),('n',1)] as cg, + if( arrayMap( x -> x.1, cg ) != ['y', 'n'], 'y', 'n') as so, + arrayFilter( x -> x.1 = so , cg) as r + ); + +select + r +from + (select [('y',0),('n',1)] as cg, + if( arrayMap( x -> x.1, cg ) != ['y', 'n'], 'y', 'n') as so, + arrayFilter( x -> x.1 = so , cg) as r + ); diff --git a/tests/queries/0_stateless/02343_analyzer_lambdas_issue_36677.reference b/tests/queries/0_stateless/02343_analyzer_lambdas_issue_36677.reference new file mode 100644 index 00000000000..bec52aa46b6 --- /dev/null +++ b/tests/queries/0_stateless/02343_analyzer_lambdas_issue_36677.reference @@ -0,0 +1,2 @@ +2.1999999999999997 289.99999999999994 [1,2,3,4] [0.1,0.2,0.1,0.2] +2.1999999999999997 289.99999999999994 [1,2,3,4] [0.1,0.2,0.1,0.2] diff --git a/tests/queries/0_stateless/02343_analyzer_lambdas_issue_36677.sql b/tests/queries/0_stateless/02343_analyzer_lambdas_issue_36677.sql new file mode 100644 index 00000000000..b07f3f33ac3 --- /dev/null +++ 
b/tests/queries/0_stateless/02343_analyzer_lambdas_issue_36677.sql @@ -0,0 +1,14 @@ +SET allow_experimental_analyzer = 1; + +SELECT + arraySum(x -> ((x.1) / ((x.2) * (x.2))), arrayZip(mag, magerr)) / arraySum(x -> (1. / (x * x)), magerr) AS weightedmeanmag, + arraySum(x -> ((((x.1) - weightedmeanmag) * ((x.1) - weightedmeanmag)) / ((x.2) * (x.2))), arrayZip(mag, magerr)) AS chi2, + [1, 2, 3, 4] AS mag, + [0.1, 0.2, 0.1, 0.2] AS magerr; + +SELECT + arraySum(x -> ((x.1) / ((x.2) * (x.2))), arrayZip(mag, magerr)) / arraySum(x -> (1. / (x * x)), magerr) AS weightedmeanmag, + arraySum(x -> ((((x.1) - weightedmeanmag) * ((x.1) - weightedmeanmag)) / ((x.2) * (x.2))), arrayZip(mag, magerr)) AS chi2, + [1, 2, 3, 4] AS mag, + [0.1, 0.2, 0.1, 0.2] AS magerr +WHERE isFinite(chi2) diff --git a/tests/queries/0_stateless/02344_analyzer_multiple_aliases_for_expression.reference b/tests/queries/0_stateless/02344_analyzer_multiple_aliases_for_expression.reference new file mode 100644 index 00000000000..e0d1bb800d2 --- /dev/null +++ b/tests/queries/0_stateless/02344_analyzer_multiple_aliases_for_expression.reference @@ -0,0 +1,4 @@ +1 1 +0 0 +2 +1 1 diff --git a/tests/queries/0_stateless/02344_analyzer_multiple_aliases_for_expression.sql b/tests/queries/0_stateless/02344_analyzer_multiple_aliases_for_expression.sql new file mode 100644 index 00000000000..cd1bca8285b --- /dev/null +++ b/tests/queries/0_stateless/02344_analyzer_multiple_aliases_for_expression.sql @@ -0,0 +1,27 @@ +SET allow_experimental_analyzer = 1; + +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + id UInt64, + value String +) ENGINE=TinyLog; + +INSERT INTO test_table VALUES (0, 'Value'); + +SELECT 1 AS value, 1 AS value; +SELECT id AS value, id AS value FROM test_table; +WITH x -> x + 1 AS lambda, x -> x + 1 AS lambda SELECT lambda(1); +SELECT (SELECT 1) AS subquery, (SELECT 1) AS subquery; + +SELECT 1 AS value, 2 AS value; -- { serverError 179 } +SELECT plus(1, 1) AS value, 2 AS value; -- { serverError 179 } +SELECT (SELECT 1) AS subquery, 1 AS subquery; -- { serverError 179 } +WITH x -> x + 1 AS lambda, x -> x + 2 AS lambda SELECT lambda(1); -- { serverError 179 } +WITH x -> x + 1 AS lambda SELECT (SELECT 1) AS lambda; -- { serverError 179 } +WITH x -> x + 1 AS lambda SELECT 1 AS lambda; -- { serverError 179 } +SELECT id AS value, value AS value FROM test_table; -- { serverError 179 } +SELECT id AS value_1, value AS value_1 FROM test_table; -- { serverError 179 } +SELECT id AS value, (id + 1) AS value FROM test_table; -- { serverError 179 } + +DROP TABLE test_table; diff --git a/tests/queries/0_stateless/02344_show_caches.reference b/tests/queries/0_stateless/02344_show_caches.reference index 0c5957edb82..68882f63e1f 100644 --- a/tests/queries/0_stateless/02344_show_caches.reference +++ b/tests/queries/0_stateless/02344_show_caches.reference @@ -1,12 +1,13 @@ cached_azure s3_cache_2 +s3_cache +s3_cache_3 +s3_cache_multi s3_cache_4 s3_cache_5 local_cache +s3_cache_6 s3_cache_small local_cache_2 local_cache_3 -s3_cache_multi -s3_cache_3 -s3_cache s3_cache_multi_2 diff --git a/tests/queries/0_stateless/02345_analyzer_subqueries.reference b/tests/queries/0_stateless/02345_analyzer_subqueries.reference new file mode 100644 index 00000000000..1e70be9ef52 --- /dev/null +++ b/tests/queries/0_stateless/02345_analyzer_subqueries.reference @@ -0,0 +1,27 @@ +Scalar subqueries +1 +1 +0 +Value +(0,'Value') +Subqueries FROM section +1 +1 +1 +1 +1 +1 +0 Value +0 Value +2 +Subqueries CTE +1 +1 +1 +1 +1 +1 +1 +1 +2 +2 diff --git 
a/tests/queries/0_stateless/02345_analyzer_subqueries.sql b/tests/queries/0_stateless/02345_analyzer_subqueries.sql new file mode 100644 index 00000000000..c0cc242b57b --- /dev/null +++ b/tests/queries/0_stateless/02345_analyzer_subqueries.sql @@ -0,0 +1,51 @@ +SET allow_experimental_analyzer = 1; + +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + id UInt64, + value String +) ENGINE=TinyLog; + +INSERT INTO test_table VALUES (0, 'Value'); + +SELECT 'Scalar subqueries'; + +SELECT (SELECT 1); +WITH 1 AS a SELECT (SELECT a); + +SELECT (SELECT id FROM test_table); +SELECT (SELECT value FROM test_table); +SELECT (SELECT id, value FROM test_table); + +SELECT 'Subqueries FROM section'; + +SELECT a FROM (SELECT 1 AS a) AS b; +SELECT b.a FROM (SELECT 1 AS a) AS b; + +SELECT a FROM (SELECT 1 AS a) AS b; +SELECT b.a FROM (SELECT 1 AS a) AS b; + +WITH 1 AS global_a SELECT a FROM (SELECT global_a AS a) AS b; +WITH 1 AS global_a SELECT b.a FROM (SELECT global_a AS a) AS b; + +SELECT * FROM (SELECT * FROM (SELECT * FROM test_table)); +SELECT * FROM (SELECT id, value FROM (SELECT * FROM test_table)); + +WITH 1 AS a SELECT (SELECT * FROM (SELECT * FROM (SELECT a + 1))); + +SELECT 'Subqueries CTE'; + +WITH subquery AS (SELECT 1 AS a) SELECT * FROM subquery; +WITH subquery AS (SELECT 1 AS a) SELECT a FROM subquery; +WITH subquery AS (SELECT 1 AS a) SELECT subquery.a FROM subquery; +WITH subquery AS (SELECT 1 AS a) SELECT subquery.* FROM subquery; +WITH subquery AS (SELECT 1 AS a) SELECT subquery.* APPLY toString FROM subquery; +WITH subquery AS (SELECT 1 AS a) SELECT subquery_alias.a FROM subquery AS subquery_alias; +WITH subquery AS (SELECT 1 AS a) SELECT subquery_alias.* FROM subquery AS subquery_alias; +WITH subquery AS (SELECT 1 AS a) SELECT subquery_alias.* APPLY toString FROM subquery AS subquery_alias; + +WITH subquery_1 AS (SELECT 1 AS a), subquery_2 AS (SELECT 1 + subquery_1.a FROM subquery_1) SELECT * FROM subquery_2; +WITH subquery_1 AS (SELECT 1 AS a), subquery_2 AS (SELECT (1 + subquery_1.a) AS a FROM subquery_1) SELECT subquery_2.a FROM subquery_2; + +DROP TABLE test_table; diff --git a/tests/queries/0_stateless/02346_additional_filters.reference b/tests/queries/0_stateless/02346_additional_filters.reference index 22d53173e71..0a08995223d 100644 --- a/tests/queries/0_stateless/02346_additional_filters.reference +++ b/tests/queries/0_stateless/02346_additional_filters.reference @@ -60,6 +60,14 @@ select * from remote('127.0.0.{1,2}', system.one) settings additional_table_filt 0 0 select * from remote('127.0.0.{1,2}', system.one) settings additional_table_filters={'system.one' : 'dummy != 0'}; +select * from distr_table settings additional_table_filters={'distr_table' : 'x = 2'}; +2 bb +2 bb +select * from distr_table settings additional_table_filters={'distr_table' : 'x != 2 and x != 3'}; +1 a +4 dddd +1 a +4 dddd select * from system.numbers limit 5; 0 1 diff --git a/tests/queries/0_stateless/02346_additional_filters.sql b/tests/queries/0_stateless/02346_additional_filters.sql index 9e0bee4549b..f6b665713ec 100644 --- a/tests/queries/0_stateless/02346_additional_filters.sql +++ b/tests/queries/0_stateless/02346_additional_filters.sql @@ -1,3 +1,4 @@ +-- Tags: distributed drop table if exists table_1; drop table if exists table_2; drop table if exists v_numbers; @@ -6,6 +7,8 @@ drop table if exists mv_table; create table table_1 (x UInt32, y String) engine = MergeTree order by x; insert into table_1 values (1, 'a'), (2, 'bb'), (3, 'ccc'), (4, 'dddd'); +CREATE TABLE distr_table (x 
UInt32, y String) ENGINE = Distributed(test_cluster_two_shards, currentDatabase(), 'table_1'); + -- { echoOn } select * from table_1; @@ -29,6 +32,9 @@ select x from table_1 prewhere x != 2 where x != 2 settings additional_table_fil select * from remote('127.0.0.{1,2}', system.one) settings additional_table_filters={'system.one' : 'dummy = 0'}; select * from remote('127.0.0.{1,2}', system.one) settings additional_table_filters={'system.one' : 'dummy != 0'}; +select * from distr_table settings additional_table_filters={'distr_table' : 'x = 2'}; +select * from distr_table settings additional_table_filters={'distr_table' : 'x != 2 and x != 3'}; + select * from system.numbers limit 5; select * from system.numbers as t limit 5 settings additional_table_filters={'t' : 'number % 2 != 0'}; select * from system.numbers limit 5 settings additional_table_filters={'system.numbers' : 'number != 3'}; diff --git a/tests/queries/0_stateless/02346_additional_filters_distr.reference b/tests/queries/0_stateless/02346_additional_filters_distr.reference new file mode 100644 index 00000000000..81814b5e7bb --- /dev/null +++ b/tests/queries/0_stateless/02346_additional_filters_distr.reference @@ -0,0 +1,3 @@ +4 dddd +5 a +6 bb diff --git a/tests/queries/0_stateless/02346_additional_filters_distr.sql b/tests/queries/0_stateless/02346_additional_filters_distr.sql new file mode 100644 index 00000000000..bc9c1715c72 --- /dev/null +++ b/tests/queries/0_stateless/02346_additional_filters_distr.sql @@ -0,0 +1,20 @@ +-- Tags: no-parallel, distributed + +create database if not exists shard_0; +create database if not exists shard_1; + +drop table if exists dist_02346; +drop table if exists shard_0.data_02346; +drop table if exists shard_1.data_02346; + +create table shard_0.data_02346 (x UInt32, y String) engine = MergeTree order by x settings index_granularity = 2; +insert into shard_0.data_02346 values (1, 'a'), (2, 'bb'), (3, 'ccc'), (4, 'dddd'); + +create table shard_1.data_02346 (x UInt32, y String) engine = MergeTree order by x settings index_granularity = 2; +insert into shard_1.data_02346 values (5, 'a'), (6, 'bb'), (7, 'ccc'), (8, 'dddd'); + +create table dist_02346 (x UInt32, y String) engine=Distributed('test_cluster_two_shards_different_databases', /* default_database= */ '', data_02346); + +set max_rows_to_read=4; + +select * from dist_02346 order by x settings additional_table_filters={'dist_02346' : 'x > 3 and x < 7'}; diff --git a/tests/queries/0_stateless/02346_additional_filters_index.reference b/tests/queries/0_stateless/02346_additional_filters_index.reference new file mode 100644 index 00000000000..d4b9509cb3c --- /dev/null +++ b/tests/queries/0_stateless/02346_additional_filters_index.reference @@ -0,0 +1,30 @@ +-- { echoOn } +set max_rows_to_read = 2; +select * from table_1 order by x settings additional_table_filters={'table_1' : 'x > 3'}; +4 dddd +select * from table_1 order by x settings additional_table_filters={'table_1' : 'x < 3'}; +1 a +2 bb +select * from table_1 order by x settings additional_table_filters={'table_1' : 'length(y) >= 3'}; +3 ccc +4 dddd +select * from table_1 order by x settings additional_table_filters={'table_1' : 'length(y) < 3'}; +1 a +2 bb +set max_rows_to_read = 4; +select * from distr_table order by x settings additional_table_filters={'distr_table' : 'x > 3'}; +4 dddd +4 dddd +select * from distr_table order by x settings additional_table_filters={'distr_table' : 'x < 3'}; +1 a +1 a +2 bb +2 bb +select * from distr_table order by x settings 
additional_table_filters={'distr_table' : 'length(y) > 3'}; +4 dddd +4 dddd +select * from distr_table order by x settings additional_table_filters={'distr_table' : 'length(y) < 3'}; +1 a +1 a +2 bb +2 bb diff --git a/tests/queries/0_stateless/02346_additional_filters_index.sql b/tests/queries/0_stateless/02346_additional_filters_index.sql new file mode 100644 index 00000000000..0d40cc1f898 --- /dev/null +++ b/tests/queries/0_stateless/02346_additional_filters_index.sql @@ -0,0 +1,24 @@ +-- Tags: distributed + +create table table_1 (x UInt32, y String, INDEX a (length(y)) TYPE minmax GRANULARITY 1) engine = MergeTree order by x settings index_granularity = 2; +insert into table_1 values (1, 'a'), (2, 'bb'), (3, 'ccc'), (4, 'dddd'); + +CREATE TABLE distr_table (x UInt32, y String) ENGINE = Distributed(test_cluster_two_shards, currentDatabase(), 'table_1'); + +-- { echoOn } +set max_rows_to_read = 2; + +select * from table_1 order by x settings additional_table_filters={'table_1' : 'x > 3'}; +select * from table_1 order by x settings additional_table_filters={'table_1' : 'x < 3'}; + +select * from table_1 order by x settings additional_table_filters={'table_1' : 'length(y) >= 3'}; +select * from table_1 order by x settings additional_table_filters={'table_1' : 'length(y) < 3'}; + +set max_rows_to_read = 4; + +select * from distr_table order by x settings additional_table_filters={'distr_table' : 'x > 3'}; +select * from distr_table order by x settings additional_table_filters={'distr_table' : 'x < 3'}; + +select * from distr_table order by x settings additional_table_filters={'distr_table' : 'length(y) > 3'}; +select * from distr_table order by x settings additional_table_filters={'distr_table' : 'length(y) < 3'}; + diff --git a/tests/queries/0_stateless/02353_ascii.reference b/tests/queries/0_stateless/02353_ascii.reference new file mode 100644 index 00000000000..79588517e2a --- /dev/null +++ b/tests/queries/0_stateless/02353_ascii.reference @@ -0,0 +1,14 @@ +50 +0 +50 +0 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 diff --git a/tests/queries/0_stateless/02353_ascii.sql b/tests/queries/0_stateless/02353_ascii.sql new file mode 100644 index 00000000000..5b7a20ad61c --- /dev/null +++ b/tests/queries/0_stateless/02353_ascii.sql @@ -0,0 +1,5 @@ +SELECT ascii('234'); +SELECT ascii(''); +SELECT ascii(materialize('234')); +SELECT ascii(materialize('')); +SELECT ascii(toString(number) || 'abc') from numbers(10); diff --git a/tests/queries/0_stateless/02354_annoy.sql b/tests/queries/0_stateless/02354_annoy.sql index 8a8d023a104..654a4b545ea 100644 --- a/tests/queries/0_stateless/02354_annoy.sql +++ b/tests/queries/0_stateless/02354_annoy.sql @@ -44,3 +44,71 @@ ORDER BY L2Distance(embedding, [0.0, 0.0]) LIMIT 3; -- { serverError 80 } DROP TABLE IF EXISTS 02354_annoy; + +-- ------------------------------------ +-- Check that weird base columns are rejected + +-- Index spans >1 column + +CREATE TABLE 02354_annoy +( + id Int32, + embedding Array(Float32), + INDEX annoy_index (embedding, id) TYPE annoy(100) GRANULARITY 1 +) +ENGINE = MergeTree +ORDER BY id +SETTINGS index_granularity=5; -- {serverError 7 } + +-- Index must be created on Array(Float32) or Tuple(Float32) + +CREATE TABLE 02354_annoy +( + id Int32, + embedding Float32, + INDEX annoy_index embedding TYPE annoy(100) GRANULARITY 1 +) +ENGINE = MergeTree +ORDER BY id +SETTINGS index_granularity=5; -- {serverError 44 } + + +CREATE TABLE 02354_annoy +( + id Int32, + embedding Array(Float64), + INDEX annoy_index embedding TYPE annoy(100) GRANULARITY 1 
+) +ENGINE = MergeTree +ORDER BY id +SETTINGS index_granularity=5; -- {serverError 44 } + +CREATE TABLE 02354_annoy +( + id Int32, + embedding Tuple(Float32, Float64), + INDEX annoy_index embedding TYPE annoy(100) GRANULARITY 1 +) +ENGINE = MergeTree +ORDER BY id +SETTINGS index_granularity=5; -- {serverError 44 } + +CREATE TABLE 02354_annoy +( + id Int32, + embedding Array(LowCardinality(Float32)), + INDEX annoy_index embedding TYPE annoy(100) GRANULARITY 1 +) +ENGINE = MergeTree +ORDER BY id +SETTINGS index_granularity=5; -- {serverError 44 } + +CREATE TABLE 02354_annoy +( + id Int32, + embedding Array(Nullable(Float32)), + INDEX annoy_index embedding TYPE annoy(100) GRANULARITY 1 +) +ENGINE = MergeTree +ORDER BY id +SETTINGS index_granularity=5; -- {serverError 44 } diff --git a/tests/queries/0_stateless/02366_explain_query_tree.reference b/tests/queries/0_stateless/02366_explain_query_tree.reference new file mode 100644 index 00000000000..769d7661e68 --- /dev/null +++ b/tests/queries/0_stateless/02366_explain_query_tree.reference @@ -0,0 +1,102 @@ +QUERY id: 0 + PROJECTION + LIST id: 1, nodes: 1 + CONSTANT id: 2, constant_value: UInt64_1, constant_value_type: UInt8 + JOIN TREE + IDENTIFIER id: 3, identifier: system.one +-- +QUERY id: 0 + PROJECTION + LIST id: 1, nodes: 2 + IDENTIFIER id: 2, identifier: id + IDENTIFIER id: 3, identifier: value + JOIN TREE + IDENTIFIER id: 4, identifier: test_table +-- +QUERY id: 0 + PROJECTION COLUMNS + id UInt64 + value String + PROJECTION + LIST id: 1, nodes: 2 + COLUMN id: 2, column_name: id, result_type: UInt64, source_id: 3 + COLUMN id: 4, column_name: value, result_type: String, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.test_table +-- +QUERY id: 0 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: arrayMap, function_type: ordinary + ARGUMENTS + LIST id: 3, nodes: 2 + LAMBDA id: 4 + ARGUMENTS + LIST id: 5, nodes: 1 + IDENTIFIER id: 6, identifier: x + EXPRESSION + FUNCTION id: 7, function_name: plus, function_type: ordinary + ARGUMENTS + LIST id: 8, nodes: 2 + IDENTIFIER id: 9, identifier: x + IDENTIFIER id: 10, identifier: id + CONSTANT id: 11, constant_value: Array_[UInt64_1, UInt64_2, UInt64_3], constant_value_type: Array(UInt8) + JOIN TREE + IDENTIFIER id: 12, identifier: test_table +-- +QUERY id: 0 + PROJECTION COLUMNS + arrayMap(lambda(tuple(x), plus(x, 1)), [1, 2, 3]) Array(UInt16) + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: arrayMap, function_type: ordinary, result_type: Array(UInt16) + ARGUMENTS + LIST id: 3, nodes: 2 + LAMBDA id: 4 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: x, result_type: UInt8, source_id: 4 + EXPRESSION + FUNCTION id: 7, function_name: plus, function_type: ordinary, result_type: UInt16 + ARGUMENTS + LIST id: 8, nodes: 2 + COLUMN id: 6, column_name: x, result_type: UInt8, source_id: 4 + CONSTANT id: 9, constant_value: UInt64_1, constant_value_type: UInt8 + CONSTANT id: 10, constant_value: Array_[UInt64_1, UInt64_2, UInt64_3], constant_value_type: Array(UInt8) + JOIN TREE + TABLE id: 11, table_name: default.test_table +-- +QUERY id: 0 + WITH + LIST id: 1, nodes: 1 + LAMBDA id: 2, alias: lambda + ARGUMENTS + LIST id: 3, nodes: 1 + IDENTIFIER id: 4, identifier: x + EXPRESSION + FUNCTION id: 5, function_name: plus, function_type: ordinary + ARGUMENTS + LIST id: 6, nodes: 2 + IDENTIFIER id: 7, identifier: x + CONSTANT id: 8, constant_value: UInt64_1, constant_value_type: UInt8 + PROJECTION + LIST id: 9, nodes: 1 + FUNCTION id: 10, function_name: 
lambda, function_type: ordinary + ARGUMENTS + LIST id: 11, nodes: 1 + IDENTIFIER id: 12, identifier: id + JOIN TREE + IDENTIFIER id: 13, identifier: test_table +-- +QUERY id: 0 + PROJECTION COLUMNS + lambda(id) UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 2 + COLUMN id: 4, column_name: id, result_type: UInt64, source_id: 5 + CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: UInt8 + JOIN TREE + TABLE id: 5, table_name: default.test_table diff --git a/tests/queries/0_stateless/02366_explain_query_tree.sql b/tests/queries/0_stateless/02366_explain_query_tree.sql new file mode 100644 index 00000000000..c942f0cac37 --- /dev/null +++ b/tests/queries/0_stateless/02366_explain_query_tree.sql @@ -0,0 +1,38 @@ +SET allow_experimental_analyzer = 1; + +EXPLAIN QUERY TREE SELECT 1; + +SELECT '--'; + +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + id UInt64, + value String +) ENGINE=TinyLog; + +INSERT INTO test_table VALUES (0, 'Value'); + +EXPLAIN QUERY TREE SELECT id, value FROM test_table; + +SELECT '--'; + +EXPLAIN QUERY TREE run_passes = 1 SELECT id, value FROM test_table; + +SELECT '--'; + +EXPLAIN QUERY TREE SELECT arrayMap(x -> x + id, [1, 2, 3]) FROM test_table; + +SELECT '--'; + +EXPLAIN QUERY TREE run_passes = 1 SELECT arrayMap(x -> x + 1, [1, 2, 3]) FROM test_table; + +SELECT '--'; + +EXPLAIN QUERY TREE WITH x -> x + 1 AS lambda SELECT lambda(id) FROM test_table; + +SELECT '--'; + +EXPLAIN QUERY TREE run_passes = 1 WITH x -> x + 1 AS lambda SELECT lambda(id) FROM test_table; + +DROP TABLE test_table; diff --git a/tests/queries/0_stateless/02367_analyzer_table_alias_columns.reference b/tests/queries/0_stateless/02367_analyzer_table_alias_columns.reference new file mode 100644 index 00000000000..1f4875e38c2 --- /dev/null +++ b/tests/queries/0_stateless/02367_analyzer_table_alias_columns.reference @@ -0,0 +1,3 @@ +0 6 5 +0 Value 2 +0 Value 2 diff --git a/tests/queries/0_stateless/02367_analyzer_table_alias_columns.sql b/tests/queries/0_stateless/02367_analyzer_table_alias_columns.sql new file mode 100644 index 00000000000..f41680cd9f4 --- /dev/null +++ b/tests/queries/0_stateless/02367_analyzer_table_alias_columns.sql @@ -0,0 +1,41 @@ +SET allow_experimental_analyzer = 1; + +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + id UInt64, + alias_value_1 ALIAS id + alias_value_2 + 1, + alias_value_2 ALIAS id + 5 +) ENGINE=TinyLog; + +INSERT INTO test_table VALUES (0); + +SELECT id, alias_value_1, alias_value_2 FROM test_table; + +DROP TABLE test_table; + +CREATE TABLE test_table +( + id UInt64, + value String, + alias_value ALIAS ((id + 1) AS inside_value) + inside_value +) ENGINE=TinyLog; + +INSERT INTO test_table VALUES (0, 'Value'); + +SELECT id, value, alias_value FROM test_table; + +DROP TABLE test_table; + +CREATE TABLE test_table +( + id UInt64, + value String, + alias_value ALIAS ((id + 1) AS value) + value +) ENGINE=TinyLog; + +INSERT INTO test_table VALUES (0, 'Value'); + +SELECT id, value, alias_value FROM test_table; + +DROP TABLE test_table; diff --git a/tests/queries/0_stateless/02368_analyzer_table_functions.reference b/tests/queries/0_stateless/02368_analyzer_table_functions.reference new file mode 100644 index 00000000000..0c1bc4d90a2 --- /dev/null +++ b/tests/queries/0_stateless/02368_analyzer_table_functions.reference @@ -0,0 +1,6 @@ +1 2 [1,2,3] [['abc'],[],['d','e']] +1 2 [1,2,3] [['abc'],[],['d','e']] +1 2 [1,2,3] 
[['abc'],[],['d','e']] +1 2 [1,2,3] [['abc'],[],['d','e']] +1 2 [1,2,3] [['abc'],[],['d','e']] +CSV 1,2,"[1,2,3]","[[\'abc\'], [], [\'d\', \'e\']]" 1 2 [1,2,3] [['abc'],[],['d','e']] diff --git a/tests/queries/0_stateless/02368_analyzer_table_functions.sql b/tests/queries/0_stateless/02368_analyzer_table_functions.sql new file mode 100644 index 00000000000..456e095c6c1 --- /dev/null +++ b/tests/queries/0_stateless/02368_analyzer_table_functions.sql @@ -0,0 +1,10 @@ +SET allow_experimental_analyzer = 1; + +SELECT c1, c2, c3, c4 FROM format('CSV', '1,2,"[1,2,3]","[[\'abc\'], [], [\'d\', \'e\']]"'); +SELECT f.c1, f.c2, f.c3, f.c4 FROM format('CSV', '1,2,"[1,2,3]","[[\'abc\'], [], [\'d\', \'e\']]"') AS f; +SELECT f.* FROM format('CSV', '1,2,"[1,2,3]","[[\'abc\'], [], [\'d\', \'e\']]"') AS f; + +WITH 'CSV', '1,2,"[1,2,3]","[[\'abc\'], [], [\'d\', \'e\']]"' AS format_value SELECT c1, c2, c3, c4 FROM format('CSV', format_value); +WITH concat('1,2,"[1,2,3]",','"[[\'abc\'], [], [\'d\', \'e\']]"') AS format_value SELECT c1, c2, c3, c4 FROM format('CSV', format_value); + +SELECT format, format_value, c1, c2, c3, c4 FROM format('CSV' AS format, '1,2,"[1,2,3]","[[\'abc\'], [], [\'d\', \'e\']]"' AS format_value); diff --git a/tests/queries/0_stateless/02369_analyzer_array_join_function.reference b/tests/queries/0_stateless/02369_analyzer_array_join_function.reference new file mode 100644 index 00000000000..7025cff6909 --- /dev/null +++ b/tests/queries/0_stateless/02369_analyzer_array_join_function.reference @@ -0,0 +1,44 @@ +1 +2 +3 +-- +1 1 +2 2 +3 3 +-- +1 1 +2 2 +3 3 +-- +[1,2,3] 1 +[1,2,3] 2 +[1,2,3] 3 +-- +1 1 +1 2 +1 3 +1 4 +2 1 +2 2 +2 3 +2 4 +3 1 +3 2 +3 3 +3 4 +-- +[1,1,1] +[2,2,2] +[3,3,3] +-- +[2,3,4] 1 +[3,4,5] 2 +[4,5,6] 3 +-- +0 1 +0 2 +0 3 +-- +0 1 1 +0 2 2 +0 3 3 diff --git a/tests/queries/0_stateless/02369_analyzer_array_join_function.sql b/tests/queries/0_stateless/02369_analyzer_array_join_function.sql new file mode 100644 index 00000000000..9a9939d2a2f --- /dev/null +++ b/tests/queries/0_stateless/02369_analyzer_array_join_function.sql @@ -0,0 +1,59 @@ +SET allow_experimental_analyzer = 1; + +SELECT arrayJoin([1, 2, 3]); + +SELECT '--'; + +SELECT arrayJoin([1, 2, 3]) AS a, arrayJoin([1, 2, 3]); + +SELECT '--'; + +SELECT arrayJoin([1, 2, 3]) AS a, a; + +SELECT '--'; + +SELECT arrayJoin([[1, 2, 3]]) AS a, arrayJoin(a) AS b; + +SELECT '--'; + +SELECT arrayJoin([1, 2, 3]) AS a, arrayJoin([1, 2, 3, 4]) AS b; + +SELECT '--'; + +SELECT arrayMap(x -> arrayJoin([1, 2, 3]), [1, 2, 3]); + +SELECT arrayMap(x -> arrayJoin(x), [[1, 2, 3]]); -- { serverError 36 } + +SELECT arrayMap(x -> arrayJoin(cast(x, 'Array(UInt8)')), [[1, 2, 3]]); -- { serverError 36 } + +SELECT '--'; + +SELECT arrayMap(x -> x + a, [1, 2, 3]), arrayJoin([1,2,3]) as a; + +SELECT '--'; + +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + id UInt64, + value_1 Array(UInt8), + value_2 Array(UInt8), +) ENGINE=TinyLog; + +INSERT INTO test_table VALUES (0, [1, 2, 3], [1, 2, 3, 4]); + +SELECT id, arrayJoin(value_1) FROM test_table; + +SELECT '--'; + +SELECT id, arrayJoin(value_1) AS a, a FROM test_table; + +-- SELECT '--'; + +-- SELECT id, arrayJoin(value_1), arrayJoin(value_2) FROM test_table; + +-- SELECT '--'; + +-- SELECT id, arrayJoin(value_1), arrayJoin(value_2), arrayJoin([5, 6]) FROM test_table; + +DROP TABLE test_table; diff --git a/tests/queries/0_stateless/02370_analyzer_in_function.reference b/tests/queries/0_stateless/02370_analyzer_in_function.reference new file mode 100644 index 00000000000..fda174c0b7c --- 
/dev/null +++ b/tests/queries/0_stateless/02370_analyzer_in_function.reference @@ -0,0 +1,14 @@ +1 +1 +0 +0 +1 +1 +0 +1 +-- +1 +0 +1 +1 +0 diff --git a/tests/queries/0_stateless/02370_analyzer_in_function.sql b/tests/queries/0_stateless/02370_analyzer_in_function.sql new file mode 100644 index 00000000000..a7128ced449 --- /dev/null +++ b/tests/queries/0_stateless/02370_analyzer_in_function.sql @@ -0,0 +1,23 @@ +SET allow_experimental_analyzer = 1; + +SELECT 1 IN 1; +SELECT 1 IN (1); +SELECT 1 IN 0; +SELECT 1 IN (0); +SELECT 1 IN (1, 2); +SELECT (1, 1) IN ((1, 1), (1, 2)); +SELECT (1, 1) IN ((1, 2), (1, 2)); +SELECT 1 IN (((1), (2))); + +SELECT '--'; + +SELECT 1 IN [1]; +SELECT 1 IN [0]; +SELECT 1 IN [1, 2]; +SELECT (1, 1) IN [(1, 1), (1, 2)]; +SELECT (1, 1) IN [(1, 2), (1, 2)]; + +SELECT (1, 2) IN 1; -- { serverError 43 } +SELECT (1, 2) IN [1]; -- { serverError 124 } +SELECT (1, 2) IN (((1, 2), (1, 2)), ((1, 2), (1, 2))); -- { serverError 43 } +SELECT (1, 2) IN [((1, 2), (1, 2)), ((1, 2), (1, 2))]; -- { serverError 43 } diff --git a/tests/queries/0_stateless/02371_analyzer_join_cross.reference b/tests/queries/0_stateless/02371_analyzer_join_cross.reference new file mode 100644 index 00000000000..50e43ac28d1 --- /dev/null +++ b/tests/queries/0_stateless/02371_analyzer_join_cross.reference @@ -0,0 +1,133 @@ +0 Join_1_Value_0 0 Join_2_Value_0 +0 Join_1_Value_0 1 Join_2_Value_1 +0 Join_1_Value_0 2 Join_2_Value_2 +1 Join_1_Value_1 0 Join_2_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 +1 Join_1_Value_1 2 Join_2_Value_2 +3 Join_1_Value_3 0 Join_2_Value_0 +3 Join_1_Value_3 1 Join_2_Value_1 +3 Join_1_Value_3 2 Join_2_Value_2 +-- +0 Join_1_Value_0 0 Join_2_Value_0 +0 Join_1_Value_0 1 Join_2_Value_1 +0 Join_1_Value_0 2 Join_2_Value_2 +1 Join_1_Value_1 0 Join_2_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 +1 Join_1_Value_1 2 Join_2_Value_2 +3 Join_1_Value_3 0 Join_2_Value_0 +3 Join_1_Value_3 1 Join_2_Value_1 +3 Join_1_Value_3 2 Join_2_Value_2 +-- +0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 +0 0 Join_1_Value_0 Join_1_Value_0 1 1 Join_2_Value_1 Join_2_Value_1 +0 0 Join_1_Value_0 Join_1_Value_0 2 2 Join_2_Value_2 Join_2_Value_2 +1 1 Join_1_Value_1 Join_1_Value_1 0 0 Join_2_Value_0 Join_2_Value_0 +1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 +1 1 Join_1_Value_1 Join_1_Value_1 2 2 Join_2_Value_2 Join_2_Value_2 +3 3 Join_1_Value_3 Join_1_Value_3 0 0 Join_2_Value_0 Join_2_Value_0 +3 3 Join_1_Value_3 Join_1_Value_3 1 1 Join_2_Value_1 Join_2_Value_1 +3 3 Join_1_Value_3 Join_1_Value_3 2 2 Join_2_Value_2 Join_2_Value_2 +-- +0 Join_1_Value_0 0 Join_2_Value_0 +0 Join_1_Value_0 1 Join_2_Value_1 +0 Join_1_Value_0 2 Join_2_Value_2 +1 Join_1_Value_1 0 Join_2_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 +1 Join_1_Value_1 2 Join_2_Value_2 +3 Join_1_Value_3 0 Join_2_Value_0 +3 Join_1_Value_3 1 Join_2_Value_1 +3 Join_1_Value_3 2 Join_2_Value_2 +-- +0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 +0 0 Join_1_Value_0 Join_1_Value_0 1 1 Join_2_Value_1 Join_2_Value_1 +0 0 Join_1_Value_0 Join_1_Value_0 2 2 Join_2_Value_2 Join_2_Value_2 +1 1 Join_1_Value_1 Join_1_Value_1 0 0 Join_2_Value_0 Join_2_Value_0 +1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 +1 1 Join_1_Value_1 Join_1_Value_1 2 2 Join_2_Value_2 Join_2_Value_2 +3 3 Join_1_Value_3 Join_1_Value_3 0 0 Join_2_Value_0 Join_2_Value_0 +3 3 Join_1_Value_3 Join_1_Value_3 1 1 Join_2_Value_1 Join_2_Value_1 +3 3 Join_1_Value_3 Join_1_Value_3 2 2 Join_2_Value_2 Join_2_Value_2 +-- +0 Join_1_Value_0 0 
Join_2_Value_0 0 Join_3_Value_0 +0 Join_1_Value_0 0 Join_2_Value_0 1 Join_3_Value_1 +0 Join_1_Value_0 0 Join_2_Value_0 2 Join_3_Value_2 +0 Join_1_Value_0 1 Join_2_Value_1 0 Join_3_Value_0 +0 Join_1_Value_0 1 Join_2_Value_1 1 Join_3_Value_1 +0 Join_1_Value_0 1 Join_2_Value_1 2 Join_3_Value_2 +0 Join_1_Value_0 2 Join_2_Value_2 0 Join_3_Value_0 +0 Join_1_Value_0 2 Join_2_Value_2 1 Join_3_Value_1 +0 Join_1_Value_0 2 Join_2_Value_2 2 Join_3_Value_2 +1 Join_1_Value_1 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 0 Join_2_Value_0 1 Join_3_Value_1 +1 Join_1_Value_1 0 Join_2_Value_0 2 Join_3_Value_2 +1 Join_1_Value_1 1 Join_2_Value_1 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +1 Join_1_Value_1 1 Join_2_Value_1 2 Join_3_Value_2 +1 Join_1_Value_1 2 Join_2_Value_2 0 Join_3_Value_0 +1 Join_1_Value_1 2 Join_2_Value_2 1 Join_3_Value_1 +1 Join_1_Value_1 2 Join_2_Value_2 2 Join_3_Value_2 +3 Join_1_Value_3 0 Join_2_Value_0 0 Join_3_Value_0 +3 Join_1_Value_3 0 Join_2_Value_0 1 Join_3_Value_1 +3 Join_1_Value_3 0 Join_2_Value_0 2 Join_3_Value_2 +3 Join_1_Value_3 1 Join_2_Value_1 0 Join_3_Value_0 +3 Join_1_Value_3 1 Join_2_Value_1 1 Join_3_Value_1 +3 Join_1_Value_3 1 Join_2_Value_1 2 Join_3_Value_2 +3 Join_1_Value_3 2 Join_2_Value_2 0 Join_3_Value_0 +3 Join_1_Value_3 2 Join_2_Value_2 1 Join_3_Value_1 +3 Join_1_Value_3 2 Join_2_Value_2 2 Join_3_Value_2 +-- +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +0 Join_1_Value_0 0 Join_2_Value_0 1 Join_3_Value_1 +0 Join_1_Value_0 0 Join_2_Value_0 2 Join_3_Value_2 +0 Join_1_Value_0 1 Join_2_Value_1 0 Join_3_Value_0 +0 Join_1_Value_0 1 Join_2_Value_1 1 Join_3_Value_1 +0 Join_1_Value_0 1 Join_2_Value_1 2 Join_3_Value_2 +0 Join_1_Value_0 2 Join_2_Value_2 0 Join_3_Value_0 +0 Join_1_Value_0 2 Join_2_Value_2 1 Join_3_Value_1 +0 Join_1_Value_0 2 Join_2_Value_2 2 Join_3_Value_2 +1 Join_1_Value_1 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 0 Join_2_Value_0 1 Join_3_Value_1 +1 Join_1_Value_1 0 Join_2_Value_0 2 Join_3_Value_2 +1 Join_1_Value_1 1 Join_2_Value_1 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +1 Join_1_Value_1 1 Join_2_Value_1 2 Join_3_Value_2 +1 Join_1_Value_1 2 Join_2_Value_2 0 Join_3_Value_0 +1 Join_1_Value_1 2 Join_2_Value_2 1 Join_3_Value_1 +1 Join_1_Value_1 2 Join_2_Value_2 2 Join_3_Value_2 +3 Join_1_Value_3 0 Join_2_Value_0 0 Join_3_Value_0 +3 Join_1_Value_3 0 Join_2_Value_0 1 Join_3_Value_1 +3 Join_1_Value_3 0 Join_2_Value_0 2 Join_3_Value_2 +3 Join_1_Value_3 1 Join_2_Value_1 0 Join_3_Value_0 +3 Join_1_Value_3 1 Join_2_Value_1 1 Join_3_Value_1 +3 Join_1_Value_3 1 Join_2_Value_1 2 Join_3_Value_2 +3 Join_1_Value_3 2 Join_2_Value_2 0 Join_3_Value_0 +3 Join_1_Value_3 2 Join_2_Value_2 1 Join_3_Value_1 +3 Join_1_Value_3 2 Join_2_Value_2 2 Join_3_Value_2 +-- +0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 +0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 1 1 Join_3_Value_1 Join_3_Value_1 +0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 2 2 Join_3_Value_2 Join_3_Value_2 +0 0 Join_1_Value_0 Join_1_Value_0 1 1 Join_2_Value_1 Join_2_Value_1 0 0 Join_3_Value_0 Join_3_Value_0 +0 0 Join_1_Value_0 Join_1_Value_0 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 +0 0 Join_1_Value_0 Join_1_Value_0 1 1 Join_2_Value_1 Join_2_Value_1 2 2 Join_3_Value_2 Join_3_Value_2 +0 0 Join_1_Value_0 Join_1_Value_0 2 2 Join_2_Value_2 Join_2_Value_2 0 0 Join_3_Value_0 Join_3_Value_0 +0 0 Join_1_Value_0 Join_1_Value_0 2 
2 Join_2_Value_2 Join_2_Value_2 1 1 Join_3_Value_1 Join_3_Value_1 +0 0 Join_1_Value_0 Join_1_Value_0 2 2 Join_2_Value_2 Join_2_Value_2 2 2 Join_3_Value_2 Join_3_Value_2 +1 1 Join_1_Value_1 Join_1_Value_1 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 +1 1 Join_1_Value_1 Join_1_Value_1 0 0 Join_2_Value_0 Join_2_Value_0 1 1 Join_3_Value_1 Join_3_Value_1 +1 1 Join_1_Value_1 Join_1_Value_1 0 0 Join_2_Value_0 Join_2_Value_0 2 2 Join_3_Value_2 Join_3_Value_2 +1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 0 0 Join_3_Value_0 Join_3_Value_0 +1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 +1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 2 2 Join_3_Value_2 Join_3_Value_2 +1 1 Join_1_Value_1 Join_1_Value_1 2 2 Join_2_Value_2 Join_2_Value_2 0 0 Join_3_Value_0 Join_3_Value_0 +1 1 Join_1_Value_1 Join_1_Value_1 2 2 Join_2_Value_2 Join_2_Value_2 1 1 Join_3_Value_1 Join_3_Value_1 +1 1 Join_1_Value_1 Join_1_Value_1 2 2 Join_2_Value_2 Join_2_Value_2 2 2 Join_3_Value_2 Join_3_Value_2 +3 3 Join_1_Value_3 Join_1_Value_3 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 +3 3 Join_1_Value_3 Join_1_Value_3 0 0 Join_2_Value_0 Join_2_Value_0 1 1 Join_3_Value_1 Join_3_Value_1 +3 3 Join_1_Value_3 Join_1_Value_3 0 0 Join_2_Value_0 Join_2_Value_0 2 2 Join_3_Value_2 Join_3_Value_2 +3 3 Join_1_Value_3 Join_1_Value_3 1 1 Join_2_Value_1 Join_2_Value_1 0 0 Join_3_Value_0 Join_3_Value_0 +3 3 Join_1_Value_3 Join_1_Value_3 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 +3 3 Join_1_Value_3 Join_1_Value_3 1 1 Join_2_Value_1 Join_2_Value_1 2 2 Join_3_Value_2 Join_3_Value_2 +3 3 Join_1_Value_3 Join_1_Value_3 2 2 Join_2_Value_2 Join_2_Value_2 0 0 Join_3_Value_0 Join_3_Value_0 +3 3 Join_1_Value_3 Join_1_Value_3 2 2 Join_2_Value_2 Join_2_Value_2 1 1 Join_3_Value_1 Join_3_Value_1 +3 3 Join_1_Value_3 Join_1_Value_3 2 2 Join_2_Value_2 Join_2_Value_2 2 2 Join_3_Value_2 Join_3_Value_2 diff --git a/tests/queries/0_stateless/02371_analyzer_join_cross.sql b/tests/queries/0_stateless/02371_analyzer_join_cross.sql new file mode 100644 index 00000000000..8261572cdf2 --- /dev/null +++ b/tests/queries/0_stateless/02371_analyzer_join_cross.sql @@ -0,0 +1,78 @@ +SET allow_experimental_analyzer = 1; + +DROP TABLE IF EXISTS test_table_join_1; +CREATE TABLE test_table_join_1 +( + id UInt64, + value String +) ENGINE = TinyLog; + +DROP TABLE IF EXISTS test_table_join_2; +CREATE TABLE test_table_join_2 +( + id UInt64, + value String +) ENGINE = TinyLog; + +DROP TABLE IF EXISTS test_table_join_3; +CREATE TABLE test_table_join_3 +( + id UInt64, + value String +) ENGINE = TinyLog; + +INSERT INTO test_table_join_1 VALUES (0, 'Join_1_Value_0'); +INSERT INTO test_table_join_1 VALUES (1, 'Join_1_Value_1'); +INSERT INTO test_table_join_1 VALUES (3, 'Join_1_Value_3'); + +INSERT INTO test_table_join_2 VALUES (0, 'Join_2_Value_0'); +INSERT INTO test_table_join_2 VALUES (1, 'Join_2_Value_1'); +INSERT INTO test_table_join_2 VALUES (2, 'Join_2_Value_2'); + +INSERT INTO test_table_join_3 VALUES (0, 'Join_3_Value_0'); +INSERT INTO test_table_join_3 VALUES (1, 'Join_3_Value_1'); +INSERT INTO test_table_join_3 VALUES (2, 'Join_3_Value_2'); + +SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value +FROM test_table_join_1, test_table_join_2; + +SELECT '--'; + +SELECT t1.id, t1.value, t2.id, t2.value FROM test_table_join_1 AS t1, test_table_join_2 AS t2; + +SELECT '--'; + 
+SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value +FROM test_table_join_1 AS t1, test_table_join_2 AS t2; + +SELECT '--'; + +SELECT t1.id, t1.value, t2.id, t2.value FROM test_table_join_1 AS t1, test_table_join_2 AS t2; + +SELECT '--'; + +SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value FROM test_table_join_1 AS t1, test_table_join_2 AS t2; + +SELECT '--'; + +SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value +FROM test_table_join_1, test_table_join_2, test_table_join_3; + +SELECT '--'; + +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1, test_table_join_2 AS t2, test_table_join_3 AS t3; + +SELECT '--'; + +SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, +t3.id, test_table_join_3.id, t3.value, test_table_join_3.value +FROM test_table_join_1 AS t1, test_table_join_2 AS t2, test_table_join_3 AS t3; + +SELECT id FROM test_table_join_1, test_table_join_2; -- { serverError 207 } + +SELECT value FROM test_table_join_1, test_table_join_2; -- { serverError 207 } + +DROP TABLE test_table_join_1; +DROP TABLE test_table_join_2; +DROP TABLE test_table_join_3; diff --git a/tests/queries/0_stateless/02372_analyzer_join.reference b/tests/queries/0_stateless/02372_analyzer_join.reference new file mode 100644 index 00000000000..b8a658106ff --- /dev/null +++ b/tests/queries/0_stateless/02372_analyzer_join.reference @@ -0,0 +1,1554 @@ +-- { echoOn } + +SELECT 'JOIN INNER'; +JOIN INNER +SELECT 'JOIN ON without conditions'; +JOIN ON without conditions +SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value +FROM test_table_join_1 INNER JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id; +0 Join_1_Value_0 0 Join_2_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +0 Join_1_Value_0 0 Join_2_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 +SELECT '--'; +-- +SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 +1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 +SELECT '--'; +-- +SELECT t1.value, t2.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +Join_1_Value_0 Join_2_Value_0 +Join_1_Value_1 Join_2_Value_1 +SELECT id FROM test_table_join_1 INNER JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id; -- { serverError 207 } +SELECT value FROM test_table_join_1 INNER JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id; -- { serverError 207 } +SELECT 'JOIN ON with conditions'; +JOIN ON with conditions +SELECT t1.id, t1.value, t2.id, t2.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value = 'Join_1_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = 
t2.id AND t2.value = 'Join_2_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value = 'Join_1_Value_0' AND t2.value = 'Join_2_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON toString(t1.id) = toString(t2.id) AND t1.value = 'Join_1_Value_0' AND t2.value = 'Join_2_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 +SELECT 'JOIN multiple clauses'; +JOIN multiple clauses +SELECT t1.id, t1.value, t2.id, t2.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id; +0 Join_1_Value_0 0 Join_2_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 +SELECT 'JOIN expression aliases'; +JOIN expression aliases +SELECT t1_id, t1.value, t2_id, t2.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id); +0 Join_1_Value_0 0 Join_2_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 +SELECT '--'; +-- +SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1_id = t2_id; +0 Join_1_Value_0 0 Join_2_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 +SELECT 'JOIN LEFT'; +JOIN LEFT +SELECT 'JOIN ON without conditions'; +JOIN ON without conditions +SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value +FROM test_table_join_1 LEFT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id; +0 Join_1_Value_0 0 Join_2_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 +2 Join_1_Value_2 0 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +0 Join_1_Value_0 0 Join_2_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 +2 Join_1_Value_2 0 +SELECT '--'; +-- +SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 +1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 +2 2 Join_1_Value_2 Join_1_Value_2 0 0 +SELECT '--'; +-- +SELECT t1.value, t2.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +Join_1_Value_0 Join_2_Value_0 +Join_1_Value_1 Join_2_Value_1 +Join_1_Value_2 +SELECT id FROM test_table_join_1 LEFT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id; -- { serverError 207 } +SELECT value FROM test_table_join_1 LEFT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id; -- { serverError 207 } +SELECT 'JOIN ON with conditions'; +JOIN ON with conditions +SELECT t1.id, t1.value, t2.id, t2.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value = 'Join_1_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t2.value = 'Join_2_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 +1 Join_1_Value_1 0 +2 Join_1_Value_2 0 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value = 'Join_1_Value_0' AND t2.value = 
'Join_2_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 +1 Join_1_Value_1 0 +2 Join_1_Value_2 0 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON toString(t1.id) = toString(t2.id) AND t1.value = 'Join_1_Value_0' AND t2.value = 'Join_2_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 +1 Join_1_Value_1 0 +2 Join_1_Value_2 0 +SELECT 'JOIN multiple clauses'; +JOIN multiple clauses +SELECT t1.id, t1.value, t2.id, t2.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id; +0 Join_1_Value_0 0 Join_2_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 +2 Join_1_Value_2 0 +SELECT 'JOIN expression aliases'; +JOIN expression aliases +SELECT t1_id, t1.value, t2_id, t2.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id); +0 Join_1_Value_0 0 Join_2_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 +2 Join_1_Value_2 0 +SELECT '--'; +-- +SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1_id = t2_id; +0 Join_1_Value_0 0 Join_2_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 +2 Join_1_Value_2 0 +SELECT 'JOIN RIGHT'; +JOIN RIGHT +SELECT 'JOIN ON without conditions'; +JOIN ON without conditions +SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value +FROM test_table_join_1 RIGHT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id; +0 Join_1_Value_0 0 Join_2_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 +0 3 Join_2_Value_3 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +0 Join_1_Value_0 0 Join_2_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 +0 3 Join_2_Value_3 +SELECT '--'; +-- +SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 +1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 +0 0 3 3 Join_2_Value_3 Join_2_Value_3 +SELECT '--'; +-- +SELECT t1.value, t2.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +Join_1_Value_0 Join_2_Value_0 +Join_1_Value_1 Join_2_Value_1 + Join_2_Value_3 +SELECT id FROM test_table_join_1 RIGHT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id; -- { serverError 207 } +SELECT value FROM test_table_join_1 RIGHT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id; -- { serverError 207 } +SELECT 'JOIN ON with conditions'; +JOIN ON with conditions +SELECT t1.id, t1.value, t2.id, t2.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value = 'Join_1_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t2.value = 'Join_2_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 +0 1 Join_2_Value_1 +0 3 Join_2_Value_3 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value = 'Join_1_Value_0' AND t2.value = 'Join_2_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 +0 1 Join_2_Value_1 +0 3 Join_2_Value_3 +SELECT '--'; +-- +SELECT t1.id, 
t1.value, t2.id, t2.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON toString(t1.id) = toString(t2.id) AND t1.value = 'Join_1_Value_0' AND t2.value = 'Join_2_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 +0 1 Join_2_Value_1 +0 3 Join_2_Value_3 +SELECT 'JOIN multiple clauses'; +JOIN multiple clauses +SELECT t1.id, t1.value, t2.id, t2.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id; +0 Join_1_Value_0 0 Join_2_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 +0 3 Join_2_Value_3 +SELECT 'JOIN expression aliases'; +JOIN expression aliases +SELECT t1_id, t1.value, t2_id, t2.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id); +0 Join_1_Value_0 0 Join_2_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 +0 3 Join_2_Value_3 +SELECT '--'; +-- +SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1_id = t2_id; +0 Join_1_Value_0 0 Join_2_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 +0 3 Join_2_Value_3 +SELECT 'JOIN FULL'; +JOIN FULL +SELECT 'JOIN ON without conditions'; +JOIN ON without conditions +SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value +FROM test_table_join_1 FULL JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id; +0 Join_1_Value_0 0 Join_2_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 +2 Join_1_Value_2 0 +0 3 Join_2_Value_3 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +0 Join_1_Value_0 0 Join_2_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 +2 Join_1_Value_2 0 +0 3 Join_2_Value_3 +SELECT '--'; +-- +SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 +1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 +2 2 Join_1_Value_2 Join_1_Value_2 0 0 +0 0 3 3 Join_2_Value_3 Join_2_Value_3 +SELECT '--'; +-- +SELECT t1.value, t2.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +Join_1_Value_0 Join_2_Value_0 +Join_1_Value_1 Join_2_Value_1 +Join_1_Value_2 + Join_2_Value_3 +SELECT id FROM test_table_join_1 FULL JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id; -- { serverError 207 } +SELECT value FROM test_table_join_1 FULL JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id; -- { serverError 207 } +SELECT 'JOIN ON with conditions'; +JOIN ON with conditions +SELECT t1.id, t1.value, t2.id, t2.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value = 'Join_1_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t2.value = 'Join_2_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 +1 Join_1_Value_1 0 +2 Join_1_Value_2 0 +0 1 Join_2_Value_1 +0 3 Join_2_Value_3 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value = 'Join_1_Value_0' AND t2.value = 'Join_2_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 +1 Join_1_Value_1 0 +2 Join_1_Value_2 0 +0 1 Join_2_Value_1 +0 3 
Join_2_Value_3 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON toString(t1.id) = toString(t2.id) AND t1.value = 'Join_1_Value_0' AND t2.value = 'Join_2_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 +1 Join_1_Value_1 0 +2 Join_1_Value_2 0 +0 1 Join_2_Value_1 +0 3 Join_2_Value_3 +SELECT 'JOIN multiple clauses'; +JOIN multiple clauses +SELECT t1.id, t1.value, t2.id, t2.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id; +0 Join_1_Value_0 0 Join_2_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 +2 Join_1_Value_2 0 +0 3 Join_2_Value_3 +SELECT 'JOIN expression aliases'; +JOIN expression aliases +SELECT t1_id, t1.value, t2_id, t2.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id); +0 Join_1_Value_0 0 Join_2_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 +2 Join_1_Value_2 0 +0 3 Join_2_Value_3 +SELECT '--'; +-- +SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1_id = t2_id; +0 Join_1_Value_0 0 Join_2_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 +2 Join_1_Value_2 0 +0 3 Join_2_Value_3 +SELECT 'First JOIN INNER second JOIN INNER'; +First JOIN INNER second JOIN INNER +SELECT 'JOIN ON without conditions'; +JOIN ON without conditions +SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value +FROM test_table_join_1 INNER JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id +INNER JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +SELECT '--'; +-- +SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, +t3.id, test_table_join_3.id, t3.value, test_table_join_3.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 +1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 +SELECT '--'; +-- +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 +SELECT 'JOIN ON with conditions'; +JOIN ON with conditions +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' +INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +SELECT '--'; +-- 
+SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' +INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +SELECT 'JOIN multiple clauses'; +JOIN multiple clauses +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 +INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id +INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; +Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 +SELECT 'JOIN expression aliases'; +JOIN expression aliases +SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) +INNER JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +SELECT '--'; +-- +SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1_id = t2_id +INNER JOIN test_table_join_3 AS t3 ON t2_id = t3_id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +SELECT 'First JOIN INNER second JOIN LEFT'; +First JOIN INNER second JOIN LEFT +SELECT 'JOIN ON without conditions'; +JOIN ON without conditions +SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value +FROM test_table_join_1 INNER JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id +LEFT JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +SELECT '--'; +-- +SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, +t3.id, test_table_join_3.id, t3.value, test_table_join_3.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 +1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 +SELECT '--'; +-- +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 +SELECT 'JOIN ON with conditions'; +JOIN ON with conditions +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 INNER JOIN 
test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' +LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' +LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +SELECT 'JOIN multiple clauses'; +JOIN multiple clauses +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 +INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id +LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; +Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 +SELECT 'JOIN expression aliases'; +JOIN expression aliases +SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) +LEFT JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +SELECT '--'; +-- +SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1_id = t2_id +LEFT JOIN test_table_join_3 AS t3 ON t2_id = t3_id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +SELECT 'First JOIN INNER second JOIN RIGHT'; +First JOIN INNER second JOIN RIGHT +SELECT 'JOIN ON without conditions'; +JOIN ON without conditions +SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value +FROM test_table_join_1 INNER JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id +RIGHT JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +0 0 4 Join_3_Value_4 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +0 0 4 Join_3_Value_4 +SELECT '--'; +-- +SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, +t3.id, test_table_join_3.id, t3.value, test_table_join_3.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 +1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 +0 0 0 0 4 4 Join_3_Value_4 Join_3_Value_4 +SELECT '--'; +-- +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +RIGHT JOIN test_table_join_3 AS 
t3 ON test_table_join_2.id = test_table_join_3.id; +Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 + Join_3_Value_4 +SELECT 'JOIN ON with conditions'; +JOIN ON with conditions +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' +RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +0 0 4 Join_3_Value_4 +0 0 1 Join_3_Value_1 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' +RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +0 0 1 Join_3_Value_1 +0 0 4 Join_3_Value_4 +SELECT 'JOIN multiple clauses'; +JOIN multiple clauses +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 +INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id +RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; +Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 + Join_3_Value_4 +SELECT 'JOIN expression aliases'; +JOIN expression aliases +SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) +RIGHT JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +0 0 4 Join_3_Value_4 +SELECT '--'; +-- +SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1_id = t2_id +RIGHT JOIN test_table_join_3 AS t3 ON t2_id = t3_id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +0 0 4 Join_3_Value_4 +SELECT 'First JOIN INNER second JOIN FULL'; +First JOIN INNER second JOIN FULL +SELECT 'JOIN ON without conditions'; +JOIN ON without conditions +SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value +FROM test_table_join_1 INNER JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id +FULL JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +0 0 4 Join_3_Value_4 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +0 0 4 Join_3_Value_4 +SELECT '--'; +-- +SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, +t3.id, test_table_join_3.id, t3.value, test_table_join_3.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = 
test_table_join_3.id; +0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 +1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 +0 0 0 0 4 4 Join_3_Value_4 Join_3_Value_4 +SELECT '--'; +-- +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 + Join_3_Value_4 +SELECT 'JOIN ON with conditions'; +JOIN ON with conditions +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' +FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +0 0 4 Join_3_Value_4 +0 0 1 Join_3_Value_1 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' +FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +0 0 1 Join_3_Value_1 +0 0 4 Join_3_Value_4 +SELECT 'JOIN multiple clauses'; +JOIN multiple clauses +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 +INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id +FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; +Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 + Join_3_Value_4 +SELECT 'JOIN expression aliases'; +JOIN expression aliases +SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) +FULL JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +0 0 4 Join_3_Value_4 +SELECT '--'; +-- +SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1_id = t2_id +FULL JOIN test_table_join_3 AS t3 ON t2_id = t3_id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +0 0 4 Join_3_Value_4 +SELECT 'First JOIN LEFT second JOIN INNER'; +First JOIN LEFT second JOIN INNER +SELECT 'JOIN ON without conditions'; +JOIN ON without conditions +SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value +FROM test_table_join_1 LEFT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id +INNER JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +2 Join_1_Value_2 0 0 Join_3_Value_0 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +0 Join_1_Value_0 0 Join_2_Value_0 0 
Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +2 Join_1_Value_2 0 0 Join_3_Value_0 +SELECT '--'; +-- +SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, +t3.id, test_table_join_3.id, t3.value, test_table_join_3.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 +1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 +2 2 Join_1_Value_2 Join_1_Value_2 0 0 0 0 Join_3_Value_0 Join_3_Value_0 +SELECT '--'; +-- +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 +Join_1_Value_2 Join_3_Value_0 +SELECT 'JOIN ON with conditions'; +JOIN ON with conditions +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' +INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' +INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +SELECT 'JOIN multiple clauses'; +JOIN multiple clauses +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 +LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id +INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; +Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 +Join_1_Value_2 Join_3_Value_0 +SELECT 'JOIN expression aliases'; +JOIN expression aliases +SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) +INNER JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +2 Join_1_Value_2 0 0 Join_3_Value_0 +SELECT '--'; +-- +SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1_id = t2_id +INNER JOIN test_table_join_3 AS t3 ON t2_id = t3_id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +2 Join_1_Value_2 0 0 Join_3_Value_0 +SELECT 'First JOIN LEFT second JOIN LEFT'; +First JOIN LEFT second JOIN LEFT +SELECT 'JOIN ON without conditions'; +JOIN ON without conditions +SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value +FROM test_table_join_1 LEFT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id +LEFT JOIN test_table_join_3 ON 
test_table_join_2.id = test_table_join_3.id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +2 Join_1_Value_2 0 0 Join_3_Value_0 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +2 Join_1_Value_2 0 0 Join_3_Value_0 +SELECT '--'; +-- +SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, +t3.id, test_table_join_3.id, t3.value, test_table_join_3.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 +1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 +2 2 Join_1_Value_2 Join_1_Value_2 0 0 0 0 Join_3_Value_0 Join_3_Value_0 +SELECT '--'; +-- +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 +Join_1_Value_2 Join_3_Value_0 +SELECT 'JOIN ON with conditions'; +JOIN ON with conditions +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' +LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 0 0 +2 Join_1_Value_2 0 0 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' +LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 0 0 +2 Join_1_Value_2 0 0 +SELECT 'JOIN multiple clauses'; +JOIN multiple clauses +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 +LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id +LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; +Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 +Join_1_Value_2 Join_3_Value_0 +SELECT 'JOIN expression aliases'; +JOIN expression aliases +SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) +LEFT JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +2 Join_1_Value_2 0 0 Join_3_Value_0 +SELECT '--'; +-- +SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1_id = t2_id +LEFT JOIN test_table_join_3 AS t3 ON t2_id = 
t3_id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +2 Join_1_Value_2 0 0 Join_3_Value_0 +SELECT 'First JOIN LEFT second JOIN RIGHT'; +First JOIN LEFT second JOIN RIGHT +SELECT 'JOIN ON without conditions'; +JOIN ON without conditions +SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value +FROM test_table_join_1 LEFT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id +RIGHT JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +2 Join_1_Value_2 0 0 Join_3_Value_0 +0 0 4 Join_3_Value_4 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +2 Join_1_Value_2 0 0 Join_3_Value_0 +0 0 4 Join_3_Value_4 +SELECT '--'; +-- +SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, +t3.id, test_table_join_3.id, t3.value, test_table_join_3.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 +1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 +2 2 Join_1_Value_2 Join_1_Value_2 0 0 0 0 Join_3_Value_0 Join_3_Value_0 +0 0 0 0 4 4 Join_3_Value_4 Join_3_Value_4 +SELECT '--'; +-- +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 +Join_1_Value_2 Join_3_Value_0 + Join_3_Value_4 +SELECT 'JOIN ON with conditions'; +JOIN ON with conditions +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' +RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +0 0 4 Join_3_Value_4 +0 0 1 Join_3_Value_1 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' +RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +0 0 1 Join_3_Value_1 +0 0 4 Join_3_Value_4 +SELECT 'JOIN multiple clauses'; +JOIN multiple clauses +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 +LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id +RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; +Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 
+Join_1_Value_2 Join_3_Value_0 + Join_3_Value_4 +SELECT 'JOIN expression aliases'; +JOIN expression aliases +SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) +RIGHT JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +2 Join_1_Value_2 0 0 Join_3_Value_0 +0 0 4 Join_3_Value_4 +SELECT '--'; +-- +SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1_id = t2_id +RIGHT JOIN test_table_join_3 AS t3 ON t2_id = t3_id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +2 Join_1_Value_2 0 0 Join_3_Value_0 +0 0 4 Join_3_Value_4 +SELECT 'First JOIN LEFT second JOIN FULL'; +First JOIN LEFT second JOIN FULL +SELECT 'JOIN ON without conditions'; +JOIN ON without conditions +SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value +FROM test_table_join_1 LEFT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id +FULL JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +2 Join_1_Value_2 0 0 Join_3_Value_0 +0 0 4 Join_3_Value_4 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +2 Join_1_Value_2 0 0 Join_3_Value_0 +0 0 4 Join_3_Value_4 +SELECT '--'; +-- +SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, +t3.id, test_table_join_3.id, t3.value, test_table_join_3.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 +1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 +2 2 Join_1_Value_2 Join_1_Value_2 0 0 0 0 Join_3_Value_0 Join_3_Value_0 +0 0 0 0 4 4 Join_3_Value_4 Join_3_Value_4 +SELECT '--'; +-- +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 +Join_1_Value_2 Join_3_Value_0 + Join_3_Value_4 +SELECT 'JOIN ON with conditions'; +JOIN ON with conditions +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' +FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 0 0 +2 Join_1_Value_2 0 0 +0 0 4 Join_3_Value_4 +0 0 1 Join_3_Value_1 +SELECT 
'--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' +FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 0 0 +2 Join_1_Value_2 0 0 +0 0 1 Join_3_Value_1 +0 0 4 Join_3_Value_4 +SELECT 'JOIN multiple clauses'; +JOIN multiple clauses +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 +LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id +FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; +Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 +Join_1_Value_2 Join_3_Value_0 + Join_3_Value_4 +SELECT 'JOIN expression aliases'; +JOIN expression aliases +SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) +FULL JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +2 Join_1_Value_2 0 0 Join_3_Value_0 +0 0 4 Join_3_Value_4 +SELECT '--'; +-- +SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1_id = t2_id +FULL JOIN test_table_join_3 AS t3 ON t2_id = t3_id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +2 Join_1_Value_2 0 0 Join_3_Value_0 +0 0 4 Join_3_Value_4 +SELECT 'First JOIN RIGHT second JOIN INNER'; +First JOIN RIGHT second JOIN INNER +SELECT 'JOIN ON without conditions'; +JOIN ON without conditions +SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value +FROM test_table_join_1 RIGHT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id +INNER JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +SELECT '--'; +-- +SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, +t3.id, test_table_join_3.id, t3.value, test_table_join_3.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 +1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 +SELECT '--'; +-- +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; 
+Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 +SELECT 'JOIN ON with conditions'; +JOIN ON with conditions +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' +INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' +INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +SELECT 'JOIN multiple clauses'; +JOIN multiple clauses +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 +RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id +INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; +Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 +SELECT 'JOIN expression aliases'; +JOIN expression aliases +SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) +INNER JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +SELECT '--'; +-- +SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1_id = t2_id +INNER JOIN test_table_join_3 AS t3 ON t2_id = t3_id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +SELECT 'First JOIN RIGHT second JOIN LEFT'; +First JOIN RIGHT second JOIN LEFT +SELECT 'JOIN ON without conditions'; +JOIN ON without conditions +SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value +FROM test_table_join_1 RIGHT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id +LEFT JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +0 3 Join_2_Value_3 0 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +0 3 Join_2_Value_3 0 +SELECT '--'; +-- +SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, +t3.id, test_table_join_3.id, t3.value, test_table_join_3.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 +1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 
Join_3_Value_1 +0 0 3 3 Join_2_Value_3 Join_2_Value_3 0 0 +SELECT '--'; +-- +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 + Join_2_Value_3 +SELECT 'JOIN ON with conditions'; +JOIN ON with conditions +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' +LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +0 3 Join_2_Value_3 0 +0 1 Join_2_Value_1 0 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' +LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +0 1 Join_2_Value_1 0 +0 3 Join_2_Value_3 0 +SELECT 'JOIN multiple clauses'; +JOIN multiple clauses +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 +RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id +LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; +Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 + Join_2_Value_3 +SELECT 'JOIN expression aliases'; +JOIN expression aliases +SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) +LEFT JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +0 3 Join_2_Value_3 0 +SELECT '--'; +-- +SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1_id = t2_id +LEFT JOIN test_table_join_3 AS t3 ON t2_id = t3_id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +0 3 Join_2_Value_3 0 +SELECT 'First JOIN RIGHT second JOIN RIGHT'; +First JOIN RIGHT second JOIN RIGHT +SELECT 'JOIN ON without conditions'; +JOIN ON without conditions +SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value +FROM test_table_join_1 RIGHT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id +RIGHT JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +0 0 4 Join_3_Value_4 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +0 0 4 Join_3_Value_4 +SELECT '--'; +-- +SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, 
test_table_join_2.value, +t3.id, test_table_join_3.id, t3.value, test_table_join_3.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 +1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 +0 0 0 0 4 4 Join_3_Value_4 Join_3_Value_4 +SELECT '--'; +-- +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 + Join_3_Value_4 +SELECT 'JOIN ON with conditions'; +JOIN ON with conditions +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' +RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +0 0 4 Join_3_Value_4 +0 0 1 Join_3_Value_1 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' +RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +0 0 1 Join_3_Value_1 +0 0 4 Join_3_Value_4 +SELECT 'JOIN multiple clauses'; +JOIN multiple clauses +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 +RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id +RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; +Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 + Join_3_Value_4 +SELECT 'JOIN expression aliases'; +JOIN expression aliases +SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) +RIGHT JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +0 0 4 Join_3_Value_4 +SELECT '--'; +-- +SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1_id = t2_id +RIGHT JOIN test_table_join_3 AS t3 ON t2_id = t3_id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +0 0 4 Join_3_Value_4 +SELECT 'First JOIN RIGHT second JOIN FULL'; +First JOIN RIGHT second JOIN FULL +SELECT 'JOIN ON without conditions'; +JOIN ON without conditions +SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value +FROM test_table_join_1 RIGHT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id +FULL JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +0 3 Join_2_Value_3 0 +0 0 4 Join_3_Value_4 +SELECT '--'; +-- +SELECT t1.id, 
t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +0 3 Join_2_Value_3 0 +0 0 4 Join_3_Value_4 +SELECT '--'; +-- +SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, +t3.id, test_table_join_3.id, t3.value, test_table_join_3.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 +1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 +0 0 3 3 Join_2_Value_3 Join_2_Value_3 0 0 +0 0 0 0 4 4 Join_3_Value_4 Join_3_Value_4 +SELECT '--'; +-- +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 + Join_2_Value_3 + Join_3_Value_4 +SELECT 'JOIN ON with conditions'; +JOIN ON with conditions +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' +FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +0 3 Join_2_Value_3 0 +0 1 Join_2_Value_1 0 +0 0 4 Join_3_Value_4 +0 0 1 Join_3_Value_1 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' +FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +0 1 Join_2_Value_1 0 +0 3 Join_2_Value_3 0 +0 0 1 Join_3_Value_1 +0 0 4 Join_3_Value_4 +SELECT 'JOIN multiple clauses'; +JOIN multiple clauses +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 +RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id +FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; +Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 + Join_2_Value_3 + Join_3_Value_4 +SELECT 'JOIN expression aliases'; +JOIN expression aliases +SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) +FULL JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +0 3 Join_2_Value_3 0 +0 0 4 Join_3_Value_4 +SELECT '--'; +-- +SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1_id = t2_id +FULL JOIN test_table_join_3 AS t3 ON t2_id = t3_id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 
Join_2_Value_1 1 Join_3_Value_1 +0 3 Join_2_Value_3 0 +0 0 4 Join_3_Value_4 +SELECT 'First JOIN FULL second JOIN INNER'; +First JOIN FULL second JOIN INNER +SELECT 'JOIN ON without conditions'; +JOIN ON without conditions +SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value +FROM test_table_join_1 FULL JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id +INNER JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +2 Join_1_Value_2 0 0 Join_3_Value_0 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +2 Join_1_Value_2 0 0 Join_3_Value_0 +SELECT '--'; +-- +SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, +t3.id, test_table_join_3.id, t3.value, test_table_join_3.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 +1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 +2 2 Join_1_Value_2 Join_1_Value_2 0 0 0 0 Join_3_Value_0 Join_3_Value_0 +SELECT '--'; +-- +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 +Join_1_Value_2 Join_3_Value_0 +SELECT 'JOIN ON with conditions'; +JOIN ON with conditions +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' +INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' +INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +SELECT 'JOIN multiple clauses'; +JOIN multiple clauses +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 +FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id +INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; +Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 +Join_1_Value_2 Join_3_Value_0 +SELECT 'JOIN expression aliases'; +JOIN expression aliases +SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) +INNER JOIN 
test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +2 Join_1_Value_2 0 0 Join_3_Value_0 +SELECT '--'; +-- +SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1_id = t2_id +INNER JOIN test_table_join_3 AS t3 ON t2_id = t3_id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +2 Join_1_Value_2 0 0 Join_3_Value_0 +SELECT 'First JOIN FULL second JOIN LEFT'; +First JOIN FULL second JOIN LEFT +SELECT 'JOIN ON without conditions'; +JOIN ON without conditions +SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value +FROM test_table_join_1 FULL JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id +LEFT JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +2 Join_1_Value_2 0 0 Join_3_Value_0 +0 3 Join_2_Value_3 0 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +2 Join_1_Value_2 0 0 Join_3_Value_0 +0 3 Join_2_Value_3 0 +SELECT '--'; +-- +SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, +t3.id, test_table_join_3.id, t3.value, test_table_join_3.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 +1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 +2 2 Join_1_Value_2 Join_1_Value_2 0 0 0 0 Join_3_Value_0 Join_3_Value_0 +0 0 3 3 Join_2_Value_3 Join_2_Value_3 0 0 +SELECT '--'; +-- +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 +Join_1_Value_2 Join_3_Value_0 + Join_2_Value_3 +SELECT 'JOIN ON with conditions'; +JOIN ON with conditions +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' +LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 0 0 +2 Join_1_Value_2 0 0 +0 3 Join_2_Value_3 0 +0 1 Join_2_Value_1 0 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' +LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 
'Join_3_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 0 0 +2 Join_1_Value_2 0 0 +0 1 Join_2_Value_1 0 +0 3 Join_2_Value_3 0 +SELECT 'JOIN multiple clauses'; +JOIN multiple clauses +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 +FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id +LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; +Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 +Join_1_Value_2 Join_3_Value_0 + Join_2_Value_3 +SELECT 'JOIN expression aliases'; +JOIN expression aliases +SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) +LEFT JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +2 Join_1_Value_2 0 0 Join_3_Value_0 +0 3 Join_2_Value_3 0 +SELECT '--'; +-- +SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1_id = t2_id +LEFT JOIN test_table_join_3 AS t3 ON t2_id = t3_id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +2 Join_1_Value_2 0 0 Join_3_Value_0 +0 3 Join_2_Value_3 0 +SELECT 'First JOIN FULL second JOIN RIGHT'; +First JOIN FULL second JOIN RIGHT +SELECT 'JOIN ON without conditions'; +JOIN ON without conditions +SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value +FROM test_table_join_1 FULL JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id +RIGHT JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +2 Join_1_Value_2 0 0 Join_3_Value_0 +0 0 4 Join_3_Value_4 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +2 Join_1_Value_2 0 0 Join_3_Value_0 +0 0 4 Join_3_Value_4 +SELECT '--'; +-- +SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, +t3.id, test_table_join_3.id, t3.value, test_table_join_3.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 +1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 +2 2 Join_1_Value_2 Join_1_Value_2 0 0 0 0 Join_3_Value_0 Join_3_Value_0 +0 0 0 0 4 4 Join_3_Value_4 Join_3_Value_4 +SELECT '--'; +-- +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 
+Join_1_Value_2 Join_3_Value_0 + Join_3_Value_4 +SELECT 'JOIN ON with conditions'; +JOIN ON with conditions +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' +RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +0 0 4 Join_3_Value_4 +0 0 1 Join_3_Value_1 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' +RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +0 0 1 Join_3_Value_1 +0 0 4 Join_3_Value_4 +SELECT 'JOIN multiple clauses'; +JOIN multiple clauses +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 +FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id +RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; +Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 +Join_1_Value_2 Join_3_Value_0 + Join_3_Value_4 +SELECT 'JOIN expression aliases'; +JOIN expression aliases +SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) +RIGHT JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +2 Join_1_Value_2 0 0 Join_3_Value_0 +0 0 4 Join_3_Value_4 +SELECT '--'; +-- +SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1_id = t2_id +RIGHT JOIN test_table_join_3 AS t3 ON t2_id = t3_id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +2 Join_1_Value_2 0 0 Join_3_Value_0 +0 0 4 Join_3_Value_4 +SELECT 'First JOIN FULL second JOIN FULL'; +First JOIN FULL second JOIN FULL +SELECT 'JOIN ON without conditions'; +JOIN ON without conditions +SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value +FROM test_table_join_1 FULL JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id +FULL JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +2 Join_1_Value_2 0 0 Join_3_Value_0 +0 3 Join_2_Value_3 0 +0 0 4 Join_3_Value_4 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +2 Join_1_Value_2 0 0 Join_3_Value_0 +0 3 Join_2_Value_3 0 +0 0 4 Join_3_Value_4 +SELECT '--'; +-- +SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, +t3.id, test_table_join_3.id, t3.value, test_table_join_3.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON 
test_table_join_1.id = test_table_join_2.id +FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 +1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 +2 2 Join_1_Value_2 Join_1_Value_2 0 0 0 0 Join_3_Value_0 Join_3_Value_0 +0 0 3 3 Join_2_Value_3 Join_2_Value_3 0 0 +0 0 0 0 4 4 Join_3_Value_4 Join_3_Value_4 +SELECT '--'; +-- +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 +Join_1_Value_2 Join_3_Value_0 + Join_2_Value_3 + Join_3_Value_4 +SELECT 'JOIN ON with conditions'; +JOIN ON with conditions +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' +FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 0 0 +2 Join_1_Value_2 0 0 +0 3 Join_2_Value_3 0 +0 1 Join_2_Value_1 0 +0 0 4 Join_3_Value_4 +0 0 1 Join_3_Value_1 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' +FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 0 0 +2 Join_1_Value_2 0 0 +0 1 Join_2_Value_1 0 +0 3 Join_2_Value_3 0 +0 0 1 Join_3_Value_1 +0 0 4 Join_3_Value_4 +SELECT 'JOIN multiple clauses'; +JOIN multiple clauses +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 +FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id +FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; +Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 +Join_1_Value_2 Join_3_Value_0 + Join_2_Value_3 + Join_3_Value_4 +SELECT 'JOIN expression aliases'; +JOIN expression aliases +SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) +FULL JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +2 Join_1_Value_2 0 0 Join_3_Value_0 +0 3 Join_2_Value_3 0 +0 0 4 Join_3_Value_4 +SELECT '--'; +-- +SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1_id = t2_id +FULL JOIN test_table_join_3 AS t3 ON t2_id = t3_id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +2 Join_1_Value_2 0 0 Join_3_Value_0 +0 3 Join_2_Value_3 0 +0 0 4 Join_3_Value_4 diff --git a/tests/queries/0_stateless/02372_analyzer_join.sql.j2 b/tests/queries/0_stateless/02372_analyzer_join.sql.j2 new file mode 100644 index 00000000000..9b3c212562b --- /dev/null +++ b/tests/queries/0_stateless/02372_analyzer_join.sql.j2 @@ -0,0 +1,170 @@ +-- Tags: long + +SET allow_experimental_analyzer = 
1; + +DROP TABLE IF EXISTS test_table_join_1; +CREATE TABLE test_table_join_1 +( + id UInt64, + value String +) ENGINE = TinyLog; + +DROP TABLE IF EXISTS test_table_join_2; +CREATE TABLE test_table_join_2 +( + id UInt64, + value String +) ENGINE = TinyLog; + +DROP TABLE IF EXISTS test_table_join_3; +CREATE TABLE test_table_join_3 +( + id UInt64, + value String +) ENGINE = TinyLog; + +INSERT INTO test_table_join_1 VALUES (0, 'Join_1_Value_0'); +INSERT INTO test_table_join_1 VALUES (1, 'Join_1_Value_1'); +INSERT INTO test_table_join_1 VALUES (2, 'Join_1_Value_2'); + +INSERT INTO test_table_join_2 VALUES (0, 'Join_2_Value_0'); +INSERT INTO test_table_join_2 VALUES (1, 'Join_2_Value_1'); +INSERT INTO test_table_join_2 VALUES (3, 'Join_2_Value_3'); + +INSERT INTO test_table_join_3 VALUES (0, 'Join_3_Value_0'); +INSERT INTO test_table_join_3 VALUES (1, 'Join_3_Value_1'); +INSERT INTO test_table_join_3 VALUES (4, 'Join_3_Value_4'); + +-- { echoOn } + +{% for join_type in ['INNER', 'LEFT', 'RIGHT', 'FULL'] -%} + +SELECT 'JOIN {{ join_type }}'; + +SELECT 'JOIN ON without conditions'; + +SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value +FROM test_table_join_1 {{ join_type }} JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id; + +SELECT '--'; + +SELECT t1.id, t1.value, t2.id, t2.value +FROM test_table_join_1 AS t1 {{ join_type }} JOIN test_table_join_2 AS t2 ON t1.id = t2.id; + +SELECT '--'; + +SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value +FROM test_table_join_1 AS t1 {{ join_type }} JOIN test_table_join_2 AS t2 ON t1.id = t2.id; + +SELECT '--'; + +SELECT t1.value, t2.value +FROM test_table_join_1 AS t1 {{ join_type }} JOIN test_table_join_2 AS t2 ON t1.id = t2.id; + +SELECT id FROM test_table_join_1 {{ join_type }} JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id; -- { serverError 207 } + +SELECT value FROM test_table_join_1 {{ join_type }} JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id; -- { serverError 207 } + +SELECT 'JOIN ON with conditions'; + +SELECT t1.id, t1.value, t2.id, t2.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value = 'Join_1_Value_0'; + +SELECT '--'; + +SELECT t1.id, t1.value, t2.id, t2.value +FROM test_table_join_1 AS t1 {{ join_type }} JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t2.value = 'Join_2_Value_0'; + +SELECT '--'; + +SELECT t1.id, t1.value, t2.id, t2.value +FROM test_table_join_1 AS t1 {{ join_type }} JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value = 'Join_1_Value_0' AND t2.value = 'Join_2_Value_0'; + +SELECT '--'; + +SELECT t1.id, t1.value, t2.id, t2.value +FROM test_table_join_1 AS t1 {{ join_type }} JOIN test_table_join_2 AS t2 ON toString(t1.id) = toString(t2.id) AND t1.value = 'Join_1_Value_0' AND t2.value = 'Join_2_Value_0'; + +SELECT 'JOIN multiple clauses'; + +SELECT t1.id, t1.value, t2.id, t2.value +FROM test_table_join_1 AS t1 {{ join_type }} JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id; + +SELECT 'JOIN expression aliases'; + +SELECT t1_id, t1.value, t2_id, t2.value FROM test_table_join_1 AS t1 {{ join_type }} JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id); + +SELECT '--'; + +SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value FROM test_table_join_1 AS t1 {{ join_type }} JOIN test_table_join_2 AS t2 ON t1_id = t2_id; + +{% endfor %} + +{% for 
first_join_type in ['INNER', 'LEFT', 'RIGHT', 'FULL'] -%} +{% for second_join_type in ['INNER', 'LEFT', 'RIGHT', 'FULL'] -%} + +SELECT 'First JOIN {{ first_join_type }} second JOIN {{ second_join_type }}'; + +SELECT 'JOIN ON without conditions'; + +SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value +FROM test_table_join_1 {{ first_join_type }} JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id +{{ second_join_type }} JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; + +SELECT '--'; + +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 {{ first_join_type }} JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +{{ second_join_type }} JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; + +SELECT '--'; + +SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, +t3.id, test_table_join_3.id, t3.value, test_table_join_3.value +FROM test_table_join_1 AS t1 {{ first_join_type }} JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +{{ second_join_type }} JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; + +SELECT '--'; +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 {{ first_join_type }} JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +{{ second_join_type }} JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; + +SELECT 'JOIN ON with conditions'; + +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 {{ first_join_type }} JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' +{{ second_join_type }} JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; + +SELECT '--'; + +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 {{ first_join_type }} JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' +{{ second_join_type }} JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; + +SELECT 'JOIN multiple clauses'; + +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 +{{ first_join_type }} JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id +{{ second_join_type }} JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; + +SELECT 'JOIN expression aliases'; + +SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value +FROM test_table_join_1 AS t1 {{ first_join_type }} JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) +{{ second_join_type }} JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); + +SELECT '--'; + +SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value +FROM test_table_join_1 AS t1 {{ first_join_type }} JOIN test_table_join_2 AS t2 ON t1_id = t2_id +{{ second_join_type }} JOIN test_table_join_3 AS t3 ON t2_id = t3_id; + +{% endfor %} +{% endfor %} + +-- { echoOff } + +DROP TABLE test_table_join_1; +DROP TABLE test_table_join_2; +DROP TABLE test_table_join_3; diff --git a/tests/queries/0_stateless/02373_analyzer_join_use_nulls.reference b/tests/queries/0_stateless/02373_analyzer_join_use_nulls.reference new file mode 100644 index 00000000000..3722c23e4a0 --- /dev/null 
+++ b/tests/queries/0_stateless/02373_analyzer_join_use_nulls.reference @@ -0,0 +1,60 @@ +-- { echoOn } + +SELECT t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +0 UInt64 Join_1_Value_0 String 0 UInt64 Join_2_Value_0 String +1 UInt64 Join_1_Value_1 String 1 UInt64 Join_2_Value_1 String +SELECT '--'; +-- +SELECT t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +0 UInt64 Join_1_Value_0 String 0 Nullable(UInt64) Join_2_Value_0 Nullable(String) +1 UInt64 Join_1_Value_1 String 1 Nullable(UInt64) Join_2_Value_1 Nullable(String) +2 UInt64 Join_1_Value_2 String \N Nullable(UInt64) \N Nullable(String) +SELECT '--'; +-- +SELECT t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +0 Nullable(UInt64) Join_1_Value_0 Nullable(String) 0 UInt64 Join_2_Value_0 String +1 Nullable(UInt64) Join_1_Value_1 Nullable(String) 1 UInt64 Join_2_Value_1 String +\N Nullable(UInt64) \N Nullable(String) 3 UInt64 Join_2_Value_3 String +SELECT '--'; +-- +SELECT t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +0 Nullable(UInt64) Join_1_Value_0 Nullable(String) 0 Nullable(UInt64) Join_2_Value_0 Nullable(String) +1 Nullable(UInt64) Join_1_Value_1 Nullable(String) 1 Nullable(UInt64) Join_2_Value_1 Nullable(String) +2 Nullable(UInt64) Join_1_Value_2 Nullable(String) \N Nullable(UInt64) \N Nullable(String) +\N Nullable(UInt64) \N Nullable(String) 3 Nullable(UInt64) Join_2_Value_3 Nullable(String) +SELECT '--'; +-- +SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id); +0 UInt64 0 UInt64 Join_1_Value_0 String 0 UInt64 Join_2_Value_0 String +1 UInt64 1 UInt64 Join_1_Value_1 String 1 UInt64 Join_2_Value_1 String +SELECT '--'; +-- +SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 USING (id); +0 UInt64 0 UInt64 Join_1_Value_0 String 0 Nullable(UInt64) Join_2_Value_0 Nullable(String) +1 UInt64 1 UInt64 Join_1_Value_1 String 1 Nullable(UInt64) Join_2_Value_1 Nullable(String) +2 UInt64 2 UInt64 Join_1_Value_2 String \N Nullable(UInt64) \N Nullable(String) +SELECT '--'; +-- +SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 USING (id); +0 UInt64 0 Nullable(UInt64) Join_1_Value_0 Nullable(String) 0 UInt64 Join_2_Value_0 String +1 UInt64 1 
Nullable(UInt64) Join_1_Value_1 Nullable(String) 1 UInt64 Join_2_Value_1 String +3 UInt64 \N Nullable(UInt64) \N Nullable(String) 3 UInt64 Join_2_Value_3 String +SELECT '--'; +-- +SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 USING (id); +0 Nullable(UInt64) 0 Nullable(UInt64) Join_1_Value_0 Nullable(String) 0 Nullable(UInt64) Join_2_Value_0 Nullable(String) +1 Nullable(UInt64) 1 Nullable(UInt64) Join_1_Value_1 Nullable(String) 1 Nullable(UInt64) Join_2_Value_1 Nullable(String) +2 Nullable(UInt64) 2 Nullable(UInt64) Join_1_Value_2 Nullable(String) \N Nullable(UInt64) \N Nullable(String) +\N Nullable(UInt64) \N Nullable(UInt64) \N Nullable(String) 3 Nullable(UInt64) Join_2_Value_3 Nullable(String) diff --git a/tests/queries/0_stateless/02373_analyzer_join_use_nulls.sql b/tests/queries/0_stateless/02373_analyzer_join_use_nulls.sql new file mode 100644 index 00000000000..db7895084e8 --- /dev/null +++ b/tests/queries/0_stateless/02373_analyzer_join_use_nulls.sql @@ -0,0 +1,73 @@ +SET allow_experimental_analyzer = 1; +SET join_use_nulls = 1; + +DROP TABLE IF EXISTS test_table_join_1; +CREATE TABLE test_table_join_1 +( + id UInt64, + value String +) ENGINE = TinyLog; + +DROP TABLE IF EXISTS test_table_join_2; +CREATE TABLE test_table_join_2 +( + id UInt64, + value String +) ENGINE = TinyLog; + +INSERT INTO test_table_join_1 VALUES (0, 'Join_1_Value_0'); +INSERT INTO test_table_join_1 VALUES (1, 'Join_1_Value_1'); +INSERT INTO test_table_join_1 VALUES (2, 'Join_1_Value_2'); + +INSERT INTO test_table_join_2 VALUES (0, 'Join_2_Value_0'); +INSERT INTO test_table_join_2 VALUES (1, 'Join_2_Value_1'); +INSERT INTO test_table_join_2 VALUES (3, 'Join_2_Value_3'); + +-- { echoOn } + +SELECT t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id; + +SELECT '--'; + +SELECT t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id; + +SELECT '--'; + +SELECT t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id; + +SELECT '--'; + +SELECT t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id; + +SELECT '--'; + +SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id); + +SELECT '--'; + +SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 USING 
(id); + +SELECT '--'; + +SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 USING (id); + +SELECT '--'; + +SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 USING (id); + +-- { echoOff } + +DROP TABLE test_table_join_1; +DROP TABLE test_table_join_2; diff --git a/tests/queries/0_stateless/02373_progress_contain_result.reference b/tests/queries/0_stateless/02373_progress_contain_result.reference index 1e7492e2829..a125646e7b8 100644 --- a/tests/queries/0_stateless/02373_progress_contain_result.reference +++ b/tests/queries/0_stateless/02373_progress_contain_result.reference @@ -1 +1 @@ -< X-ClickHouse-Summary: {"read_rows":"100","read_bytes":"800","written_rows":"0","written_bytes":"0","total_rows_to_read":"100","result_rows":"100","result_bytes":"131"} +< X-ClickHouse-Summary: {"read_rows":"100","read_bytes":"800","written_rows":"0","written_bytes":"0","total_rows_to_read":"100","result_rows":"100","result_bytes":"227"} diff --git a/tests/queries/0_stateless/02374_analyzer_array_join.reference b/tests/queries/0_stateless/02374_analyzer_array_join.reference new file mode 100644 index 00000000000..28859f715b3 --- /dev/null +++ b/tests/queries/0_stateless/02374_analyzer_array_join.reference @@ -0,0 +1,110 @@ +-- { echoOn } + +SELECT 'ARRAY JOIN with constant'; +ARRAY JOIN with constant +SELECT id, value, value_1 FROM test_table ARRAY JOIN [1, 2, 3] AS value_1; +0 Value 1 +0 Value 2 +0 Value 3 +0 Value 1 +0 Value 2 +0 Value 3 +SELECT '--'; +-- +SELECT id, value FROM test_table ARRAY JOIN [1, 2, 3] AS value; +0 1 +0 2 +0 3 +0 1 +0 2 +0 3 +SELECT '--'; +-- +WITH [1, 2, 3] AS constant_array SELECT id, value FROM test_table ARRAY JOIN constant_array AS value; +0 1 +0 2 +0 3 +0 1 +0 2 +0 3 +SELECT '--'; +-- +WITH [1, 2, 3] AS constant_array SELECT id, value, value_1 FROM test_table ARRAY JOIN constant_array AS value_1; +0 Value 1 +0 Value 2 +0 Value 3 +0 Value 1 +0 Value 2 +0 Value 3 +SELECT '--'; +-- +SELECT id, value, value_1, value_2 FROM test_table ARRAY JOIN [[1, 2, 3]] AS value_1 ARRAY JOIN value_1 AS value_2; +0 Value [1,2,3] 1 +0 Value [1,2,3] 2 +0 Value [1,2,3] 3 +0 Value [1,2,3] 1 +0 Value [1,2,3] 2 +0 Value [1,2,3] 3 +SELECT 1 AS value FROM test_table ARRAY JOIN [1,2,3] AS value; -- { serverError 179 } +SELECT 'ARRAY JOIN with column'; +ARRAY JOIN with column +SELECT id, value, test_table.value_array FROM test_table ARRAY JOIN value_array; +0 Value 1 +0 Value 2 +0 Value 3 +0 Value 4 +0 Value 5 +0 Value 6 +SELECT '--'; +-- +SELECT id, value_array, value FROM test_table ARRAY JOIN value_array AS value; +0 [1,2,3] 1 +0 [1,2,3] 2 +0 [1,2,3] 3 +0 [4,5,6] 4 +0 [4,5,6] 5 +0 [4,5,6] 6 +SELECT '--'; +-- +SELECT id, value, value_array, value_array_element FROM test_table ARRAY JOIN value_array AS value_array_element; +0 Value [1,2,3] 1 +0 Value [1,2,3] 2 +0 Value [1,2,3] 3 +0 Value [4,5,6] 4 +0 Value [4,5,6] 5 +0 Value [4,5,6] 6 +SELECT '--'; +-- +SELECT id, value, value_array AS value_array_array_alias FROM test_table ARRAY JOIN value_array_array_alias; +0 Value [1,2,3] +0 Value [1,2,3] +0 Value [1,2,3] +0 Value [4,5,6] +0 Value [4,5,6] +0 Value [4,5,6] +SELECT '--'; +-- 
+SELECT id AS value FROM test_table ARRAY JOIN value_array AS value; -- { serverError 179 } +SELECT '--'; +-- +SELECT id, value, value_array AS value_array_array_alias, value_array_array_alias_element FROM test_table ARRAY JOIN value_array_array_alias AS value_array_array_alias_element; +0 Value [1,2,3] 1 +0 Value [1,2,3] 2 +0 Value [1,2,3] 3 +0 Value [4,5,6] 4 +0 Value [4,5,6] 5 +0 Value [4,5,6] 6 +SELECT '--'; +-- +SELECT id, value, value_array_array, value_array_array_inner_element, value_array_array_inner_element, value_array_array_inner_inner_element +FROM test_table ARRAY JOIN value_array_array AS value_array_array_inner_element +ARRAY JOIN value_array_array_inner_element AS value_array_array_inner_inner_element; +0 Value [[1,2,3]] [1,2,3] [1,2,3] 1 +0 Value [[1,2,3]] [1,2,3] [1,2,3] 2 +0 Value [[1,2,3]] [1,2,3] [1,2,3] 3 +0 Value [[1,2,3],[4,5,6]] [1,2,3] [1,2,3] 1 +0 Value [[1,2,3],[4,5,6]] [1,2,3] [1,2,3] 2 +0 Value [[1,2,3],[4,5,6]] [1,2,3] [1,2,3] 3 +0 Value [[1,2,3],[4,5,6]] [4,5,6] [4,5,6] 4 +0 Value [[1,2,3],[4,5,6]] [4,5,6] [4,5,6] 5 +0 Value [[1,2,3],[4,5,6]] [4,5,6] [4,5,6] 6 diff --git a/tests/queries/0_stateless/02374_analyzer_array_join.sql b/tests/queries/0_stateless/02374_analyzer_array_join.sql new file mode 100644 index 00000000000..8ebfdef349c --- /dev/null +++ b/tests/queries/0_stateless/02374_analyzer_array_join.sql @@ -0,0 +1,70 @@ +SET allow_experimental_analyzer = 1; + +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + id UInt64, + value String, + value_array Array(UInt64), + value_array_array Array(Array(UInt64)) +) ENGINE=TinyLog; + +INSERT INTO test_table VALUES (0, 'Value', [1, 2, 3], [[1, 2, 3]]), (0, 'Value', [4, 5, 6], [[1, 2, 3], [4, 5, 6]]); + +-- { echoOn } + +SELECT 'ARRAY JOIN with constant'; + +SELECT id, value, value_1 FROM test_table ARRAY JOIN [1, 2, 3] AS value_1; + +SELECT '--'; + +SELECT id, value FROM test_table ARRAY JOIN [1, 2, 3] AS value; + +SELECT '--'; + +WITH [1, 2, 3] AS constant_array SELECT id, value FROM test_table ARRAY JOIN constant_array AS value; + +SELECT '--'; + +WITH [1, 2, 3] AS constant_array SELECT id, value, value_1 FROM test_table ARRAY JOIN constant_array AS value_1; + +SELECT '--'; + +SELECT id, value, value_1, value_2 FROM test_table ARRAY JOIN [[1, 2, 3]] AS value_1 ARRAY JOIN value_1 AS value_2; + +SELECT 1 AS value FROM test_table ARRAY JOIN [1,2,3] AS value; -- { serverError 179 } + +SELECT 'ARRAY JOIN with column'; + +SELECT id, value, test_table.value_array FROM test_table ARRAY JOIN value_array; + +SELECT '--'; + +SELECT id, value_array, value FROM test_table ARRAY JOIN value_array AS value; + +SELECT '--'; + +SELECT id, value, value_array, value_array_element FROM test_table ARRAY JOIN value_array AS value_array_element; + +SELECT '--'; + +SELECT id, value, value_array AS value_array_array_alias FROM test_table ARRAY JOIN value_array_array_alias; + +SELECT '--'; + +SELECT id AS value FROM test_table ARRAY JOIN value_array AS value; -- { serverError 179 } + +SELECT '--'; + +SELECT id, value, value_array AS value_array_array_alias, value_array_array_alias_element FROM test_table ARRAY JOIN value_array_array_alias AS value_array_array_alias_element; + +SELECT '--'; + +SELECT id, value, value_array_array, value_array_array_inner_element, value_array_array_inner_element, value_array_array_inner_inner_element +FROM test_table ARRAY JOIN value_array_array AS value_array_array_inner_element +ARRAY JOIN value_array_array_inner_element AS value_array_array_inner_inner_element; + +-- { echoOff } + +DROP 
TABLE test_table; diff --git a/tests/queries/0_stateless/02374_analyzer_join_using.reference b/tests/queries/0_stateless/02374_analyzer_join_using.reference new file mode 100644 index 00000000000..62750c33f89 --- /dev/null +++ b/tests/queries/0_stateless/02374_analyzer_join_using.reference @@ -0,0 +1,452 @@ +-- { echoOn } + +SELECT 'JOIN INNER'; +JOIN INNER +SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id); +0 UInt16 0 UInt16 Join_1_Value_0 String 0 UInt16 Join_2_Value_0 String +1 UInt16 1 UInt16 Join_1_Value_1 String 1 UInt16 Join_2_Value_1 String +SELECT '--'; +-- +SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value) +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id); +Join_1_Value_0 String Join_2_Value_0 String +Join_1_Value_1 String Join_2_Value_1 String +SELECT '--'; +-- +SELECT 1 FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id); +1 +1 +SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value) +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (test_value); -- { serverError 47 } +SELECT 'JOIN LEFT'; +JOIN LEFT +SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 USING (id); +0 UInt16 0 UInt16 Join_1_Value_0 String 0 UInt16 Join_2_Value_0 String +1 UInt16 1 UInt16 Join_1_Value_1 String 1 UInt16 Join_2_Value_1 String +2 UInt16 2 UInt16 Join_1_Value_2 String 0 UInt16 String +SELECT '--'; +-- +SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value) +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 USING (id); +Join_1_Value_0 String Join_2_Value_0 String +Join_1_Value_1 String Join_2_Value_1 String +Join_1_Value_2 String String +SELECT '--'; +-- +SELECT 1 FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 USING (id); +1 +1 +1 +SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value) +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 USING (test_value); -- { serverError 47 } +SELECT 'JOIN RIGHT'; +JOIN RIGHT +SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 USING (id); +0 UInt16 0 UInt16 Join_1_Value_0 String 0 UInt16 Join_2_Value_0 String +1 UInt16 1 UInt16 Join_1_Value_1 String 1 UInt16 Join_2_Value_1 String +3 UInt16 0 UInt16 String 3 UInt16 Join_2_Value_3 String +SELECT '--'; +-- +SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value) +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 USING (id); +Join_1_Value_0 String Join_2_Value_0 String +Join_1_Value_1 String Join_2_Value_1 String + String Join_2_Value_3 String +SELECT '--'; +-- +SELECT 1 FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 USING (id); +1 +1 +1 +SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value) +FROM 
test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 USING (test_value); -- { serverError 47 } +SELECT 'JOIN FULL'; +JOIN FULL +SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 USING (id); +0 UInt16 0 UInt16 Join_1_Value_0 String 0 UInt16 Join_2_Value_0 String +1 UInt16 1 UInt16 Join_1_Value_1 String 1 UInt16 Join_2_Value_1 String +2 UInt16 2 UInt16 Join_1_Value_2 String 0 UInt16 String +0 UInt16 0 UInt16 String 3 UInt16 Join_2_Value_3 String +SELECT '--'; +-- +SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value) +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 USING (id); +Join_1_Value_0 String Join_2_Value_0 String +Join_1_Value_1 String Join_2_Value_1 String +Join_1_Value_2 String String + String Join_2_Value_3 String +SELECT '--'; +-- +SELECT 1 FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 USING (id); +1 +1 +1 +1 +SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value) +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 USING (test_value); -- { serverError 47 } +SELECT 'First JOIN INNER second JOIN INNER'; +First JOIN INNER second JOIN INNER +SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id) INNER JOIN test_table_join_3 AS t3 USING(id); +0 UInt64 0 UInt64 Join_1_Value_0 String 0 UInt64 Join_2_Value_0 String 0 UInt64 Join_3_Value_0 String +1 UInt64 1 UInt64 Join_1_Value_1 String 1 UInt64 Join_2_Value_1 String 1 UInt64 Join_3_Value_1 String +SELECT '--'; +-- +SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id) INNER JOIN test_table_join_3 AS t3 USING(id); +Join_1_Value_0 String Join_2_Value_0 String Join_3_Value_0 String +Join_1_Value_1 String Join_2_Value_1 String Join_3_Value_1 String +SELECT '--'; +-- +SELECT 1 FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id) INNER JOIN test_table_join_3 AS t3 USING(id); +1 +1 +SELECT id FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id INNER JOIN test_table_join_3 AS t3 USING (id); -- { serverError 207 } +SELECT 'First JOIN INNER second JOIN LEFT'; +First JOIN INNER second JOIN LEFT +SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id) LEFT JOIN test_table_join_3 AS t3 USING(id); +0 UInt64 0 UInt64 Join_1_Value_0 String 0 UInt64 Join_2_Value_0 String 0 UInt64 Join_3_Value_0 String +1 UInt64 1 UInt64 Join_1_Value_1 String 1 UInt64 Join_2_Value_1 String 1 UInt64 Join_3_Value_1 String +SELECT '--'; +-- +SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value), 
t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id) LEFT JOIN test_table_join_3 AS t3 USING(id); +Join_1_Value_0 String Join_2_Value_0 String Join_3_Value_0 String +Join_1_Value_1 String Join_2_Value_1 String Join_3_Value_1 String +SELECT '--'; +-- +SELECT 1 FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id) LEFT JOIN test_table_join_3 AS t3 USING(id); +1 +1 +SELECT id FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id LEFT JOIN test_table_join_3 AS t3 USING (id); -- { serverError 207 } +SELECT 'First JOIN INNER second JOIN RIGHT'; +First JOIN INNER second JOIN RIGHT +SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id) RIGHT JOIN test_table_join_3 AS t3 USING(id); +0 UInt64 0 UInt64 Join_1_Value_0 String 0 UInt64 Join_2_Value_0 String 0 UInt64 Join_3_Value_0 String +1 UInt64 1 UInt64 Join_1_Value_1 String 1 UInt64 Join_2_Value_1 String 1 UInt64 Join_3_Value_1 String +4 UInt64 0 UInt64 String 0 UInt64 String 4 UInt64 Join_3_Value_4 String +SELECT '--'; +-- +SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id) RIGHT JOIN test_table_join_3 AS t3 USING(id); +Join_1_Value_0 String Join_2_Value_0 String Join_3_Value_0 String +Join_1_Value_1 String Join_2_Value_1 String Join_3_Value_1 String + String String Join_3_Value_4 String +SELECT '--'; +-- +SELECT 1 FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id) RIGHT JOIN test_table_join_3 AS t3 USING(id); +1 +1 +1 +SELECT id FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id RIGHT JOIN test_table_join_3 AS t3 USING (id); -- { serverError 207 } +SELECT 'First JOIN INNER second JOIN FULL'; +First JOIN INNER second JOIN FULL +SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id) FULL JOIN test_table_join_3 AS t3 USING(id); +0 UInt64 0 UInt64 Join_1_Value_0 String 0 UInt64 Join_2_Value_0 String 0 UInt64 Join_3_Value_0 String +1 UInt64 1 UInt64 Join_1_Value_1 String 1 UInt64 Join_2_Value_1 String 1 UInt64 Join_3_Value_1 String +0 UInt64 0 UInt64 String 0 UInt64 String 4 UInt64 Join_3_Value_4 String +SELECT '--'; +-- +SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id) FULL JOIN test_table_join_3 AS t3 USING(id); +Join_1_Value_0 String Join_2_Value_0 String Join_3_Value_0 String +Join_1_Value_1 String Join_2_Value_1 String Join_3_Value_1 String + String String Join_3_Value_4 String +SELECT '--'; +-- +SELECT 1 FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id) FULL JOIN test_table_join_3 AS t3 USING(id); +1 +1 +1 +SELECT id FROM test_table_join_1 
AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id FULL JOIN test_table_join_3 AS t3 USING (id); -- { serverError 207 } +SELECT 'First JOIN LEFT second JOIN INNER'; +First JOIN LEFT second JOIN INNER +SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 USING (id) INNER JOIN test_table_join_3 AS t3 USING(id); +0 UInt64 0 UInt64 Join_1_Value_0 String 0 UInt64 Join_2_Value_0 String 0 UInt64 Join_3_Value_0 String +1 UInt64 1 UInt64 Join_1_Value_1 String 1 UInt64 Join_2_Value_1 String 1 UInt64 Join_3_Value_1 String +SELECT '--'; +-- +SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 USING (id) INNER JOIN test_table_join_3 AS t3 USING(id); +Join_1_Value_0 String Join_2_Value_0 String Join_3_Value_0 String +Join_1_Value_1 String Join_2_Value_1 String Join_3_Value_1 String +SELECT '--'; +-- +SELECT 1 FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 USING (id) INNER JOIN test_table_join_3 AS t3 USING(id); +1 +1 +SELECT id FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id INNER JOIN test_table_join_3 AS t3 USING (id); -- { serverError 207 } +SELECT 'First JOIN LEFT second JOIN LEFT'; +First JOIN LEFT second JOIN LEFT +SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 USING (id) LEFT JOIN test_table_join_3 AS t3 USING(id); +0 UInt64 0 UInt64 Join_1_Value_0 String 0 UInt64 Join_2_Value_0 String 0 UInt64 Join_3_Value_0 String +1 UInt64 1 UInt64 Join_1_Value_1 String 1 UInt64 Join_2_Value_1 String 1 UInt64 Join_3_Value_1 String +2 UInt64 2 UInt64 Join_1_Value_2 String 0 UInt64 String 0 UInt64 String +SELECT '--'; +-- +SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 USING (id) LEFT JOIN test_table_join_3 AS t3 USING(id); +Join_1_Value_0 String Join_2_Value_0 String Join_3_Value_0 String +Join_1_Value_1 String Join_2_Value_1 String Join_3_Value_1 String +Join_1_Value_2 String String String +SELECT '--'; +-- +SELECT 1 FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 USING (id) LEFT JOIN test_table_join_3 AS t3 USING(id); +1 +1 +1 +SELECT id FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id LEFT JOIN test_table_join_3 AS t3 USING (id); -- { serverError 207 } +SELECT 'First JOIN LEFT second JOIN RIGHT'; +First JOIN LEFT second JOIN RIGHT +SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 USING (id) RIGHT JOIN test_table_join_3 AS t3 USING(id); +0 
UInt64 0 UInt64 Join_1_Value_0 String 0 UInt64 Join_2_Value_0 String 0 UInt64 Join_3_Value_0 String +1 UInt64 1 UInt64 Join_1_Value_1 String 1 UInt64 Join_2_Value_1 String 1 UInt64 Join_3_Value_1 String +4 UInt64 0 UInt64 String 0 UInt64 String 4 UInt64 Join_3_Value_4 String +SELECT '--'; +-- +SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 USING (id) RIGHT JOIN test_table_join_3 AS t3 USING(id); +Join_1_Value_0 String Join_2_Value_0 String Join_3_Value_0 String +Join_1_Value_1 String Join_2_Value_1 String Join_3_Value_1 String + String String Join_3_Value_4 String +SELECT '--'; +-- +SELECT 1 FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 USING (id) RIGHT JOIN test_table_join_3 AS t3 USING(id); +1 +1 +1 +SELECT id FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id RIGHT JOIN test_table_join_3 AS t3 USING (id); -- { serverError 207 } +SELECT 'First JOIN LEFT second JOIN FULL'; +First JOIN LEFT second JOIN FULL +SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 USING (id) FULL JOIN test_table_join_3 AS t3 USING(id); +0 UInt64 0 UInt64 Join_1_Value_0 String 0 UInt64 Join_2_Value_0 String 0 UInt64 Join_3_Value_0 String +1 UInt64 1 UInt64 Join_1_Value_1 String 1 UInt64 Join_2_Value_1 String 1 UInt64 Join_3_Value_1 String +2 UInt64 2 UInt64 Join_1_Value_2 String 0 UInt64 String 0 UInt64 String +0 UInt64 0 UInt64 String 0 UInt64 String 4 UInt64 Join_3_Value_4 String +SELECT '--'; +-- +SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 USING (id) FULL JOIN test_table_join_3 AS t3 USING(id); +Join_1_Value_0 String Join_2_Value_0 String Join_3_Value_0 String +Join_1_Value_1 String Join_2_Value_1 String Join_3_Value_1 String +Join_1_Value_2 String String String + String String Join_3_Value_4 String +SELECT '--'; +-- +SELECT 1 FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 USING (id) FULL JOIN test_table_join_3 AS t3 USING(id); +1 +1 +1 +1 +SELECT id FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id FULL JOIN test_table_join_3 AS t3 USING (id); -- { serverError 207 } +SELECT 'First JOIN RIGHT second JOIN INNER'; +First JOIN RIGHT second JOIN INNER +SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 USING (id) INNER JOIN test_table_join_3 AS t3 USING(id); +0 UInt64 0 UInt64 Join_1_Value_0 String 0 UInt64 Join_2_Value_0 String 0 UInt64 Join_3_Value_0 String +1 UInt64 1 UInt64 Join_1_Value_1 String 1 UInt64 Join_2_Value_1 String 1 UInt64 Join_3_Value_1 String +SELECT '--'; +-- +SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value), t3.value AS t3_value, toTypeName(t3_value) +FROM 
test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 USING (id) INNER JOIN test_table_join_3 AS t3 USING(id); +Join_1_Value_0 String Join_2_Value_0 String Join_3_Value_0 String +Join_1_Value_1 String Join_2_Value_1 String Join_3_Value_1 String +SELECT '--'; +-- +SELECT 1 FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 USING (id) INNER JOIN test_table_join_3 AS t3 USING(id); +1 +1 +SELECT id FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id INNER JOIN test_table_join_3 AS t3 USING (id); -- { serverError 207 } +SELECT 'First JOIN RIGHT second JOIN LEFT'; +First JOIN RIGHT second JOIN LEFT +SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 USING (id) LEFT JOIN test_table_join_3 AS t3 USING(id); +0 UInt64 0 UInt64 Join_1_Value_0 String 0 UInt64 Join_2_Value_0 String 0 UInt64 Join_3_Value_0 String +1 UInt64 1 UInt64 Join_1_Value_1 String 1 UInt64 Join_2_Value_1 String 1 UInt64 Join_3_Value_1 String +3 UInt64 0 UInt64 String 3 UInt64 Join_2_Value_3 String 0 UInt64 String +SELECT '--'; +-- +SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 USING (id) LEFT JOIN test_table_join_3 AS t3 USING(id); +Join_1_Value_0 String Join_2_Value_0 String Join_3_Value_0 String +Join_1_Value_1 String Join_2_Value_1 String Join_3_Value_1 String + String Join_2_Value_3 String String +SELECT '--'; +-- +SELECT 1 FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 USING (id) LEFT JOIN test_table_join_3 AS t3 USING(id); +1 +1 +1 +SELECT id FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id LEFT JOIN test_table_join_3 AS t3 USING (id); -- { serverError 207 } +SELECT 'First JOIN RIGHT second JOIN RIGHT'; +First JOIN RIGHT second JOIN RIGHT +SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 USING (id) RIGHT JOIN test_table_join_3 AS t3 USING(id); +0 UInt64 0 UInt64 Join_1_Value_0 String 0 UInt64 Join_2_Value_0 String 0 UInt64 Join_3_Value_0 String +1 UInt64 1 UInt64 Join_1_Value_1 String 1 UInt64 Join_2_Value_1 String 1 UInt64 Join_3_Value_1 String +4 UInt64 0 UInt64 String 0 UInt64 String 4 UInt64 Join_3_Value_4 String +SELECT '--'; +-- +SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 USING (id) RIGHT JOIN test_table_join_3 AS t3 USING(id); +Join_1_Value_0 String Join_2_Value_0 String Join_3_Value_0 String +Join_1_Value_1 String Join_2_Value_1 String Join_3_Value_1 String + String String Join_3_Value_4 String +SELECT '--'; +-- +SELECT 1 FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 USING (id) RIGHT JOIN test_table_join_3 AS t3 USING(id); +1 +1 +1 +SELECT id FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON 
t1.id = t2.id RIGHT JOIN test_table_join_3 AS t3 USING (id); -- { serverError 207 } +SELECT 'First JOIN RIGHT second JOIN FULL'; +First JOIN RIGHT second JOIN FULL +SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 USING (id) FULL JOIN test_table_join_3 AS t3 USING(id); +0 UInt64 0 UInt64 Join_1_Value_0 String 0 UInt64 Join_2_Value_0 String 0 UInt64 Join_3_Value_0 String +1 UInt64 1 UInt64 Join_1_Value_1 String 1 UInt64 Join_2_Value_1 String 1 UInt64 Join_3_Value_1 String +3 UInt64 0 UInt64 String 3 UInt64 Join_2_Value_3 String 0 UInt64 String +0 UInt64 0 UInt64 String 0 UInt64 String 4 UInt64 Join_3_Value_4 String +SELECT '--'; +-- +SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 USING (id) FULL JOIN test_table_join_3 AS t3 USING(id); +Join_1_Value_0 String Join_2_Value_0 String Join_3_Value_0 String +Join_1_Value_1 String Join_2_Value_1 String Join_3_Value_1 String + String Join_2_Value_3 String String + String String Join_3_Value_4 String +SELECT '--'; +-- +SELECT 1 FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 USING (id) FULL JOIN test_table_join_3 AS t3 USING(id); +1 +1 +1 +1 +SELECT id FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id FULL JOIN test_table_join_3 AS t3 USING (id); -- { serverError 207 } +SELECT 'First JOIN FULL second JOIN INNER'; +First JOIN FULL second JOIN INNER +SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 USING (id) INNER JOIN test_table_join_3 AS t3 USING(id); +0 UInt64 0 UInt64 Join_1_Value_0 String 0 UInt64 Join_2_Value_0 String 0 UInt64 Join_3_Value_0 String +1 UInt64 1 UInt64 Join_1_Value_1 String 1 UInt64 Join_2_Value_1 String 1 UInt64 Join_3_Value_1 String +0 UInt64 0 UInt64 String 3 UInt64 Join_2_Value_3 String 0 UInt64 Join_3_Value_0 String +SELECT '--'; +-- +SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 USING (id) INNER JOIN test_table_join_3 AS t3 USING(id); +Join_1_Value_0 String Join_2_Value_0 String Join_3_Value_0 String +Join_1_Value_1 String Join_2_Value_1 String Join_3_Value_1 String + String Join_2_Value_3 String Join_3_Value_0 String +SELECT '--'; +-- +SELECT 1 FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 USING (id) INNER JOIN test_table_join_3 AS t3 USING(id); +1 +1 +1 +SELECT id FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id INNER JOIN test_table_join_3 AS t3 USING (id); -- { serverError 207 } +SELECT 'First JOIN FULL second JOIN LEFT'; +First JOIN FULL second JOIN LEFT +SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, 
toTypeName(t2_value), t3.id AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 USING (id) LEFT JOIN test_table_join_3 AS t3 USING(id); +0 UInt64 0 UInt64 Join_1_Value_0 String 0 UInt64 Join_2_Value_0 String 0 UInt64 Join_3_Value_0 String +1 UInt64 1 UInt64 Join_1_Value_1 String 1 UInt64 Join_2_Value_1 String 1 UInt64 Join_3_Value_1 String +2 UInt64 2 UInt64 Join_1_Value_2 String 0 UInt64 String 0 UInt64 String +0 UInt64 0 UInt64 String 3 UInt64 Join_2_Value_3 String 0 UInt64 Join_3_Value_0 String +SELECT '--'; +-- +SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 USING (id) LEFT JOIN test_table_join_3 AS t3 USING(id); +Join_1_Value_0 String Join_2_Value_0 String Join_3_Value_0 String +Join_1_Value_1 String Join_2_Value_1 String Join_3_Value_1 String +Join_1_Value_2 String String String + String Join_2_Value_3 String Join_3_Value_0 String +SELECT '--'; +-- +SELECT 1 FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 USING (id) LEFT JOIN test_table_join_3 AS t3 USING(id); +1 +1 +1 +1 +SELECT id FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id LEFT JOIN test_table_join_3 AS t3 USING (id); -- { serverError 207 } +SELECT 'First JOIN FULL second JOIN RIGHT'; +First JOIN FULL second JOIN RIGHT +SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 USING (id) RIGHT JOIN test_table_join_3 AS t3 USING(id); +0 UInt64 0 UInt64 Join_1_Value_0 String 0 UInt64 Join_2_Value_0 String 0 UInt64 Join_3_Value_0 String +1 UInt64 1 UInt64 Join_1_Value_1 String 1 UInt64 Join_2_Value_1 String 1 UInt64 Join_3_Value_1 String +0 UInt64 0 UInt64 String 3 UInt64 Join_2_Value_3 String 0 UInt64 Join_3_Value_0 String +4 UInt64 0 UInt64 String 0 UInt64 String 4 UInt64 Join_3_Value_4 String +SELECT '--'; +-- +SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 USING (id) RIGHT JOIN test_table_join_3 AS t3 USING(id); +Join_1_Value_0 String Join_2_Value_0 String Join_3_Value_0 String +Join_1_Value_1 String Join_2_Value_1 String Join_3_Value_1 String + String Join_2_Value_3 String Join_3_Value_0 String + String String Join_3_Value_4 String +SELECT '--'; +-- +SELECT 1 FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 USING (id) RIGHT JOIN test_table_join_3 AS t3 USING(id); +1 +1 +1 +1 +SELECT id FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id RIGHT JOIN test_table_join_3 AS t3 USING (id); -- { serverError 207 } +SELECT 'First JOIN FULL second JOIN FULL'; +First JOIN FULL second JOIN FULL +SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 USING (id) FULL JOIN test_table_join_3 AS t3 
USING(id); +0 UInt64 0 UInt64 Join_1_Value_0 String 0 UInt64 Join_2_Value_0 String 0 UInt64 Join_3_Value_0 String +1 UInt64 1 UInt64 Join_1_Value_1 String 1 UInt64 Join_2_Value_1 String 1 UInt64 Join_3_Value_1 String +2 UInt64 2 UInt64 Join_1_Value_2 String 0 UInt64 String 0 UInt64 String +0 UInt64 0 UInt64 String 3 UInt64 Join_2_Value_3 String 0 UInt64 Join_3_Value_0 String +0 UInt64 0 UInt64 String 0 UInt64 String 4 UInt64 Join_3_Value_4 String +SELECT '--'; +-- +SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 USING (id) FULL JOIN test_table_join_3 AS t3 USING(id); +Join_1_Value_0 String Join_2_Value_0 String Join_3_Value_0 String +Join_1_Value_1 String Join_2_Value_1 String Join_3_Value_1 String +Join_1_Value_2 String String String + String Join_2_Value_3 String Join_3_Value_0 String + String String Join_3_Value_4 String +SELECT '--'; +-- +SELECT 1 FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 USING (id) FULL JOIN test_table_join_3 AS t3 USING(id); +1 +1 +1 +1 +1 +SELECT id FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id FULL JOIN test_table_join_3 AS t3 USING (id); -- { serverError 207 } diff --git a/tests/queries/0_stateless/02374_analyzer_join_using.sql.j2 b/tests/queries/0_stateless/02374_analyzer_join_using.sql.j2 new file mode 100644 index 00000000000..26fb52716ff --- /dev/null +++ b/tests/queries/0_stateless/02374_analyzer_join_using.sql.j2 @@ -0,0 +1,87 @@ +SET allow_experimental_analyzer = 1; + +DROP TABLE IF EXISTS test_table_join_1; +CREATE TABLE test_table_join_1 +( + id UInt8, + value String +) ENGINE = TinyLog; + +DROP TABLE IF EXISTS test_table_join_2; +CREATE TABLE test_table_join_2 +( + id UInt16, + value String +) ENGINE = TinyLog; + +DROP TABLE IF EXISTS test_table_join_3; +CREATE TABLE test_table_join_3 +( + id UInt64, + value String +) ENGINE = TinyLog; + +INSERT INTO test_table_join_1 VALUES (0, 'Join_1_Value_0'); +INSERT INTO test_table_join_1 VALUES (1, 'Join_1_Value_1'); +INSERT INTO test_table_join_1 VALUES (2, 'Join_1_Value_2'); + +INSERT INTO test_table_join_2 VALUES (0, 'Join_2_Value_0'); +INSERT INTO test_table_join_2 VALUES (1, 'Join_2_Value_1'); +INSERT INTO test_table_join_2 VALUES (3, 'Join_2_Value_3'); + +INSERT INTO test_table_join_3 VALUES (0, 'Join_3_Value_0'); +INSERT INTO test_table_join_3 VALUES (1, 'Join_3_Value_1'); +INSERT INTO test_table_join_3 VALUES (4, 'Join_3_Value_4'); + +-- { echoOn } + +{% for join_type in ['INNER', 'LEFT', 'RIGHT', 'FULL'] -%} + +SELECT 'JOIN {{ join_type }}'; + +SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) +FROM test_table_join_1 AS t1 {{ join_type }} JOIN test_table_join_2 AS t2 USING (id); + +SELECT '--'; + +SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value) +FROM test_table_join_1 AS t1 {{ join_type }} JOIN test_table_join_2 AS t2 USING (id); + +SELECT '--'; + +SELECT 1 FROM test_table_join_1 AS t1 {{ join_type }} JOIN test_table_join_2 AS t2 USING (id); + +SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value) +FROM test_table_join_1 AS t1 {{ join_type }} JOIN test_table_join_2 AS t2 USING (test_value); -- { serverError 47 } + +{% endfor %} + +{% for first_join_type in ['INNER', 
'LEFT', 'RIGHT', 'FULL'] -%} +{% for second_join_type in ['INNER', 'LEFT', 'RIGHT', 'FULL'] -%} + +SELECT 'First JOIN {{ first_join_type }} second JOIN {{ second_join_type }}'; + +SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 {{ first_join_type }} JOIN test_table_join_2 AS t2 USING (id) {{ second_join_type }} JOIN test_table_join_3 AS t3 USING(id); + +SELECT '--'; + +SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 {{ first_join_type }} JOIN test_table_join_2 AS t2 USING (id) {{ second_join_type }} JOIN test_table_join_3 AS t3 USING(id); + +SELECT '--'; + +SELECT 1 FROM test_table_join_1 AS t1 {{ first_join_type }} JOIN test_table_join_2 AS t2 USING (id) {{ second_join_type }} JOIN test_table_join_3 AS t3 USING(id); + +SELECT id FROM test_table_join_1 AS t1 {{ first_join_type }} JOIN test_table_join_2 AS t2 ON t1.id = t2.id {{ second_join_type }} JOIN test_table_join_3 AS t3 USING (id); -- { serverError 207 } + +{% endfor %} +{% endfor %} + +-- { echoOff } + +DROP TABLE test_table_join_1; +DROP TABLE test_table_join_2; +DROP TABLE test_table_join_3; diff --git a/tests/queries/0_stateless/02375_analyzer_union.reference b/tests/queries/0_stateless/02375_analyzer_union.reference new file mode 100644 index 00000000000..199b9af5313 --- /dev/null +++ b/tests/queries/0_stateless/02375_analyzer_union.reference @@ -0,0 +1,62 @@ +-- { echoOn } + +SELECT 'Union constants'; +Union constants +SELECT 1 UNION ALL SELECT 1; +1 +1 +SELECT '--'; +-- +SELECT 1 UNION DISTINCT SELECT 1 UNION ALL SELECT 1; +1 +1 +SELECT '--'; +-- +SELECT 1 INTERSECT SELECT 1; +1 +SELECT '--'; +-- +SELECT 1 EXCEPT SELECT 1; +SELECT '--'; +-- +SELECT id FROM (SELECT 1 AS id UNION ALL SELECT 1); +1 +1 +SELECT 'Union non constants'; +Union non constants +SELECT value FROM (SELECT 1 as value UNION ALL SELECT 1 UNION ALL SELECT 1); +1 +1 +1 +SELECT '--'; +-- +SELECT id FROM test_table UNION ALL SELECT id FROM test_table; +0 +0 +SELECT '--'; +-- +SELECT id FROM test_table UNION DISTINCT SELECT id FROM test_table; +0 +SELECT '--'; +-- +SELECT id FROM test_table INTERSECT SELECT id FROM test_table; +0 +SELECT '--'; +-- +SELECT id FROM test_table EXCEPT SELECT id FROM test_table; +SELECT '--'; +-- +SELECT id FROM (SELECT id FROM test_table UNION ALL SELECT id FROM test_table); +0 +0 +SELECT '--'; +-- +SELECT id FROM (SELECT id FROM test_table UNION DISTINCT SELECT id FROM test_table); +0 +SELECT '--'; +-- +SELECT id FROM (SELECT id FROM test_table INTERSECT SELECT id FROM test_table); +0 +SELECT '--'; +-- +SELECT id FROM (SELECT id FROM test_table EXCEPT SELECT id FROM test_table); diff --git a/tests/queries/0_stateless/02375_analyzer_union.sql b/tests/queries/0_stateless/02375_analyzer_union.sql new file mode 100644 index 00000000000..5e41f07d217 --- /dev/null +++ b/tests/queries/0_stateless/02375_analyzer_union.sql @@ -0,0 +1,71 @@ +SET allow_experimental_analyzer = 1; + +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + id UInt64, + value String +) ENGINE=TinyLog; + +INSERT INTO test_table VALUES (0, 'Value'); + +-- { echoOn } + +SELECT 'Union constants'; + +SELECT 1 UNION ALL SELECT 1; + +SELECT '--'; + +SELECT 1 UNION DISTINCT SELECT 1 UNION
ALL SELECT 1; + +SELECT '--'; + +SELECT 1 INTERSECT SELECT 1; + +SELECT '--'; + +SELECT 1 EXCEPT SELECT 1; + +SELECT '--'; + +SELECT id FROM (SELECT 1 AS id UNION ALL SELECT 1); + +SELECT 'Union non constants'; + +SELECT value FROM (SELECT 1 as value UNION ALL SELECT 1 UNION ALL SELECT 1); + +SELECT '--'; + +SELECT id FROM test_table UNION ALL SELECT id FROM test_table; + +SELECT '--'; + +SELECT id FROM test_table UNION DISTINCT SELECT id FROM test_table; + +SELECT '--'; + +SELECT id FROM test_table INTERSECT SELECT id FROM test_table; + +SELECT '--'; +SELECT id FROM test_table EXCEPT SELECT id FROM test_table; + +SELECT '--'; + +SELECT id FROM (SELECT id FROM test_table UNION ALL SELECT id FROM test_table); + +SELECT '--'; + +SELECT id FROM (SELECT id FROM test_table UNION DISTINCT SELECT id FROM test_table); + +SELECT '--'; + +SELECT id FROM (SELECT id FROM test_table INTERSECT SELECT id FROM test_table); + +SELECT '--'; + +SELECT id FROM (SELECT id FROM test_table EXCEPT SELECT id FROM test_table); + +-- { echoOff } + +DROP TABLE test_table; diff --git a/tests/queries/0_stateless/02376_analyzer_in_function_subquery.reference b/tests/queries/0_stateless/02376_analyzer_in_function_subquery.reference new file mode 100644 index 00000000000..3641c7d2f09 --- /dev/null +++ b/tests/queries/0_stateless/02376_analyzer_in_function_subquery.reference @@ -0,0 +1,43 @@ +-- { echoOn } + +SELECT id, value FROM test_table WHERE 1 IN (SELECT 1); +0 Value_0 +1 Value_1 +2 Value_2 +SELECT '--'; +-- +SELECT id, value FROM test_table WHERE 0 IN (SELECT 1); +SELECT '--'; +-- +SELECT id, value FROM test_table WHERE id IN (SELECT 1); +1 Value_1 +SELECT '--'; +-- +SELECT id, value FROM test_table WHERE id IN (SELECT 2); +2 Value_2 +SELECT '--'; +-- +SELECT id, value FROM test_table WHERE id IN test_table_for_in; +0 Value_0 +1 Value_1 +SELECT '--'; +-- +SELECT id, value FROM test_table WHERE id IN (SELECT id FROM test_table_for_in); +0 Value_0 +1 Value_1 +SELECT '--'; +-- +SELECT id, value FROM test_table WHERE id IN (SELECT id FROM test_table_for_in UNION DISTINCT SELECT id FROM test_table_for_in); +0 Value_0 +1 Value_1 +SELECT '--'; +-- +WITH cte_test_table_for_in AS (SELECT id FROM test_table_for_in) SELECT id, value FROM test_table WHERE id IN cte_test_table_for_in; +0 Value_0 +1 Value_1 +SELECT '--'; +-- +WITH cte_test_table_for_in AS (SELECT id FROM test_table_for_in) SELECT id, value +FROM test_table WHERE id IN (SELECT id FROM cte_test_table_for_in UNION DISTINCT SELECT id FROM cte_test_table_for_in); +0 Value_0 +1 Value_1 diff --git a/tests/queries/0_stateless/02376_analyzer_in_function_subquery.sql b/tests/queries/0_stateless/02376_analyzer_in_function_subquery.sql new file mode 100644 index 00000000000..72a4edb8567 --- /dev/null +++ b/tests/queries/0_stateless/02376_analyzer_in_function_subquery.sql @@ -0,0 +1,60 @@ +SET allow_experimental_analyzer = 1; + +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + id UInt64, + value String +) ENGINE=TinyLog; + +INSERT INTO test_table VALUES (0, 'Value_0'), (1, 'Value_1'), (2, 'Value_2'); + +DROP TABLE IF EXISTS test_table_for_in; +CREATE TABLE test_table_for_in +( + id UInt64 +) ENGINE=TinyLog; + +INSERT INTO test_table_for_in VALUES (0), (1); + +-- { echoOn } + +SELECT id, value FROM test_table WHERE 1 IN (SELECT 1); + +SELECT '--'; + +SELECT id, value FROM test_table WHERE 0 IN (SELECT 1); + +SELECT '--'; + +SELECT id, value FROM test_table WHERE id IN (SELECT 1); + +SELECT '--'; + +SELECT id, value FROM test_table WHERE id IN (SELECT 2); + 
+SELECT '--'; + +SELECT id, value FROM test_table WHERE id IN test_table_for_in; + +SELECT '--'; + +SELECT id, value FROM test_table WHERE id IN (SELECT id FROM test_table_for_in); + +SELECT '--'; + +SELECT id, value FROM test_table WHERE id IN (SELECT id FROM test_table_for_in UNION DISTINCT SELECT id FROM test_table_for_in); + +SELECT '--'; + +WITH cte_test_table_for_in AS (SELECT id FROM test_table_for_in) SELECT id, value FROM test_table WHERE id IN cte_test_table_for_in; + +SELECT '--'; + +WITH cte_test_table_for_in AS (SELECT id FROM test_table_for_in) SELECT id, value +FROM test_table WHERE id IN (SELECT id FROM cte_test_table_for_in UNION DISTINCT SELECT id FROM cte_test_table_for_in); + +-- { echoOff } + +DROP TABLE test_table; +DROP TABLE test_table_for_in; diff --git a/tests/queries/0_stateless/02377_analyzer_in_function_set.reference b/tests/queries/0_stateless/02377_analyzer_in_function_set.reference new file mode 100644 index 00000000000..b32da0d591a --- /dev/null +++ b/tests/queries/0_stateless/02377_analyzer_in_function_set.reference @@ -0,0 +1,2 @@ +0 Value_0 +1 Value_1 diff --git a/tests/queries/0_stateless/02377_analyzer_in_function_set.sql b/tests/queries/0_stateless/02377_analyzer_in_function_set.sql new file mode 100644 index 00000000000..e3cbcf75a9c --- /dev/null +++ b/tests/queries/0_stateless/02377_analyzer_in_function_set.sql @@ -0,0 +1,23 @@ +SET allow_experimental_analyzer = 1; + +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + id UInt64, + value String +) ENGINE=TinyLog; + +INSERT INTO test_table VALUES (0, 'Value_0'), (1, 'Value_1'), (2, 'Value_2'); + +DROP TABLE IF EXISTS special_set_table; +CREATE TABLE special_set_table +( + id UInt64 +) ENGINE=Set; + +INSERT INTO special_set_table VALUES (0), (1); + +SELECT id, value FROM test_table WHERE id IN special_set_table; + +DROP TABLE special_set_table; +DROP TABLE test_table; diff --git a/tests/queries/0_stateless/02378_analyzer_projection_names.reference b/tests/queries/0_stateless/02378_analyzer_projection_names.reference new file mode 100644 index 00000000000..1fa79677876 --- /dev/null +++ b/tests/queries/0_stateless/02378_analyzer_projection_names.reference @@ -0,0 +1,739 @@ +-- { echoOn } + +SELECT 'Constants'; +Constants +DESCRIBE (SELECT 1, 'Value'); +1 UInt8 +\'Value\' String +SELECT '--'; +-- +DESCRIBE (SELECT 1 + 1, concat('Value_1', 'Value_2')); +plus(1, 1) UInt16 +concat(\'Value_1\', \'Value_2\') String +SELECT '--'; +-- +DESCRIBE (SELECT cast(tuple(1, 'Value'), 'Tuple (id UInt64, value String)')); +CAST(tuple(1, \'Value\'), \'Tuple (id UInt64, value String)\') Tuple(id UInt64, value String) +SELECT 'Columns'; +Columns +DESCRIBE (SELECT test_table.id, test_table.id, id FROM test_table); +id UInt64 +id UInt64 +id UInt64 +SELECT '--'; +-- +DESCRIBE (SELECT * FROM test_table); +id UInt64 +value String +SELECT '--'; +-- +DESCRIBE (SELECT * APPLY toString FROM test_table); +toString(id) String +toString(value) String +SELECT '--'; +-- +DESCRIBE (SELECT * APPLY x -> toString(x) FROM test_table); +toString(id) String +toString(value) String +SELECT '--'; +-- +DESCRIBE (SELECT tuple_value.* FROM test_table_compound); +tuple_value.value_1 UInt64 +tuple_value.value_2 String +SELECT '--'; +-- +DESCRIBE (SELECT tuple_value.* APPLY x -> x FROM test_table_compound); +tuple_value.value_1 UInt64 +tuple_value.value_2 String +SELECT '--'; +-- +DESCRIBE (SELECT tuple_value.* APPLY toString FROM test_table_compound); +toString(tuple_value.value_1) String +toString(tuple_value.value_2) String +SELECT '--'; 
+-- +DESCRIBE (SELECT tuple_value.* APPLY x -> toString(x) FROM test_table_compound); +toString(tuple_value.value_1) String +toString(tuple_value.value_2) String +SELECT 'Constants with aliases'; +Constants with aliases +DESCRIBE (SELECT 1 AS a, a AS b, b, b AS c, c, 'Value' AS d, d AS e, e AS f); +a UInt8 +b UInt8 +b UInt8 +c UInt8 +c UInt8 +d String +e String +f String +SELECT '--'; +-- +DESCRIBE (SELECT plus(1 AS a, a AS b), plus(b, b), plus(b, b) AS c, concat('Value' AS d, d) AS e, e); +plus(a, b) UInt16 +plus(b, b) UInt16 +c UInt16 +e String +e String +SELECT '--'; +-- +DESCRIBE (SELECT cast(tuple(1, 'Value'), 'Tuple (id UInt64, value String)') AS a, a.id, a.value); +a Tuple(id UInt64, value String) +a.id UInt64 +a.value String +SELECT '--'; +-- +DESCRIBE (SELECT cast(tuple(1, 'Value'), 'Tuple (id UInt64, value String)') AS a, a.*); +a Tuple(id UInt64, value String) +a.id UInt64 +a.value String +SELECT '--'; +-- +DESCRIBE (SELECT cast(tuple(1, 'Value'), 'Tuple (id UInt64, value String)') AS a, a.* EXCEPT id); +a Tuple(id UInt64, value String) +a.value String +SELECT '--'; +-- +DESCRIBE (SELECT cast(tuple(1, 'Value'), 'Tuple (id UInt64, value String)') AS a, a.* EXCEPT value); +a Tuple(id UInt64, value String) +a.id UInt64 +SELECT '--'; +-- +DESCRIBE (SELECT cast(tuple(1, 'Value'), 'Tuple (id UInt64, value String)') AS a, a.* EXCEPT value APPLY toString); +a Tuple(id UInt64, value String) +toString(a.id) String +SELECT '--'; +-- +DESCRIBE (SELECT cast(tuple(1, 'Value'), 'Tuple (id UInt64, value String)') AS a, a.* EXCEPT value APPLY x -> toString(x)); +a Tuple(id UInt64, value String) +toString(a.id) String +SELECT '--'; +-- +DESCRIBE (SELECT cast(tuple(1, 'Value'), 'Tuple (id UInt64, value String)') AS a, untuple(a)); +a Tuple(id UInt64, value String) +tupleElement(a, \'id\') UInt64 +tupleElement(a, \'value\') String +SELECT '--'; +-- +DESCRIBE (SELECT cast(tuple(1, 'Value'), 'Tuple (id UInt64, value String)') AS a, untuple(a) AS b); +a Tuple(id UInt64, value String) +b.id UInt64 +b.value String +SELECT 'Columns with aliases'; +Columns with aliases +DESCRIBE (SELECT test_table.id AS a, a, test_table.id AS b, b AS c, c FROM test_table); +a UInt64 +a UInt64 +b UInt64 +c UInt64 +c UInt64 +SELECT '--'; +-- +DESCRIBE (SELECT plus(test_table.id AS a, test_table.id), plus(id, id AS b), plus(b, b), plus(test_table.id, test_table.id) FROM test_table); +plus(a, id) UInt64 +plus(id, b) UInt64 +plus(b, b) UInt64 +plus(id, id) UInt64 +SELECT '--'; +-- +DESCRIBE (SELECT test_table.* REPLACE id + (id AS id_alias) AS id, id_alias FROM test_table); +plus(id, id_alias) UInt64 +value String +id_alias UInt64 +SELECT 'Matcher'; +Matcher +DESCRIBE (SELECT * FROM test_table); +id UInt64 +value String +SELECT '--'; +-- +DESCRIBE (SELECT test_table.* FROM test_table); +id UInt64 +value String +SELECT '--'; +-- +DESCRIBE (SELECT 1 AS id, 2 AS value, * FROM test_table); +id UInt8 +value UInt8 +test_table.id UInt64 +test_table.value String +SELECT '--'; +-- +DESCRIBE (SELECT 1 AS id, 2 AS value, * FROM test_table AS t1); +id UInt8 +value UInt8 +t1.id UInt64 +t1.value String +SELECT 'Lambda'; +Lambda +DESCRIBE (SELECT arrayMap(x -> x + 1, [1,2,3])); +arrayMap(lambda(tuple(x), plus(x, 1)), [1, 2, 3]) Array(UInt16) +SELECT '--'; +-- +DESCRIBE (SELECT 1 AS a, arrayMap(x -> x + a, [1,2,3])); +a UInt8 +arrayMap(lambda(tuple(x), plus(x, a)), [1, 2, 3]) Array(UInt16) +SELECT '--'; +-- +DESCRIBE (SELECT arrayMap(x -> x + test_table.id + test_table.id + id, [1,2,3]) FROM test_table); +arrayMap(lambda(tuple(x), 
plus(plus(plus(x, id), id), id)), [1, 2, 3]) Array(UInt64) +SELECT '--'; +-- +DESCRIBE (SELECT arrayMap(x -> x + (test_table.id AS first) + (test_table.id AS second) + id, [1,2,3]) FROM test_table); +arrayMap(lambda(tuple(x), plus(plus(plus(x, first), second), id)), [1, 2, 3]) Array(UInt64) +SELECT '--'; +-- +DESCRIBE (SELECT arrayMap(x -> test_table.* EXCEPT value, [1,2,3]) FROM test_table); +arrayMap(lambda(tuple(x), id), [1, 2, 3]) Array(UInt64) +SELECT '--'; +-- +DESCRIBE (SELECT arrayMap(x -> test_table.* EXCEPT value APPLY x -> x, [1,2,3]) FROM test_table); +arrayMap(lambda(tuple(x), id), [1, 2, 3]) Array(UInt64) +SELECT '--'; +-- +DESCRIBE (SELECT arrayMap(x -> test_table.* EXCEPT value APPLY toString, [1,2,3]) FROM test_table); +arrayMap(lambda(tuple(x), toString(id)), [1, 2, 3]) Array(String) +SELECT '--'; +-- +DESCRIBE (SELECT arrayMap(x -> test_table.* EXCEPT value APPLY x -> toString(x), [1,2,3]) FROM test_table); +arrayMap(lambda(tuple(x), toString(id)), [1, 2, 3]) Array(String) +SELECT '--'; +-- +DESCRIBE (SELECT cast(tuple(1), 'Tuple (id UInt64)') AS compound_value, arrayMap(x -> compound_value.*, [1,2,3])); +compound_value Tuple(id UInt64) +arrayMap(lambda(tuple(x), compound_value.id), [1, 2, 3]) Array(UInt64) +SELECT '--'; +-- +DESCRIBE (SELECT cast(tuple(1), 'Tuple (id UInt64)') AS compound_value, arrayMap(x -> compound_value.* APPLY x -> x, [1,2,3])); +compound_value Tuple(id UInt64) +arrayMap(lambda(tuple(x), compound_value.id), [1, 2, 3]) Array(UInt64) +SELECT '--'; +-- +DESCRIBE (SELECT cast(tuple(1), 'Tuple (id UInt64)') AS compound_value, arrayMap(x -> compound_value.* APPLY toString, [1,2,3])); +compound_value Tuple(id UInt64) +arrayMap(lambda(tuple(x), toString(compound_value.id)), [1, 2, 3]) Array(String) +SELECT '--'; +-- +DESCRIBE (SELECT cast(tuple(1), 'Tuple (id UInt64)') AS compound_value, arrayMap(x -> compound_value.* APPLY x -> toString(x), [1,2,3])); +compound_value Tuple(id UInt64) +arrayMap(lambda(tuple(x), toString(compound_value.id)), [1, 2, 3]) Array(String) +SELECT '--'; +-- +DESCRIBE (SELECT cast(tuple(1, 'Value'), 'Tuple (id UInt64, value String)') AS compound_value, arrayMap(x -> compound_value.* EXCEPT value, [1,2,3])); +compound_value Tuple(id UInt64, value String) +arrayMap(lambda(tuple(x), compound_value.id), [1, 2, 3]) Array(UInt64) +SELECT '--'; +-- +DESCRIBE (SELECT cast(tuple(1, 'Value'), 'Tuple (id UInt64, value String)') AS compound_value, arrayMap(x -> compound_value.* EXCEPT value APPLY x -> x, [1,2,3])); +compound_value Tuple(id UInt64, value String) +arrayMap(lambda(tuple(x), compound_value.id), [1, 2, 3]) Array(UInt64) +SELECT '--'; +-- +DESCRIBE (SELECT cast(tuple(1, 'Value'), 'Tuple (id UInt64, value String)') AS compound_value, arrayMap(x -> compound_value.* EXCEPT value APPLY toString, [1,2,3])); +compound_value Tuple(id UInt64, value String) +arrayMap(lambda(tuple(x), toString(compound_value.id)), [1, 2, 3]) Array(String) +SELECT '--'; +-- +DESCRIBE (SELECT cast(tuple(1, 'Value'), 'Tuple (id UInt64, value String)') AS compound_value, arrayMap(x -> compound_value.* EXCEPT value APPLY x -> toString(x), [1,2,3])); +compound_value Tuple(id UInt64, value String) +arrayMap(lambda(tuple(x), toString(compound_value.id)), [1, 2, 3]) Array(String) +SELECT '--'; +-- +DESCRIBE (SELECT cast(tuple(1), 'Tuple (id UInt64)') AS a, arrayMap(x -> untuple(a), [1,2,3]) FROM test_table); +a Tuple(id UInt64) +arrayMap(lambda(tuple(x), tupleElement(a, \'id\')), [1, 2, 3]) Array(UInt64) +SELECT '--'; +-- +DESCRIBE (SELECT cast(tuple(1), 'Tuple (id 
UInt64)') AS a, arrayMap(x -> untuple(a) AS untupled_value, [1,2,3]) FROM test_table); +a Tuple(id UInt64) +arrayMap(untupled_value, [1, 2, 3]) Array(UInt64) +SELECT '--'; +-- +DESCRIBE (SELECT cast(tuple(1), 'Tuple (id UInt64)') AS a, untuple(a) AS untupled_value, arrayMap(x -> untupled_value, [1,2,3]) FROM test_table); +a Tuple(id UInt64) +untupled_value.id UInt64 +arrayMap(lambda(tuple(x), untupled_value.id), [1, 2, 3]) Array(UInt64) +SELECT '--'; +-- +DESCRIBE (SELECT cast(tuple(1), 'Tuple (id UInt64)') AS a, untuple(a) AS untupled_value, arrayMap(x -> untupled_value AS untupled_value_in_lambda, [1,2,3]) FROM test_table); +a Tuple(id UInt64) +untupled_value.id UInt64 +arrayMap(untupled_value_in_lambda, [1, 2, 3]) Array(UInt64) +SELECT 'Standalone lambda'; +Standalone lambda +DESCRIBE (WITH x -> x + 1 AS test_lambda SELECT test_lambda(1)); +test_lambda(1) UInt16 +SELECT '--'; +-- +DESCRIBE (WITH x -> * AS test_lambda SELECT test_lambda(1) AS value, value FROM test_table); +id UInt64 +value String +id UInt64 +value String +SELECT 'Subquery'; +Subquery +DESCRIBE (SELECT (SELECT 1), (SELECT 2), (SELECT 3) AS a, (SELECT 4)); +_subquery_1 Nullable(UInt8) +_subquery_2 Nullable(UInt8) +a Nullable(UInt8) +_subquery_4 Nullable(UInt8) +SELECT '--'; +-- +DESCRIBE (SELECT arrayMap(x -> (SELECT 1), [1,2,3]), arrayMap(x -> (SELECT 2) AS a, [1, 2, 3]), arrayMap(x -> (SELECT 1), [1,2,3])); +arrayMap(lambda(tuple(x), _subquery_1), [1, 2, 3]) Array(Nullable(UInt8)) +arrayMap(a, [1, 2, 3]) Array(Nullable(UInt8)) +arrayMap(lambda(tuple(x), _subquery_3), [1, 2, 3]) Array(Nullable(UInt8)) +SELECT '--'; +-- +DESCRIBE (SELECT (SELECT 1 AS a, 2 AS b) AS c, c.a, c.b); +c Tuple(a UInt8, b UInt8) +c.a UInt8 +c.b UInt8 +SELECT '--'; +-- +DESCRIBE (SELECT (SELECT 1 AS a, 2 AS b) AS c, c.*); +c Tuple(a UInt8, b UInt8) +c.a UInt8 +c.b UInt8 +SELECT '--'; +-- +DESCRIBE (SELECT (SELECT 1 UNION DISTINCT SELECT 1), (SELECT 2 UNION DISTINCT SELECT 2), (SELECT 3 UNION DISTINCT SELECT 3) AS a, (SELECT 4 UNION DISTINCT SELECT 4)); +_subquery_1 Nullable(UInt8) +_subquery_2 Nullable(UInt8) +a Nullable(UInt8) +_subquery_4 Nullable(UInt8) +SELECT '--'; +-- +DESCRIBE (SELECT arrayMap(x -> (SELECT 1 UNION DISTINCT SELECT 1), [1,2,3]), arrayMap(x -> (SELECT 2 UNION DISTINCT SELECT 2) AS a, [1, 2, 3]), +arrayMap(x -> (SELECT 3 UNION DISTINCT SELECT 3), [1,2,3])); +arrayMap(lambda(tuple(x), _subquery_1), [1, 2, 3]) Array(Nullable(UInt8)) +arrayMap(a, [1, 2, 3]) Array(Nullable(UInt8)) +arrayMap(lambda(tuple(x), _subquery_3), [1, 2, 3]) Array(Nullable(UInt8)) +SELECT '--'; +-- +DESCRIBE (SELECT (SELECT 1 AS a, 2 AS b UNION DISTINCT SELECT 1, 2) AS c, c.a, c.b); +c Tuple(a UInt8, b UInt8) +c.a UInt8 +c.b UInt8 +SELECT '--'; +-- +DESCRIBE (SELECT (SELECT 1 AS a, 2 AS b UNION DISTINCT SELECT 1, 2) AS c, c.*); +c Tuple(a UInt8, b UInt8) +c.a UInt8 +c.b UInt8 +SELECT '--'; +-- +DESCRIBE (SELECT (SELECT 1), (SELECT 2 UNION DISTINCT SELECT 2), (SELECT 3) AS a, (SELECT 4 UNION DISTINCT SELECT 4)); +_subquery_1 Nullable(UInt8) +_subquery_2 Nullable(UInt8) +a Nullable(UInt8) +_subquery_4 Nullable(UInt8) +SELECT '--'; +-- +DESCRIBE (SELECT arrayMap(x -> (SELECT 1 UNION DISTINCT SELECT 1), [1,2,3]), arrayMap(x -> (SELECT 2) AS a, [1, 2, 3]), +arrayMap(x -> (SELECT 3 UNION DISTINCT SELECT 3), [1,2,3])); +arrayMap(lambda(tuple(x), _subquery_1), [1, 2, 3]) Array(Nullable(UInt8)) +arrayMap(a, [1, 2, 3]) Array(Nullable(UInt8)) +arrayMap(lambda(tuple(x), _subquery_3), [1, 2, 3]) Array(Nullable(UInt8)) +SELECT 'Window functions'; +Window functions 
+DESCRIBE (SELECT count() OVER ()); +count() OVER () UInt64 +SELECT '--'; +-- +DESCRIBE (SELECT count() OVER () AS window_function); +window_function UInt64 +SELECT '--'; +-- +DESCRIBE (SELECT count() OVER (PARTITION BY id) FROM test_table); +count() OVER (PARTITION BY id) UInt64 +SELECT '--'; +-- +DESCRIBE (SELECT count() OVER (PARTITION BY id, value) FROM test_table); +count() OVER (PARTITION BY id, value) UInt64 +SELECT '--'; +-- +DESCRIBE (SELECT count() OVER (PARTITION BY id, value ORDER BY id) FROM test_table); +count() OVER (PARTITION BY id, value ORDER BY id ASC) UInt64 +SELECT '--'; +-- +DESCRIBE (SELECT count() OVER (PARTITION BY id, value ORDER BY id ASC, value DESC ROWS CURRENT ROW) FROM test_table); +count() OVER (PARTITION BY id, value ORDER BY id ASC, value DESC ROWS BETWEEN CURRENT ROW AND CURRENT ROW) UInt64 +SELECT '--'; +-- +DESCRIBE (SELECT count() OVER (PARTITION BY id, value ORDER BY id ASC, value DESC ROWS BETWEEN CURRENT ROW AND CURRENT ROW) FROM test_table); +count() OVER (PARTITION BY id, value ORDER BY id ASC, value DESC ROWS BETWEEN CURRENT ROW AND CURRENT ROW) UInt64 +SELECT '--'; +-- +DESCRIBE (SELECT count() OVER (PARTITION BY id, value ORDER BY id ASC, value DESC RANGE CURRENT ROW) FROM test_table); +count() OVER (PARTITION BY id, value ORDER BY id ASC, value DESC RANGE BETWEEN CURRENT ROW AND CURRENT ROW) UInt64 +SELECT '--'; +-- +DESCRIBE (SELECT count() OVER (PARTITION BY id, value ORDER BY id ASC, value DESC RANGE BETWEEN CURRENT ROW AND CURRENT ROW) FROM test_table); +count() OVER (PARTITION BY id, value ORDER BY id ASC, value DESC RANGE BETWEEN CURRENT ROW AND CURRENT ROW) UInt64 +SELECT '--'; +-- +DESCRIBE (SELECT count() OVER (PARTITION BY (id AS id_alias), (value AS value_alias) ORDER BY id ASC, value DESC ROWS CURRENT ROW) FROM test_table); +count() OVER (PARTITION BY id_alias, value_alias ORDER BY id ASC, value DESC ROWS BETWEEN CURRENT ROW AND CURRENT ROW) UInt64 +SELECT '--'; +-- +DESCRIBE (SELECT count() OVER (PARTITION BY id, value ORDER BY (id AS id_alias) ASC, (value AS value_alias) DESC ROWS CURRENT ROW) FROM test_table); +count() OVER (PARTITION BY id, value ORDER BY id_alias ASC, value_alias DESC ROWS BETWEEN CURRENT ROW AND CURRENT ROW) UInt64 +SELECT '--'; +-- +DESCRIBE (SELECT count() OVER (PARTITION BY id, value ORDER BY id ASC, value DESC ROWS BETWEEN 1 PRECEDING AND 2 FOLLOWING) FROM test_table); +count() OVER (PARTITION BY id, value ORDER BY id ASC, value DESC ROWS BETWEEN 1 PRECEDING AND 2 FOLLOWING) UInt64 +SELECT '--'; +-- +DESCRIBE (SELECT count() OVER (PARTITION BY id, value ORDER BY id ASC, value DESC ROWS BETWEEN 1 + 1 PRECEDING AND 2 + 2 FOLLOWING) FROM test_table); +count() OVER (PARTITION BY id, value ORDER BY id ASC, value DESC ROWS BETWEEN plus(1, 1) PRECEDING AND plus(2, 2) FOLLOWING) UInt64 +SELECT '--'; +-- +DESCRIBE (SELECT count() OVER (PARTITION BY id, value ORDER BY id ASC, value DESC ROWS BETWEEN ((1 + 1) AS frame_offset_begin) PRECEDING AND ((2 + 2) AS frame_offset_end) FOLLOWING) +FROM test_table); +count() OVER (PARTITION BY id, value ORDER BY id ASC, value DESC ROWS BETWEEN frame_offset_begin PRECEDING AND frame_offset_end FOLLOWING) UInt64 +SELECT '--'; +-- +DESCRIBE (SELECT count() OVER (ORDER BY toNullable(id) NULLS FIRST) FROM test_table); +count() OVER (ORDER BY toNullable(id) ASC NULLS FIRST) UInt64 +SELECT '--'; +-- +DESCRIBE (SELECT count() OVER (ORDER BY toNullable(id) NULLS LAST) FROM test_table); +count() OVER (ORDER BY toNullable(id) ASC NULLS LAST) UInt64 +SELECT '--'; +-- +DESCRIBE (SELECT 
count() OVER (ORDER BY id WITH FILL FROM 1 TO 5 STEP 1) FROM test_table); +count() OVER (ORDER BY id ASC WITH FILL FROM 1 TO 5 STEP 1) UInt64 +SELECT '--'; +-- +DESCRIBE (SELECT count() OVER (ORDER BY id WITH FILL FROM 1 + 1 TO 6 STEP 1 + 1) FROM test_table); +count() OVER (ORDER BY id ASC WITH FILL FROM plus(1, 1) TO 6 STEP plus(1, 1)) UInt64 +SELECT '--'; +-- +DESCRIBE (SELECT count() OVER (ORDER BY id WITH FILL FROM ((1 + 1) AS from) TO (6 AS to) STEP ((1 + 1) AS step)) FROM test_table); +count() OVER (ORDER BY id ASC WITH FILL FROM from TO to STEP step) UInt64 +SELECT 'Window functions WINDOW'; +Window functions WINDOW +DESCRIBE (SELECT count() OVER window_name FROM test_table WINDOW window_name AS (PARTITION BY id)); +count() OVER window_name UInt64 +SELECT '--'; +-- +DESCRIBE (SELECT count() OVER window_name FROM test_table WINDOW window_name AS (PARTITION BY id ORDER BY value)); +count() OVER window_name UInt64 +SELECT '--'; +-- +DESCRIBE (SELECT count() OVER (window_name ORDER BY id) FROM test_table WINDOW window_name AS (PARTITION BY id)); +count() OVER (window_name ORDER BY id ASC) UInt64 +SELECT 'IN function'; +IN function +DESCRIBE (SELECT id IN (SELECT 1) FROM test_table); +in(id, _subquery_1) UInt8 +SELECT '--'; +-- +DESCRIBE (SELECT id IN (SELECT id FROM test_table_in) FROM test_table); +in(id, _subquery_1) UInt8 +SELECT '--'; +-- +DESCRIBE (SELECT id IN test_table_in FROM test_table); +in(id, test_table_in) UInt8 +SELECT '--'; +-- +DESCRIBE (WITH test_table_in_cte AS (SELECT id FROM test_table) SELECT id IN (SELECT id FROM test_table_in_cte) FROM test_table); +in(id, _subquery_1) UInt8 +SELECT '--'; +-- +DESCRIBE (WITH test_table_in_cte AS (SELECT id FROM test_table) SELECT id IN test_table_in_cte FROM test_table); +in(id, test_table_in_cte) UInt8 +SELECT 'Joins'; +Joins +DESCRIBE (SELECT * FROM test_table_join_1, test_table_join_2); +test_table_join_1.id UInt64 +test_table_join_1.value String +value_join_1 String +test_table_join_2.id UInt64 +test_table_join_2.value String +value_join_2 String +SELECT '--'; +-- +DESCRIBE (SELECT * FROM test_table_join_1 AS t1, test_table_join_2 AS t2); +t1.id UInt64 +t1.value String +value_join_1 String +t2.id UInt64 +t2.value String +value_join_2 String +SELECT '--'; +-- +DESCRIBE (SELECT * APPLY toString FROM test_table_join_1 AS t1, test_table_join_2 AS t2); +toString(t1.id) String +toString(t1.value) String +toString(value_join_1) String +toString(t2.id) String +toString(t2.value) String +toString(value_join_2) String +SELECT '--'; +-- +DESCRIBE (SELECT * APPLY x -> toString(x) FROM test_table_join_1 AS t1, test_table_join_2 AS t2); +toString(t1.id) String +toString(t1.value) String +toString(value_join_1) String +toString(t2.id) String +toString(t2.value) String +toString(value_join_2) String +SELECT '--'; +-- +DESCRIBE (SELECT test_table_join_1.*, test_table_join_2.* FROM test_table_join_1 INNER JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id); +test_table_join_1.id UInt64 +test_table_join_1.value String +value_join_1 String +test_table_join_2.id UInt64 +test_table_join_2.value String +value_join_2 String +SELECT '--'; +-- +DESCRIBE (SELECT t1.*, t2.* FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id); +t1.id UInt64 +t1.value String +value_join_1 String +t2.id UInt64 +t2.value String +value_join_2 String +SELECT '--'; +-- +DESCRIBE (SELECT test_table_join_1.* APPLY toString, test_table_join_2.* APPLY toString FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id 
= t2.id); +toString(t1.id) String +toString(t1.value) String +toString(value_join_1) String +toString(t2.id) String +toString(t2.value) String +toString(value_join_2) String +SELECT '--'; +-- +DESCRIBE (SELECT test_table_join_1.* APPLY x -> toString(x), test_table_join_2.* APPLY x -> toString(x) FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id); +toString(t1.id) String +toString(t1.value) String +toString(value_join_1) String +toString(t2.id) String +toString(t2.value) String +toString(value_join_2) String +SELECT '--'; +-- +DESCRIBE (SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_1.value_join_1, test_table_join_2.id, test_table_join_2.value, test_table_join_2.value_join_2 +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id); +test_table_join_1.id UInt64 +test_table_join_1.value String +value_join_1 String +test_table_join_2.id UInt64 +test_table_join_2.value String +value_join_2 String +SELECT '--'; +-- +DESCRIBE (SELECT t1.id, t1.value, t1.value_join_1, t2.id, t2.value, t2.value_join_2 FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id); +t1.id UInt64 +t1.value String +value_join_1 String +t2.id UInt64 +t2.value String +value_join_2 String +SELECT 'Multiple JOINS'; +Multiple JOINS +DESCRIBE (SELECT * FROM test_table_join_1, test_table_join_2, test_table_join_3); +test_table_join_1.id UInt64 +test_table_join_1.value String +value_join_1 String +test_table_join_2.id UInt64 +test_table_join_2.value String +value_join_2 String +test_table_join_3.id UInt64 +test_table_join_3.value String +value_join_3 String +SELECT '--'; +-- +DESCRIBE (SELECT * FROM test_table_join_1 AS t1, test_table_join_2 AS t2, test_table_join_3 AS t3); +t1.id UInt64 +t1.value String +value_join_1 String +t2.id UInt64 +t2.value String +value_join_2 String +t3.id UInt64 +t3.value String +value_join_3 String +SELECT '--'; +-- +DESCRIBE (SELECT * APPLY toString FROM test_table_join_1 AS t1, test_table_join_2 AS t2, test_table_join_3 AS t3); +toString(t1.id) String +toString(t1.value) String +toString(value_join_1) String +toString(t2.id) String +toString(t2.value) String +toString(value_join_2) String +toString(t3.id) String +toString(t3.value) String +toString(value_join_3) String +SELECT '--'; +-- +DESCRIBE (SELECT * APPLY x -> toString(x) FROM test_table_join_1 AS t1, test_table_join_2 AS t2, test_table_join_3 AS t3); +toString(t1.id) String +toString(t1.value) String +toString(value_join_1) String +toString(t2.id) String +toString(t2.value) String +toString(value_join_2) String +toString(t3.id) String +toString(t3.value) String +toString(value_join_3) String +SELECT '--'; +-- +DESCRIBE (SELECT test_table_join_1.*, test_table_join_2.*, test_table_join_3.* +FROM test_table_join_1 INNER JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id +INNER JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id); +test_table_join_1.id UInt64 +test_table_join_1.value String +value_join_1 String +test_table_join_2.id UInt64 +test_table_join_2.value String +value_join_2 String +test_table_join_3.id UInt64 +test_table_join_3.value String +value_join_3 String +SELECT '--'; +-- +DESCRIBE (SELECT t1.*, t2.*, t3.* +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id); +t1.id UInt64 +t1.value String +value_join_1 String +t2.id UInt64 +t2.value String +value_join_2 String +t3.id UInt64 +t3.value String +value_join_3 
String +SELECT '--'; +-- +DESCRIBE (SELECT test_table_join_1.* APPLY toString, test_table_join_2.* APPLY toString, test_table_join_3.* APPLY toString +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id); +toString(t1.id) String +toString(t1.value) String +toString(value_join_1) String +toString(t2.id) String +toString(t2.value) String +toString(value_join_2) String +toString(t3.id) String +toString(t3.value) String +toString(value_join_3) String +SELECT '--'; +-- +DESCRIBE (SELECT test_table_join_1.* APPLY x -> toString(x), test_table_join_2.* APPLY x -> toString(x), test_table_join_3.* APPLY x -> toString(x) +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id); +toString(t1.id) String +toString(t1.value) String +toString(value_join_1) String +toString(t2.id) String +toString(t2.value) String +toString(value_join_2) String +toString(t3.id) String +toString(t3.value) String +toString(value_join_3) String +SELECT '--'; +-- +DESCRIBE (SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_1.value_join_1, test_table_join_2.id, test_table_join_2.value, test_table_join_2.value_join_2, +test_table_join_3.id, test_table_join_3.value, test_table_join_3.value_join_3 +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id); +test_table_join_1.id UInt64 +test_table_join_1.value String +value_join_1 String +test_table_join_2.id UInt64 +test_table_join_2.value String +value_join_2 String +test_table_join_3.id UInt64 +test_table_join_3.value String +value_join_3 String +SELECT '--'; +-- +DESCRIBE (SELECT t1.id, t1.value, t1.value_join_1, t2.id, t2.value, t2.value_join_2, t3.id, t3.value, t3.value_join_3 +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id); +t1.id UInt64 +t1.value String +value_join_1 String +t2.id UInt64 +t2.value String +value_join_2 String +t3.id UInt64 +t3.value String +value_join_3 String +SELECT 'Joins USING'; +Joins USING +DESCRIBE (SELECT * FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id)); +id UInt64 +t1.value String +value_join_1 String +t2.value String +value_join_2 String +SELECT '--'; +-- +DESCRIBE (SELECT * FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id, value)); +id UInt64 +value String +value_join_1 String +value_join_2 String +SELECT '--'; +-- +DESCRIBE (SELECT id, t1.id, t1.value, t2.id, t2.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id)); +id UInt64 +t1.id UInt64 +t1.value String +t2.id UInt64 +t2.value String +SELECT '--'; +-- +DESCRIBE (SELECT id, value, t1.id, t1.value, t2.id, t2.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id, value)); +id UInt64 +value String +t1.id UInt64 +t1.value String +t2.id UInt64 +t2.value String +SELECT 'Multiple Joins USING'; +Multiple Joins USING +SELECT '--'; +-- +DESCRIBE (SELECT * FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id) INNER JOIN test_table_join_3 AS t3 USING (id)); +id UInt64 +t1.value String +value_join_1 String +t2.value String +value_join_2 String +t3.value String +value_join_3 String +SELECT '--'; +-- +DESCRIBE (SELECT * FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id, value) INNER JOIN test_table_join_3 AS t3 USING (id, 
value)); +id UInt64 +value String +value_join_1 String +value_join_2 String +value_join_3 String +SELECT '--'; +-- +DESCRIBE (SELECT id, t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id) INNER JOIN test_table_join_3 AS t3 USING (id)); +id UInt64 +t1.id UInt64 +t1.value String +t2.id UInt64 +t2.value String +t3.id UInt64 +t3.value String +SELECT '--'; +-- +DESCRIBE (SELECT id, value, t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id, value) INNER JOIN test_table_join_3 AS t3 USING (id, value)); +id UInt64 +value String +t1.id UInt64 +t1.value String +t2.id UInt64 +t2.value String +t3.id UInt64 +t3.value String diff --git a/tests/queries/0_stateless/02378_analyzer_projection_names.sql b/tests/queries/0_stateless/02378_analyzer_projection_names.sql new file mode 100644 index 00000000000..907cc79dcec --- /dev/null +++ b/tests/queries/0_stateless/02378_analyzer_projection_names.sql @@ -0,0 +1,541 @@ +SET allow_experimental_analyzer = 1; + +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + id UInt64, + value String +) ENGINE=TinyLog; + +INSERT INTO test_table VALUES (0, 'Value'); + +DROP TABLE IF EXISTS test_table_in; +CREATE TABLE test_table_in +( + id UInt64 +) ENGINE=TinyLog; + +DROP TABLE IF EXISTS test_table_compound; +CREATE TABLE test_table_compound +( + id UInt64, + tuple_value Tuple(value_1 UInt64, value_2 String) +) ENGINE=TinyLog; + +INSERT INTO test_table_compound VALUES (0, tuple(0, 'Value')); + +DROP TABLE IF EXISTS test_table_join_1; +CREATE TABLE test_table_join_1 +( + id UInt64, + value String, + value_join_1 String +) ENGINE=TinyLog; + +INSERT INTO test_table_join_1 VALUES (0, 'Join_1_Value', 'Join_1_Value'); + +DROP TABLE IF EXISTS test_table_join_2; +CREATE TABLE test_table_join_2 +( + id UInt64, + value String, + value_join_2 String +) ENGINE=TinyLog; + +INSERT INTO test_table_join_2 VALUES (0, 'Join_2_Value', 'Join_2_Value'); + +DROP TABLE IF EXISTS test_table_join_3; +CREATE TABLE test_table_join_3 +( + id UInt64, + value String, + value_join_3 String +) ENGINE=TinyLog; + +INSERT INTO test_table_join_3 VALUES (0, 'Join_3_Value', 'Join_3_Value'); + +-- { echoOn } + +SELECT 'Constants'; + +DESCRIBE (SELECT 1, 'Value'); + +SELECT '--'; + +DESCRIBE (SELECT 1 + 1, concat('Value_1', 'Value_2')); + +SELECT '--'; + +DESCRIBE (SELECT cast(tuple(1, 'Value'), 'Tuple (id UInt64, value String)')); + +SELECT 'Columns'; + +DESCRIBE (SELECT test_table.id, test_table.id, id FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT * FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT * APPLY toString FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT * APPLY x -> toString(x) FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT tuple_value.* FROM test_table_compound); + +SELECT '--'; + +DESCRIBE (SELECT tuple_value.* APPLY x -> x FROM test_table_compound); + +SELECT '--'; + +DESCRIBE (SELECT tuple_value.* APPLY toString FROM test_table_compound); + +SELECT '--'; + +DESCRIBE (SELECT tuple_value.* APPLY x -> toString(x) FROM test_table_compound); + +SELECT 'Constants with aliases'; + +DESCRIBE (SELECT 1 AS a, a AS b, b, b AS c, c, 'Value' AS d, d AS e, e AS f); + +SELECT '--'; + +DESCRIBE (SELECT plus(1 AS a, a AS b), plus(b, b), plus(b, b) AS c, concat('Value' AS d, d) AS e, e); + +SELECT '--'; + +DESCRIBE (SELECT cast(tuple(1, 'Value'), 'Tuple (id UInt64, value String)') AS a, a.id, a.value); + +SELECT '--'; + +DESCRIBE (SELECT 
cast(tuple(1, 'Value'), 'Tuple (id UInt64, value String)') AS a, a.*); + +SELECT '--'; + +DESCRIBE (SELECT cast(tuple(1, 'Value'), 'Tuple (id UInt64, value String)') AS a, a.* EXCEPT id); + +SELECT '--'; + +DESCRIBE (SELECT cast(tuple(1, 'Value'), 'Tuple (id UInt64, value String)') AS a, a.* EXCEPT value); + +SELECT '--'; + +DESCRIBE (SELECT cast(tuple(1, 'Value'), 'Tuple (id UInt64, value String)') AS a, a.* EXCEPT value APPLY toString); + +SELECT '--'; + +DESCRIBE (SELECT cast(tuple(1, 'Value'), 'Tuple (id UInt64, value String)') AS a, a.* EXCEPT value APPLY x -> toString(x)); + +SELECT '--'; + +DESCRIBE (SELECT cast(tuple(1, 'Value'), 'Tuple (id UInt64, value String)') AS a, untuple(a)); + +SELECT '--'; + +DESCRIBE (SELECT cast(tuple(1, 'Value'), 'Tuple (id UInt64, value String)') AS a, untuple(a) AS b); + +SELECT 'Columns with aliases'; + +DESCRIBE (SELECT test_table.id AS a, a, test_table.id AS b, b AS c, c FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT plus(test_table.id AS a, test_table.id), plus(id, id AS b), plus(b, b), plus(test_table.id, test_table.id) FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT test_table.* REPLACE id + (id AS id_alias) AS id, id_alias FROM test_table); + +SELECT 'Matcher'; + +DESCRIBE (SELECT * FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT test_table.* FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT 1 AS id, 2 AS value, * FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT 1 AS id, 2 AS value, * FROM test_table AS t1); + +SELECT 'Lambda'; + +DESCRIBE (SELECT arrayMap(x -> x + 1, [1,2,3])); + +SELECT '--'; + +DESCRIBE (SELECT 1 AS a, arrayMap(x -> x + a, [1,2,3])); + +SELECT '--'; + +DESCRIBE (SELECT arrayMap(x -> x + test_table.id + test_table.id + id, [1,2,3]) FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT arrayMap(x -> x + (test_table.id AS first) + (test_table.id AS second) + id, [1,2,3]) FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT arrayMap(x -> test_table.* EXCEPT value, [1,2,3]) FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT arrayMap(x -> test_table.* EXCEPT value APPLY x -> x, [1,2,3]) FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT arrayMap(x -> test_table.* EXCEPT value APPLY toString, [1,2,3]) FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT arrayMap(x -> test_table.* EXCEPT value APPLY x -> toString(x), [1,2,3]) FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT cast(tuple(1), 'Tuple (id UInt64)') AS compound_value, arrayMap(x -> compound_value.*, [1,2,3])); + +SELECT '--'; + +DESCRIBE (SELECT cast(tuple(1), 'Tuple (id UInt64)') AS compound_value, arrayMap(x -> compound_value.* APPLY x -> x, [1,2,3])); + +SELECT '--'; + +DESCRIBE (SELECT cast(tuple(1), 'Tuple (id UInt64)') AS compound_value, arrayMap(x -> compound_value.* APPLY toString, [1,2,3])); + +SELECT '--'; + +DESCRIBE (SELECT cast(tuple(1), 'Tuple (id UInt64)') AS compound_value, arrayMap(x -> compound_value.* APPLY x -> toString(x), [1,2,3])); + +SELECT '--'; + +DESCRIBE (SELECT cast(tuple(1, 'Value'), 'Tuple (id UInt64, value String)') AS compound_value, arrayMap(x -> compound_value.* EXCEPT value, [1,2,3])); + +SELECT '--'; + +DESCRIBE (SELECT cast(tuple(1, 'Value'), 'Tuple (id UInt64, value String)') AS compound_value, arrayMap(x -> compound_value.* EXCEPT value APPLY x -> x, [1,2,3])); + +SELECT '--'; + +DESCRIBE (SELECT cast(tuple(1, 'Value'), 'Tuple (id UInt64, value String)') AS compound_value, arrayMap(x -> compound_value.* EXCEPT value APPLY toString, [1,2,3])); + +SELECT '--'; + +DESCRIBE (SELECT cast(tuple(1, 
'Value'), 'Tuple (id UInt64, value String)') AS compound_value, arrayMap(x -> compound_value.* EXCEPT value APPLY x -> toString(x), [1,2,3])); + +SELECT '--'; + +DESCRIBE (SELECT cast(tuple(1), 'Tuple (id UInt64)') AS a, arrayMap(x -> untuple(a), [1,2,3]) FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT cast(tuple(1), 'Tuple (id UInt64)') AS a, arrayMap(x -> untuple(a) AS untupled_value, [1,2,3]) FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT cast(tuple(1), 'Tuple (id UInt64)') AS a, untuple(a) AS untupled_value, arrayMap(x -> untupled_value, [1,2,3]) FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT cast(tuple(1), 'Tuple (id UInt64)') AS a, untuple(a) AS untupled_value, arrayMap(x -> untupled_value AS untupled_value_in_lambda, [1,2,3]) FROM test_table); + +SELECT 'Standalone lambda'; + +DESCRIBE (WITH x -> x + 1 AS test_lambda SELECT test_lambda(1)); + +SELECT '--'; + +DESCRIBE (WITH x -> * AS test_lambda SELECT test_lambda(1) AS value, value FROM test_table); + +SELECT 'Subquery'; + +DESCRIBE (SELECT (SELECT 1), (SELECT 2), (SELECT 3) AS a, (SELECT 4)); + +SELECT '--'; + +DESCRIBE (SELECT arrayMap(x -> (SELECT 1), [1,2,3]), arrayMap(x -> (SELECT 2) AS a, [1, 2, 3]), arrayMap(x -> (SELECT 1), [1,2,3])); + +SELECT '--'; + +DESCRIBE (SELECT (SELECT 1 AS a, 2 AS b) AS c, c.a, c.b); + +SELECT '--'; + +DESCRIBE (SELECT (SELECT 1 AS a, 2 AS b) AS c, c.*); + +SELECT '--'; + +DESCRIBE (SELECT (SELECT 1 UNION DISTINCT SELECT 1), (SELECT 2 UNION DISTINCT SELECT 2), (SELECT 3 UNION DISTINCT SELECT 3) AS a, (SELECT 4 UNION DISTINCT SELECT 4)); + +SELECT '--'; + +DESCRIBE (SELECT arrayMap(x -> (SELECT 1 UNION DISTINCT SELECT 1), [1,2,3]), arrayMap(x -> (SELECT 2 UNION DISTINCT SELECT 2) AS a, [1, 2, 3]), +arrayMap(x -> (SELECT 3 UNION DISTINCT SELECT 3), [1,2,3])); + +SELECT '--'; + +DESCRIBE (SELECT (SELECT 1 AS a, 2 AS b UNION DISTINCT SELECT 1, 2) AS c, c.a, c.b); + +SELECT '--'; + +DESCRIBE (SELECT (SELECT 1 AS a, 2 AS b UNION DISTINCT SELECT 1, 2) AS c, c.*); + +SELECT '--'; + +DESCRIBE (SELECT (SELECT 1), (SELECT 2 UNION DISTINCT SELECT 2), (SELECT 3) AS a, (SELECT 4 UNION DISTINCT SELECT 4)); + +SELECT '--'; + +DESCRIBE (SELECT arrayMap(x -> (SELECT 1 UNION DISTINCT SELECT 1), [1,2,3]), arrayMap(x -> (SELECT 2) AS a, [1, 2, 3]), +arrayMap(x -> (SELECT 3 UNION DISTINCT SELECT 3), [1,2,3])); + +SELECT 'Window functions'; + +DESCRIBE (SELECT count() OVER ()); + +SELECT '--'; + +DESCRIBE (SELECT count() OVER () AS window_function); + +SELECT '--'; + +DESCRIBE (SELECT count() OVER (PARTITION BY id) FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT count() OVER (PARTITION BY id, value) FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT count() OVER (PARTITION BY id, value ORDER BY id) FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT count() OVER (PARTITION BY id, value ORDER BY id ASC, value DESC ROWS CURRENT ROW) FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT count() OVER (PARTITION BY id, value ORDER BY id ASC, value DESC ROWS BETWEEN CURRENT ROW AND CURRENT ROW) FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT count() OVER (PARTITION BY id, value ORDER BY id ASC, value DESC RANGE CURRENT ROW) FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT count() OVER (PARTITION BY id, value ORDER BY id ASC, value DESC RANGE BETWEEN CURRENT ROW AND CURRENT ROW) FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT count() OVER (PARTITION BY (id AS id_alias), (value AS value_alias) ORDER BY id ASC, value DESC ROWS CURRENT ROW) FROM test_table); + +SELECT '--'; + 
+DESCRIBE (SELECT count() OVER (PARTITION BY id, value ORDER BY (id AS id_alias) ASC, (value AS value_alias) DESC ROWS CURRENT ROW) FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT count() OVER (PARTITION BY id, value ORDER BY id ASC, value DESC ROWS BETWEEN 1 PRECEDING AND 2 FOLLOWING) FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT count() OVER (PARTITION BY id, value ORDER BY id ASC, value DESC ROWS BETWEEN 1 + 1 PRECEDING AND 2 + 2 FOLLOWING) FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT count() OVER (PARTITION BY id, value ORDER BY id ASC, value DESC ROWS BETWEEN ((1 + 1) AS frame_offset_begin) PRECEDING AND ((2 + 2) AS frame_offset_end) FOLLOWING) +FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT count() OVER (ORDER BY toNullable(id) NULLS FIRST) FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT count() OVER (ORDER BY toNullable(id) NULLS LAST) FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT count() OVER (ORDER BY id WITH FILL FROM 1 TO 5 STEP 1) FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT count() OVER (ORDER BY id WITH FILL FROM 1 + 1 TO 6 STEP 1 + 1) FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT count() OVER (ORDER BY id WITH FILL FROM ((1 + 1) AS from) TO (6 AS to) STEP ((1 + 1) AS step)) FROM test_table); + +SELECT 'Window functions WINDOW'; + +DESCRIBE (SELECT count() OVER window_name FROM test_table WINDOW window_name AS (PARTITION BY id)); + +SELECT '--'; + +DESCRIBE (SELECT count() OVER window_name FROM test_table WINDOW window_name AS (PARTITION BY id ORDER BY value)); + +SELECT '--'; + +DESCRIBE (SELECT count() OVER (window_name ORDER BY id) FROM test_table WINDOW window_name AS (PARTITION BY id)); + +SELECT 'IN function'; + +DESCRIBE (SELECT id IN (SELECT 1) FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT id IN (SELECT id FROM test_table_in) FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT id IN test_table_in FROM test_table); + +SELECT '--'; + +DESCRIBE (WITH test_table_in_cte AS (SELECT id FROM test_table) SELECT id IN (SELECT id FROM test_table_in_cte) FROM test_table); + +SELECT '--'; + +DESCRIBE (WITH test_table_in_cte AS (SELECT id FROM test_table) SELECT id IN test_table_in_cte FROM test_table); + +SELECT 'Joins'; + +DESCRIBE (SELECT * FROM test_table_join_1, test_table_join_2); + +SELECT '--'; + +DESCRIBE (SELECT * FROM test_table_join_1 AS t1, test_table_join_2 AS t2); + +SELECT '--'; + +DESCRIBE (SELECT * APPLY toString FROM test_table_join_1 AS t1, test_table_join_2 AS t2); + +SELECT '--'; + +DESCRIBE (SELECT * APPLY x -> toString(x) FROM test_table_join_1 AS t1, test_table_join_2 AS t2); + +SELECT '--'; + +DESCRIBE (SELECT test_table_join_1.*, test_table_join_2.* FROM test_table_join_1 INNER JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id); + +SELECT '--'; + +DESCRIBE (SELECT t1.*, t2.* FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id); + +SELECT '--'; + +DESCRIBE (SELECT test_table_join_1.* APPLY toString, test_table_join_2.* APPLY toString FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id); + +SELECT '--'; + +DESCRIBE (SELECT test_table_join_1.* APPLY x -> toString(x), test_table_join_2.* APPLY x -> toString(x) FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id); + +SELECT '--'; + +DESCRIBE (SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_1.value_join_1, test_table_join_2.id, test_table_join_2.value, test_table_join_2.value_join_2 +FROM test_table_join_1 AS t1 
INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id); + +SELECT '--'; + +DESCRIBE (SELECT t1.id, t1.value, t1.value_join_1, t2.id, t2.value, t2.value_join_2 FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id); + +SELECT 'Multiple JOINS'; + +DESCRIBE (SELECT * FROM test_table_join_1, test_table_join_2, test_table_join_3); + +SELECT '--'; + +DESCRIBE (SELECT * FROM test_table_join_1 AS t1, test_table_join_2 AS t2, test_table_join_3 AS t3); + +SELECT '--'; + +DESCRIBE (SELECT * APPLY toString FROM test_table_join_1 AS t1, test_table_join_2 AS t2, test_table_join_3 AS t3); + +SELECT '--'; + +DESCRIBE (SELECT * APPLY x -> toString(x) FROM test_table_join_1 AS t1, test_table_join_2 AS t2, test_table_join_3 AS t3); + +SELECT '--'; + +DESCRIBE (SELECT test_table_join_1.*, test_table_join_2.*, test_table_join_3.* +FROM test_table_join_1 INNER JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id +INNER JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id); + +SELECT '--'; + +DESCRIBE (SELECT t1.*, t2.*, t3.* +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id); + +SELECT '--'; + +DESCRIBE (SELECT test_table_join_1.* APPLY toString, test_table_join_2.* APPLY toString, test_table_join_3.* APPLY toString +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id); + +SELECT '--'; + +DESCRIBE (SELECT test_table_join_1.* APPLY x -> toString(x), test_table_join_2.* APPLY x -> toString(x), test_table_join_3.* APPLY x -> toString(x) +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id); + +SELECT '--'; + +DESCRIBE (SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_1.value_join_1, test_table_join_2.id, test_table_join_2.value, test_table_join_2.value_join_2, +test_table_join_3.id, test_table_join_3.value, test_table_join_3.value_join_3 +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id); + +SELECT '--'; + +DESCRIBE (SELECT t1.id, t1.value, t1.value_join_1, t2.id, t2.value, t2.value_join_2, t3.id, t3.value, t3.value_join_3 +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id); + +SELECT 'Joins USING'; + +DESCRIBE (SELECT * FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id)); + +SELECT '--'; + +DESCRIBE (SELECT * FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id, value)); + +SELECT '--'; + +DESCRIBE (SELECT id, t1.id, t1.value, t2.id, t2.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id)); + +SELECT '--'; + +DESCRIBE (SELECT id, value, t1.id, t1.value, t2.id, t2.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id, value)); + +SELECT 'Multiple Joins USING'; + +SELECT '--'; + +DESCRIBE (SELECT * FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id) INNER JOIN test_table_join_3 AS t3 USING (id)); + +SELECT '--'; + +DESCRIBE (SELECT * FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id, value) INNER JOIN test_table_join_3 AS t3 USING (id, value)); + +SELECT '--'; + +DESCRIBE (SELECT id, t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 INNER JOIN 
test_table_join_2 AS t2 USING (id) INNER JOIN test_table_join_3 AS t3 USING (id)); + +SELECT '--'; + +DESCRIBE (SELECT id, value, t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id, value) INNER JOIN test_table_join_3 AS t3 USING (id, value)); + +-- { echoOff } + +DROP TABLE test_table_join_1; +DROP TABLE test_table_join_2; +DROP TABLE test_table_join_3; +DROP TABLE test_table; +DROP TABLE test_table_compound; diff --git a/tests/queries/0_stateless/02379_analyzer_subquery_depth.reference b/tests/queries/0_stateless/02379_analyzer_subquery_depth.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02379_analyzer_subquery_depth.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02379_analyzer_subquery_depth.sql b/tests/queries/0_stateless/02379_analyzer_subquery_depth.sql new file mode 100644 index 00000000000..c2109f543eb --- /dev/null +++ b/tests/queries/0_stateless/02379_analyzer_subquery_depth.sql @@ -0,0 +1,4 @@ +SET allow_experimental_analyzer = 1; + +SELECT (SELECT a FROM (SELECT 1 AS a)) SETTINGS max_subquery_depth = 1; -- { serverError 162 } +SELECT (SELECT a FROM (SELECT 1 AS a)) SETTINGS max_subquery_depth = 2; diff --git a/tests/queries/0_stateless/02380_analyzer_join_sample.reference b/tests/queries/0_stateless/02380_analyzer_join_sample.reference new file mode 100644 index 00000000000..14d5f58d76a --- /dev/null +++ b/tests/queries/0_stateless/02380_analyzer_join_sample.reference @@ -0,0 +1,2 @@ +0 0 2 2 +1 1 2 2 diff --git a/tests/queries/0_stateless/02380_analyzer_join_sample.sql b/tests/queries/0_stateless/02380_analyzer_join_sample.sql new file mode 100644 index 00000000000..e417f47d173 --- /dev/null +++ b/tests/queries/0_stateless/02380_analyzer_join_sample.sql @@ -0,0 +1,29 @@ +SET allow_experimental_analyzer = 1; + +DROP TABLE IF EXISTS test_table_join_1; +CREATE TABLE test_table_join_1 +( + id UInt64, + value String +) ENGINE=MergeTree +ORDER BY id +SAMPLE BY id; + +INSERT INTO test_table_join_1 VALUES (0, 'Value'), (1, 'Value_1'); + +DROP TABLE IF EXISTS test_table_join_2; +CREATE TABLE test_table_join_2 +( + id UInt64, + value String +) ENGINE=MergeTree +ORDER BY id +SAMPLE BY id; + +INSERT INTO test_table_join_2 VALUES (0, 'Value'), (1, 'Value_1'); + +SELECT t1.id AS t1_id, t2.id AS t2_id, t1._sample_factor AS t1_sample_factor, t2._sample_factor AS t2_sample_factor +FROM test_table_join_1 AS t1 SAMPLE 1/2 INNER JOIN test_table_join_2 AS t2 SAMPLE 1/2 ON t1.id = t2.id; + +DROP TABLE test_table_join_1; +DROP TABLE test_table_join_2; diff --git a/tests/queries/0_stateless/02381_analyzer_join_final.reference b/tests/queries/0_stateless/02381_analyzer_join_final.reference new file mode 100644 index 00000000000..e00d444d142 --- /dev/null +++ b/tests/queries/0_stateless/02381_analyzer_join_final.reference @@ -0,0 +1,2 @@ +0 0 3 1 +1 1 1 3 diff --git a/tests/queries/0_stateless/02381_analyzer_join_final.sql b/tests/queries/0_stateless/02381_analyzer_join_final.sql new file mode 100644 index 00000000000..57fc3aedd8f --- /dev/null +++ b/tests/queries/0_stateless/02381_analyzer_join_final.sql @@ -0,0 +1,34 @@ +SET allow_experimental_analyzer = 1; + +DROP TABLE IF EXISTS test_table_join_1; +CREATE TABLE test_table_join_1 +( + id UInt64, + value UInt64 +) ENGINE=SummingMergeTree(value) +ORDER BY id +SAMPLE BY id; + +SYSTEM STOP MERGES test_table_join_1; +INSERT INTO test_table_join_1 VALUES (0, 1), (1, 1); +INSERT INTO test_table_join_1 
VALUES (0, 2); + +DROP TABLE IF EXISTS test_table_join_2; +CREATE TABLE test_table_join_2 +( + id UInt64, + value UInt64 +) ENGINE=SummingMergeTree(value) +ORDER BY id +SAMPLE BY id; + +SYSTEM STOP MERGES test_table_join_2; +INSERT INTO test_table_join_2 VALUES (0, 1), (1, 1); +INSERT INTO test_table_join_2 VALUES (1, 2); + +SELECT t1.id AS t1_id, t2.id AS t2_id, t1.value AS t1_value, t2.value AS t2_value +FROM test_table_join_1 AS t1 FINAL INNER JOIN test_table_join_2 AS t2 FINAL ON t1.id = t2.id +ORDER BY t1_id; + +DROP TABLE test_table_join_1; +DROP TABLE test_table_join_2; diff --git a/tests/queries/0_stateless/02382_analyzer_matcher_join_using.reference b/tests/queries/0_stateless/02382_analyzer_matcher_join_using.reference new file mode 100644 index 00000000000..f2199aad4c8 --- /dev/null +++ b/tests/queries/0_stateless/02382_analyzer_matcher_join_using.reference @@ -0,0 +1,47 @@ +-- { echoOn } + +SELECT * FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id) ORDER BY id, t1.value; +0 Join_1_Value_0 Join_2_Value_0 +1 Join_1_Value_1 Join_2_Value_1 +SELECT * FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id, id, id) ORDER BY id, t1.value; -- { serverError 36 } +SELECT '--'; +-- +SELECT * FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 USING (id) ORDER BY id, t1.value; +0 Join_1_Value_0 Join_2_Value_0 +1 Join_1_Value_1 Join_2_Value_1 +2 Join_1_Value_2 +SELECT '--'; +-- +SELECT * FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 USING (id) ORDER BY id, t1.value; +0 Join_1_Value_0 Join_2_Value_0 +1 Join_1_Value_1 Join_2_Value_1 +3 Join_2_Value_3 +SELECT '--'; +-- +SELECT * FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 USING (id) ORDER BY id, t1.value; +0 Join_2_Value_3 +0 Join_1_Value_0 Join_2_Value_0 +1 Join_1_Value_1 Join_2_Value_1 +2 Join_1_Value_2 +SELECT '--'; +-- +SELECT * FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id) INNER JOIN test_table_join_3 AS t3 USING (id) ORDER BY id, t1.value; +0 Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +1 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 +SELECT '--'; +-- +SELECT * FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id) LEFT JOIN test_table_join_3 AS t3 USING (id) ORDER BY id, t1.value; +0 Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +1 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 +SELECT '--'; +-- +SELECT * FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id) RIGHT JOIN test_table_join_3 AS t3 USING (id) ORDER BY id, t1.value; +0 Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +1 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 +4 Join_3_Value_4 +SELECT '--'; +-- +SELECT * FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id) FULL JOIN test_table_join_3 AS t3 USING (id) ORDER BY id, t1.value; +0 Join_3_Value_4 +0 Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +1 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 diff --git a/tests/queries/0_stateless/02382_analyzer_matcher_join_using.sql b/tests/queries/0_stateless/02382_analyzer_matcher_join_using.sql new file mode 100644 index 00000000000..25d493dc422 --- /dev/null +++ b/tests/queries/0_stateless/02382_analyzer_matcher_join_using.sql @@ -0,0 +1,74 @@ +SET allow_experimental_analyzer = 1; + +DROP TABLE IF EXISTS test_table_join_1; +CREATE TABLE test_table_join_1 +( + id UInt8, + value String +) ENGINE = TinyLog; + +DROP TABLE IF EXISTS test_table_join_2; +CREATE TABLE test_table_join_2 +( + id UInt16, + value String +) ENGINE 
= TinyLog; + +DROP TABLE IF EXISTS test_table_join_3; +CREATE TABLE test_table_join_3 +( + id UInt64, + value String +) ENGINE = TinyLog; + +INSERT INTO test_table_join_1 VALUES (0, 'Join_1_Value_0'); +INSERT INTO test_table_join_1 VALUES (1, 'Join_1_Value_1'); +INSERT INTO test_table_join_1 VALUES (2, 'Join_1_Value_2'); + +INSERT INTO test_table_join_2 VALUES (0, 'Join_2_Value_0'); +INSERT INTO test_table_join_2 VALUES (1, 'Join_2_Value_1'); +INSERT INTO test_table_join_2 VALUES (3, 'Join_2_Value_3'); + +INSERT INTO test_table_join_3 VALUES (0, 'Join_3_Value_0'); +INSERT INTO test_table_join_3 VALUES (1, 'Join_3_Value_1'); +INSERT INTO test_table_join_3 VALUES (4, 'Join_3_Value_4'); + +-- { echoOn } + +SELECT * FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id) ORDER BY id, t1.value; + +SELECT * FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id, id, id) ORDER BY id, t1.value; -- { serverError 36 } + +SELECT '--'; + +SELECT * FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 USING (id) ORDER BY id, t1.value; + +SELECT '--'; + +SELECT * FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 USING (id) ORDER BY id, t1.value; + +SELECT '--'; + +SELECT * FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 USING (id) ORDER BY id, t1.value; + +SELECT '--'; + +SELECT * FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id) INNER JOIN test_table_join_3 AS t3 USING (id) ORDER BY id, t1.value; + +SELECT '--'; + +SELECT * FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id) LEFT JOIN test_table_join_3 AS t3 USING (id) ORDER BY id, t1.value; + +SELECT '--'; + +SELECT * FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id) RIGHT JOIN test_table_join_3 AS t3 USING (id) ORDER BY id, t1.value; + +SELECT '--'; + +SELECT * FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id) FULL JOIN test_table_join_3 AS t3 USING (id) ORDER BY id, t1.value; + +-- { echoOff } + +DROP TABLE test_table_join_1; +DROP TABLE test_table_join_2; +DROP TABLE test_table_join_3; diff --git a/tests/queries/0_stateless/02383_analyzer_merge_tree_self_join.reference b/tests/queries/0_stateless/02383_analyzer_merge_tree_self_join.reference new file mode 100644 index 00000000000..e48ae282f5d --- /dev/null +++ b/tests/queries/0_stateless/02383_analyzer_merge_tree_self_join.reference @@ -0,0 +1,24 @@ +-- { echoOn } + +SELECT * FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id ORDER BY t1.id, t1.value; +0 Join_1_Value_0 0 Join_2_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 +SELECT '--'; +-- +SELECT * FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id ORDER BY t1.id, t1.value; +0 Join_1_Value_0 0 Join_2_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 +2 Join_1_Value_2 0 +SELECT '--'; +-- +SELECT * FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id ORDER BY t1.id, t1.value; +0 3 Join_2_Value_3 +0 Join_1_Value_0 0 Join_2_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 +SELECT '--'; +-- +SELECT * FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id ORDER BY t1.id, t1.value; +0 3 Join_2_Value_3 +0 Join_1_Value_0 0 Join_2_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 +2 Join_1_Value_2 0 diff --git a/tests/queries/0_stateless/02383_analyzer_merge_tree_self_join.sql b/tests/queries/0_stateless/02383_analyzer_merge_tree_self_join.sql new file mode 100644 index 00000000000..c22a0f4244b --- 
/dev/null +++ b/tests/queries/0_stateless/02383_analyzer_merge_tree_self_join.sql @@ -0,0 +1,44 @@ +SET allow_experimental_analyzer = 1; + +DROP TABLE IF EXISTS test_table_join_1; +CREATE TABLE test_table_join_1 +( + id UInt64, + value String +) ENGINE = MergeTree ORDER BY id; + +DROP TABLE IF EXISTS test_table_join_2; +CREATE TABLE test_table_join_2 +( + id UInt64, + value String +) ENGINE = MergeTree ORDER BY id; + +INSERT INTO test_table_join_1 VALUES (0, 'Join_1_Value_0'); +INSERT INTO test_table_join_1 VALUES (1, 'Join_1_Value_1'); +INSERT INTO test_table_join_1 VALUES (2, 'Join_1_Value_2'); + +INSERT INTO test_table_join_2 VALUES (0, 'Join_2_Value_0'); +INSERT INTO test_table_join_2 VALUES (1, 'Join_2_Value_1'); +INSERT INTO test_table_join_2 VALUES (3, 'Join_2_Value_3'); + +-- { echoOn } + +SELECT * FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id ORDER BY t1.id, t1.value; + +SELECT '--'; + +SELECT * FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id ORDER BY t1.id, t1.value; + +SELECT '--'; + +SELECT * FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id ORDER BY t1.id, t1.value; + +SELECT '--'; + +SELECT * FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id ORDER BY t1.id, t1.value; + +-- { echoOff } + +DROP TABLE test_table_join_1; +DROP TABLE test_table_join_2; diff --git a/tests/queries/0_stateless/02384_analyzer_dict_get_join_get.reference b/tests/queries/0_stateless/02384_analyzer_dict_get_join_get.reference new file mode 100644 index 00000000000..5f783010a1c --- /dev/null +++ b/tests/queries/0_stateless/02384_analyzer_dict_get_join_get.reference @@ -0,0 +1,10 @@ +Dictionary +0 Value +Value +Value +Value +JOIN +0 Value +Value +Value +Value diff --git a/tests/queries/0_stateless/02384_analyzer_dict_get_join_get.sql b/tests/queries/0_stateless/02384_analyzer_dict_get_join_get.sql new file mode 100644 index 00000000000..ff6e417d756 --- /dev/null +++ b/tests/queries/0_stateless/02384_analyzer_dict_get_join_get.sql @@ -0,0 +1,59 @@ +SET allow_experimental_analyzer = 1; + +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + id UInt64, + value String +) ENGINE=TinyLog; + +INSERT INTO test_table VALUES (0, 'Value'); + +DROP DICTIONARY IF EXISTS test_dictionary; +CREATE DICTIONARY test_dictionary +( + id UInt64, + value String +) +PRIMARY KEY id +LAYOUT(FLAT()) +SOURCE(CLICKHOUSE(TABLE 'test_table')) +LIFETIME(0); + +SELECT 'Dictionary'; + +SELECT * FROM test_dictionary; + +SELECT dictGet('test_dictionary', 'value', toUInt64(0)); + +SELECT dictGet(test_dictionary, 'value', toUInt64(0)); + +WITH 'test_dictionary' AS dictionary SELECT dictGet(dictionary, 'value', toUInt64(0)); + +WITH 'invalid_dictionary' AS dictionary SELECT dictGet(dictionary, 'value', toUInt64(0)); -- { serverError 36 } + +DROP DICTIONARY test_dictionary; +DROP TABLE test_table; + +DROP TABLE IF EXISTS test_table_join; +CREATE TABLE test_table_join +( + id UInt64, + value String +) ENGINE=Join(Any, Left, id); + +INSERT INTO test_table_join VALUES (0, 'Value'); + +SELECT 'JOIN'; + +SELECT * FROM test_table_join; + +SELECT joinGet('test_table_join', 'value', toUInt64(0)); + +SELECT joinGet(test_table_join, 'value', toUInt64(0)); + +WITH 'test_table_join' AS join_table SELECT joinGet(join_table, 'value', toUInt64(0)); + +WITH 'invalid_test_table_join' AS join_table SELECT joinGet(join_table, 'value', toUInt64(0)); -- { serverError 60 } + +DROP TABLE test_table_join; diff --git 
a/tests/queries/0_stateless/02385_analyzer_aliases_compound_expression.reference b/tests/queries/0_stateless/02385_analyzer_aliases_compound_expression.reference new file mode 100644 index 00000000000..05c5c9872a6 --- /dev/null +++ b/tests/queries/0_stateless/02385_analyzer_aliases_compound_expression.reference @@ -0,0 +1,7 @@ +(1,'Value') 1 Value +-- +2 +-- +1 1 +-- +1 1 diff --git a/tests/queries/0_stateless/02385_analyzer_aliases_compound_expression.sql b/tests/queries/0_stateless/02385_analyzer_aliases_compound_expression.sql new file mode 100644 index 00000000000..1a195bbfffe --- /dev/null +++ b/tests/queries/0_stateless/02385_analyzer_aliases_compound_expression.sql @@ -0,0 +1,21 @@ +SET allow_experimental_analyzer = 1; + +SELECT cast(tuple(1, 'Value'), 'Tuple(first UInt64, second String)') AS value, value.first, value.second; + +SELECT '--'; + +WITH (x -> x + 1) AS lambda SELECT lambda(1); + +WITH (x -> x + 1) AS lambda SELECT lambda.nested(1); -- { serverError 36 } + +SELECT '--'; + +SELECT * FROM (SELECT 1) AS t1, t1 AS t2; + +SELECT '--'; + +SELECT * FROM t1 AS t2, (SELECT 1) AS t1; + +SELECT * FROM (SELECT 1) AS t1, t1.nested AS t2; -- { serverError 36 } + +SELECT * FROM t1.nested AS t2, (SELECT 1) AS t1; -- { serverError 36 } diff --git a/tests/queries/0_stateless/02386_analyzer_in_function_nested_subqueries.reference b/tests/queries/0_stateless/02386_analyzer_in_function_nested_subqueries.reference new file mode 100644 index 00000000000..dec7d2fabd2 --- /dev/null +++ b/tests/queries/0_stateless/02386_analyzer_in_function_nested_subqueries.reference @@ -0,0 +1 @@ +\N diff --git a/tests/queries/0_stateless/02386_analyzer_in_function_nested_subqueries.sql b/tests/queries/0_stateless/02386_analyzer_in_function_nested_subqueries.sql new file mode 100644 index 00000000000..c8ca3ff21d4 --- /dev/null +++ b/tests/queries/0_stateless/02386_analyzer_in_function_nested_subqueries.sql @@ -0,0 +1,3 @@ +SET allow_experimental_analyzer = 1; + +SELECT (NULL IN (SELECT 9223372036854775806 IN (SELECT 65536), inf, NULL IN (NULL))) IN (SELECT NULL IN (NULL)); diff --git a/tests/queries/0_stateless/02387_analyzer_cte.reference b/tests/queries/0_stateless/02387_analyzer_cte.reference new file mode 100644 index 00000000000..1ad3aee198b --- /dev/null +++ b/tests/queries/0_stateless/02387_analyzer_cte.reference @@ -0,0 +1,7 @@ +1 +-- +0 Value +-- +1 +-- +0 Value diff --git a/tests/queries/0_stateless/02387_analyzer_cte.sql b/tests/queries/0_stateless/02387_analyzer_cte.sql new file mode 100644 index 00000000000..1f10ac10438 --- /dev/null +++ b/tests/queries/0_stateless/02387_analyzer_cte.sql @@ -0,0 +1,26 @@ +SET allow_experimental_analyzer = 1; + +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + id UInt64, + value String +) ENGINE=TinyLog; + +INSERT INTO test_table VALUES (0, 'Value'); + +WITH cte_subquery AS (SELECT 1) SELECT * FROM cte_subquery; + +SELECT '--'; + +WITH cte_subquery AS (SELECT * FROM test_table) SELECT * FROM cte_subquery; + +SELECT '--'; + +WITH cte_subquery AS (SELECT 1 UNION DISTINCT SELECT 1) SELECT * FROM cte_subquery; + +SELECT '--'; + +WITH cte_subquery AS (SELECT * FROM test_table UNION DISTINCT SELECT * FROM test_table) SELECT * FROM cte_subquery; + +DROP TABLE test_table; diff --git a/tests/queries/0_stateless/02388_analyzer_recursive_lambda.reference b/tests/queries/0_stateless/02388_analyzer_recursive_lambda.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02388_analyzer_recursive_lambda.sql 
b/tests/queries/0_stateless/02388_analyzer_recursive_lambda.sql new file mode 100644 index 00000000000..6fc8ff2aae0 --- /dev/null +++ b/tests/queries/0_stateless/02388_analyzer_recursive_lambda.sql @@ -0,0 +1,5 @@ +SET allow_experimental_analyzer = 1; + +WITH x -> plus(lambda(1), x) AS lambda SELECT lambda(1048576); -- { serverError 1 }; + +WITH lambda(lambda(plus(x, x, -1)), tuple(x), x + 2147483646) AS lambda, x -> plus(lambda(1), x, 2) AS lambda SELECT 1048576, lambda(1048576); -- { serverError 1 }; diff --git a/tests/queries/0_stateless/02389_analyzer_nested_lambda.reference b/tests/queries/0_stateless/02389_analyzer_nested_lambda.reference new file mode 100644 index 00000000000..935c53358c0 --- /dev/null +++ b/tests/queries/0_stateless/02389_analyzer_nested_lambda.reference @@ -0,0 +1,121 @@ +-- { echoOn } + +SELECT arrayMap(x -> x + arrayMap(x -> x + 1, [1])[1], [1,2,3]); +[3,4,5] +SELECT '--'; +-- +SELECT arrayMap(x -> x + arrayMap(x -> 5, [1])[1], [1,2,3]); +[6,7,8] +SELECT '--'; +-- +SELECT 5 AS constant, arrayMap(x -> x + arrayMap(x -> constant, [1])[1], [1,2,3]); +5 [6,7,8] +SELECT '--'; +-- +SELECT arrayMap(x -> x + arrayMap(x -> x, [1])[1], [1,2,3]); +[2,3,4] +SELECT '--'; +-- +SELECT arrayMap(x -> x + arrayMap(y -> x + y, [1])[1], [1,2,3]); +[3,5,7] +SELECT '--'; +-- +SELECT arrayMap(x -> x + arrayMap(x -> (SELECT 5), [1])[1], [1,2,3]); +[6,7,8] +SELECT '--'; +-- +SELECT (SELECT 5) AS subquery, arrayMap(x -> x + arrayMap(x -> subquery, [1])[1], [1,2,3]); +5 [6,7,8] +SELECT '--'; +-- +SELECT arrayMap(x -> x + arrayMap(x -> (SELECT 5 UNION DISTINCT SELECT 5), [1])[1], [1,2,3]); +[6,7,8] +SELECT '--'; +-- +SELECT (SELECT 5 UNION DISTINCT SELECT 5) AS subquery, arrayMap(x -> x + arrayMap(x -> subquery, [1])[1], [1,2,3]); +5 [6,7,8] +SELECT '--'; +-- +WITH x -> toString(x) AS lambda SELECT arrayMap(x -> lambda(x), [1,2,3]); +['1','2','3'] +SELECT '--'; +-- +WITH x -> toString(x) AS lambda SELECT arrayMap(x -> arrayMap(y -> concat(lambda(x), '_', lambda(y)), [1,2,3]), [1,2,3]); +[['1_1','1_2','1_3'],['2_1','2_2','2_3'],['3_1','3_2','3_3']] +SELECT '--'; +-- +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + id UInt64, + value String +) ENGINE=TinyLog; +INSERT INTO test_table VALUES (0, 'Value'); +SELECT arrayMap(x -> x + arrayMap(x -> id, [1])[1], [1,2,3]) FROM test_table; +[1,2,3] +SELECT '--'; +-- +SELECT arrayMap(x -> x + arrayMap(x -> x + id, [1])[1], [1,2,3]) FROM test_table; +[2,3,4] +SELECT '--'; +-- +SELECT arrayMap(x -> x + arrayMap(y -> x + y + id, [1])[1], [1,2,3]) FROM test_table; +[3,5,7] +SELECT '--'; +-- +SELECT id AS id_alias, arrayMap(x -> x + arrayMap(y -> x + y + id_alias, [1])[1], [1,2,3]) FROM test_table; +0 [3,5,7] +SELECT '--'; +-- +SELECT arrayMap(x -> x + arrayMap(x -> 5, [1])[1], [1,2,3]) FROM test_table; +[6,7,8] +SELECT '--'; +-- +SELECT 5 AS constant, arrayMap(x -> x + arrayMap(x -> constant, [1])[1], [1,2,3]) FROM test_table; +5 [6,7,8] +SELECT '--'; +-- +SELECT 5 AS constant, arrayMap(x -> x + arrayMap(x -> x + constant, [1])[1], [1,2,3]) FROM test_table; +5 [7,8,9] +SELECT '--'; +-- +SELECT 5 AS constant, arrayMap(x -> x + arrayMap(x -> x + id + constant, [1])[1], [1,2,3]) FROM test_table; +5 [7,8,9] +SELECT '--'; +-- +SELECT 5 AS constant, arrayMap(x -> x + arrayMap(y -> x + y + id + constant, [1])[1], [1,2,3]) FROM test_table; +5 [8,10,12] +SELECT '--'; +-- +SELECT arrayMap(x -> x + arrayMap(x -> id + (SELECT id FROM test_table), [1])[1], [1,2,3]) FROM test_table; +[1,2,3] +SELECT '--'; +-- +SELECT arrayMap(x -> id + arrayMap(x -> id + 
(SELECT id FROM test_table), [1])[1], [1,2,3]) FROM test_table; +[0,0,0] +SELECT '--'; +-- +SELECT arrayMap(x -> id + arrayMap(x -> id + (SELECT id FROM test_table UNION DISTINCT SELECT id FROM test_table), [1])[1], [1,2,3]) FROM test_table; +[0,0,0] +SELECT '--'; +-- +WITH x -> toString(id) AS lambda SELECT arrayMap(x -> lambda(x), [1,2,3]) FROM test_table; +['0','0','0'] +SELECT '--'; +-- +WITH x -> toString(id) AS lambda SELECT arrayMap(x -> arrayMap(y -> lambda(y), [1,2,3]), [1,2,3]) FROM test_table; +[['0','0','0'],['0','0','0'],['0','0','0']] +SELECT '--'; +-- +WITH x -> toString(id) AS lambda SELECT arrayMap(x -> arrayMap(y -> concat(lambda(x), '_', lambda(y)), [1,2,3]), [1,2,3]) FROM test_table; +[['0_0','0_0','0_0'],['0_0','0_0','0_0'],['0_0','0_0','0_0']] +SELECT '--'; +-- +SELECT arrayMap(x -> concat(concat(concat(concat(concat(toString(id), '___\0_______\0____'), toString(id), concat(concat(toString(id), ''), toString(id)), toString(id)), + arrayMap(x -> concat(concat(concat(concat(toString(id), ''), toString(id)), toString(id), '___\0_______\0____'), toString(id)) AS lambda, [NULL, inf, 1, 1]), + concat(toString(id), NULL), toString(id)), toString(id))) AS lambda, [NULL, NULL, 2147483647]) +FROM test_table WHERE concat(concat(concat(toString(id), '___\0_______\0____'), toString(id)), concat(toString(id), NULL), toString(id)); +SELECT '--'; +-- +SELECT arrayMap(x -> concat(toString(id), arrayMap(x -> toString(1), [NULL])), [NULL]) FROM test_table; -- { serverError 44 }; +DROP TABLE test_table; diff --git a/tests/queries/0_stateless/02389_analyzer_nested_lambda.sql b/tests/queries/0_stateless/02389_analyzer_nested_lambda.sql new file mode 100644 index 00000000000..8f8b5537da9 --- /dev/null +++ b/tests/queries/0_stateless/02389_analyzer_nested_lambda.sql @@ -0,0 +1,129 @@ +SET allow_experimental_analyzer = 1; + +-- { echoOn } + +SELECT arrayMap(x -> x + arrayMap(x -> x + 1, [1])[1], [1,2,3]); + +SELECT '--'; + +SELECT arrayMap(x -> x + arrayMap(x -> 5, [1])[1], [1,2,3]); + +SELECT '--'; + +SELECT 5 AS constant, arrayMap(x -> x + arrayMap(x -> constant, [1])[1], [1,2,3]); + +SELECT '--'; + +SELECT arrayMap(x -> x + arrayMap(x -> x, [1])[1], [1,2,3]); + +SELECT '--'; + +SELECT arrayMap(x -> x + arrayMap(y -> x + y, [1])[1], [1,2,3]); + +SELECT '--'; + +SELECT arrayMap(x -> x + arrayMap(x -> (SELECT 5), [1])[1], [1,2,3]); + +SELECT '--'; + +SELECT (SELECT 5) AS subquery, arrayMap(x -> x + arrayMap(x -> subquery, [1])[1], [1,2,3]); + +SELECT '--'; + +SELECT arrayMap(x -> x + arrayMap(x -> (SELECT 5 UNION DISTINCT SELECT 5), [1])[1], [1,2,3]); + +SELECT '--'; + +SELECT (SELECT 5 UNION DISTINCT SELECT 5) AS subquery, arrayMap(x -> x + arrayMap(x -> subquery, [1])[1], [1,2,3]); + +SELECT '--'; + +WITH x -> toString(x) AS lambda SELECT arrayMap(x -> lambda(x), [1,2,3]); + +SELECT '--'; + +WITH x -> toString(x) AS lambda SELECT arrayMap(x -> arrayMap(y -> concat(lambda(x), '_', lambda(y)), [1,2,3]), [1,2,3]); + +SELECT '--'; + +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + id UInt64, + value String +) ENGINE=TinyLog; + +INSERT INTO test_table VALUES (0, 'Value'); + +SELECT arrayMap(x -> x + arrayMap(x -> id, [1])[1], [1,2,3]) FROM test_table; + +SELECT '--'; + +SELECT arrayMap(x -> x + arrayMap(x -> x + id, [1])[1], [1,2,3]) FROM test_table; + +SELECT '--'; + +SELECT arrayMap(x -> x + arrayMap(y -> x + y + id, [1])[1], [1,2,3]) FROM test_table; + +SELECT '--'; + +SELECT id AS id_alias, arrayMap(x -> x + arrayMap(y -> x + y + id_alias, [1])[1], [1,2,3]) FROM test_table; + 
+SELECT '--'; + +SELECT arrayMap(x -> x + arrayMap(x -> 5, [1])[1], [1,2,3]) FROM test_table; + +SELECT '--'; + +SELECT 5 AS constant, arrayMap(x -> x + arrayMap(x -> constant, [1])[1], [1,2,3]) FROM test_table; + +SELECT '--'; + +SELECT 5 AS constant, arrayMap(x -> x + arrayMap(x -> x + constant, [1])[1], [1,2,3]) FROM test_table; + +SELECT '--'; + +SELECT 5 AS constant, arrayMap(x -> x + arrayMap(x -> x + id + constant, [1])[1], [1,2,3]) FROM test_table; + +SELECT '--'; + +SELECT 5 AS constant, arrayMap(x -> x + arrayMap(y -> x + y + id + constant, [1])[1], [1,2,3]) FROM test_table; + +SELECT '--'; + +SELECT arrayMap(x -> x + arrayMap(x -> id + (SELECT id FROM test_table), [1])[1], [1,2,3]) FROM test_table; + +SELECT '--'; + +SELECT arrayMap(x -> id + arrayMap(x -> id + (SELECT id FROM test_table), [1])[1], [1,2,3]) FROM test_table; + +SELECT '--'; + +SELECT arrayMap(x -> id + arrayMap(x -> id + (SELECT id FROM test_table UNION DISTINCT SELECT id FROM test_table), [1])[1], [1,2,3]) FROM test_table; + +SELECT '--'; + +WITH x -> toString(id) AS lambda SELECT arrayMap(x -> lambda(x), [1,2,3]) FROM test_table; + +SELECT '--'; + +WITH x -> toString(id) AS lambda SELECT arrayMap(x -> arrayMap(y -> lambda(y), [1,2,3]), [1,2,3]) FROM test_table; + +SELECT '--'; + +WITH x -> toString(id) AS lambda SELECT arrayMap(x -> arrayMap(y -> concat(lambda(x), '_', lambda(y)), [1,2,3]), [1,2,3]) FROM test_table; + +SELECT '--'; + +SELECT arrayMap(x -> concat(concat(concat(concat(concat(toString(id), '___\0_______\0____'), toString(id), concat(concat(toString(id), ''), toString(id)), toString(id)), + arrayMap(x -> concat(concat(concat(concat(toString(id), ''), toString(id)), toString(id), '___\0_______\0____'), toString(id)) AS lambda, [NULL, inf, 1, 1]), + concat(toString(id), NULL), toString(id)), toString(id))) AS lambda, [NULL, NULL, 2147483647]) +FROM test_table WHERE concat(concat(concat(toString(id), '___\0_______\0____'), toString(id)), concat(toString(id), NULL), toString(id)); + +SELECT '--'; + +SELECT arrayMap(x -> concat(toString(id), arrayMap(x -> toString(1), [NULL])), [NULL]) FROM test_table; -- { serverError 44 }; + +DROP TABLE test_table; + +-- { echoOff } diff --git a/tests/queries/0_stateless/02403_date_time_narrowing.reference b/tests/queries/0_stateless/02403_date_time_narrowing.reference deleted file mode 100644 index 7d6e91c61b8..00000000000 --- a/tests/queries/0_stateless/02403_date_time_narrowing.reference +++ /dev/null @@ -1,20 +0,0 @@ -1970-01-01 2149-06-06 1970-01-01 2149-06-06 1900-01-01 1970-01-02 1970-01-01 00:00:00 2106-02-07 06:28:15 -1970-01-01 2149-06-06 -1970-01-01 2149-06-06 -1970-01-01 00:00:00 2106-02-07 06:28:15 -1970-01-01 00:00:00 2106-02-07 06:28:15 -2106-02-07 06:28:15 -toStartOfDay -2106-02-07 00:00:00 1970-01-01 00:00:00 2106-02-07 00:00:00 1970-01-01 00:00:00 2106-02-07 00:00:00 -toStartOfWeek -1970-01-01 1970-01-01 1970-01-01 1970-01-01 1970-01-01 2149-06-01 1970-01-01 2149-06-02 -toMonday -1970-01-01 1970-01-01 2149-06-02 1970-01-01 2149-06-02 -toStartOfMonth -1970-01-01 2149-06-01 1970-01-01 2149-06-01 -toLastDayOfMonth -2149-05-31 1970-01-01 2149-05-31 1970-01-01 2149-05-31 -toStartOfQuarter -1970-01-01 2149-04-01 1970-01-01 2149-04-01 -toStartOfYear -1970-01-01 2149-01-01 1970-01-01 2149-01-01 diff --git a/tests/queries/0_stateless/02403_date_time_narrowing.sql b/tests/queries/0_stateless/02403_date_time_narrowing.sql deleted file mode 100644 index 07cbba6f31c..00000000000 --- a/tests/queries/0_stateless/02403_date_time_narrowing.sql +++ /dev/null @@ -1,74 
+0,0 @@ --- check conversion of numbers to date/time -- -SELECT toDate(toInt32(toDate32('1930-01-01', 'UTC')), 'UTC'), - toDate(toInt32(toDate32('2151-01-01', 'UTC')), 'UTC'), - toDate(toInt64(toDateTime64('1930-01-01 12:12:12.123', 3, 'UTC')), 'UTC'), - toDate(toInt64(toDateTime64('2151-01-01 12:12:12.123', 3, 'UTC')), 'UTC'), - toDate32(toInt32(toDate32('1900-01-01', 'UTC')) - 1, 'UTC'), - toDate32(toInt32(toDate32('2299-12-31', 'UTC')) + 1, 'UTC'), - toDateTime(toInt64(toDateTime64('1930-01-01 12:12:12.123', 3, 'UTC')), 'UTC'), - toDateTime(toInt64(toDateTime64('2151-01-01 12:12:12.123', 3, 'UTC')), 'UTC'); - --- check conversion of extended range type to normal range type -- -SELECT toDate(toDate32('1930-01-01', 'UTC'), 'UTC'), - toDate(toDate32('2151-01-01', 'UTC'), 'UTC'); - -SELECT toDate(toDateTime64('1930-01-01 12:12:12.12', 3, 'UTC'), 'UTC'), - toDate(toDateTime64('2151-01-01 12:12:12.12', 3, 'UTC'), 'UTC'); - -SELECT toDateTime(toDateTime64('1930-01-01 12:12:12.12', 3, 'UTC'), 'UTC'), - toDateTime(toDateTime64('2151-01-01 12:12:12.12', 3, 'UTC'), 'UTC'); - -SELECT toDateTime(toDate32('1930-01-01', 'UTC'), 'UTC'), - toDateTime(toDate32('2151-01-01', 'UTC'), 'UTC'); - -SELECT toDateTime(toDate('2141-01-01', 'UTC'), 'UTC'); - --- test DateTimeTransforms -- -SELECT 'toStartOfDay'; -SELECT toStartOfDay(toDate('2141-01-01', 'UTC'), 'UTC'), - toStartOfDay(toDate32('1930-01-01', 'UTC'), 'UTC'), - toStartOfDay(toDate32('2141-01-01', 'UTC'), 'UTC'), - toStartOfDay(toDateTime64('1930-01-01 12:12:12.123', 3, 'UTC'), 'UTC'), - toStartOfDay(toDateTime64('2141-01-01 12:12:12.123', 3, 'UTC'), 'UTC'); - -SELECT 'toStartOfWeek'; -SELECT toStartOfWeek(toDate('1970-01-01', 'UTC')), - toStartOfWeek(toDate32('1970-01-01', 'UTC')), - toStartOfWeek(toDateTime('1970-01-01 10:10:10', 'UTC'), 0, 'UTC'), - toStartOfWeek(toDateTime64('1970-01-01 10:10:10.123', 3, 'UTC'), 1, 'UTC'), - toStartOfWeek(toDate32('1930-01-01', 'UTC')), - toStartOfWeek(toDate32('2151-01-01', 'UTC')), - toStartOfWeek(toDateTime64('1930-01-01 12:12:12.123', 3, 'UTC'), 2, 'UTC'), - toStartOfWeek(toDateTime64('2151-01-01 12:12:12.123', 3, 'UTC'), 3, 'UTC'); - -SELECT 'toMonday'; -SELECT toMonday(toDate('1970-01-02', 'UTC')), - toMonday(toDate32('1930-01-01', 'UTC')), - toMonday(toDate32('2151-01-01', 'UTC')), - toMonday(toDateTime64('1930-01-01 12:12:12.123', 3, 'UTC'), 'UTC'), - toMonday(toDateTime64('2151-01-01 12:12:12.123', 3, 'UTC'), 'UTC'); - -SELECT 'toStartOfMonth'; -SELECT toStartOfMonth(toDate32('1930-01-01', 'UTC')), - toStartOfMonth(toDate32('2151-01-01', 'UTC')), - toStartOfMonth(toDateTime64('1930-01-01 12:12:12.123', 3, 'UTC'), 'UTC'), - toStartOfMonth(toDateTime64('2151-01-01 12:12:12.123', 3, 'UTC'), 'UTC'); - -SELECT 'toLastDayOfMonth'; -SELECT toLastDayOfMonth(toDate('2149-06-03', 'UTC')), - toLastDayOfMonth(toDate32('1930-01-01', 'UTC')), - toLastDayOfMonth(toDate32('2151-01-01', 'UTC')), - toLastDayOfMonth(toDateTime64('1930-01-01 12:12:12.123', 3, 'UTC'), 'UTC'), - toLastDayOfMonth(toDateTime64('2151-01-01 12:12:12.123', 3, 'UTC'), 'UTC'); - -SELECT 'toStartOfQuarter'; -SELECT toStartOfQuarter(toDate32('1930-01-01', 'UTC')), - toStartOfQuarter(toDate32('2151-01-01', 'UTC')), - toStartOfQuarter(toDateTime64('1930-01-01 12:12:12.123', 3, 'UTC'), 'UTC'), - toStartOfQuarter(toDateTime64('2151-01-01 12:12:12.123', 3, 'UTC'), 'UTC'); - -SELECT 'toStartOfYear'; -SELECT toStartOfYear(toDate32('1930-01-01', 'UTC')), - toStartOfYear(toDate32('2151-01-01', 'UTC')), - toStartOfYear(toDateTime64('1930-01-01 12:12:12.123', 3, 
'UTC'), 'UTC'), - toStartOfYear(toDateTime64('2151-01-01 12:12:12.123', 3, 'UTC'), 'UTC'); diff --git a/tests/queries/0_stateless/02403_enable_extended_results_for_datetime_functions.reference b/tests/queries/0_stateless/02403_enable_extended_results_for_datetime_functions.reference index 5773810bf64..025191c234a 100644 --- a/tests/queries/0_stateless/02403_enable_extended_results_for_datetime_functions.reference +++ b/tests/queries/0_stateless/02403_enable_extended_results_for_datetime_functions.reference @@ -42,39 +42,39 @@ timeSlot;toDateTime64;true 1920-02-02 10:00:00.000 type;timeSlot;toDateTime64;true DateTime64(3, \'UTC\') toStartOfDay;toDate32;true 1920-02-02 00:00:00.000 type;toStartOfDay;toDate32;true DateTime64(3, \'UTC\') -toStartOfYear;toDate32;false 1970-01-01 +toStartOfYear;toDate32;false 2099-06-06 type;toStartOfYear;toDate32;false Date -toStartOfYear;toDateTime64;false 1970-01-01 +toStartOfYear;toDateTime64;false 2099-06-06 type;toStartOfYear;toDateTime64;false Date toStartOfISOYear;toDate32;false 1970-01-01 type;toStartOfISOYear;toDate32;false Date toStartOfISOYear;toDateTime64;false 1970-01-01 type;toStartOfISOYear;toDateTime64;false Date -toStartOfQuarter;toDate32;false 1970-01-01 +toStartOfQuarter;toDate32;false 2099-06-06 type;toStartOfQuarter;toDate32;false Date -toStartOfQuarter;toDateTime64;false 1970-01-01 +toStartOfQuarter;toDateTime64;false 2099-06-06 type;toStartOfQuarter;toDateTime64;false Date -toStartOfMonth;toDate32;false 1970-01-01 +toStartOfMonth;toDate32;false 2099-07-07 type;toStartOfMonth;toDate32;false Date -toStartOfMonth;toDateTime64;false 1970-01-01 +toStartOfMonth;toDateTime64;false 2099-07-07 type;toStartOfMonth;toDateTime64;false Date -toStartOfWeek;toDate32;false 1970-01-01 +toStartOfWeek;toDate32;false 2099-07-07 type;toStartOfWeek;toDate32;false Date -toStartOfWeek;toDateTime64;false 1970-01-01 +toStartOfWeek;toDateTime64;false 2099-07-07 type;toStartOfWeek;toDateTime64;false Date -toMonday;toDate32;false 1970-01-01 +toMonday;toDate32;false 2099-07-08 type;toMonday;toDate32;false Date -toMonday;toDateTime64;false 1970-01-01 +toMonday;toDateTime64;false 2099-07-08 type;toMonday;toDateTime64;false Date -toLastDayOfMonth;toDate32;false 1970-01-01 +toLastDayOfMonth;toDate32;false 2099-08-04 type;toLastDayOfMonth;toDate32;false Date -toLastDayOfMonth;toDateTime64;false 1970-01-01 +toLastDayOfMonth;toDateTime64;false 2099-08-04 type;toLastDayOfMonth;toDateTime64;false Date -toStartOfDay;toDateTime64;false 1970-01-01 00:00:00 +toStartOfDay;toDateTime64;false 2056-03-09 06:28:16 type;toStartOfDay;toDateTime64;false DateTime(\'UTC\') -toStartOfHour;toDateTime64;false 1970-01-01 00:00:00 +toStartOfHour;toDateTime64;false 2056-03-09 16:28:16 type;toStartOfHour;toDateTime64;false DateTime(\'UTC\') -toStartOfMinute;toDateTime64;false 1970-01-01 00:00:00 +toStartOfMinute;toDateTime64;false 2056-03-09 16:51:16 type;toStartOfMinute;toDateTime64;false DateTime(\'UTC\') toStartOfFiveMinutes;toDateTime64;false 2056-03-09 16:48:16 type;toStartOfFiveMinutes;toDateTime64;false DateTime(\'UTC\') @@ -84,5 +84,5 @@ toStartOfFifteenMinutes;toDateTime64;false 2056-03-09 16:43:16 type;toStartOfFifteenMinutes;toDateTime64;false DateTime(\'UTC\') timeSlot;toDateTime64;false 2056-03-09 16:58:16 type;timeSlot;toDateTime64;false DateTime(\'UTC\') -toStartOfDay;toDate32;false 1970-01-01 00:00:00 +toStartOfDay;toDate32;false 2056-03-09 06:28:16 type;toStartOfDay;toDate32;false DateTime(\'UTC\') diff --git 
a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference index c7ac00ee18f..040a8c8d317 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference @@ -219,10 +219,6 @@ cutFragment cutIPv6 cutQueryString cutQueryStringAndFragment -cutToFirstSignificantSubdomain -cutToFirstSignificantSubdomainCustom -cutToFirstSignificantSubdomainCustomWithWWW -cutToFirstSignificantSubdomainWithWWW cutURLParameter cutWWW dateDiff @@ -280,8 +276,6 @@ dictGetUUIDOrDefault dictHas dictIsIn divide -domain -domainWithoutWWW dotProduct dumpColumnStructure e @@ -330,8 +324,8 @@ filesystemAvailable filesystemCapacity filesystemFree finalizeAggregation -firstSignificantSubdomain firstSignificantSubdomainCustom +firstSignificantSubdomainCustomRFC flattenTuple floor format @@ -592,7 +586,6 @@ polygonsUnionCartesian polygonsUnionSpherical polygonsWithinCartesian polygonsWithinSpherical -port position positionCaseInsensitive positionCaseInsensitiveUTF8 @@ -897,7 +890,6 @@ toYear toYearWeek today tokens -topLevelDomain transactionID transactionLatestSnapshot transactionOldestSnapshot diff --git a/tests/queries/0_stateless/02428_combinators_with_over_statement.reference b/tests/queries/0_stateless/02428_combinators_with_over_statement.reference new file mode 100644 index 00000000000..55be3f35cb1 --- /dev/null +++ b/tests/queries/0_stateless/02428_combinators_with_over_statement.reference @@ -0,0 +1,50 @@ +{1:'\0wR'} +{1:'\0D@='} +{1:'\07'} +{1:'\0޲'} +{1:'\0"Q'} +{1:'\0V\''} +{1:'\0\0'} +{1:'\0_'} +{1:'\0q4h'} +{1:'\0g7'} +['\0wR'] +['\0D@='] +['\07'] +['\0޲'] +['\0"Q'] +['\0V\''] +['\0\0'] +['\0_'] +['\0q4h'] +['\0g7'] +['\0Z','\0\0'] +['\04n','\0\0'] +['\0ޓ','\0\0'] +['\01','\0\0'] +['\0_/dev/null\r" +expect "Progress: " +expect "█" +send "\3" + +# It is true even if we redirect both stdout and stderr to /dev/null +send "\$CLICKHOUSE_LOCAL --query 'SELECT sum(sleep(1) = 0) FROM numbers(3) SETTINGS max_block_size = 1' >/dev/null 2>&1\r" +expect "Progress: " +expect "█" +send "\3" + +# The option --progress has implicit value of true +send "\$CLICKHOUSE_LOCAL --progress --query 'SELECT sum(sleep(1) = 0) FROM numbers(3) SETTINGS max_block_size = 1' >/dev/null 2>&1\r" +expect "Progress: " +expect "█" +send "\3" + +# But we can set it to false +send "\$CLICKHOUSE_LOCAL --progress false --query 'SELECT sleep(1), \$\$Hello\$\$ FROM numbers(3) SETTINGS max_block_size = 1' 2>/dev/null\r" +expect -exact "0\tHello\r\n" +send "\3" + +# As well as to 0 for the same effect +send "\$CLICKHOUSE_LOCAL --progress 0 --query 'SELECT sleep(1), \$\$Hello\$\$ FROM numbers(3) SETTINGS max_block_size = 1' 2>/dev/null\r" +expect -exact "0\tHello\r\n" +send "\3" + +# If we set it to 1, the progress will be displayed as well +send "\$CLICKHOUSE_LOCAL --progress 1 --query 'SELECT sum(sleep(1) = 0) FROM numbers(3) SETTINGS max_block_size = 1' >/dev/null 2>&1\r" +expect "Progress: " +expect "█" +send "\3" + +send "exit\r" +expect eof diff --git a/tests/queries/0_stateless/02456_progress_tty.reference b/tests/queries/0_stateless/02456_progress_tty.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02457_csv_parse_date_out_of_range.reference b/tests/queries/0_stateless/02457_csv_parse_date_out_of_range.reference new file mode 100644 index 00000000000..544e8100fa4 --- /dev/null +++ 
b/tests/queries/0_stateless/02457_csv_parse_date_out_of_range.reference @@ -0,0 +1,2 @@ +above 2149-06-06 +below 1970-01-01 diff --git a/tests/queries/0_stateless/02457_csv_parse_date_out_of_range.sh b/tests/queries/0_stateless/02457_csv_parse_date_out_of_range.sh new file mode 100755 index 00000000000..297f7e31bab --- /dev/null +++ b/tests/queries/0_stateless/02457_csv_parse_date_out_of_range.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS test_date_out_of_range sync"; +$CLICKHOUSE_CLIENT --query="CREATE TABLE test_date_out_of_range (f String, t Date) engine=Memory()"; + +printf '"above", 2200-12-31 +"below", 1900-01-01 +' | $CLICKHOUSE_CLIENT --input_format_defaults_for_omitted_fields=1 --input_format_csv_empty_as_default=1 --query="INSERT INTO test_date_out_of_range FORMAT CSV"; + +$CLICKHOUSE_CLIENT --query="SELECT * FROM test_date_out_of_range"; +$CLICKHOUSE_CLIENT --query="DROP TABLE test_date_out_of_range"; \ No newline at end of file diff --git a/tests/queries/0_stateless/02457_datediff_via_unix_epoch.reference b/tests/queries/0_stateless/02457_datediff_via_unix_epoch.reference new file mode 100644 index 00000000000..8b2de7a3eec --- /dev/null +++ b/tests/queries/0_stateless/02457_datediff_via_unix_epoch.reference @@ -0,0 +1,12 @@ +year 1 +year 1 +quarter 1 +quarter 1 +month 1 +month 1 +week 1 +week 1 +day 11 +day 11 +minute 1440 +second 86400 diff --git a/tests/queries/0_stateless/02457_datediff_via_unix_epoch.sql b/tests/queries/0_stateless/02457_datediff_via_unix_epoch.sql new file mode 100644 index 00000000000..ce977712943 --- /dev/null +++ b/tests/queries/0_stateless/02457_datediff_via_unix_epoch.sql @@ -0,0 +1,18 @@ +select 'year', date_diff('year', toDate32('1969-12-25'), toDate32('1970-01-05')); +select 'year', date_diff('year', toDateTime64('1969-12-25 10:00:00.000', 3), toDateTime64('1970-01-05 10:00:00.000', 3)); + +select 'quarter', date_diff('quarter', toDate32('1969-12-25'), toDate32('1970-01-05')); +select 'quarter', date_diff('quarter', toDateTime64('1969-12-25 10:00:00.000', 3), toDateTime64('1970-01-05 10:00:00.000', 3)); + +select 'month', date_diff('month', toDate32('1969-12-25'), toDate32('1970-01-05')); +select 'month', date_diff('month', toDateTime64('1969-12-25 10:00:00.000', 3), toDateTime64('1970-01-05 10:00:00.000', 3)); + +select 'week', date_diff('week', toDate32('1969-12-25'), toDate32('1970-01-05')); +select 'week', date_diff('week', toDateTime64('1969-12-25 10:00:00.000', 3), toDateTime64('1970-01-05 10:00:00.000', 3)); + +select 'day', date_diff('day', toDate32('1969-12-25'), toDate32('1970-01-05')); +select 'day', date_diff('day', toDateTime64('1969-12-25 10:00:00.000', 3), toDateTime64('1970-01-05 10:00:00.000', 3)); + +select 'minute', date_diff('minute', toDate32('1969-12-31'), toDate32('1970-01-01')); + +select 'second', date_diff('second', toDate32('1969-12-31'), toDate32('1970-01-01')); diff --git a/tests/queries/0_stateless/02457_key_condition_with_types_that_cannot_be_nullable.reference b/tests/queries/0_stateless/02457_key_condition_with_types_that_cannot_be_nullable.reference new file mode 100644 index 00000000000..13b65c29f05 --- /dev/null +++ b/tests/queries/0_stateless/02457_key_condition_with_types_that_cannot_be_nullable.reference @@ -0,0 +1 @@ +printer1 diff --git a/tests/queries/0_stateless/02457_key_condition_with_types_that_cannot_be_nullable.sql 
b/tests/queries/0_stateless/02457_key_condition_with_types_that_cannot_be_nullable.sql new file mode 100644 index 00000000000..690ec6c70e0 --- /dev/null +++ b/tests/queries/0_stateless/02457_key_condition_with_types_that_cannot_be_nullable.sql @@ -0,0 +1,9 @@ +drop table if exists test; + +create table test (Printer LowCardinality(String), IntervalStart DateTime) engine MergeTree partition by (hiveHash(Printer), toYear(IntervalStart)) order by (Printer, IntervalStart); + +insert into test values ('printer1', '2006-02-07 06:28:15'); + +select Printer from test where Printer='printer1'; + +drop table test; diff --git a/tests/queries/0_stateless/02457_morton_coding.reference b/tests/queries/0_stateless/02457_morton_coding.reference new file mode 100644 index 00000000000..311a515a458 --- /dev/null +++ b/tests/queries/0_stateless/02457_morton_coding.reference @@ -0,0 +1,12 @@ +----- START ----- +----- CONST ----- +2149 +(1,2,3,4) +4294967286 +(65534,65533) +4294967286 +(4294967286) +----- 256, 8 ----- +----- 65536, 4 ----- +----- 4294967296, 2 ----- +----- END ----- diff --git a/tests/queries/0_stateless/02457_morton_coding.sql b/tests/queries/0_stateless/02457_morton_coding.sql new file mode 100644 index 00000000000..4fc26f255f4 --- /dev/null +++ b/tests/queries/0_stateless/02457_morton_coding.sql @@ -0,0 +1,137 @@ +SELECT '----- START -----'; +drop table if exists morton_numbers_02457; +create table morton_numbers_02457( + n1 UInt32, + n2 UInt32, + n3 UInt16, + n4 UInt16, + n5 UInt8, + n6 UInt8, + n7 UInt8, + n8 UInt8 +) + Engine=MergeTree() + ORDER BY n1; + +SELECT '----- CONST -----'; +select mortonEncode(1,2,3,4); +select mortonDecode(4, 2149); +select mortonEncode(65534, 65533); +select mortonDecode(2, 4294967286); +select mortonEncode(4294967286); +select mortonDecode(1, 4294967286); + +SELECT '----- 256, 8 -----'; +insert into morton_numbers_02457 +select n1.number, n2.number, n3.number, n4.number, n5.number, n6.number, n7.number, n8.number +from numbers(256-4, 4) n1 + cross join numbers(256-4, 4) n2 + cross join numbers(256-4, 4) n3 + cross join numbers(256-4, 4) n4 + cross join numbers(256-4, 4) n5 + cross join numbers(256-4, 4) n6 + cross join numbers(256-4, 4) n7 + cross join numbers(256-4, 4) n8 +; +drop table if exists morton_numbers_1_02457; +create table morton_numbers_1_02457( + n1 UInt64, + n2 UInt64, + n3 UInt64, + n4 UInt64, + n5 UInt64, + n6 UInt64, + n7 UInt64, + n8 UInt64 +) + Engine=MergeTree() + ORDER BY n1; + +insert into morton_numbers_1_02457 +select untuple(mortonDecode(8, mortonEncode(n1, n2, n3, n4, n5, n6, n7, n8))) +from morton_numbers_02457; + +( + select * from morton_numbers_02457 + union distinct + select * from morton_numbers_1_02457 +) +except +( + select * from morton_numbers_02457 + intersect + select * from morton_numbers_1_02457 +); +drop table if exists morton_numbers_1_02457; + +SELECT '----- 65536, 4 -----'; +insert into morton_numbers_02457 +select n1.number, n2.number, n3.number, n4.number, 0, 0, 0, 0 +from numbers(pow(2, 16)-8,8) n1 + cross join numbers(pow(2, 16)-8, 8) n2 + cross join numbers(pow(2, 16)-8, 8) n3 + cross join numbers(pow(2, 16)-8, 8) n4 +; + +create table morton_numbers_2_02457( + n1 UInt64, + n2 UInt64, + n3 UInt64, + n4 UInt64 +) + Engine=MergeTree() + ORDER BY n1; + +insert into morton_numbers_2_02457 +select untuple(mortonDecode(4, mortonEncode(n1, n2, n3, n4))) +from morton_numbers_02457; + +( + select n1, n2, n3, n4 from morton_numbers_02457 + union distinct + select n1, n2, n3, n4 from morton_numbers_2_02457 +) +except +( 
+ select n1, n2, n3, n4 from morton_numbers_02457 + intersect + select n1, n2, n3, n4 from morton_numbers_2_02457 +); +drop table if exists morton_numbers_2_02457; + +SELECT '----- 4294967296, 2 -----'; +insert into morton_numbers_02457 +select n1.number, n2.number, 0, 0, 0, 0, 0, 0 +from numbers(pow(2, 32)-8,8) n1 + cross join numbers(pow(2, 32)-8, 8) n2 + cross join numbers(pow(2, 32)-8, 8) n3 + cross join numbers(pow(2, 32)-8, 8) n4 +; + +drop table if exists morton_numbers_3_02457; +create table morton_numbers_3_02457( + n1 UInt64, + n2 UInt64 +) + Engine=MergeTree() + ORDER BY n1; + +insert into morton_numbers_3_02457 +select untuple(mortonDecode(2, mortonEncode(n1, n2))) +from morton_numbers_02457; + +( + select n1, n2 from morton_numbers_3_02457 + union distinct + select n1, n2 from morton_numbers_3_02457 +) +except +( + select n1, n2 from morton_numbers_3_02457 + intersect + select n1, n2 from morton_numbers_3_02457 +); +drop table if exists morton_numbers_3_02457; + +SELECT '----- END -----'; +drop table if exists morton_numbers_02457; diff --git a/tests/queries/0_stateless/02457_morton_coding_with_mask.reference b/tests/queries/0_stateless/02457_morton_coding_with_mask.reference new file mode 100644 index 00000000000..32d5ce3ee27 --- /dev/null +++ b/tests/queries/0_stateless/02457_morton_coding_with_mask.reference @@ -0,0 +1,15 @@ +----- START ----- +----- CONST ----- +4205569 +(1,2,3,4) +4294967286 +(65534,65533) +4294967286 +(4294967286) +2147483648 +(128) +0 +----- (1,2,1,2) ----- +----- (1,4) ----- +----- (1,1,2) ----- +----- END ----- diff --git a/tests/queries/0_stateless/02457_morton_coding_with_mask.sql b/tests/queries/0_stateless/02457_morton_coding_with_mask.sql new file mode 100644 index 00000000000..5aeb1f380be --- /dev/null +++ b/tests/queries/0_stateless/02457_morton_coding_with_mask.sql @@ -0,0 +1,143 @@ +SELECT '----- START -----'; + +SELECT '----- CONST -----'; +select mortonEncode((1,2,3,1), 1,2,3,4); +select mortonDecode((1, 2, 3, 1), 4205569); +select mortonEncode((1,1), 65534, 65533); +select mortonDecode((1,1), 4294967286); +select mortonEncode(tuple(1), 4294967286); +select mortonDecode(tuple(1), 4294967286); +select mortonEncode(tuple(4), 128); +select mortonDecode(tuple(4), 2147483648); +select mortonEncode((4,4,4,4), 128, 128, 128, 128); + +SELECT '----- (1,2,1,2) -----'; +drop table if exists morton_numbers_mask_02457; +create table morton_numbers_mask_02457( + n1 UInt8, + n2 UInt8, + n3 UInt8, + n4 UInt8 +) + Engine=MergeTree() + ORDER BY n1; + +insert into morton_numbers_mask_02457 +select n1.number, n2.number, n3.number, n4.number +from numbers(256-16, 16) n1 + cross join numbers(256-16, 16) n2 + cross join numbers(256-16, 16) n3 + cross join numbers(256-16, 16) n4 +; +drop table if exists morton_numbers_mask_1_02457; +create table morton_numbers_mask_1_02457( + n1 UInt64, + n2 UInt64, + n3 UInt64, + n4 UInt64 +) + Engine=MergeTree() + ORDER BY n1; + +insert into morton_numbers_mask_1_02457 +select untuple(mortonDecode((1,2,1,2), mortonEncode((1,2,1,2), n1, n2, n3, n4))) +from morton_numbers_mask_02457; + +( + select * from morton_numbers_mask_02457 + union distinct + select * from morton_numbers_mask_1_02457 +) +except +( + select * from morton_numbers_mask_02457 + intersect + select * from morton_numbers_mask_1_02457 +); +drop table if exists morton_numbers_mask_02457; +drop table if exists morton_numbers_mask_1_02457; + +SELECT '----- (1,4) -----'; +drop table if exists morton_numbers_mask_02457; +create table morton_numbers_mask_02457( + n1 
UInt32, + n2 UInt8 +) + Engine=MergeTree() + ORDER BY n1; + +insert into morton_numbers_mask_02457 +select n1.number, n2.number +from numbers(pow(2, 32)-64, 64) n1 + cross join numbers(pow(2, 8)-64, 64) n2 +; +drop table if exists morton_numbers_mask_2_02457; +create table morton_numbers_mask_2_02457( + n1 UInt64, + n2 UInt64 +) + Engine=MergeTree() + ORDER BY n1; + +insert into morton_numbers_mask_2_02457 +select untuple(mortonDecode((1,4), mortonEncode((1,4), n1, n2))) +from morton_numbers_mask_02457; + +( + select * from morton_numbers_mask_02457 + union distinct + select * from morton_numbers_mask_2_02457 +) +except +( + select * from morton_numbers_mask_02457 + intersect + select * from morton_numbers_mask_2_02457 +); +drop table if exists morton_numbers_mask_02457; +drop table if exists morton_numbers_mask_2_02457; + +SELECT '----- (1,1,2) -----'; +drop table if exists morton_numbers_mask_02457; +create table morton_numbers_mask_02457( + n1 UInt16, + n2 UInt16, + n3 UInt8, +) + Engine=MergeTree() + ORDER BY n1; + +insert into morton_numbers_mask_02457 +select n1.number, n2.number, n3.number +from numbers(pow(2, 16)-64, 64) n1 + cross join numbers(pow(2, 16)-64, 64) n2 + cross join numbers(pow(2, 8)-64, 64) n3 +; +drop table if exists morton_numbers_mask_3_02457; +create table morton_numbers_mask_3_02457( + n1 UInt64, + n2 UInt64, + n3 UInt64 +) + Engine=MergeTree() + ORDER BY n1; + +insert into morton_numbers_mask_3_02457 +select untuple(mortonDecode((1,1,2), mortonEncode((1,1,2), n1, n2, n3))) +from morton_numbers_mask_02457; + +( + select * from morton_numbers_mask_02457 + union distinct + select * from morton_numbers_mask_3_02457 +) +except +( + select * from morton_numbers_mask_02457 + intersect + select * from morton_numbers_mask_3_02457 +); +drop table if exists morton_numbers_mask_02457; +drop table if exists morton_numbers_mask_3_02457; + +SELECT '----- END -----'; diff --git a/tests/queries/0_stateless/02457_s3_cluster_schema_inference.reference b/tests/queries/0_stateless/02457_s3_cluster_schema_inference.reference new file mode 100644 index 00000000000..b918bf2b155 --- /dev/null +++ b/tests/queries/0_stateless/02457_s3_cluster_schema_inference.reference @@ -0,0 +1,44 @@ +c1 Nullable(Int64) +c2 Nullable(Int64) +c3 Nullable(Int64) +c1 Nullable(Int64) +c2 Nullable(Int64) +c3 Nullable(Int64) +c1 Nullable(Int64) +c2 Nullable(Int64) +c3 Nullable(Int64) +c1 Nullable(Int64) +c2 Nullable(Int64) +c3 Nullable(Int64) +0 0 0 +0 0 0 +1 2 3 +4 5 6 +7 8 9 +10 11 12 +13 14 15 +16 17 18 +0 0 0 +0 0 0 +1 2 3 +4 5 6 +7 8 9 +10 11 12 +13 14 15 +16 17 18 +0 0 0 +0 0 0 +1 2 3 +4 5 6 +7 8 9 +10 11 12 +13 14 15 +16 17 18 +0 0 0 +0 0 0 +1 2 3 +4 5 6 +7 8 9 +10 11 12 +13 14 15 +16 17 18 diff --git a/tests/queries/0_stateless/02457_s3_cluster_schema_inference.sql b/tests/queries/0_stateless/02457_s3_cluster_schema_inference.sql new file mode 100644 index 00000000000..03e8785b24b --- /dev/null +++ b/tests/queries/0_stateless/02457_s3_cluster_schema_inference.sql @@ -0,0 +1,13 @@ +-- Tags: no-fasttest +-- Tag no-fasttest: Depends on AWS + +desc s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv'); +desc s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'TSV'); +desc s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'test', 'testtest'); +desc s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'test', 
'testtest', 'TSV'); + +select * from s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv') order by c1, c2, c3; +select * from s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'TSV') order by c1, c2, c3; +select * from s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'test', 'testtest') order by c1, c2, c3; +select * from s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'test', 'testtest', 'TSV') order by c1, c2, c3; + diff --git a/tests/queries/0_stateless/02458_datediff_date32.reference b/tests/queries/0_stateless/02458_datediff_date32.reference new file mode 100644 index 00000000000..fdb8273a74b --- /dev/null +++ b/tests/queries/0_stateless/02458_datediff_date32.reference @@ -0,0 +1,169 @@ +-- { echo } + +-- Date32 vs Date32 +SELECT dateDiff('second', toDate32('1927-01-01', 'UTC'), toDate32('1927-01-02', 'UTC'), 'UTC'); +86400 +SELECT dateDiff('minute', toDate32('1927-01-01', 'UTC'), toDate32('1927-01-02', 'UTC'), 'UTC'); +1440 +SELECT dateDiff('hour', toDate32('1927-01-01', 'UTC'), toDate32('1927-01-02', 'UTC'), 'UTC'); +24 +SELECT dateDiff('day', toDate32('1927-01-01', 'UTC'), toDate32('1927-01-02', 'UTC'), 'UTC'); +1 +SELECT dateDiff('week', toDate32('1927-01-01', 'UTC'), toDate32('1927-01-08', 'UTC'), 'UTC'); +1 +SELECT dateDiff('month', toDate32('1927-01-01', 'UTC'), toDate32('1927-02-01', 'UTC'), 'UTC'); +1 +SELECT dateDiff('quarter', toDate32('1927-01-01', 'UTC'), toDate32('1927-04-01', 'UTC'), 'UTC'); +1 +SELECT dateDiff('year', toDate32('1927-01-01', 'UTC'), toDate32('1928-01-01', 'UTC'), 'UTC'); +1 +-- With DateTime64 +-- Date32 vs DateTime64 +SELECT dateDiff('second', toDate32('1927-01-01', 'UTC'), toDateTime64('1927-01-02 00:00:00', 3, 'UTC'), 'UTC'); +86400 +SELECT dateDiff('minute', toDate32('1927-01-01', 'UTC'), toDateTime64('1927-01-02 00:00:00', 3, 'UTC'), 'UTC'); +1440 +SELECT dateDiff('hour', toDate32('1927-01-01', 'UTC'), toDateTime64('1927-01-02 00:00:00', 3, 'UTC'), 'UTC'); +24 +SELECT dateDiff('day', toDate32('1927-01-01', 'UTC'), toDateTime64('1927-01-02 00:00:00', 3, 'UTC'), 'UTC'); +1 +SELECT dateDiff('week', toDate32('1927-01-01', 'UTC'), toDateTime64('1927-01-08 00:00:00', 3, 'UTC'), 'UTC'); +1 +SELECT dateDiff('month', toDate32('1927-01-01', 'UTC'), toDateTime64('1927-02-01 00:00:00', 3, 'UTC'), 'UTC'); +1 +SELECT dateDiff('quarter', toDate32('1927-01-01', 'UTC'), toDateTime64('1927-04-01 00:00:00', 3, 'UTC'), 'UTC'); +1 +SELECT dateDiff('year', toDate32('1927-01-01', 'UTC'), toDateTime64('1928-01-01 00:00:00', 3, 'UTC'), 'UTC'); +1 +-- DateTime64 vs Date32 +SELECT dateDiff('second', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), toDate32('1927-01-02', 'UTC'), 'UTC'); +86400 +SELECT dateDiff('minute', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), toDate32('1927-01-02', 'UTC'), 'UTC'); +1440 +SELECT dateDiff('hour', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), toDate32('1927-01-02', 'UTC'), 'UTC'); +24 +SELECT dateDiff('day', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), toDate32('1927-01-02', 'UTC'), 'UTC'); +1 +SELECT dateDiff('week', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), toDate32('1927-01-08', 'UTC'), 'UTC'); +1 +SELECT dateDiff('month', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), toDate32('1927-02-01', 'UTC'), 'UTC'); +1 +SELECT dateDiff('quarter', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), toDate32('1927-04-01', 'UTC'), 'UTC'); +1 
+SELECT dateDiff('year', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), toDate32('1928-01-01', 'UTC'), 'UTC'); +1 +-- With DateTime +-- Date32 vs DateTime +SELECT dateDiff('second', toDate32('2015-08-18', 'UTC'), toDateTime('2015-08-19 00:00:00', 'UTC'), 'UTC'); +86400 +SELECT dateDiff('minute', toDate32('2015-08-18', 'UTC'), toDateTime('2015-08-19 00:00:00', 'UTC'), 'UTC'); +1440 +SELECT dateDiff('hour', toDate32('2015-08-18', 'UTC'), toDateTime('2015-08-19 00:00:00', 'UTC'), 'UTC'); +24 +SELECT dateDiff('day', toDate32('2015-08-18', 'UTC'), toDateTime('2015-08-19 00:00:00', 'UTC'), 'UTC'); +1 +SELECT dateDiff('week', toDate32('2015-08-18', 'UTC'), toDateTime('2015-08-25 00:00:00', 'UTC'), 'UTC'); +1 +SELECT dateDiff('month', toDate32('2015-08-18', 'UTC'), toDateTime('2015-09-18 00:00:00', 'UTC'), 'UTC'); +1 +SELECT dateDiff('quarter', toDate32('2015-08-18', 'UTC'), toDateTime('2015-11-18 00:00:00', 'UTC'), 'UTC'); +1 +SELECT dateDiff('year', toDate32('2015-08-18', 'UTC'), toDateTime('2016-08-18 00:00:00', 'UTC'), 'UTC'); +1 +-- DateTime vs Date32 +SELECT dateDiff('second', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-08-19', 'UTC'), 'UTC'); +86400 +SELECT dateDiff('minute', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-08-19', 'UTC'), 'UTC'); +1440 +SELECT dateDiff('hour', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-08-19', 'UTC'), 'UTC'); +24 +SELECT dateDiff('day', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-08-19', 'UTC'), 'UTC'); +1 +SELECT dateDiff('week', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-08-25', 'UTC'), 'UTC'); +1 +SELECT dateDiff('month', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-09-18', 'UTC'), 'UTC'); +1 +SELECT dateDiff('quarter', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-11-18', 'UTC'), 'UTC'); +1 +SELECT dateDiff('year', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2016-08-18', 'UTC'), 'UTC'); +1 +-- With Date +-- Date32 vs Date +SELECT dateDiff('second', toDate32('2015-08-18', 'UTC'), toDate('2015-08-19', 'UTC'), 'UTC'); +86400 +SELECT dateDiff('minute', toDate32('2015-08-18', 'UTC'), toDate('2015-08-19', 'UTC'), 'UTC'); +1440 +SELECT dateDiff('hour', toDate32('2015-08-18', 'UTC'), toDate('2015-08-19', 'UTC'), 'UTC'); +24 +SELECT dateDiff('day', toDate32('2015-08-18', 'UTC'), toDate('2015-08-19', 'UTC'), 'UTC'); +1 +SELECT dateDiff('week', toDate32('2015-08-18', 'UTC'), toDate('2015-08-25', 'UTC'), 'UTC'); +1 +SELECT dateDiff('month', toDate32('2015-08-18', 'UTC'), toDate('2015-09-18', 'UTC'), 'UTC'); +1 +SELECT dateDiff('quarter', toDate32('2015-08-18', 'UTC'), toDate('2015-11-18', 'UTC'), 'UTC'); +1 +SELECT dateDiff('year', toDate32('2015-08-18', 'UTC'), toDate('2016-08-18', 'UTC'), 'UTC'); +1 +-- Date vs Date32 +SELECT dateDiff('second', toDate('2015-08-18', 'UTC'), toDate32('2015-08-19', 'UTC'), 'UTC'); +86400 +SELECT dateDiff('minute', toDate('2015-08-18', 'UTC'), toDate32('2015-08-19', 'UTC'), 'UTC'); +1440 +SELECT dateDiff('hour', toDate('2015-08-18', 'UTC'), toDate32('2015-08-19', 'UTC'), 'UTC'); +24 +SELECT dateDiff('day', toDate('2015-08-18', 'UTC'), toDate32('2015-08-19', 'UTC'), 'UTC'); +1 +SELECT dateDiff('week', toDate('2015-08-18', 'UTC'), toDate32('2015-08-25', 'UTC'), 'UTC'); +1 +SELECT dateDiff('month', toDate('2015-08-18', 'UTC'), toDate32('2015-09-18', 'UTC'), 'UTC'); +1 +SELECT dateDiff('quarter', toDate('2015-08-18', 'UTC'), toDate32('2015-11-18', 'UTC'), 'UTC'); +1 +SELECT dateDiff('year', toDate('2015-08-18', 'UTC'), toDate32('2016-08-18', 
'UTC'), 'UTC'); +1 +-- Const vs non-const columns +SELECT dateDiff('day', toDate32('1927-01-01', 'UTC'), materialize(toDate32('1927-01-02', 'UTC')), 'UTC'); +1 +SELECT dateDiff('day', toDate32('1927-01-01', 'UTC'), materialize(toDateTime64('1927-01-02 00:00:00', 3, 'UTC')), 'UTC'); +1 +SELECT dateDiff('day', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), materialize(toDate32('1927-01-02', 'UTC')), 'UTC'); +1 +SELECT dateDiff('day', toDate32('2015-08-18', 'UTC'), materialize(toDateTime('2015-08-19 00:00:00', 'UTC')), 'UTC'); +1 +SELECT dateDiff('day', toDateTime('2015-08-18 00:00:00', 'UTC'), materialize(toDate32('2015-08-19', 'UTC')), 'UTC'); +1 +SELECT dateDiff('day', toDate32('2015-08-18', 'UTC'), materialize(toDate('2015-08-19', 'UTC')), 'UTC'); +1 +SELECT dateDiff('day', toDate('2015-08-18', 'UTC'), materialize(toDate32('2015-08-19', 'UTC')), 'UTC'); +1 +-- Non-const vs const columns +SELECT dateDiff('day', materialize(toDate32('1927-01-01', 'UTC')), toDate32('1927-01-02', 'UTC'), 'UTC'); +1 +SELECT dateDiff('day', materialize(toDate32('1927-01-01', 'UTC')), toDateTime64('1927-01-02 00:00:00', 3, 'UTC'), 'UTC'); +1 +SELECT dateDiff('day', materialize(toDateTime64('1927-01-01 00:00:00', 3, 'UTC')), toDate32('1927-01-02', 'UTC'), 'UTC'); +1 +SELECT dateDiff('day', materialize(toDate32('2015-08-18', 'UTC')), toDateTime('2015-08-19 00:00:00', 'UTC'), 'UTC'); +1 +SELECT dateDiff('day', materialize(toDateTime('2015-08-18 00:00:00', 'UTC')), toDate32('2015-08-19', 'UTC'), 'UTC'); +1 +SELECT dateDiff('day', materialize(toDate32('2015-08-18', 'UTC')), toDate('2015-08-19', 'UTC'), 'UTC'); +1 +SELECT dateDiff('day', materialize(toDate('2015-08-18', 'UTC')), toDate32('2015-08-19', 'UTC'), 'UTC'); +1 +-- Non-const vs non-const columns +SELECT dateDiff('day', materialize(toDate32('1927-01-01', 'UTC')), materialize(toDate32('1927-01-02', 'UTC')), 'UTC'); +1 +SELECT dateDiff('day', materialize(toDate32('1927-01-01', 'UTC')), materialize(toDateTime64('1927-01-02 00:00:00', 3, 'UTC')), 'UTC'); +1 +SELECT dateDiff('day', materialize(toDateTime64('1927-01-01 00:00:00', 3, 'UTC')), materialize(toDate32('1927-01-02', 'UTC')), 'UTC'); +1 +SELECT dateDiff('day', materialize(toDate32('2015-08-18', 'UTC')), materialize(toDateTime('2015-08-19 00:00:00', 'UTC')), 'UTC'); +1 +SELECT dateDiff('day', materialize(toDateTime('2015-08-18 00:00:00', 'UTC')), materialize(toDate32('2015-08-19', 'UTC')), 'UTC'); +1 +SELECT dateDiff('day', materialize(toDate32('2015-08-18', 'UTC')), materialize(toDate('2015-08-19', 'UTC')), 'UTC'); +1 +SELECT dateDiff('day', materialize(toDate('2015-08-18', 'UTC')), materialize(toDate32('2015-08-19', 'UTC')), 'UTC'); +1 diff --git a/tests/queries/0_stateless/02458_datediff_date32.sql b/tests/queries/0_stateless/02458_datediff_date32.sql new file mode 100644 index 00000000000..e41070e8146 --- /dev/null +++ b/tests/queries/0_stateless/02458_datediff_date32.sql @@ -0,0 +1,101 @@ +-- { echo } + +-- Date32 vs Date32 +SELECT dateDiff('second', toDate32('1927-01-01', 'UTC'), toDate32('1927-01-02', 'UTC'), 'UTC'); +SELECT dateDiff('minute', toDate32('1927-01-01', 'UTC'), toDate32('1927-01-02', 'UTC'), 'UTC'); +SELECT dateDiff('hour', toDate32('1927-01-01', 'UTC'), toDate32('1927-01-02', 'UTC'), 'UTC'); +SELECT dateDiff('day', toDate32('1927-01-01', 'UTC'), toDate32('1927-01-02', 'UTC'), 'UTC'); +SELECT dateDiff('week', toDate32('1927-01-01', 'UTC'), toDate32('1927-01-08', 'UTC'), 'UTC'); +SELECT dateDiff('month', toDate32('1927-01-01', 'UTC'), toDate32('1927-02-01', 'UTC'), 'UTC'); +SELECT 
dateDiff('quarter', toDate32('1927-01-01', 'UTC'), toDate32('1927-04-01', 'UTC'), 'UTC'); +SELECT dateDiff('year', toDate32('1927-01-01', 'UTC'), toDate32('1928-01-01', 'UTC'), 'UTC'); + +-- With DateTime64 +-- Date32 vs DateTime64 +SELECT dateDiff('second', toDate32('1927-01-01', 'UTC'), toDateTime64('1927-01-02 00:00:00', 3, 'UTC'), 'UTC'); +SELECT dateDiff('minute', toDate32('1927-01-01', 'UTC'), toDateTime64('1927-01-02 00:00:00', 3, 'UTC'), 'UTC'); +SELECT dateDiff('hour', toDate32('1927-01-01', 'UTC'), toDateTime64('1927-01-02 00:00:00', 3, 'UTC'), 'UTC'); +SELECT dateDiff('day', toDate32('1927-01-01', 'UTC'), toDateTime64('1927-01-02 00:00:00', 3, 'UTC'), 'UTC'); +SELECT dateDiff('week', toDate32('1927-01-01', 'UTC'), toDateTime64('1927-01-08 00:00:00', 3, 'UTC'), 'UTC'); +SELECT dateDiff('month', toDate32('1927-01-01', 'UTC'), toDateTime64('1927-02-01 00:00:00', 3, 'UTC'), 'UTC'); +SELECT dateDiff('quarter', toDate32('1927-01-01', 'UTC'), toDateTime64('1927-04-01 00:00:00', 3, 'UTC'), 'UTC'); +SELECT dateDiff('year', toDate32('1927-01-01', 'UTC'), toDateTime64('1928-01-01 00:00:00', 3, 'UTC'), 'UTC'); + +-- DateTime64 vs Date32 +SELECT dateDiff('second', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), toDate32('1927-01-02', 'UTC'), 'UTC'); +SELECT dateDiff('minute', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), toDate32('1927-01-02', 'UTC'), 'UTC'); +SELECT dateDiff('hour', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), toDate32('1927-01-02', 'UTC'), 'UTC'); +SELECT dateDiff('day', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), toDate32('1927-01-02', 'UTC'), 'UTC'); +SELECT dateDiff('week', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), toDate32('1927-01-08', 'UTC'), 'UTC'); +SELECT dateDiff('month', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), toDate32('1927-02-01', 'UTC'), 'UTC'); +SELECT dateDiff('quarter', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), toDate32('1927-04-01', 'UTC'), 'UTC'); +SELECT dateDiff('year', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), toDate32('1928-01-01', 'UTC'), 'UTC'); + +-- With DateTime +-- Date32 vs DateTime +SELECT dateDiff('second', toDate32('2015-08-18', 'UTC'), toDateTime('2015-08-19 00:00:00', 'UTC'), 'UTC'); +SELECT dateDiff('minute', toDate32('2015-08-18', 'UTC'), toDateTime('2015-08-19 00:00:00', 'UTC'), 'UTC'); +SELECT dateDiff('hour', toDate32('2015-08-18', 'UTC'), toDateTime('2015-08-19 00:00:00', 'UTC'), 'UTC'); +SELECT dateDiff('day', toDate32('2015-08-18', 'UTC'), toDateTime('2015-08-19 00:00:00', 'UTC'), 'UTC'); +SELECT dateDiff('week', toDate32('2015-08-18', 'UTC'), toDateTime('2015-08-25 00:00:00', 'UTC'), 'UTC'); +SELECT dateDiff('month', toDate32('2015-08-18', 'UTC'), toDateTime('2015-09-18 00:00:00', 'UTC'), 'UTC'); +SELECT dateDiff('quarter', toDate32('2015-08-18', 'UTC'), toDateTime('2015-11-18 00:00:00', 'UTC'), 'UTC'); +SELECT dateDiff('year', toDate32('2015-08-18', 'UTC'), toDateTime('2016-08-18 00:00:00', 'UTC'), 'UTC'); + +-- DateTime vs Date32 +SELECT dateDiff('second', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-08-19', 'UTC'), 'UTC'); +SELECT dateDiff('minute', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-08-19', 'UTC'), 'UTC'); +SELECT dateDiff('hour', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-08-19', 'UTC'), 'UTC'); +SELECT dateDiff('day', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-08-19', 'UTC'), 'UTC'); +SELECT dateDiff('week', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-08-25', 'UTC'), 'UTC'); +SELECT dateDiff('month', toDateTime('2015-08-18 
00:00:00', 'UTC'), toDate32('2015-09-18', 'UTC'), 'UTC'); +SELECT dateDiff('quarter', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-11-18', 'UTC'), 'UTC'); +SELECT dateDiff('year', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2016-08-18', 'UTC'), 'UTC'); + +-- With Date +-- Date32 vs Date +SELECT dateDiff('second', toDate32('2015-08-18', 'UTC'), toDate('2015-08-19', 'UTC'), 'UTC'); +SELECT dateDiff('minute', toDate32('2015-08-18', 'UTC'), toDate('2015-08-19', 'UTC'), 'UTC'); +SELECT dateDiff('hour', toDate32('2015-08-18', 'UTC'), toDate('2015-08-19', 'UTC'), 'UTC'); +SELECT dateDiff('day', toDate32('2015-08-18', 'UTC'), toDate('2015-08-19', 'UTC'), 'UTC'); +SELECT dateDiff('week', toDate32('2015-08-18', 'UTC'), toDate('2015-08-25', 'UTC'), 'UTC'); +SELECT dateDiff('month', toDate32('2015-08-18', 'UTC'), toDate('2015-09-18', 'UTC'), 'UTC'); +SELECT dateDiff('quarter', toDate32('2015-08-18', 'UTC'), toDate('2015-11-18', 'UTC'), 'UTC'); +SELECT dateDiff('year', toDate32('2015-08-18', 'UTC'), toDate('2016-08-18', 'UTC'), 'UTC'); + +-- Date vs Date32 +SELECT dateDiff('second', toDate('2015-08-18', 'UTC'), toDate32('2015-08-19', 'UTC'), 'UTC'); +SELECT dateDiff('minute', toDate('2015-08-18', 'UTC'), toDate32('2015-08-19', 'UTC'), 'UTC'); +SELECT dateDiff('hour', toDate('2015-08-18', 'UTC'), toDate32('2015-08-19', 'UTC'), 'UTC'); +SELECT dateDiff('day', toDate('2015-08-18', 'UTC'), toDate32('2015-08-19', 'UTC'), 'UTC'); +SELECT dateDiff('week', toDate('2015-08-18', 'UTC'), toDate32('2015-08-25', 'UTC'), 'UTC'); +SELECT dateDiff('month', toDate('2015-08-18', 'UTC'), toDate32('2015-09-18', 'UTC'), 'UTC'); +SELECT dateDiff('quarter', toDate('2015-08-18', 'UTC'), toDate32('2015-11-18', 'UTC'), 'UTC'); +SELECT dateDiff('year', toDate('2015-08-18', 'UTC'), toDate32('2016-08-18', 'UTC'), 'UTC'); + +-- Const vs non-const columns +SELECT dateDiff('day', toDate32('1927-01-01', 'UTC'), materialize(toDate32('1927-01-02', 'UTC')), 'UTC'); +SELECT dateDiff('day', toDate32('1927-01-01', 'UTC'), materialize(toDateTime64('1927-01-02 00:00:00', 3, 'UTC')), 'UTC'); +SELECT dateDiff('day', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), materialize(toDate32('1927-01-02', 'UTC')), 'UTC'); +SELECT dateDiff('day', toDate32('2015-08-18', 'UTC'), materialize(toDateTime('2015-08-19 00:00:00', 'UTC')), 'UTC'); +SELECT dateDiff('day', toDateTime('2015-08-18 00:00:00', 'UTC'), materialize(toDate32('2015-08-19', 'UTC')), 'UTC'); +SELECT dateDiff('day', toDate32('2015-08-18', 'UTC'), materialize(toDate('2015-08-19', 'UTC')), 'UTC'); +SELECT dateDiff('day', toDate('2015-08-18', 'UTC'), materialize(toDate32('2015-08-19', 'UTC')), 'UTC'); + +-- Non-const vs const columns +SELECT dateDiff('day', materialize(toDate32('1927-01-01', 'UTC')), toDate32('1927-01-02', 'UTC'), 'UTC'); +SELECT dateDiff('day', materialize(toDate32('1927-01-01', 'UTC')), toDateTime64('1927-01-02 00:00:00', 3, 'UTC'), 'UTC'); +SELECT dateDiff('day', materialize(toDateTime64('1927-01-01 00:00:00', 3, 'UTC')), toDate32('1927-01-02', 'UTC'), 'UTC'); +SELECT dateDiff('day', materialize(toDate32('2015-08-18', 'UTC')), toDateTime('2015-08-19 00:00:00', 'UTC'), 'UTC'); +SELECT dateDiff('day', materialize(toDateTime('2015-08-18 00:00:00', 'UTC')), toDate32('2015-08-19', 'UTC'), 'UTC'); +SELECT dateDiff('day', materialize(toDate32('2015-08-18', 'UTC')), toDate('2015-08-19', 'UTC'), 'UTC'); +SELECT dateDiff('day', materialize(toDate('2015-08-18', 'UTC')), toDate32('2015-08-19', 'UTC'), 'UTC'); + +-- Non-const vs non-const columns +SELECT 
dateDiff('day', materialize(toDate32('1927-01-01', 'UTC')), materialize(toDate32('1927-01-02', 'UTC')), 'UTC'); +SELECT dateDiff('day', materialize(toDate32('1927-01-01', 'UTC')), materialize(toDateTime64('1927-01-02 00:00:00', 3, 'UTC')), 'UTC'); +SELECT dateDiff('day', materialize(toDateTime64('1927-01-01 00:00:00', 3, 'UTC')), materialize(toDate32('1927-01-02', 'UTC')), 'UTC'); +SELECT dateDiff('day', materialize(toDate32('2015-08-18', 'UTC')), materialize(toDateTime('2015-08-19 00:00:00', 'UTC')), 'UTC'); +SELECT dateDiff('day', materialize(toDateTime('2015-08-18 00:00:00', 'UTC')), materialize(toDate32('2015-08-19', 'UTC')), 'UTC'); +SELECT dateDiff('day', materialize(toDate32('2015-08-18', 'UTC')), materialize(toDate('2015-08-19', 'UTC')), 'UTC'); +SELECT dateDiff('day', materialize(toDate('2015-08-18', 'UTC')), materialize(toDate32('2015-08-19', 'UTC')), 'UTC'); diff --git a/tests/queries/0_stateless/02458_default_setting.reference b/tests/queries/0_stateless/02458_default_setting.reference index 376553843ac..8f4532f370b 100644 --- a/tests/queries/0_stateless/02458_default_setting.reference +++ b/tests/queries/0_stateless/02458_default_setting.reference @@ -1,5 +1,5 @@ -1048545 +1048449 100000 1 -1048545 +1048449 0 diff --git a/tests/queries/0_stateless/02458_empty_hdfs_url.reference b/tests/queries/0_stateless/02458_empty_hdfs_url.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02458_empty_hdfs_url.sql b/tests/queries/0_stateless/02458_empty_hdfs_url.sql new file mode 100644 index 00000000000..ccc554fc628 --- /dev/null +++ b/tests/queries/0_stateless/02458_empty_hdfs_url.sql @@ -0,0 +1,5 @@ +-- Tags: no-fasttest, no-cpu-aarch64 +SELECT * FROM hdfsCluster('test_shard_localhost', '', 'TSV'); -- { serverError BAD_ARGUMENTS } +SELECT * FROM hdfsCluster('test_shard_localhost', ' ', 'TSV'); -- { serverError BAD_ARGUMENTS } +SELECT * FROM hdfsCluster('test_shard_localhost', '/', 'TSV'); -- { serverError BAD_ARGUMENTS } +SELECT * FROM hdfsCluster('test_shard_localhost', 'http/', 'TSV'); -- { serverError BAD_ARGUMENTS } \ No newline at end of file diff --git a/tests/queries/0_stateless/02458_hdfs_cluster_schema_inference.reference b/tests/queries/0_stateless/02458_hdfs_cluster_schema_inference.reference new file mode 100644 index 00000000000..a812e64a642 --- /dev/null +++ b/tests/queries/0_stateless/02458_hdfs_cluster_schema_inference.reference @@ -0,0 +1,10 @@ +c1 Nullable(Int64) +c2 Nullable(Int64) +c3 Nullable(Int64) +c1 Nullable(Int64) +c2 Nullable(Int64) +c3 Nullable(Int64) +1 2 3 +4 5 6 +1 2 3 +4 5 6 diff --git a/tests/queries/0_stateless/02458_hdfs_cluster_schema_inference.sql b/tests/queries/0_stateless/02458_hdfs_cluster_schema_inference.sql new file mode 100644 index 00000000000..42e88fc44b2 --- /dev/null +++ b/tests/queries/0_stateless/02458_hdfs_cluster_schema_inference.sql @@ -0,0 +1,12 @@ +-- Tags: no-fasttest, no-parallel, no-cpu-aarch64 +-- Tag no-fasttest: Depends on Java + +insert into table function hdfs('hdfs://localhost:12222/test_02458_1.tsv', 'TSV', 'column1 UInt32, column2 UInt32, column3 UInt32') select 1, 2, 3 settings hdfs_truncate_on_insert=1; +insert into table function hdfs('hdfs://localhost:12222/test_02458_2.tsv', 'TSV', 'column1 UInt32, column2 UInt32, column3 UInt32') select 4, 5, 6 settings hdfs_truncate_on_insert=1; + +desc hdfsCluster('test_cluster_one_shard_three_replicas_localhost', 'hdfs://localhost:12222/test_02458_{1,2}.tsv'); +desc hdfsCluster('test_cluster_one_shard_three_replicas_localhost', 
'hdfs://localhost:12222/test_02458_{1,2}.tsv', 'TSV'); + +select * from hdfsCluster('test_cluster_one_shard_three_replicas_localhost', 'hdfs://localhost:12222/test_02458_{1,2}.tsv') order by c1, c2, c3; +select * from hdfsCluster('test_cluster_one_shard_three_replicas_localhost', 'hdfs://localhost:12222/test_02458_{1,2}.tsv', 'TSV') order by c1, c2, c3; + diff --git a/tests/queries/0_stateless/02459_glob_for_recursive_directory_traversal.reference b/tests/queries/0_stateless/02459_glob_for_recursive_directory_traversal.reference new file mode 100644 index 00000000000..8e146946955 --- /dev/null +++ b/tests/queries/0_stateless/02459_glob_for_recursive_directory_traversal.reference @@ -0,0 +1,14 @@ +1 1 +2 2 +1 1 +2 2 +3 3 +4 4 +5 5 +6 6 +3 3 +4 4 +3 3 +4 4 +5 5 +6 6 diff --git a/tests/queries/0_stateless/02459_glob_for_recursive_directory_traversal.sh b/tests/queries/0_stateless/02459_glob_for_recursive_directory_traversal.sh new file mode 100755 index 00000000000..2c3deda2328 --- /dev/null +++ b/tests/queries/0_stateless/02459_glob_for_recursive_directory_traversal.sh @@ -0,0 +1,43 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-parallel + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +user_files_path=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') + +mkdir $user_files_path/d1 +touch $user_files_path/d1/text1.txt + +for i in {1..2} +do + echo $i$'\t'$i >> $user_files_path/d1/text1.txt +done + +mkdir $user_files_path/d1/d2 +touch $user_files_path/d1/d2/text2.txt +for i in {3..4} +do + echo $i$'\t'$i >> $user_files_path/d1/d2/text2.txt +done + +mkdir $user_files_path/d1/d2/d3 +touch $user_files_path/d1/d2/d3/text3.txt +for i in {5..6} +do + echo $i$'\t'$i >> $user_files_path/d1/d2/d3/text3.txt +done + +${CLICKHOUSE_CLIENT} -q "SELECT * from file ('d1/*','TSV', 'Index UInt8, Number UInt8')" | sort --numeric-sort +${CLICKHOUSE_CLIENT} -q "SELECT * from file ('d1/**','TSV', 'Index UInt8, Number UInt8')" | sort --numeric-sort +${CLICKHOUSE_CLIENT} -q "SELECT * from file ('d1/*/tex*','TSV', 'Index UInt8, Number UInt8')" | sort --numeric-sort +${CLICKHOUSE_CLIENT} -q "SELECT * from file ('d1/**/tex*','TSV', 'Index UInt8, Number UInt8')" | sort --numeric-sort + + +rm $user_files_path/d1/d2/d3/text3.txt +rmdir $user_files_path/d1/d2/d3 +rm $user_files_path/d1/d2/text2.txt +rmdir $user_files_path/d1/d2 +rm $user_files_path/d1/text1.txt +rmdir $user_files_path/d1 \ No newline at end of file diff --git a/tests/queries/0_stateless/02459_low_cardinality_uint128_aggregator.reference b/tests/queries/0_stateless/02459_low_cardinality_uint128_aggregator.reference new file mode 100644 index 00000000000..2a3af430e48 --- /dev/null +++ b/tests/queries/0_stateless/02459_low_cardinality_uint128_aggregator.reference @@ -0,0 +1,20 @@ +0 4950 +1 14950 +2 24950 +3 34950 +4 44950 +5 54950 +6 64950 +7 74950 +8 84950 +9 94950 +0 4950 +1 14950 +2 24950 +3 34950 +4 44950 +5 54950 +6 64950 +7 74950 +8 84950 +9 94950 diff --git a/tests/queries/0_stateless/02459_low_cardinality_uint128_aggregator.sql b/tests/queries/0_stateless/02459_low_cardinality_uint128_aggregator.sql new file mode 100644 index 00000000000..893e5514ba5 --- /dev/null +++ b/tests/queries/0_stateless/02459_low_cardinality_uint128_aggregator.sql @@ -0,0 +1,9 @@ +SET allow_suspicious_low_cardinality_types = 1; +-- LC UInt128 +CREATE TABLE group_by_pk_lc_uint128 
(`k` LowCardinality(UInt128), `v` UInt32) ENGINE = MergeTree ORDER BY k PARTITION BY v%50; +INSERT INTO group_by_pk_lc_uint128 SELECT number / 100, number FROM numbers(1000); +SELECT k, sum(v) AS s FROM group_by_pk_lc_uint128 GROUP BY k ORDER BY k ASC LIMIT 1024 SETTINGS optimize_aggregation_in_order = 1; +-- LC UInt256 +CREATE TABLE group_by_pk_lc_uint256 (`k` LowCardinality(UInt256), `v` UInt32) ENGINE = MergeTree ORDER BY k PARTITION BY v%50; +INSERT INTO group_by_pk_lc_uint256 SELECT number / 100, number FROM numbers(1000); +SELECT k, sum(v) AS s FROM group_by_pk_lc_uint256 GROUP BY k ORDER BY k ASC LIMIT 1024 SETTINGS optimize_aggregation_in_order = 1; diff --git a/tests/queries/0_stateless/02459_read_in_order_bufer.reference b/tests/queries/0_stateless/02459_read_in_order_bufer.reference new file mode 100644 index 00000000000..b040bdf6167 --- /dev/null +++ b/tests/queries/0_stateless/02459_read_in_order_bufer.reference @@ -0,0 +1,5 @@ +9 +8 +7 +6 +5 diff --git a/tests/queries/0_stateless/02459_read_in_order_bufer.sql b/tests/queries/0_stateless/02459_read_in_order_bufer.sql new file mode 100644 index 00000000000..5a6e0a3dbc6 --- /dev/null +++ b/tests/queries/0_stateless/02459_read_in_order_bufer.sql @@ -0,0 +1,13 @@ +CREATE TABLE mytable_stored (`a` UInt8) ENGINE = MergeTree ORDER BY a; +CREATE TABLE mytable (`a` UInt8) ENGINE = Buffer(currentDatabase(), 'mytable_stored', 4, 600, 3600, 10, 100, 10000, 10000000); +INSERT INTO mytable VALUES (0); +INSERT INTO mytable VALUES (1); +INSERT INTO mytable VALUES (2); +INSERT INTO mytable VALUES (3); +INSERT INTO mytable VALUES (4); +INSERT INTO mytable VALUES (5); +INSERT INTO mytable VALUES (6); +INSERT INTO mytable VALUES (7); +INSERT INTO mytable VALUES (8); +INSERT INTO mytable VALUES (9); +SELECT a FROM mytable ORDER BY a DESC LIMIT 5; diff --git a/tests/queries/0_stateless/02461_alter_update_respect_part_column_type_bug.reference b/tests/queries/0_stateless/02461_alter_update_respect_part_column_type_bug.reference new file mode 100644 index 00000000000..99a39410cae --- /dev/null +++ b/tests/queries/0_stateless/02461_alter_update_respect_part_column_type_bug.reference @@ -0,0 +1,9 @@ +1 one test1 +one one test1 +one one test +one one test +----- +1 one test1 +one one test1 +one one test +one one test diff --git a/tests/queries/0_stateless/02461_alter_update_respect_part_column_type_bug.sql b/tests/queries/0_stateless/02461_alter_update_respect_part_column_type_bug.sql new file mode 100644 index 00000000000..7f48b41aa1e --- /dev/null +++ b/tests/queries/0_stateless/02461_alter_update_respect_part_column_type_bug.sql @@ -0,0 +1,94 @@ +drop table if exists src; +create table src( A Int64, B String, C String) Engine=MergeTree order by A SETTINGS min_bytes_for_wide_part=0; +insert into src values(1, 'one', 'test'); + +alter table src detach partition tuple(); +alter table src modify column B Nullable(String); +alter table src attach partition tuple(); + +alter table src update C = 'test1' where 1 settings mutations_sync=2; +select * from src; + + +drop table if exists src; +create table src( A String, B String, C String) Engine=MergeTree order by A SETTINGS min_bytes_for_wide_part=0; +insert into src values('one', 'one', 'test'); + +alter table src detach partition tuple(); +alter table src modify column A LowCardinality(String); +alter table src attach partition tuple(); + +alter table src update C = 'test1' where 1 settings mutations_sync=2; +select * from src; + + +drop table if exists src; +create table src( A String, B String, C String) 
Engine=MergeTree order by A SETTINGS min_bytes_for_wide_part=0; +insert into src values('one', 'one', 'test'); + +alter table src detach partition tuple(); +alter table src modify column A LowCardinality(String); +alter table src attach partition tuple(); + +alter table src modify column C LowCardinality(String); +select * from src; + +drop table if exists src; +create table src( A String, B String, C String) Engine=MergeTree order by A SETTINGS min_bytes_for_wide_part=0; +insert into src values('one', 'one', 'test'); + +alter table src detach partition tuple(); +alter table src modify column B Nullable(String); +alter table src attach partition tuple(); + +alter table src rename column B to D; +select * from src; + +select '-----'; + +drop table if exists src; +create table src( A Int64, B String, C String) Engine=ReplicatedMergeTree('/clickhouse/{database}/test/src1', '1') order by A SETTINGS min_bytes_for_wide_part=0; +insert into src values(1, 'one', 'test'); + +alter table src detach partition tuple(); +alter table src modify column B Nullable(String); +alter table src attach partition tuple(); + +alter table src update C = 'test1' where 1 settings mutations_sync=2; +select * from src; + + +drop table if exists src; +create table src( A String, B String, C String) Engine=ReplicatedMergeTree('/clickhouse/{database}/test/src2', '1') order by A SETTINGS min_bytes_for_wide_part=0; +insert into src values('one', 'one', 'test'); + +alter table src detach partition tuple(); +alter table src modify column A LowCardinality(String); +alter table src attach partition tuple(); + +alter table src update C = 'test1' where 1 settings mutations_sync=2; +select * from src; + + +drop table if exists src; +create table src( A String, B String, C String) Engine=ReplicatedMergeTree('/clickhouse/{database}/test/src3', '1') order by A SETTINGS min_bytes_for_wide_part=0; +insert into src values('one', 'one', 'test'); + +alter table src detach partition tuple(); +alter table src modify column A LowCardinality(String); +alter table src attach partition tuple(); + +alter table src modify column C LowCardinality(String); +select * from src; + +drop table if exists src; +create table src( A String, B String, C String) Engine=ReplicatedMergeTree('/clickhouse/{database}/test/src4', '1') order by A SETTINGS min_bytes_for_wide_part=0; +insert into src values('one', 'one', 'test'); + +alter table src detach partition tuple(); +alter table src modify column B Nullable(String); +alter table src attach partition tuple(); + +alter table src rename column B to D; +select * from src; + diff --git a/tests/queries/0_stateless/02461_cancel_finish_race.reference b/tests/queries/0_stateless/02461_cancel_finish_race.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02461_cancel_finish_race.sh b/tests/queries/0_stateless/02461_cancel_finish_race.sh new file mode 100755 index 00000000000..7e775437da1 --- /dev/null +++ b/tests/queries/0_stateless/02461_cancel_finish_race.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +function thread_query() +{ + while true; do + $CLICKHOUSE_CLIENT --query "SELECT count() FROM numbers_mt(10000) WHERE rand() = 0 FORMAT Null"; + done +} + +function thread_cancel() +{ + while true; do + $CLICKHOUSE_CLIENT --query "KILL QUERY WHERE current_database = '$CLICKHOUSE_DATABASE' SYNC FORMAT Null"; + done +} + +# https://stackoverflow.com/questions/9954794/execute-a-shell-function-with-timeout +export -f thread_query; +export -f thread_cancel; + +TIMEOUT=30 + +timeout $TIMEOUT bash -c thread_query 2> /dev/null & +timeout $TIMEOUT bash -c thread_cancel 2> /dev/null & + +timeout $TIMEOUT bash -c thread_query 2> /dev/null & +timeout $TIMEOUT bash -c thread_cancel 2> /dev/null & + +timeout $TIMEOUT bash -c thread_query 2> /dev/null & +timeout $TIMEOUT bash -c thread_cancel 2> /dev/null & + +timeout $TIMEOUT bash -c thread_query 2> /dev/null & +timeout $TIMEOUT bash -c thread_cancel 2> /dev/null & + +timeout $TIMEOUT bash -c thread_query 2> /dev/null & +timeout $TIMEOUT bash -c thread_cancel 2> /dev/null & + +timeout $TIMEOUT bash -c thread_query 2> /dev/null & +timeout $TIMEOUT bash -c thread_cancel 2> /dev/null & + +timeout $TIMEOUT bash -c thread_query 2> /dev/null & +timeout $TIMEOUT bash -c thread_cancel 2> /dev/null & + +timeout $TIMEOUT bash -c thread_query 2> /dev/null & +timeout $TIMEOUT bash -c thread_cancel 2> /dev/null & + +timeout $TIMEOUT bash -c thread_query 2> /dev/null & +timeout $TIMEOUT bash -c thread_cancel 2> /dev/null & + +timeout $TIMEOUT bash -c thread_query 2> /dev/null & +timeout $TIMEOUT bash -c thread_cancel 2> /dev/null & + +wait diff --git a/tests/queries/0_stateless/02461_join_lc_issue_42380.reference b/tests/queries/0_stateless/02461_join_lc_issue_42380.reference new file mode 100644 index 00000000000..b0d5371e4f7 --- /dev/null +++ b/tests/queries/0_stateless/02461_join_lc_issue_42380.reference @@ -0,0 +1,2 @@ +1 0 +\N 1 diff --git a/tests/queries/0_stateless/02461_join_lc_issue_42380.sql b/tests/queries/0_stateless/02461_join_lc_issue_42380.sql new file mode 100644 index 00000000000..f0ecbf64e58 --- /dev/null +++ b/tests/queries/0_stateless/02461_join_lc_issue_42380.sql @@ -0,0 +1,12 @@ +DROP TABLE IF EXISTS t1__fuzz_13; +DROP TABLE IF EXISTS t2__fuzz_47; + +SET allow_suspicious_low_cardinality_types = 1; + +CREATE TABLE t1__fuzz_13 (id Nullable(Int16)) ENGINE = MergeTree() ORDER BY id SETTINGS allow_nullable_key = 1; +CREATE TABLE t2__fuzz_47 (id LowCardinality(Int16)) ENGINE = MergeTree() ORDER BY id; + +INSERT INTO t1__fuzz_13 VALUES (1); +INSERT INTO t2__fuzz_47 VALUES (1); + +SELECT * FROM t1__fuzz_13 FULL OUTER JOIN t2__fuzz_47 ON 1 = 2; diff --git a/tests/queries/0_stateless/02461_mullable_pk_monotonicity_bug.reference b/tests/queries/0_stateless/02461_mullable_pk_monotonicity_bug.reference new file mode 100644 index 00000000000..c0d3de1806a --- /dev/null +++ b/tests/queries/0_stateless/02461_mullable_pk_monotonicity_bug.reference @@ -0,0 +1,64 @@ +1 +2 +1 +2 +1 +2 +1 +2 +1 +2 +1 +2 +1 +2 +1 +2 +1 +2 +1 +2 +1 +2 +1 +2 +1 +2 +1 +2 +1 +2 +1 +2 +1 +2 +1 +2 +1 +2 +1 +2 +1 +2 +1 +2 +1 +2 +1 +2 +2022-02-02 00:00:01 +2022-02-02 00:00:02 +2022-02-02 00:00:01 +2022-02-02 00:00:02 +2022-02-02 00:00:01 +2022-02-02 00:00:02 +2022-02-02 00:00:01 +2022-02-02 00:00:02 +2022-02-02 00:00:01 +2022-02-02 00:00:02 +2022-02-02 00:00:01 +2022-02-02 00:00:02 +2022-02-02 00:00:01 +2022-02-02 00:00:02 +2022-02-02 00:00:01 +2022-02-02 00:00:02 diff --git a/tests/queries/0_stateless/02461_mullable_pk_monotonicity_bug.sql 
b/tests/queries/0_stateless/02461_mullable_pk_monotonicity_bug.sql new file mode 100644 index 00000000000..75c8cb2b7e7 --- /dev/null +++ b/tests/queries/0_stateless/02461_mullable_pk_monotonicity_bug.sql @@ -0,0 +1,62 @@ +create table tab (x Nullable(UInt8)) engine = MergeTree order by x settings allow_nullable_key = 1, index_granularity = 2; +insert into tab select number from numbers(4); +set allow_suspicious_low_cardinality_types=1; +set max_rows_to_read = 2; + +SELECT x + 1 FROM tab where plus(x, 1) <= 2 order by x; +SELECT x + 1 FROM tab where plus(x, 1::Nullable(UInt8)) <= 2 order by x; +SELECT x + 1 FROM tab where plus(x, 1::LowCardinality(UInt8)) <= 2 order by x; +SELECT x + 1 FROM tab where plus(x, 1::LowCardinality(Nullable(UInt8))) <= 2 order by x; +SELECT 1 + x FROM tab where plus(1, x) <= 2 order by x; +SELECT 1 + x FROM tab where plus(1::Nullable(UInt8), x) <= 2 order by x; +SELECT 1 + x FROM tab where plus(1::LowCardinality(UInt8), x) <= 2 order by x; +SELECT 1 + x FROM tab where plus(1::LowCardinality(Nullable(UInt8)), x) <= 2 order by x; + +drop table tab; +set max_rows_to_read = 100; +create table tab (x LowCardinality(UInt8)) engine = MergeTree order by x settings allow_nullable_key = 1, index_granularity = 2; +insert into tab select number from numbers(4); + +set max_rows_to_read = 2; +SELECT x + 1 FROM tab where plus(x, 1) <= 2 order by x; +SELECT x + 1 FROM tab where plus(x, 1::Nullable(UInt8)) <= 2 order by x; +SELECT x + 1 FROM tab where plus(x, 1::LowCardinality(UInt8)) <= 2 order by x; +SELECT x + 1 FROM tab where plus(x, 1::LowCardinality(Nullable(UInt8))) <= 2 order by x; +SELECT 1 + x FROM tab where plus(1, x) <= 2 order by x; +SELECT 1 + x FROM tab where plus(1::Nullable(UInt8), x) <= 2 order by x; +SELECT 1 + x FROM tab where plus(1::LowCardinality(UInt8), x) <= 2 order by x; +SELECT 1 + x FROM tab where plus(1::LowCardinality(Nullable(UInt8)), x) <= 2 order by x; + +drop table tab; +set max_rows_to_read = 100; +create table tab (x UInt128) engine = MergeTree order by x settings allow_nullable_key = 1, index_granularity = 2; +insert into tab select number from numbers(4); + +set max_rows_to_read = 2; +SELECT x + 1 FROM tab where plus(x, 1) <= 2 order by x; +SELECT x + 1 FROM tab where plus(x, 1::Nullable(UInt8)) <= 2 order by x; +SELECT x + 1 FROM tab where plus(x, 1::LowCardinality(UInt8)) <= 2 order by x; +SELECT x + 1 FROM tab where plus(x, 1::LowCardinality(Nullable(UInt8))) <= 2 order by x; +SELECT 1 + x FROM tab where plus(1, x) <= 2 order by x; +SELECT 1 + x FROM tab where plus(1::Nullable(UInt8), x) <= 2 order by x; +SELECT 1 + x FROM tab where plus(1::LowCardinality(UInt8), x) <= 2 order by x; +SELECT 1 + x FROM tab where plus(1::LowCardinality(Nullable(UInt8)), x) <= 2 order by x; + +set max_rows_to_read = 100; +SELECT x + 1 FROM tab WHERE (x + 1::LowCardinality(UInt8)) <= -9223372036854775808 order by x; + +drop table tab; +create table tab (x DateTime) engine = MergeTree order by x settings allow_nullable_key = 1, index_granularity = 2; +insert into tab select toDateTime('2022-02-02') + number from numbers(4); + +set max_rows_to_read = 2; +SELECT x + 1 FROM tab where plus(x, 1) <= toDateTime('2022-02-02') + 2 order by x; +SELECT x + 1 FROM tab where plus(x, 1::Nullable(UInt8)) <= toDateTime('2022-02-02') + 2 order by x; +SELECT x + 1 FROM tab where plus(x, 1::LowCardinality(UInt8)) <= toDateTime('2022-02-02') + 2 order by x; +SELECT x + 1 FROM tab where plus(x, 1::LowCardinality(Nullable(UInt8))) <= toDateTime('2022-02-02') + 2 order by x; +SELECT 
1 + x FROM tab where plus(1, x) <= toDateTime('2022-02-02') + 2 order by x; +SELECT 1 + x FROM tab where plus(1::Nullable(UInt8), x) <= toDateTime('2022-02-02') + 2 order by x; +SELECT 1 + x FROM tab where plus(1::LowCardinality(UInt8), x) <= toDateTime('2022-02-02') + 2 order by x; +SELECT 1 + x FROM tab where plus(1::LowCardinality(Nullable(UInt8)), x) <= toDateTime('2022-02-02') + 2 order by x; + +SELECT x + 1 FROM tab WHERE (x + CAST('1', 'Nullable(UInt8)')) <= -2147483647 ORDER BY x ASC NULLS FIRST; diff --git a/tests/queries/0_stateless/02461_welch_t_test_fuzz.reference b/tests/queries/0_stateless/02461_welch_t_test_fuzz.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02461_welch_t_test_fuzz.sql b/tests/queries/0_stateless/02461_welch_t_test_fuzz.sql new file mode 100644 index 00000000000..b22dc49dec3 --- /dev/null +++ b/tests/queries/0_stateless/02461_welch_t_test_fuzz.sql @@ -0,0 +1,8 @@ + +DROP TABLE IF EXISTS welch_ttest__fuzz_7; +CREATE TABLE welch_ttest__fuzz_7 (left UInt128, right UInt128) ENGINE = Memory; + +INSERT INTO welch_ttest__fuzz_7 VALUES (0.010268, 0), (0.000167, 0), (0.000167, 0), (0.159258, 1), (0.136278, 1), (0.122389, 1); + +SELECT roundBankers(welchTTest(left, right).2, 6) from welch_ttest__fuzz_7; -- { serverError 36 } +SELECT roundBankers(studentTTest(left, right).2, 6) from welch_ttest__fuzz_7; -- { serverError 36 } diff --git a/tests/queries/0_stateless/02462_distributions.reference b/tests/queries/0_stateless/02462_distributions.reference new file mode 100644 index 00000000000..56b04bcb856 --- /dev/null +++ b/tests/queries/0_stateless/02462_distributions.reference @@ -0,0 +1,12 @@ +Ok +Ok +Ok +Ok +Ok +Ok +Ok +0 +1 +Ok +Ok +Ok diff --git a/tests/queries/0_stateless/02462_distributions.sql b/tests/queries/0_stateless/02462_distributions.sql new file mode 100644 index 00000000000..b45dc897f2a --- /dev/null +++ b/tests/queries/0_stateless/02462_distributions.sql @@ -0,0 +1,24 @@ +# Values should be between 0 and 1 +SELECT DISTINCT if (a >= toFloat64(0) AND a <= toFloat64(1), 'Ok', 'Fail') FROM (SELECT randUniform(0, 1) AS a FROM numbers(100000)); +# Mean should be around 0 +SELECT DISTINCT if (m >= toFloat64(-0.2) AND m <= toFloat64(0.2), 'Ok', 'Fail') FROM (SELECT avg(a) as m FROM (SELECT randNormal(0, 5) AS a FROM numbers(100000))); +# Values should be >= 0 +SELECT DISTINCT if (a >= toFloat64(0), 'Ok', 'Fail') FROM (SELECT randLogNormal(0, 5) AS a FROM numbers(100000)); +# Values should be >= 0 +SELECT DISTINCT if (a >= toFloat64(0), 'Ok', 'Fail') FROM (SELECT randExponential(15) AS a FROM numbers(100000)); +# Values should be >= 0 +SELECT DISTINCT if (a >= toFloat64(0), 'Ok', 'Fail') FROM (SELECT randChiSquared(3) AS a FROM numbers(100000)); +# Mean should be around 0 +SELECT DISTINCT if (m > toFloat64(-0.2) AND m < toFloat64(0.2), 'Ok', 'Fail') FROM (SELECT avg(a) as m FROM (SELECT randStudentT(5) AS a FROM numbers(100000))); +# Values should be >= 0 +SELECT DISTINCT if (a >= toFloat64(0), 'Ok', 'Fail') FROM (SELECT randFisherF(3, 4) AS a FROM numbers(100000)); +# There should be only 0s and 1s +SELECT a FROM (SELECT DISTINCT randBernoulli(0.5) AS a FROM numbers(100000)) ORDER BY a; +# Values should be >= 0 +SELECT DISTINCT if (a >= toFloat64(0), 'Ok', 'Fail') FROM (SELECT randBinomial(3, 0.5) AS a FROM numbers(100000)); +# Values should be >= 0 +SELECT DISTINCT if (a >= toFloat64(0), 'Ok', 'Fail') FROM (SELECT randNegativeBinomial(3, 0.5) AS a FROM numbers(100000)); +# Values should be >= 0 +SELECT 
DISTINCT if (a >= toFloat64(0), 'Ok', 'Fail') FROM (SELECT randPoisson(44) AS a FROM numbers(100000)); +# No errors +SELECT randUniform(1, 2, 1), randNormal(0, 1, 'abacaba'), randLogNormal(0, 10, 'b'), randChiSquared(1, 1), randStudentT(7, '8'), randFisherF(23, 42, 100), randBernoulli(0.5, 2), randBinomial(3, 0.5, 1), randNegativeBinomial(3, 0.5, 2), randPoisson(44, 44) FORMAT Null; diff --git a/tests/queries/0_stateless/02462_int_to_date.reference b/tests/queries/0_stateless/02462_int_to_date.reference new file mode 100644 index 00000000000..f31441cf3b8 --- /dev/null +++ b/tests/queries/0_stateless/02462_int_to_date.reference @@ -0,0 +1,4 @@ +20221011 2022-10-11 1665519765 +20221011 2022-10-11 1665519765 +20221011 2022-10-11 1665519765 Int32 +20221011 2022-10-11 1665519765 UInt32 diff --git a/tests/queries/0_stateless/02462_int_to_date.sql b/tests/queries/0_stateless/02462_int_to_date.sql new file mode 100644 index 00000000000..cd470ca12f6 --- /dev/null +++ b/tests/queries/0_stateless/02462_int_to_date.sql @@ -0,0 +1,4 @@ +select toYYYYMMDD(toDate(recordTimestamp, 'Europe/Amsterdam')), toDate(recordTimestamp, 'Europe/Amsterdam'), toInt64(1665519765) as recordTimestamp; +select toYYYYMMDD(toDate(recordTimestamp, 'Europe/Amsterdam')), toDate(recordTimestamp, 'Europe/Amsterdam'), toUInt64(1665519765) as recordTimestamp; +select toYYYYMMDD(toDate(recordTimestamp, 'Europe/Amsterdam')), toDate(recordTimestamp, 'Europe/Amsterdam'), toInt32(1665519765) as recordTimestamp, toTypeName(recordTimestamp); +select toYYYYMMDD(toDate(recordTimestamp, 'Europe/Amsterdam')), toDate(recordTimestamp, 'Europe/Amsterdam'), toUInt32(1665519765) as recordTimestamp, toTypeName(recordTimestamp); diff --git a/tests/queries/0_stateless/02462_match_regexp_pk.reference b/tests/queries/0_stateless/02462_match_regexp_pk.reference new file mode 100644 index 00000000000..428d6556f4c --- /dev/null +++ b/tests/queries/0_stateless/02462_match_regexp_pk.reference @@ -0,0 +1,5 @@ +4 +1 +3 +4 +4 diff --git a/tests/queries/0_stateless/02462_match_regexp_pk.sql b/tests/queries/0_stateless/02462_match_regexp_pk.sql new file mode 100644 index 00000000000..1a944b96196 --- /dev/null +++ b/tests/queries/0_stateless/02462_match_regexp_pk.sql @@ -0,0 +1,9 @@ +CREATE TABLE mt_match_pk (v String) ENGINE = MergeTree ORDER BY v SETTINGS index_granularity = 1; +INSERT INTO mt_match_pk VALUES ('a'), ('aaa'), ('aba'), ('bac'), ('acccca'); + +SET force_primary_key = 1; +SELECT count() FROM mt_match_pk WHERE match(v, '^a'); +SELECT count() FROM mt_match_pk WHERE match(v, '^ab'); +SELECT count() FROM mt_match_pk WHERE match(v, '^a.'); +SELECT count() FROM mt_match_pk WHERE match(v, '^ab*'); +SELECT count() FROM mt_match_pk WHERE match(v, '^ac?'); diff --git a/tests/queries/0_stateless/02463_julian_day_ubsan.reference b/tests/queries/0_stateless/02463_julian_day_ubsan.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02463_julian_day_ubsan.sql b/tests/queries/0_stateless/02463_julian_day_ubsan.sql new file mode 100644 index 00000000000..a8583d7b0a8 --- /dev/null +++ b/tests/queries/0_stateless/02463_julian_day_ubsan.sql @@ -0,0 +1 @@ +SELECT fromModifiedJulianDay(9223372036854775807 :: Int64); -- { serverError 490 } diff --git a/tests/queries/0_stateless/02464_decimal_scale_buffer_overflow.reference b/tests/queries/0_stateless/02464_decimal_scale_buffer_overflow.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git 
a/tests/queries/0_stateless/02464_decimal_scale_buffer_overflow.sql b/tests/queries/0_stateless/02464_decimal_scale_buffer_overflow.sql new file mode 100644 index 00000000000..355d9012f1f --- /dev/null +++ b/tests/queries/0_stateless/02464_decimal_scale_buffer_overflow.sql @@ -0,0 +1,5 @@ +DROP TABLE IF EXISTS series__fuzz_35; +CREATE TABLE series__fuzz_35 (`i` UInt8, `x_value` Decimal(18, 14), `y_value` DateTime) ENGINE = Memory; +INSERT INTO series__fuzz_35(i, x_value, y_value) VALUES (1, 5.6,-4.4),(2, -9.6,3),(3, -1.3,-4),(4, 5.3,9.7),(5, 4.4,0.037),(6, -8.6,-7.8),(7, 5.1,9.3),(8, 7.9,-3.6),(9, -8.2,0.62),(10, -3,7.3); +SELECT skewSamp(x_value) FROM (SELECT x_value as x_value FROM series__fuzz_35 LIMIT 2) FORMAT Null; +DROP TABLE series__fuzz_35; diff --git a/tests/queries/0_stateless/02465_limit_trivial_max_rows_to_read.reference b/tests/queries/0_stateless/02465_limit_trivial_max_rows_to_read.reference new file mode 100644 index 00000000000..87370760038 --- /dev/null +++ b/tests/queries/0_stateless/02465_limit_trivial_max_rows_to_read.reference @@ -0,0 +1,7 @@ +0 +0 +1 +2 +3 +4 +0 diff --git a/tests/queries/0_stateless/02465_limit_trivial_max_rows_to_read.sql b/tests/queries/0_stateless/02465_limit_trivial_max_rows_to_read.sql new file mode 100644 index 00000000000..ee7a4e6b6b5 --- /dev/null +++ b/tests/queries/0_stateless/02465_limit_trivial_max_rows_to_read.sql @@ -0,0 +1,22 @@ +DROP TABLE IF EXISTS t_max_rows_to_read; + +CREATE TABLE t_max_rows_to_read (a UInt64) +ENGINE = MergeTree ORDER BY a +SETTINGS index_granularity = 4; + +INSERT INTO t_max_rows_to_read SELECT number FROM numbers(100); + +SET max_block_size = 10; +SET max_rows_to_read = 20; +SET read_overflow_mode = 'throw'; + +SELECT number FROM numbers(30); -- { serverError 158 } +SELECT number FROM numbers(30) LIMIT 21; -- { serverError 158 } +SELECT number FROM numbers(30) LIMIT 1; +SELECT number FROM numbers(5); + +SELECT a FROM t_max_rows_to_read LIMIT 1; +SELECT a FROM t_max_rows_to_read LIMIT 11 offset 11; -- { serverError 158 } +SELECT a FROM t_max_rows_to_read WHERE a > 50 LIMIT 1; -- { serverError 158 } + +DROP TABLE t_max_rows_to_read; diff --git a/tests/queries/0_stateless/02466_distributed_query_profiler.reference b/tests/queries/0_stateless/02466_distributed_query_profiler.reference new file mode 100644 index 00000000000..4521d575ff3 --- /dev/null +++ b/tests/queries/0_stateless/02466_distributed_query_profiler.reference @@ -0,0 +1,10 @@ +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 diff --git a/tests/queries/0_stateless/02466_distributed_query_profiler.sql b/tests/queries/0_stateless/02466_distributed_query_profiler.sql new file mode 100644 index 00000000000..9fc2fe7b4bd --- /dev/null +++ b/tests/queries/0_stateless/02466_distributed_query_profiler.sql @@ -0,0 +1,21 @@ +-- This is a regression test for EINTR handling in MultiplexedConnections::getReplicaForReading() + +select * from remote('127.{2,4}', view( + -- This is the emulation of the slow query, the server will return a line each 0.1 second + select sleep(0.1) from numbers(20) settings max_block_size=1) +) +-- LIMIT is to activate query cancellation in case of enough rows already read. 
+limit 10 +settings + -- This is to avoid draining in background and got the exception during query execution + drain_timeout=-1, + -- This is to activate as much signals as possible to trigger EINTR + query_profiler_real_time_period_ns=1, + -- This is to use MultiplexedConnections + use_hedged_requests=0, + -- This is to make the initiator waiting for cancel packet in MultiplexedConnections::getReplicaForReading() + -- + -- NOTE: that even smaller sleep will be enough to trigger this problem + -- with 100% probability, however just to make it more reliable, increase + -- it to 2 seconds. + sleep_in_receive_cancel_ms=2000; diff --git a/tests/queries/0_stateless/02467_cross_join_three_table_functions.reference b/tests/queries/0_stateless/02467_cross_join_three_table_functions.reference new file mode 100644 index 00000000000..0718dd8e65f --- /dev/null +++ b/tests/queries/0_stateless/02467_cross_join_three_table_functions.reference @@ -0,0 +1 @@ +1320 diff --git a/tests/queries/0_stateless/02467_cross_join_three_table_functions.sql b/tests/queries/0_stateless/02467_cross_join_three_table_functions.sql new file mode 100644 index 00000000000..5c7da815bbe --- /dev/null +++ b/tests/queries/0_stateless/02467_cross_join_three_table_functions.sql @@ -0,0 +1 @@ +SELECT count(*) FROM numbers(10) AS a, numbers(11) AS b, numbers(12) AS c; diff --git a/tests/queries/0_stateless/02467_set_with_lowcardinality_type.reference b/tests/queries/0_stateless/02467_set_with_lowcardinality_type.reference new file mode 100644 index 00000000000..b3f28057554 --- /dev/null +++ b/tests/queries/0_stateless/02467_set_with_lowcardinality_type.reference @@ -0,0 +1,2 @@ +1 test +1 test diff --git a/tests/queries/0_stateless/02467_set_with_lowcardinality_type.sql b/tests/queries/0_stateless/02467_set_with_lowcardinality_type.sql new file mode 100644 index 00000000000..dee6f7de74a --- /dev/null +++ b/tests/queries/0_stateless/02467_set_with_lowcardinality_type.sql @@ -0,0 +1,31 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/42460 +DROP TABLE IF EXISTS bloom_filter_nullable_index__fuzz_0; +CREATE TABLE bloom_filter_nullable_index__fuzz_0 +( + `order_key` UInt64, + `str` Nullable(String), + INDEX idx str TYPE bloom_filter GRANULARITY 1 +) +ENGINE = MergeTree ORDER BY order_key SETTINGS index_granularity = 6; + +INSERT INTO bloom_filter_nullable_index__fuzz_0 VALUES (1, 'test'); +INSERT INTO bloom_filter_nullable_index__fuzz_0 VALUES (2, 'test2'); + +DROP TABLE IF EXISTS bloom_filter_nullable_index__fuzz_1; +CREATE TABLE bloom_filter_nullable_index__fuzz_1 +( + `order_key` UInt64, + `str` String, + INDEX idx str TYPE bloom_filter GRANULARITY 1 +) +ENGINE = MergeTree ORDER BY order_key SETTINGS index_granularity = 6; + +INSERT INTO bloom_filter_nullable_index__fuzz_0 VALUES (1, 'test'); +INSERT INTO bloom_filter_nullable_index__fuzz_0 VALUES (2, 'test2'); + +DROP TABLE IF EXISTS nullable_string_value__fuzz_2; +CREATE TABLE nullable_string_value__fuzz_2 (`value` LowCardinality(String)) ENGINE = TinyLog; +INSERT INTO nullable_string_value__fuzz_2 VALUES ('test'); + +SELECT * FROM bloom_filter_nullable_index__fuzz_0 WHERE str IN (SELECT value FROM nullable_string_value__fuzz_2); +SELECT * FROM bloom_filter_nullable_index__fuzz_1 WHERE str IN (SELECT value FROM nullable_string_value__fuzz_2); diff --git a/tests/queries/0_stateless/02468_has_any_tuple.reference b/tests/queries/0_stateless/02468_has_any_tuple.reference new file mode 100644 index 00000000000..252a9293563 --- /dev/null +++ 
b/tests/queries/0_stateless/02468_has_any_tuple.reference @@ -0,0 +1,4 @@ +1 +1 +[(3,3)] +1 diff --git a/tests/queries/0_stateless/02468_has_any_tuple.sql b/tests/queries/0_stateless/02468_has_any_tuple.sql new file mode 100644 index 00000000000..12c7222d593 --- /dev/null +++ b/tests/queries/0_stateless/02468_has_any_tuple.sql @@ -0,0 +1,4 @@ +select [(toUInt8(3), toUInt8(3))] = [(toInt16(3), toInt16(3))]; +select hasAny([(toInt16(3), toInt16(3))],[(toInt16(3), toInt16(3))]); +select arrayFilter(x -> x = (toInt16(3), toInt16(3)), arrayZip([toUInt8(3)], [toUInt8(3)])); +select hasAny([(toUInt8(3), toUInt8(3))],[(toInt16(3), toInt16(3))]); diff --git a/tests/queries/0_stateless/02469_fix_aliases_parser.reference b/tests/queries/0_stateless/02469_fix_aliases_parser.reference new file mode 100644 index 00000000000..09f584c9cd4 --- /dev/null +++ b/tests/queries/0_stateless/02469_fix_aliases_parser.reference @@ -0,0 +1,2 @@ +45 +[0] diff --git a/tests/queries/0_stateless/02469_fix_aliases_parser.sql b/tests/queries/0_stateless/02469_fix_aliases_parser.sql new file mode 100644 index 00000000000..227d8becdb6 --- /dev/null +++ b/tests/queries/0_stateless/02469_fix_aliases_parser.sql @@ -0,0 +1,9 @@ +SELECT sum(number number number) FROM numbers(10); -- { clientError 62 } +SELECT sum(number number) FROM numbers(10); -- { clientError 62 } +SELECT sum(number AS number) FROM numbers(10); + +SELECT [number number number] FROM numbers(1); -- { clientError 62 } +SELECT [number number] FROM numbers(1); -- { clientError 62 } +SELECT [number AS number] FROM numbers(1); + +SELECT cast('1234' lhs lhs, 'UInt32'), lhs; -- { clientError 62 } \ No newline at end of file diff --git a/tests/queries/0_stateless/02469_interval_msan.reference b/tests/queries/0_stateless/02469_interval_msan.reference new file mode 100644 index 00000000000..c18b4e9b082 --- /dev/null +++ b/tests/queries/0_stateless/02469_interval_msan.reference @@ -0,0 +1,8 @@ +1 +1 +1 +1 +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/02469_interval_msan.sql b/tests/queries/0_stateless/02469_interval_msan.sql new file mode 100644 index 00000000000..4b4a9f746ea --- /dev/null +++ b/tests/queries/0_stateless/02469_interval_msan.sql @@ -0,0 +1,19 @@ +SELECT now() + 1::Int128; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT now() + 1::Int256; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT now() + 1::UInt128; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT now() + 1::UInt256; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +SELECT now() - 1::Int128; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT now() - 1::Int256; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT now() - 1::UInt128; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT now() - 1::UInt256; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +SELECT now() + INTERVAL 1::Int128 SECOND - now(); +SELECT now() + INTERVAL 1::Int256 SECOND - now(); +SELECT now() + INTERVAL 1::UInt128 SECOND - now(); +SELECT now() + INTERVAL 1::UInt256 SECOND - now(); + +SELECT today() + INTERVAL 1::Int128 DAY - today(); +SELECT today() + INTERVAL 1::Int256 DAY - today(); +SELECT today() + INTERVAL 1::UInt128 DAY - today(); +SELECT today() + INTERVAL 1::UInt256 DAY - today(); diff --git a/tests/queries/0_stateless/02470_suspicious_low_cardinality_msan.reference b/tests/queries/0_stateless/02470_suspicious_low_cardinality_msan.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02470_suspicious_low_cardinality_msan.sql 
b/tests/queries/0_stateless/02470_suspicious_low_cardinality_msan.sql new file mode 100644 index 00000000000..6969be1ca64 --- /dev/null +++ b/tests/queries/0_stateless/02470_suspicious_low_cardinality_msan.sql @@ -0,0 +1,6 @@ +DROP TABLE IF EXISTS alias_2__fuzz_25; +SET allow_suspicious_low_cardinality_types = 1; +CREATE TABLE alias_2__fuzz_25 (`dt` LowCardinality(Date), `col` DateTime, `col2` Nullable(Int256), `colAlias0` Nullable(DateTime64(3)) ALIAS col, `colAlias3` Nullable(Int32) ALIAS col3 + colAlias0, `colAlias1` LowCardinality(UInt16) ALIAS colAlias0 + col2, `colAlias2` LowCardinality(Int32) ALIAS colAlias0 + colAlias1, `col3` Nullable(UInt8)) ENGINE = MergeTree ORDER BY dt; +insert into alias_2__fuzz_25 (dt, col, col2, col3) values ('2020-02-01', 1, 2, 3); +SELECT colAlias0, colAlias2, colAlias3 FROM alias_2__fuzz_25; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +DROP TABLE alias_2__fuzz_25; diff --git a/tests/queries/0_stateless/02471_wrong_date_monotonicity.reference b/tests/queries/0_stateless/02471_wrong_date_monotonicity.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02471_wrong_date_monotonicity.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02471_wrong_date_monotonicity.sql b/tests/queries/0_stateless/02471_wrong_date_monotonicity.sql new file mode 100644 index 00000000000..40d64e53309 --- /dev/null +++ b/tests/queries/0_stateless/02471_wrong_date_monotonicity.sql @@ -0,0 +1,5 @@ +DROP TABLE IF EXISTS tdm__fuzz_23; +CREATE TABLE tdm__fuzz_23 (`x` UInt256) ENGINE = MergeTree ORDER BY x SETTINGS write_final_mark = 0; +INSERT INTO tdm__fuzz_23 FORMAT Values (1); +SELECT count(x) FROM tdm__fuzz_23 WHERE toDate(x) < toDate(now(), 'Asia/Istanbul') SETTINGS max_rows_to_read = 1; +DROP TABLE tdm__fuzz_23; diff --git a/tests/queries/0_stateless/02472_segfault_expression_parser.reference b/tests/queries/0_stateless/02472_segfault_expression_parser.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02472_segfault_expression_parser.sql b/tests/queries/0_stateless/02472_segfault_expression_parser.sql new file mode 100644 index 00000000000..285de80a64a --- /dev/null +++ b/tests/queries/0_stateless/02472_segfault_expression_parser.sql @@ -0,0 +1 @@ +SELECT TIMESTAMP_SUB (SELECT ILIKE INTO OUTFILE , accurateCast ) FROM TIMESTAMP_SUB ( MINUTE , ) GROUP BY accurateCast; -- { clientError 62 } diff --git a/tests/queries/0_stateless/02473_map_element_nullable.reference b/tests/queries/0_stateless/02473_map_element_nullable.reference new file mode 100644 index 00000000000..84a9ba03bb4 --- /dev/null +++ b/tests/queries/0_stateless/02473_map_element_nullable.reference @@ -0,0 +1,16 @@ +2 \N \N +2 \N \N +2 \N \N +2 \N \N +2 \N \N +2 \N \N +2 \N \N +2 \N \N +2 \N \N +2 \N \N +2 \N \N +2 \N \N +2 \N \N +2 \N \N +2 \N \N +2 \N \N diff --git a/tests/queries/0_stateless/02473_map_element_nullable.sql b/tests/queries/0_stateless/02473_map_element_nullable.sql new file mode 100644 index 00000000000..e9c351d112c --- /dev/null +++ b/tests/queries/0_stateless/02473_map_element_nullable.sql @@ -0,0 +1,19 @@ +WITH map(1, 2, 3, NULL) AS m SELECT m[toNullable(1)], m[toNullable(2)], m[toNullable(3)]; +WITH map(1, 2, 3, NULL) AS m SELECT m[materialize(toNullable(1))], m[materialize(toNullable(2))], m[materialize(toNullable(3))]; +WITH materialize(map(1, 2, 3, NULL)) AS m SELECT m[toNullable(1)], m[toNullable(2)], m[toNullable(3)]; +WITH materialize(map(1, 2, 3, NULL)) AS m SELECT 
m[materialize(toNullable(1))], m[materialize(toNullable(2))], m[materialize(toNullable(3))]; + +WITH map('a', 2, 'b', NULL) AS m SELECT m[toNullable('a')], m[toNullable('b')], m[toNullable('c')]; +WITH map('a', 2, 'b', NULL) AS m SELECT m[materialize(toNullable('a'))], m[materialize(toNullable('b'))], m[materialize(toNullable('c'))]; +WITH materialize(map('a', 2, 'b', NULL)) AS m SELECT m[toNullable('a')], m[toNullable('b')], m[toNullable('c')]; +WITH materialize(map('a', 2, 'b', NULL)) AS m SELECT m[materialize(toNullable('a'))], m[materialize(toNullable('b'))], m[materialize(toNullable('c'))]; + +WITH map(1, 2, 3, NULL) AS m SELECT m[1], m[2], m[3]; +WITH map(1, 2, 3, NULL) AS m SELECT m[materialize(1)], m[materialize(2)], m[materialize(3)]; +WITH materialize(map(1, 2, 3, NULL)) AS m SELECT m[1], m[2], m[3]; +WITH materialize(map(1, 2, 3, NULL)) AS m SELECT m[materialize(1)], m[materialize(2)], m[materialize(3)]; + +WITH map('a', 2, 'b', NULL) AS m SELECT m['a'], m['b'], m['c']; +WITH map('a', 2, 'b', NULL) AS m SELECT m[materialize('a')], m[materialize('b')], m[materialize('c')]; +WITH materialize(map('a', 2, 'b', NULL)) AS m SELECT m['a'], m['b'], m['c']; +WITH materialize(map('a', 2, 'b', NULL)) AS m SELECT m[materialize('a')], m[materialize('b')], m[materialize('c')]; diff --git a/tests/queries/0_stateless/02473_optimize_old_parts.reference b/tests/queries/0_stateless/02473_optimize_old_parts.reference new file mode 100644 index 00000000000..9002d73ff27 --- /dev/null +++ b/tests/queries/0_stateless/02473_optimize_old_parts.reference @@ -0,0 +1,12 @@ +Without merge +3 +With merge any part range +1 +With merge partition only +1 +With merge replicated any part range +1 +With merge replicated partition only +1 +With merge partition only and new parts +3 diff --git a/tests/queries/0_stateless/02473_optimize_old_parts.sql b/tests/queries/0_stateless/02473_optimize_old_parts.sql new file mode 100644 index 00000000000..c2bd37033c1 --- /dev/null +++ b/tests/queries/0_stateless/02473_optimize_old_parts.sql @@ -0,0 +1,87 @@ +-- Tags: long + +DROP TABLE IF EXISTS test_without_merge; +DROP TABLE IF EXISTS test_with_merge; +DROP TABLE IF EXISTS test_replicated; + +SELECT 'Without merge'; + +CREATE TABLE test_without_merge (i Int64) ENGINE = MergeTree ORDER BY i; +INSERT INTO test_without_merge SELECT 1; +INSERT INTO test_without_merge SELECT 2; +INSERT INTO test_without_merge SELECT 3; + +SELECT sleepEachRow(1) FROM numbers(9) FORMAT Null; +SELECT count(*) FROM system.parts WHERE database = currentDatabase() AND table='test_without_merge' AND active; + +DROP TABLE test_without_merge; + +SELECT 'With merge any part range'; + +CREATE TABLE test_with_merge (i Int64) ENGINE = MergeTree ORDER BY i +SETTINGS min_age_to_force_merge_seconds=3, min_age_to_force_merge_on_partition_only=false; +INSERT INTO test_with_merge SELECT 1; +INSERT INTO test_with_merge SELECT 2; +INSERT INTO test_with_merge SELECT 3; + +SELECT sleepEachRow(1) FROM numbers(9) FORMAT Null; +SELECT count(*) FROM system.parts WHERE database = currentDatabase() AND table='test_with_merge' AND active; + +DROP TABLE test_with_merge; + +SELECT 'With merge partition only'; + +CREATE TABLE test_with_merge (i Int64) ENGINE = MergeTree ORDER BY i +SETTINGS min_age_to_force_merge_seconds=3, min_age_to_force_merge_on_partition_only=true; +INSERT INTO test_with_merge SELECT 1; +INSERT INTO test_with_merge SELECT 2; +INSERT INTO test_with_merge SELECT 3; + +SELECT sleepEachRow(1) FROM numbers(9) FORMAT Null; +SELECT count(*) FROM system.parts WHERE 
database = currentDatabase() AND table='test_with_merge' AND active; + +DROP TABLE test_with_merge; + +SELECT 'With merge replicated any part range'; + +CREATE TABLE test_replicated (i Int64) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test02473', 'node') ORDER BY i +SETTINGS min_age_to_force_merge_seconds=3, min_age_to_force_merge_on_partition_only=false; +INSERT INTO test_replicated SELECT 1; +INSERT INTO test_replicated SELECT 2; +INSERT INTO test_replicated SELECT 3; + +SELECT sleepEachRow(1) FROM numbers(9) FORMAT Null; +SELECT count(*) FROM system.parts WHERE database = currentDatabase() AND table='test_replicated' AND active; + +DROP TABLE test_replicated; + +SELECT 'With merge replicated partition only'; + +CREATE TABLE test_replicated (i Int64) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test02473_partition_only', 'node') ORDER BY i +SETTINGS min_age_to_force_merge_seconds=3, min_age_to_force_merge_on_partition_only=true; +INSERT INTO test_replicated SELECT 1; +INSERT INTO test_replicated SELECT 2; +INSERT INTO test_replicated SELECT 3; + +SELECT sleepEachRow(1) FROM numbers(9) FORMAT Null; +SELECT count(*) FROM system.parts WHERE database = currentDatabase() AND table='test_replicated' AND active; + +DROP TABLE test_replicated; + +SELECT 'With merge partition only and new parts'; + +CREATE TABLE test_with_merge (i Int64) ENGINE = MergeTree ORDER BY i +SETTINGS min_age_to_force_merge_seconds=3, min_age_to_force_merge_on_partition_only=true; +SYSTEM STOP MERGES test_with_merge; +-- These two parts will be older than min_age_to_force_merge_seconds at the time of merge +INSERT INTO test_with_merge SELECT 1; +INSERT INTO test_with_merge SELECT 2; +SELECT sleepEachRow(1) FROM numbers(9) FORMAT Null; +-- This part will have min_age=0 at the time of merge +-- and so, nothing will be merged.
+INSERT INTO test_with_merge SELECT 3; +SYSTEM START MERGES test_with_merge; + +SELECT count(*) FROM system.parts WHERE database = currentDatabase() AND table='test_with_merge' AND active; + +DROP TABLE test_with_merge; diff --git a/tests/queries/0_stateless/02473_prewhere_with_bigint.reference b/tests/queries/0_stateless/02473_prewhere_with_bigint.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02473_prewhere_with_bigint.sql b/tests/queries/0_stateless/02473_prewhere_with_bigint.sql new file mode 100644 index 00000000000..29c6f0da2a1 --- /dev/null +++ b/tests/queries/0_stateless/02473_prewhere_with_bigint.sql @@ -0,0 +1,24 @@ +DROP TABLE IF EXISTS prewhere_int128; +DROP TABLE IF EXISTS prewhere_int256; +DROP TABLE IF EXISTS prewhere_uint128; +DROP TABLE IF EXISTS prewhere_uint256; + +CREATE TABLE prewhere_int128 (a Int128) ENGINE=MergeTree ORDER BY a; +INSERT INTO prewhere_int128 VALUES (1); +SELECT a FROM prewhere_int128 PREWHERE a; -- { serverError 59 } +DROP TABLE prewhere_int128; + +CREATE TABLE prewhere_int256 (a Int256) ENGINE=MergeTree ORDER BY a; +INSERT INTO prewhere_int256 VALUES (1); +SELECT a FROM prewhere_int256 PREWHERE a; -- { serverError 59 } +DROP TABLE prewhere_int256; + +CREATE TABLE prewhere_uint128 (a UInt128) ENGINE=MergeTree ORDER BY a; +INSERT INTO prewhere_uint128 VALUES (1); +SELECT a FROM prewhere_uint128 PREWHERE a; -- { serverError 59 } +DROP TABLE prewhere_uint128; + +CREATE TABLE prewhere_uint256 (a UInt256) ENGINE=MergeTree ORDER BY a; +INSERT INTO prewhere_uint256 VALUES (1); +SELECT a FROM prewhere_uint256 PREWHERE a; -- { serverError 59 } +DROP TABLE prewhere_uint256; diff --git a/tests/queries/0_stateless/02474_analyzer_subqueries_table_expression_modifiers.reference b/tests/queries/0_stateless/02474_analyzer_subqueries_table_expression_modifiers.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02474_analyzer_subqueries_table_expression_modifiers.sql b/tests/queries/0_stateless/02474_analyzer_subqueries_table_expression_modifiers.sql new file mode 100644 index 00000000000..456783cad26 --- /dev/null +++ b/tests/queries/0_stateless/02474_analyzer_subqueries_table_expression_modifiers.sql @@ -0,0 +1,17 @@ +SET allow_experimental_analyzer = 1; + +SELECT * FROM (SELECT 1) FINAL; -- { serverError 1 } +SELECT * FROM (SELECT 1) SAMPLE 1/2; -- { serverError 1 } +SELECT * FROM (SELECT 1) FINAL SAMPLE 1/2; -- { serverError 1 } + +WITH cte_subquery AS (SELECT 1) SELECT * FROM cte_subquery FINAL; -- { serverError 1 } +WITH cte_subquery AS (SELECT 1) SELECT * FROM cte_subquery SAMPLE 1/2; -- { serverError 1 } +WITH cte_subquery AS (SELECT 1) SELECT * FROM cte_subquery FINAL SAMPLE 1/2; -- { serverError 1 } + +SELECT * FROM (SELECT 1 UNION ALL SELECT 1) FINAL; -- { serverError 1 } +SELECT * FROM (SELECT 1 UNION ALL SELECT 1) SAMPLE 1/2; -- { serverError 1 } +SELECT * FROM (SELECT 1 UNION ALL SELECT 1) FINAL SAMPLE 1/2; -- { serverError 1 } + +WITH cte_subquery AS (SELECT 1 UNION ALL SELECT 1) SELECT * FROM cte_subquery FINAL; -- { serverError 1 } +WITH cte_subquery AS (SELECT 1 UNION ALL SELECT 1) SELECT * FROM cte_subquery SAMPLE 1/2; -- { serverError 1 } +WITH cte_subquery AS (SELECT 1 UNION ALL SELECT 1) SELECT * FROM cte_subquery FINAL SAMPLE 1/2; -- { serverError 1 } diff --git a/tests/queries/0_stateless/02474_create_user_query_fuzzer_bug.reference b/tests/queries/0_stateless/02474_create_user_query_fuzzer_bug.reference new file mode 100644 index 
00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02474_create_user_query_fuzzer_bug.sql b/tests/queries/0_stateless/02474_create_user_query_fuzzer_bug.sql new file mode 100644 index 00000000000..3ef1469cf1b --- /dev/null +++ b/tests/queries/0_stateless/02474_create_user_query_fuzzer_bug.sql @@ -0,0 +1 @@ +EXPLAIN AST ALTER user WITH a; -- { clientError SYNTAX_ERROR } diff --git a/tests/queries/0_stateless/02474_fix_function_parser_bug.reference b/tests/queries/0_stateless/02474_fix_function_parser_bug.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02474_fix_function_parser_bug.sql b/tests/queries/0_stateless/02474_fix_function_parser_bug.sql new file mode 100644 index 00000000000..67d97aa1c25 --- /dev/null +++ b/tests/queries/0_stateless/02474_fix_function_parser_bug.sql @@ -0,0 +1 @@ +CREATE DATABASE conv_mian ENGINE QALL(COLUMNS('|T.D'),¸mp} -- { clientError SYNTAX_ERROR } diff --git a/tests/queries/0_stateless/02475_analysis_of_variance.reference b/tests/queries/0_stateless/02475_analysis_of_variance.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02475_analysis_of_variance.sql b/tests/queries/0_stateless/02475_analysis_of_variance.sql new file mode 100644 index 00000000000..86996f784ea --- /dev/null +++ b/tests/queries/0_stateless/02475_analysis_of_variance.sql @@ -0,0 +1,10 @@ + +SELECT analysisOfVariance(number, number % 2) FROM numbers(10) FORMAT Null; +SELECT analysisOfVariance(number :: Decimal32(5), number % 2) FROM numbers(10) FORMAT Null; +SELECT analysisOfVariance(number :: Decimal256(5), number % 2) FROM numbers(10) FORMAT Null; + +SELECT analysisOfVariance(1.11, -20); -- { serverError BAD_ARGUMENTS } +SELECT analysisOfVariance(1.11, 20 :: UInt128); -- { serverError BAD_ARGUMENTS } +SELECT analysisOfVariance(1.11, 9000000000000000); -- { serverError BAD_ARGUMENTS } + +SELECT analysisOfVariance(number, number % 2), analysisOfVariance(100000000000000000000., number % 65535) FROM numbers(1048575); -- { serverError BAD_ARGUMENTS } diff --git a/tests/queries/0_stateless/02475_bad_cast_low_cardinality_to_string_bug.reference b/tests/queries/0_stateless/02475_bad_cast_low_cardinality_to_string_bug.reference new file mode 100644 index 00000000000..4ffc8576e57 --- /dev/null +++ b/tests/queries/0_stateless/02475_bad_cast_low_cardinality_to_string_bug.reference @@ -0,0 +1,2 @@ +bbbbb +bbbbb diff --git a/tests/queries/0_stateless/02475_bad_cast_low_cardinality_to_string_bug.sql b/tests/queries/0_stateless/02475_bad_cast_low_cardinality_to_string_bug.sql new file mode 100644 index 00000000000..3b2abfb3c42 --- /dev/null +++ b/tests/queries/0_stateless/02475_bad_cast_low_cardinality_to_string_bug.sql @@ -0,0 +1 @@ +SELECT if(materialize(0), extract(materialize(CAST('aaaaaa', 'LowCardinality(String)')), '\\w'), extract(materialize(CAST('bbbbb', 'LowCardinality(String)')), '\\w*')) AS res FROM numbers(2); diff --git a/tests/queries/0_stateless/02475_join_bug_42832.reference b/tests/queries/0_stateless/02475_join_bug_42832.reference new file mode 100644 index 00000000000..e5310261d0a --- /dev/null +++ b/tests/queries/0_stateless/02475_join_bug_42832.reference @@ -0,0 +1,2 @@ +4 6 +4 4 diff --git a/tests/queries/0_stateless/02475_join_bug_42832.sql b/tests/queries/0_stateless/02475_join_bug_42832.sql new file mode 100644 index 00000000000..e383949fb22 --- /dev/null +++ b/tests/queries/0_stateless/02475_join_bug_42832.sql @@ -0,0 +1,16 @@ +DROP TABLE IF EXISTS tab1; +DROP 
TABLE IF EXISTS tab2; + +SET allow_suspicious_low_cardinality_types = 1; + +CREATE TABLE tab1 (a1 Int32, b1 Int32, val UInt64) ENGINE = MergeTree ORDER BY a1; +CREATE TABLE tab2 (a2 LowCardinality(Int32), b2 Int32) ENGINE = MergeTree ORDER BY a2; + +INSERT INTO tab1 SELECT number, number, 1 from numbers(4); +INSERT INTO tab2 SELECT number + 2, number + 2 from numbers(4); + +SELECT sum(val), count(val) FROM tab1 FULL OUTER JOIN tab2 ON b1 - 2 = a2 OR a1 = b2 SETTINGS join_use_nulls = 0; +SELECT sum(val), count(val) FROM tab1 FULL OUTER JOIN tab2 ON b1 - 2 = a2 OR a1 = b2 SETTINGS join_use_nulls = 1; + +DROP TABLE IF EXISTS tab1; +DROP TABLE IF EXISTS tab2; diff --git a/tests/queries/0_stateless/02476_fix_cast_parser_bug.reference b/tests/queries/0_stateless/02476_fix_cast_parser_bug.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02476_fix_cast_parser_bug.sql b/tests/queries/0_stateless/02476_fix_cast_parser_bug.sql new file mode 100644 index 00000000000..6b01b3a8c0b --- /dev/null +++ b/tests/queries/0_stateless/02476_fix_cast_parser_bug.sql @@ -0,0 +1 @@ +SELECT CAST(a, b -> c) ++; -- { clientError SYNTAX_ERROR } diff --git a/tests/queries/1_stateful/00096_obfuscator_save_load.sh b/tests/queries/1_stateful/00096_obfuscator_save_load.sh index c90eee1d0f9..a88dfcdb9b9 100755 --- a/tests/queries/1_stateful/00096_obfuscator_save_load.sh +++ b/tests/queries/1_stateful/00096_obfuscator_save_load.sh @@ -4,12 +4,14 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh +model=$(mktemp "$CLICKHOUSE_TMP/obfuscator-model-XXXXXX.bin") + $CLICKHOUSE_CLIENT --max_threads 1 --query="SELECT URL, Title, SearchPhrase FROM test.hits LIMIT 1000" > "${CLICKHOUSE_TMP}"/data.tsv -$CLICKHOUSE_OBFUSCATOR --structure "URL String, Title String, SearchPhrase String" --input-format TSV --output-format TSV --seed hello --limit 0 --save "${CLICKHOUSE_TMP}"/model.bin < "${CLICKHOUSE_TMP}"/data.tsv 2>/dev/null -wc -c < "${CLICKHOUSE_TMP}"/model.bin -$CLICKHOUSE_OBFUSCATOR --structure "URL String, Title String, SearchPhrase String" --input-format TSV --output-format TSV --seed hello --limit 2500 --load "${CLICKHOUSE_TMP}"/model.bin < "${CLICKHOUSE_TMP}"/data.tsv > "${CLICKHOUSE_TMP}"/data2500.tsv 2>/dev/null -rm "${CLICKHOUSE_TMP}"/model.bin +$CLICKHOUSE_OBFUSCATOR --structure "URL String, Title String, SearchPhrase String" --input-format TSV --output-format TSV --seed hello --limit 0 --save "$model" < "${CLICKHOUSE_TMP}"/data.tsv 2>/dev/null +wc -c < "$model" +$CLICKHOUSE_OBFUSCATOR --structure "URL String, Title String, SearchPhrase String" --input-format TSV --output-format TSV --seed hello --limit 2500 --load "$model" < "${CLICKHOUSE_TMP}"/data.tsv > "${CLICKHOUSE_TMP}"/data2500.tsv 2>/dev/null +rm "$model" $CLICKHOUSE_LOCAL --structure "URL String, Title String, SearchPhrase String" --input-format TSV --output-format TSV --query "SELECT count(), uniq(URL), uniq(Title), uniq(SearchPhrase) FROM table" < "${CLICKHOUSE_TMP}"/data.tsv $CLICKHOUSE_LOCAL --structure "URL String, Title String, SearchPhrase String" --input-format TSV --output-format TSV --query "SELECT count(), uniq(URL), uniq(Title), uniq(SearchPhrase) FROM table" < "${CLICKHOUSE_TMP}"/data2500.tsv diff --git a/tests/queries/1_stateful/00097_constexpr_in_index.reference b/tests/queries/1_stateful/00097_constexpr_in_index.reference new file mode 100644 index 00000000000..5080d6d4cd8 --- /dev/null +++ 
b/tests/queries/1_stateful/00097_constexpr_in_index.reference @@ -0,0 +1 @@ +1803 diff --git a/tests/queries/1_stateful/00097_constexpr_in_index.sql b/tests/queries/1_stateful/00097_constexpr_in_index.sql new file mode 100644 index 00000000000..b5cac75c767 --- /dev/null +++ b/tests/queries/1_stateful/00097_constexpr_in_index.sql @@ -0,0 +1,3 @@ +-- Even in the presence of OR, we evaluate the "0 IN (1, 2, 3)" as a constant expression, therefore it does not prevent the index analysis. + +SELECT count() FROM test.hits WHERE CounterID IN (14917930, 33034174) OR 0 IN (1, 2, 3) SETTINGS max_rows_to_read = 1000000, force_primary_key = 1; diff --git a/tests/queries/1_stateful/00168_parallel_processing_on_replicas_part_1.reference b/tests/queries/1_stateful/00168_parallel_processing_on_replicas_part_1.reference deleted file mode 100644 index 2675904dea0..00000000000 --- a/tests/queries/1_stateful/00168_parallel_processing_on_replicas_part_1.reference +++ /dev/null @@ -1,110 +0,0 @@ -Testing 00001_count_hits.sql ----> Ok! ✅ -Testing 00002_count_visits.sql ----> Ok! ✅ -Testing 00004_top_counters.sql ----> Ok! ✅ -Testing 00005_filtering.sql ----> Ok! ✅ -Testing 00006_agregates.sql ----> Ok! ✅ -Testing 00007_uniq.sql ----> Ok! ✅ -Testing 00008_uniq.sql ----> Ok! ✅ -Testing 00009_uniq_distributed.sql ----> Ok! ✅ -Testing 00010_quantiles_segfault.sql ----> Ok! ✅ -Testing 00011_sorting.sql ----> Ok! ✅ -Testing 00012_sorting_distributed.sql ----> Ok! ✅ -Skipping 00013_sorting_of_nested.sql -Testing 00014_filtering_arrays.sql ----> Ok! ✅ -Testing 00015_totals_and_no_aggregate_functions.sql ----> Ok! ✅ -Testing 00016_any_if_distributed_cond_always_false.sql ----> Ok! ✅ -Testing 00017_aggregation_uninitialized_memory.sql ----> Ok! ✅ -Testing 00020_distinct_order_by_distributed.sql ----> Ok! ✅ -Testing 00021_1_select_with_in.sql ----> Ok! ✅ -Testing 00021_2_select_with_in.sql ----> Ok! ✅ -Testing 00021_3_select_with_in.sql ----> Ok! ✅ -Testing 00022_merge_prewhere.sql ----> Ok! ✅ -Testing 00023_totals_limit.sql ----> Ok! ✅ -Testing 00024_random_counters.sql ----> Ok! ✅ -Testing 00030_array_enumerate_uniq.sql ----> Ok! ✅ -Testing 00031_array_enumerate_uniq.sql ----> Ok! ✅ -Testing 00032_aggregate_key64.sql ----> Ok! ✅ -Testing 00033_aggregate_key_string.sql ----> Ok! ✅ -Testing 00034_aggregate_key_fixed_string.sql ----> Ok! ✅ -Testing 00035_aggregate_keys128.sql ----> Ok! ✅ -Testing 00036_aggregate_hashed.sql ----> Ok! ✅ -Testing 00037_uniq_state_merge1.sql ----> Ok! ✅ -Testing 00038_uniq_state_merge2.sql ----> Ok! ✅ -Testing 00039_primary_key.sql ----> Ok! ✅ -Testing 00040_aggregating_materialized_view.sql ----> Ok! ✅ -Testing 00041_aggregating_materialized_view.sql ----> Ok! ✅ -Testing 00042_any_left_join.sql ----> Ok! ✅ -Testing 00043_any_left_join.sql ----> Ok! ✅ -Testing 00044_any_left_join_string.sql ----> Ok! ✅ -Testing 00045_uniq_upto.sql ----> Ok! ✅ -Testing 00046_uniq_upto_distributed.sql ----> Ok! ✅ -Testing 00047_bar.sql ----> Ok! ✅ -Testing 00048_min_max.sql ----> Ok! ✅ -Testing 00049_max_string_if.sql ----> Ok! ✅ -Testing 00050_min_max.sql ----> Ok! ✅ -Testing 00051_min_max_array.sql ----> Ok! ✅ -Testing 00052_group_by_in.sql ----> Ok! ✅ -Testing 00053_replicate_segfault.sql ----> Ok! ✅ -Testing 00054_merge_tree_partitions.sql ----> Ok! ✅ -Testing 00055_index_and_not.sql ----> Ok! ✅ -Testing 00056_view.sql ----> Ok! ✅ -Testing 00059_merge_sorting_empty_array_joined.sql ----> Ok! ✅ -Testing 00060_move_to_prewhere_and_sets.sql ----> Ok!
✅ -Skipping 00061_storage_buffer.sql -Testing 00062_loyalty.sql ----> Ok! ✅ -Testing 00063_loyalty_joins.sql ----> Ok! ✅ -Testing 00065_loyalty_with_storage_join.sql ----> Ok! ✅ -Testing 00066_sorting_distributed_many_replicas.sql ----> Ok! ✅ -Testing 00067_union_all.sql ----> Ok! ✅ -Testing 00068_subquery_in_prewhere.sql ----> Ok! ✅ -Testing 00069_duplicate_aggregation_keys.sql ----> Ok! ✅ -Testing 00071_merge_tree_optimize_aio.sql ----> Ok! ✅ -Testing 00072_compare_date_and_string_index.sql ----> Ok! ✅ -Testing 00073_uniq_array.sql ----> Ok! ✅ -Testing 00074_full_join.sql ----> Ok! ✅ -Testing 00075_left_array_join.sql ----> Ok! ✅ -Testing 00076_system_columns_bytes.sql ----> Ok! ✅ -Testing 00077_log_tinylog_stripelog.sql ----> Ok! ✅ -Testing 00078_group_by_arrays.sql ----> Ok! ✅ -Testing 00079_array_join_not_used_joined_column.sql ----> Ok! ✅ -Testing 00080_array_join_and_union.sql ----> Ok! ✅ -Testing 00081_group_by_without_key_and_totals.sql ----> Ok! ✅ -Testing 00082_quantiles.sql ----> Ok! ✅ -Testing 00083_array_filter.sql ----> Ok! ✅ -Testing 00084_external_aggregation.sql ----> Ok! ✅ -Testing 00085_monotonic_evaluation_segfault.sql ----> Ok! ✅ -Testing 00086_array_reduce.sql ----> Ok! ✅ -Testing 00087_where_0.sql ----> Ok! ✅ -Testing 00088_global_in_one_shard_and_rows_before_limit.sql ----> Ok! ✅ -Testing 00089_position_functions_with_non_constant_arg.sql ----> Ok! ✅ -Testing 00091_prewhere_two_conditions.sql ----> Ok! ✅ -Testing 00093_prewhere_array_join.sql ----> Ok! ✅ -Testing 00094_order_by_array_join_limit.sql ----> Ok! ✅ -Skipping 00095_hyperscan_profiler.sql -Testing 00139_like.sql ----> Ok! ✅ -Skipping 00140_rename.sql -Testing 00141_transform.sql ----> Ok! ✅ -Testing 00142_system_columns.sql ----> Ok! ✅ -Testing 00143_transform_non_const_default.sql ----> Ok! ✅ -Testing 00144_functions_of_aggregation_states.sql ----> Ok! ✅ -Testing 00145_aggregate_functions_statistics.sql ----> Ok! ✅ -Testing 00146_aggregate_function_uniq.sql ----> Ok! ✅ -Testing 00147_global_in_aggregate_function.sql ----> Ok! ✅ -Testing 00148_monotonic_functions_and_index.sql ----> Ok! ✅ -Testing 00149_quantiles_timing_distributed.sql ----> Ok! ✅ -Testing 00150_quantiles_timing_precision.sql ----> Ok! ✅ -Testing 00151_order_by_read_in_order.sql ----> Ok! ✅ -Skipping 00151_replace_partition_with_different_granularity.sql -Skipping 00152_insert_different_granularity.sql -Testing 00153_aggregate_arena_race.sql ----> Ok! ✅ -Skipping 00154_avro.sql -Testing 00156_max_execution_speed_sample_merge.sql ----> Ok! ✅ -Skipping 00157_cache_dictionary.sql -Skipping 00158_cache_dictionary_has.sql -Testing 00160_decode_xml_component.sql ----> Ok! ✅ -Testing 00162_mmap_compression_none.sql ----> Ok! ✅ -Testing 00164_quantileBfloat16.sql ----> Ok! ✅ -Testing 00165_jit_aggregate_functions.sql ----> Ok! ✅ -Skipping 00166_explain_estimate.sql -Testing 00167_read_bytes_from_fs.sql ----> Ok! ✅ -Total failed tests: diff --git a/tests/queries/1_stateful/00168_parallel_processing_on_replicas_part_1.sh b/tests/queries/1_stateful/00168_parallel_processing_on_replicas_part_1.sh deleted file mode 100755 index ecd0d281b53..00000000000 --- a/tests/queries/1_stateful/00168_parallel_processing_on_replicas_part_1.sh +++ /dev/null @@ -1,102 +0,0 @@ -#!/usr/bin/env bash -# Tags: no-tsan, no-random-settings - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. 
"$CURDIR"/../shell_config.sh - -# set -e - -# All replicas are localhost, disable `prefer_localhost_replica` option to test network interface -# Currently this feature could not work with hedged requests -# Enabling `enable_sample_offset_parallel_processing` feature could lead to intersecting marks, so some of them would be thrown away and it will lead to incorrect result of SELECT query -SETTINGS="--max_parallel_replicas=3 --use_hedged_requests=false --allow_experimental_parallel_reading_from_replicas=true" - -# Prepare tables -$CLICKHOUSE_CLIENT $SETTINGS -nm -q ''' - drop table if exists test.dist_hits SYNC; - drop table if exists test.dist_visits SYNC; - - create table test.dist_hits as test.hits engine = Distributed("test_cluster_one_shard_three_replicas_localhost", test, hits, rand()); - create table test.dist_visits as test.visits engine = Distributed("test_cluster_one_shard_three_replicas_localhost", test, visits, rand()); -'''; - -FAILED=() - -# PreviouslyFailed=( -# ) - -SkipList=( - "00013_sorting_of_nested.sql" # It contains FINAL, which is not allowed together with parallel reading - - "00061_storage_buffer.sql" - "00095_hyperscan_profiler.sql" # too long in debug (there is a --no-debug tag inside a test) - - "00140_rename.sql" # Multiple renames are not allowed with DatabaseReplicated and tags are not forwarded through this test - - "00154_avro.sql" # Plain select * with limit with Distributed table is not deterministic - "00151_replace_partition_with_different_granularity.sql" # Replace partition from Distributed is not allowed - "00152_insert_different_granularity.sql" # The same as above - - "00157_cache_dictionary.sql" # Too long in debug mode, but result is correct - "00158_cache_dictionary_has.sql" # The same as above - - "00166_explain_estimate.sql" # Distributed table returns nothing -) - -# for TESTPATH in "${PreviouslyFailed[@]}" -for TESTPATH in "$CURDIR"/*.sql; -do - TESTNAME=$(basename $TESTPATH) - NUM=$(echo "${TESTNAME}" | grep -o -P '^\d+' | sed 's/^0*//') - if [[ "${NUM}" -ge 168 ]]; then - continue - fi - - if [[ " ${SkipList[*]} " =~ ${TESTNAME} ]]; then - echo "Skipping $TESTNAME " - continue - fi - - echo -n "Testing $TESTNAME ----> " - - # prepare test - NEW_TESTNAME="/tmp/dist_$TESTNAME" - # Added g to sed command to replace all tables, not the first - cat $TESTPATH | sed -e 's/test.hits/test.dist_hits/g' | sed -e 's/test.visits/test.dist_visits/g' > $NEW_TESTNAME - - TESTNAME_RESULT="/tmp/result_$TESTNAME" - NEW_TESTNAME_RESULT="/tmp/result_dist_$TESTNAME" - - $CLICKHOUSE_CLIENT $SETTINGS -nm < $TESTPATH > $TESTNAME_RESULT - $CLICKHOUSE_CLIENT $SETTINGS -nm < $NEW_TESTNAME > $NEW_TESTNAME_RESULT - - expected=$(cat $TESTNAME_RESULT | md5sum) - actual=$(cat $NEW_TESTNAME_RESULT | md5sum) - - if [[ "$expected" != "$actual" ]]; then - FAILED+=("$TESTNAME") - echo "Failed! ❌" - echo "Plain:" - cat $TESTNAME_RESULT - echo "Distributed:" - cat $NEW_TESTNAME_RESULT - else - echo "Ok! 
✅" - fi -done - - -echo "Total failed tests: " -# Iterate the loop to read and print each array element -for value in "${FAILED[@]}" -do - echo "🔺 $value" -done - -# Drop tables - -$CLICKHOUSE_CLIENT $SETTINGS -nm -q ''' - drop table if exists test.dist_hits SYNC; - drop table if exists test.dist_visits SYNC; -'''; diff --git a/tests/queries/1_stateful/00175_obfuscator_schema_inference.sh b/tests/queries/1_stateful/00175_obfuscator_schema_inference.sh index 8ff0d2fa648..771c7ab5436 100755 --- a/tests/queries/1_stateful/00175_obfuscator_schema_inference.sh +++ b/tests/queries/1_stateful/00175_obfuscator_schema_inference.sh @@ -4,6 +4,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh +model=$(mktemp "$CLICKHOUSE_TMP/obfuscator-model-XXXXXX.bin") + # Compared to explicitly specifying the structure of the input, # schema inference adds Nullable(T) to all types, so the model and the results # are a bit different from test '00175_obfuscator_schema_inference.sh' @@ -14,10 +16,10 @@ $CLICKHOUSE_CLIENT --max_threads 1 --query="SELECT URL, Title, SearchPhrase FROM $CLICKHOUSE_OBFUSCATOR --input-format TSV --output-format TSV --seed hello --limit 2500 < "${CLICKHOUSE_TMP}"/data.tsv > "${CLICKHOUSE_TMP}"/data2500.tsv 2>/dev/null # Test obfuscator with saving the model -$CLICKHOUSE_OBFUSCATOR --input-format TSV --output-format TSV --seed hello --limit 0 --save "${CLICKHOUSE_TMP}"/model.bin < "${CLICKHOUSE_TMP}"/data.tsv 2>/dev/null -wc -c < "${CLICKHOUSE_TMP}"/model.bin -$CLICKHOUSE_OBFUSCATOR --input-format TSV --output-format TSV --seed hello --limit 2500 --load "${CLICKHOUSE_TMP}"/model.bin < "${CLICKHOUSE_TMP}"/data.tsv > "${CLICKHOUSE_TMP}"/data2500_load_from_model.tsv 2>/dev/null -rm "${CLICKHOUSE_TMP}"/model.bin +$CLICKHOUSE_OBFUSCATOR --input-format TSV --output-format TSV --seed hello --limit 0 --save "$model" < "${CLICKHOUSE_TMP}"/data.tsv 2>/dev/null +wc -c < "$model" +$CLICKHOUSE_OBFUSCATOR --input-format TSV --output-format TSV --seed hello --limit 2500 --load "$model" < "${CLICKHOUSE_TMP}"/data.tsv > "${CLICKHOUSE_TMP}"/data2500_load_from_model.tsv 2>/dev/null +rm "$model" $CLICKHOUSE_LOCAL --structure "URL String, Title String, SearchPhrase String" --input-format TSV --output-format TSV --query "SELECT count(), uniq(URL), uniq(Title), uniq(SearchPhrase) FROM table" < "${CLICKHOUSE_TMP}"/data.tsv $CLICKHOUSE_LOCAL --structure "URL String, Title String, SearchPhrase String" --input-format TSV --output-format TSV --query "SELECT count(), uniq(URL), uniq(Title), uniq(SearchPhrase) FROM table" < "${CLICKHOUSE_TMP}"/data2500.tsv diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt index a79982bbd61..70c32c67063 100644 --- a/utils/CMakeLists.txt +++ b/utils/CMakeLists.txt @@ -20,17 +20,13 @@ add_subdirectory (report) # Not used in package if (NOT DEFINED ENABLE_UTILS OR ENABLE_UTILS) add_subdirectory (compressor) - add_subdirectory (iotest) add_subdirectory (corrector_utf8) add_subdirectory (zookeeper-cli) add_subdirectory (zookeeper-dump-tree) add_subdirectory (zookeeper-remove-by-list) - add_subdirectory (zookeeper-create-entry-to-download-part) - add_subdirectory (zookeeper-adjust-block-numbers-to-parts) add_subdirectory (wikistat-loader) add_subdirectory (check-marks) add_subdirectory (checksum-for-compressed-block) - add_subdirectory (db-generator) add_subdirectory (wal-dump) add_subdirectory (check-mysql-binlog) add_subdirectory (keeper-bench) @@ -44,5 +40,3 @@ if (NOT DEFINED ENABLE_UTILS OR ENABLE_UTILS) 
add_subdirectory (memcpy-bench) endif () endif () - -add_subdirectory (package) diff --git a/utils/antlr/README.md b/utils/antlr/README.md index 50bf34ab432..7d2112e46bf 100644 --- a/utils/antlr/README.md +++ b/utils/antlr/README.md @@ -1,3 +1,7 @@ +## This parser is unsupported + +We keep it in this repository for your curiosity. But this is not the parser of ClickHouse. + ## How to generate source code files from grammar Grammar is located inside `ClickHouseLexer.g4` and `ClickHouseParser.g4` files. diff --git a/utils/check-style/check-style b/utils/check-style/check-style index 772f48ad088..a0556d971e8 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -59,10 +59,7 @@ declare -A EXTERN_TYPES EXTERN_TYPES[ErrorCodes]=int EXTERN_TYPES[ProfileEvents]=Event EXTERN_TYPES[CurrentMetrics]=Metric -declare -A EXTERN_ALLOWED_CHARS -EXTERN_ALLOWED_CHARS[ErrorCodes]='_A-Z' -EXTERN_ALLOWED_CHARS[ProfileEvents]='_A-Za-z' -EXTERN_ALLOWED_CHARS[CurrentMetrics]='_A-Za-z' + EXTERN_TYPES_EXCLUDES=( ProfileEvents::global_counters ProfileEvents::Event @@ -87,18 +84,30 @@ EXTERN_TYPES_EXCLUDES=( CurrentMetrics::Metric CurrentMetrics::values CurrentMetrics::Value + + ErrorCodes::ErrorCode + ErrorCodes::getName + ErrorCodes::increment + ErrorCodes::end + ErrorCodes::values + ErrorCodes::values[i] + ErrorCodes::getErrorCodeByName ) for extern_type in ${!EXTERN_TYPES[@]}; do type_of_extern=${EXTERN_TYPES[$extern_type]} - allowed_chars=${EXTERN_ALLOWED_CHARS[$extern_type]} + allowed_chars='[_A-Za-z]+' # Unused # NOTE: to fix automatically, replace echo with: # sed -i "/extern const $type_of_extern $val/d" $file find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' | { - grep -vP $EXCLUDE_DIRS | xargs grep -l -P "extern const $type_of_extern [$allowed_chars]+" + # NOTE: the check is pretty dumb and distinguish only by the type_of_extern, + # and this matches with zkutil::CreateMode + grep -v 'src/Common/ZooKeeper/Types.h' + } | { + grep -vP $EXCLUDE_DIRS | xargs grep -l -P "extern const $type_of_extern $allowed_chars" } | while read file; do - grep -P "extern const $type_of_extern [$allowed_chars]+;" $file | sed -r -e "s/^.*?extern const $type_of_extern ([$allowed_chars]+);.*?$/\1/" | while read val; do + grep -P "extern const $type_of_extern $allowed_chars;" $file | sed -r -e "s/^.*?extern const $type_of_extern ($allowed_chars);.*?$/\1/" | while read val; do if ! grep -q "$extern_type::$val" $file; then # Excludes for SOFTWARE_EVENT/HARDWARE_EVENT/CACHE_EVENT in ThreadProfileEvents.cpp if [[ ! 
$extern_type::$val =~ ProfileEvents::Perf.* ]]; then @@ -110,11 +119,13 @@ for extern_type in ${!EXTERN_TYPES[@]}; do # Undefined # NOTE: to fix automatically, replace echo with: - # ( grep -q -F 'namespace $extern_type' $file && sed -i -r "0,/(\s*)extern const $type_of_extern [$allowed_chars]+/s//\1extern const $type_of_extern $val;\n&/" $file || awk '{ print; if (ns == 1) { ns = 2 }; if (ns == 2) { ns = 0; print "namespace $extern_type\n{\n extern const $type_of_extern '$val';\n}" } }; /namespace DB/ { ns = 1; };' < $file > ${file}.tmp && mv ${file}.tmp $file ) + # ( grep -q -F 'namespace $extern_type' $file && \ + # sed -i -r "0,/(\s*)extern const $type_of_extern [$allowed_chars]+/s//\1extern const $type_of_extern $val;\n&/" $file || \ + # awk '{ print; if (ns == 1) { ns = 2 }; if (ns == 2) { ns = 0; print "namespace $extern_type\n{\n extern const $type_of_extern '$val';\n}" } }; /namespace DB/ { ns = 1; };' < $file > ${file}.tmp && mv ${file}.tmp $file ) find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' | { - grep -vP $EXCLUDE_DIRS | xargs grep -l -P "$extern_type::[$allowed_chars]+" + grep -vP $EXCLUDE_DIRS | xargs grep -l -P "$extern_type::$allowed_chars" } | while read file; do - grep -P "$extern_type::[$allowed_chars]+" $file | sed -r -e "s/^.*?$extern_type::([$allowed_chars]+).*?$/\1/" | while read val; do + grep -P "$extern_type::$allowed_chars" $file | grep -P -v '^\s*//' | sed -r -e "s/^.*?$extern_type::($allowed_chars).*?$/\1/" | while read val; do if ! grep -q "extern const $type_of_extern $val" $file; then if ! in_array "$extern_type::$val" "${EXTERN_TYPES_EXCLUDES[@]}"; then echo "$extern_type::$val is used in file $file but not defined" @@ -125,9 +136,9 @@ for extern_type in ${!EXTERN_TYPES[@]}; do # Duplicates find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' | { - grep -vP $EXCLUDE_DIRS | xargs grep -l -P "$extern_type::[$allowed_chars]+" + grep -vP $EXCLUDE_DIRS | xargs grep -l -P "$extern_type::$allowed_chars" } | while read file; do - grep -P "extern const $type_of_extern [$allowed_chars]+;" $file | sort | uniq -c | grep -v -P ' +1 ' && echo "Duplicate $extern_type in file $file" + grep -P "extern const $type_of_extern $allowed_chars;" $file | sort | uniq -c | grep -v -P ' +1 ' && echo "Duplicate $extern_type in file $file" done done diff --git a/utils/compressor/decompress_perf.cpp b/utils/compressor/decompress_perf.cpp index e3210164d79..891a6d3d1dd 100644 --- a/utils/compressor/decompress_perf.cpp +++ b/utils/compressor/decompress_perf.cpp @@ -107,8 +107,12 @@ protected: if (variant == LZ4_REFERENCE) { - if (LZ4_decompress_fast(compressed_buffer + COMPRESSED_BLOCK_HEADER_SIZE, to, size_decompressed) < 0) + if (LZ4_decompress_fast( + compressed_buffer + COMPRESSED_BLOCK_HEADER_SIZE, to, + static_cast(size_decompressed)) < 0) + { throw Exception("Cannot LZ4_decompress_fast", ErrorCodes::CANNOT_DECOMPRESS); + } } else LZ4::decompress(compressed_buffer + COMPRESSED_BLOCK_HEADER_SIZE, to, size_compressed_without_checksum, size_decompressed, perf_stat); diff --git a/utils/db-generator/CMakeLists.txt b/utils/db-generator/CMakeLists.txt deleted file mode 100644 index 45780717752..00000000000 --- a/utils/db-generator/CMakeLists.txt +++ /dev/null @@ -1,2 +0,0 @@ -clickhouse_add_executable (query_db_generator query_db_generator.cpp) -target_link_libraries(query_db_generator PRIVATE clickhouse_parsers boost::program_options) diff --git a/utils/db-generator/README.md b/utils/db-generator/README.md deleted file mode 100644 index 
5596aac66e4..00000000000 --- a/utils/db-generator/README.md +++ /dev/null @@ -1,35 +0,0 @@ -# Clickhouse query analysis - -Here we will consider only `SELECT` queries, i.e. those queries that get data from the table. -The built-in Clickhouse parser accepts a string as input, which is a query. Among 14 main clauses of `SELECT` statement: `WITH`, `SELECT`, `TABLES`, `PREWHERE`, `WHERE`, `GROUP_BY`, `HAVING`, `ORDER_BY`, `LIMIT_BY_OFFSET`, `LIMIT_BY_LENGTH`, `LIMIT_BY`, `LIMIT_OFFSET`, `LIMIT_LENGTH`, `SETTINGS`, we will analyze the `SELECT`, `TABLES`, `WHERE`, `GROUP_BY`, `HAVING`, `ORDER_BY` clauses because the most of data is there. We need this data to analyze the structure and to identify values. The parser issues a tree structure after parsing a query, where each node is a specific query execution operation, a function over values, a constant, a designation, etc. Nodes also have subtrees where their arguments or suboperations are located. We will try to reveal the data we need by avoiding this tree. - -## Scheme analysis - -It is necessary to determine possible tables by a query. Having a query string, you can understand which parts of it represent the names of the tables, so you can determine their number in our database. -In the Clickhouse parser, `TABLES` (Figure 1) is a query subtree responsible for tables where we get data. It contains the main table where the columns come from, as well as the `JOIN` operations that are performed in the query. Avoiding all nodes in the subtree, we use the names of the tables and databases where they are located, as well as their alias, i.e. the shortened names chosen by the query author. We may need these names to determine the ownership of the column in the future. -Thus, we get a set of databases for the query, as well as tables and their aliases, with the help of them a query is made. - -Then we need to define the set of columns that are in the query and the tables they can refer to. The set of columns in each table is already known during the query execution. Therefore, the program automatically links the column and table at runtime. However, in our case, it is impossible to unambiguously interpret the belonging of a column to a specific table, for example, in the following query `SELECT column1, column2, column3 FROM table1 JOIN table2 on table1.column2 = table2.column3`. In this case, we can say which table `column2` and `column3` belong to. However, `column1` can belong to either the first or the second table. We will refer undefined columns to the main table, on which a query is made, for unambiguous interpretation of such cases. For example, in this case, it will be `table1`. -All columns in the tree are in `IDENTIFIER` type nodes, which are in the `SELECT`, `TABLES`, `WHERE`, `GROUP_BY`, `HAVING`, `ORDER_BY` subtrees. We form a set of all tables recursively avoiding the subtrees, then we split the column into constituents such as the table (if it is explicitly specified with a dot) and the name. Then, since the table can be an alias, we replace the alias with the original table name. We now have a list of all the columns and tables they belong to. We define the main query table for non-table columns. - -## Column analysis - -Then we need to exactly define data types for columns that have a value in the query. An example is the boolean `WHERE` clause where we test boolean expressions in its attributes. 
If the query specifies `column > 5`, then we can conclude that this column contains a numeric value, or if the `LIKE` expression is applied to the attribute, then the attribute has a string type. -In this part, you need to learn how to extract such expressions from a query and match data types for columns, where it is possible. At the same time, it is clear that it is not always possible to make an unambiguous decision about the type of a particular attribute from the available values. For example, `column > 5` can mean many numeric types such as `UINT8`, `UINT32`, `INT32`, `INT64`, etc. It is necessary to determine the interpretation of certain values since searching through all possible values ​​can be quite large and long. -It can take a long time to iterate over all possible values, so we use `INT64` and `FLOAT64` types for numeric values, `STRING` for strings, `DATE` and `DATETIME` for dates, and `ARRAY`. -We can determine column values ​​using boolean, arithmetic and other functions on the column values ​​that are specified in the query. Such functions are in the `SELECT` and `WHERE` subtrees. The function parameter can be a constant, a column or another function (Figure 2). Thus, the following parameters can help to understand the type of the column: -- The types of arguments that a function can take, for example, the `TOSTARTOFMINUTE` function (truncate time up to a multiple of 5 minutes down) can only accept `DATETIME`, so if the argument of this function is a column, then this column has `DATETIME` type. -- The types of the remaining arguments in this function. For example, the `EQUALS` function means equality of its argument types, so if a constant and a column are present in this function, then we can define the type of the column as the type of the constant. - -Thus, we define the possible argument types, the return type, the parameter for each function, and the function arguments of the identical type. The recursive function handler will determine the possible types of columns used in these functions by the values of the arguments, and then return the possible types of the function's result. -Now, for each column, we have many possible types of values. We will choose one specific type from this set to interpret the query unambiguously. - -## Column values definition - -At this stage, we already have a certain structure of the database tables, we need to fill this table with values. We should understand which columns depend on each other when executing the function (for example, the join is done according to two columns, which means that they must have the same values). We also need to understand what values ​​the columns must have to fulfill various conditions during execution. -We search for all comparison operations in our query to achieve the goal. If the arguments of the operation are two columns, then we consider them linked. If the arguments are the column and the value, then we assign that value to the possible column value and add the value with some noise. A random number is a noise for a numeric type, it is a random number of days for a date, etc. In this case, a handler for this operation is required for each comparison operation, which generates at least two values, one of them is the operation condition, and the other is not. For example, a value greater than 5 and less than or equal to 5 must be assigned for the operation `column1 > 5`, `column1`, for the operation `column2 LIKE some% string` the same is true. 
The satisfying and not satisfying expression must be assigned to `column2`. -Now we have many associated columns and many values. We know that the connectivity of columns is symmetric, but we need to add transitivity for a complete definition, because if `column1 = column2` and `column2 = column3`, then `column1 = column3`, but this does not follow from the construction. Accordingly, we need to extend the connectivity across all columns. We combine multiple values for each column with the values associated with it. If we have columns with no values, then we generate random values. - -## Generation - -We have a complete view of the database schema as well as many values ​​for each table now. We will generate data by cartesian product of the value set of each column for a specific table. Thus, we get a set for each table, consisting of sets of values for each column. We start generating queries that create this table and fill it with data. We generate the `CREATE QUERY` that creates this table based on the structure of the table and the types of its columns, and then we generate the `INSERT QUERY` over the set of values, which fills the table with data. diff --git a/utils/db-generator/query_db_generator.cpp b/utils/db-generator/query_db_generator.cpp deleted file mode 100644 index 00785af89f7..00000000000 --- a/utils/db-generator/query_db_generator.cpp +++ /dev/null @@ -1,1354 +0,0 @@ -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - - -namespace po = boost::program_options; - -using ColumnType = uint32_t; -using TableAndColumn = std::pair; -pcg64 rng; - -std::string randomString(size_t length) -{ - auto randchar = []() -> char - { - const char charset[] = "0123456789" "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz"; - const size_t max_index = (sizeof(charset) - 1); - return charset[rng() % max_index]; - }; - std::string str(length, 0); - std::generate_n(str.begin(), length, randchar); - return str; -} -std::string randomInteger(unsigned int min = 0, unsigned int max = 4294967295) -{ - int r = rng() % (max - min) + min; - return std::to_string(r); -} - -std::string randomFloat(unsigned int min = 0, unsigned int max = 4294967295) -{ - float r = static_cast(rng() % max) / (static_cast(rng() % 100)) + min; - return std::to_string(r); -} - -std::string randomDate() -{ - int32_t year = rng() % 136 + 1970; - int32_t month = rng() % 12 + 1; - int32_t day = rng() % 12 + 1; - char answer[13]; - size_t size = sprintf(answer, "'%04u-%02u-%02u'", year, month, day); - return std::string(answer, size); -} - -std::string randomDatetime() -{ - int32_t year = rng() % 136 + 1970; - int32_t month = rng() % 12 + 1; - int32_t day = rng() % 12 + 1; - int32_t hours = rng() % 24; - int32_t minutes = rng() % 60; - int32_t seconds = rng() % 60; - char answer[22]; - size_t size = sprintf( - answer, - "'%04u-%02u-%02u %02u:%02u:%02u'", - year, - month, - day, - hours, - minutes, - seconds); - return std::string(answer, size); -} -TableAndColumn get_table_a_column(const std::string & c) -{ - auto point_place = c.rfind('.'); - std::string db{}; - std::string column{}; - if (point_place != std::string::npos) - { - db = c.substr(0, point_place); - column = c.substr(point_place + 1); - } - else - { - column = c; - } - return { db, column }; -} - - -enum Type : ColumnType -{ - i = 1, - // int - f = 2, - // float - s = 4, - // string - d = 8, - // date 
- dt = 16, - // datetime - b = 32, - // bool - all = 63, - a = 64, - // array - t = 128, - // tuple -}; - - -std::map type_definition = -{ - {Type::i, "Int64"}, {Type::f, "Float64"}, {Type::s, "String"}, {Type::d, "Date"}, {Type::dt, "DateTime"}, {Type::b, "UInt8"} -}; - -ColumnType time_type(std::string value) -{ - if (value.length() == 12) - { - for (size_t i : {5, 8}) - { - if (value[i] != '-') - return Type::s; - } - for (size_t i : {1, 2, 3, 4, 6, 7, 9, 10}) - { - if (!isdigit(value[i])) - return Type::s; - } - return Type::d; - } - - if (value.length() == 21) - { - for (size_t i : {5, 8}) - { - if (value[i] != '-') - return Type::s; - } - for (size_t i : {14, 17}) - { - if (value[i] != '-') - return Type::s; - } - if (value[11] != '-') - return Type::s; - return Type::dt; - } - return Type::s; -} -// Casting inner clickhouse parser type to our type -ColumnType type_cast(int t) -{ - switch (t) - { - case 1: - case 2: - case 4: - case 5: - case 19: - case 20: - case 21: - return Type::i; - - case 3: - return Type::f; - - case 16: - return Type::s; - - case 17: - return Type::a | Type::all; - - case 18: - return Type::t | Type::all; - } - return Type::all; -} - - -class FuncRet -{ -public: - FuncRet() = default; - - FuncRet(ColumnType t, std::string v) - : value(v) - , type(t) {} - - FuncRet(ColumnType t, std::string v, bool is_a) - : value(v) - , type(t) - , is_array(is_a) {} - - std::string value{}; - ColumnType type = Type::all; - bool is_array = false; -}; - - -std::map func_to_return_type = { - {"divide", FuncRet(Type::f, "")}, {"e", FuncRet(Type::f, "e()")}, {"pi", FuncRet(Type::f, "pi()")}, {"exp", FuncRet(Type::f, "")}, - {"log", FuncRet(Type::f,"")}, {"exp2", FuncRet(Type::f, "")}, {"log2", FuncRet(Type::f, "")}, {"exp10", FuncRet(Type::f, "")}, - {"log10", FuncRet(Type::f, "")}, {"sqrt", FuncRet(Type::f, "")}, {"cbrt", FuncRet(Type::f, "")}, {"erf", FuncRet(Type::f, "")}, - {"erfc", FuncRet(Type::f, "")}, {"lgamma", FuncRet(Type::f, "")}, {"tgamma", FuncRet(Type::f, "")}, {"sin", FuncRet(Type::f, "")}, - {"cos", FuncRet(Type::f, "")}, {"tan", FuncRet(Type::f, "")}, {"asin", FuncRet(Type::f, "")}, {"acos", FuncRet(Type::f, "")}, - {"atan", FuncRet(Type::f, "")}, {"pow", FuncRet(Type::f, "")}, {"splitbystring", FuncRet(Type::s | Type::a,"")}, - {"splitbychar", FuncRet(Type::s | Type::a, "")}, {"alphatokens", FuncRet(Type::s | Type::a, "")}, {"toyear", FuncRet(Type::i, "")}, - {"tomonth", FuncRet(Type::i, "")}, {"todayofmonth", FuncRet(Type::i, "")}, {"tohour", FuncRet(Type::dt, "")}, {"tominute", FuncRet(Type::dt, "")}, - {"toseconds", FuncRet(Type::dt, "")}, {"tounixtimestamp", FuncRet(Type::i, "")}, {"tostartofyear", FuncRet(Type::dt | Type::d, "")}, - {"tostartofquater",FuncRet(Type::dt | Type::d, "")}, {"tostartofmonth", FuncRet(Type::dt | Type::d, "")}, {"tomonday", FuncRet(Type::dt | Type::d, "")}, - {"tostartoffiveminutes", FuncRet(Type::dt, "")}, {"tostartoftenminutes", FuncRet(Type::dt, "")}, {"tostartoffifteenminutes", FuncRet(Type::dt, "")}, - {"tostartofinterval", FuncRet(Type::dt, "")}, {"totime", FuncRet(Type::dt, "")}, {"torelativemonthnum", FuncRet(Type::i, "")}, - {"torelativeweeknum", FuncRet(Type::i, "")}, {"torelativedaynum", FuncRet(Type::i, "")}, {"torelativehournum", FuncRet(Type::i, "")}, - {"torelativeminutenum", FuncRet(Type::i, "")}, {"torelativesecondsnum", FuncRet(Type::i, "")}, {"datediff", FuncRet(Type::d | Type::dt, "")}, - {"formatdatetime", FuncRet(Type::s, "")}, {"now", FuncRet(Type::dt | Type::d, "now()")}, {"today", FuncRet(Type::d | 
Type::dt, "today()")}, - {"yesterday", FuncRet(Type::d | Type::dt, "yesterday()")}, {"tolastdayofmonth", FuncRet(Type::dt | Type::d, "")} -}; - -std::set func_args_same_types = { - "equals", "notequals", "less", "greater", "lessorequals", "greaterorequals", "multiply" -}; - -std::map func_to_param_type = { - {"tostartofminute", Type::dt}, {"plus", Type::i | Type::f | Type::d | Type::dt}, {"multiply", Type::i | Type::f}, - {"minus", Type::i | Type::f | Type::d | Type::dt}, {"negate", Type::i | Type::f}, {"divide", Type::i | Type::f}, - {"abs", Type::i | Type::f}, {"gcd", Type::i | Type::f}, {"lcm", Type::i | Type::f}, {"bitnot", Type::i}, {"bitshiftleft", Type::i}, - {"bitshiftright", Type::i}, {"bittest", Type::i}, {"exp", Type::i | Type::f}, {"log", Type::i | Type::f}, - {"exp2", Type::i | Type::f}, {"log2", Type::i | Type::f}, {"exp10", Type::i | Type::f}, {"log10", Type::i | Type::f}, - {"sqrt", Type::i | Type::f}, {"cbrt", Type::i | Type::f}, {"erf", Type::i | Type::f}, {"erfc", Type::i | Type::f}, - {"lgamma", Type::i | Type::f}, {"tgamma", Type::i | Type::f}, {"sin", Type::i | Type::f}, {"cos", Type::i | Type::f}, - {"tan", Type::i | Type::f}, {"asin", Type::i | Type::f}, {"acos", Type::i | Type::f}, {"atan", Type::i | Type::f}, - {"pow", Type::i | Type::f}, {"arrayjoin", Type::all | Type::a}, {"substring", Type::s}, {"splitbystring", Type::s}, {"splitbychar", Type::s}, - {"alphatokens", Type::s}, {"toyear", Type::d | Type::dt}, {"tomonth", Type::d | Type::dt}, {"todayofmonth", Type::d | Type::dt}, {"tohour", Type::dt}, - {"tominute", Type::dt}, {"tosecond", Type::dt}, {"touixtimestamp", Type::dt}, {"tostartofyear", Type::d | Type::dt}, - {"tostartofquarter", Type::d | Type::dt}, {"tostartofmonth", Type::d | Type::dt}, {"tomonday", Type::d | Type::dt}, - {"tostartoffiveminutes", Type::dt}, {"tostartoftenminutes", Type::dt}, {"tostartoffifteenminutes", Type::d | Type::dt}, - {"tostartofinterval", Type::d | Type::dt}, {"totime", Type::d | Type::dt}, {"torelativehonthnum", Type::d | Type::dt}, - {"torelativeweeknum", Type::d | Type::dt}, {"torelativedaynum", Type::d | Type::dt}, {"torelativehournum", Type::d | Type::dt}, - {"torelativeminutenum", Type::d | Type::dt}, {"torelativesecondnum", Type::d | Type::dt}, {"datediff", Type::d | Type::dt}, - {"formatdatetime", Type::dt}, {"tolastdayofmonth", Type::d | Type::dt} -}; - - -class Column -{ -public: - TableAndColumn name; - std::set equals; - std::set values; - ColumnType type = Type::all; - bool is_array = false; - - Column() = default; - - explicit Column(const std::string & column_name) - { - name = std::make_pair("", column_name); - type = Type::all; - } - - void merge(Column other) - { - if (name.second.empty()) - name = other.name; - equals.insert(other.equals.begin(), other.equals.end()); - values.insert(other.values.begin(), other.values.end()); - type &= other.type; - is_array |= other.is_array; - } - - void printType() const - { - if (type & Type::i) - std::cout << "I"; - if (type & Type::f) - std::cout << "F"; - if (type & Type::s) - std::cout << "S"; - if (type & Type::d) - std::cout << "D"; - if (type & Type::dt) - std::cout << "DT"; - if (is_array) - std::cout << "ARR"; - std::cout << "\n"; - } - - void print() - { - std::cout << name.first << "." << name.second << "\n"; - std::cout << "type: "; - printType(); - std::cout << "values:"; - for (const auto & val : values) - std::cout << " " << val; - std::cout << "\n"; - std::cout << "equal:"; - for (const auto & col : equals) - std::cout << " " << col.first << "." 
<< col.second; - std::cout << "\n"; - } - - std::string generateOneValue() const - { - if (type & Type::i) - return randomInteger(); - - if (type & Type::f) - return randomFloat(); - - if (type & Type::d) - return randomDate(); - - if (type & Type::dt) - return randomDatetime(); - - if (type & Type::s) - return "'" + randomString(rng() % 40) + "'"; - - if (type & Type::b) - return "0"; - - return ""; - } - - bool generateValues(int amount = 0) - { - if (values.size() > 2 && amount == 0) - return false; - while (values.empty() or amount > 0) - { - amount -= 1; - if (is_array) - { - std::string v = "["; - for (unsigned int i = 0; i < static_cast(rng()) % 10 + 1; ++i) - { - if (i != 0) - v += ", "; - v += generateOneValue(); - } - v += "]"; - values.insert(v); - } - else - { - values.insert(generateOneValue()); - } - } - return true; - } - - void unifyType() - { - if (type & Type::i) - type = Type::i; - else if (type & Type::f) - type = Type::f; - else if (type & Type::d) - type = Type::d; - else if (type & Type::dt) - type = Type::dt; - else if (type & Type::s) - type = Type::s; - else if (type & Type::b) - type = Type::b; - else - throw std::runtime_error("Error in determination column type " + name.first + '.' + name.second); - } -}; - - -std::set> -decartMul( - std::set> & prev, - std::set & mul) -{ - std::set> result; - for (const auto & v : prev) - { - for (const auto & m : mul) - { - std::vector tmp = v; - tmp.push_back(m); - result.insert(tmp); - } - } - return result; -} - - -class Table -{ -public: - Table() = default; - - explicit Table(std::string table_name) - : name(table_name) {} - - std::string name; - std::set columns; - std::map column_description; - - bool columnExists(const std::string & column_name) const - { - return columns.contains(column_name); // || columns_maybe.contains(column_name); - } - - void addColumn(const std::string & column_name) - { - columns.insert(column_name); - } - - void setDescription(Column other) - { - column_description[other.name.second].merge(other); - } - - void print() - { - std::cout << "Table\n"; - std::cout << name << "\n"; - std::cout << "Columns:\n\n"; - for (const auto & column : columns) - { - std::cout << column << "\n"; - if (column_description.contains(column)) - column_description[column].print(); - std::cout << "\n"; - } - std::cout << "\n"; - } - - void merge(Table other) - { - name = other.name; - columns.insert(other.columns.begin(), other.columns.end()); - for (const auto & desc : other.column_description) - column_description[desc.first].merge(desc.second); - } - - std::string createQuery() - { - std::string create; - std::string db, _; - std::tie(db, _) = get_table_a_column(name); - create = "CREATE DATABASE IF NOT EXISTS " + db + ";\n\n"; - create += "CREATE TABLE IF NOT EXISTS " + name + " (\n"; - for (auto column = columns.begin(); column != columns.end(); ++column) - { - if (column != columns.begin()) - create += ", \n"; - create += *column + " "; - create += column_description[*column].is_array ? "Array(" : ""; - create += type_definition[column_description[*column].type]; - create += column_description[*column].is_array ? 
")" : ""; - } - create += "\n) ENGINE = Log;\n\n"; - return create; - } - - std::string insertQuery() - { - std::string insert = "INSERT INTO " + name + "\n"; - insert += "("; - std::set> values = {std::vector(0)}; - for (auto column = columns.begin(); column != columns.end(); ++column) - { - if (column != columns.begin()) - insert += ", "; - insert += *column; - values = decartMul(values, column_description[*column].values); - } - insert += ") VALUES \n"; - for (auto val_set_iter = values.begin(); val_set_iter != values.end(); - ++val_set_iter) - { - if (val_set_iter != values.begin()) - insert += ",\n"; - auto val_set = *val_set_iter; - insert += "("; - for (auto val = val_set.begin(); val != val_set.end(); ++val) - { - if (val != val_set.begin()) - insert += ", "; - insert += *val; - } - insert += ")"; - } - insert += ";\n\n"; - return insert; - } -}; - - -class TableList -{ -public: - std::string main_table; - std::map aliases; - std::unordered_map tables; - std::set nested; - - bool tableExists(const std::string & table_name) const - { - return tables.contains(table_name); - } - - void addColumn(std::string full_column) - { - std::string table, column; - std::tie(table, column) = get_table_a_column(full_column); - if (!table.empty()) - { - if (tables.contains(table)) - { - tables[table].addColumn(column); - return; - } - if (aliases.contains(table)) - { - tables[aliases[table]].addColumn(column); - return; - } - nested.insert(table); - } - tables[main_table].addColumn(full_column); - } - - void addTable(std::string table_name) - { - if (tables.contains(table_name)) - return; - - tables[table_name] = Table(table_name); - if (main_table.empty()) - main_table = table_name; - } - - void addDescription(const Column & description) - { - std::string table = description.name.first; - if (tables.contains(table)) - tables[table].setDescription(description); - } - - TableAndColumn getTable(std::string full_column) const - { - std::string table, column; - std::tie(table, column) = get_table_a_column(full_column); - if (!table.empty()) - { - if (tables.contains(table)) - return std::make_pair(table, column); - - if (aliases.contains(table)) - { - table = aliases.find(table)->second; - return std::make_pair(table, column); - } - } - return std::make_pair(main_table, full_column); - } - - void print() - { - for (auto & table : tables) - { - table.second.print(); - std::cout << "\n"; - } - } - - void merge(TableList other) - { - for (const auto & table : other.tables) - tables[table.first].merge(table.second); - nested.insert(other.nested.begin(), other.nested.end()); - if (main_table.empty()) - main_table = other.main_table; - } -}; - -std::string getAlias(DB::ASTPtr ch) -{ - auto x = std::dynamic_pointer_cast(ch); - if (x) - return x->alias; - - for (const auto & child : (*ch).children) - { - auto alias = getAlias(child); - if (!alias.empty()) - return alias; - } - return ""; -} - -using FuncHandler = std::function &)>; -std::map handlers = {}; - -FuncRet arrayJoinFunc(DB::ASTPtr ch, std::map & columns) -{ - auto x = std::dynamic_pointer_cast(ch); - if (x) - { - std::set indents = {}; - for (auto & arg : x->arguments->children) - { - auto ident = std::dynamic_pointer_cast(arg); - if (ident) - indents.insert(ident->name()); - } - for (const auto & indent : indents) - { - auto c = Column(indent); - c.type = Type::all; - c.is_array = true; - if (columns.contains(indent)) - columns[indent].merge(c); - else - columns[indent] = c; - } - FuncRet r(Type::all, ""); - return r; - } - return FuncRet(); -} - 
-FuncRet inFunc(DB::ASTPtr ch, std::map & columns) -{ - auto x = std::dynamic_pointer_cast(ch); - if (x) - { - std::set indents{}; - std::set values{}; - ColumnType type_value = Type::all; - - for (auto & arg : x->arguments->children) - { - auto ident = std::dynamic_pointer_cast(arg); - if (ident) - { - indents.insert(ident->name()); - } - auto literal = std::dynamic_pointer_cast(arg); - if (literal) - { - ColumnType type = type_cast(literal->value.getType()); - - auto routine = [&](const auto & arr_values) - { - for (auto & val : arr_values) - { - type = type_cast(val.getType()); - if (type == Type::s || type == Type::d || type == Type::dt) - type = time_type(applyVisitor(DB::FieldVisitorToString(), val)); - type_value &= type; - values.insert(applyVisitor(DB::FieldVisitorToString(), val)); - } - }; - - if (type & Type::a) - { - auto arr_values = literal->value.get(); - routine(arr_values); - } - - if (type & Type::a) - { - auto arr_values = literal->value.get(); - routine(arr_values); - } - } - auto subfunc = std::dynamic_pointer_cast(arg); - if (subfunc) - { - FuncHandler f; - auto arg_func_name = std::dynamic_pointer_cast(arg)->name; - if (handlers.contains(arg_func_name)) - f = handlers[arg_func_name]; - else - f = handlers[""]; - FuncRet ret = f(arg, columns); - if (!ret.value.empty()) - { - values.insert(ret.value); - } - type_value &= ret.type; - } - } - for (const auto & indent : indents) - { - auto c = Column(indent); - c.type = type_value; - c.values.insert(values.begin(), values.end()); - c.generateValues(1); - if (columns.contains(indent)) - columns[indent].merge(c); - else - columns[indent] = c; - } - FuncRet r(Type::b | Type::i, ""); - return r; - } - return FuncRet(); -} - -FuncRet arrayFunc(DB::ASTPtr ch, std::map & columns) -{ - auto x = std::dynamic_pointer_cast(ch); - if (x) - { - std::set indents = {}; - std::string value = "["; - ColumnType type_value = Type::i | Type::f | Type::d | Type::dt | Type::s; - bool no_indent = true; - for (const auto & arg : x->arguments->children) - { - auto ident = std::dynamic_pointer_cast(arg); - if (ident) - { - no_indent = false; - indents.insert(ident->name()); - } - auto literal = std::dynamic_pointer_cast(arg); - if (literal) - { - ColumnType type = type_cast(literal->value.getType()); - if (type == Type::s || type == Type::d || type == Type::dt) - type = time_type(value); - type_value &= type; - - if (value != "[") - value += ", "; - value += applyVisitor(DB::FieldVisitorToString(), literal->value); - } - } - for (const auto & indent : indents) - { - auto c = Column(indent); - c.type = type_value; - if (columns.contains(indent)) - columns[indent].merge(c); - else - columns[indent] = c; - } - value += ']'; - FuncRet r(type_value, ""); - r.is_array = true; - if (no_indent) - r.value = value; - return r; - } - return FuncRet(); -} -FuncRet arithmeticFunc(DB::ASTPtr ch, std::map & columns) -{ - auto x = std::dynamic_pointer_cast(ch); - if (x) - { - std::set indents = {}; - std::set values = {}; - ColumnType type_value = Type::i | Type::f | Type::d | Type::dt; - ColumnType args_types = 0; - bool no_indent = true; - for (auto & arg : x->arguments->children) - { - ColumnType type = 0; - auto ident = std::dynamic_pointer_cast(arg); - if (ident) - { - no_indent = false; - indents.insert(ident->name()); - } - auto literal = std::dynamic_pointer_cast(arg); - if (literal) - type = type_cast(literal->value.getType()); - auto subfunc = std::dynamic_pointer_cast(arg); - if (subfunc) - { - FuncHandler f; - auto arg_func_name = 
std::dynamic_pointer_cast(arg)->name; - if (handlers.contains(arg_func_name)) - f = handlers[arg_func_name]; - else - f = handlers[""]; - FuncRet ret = f(arg, columns); - type = ret.type; - } - args_types |= type; - } - if (args_types & (Type::d | Type::dt)) - type_value -= Type::f; - if (args_types & Type::f) - type_value -= Type::d | Type::dt; - for (const auto & indent : indents) - { - auto c = Column(indent); - c.type = type_value; - if (columns.contains(indent)) - columns[indent].merge(c); - else - columns[indent] = c; - } - ColumnType ret_type = 0; - if (args_types & Type::dt) - ret_type = Type::dt; - else if (args_types & Type::d) - ret_type = Type::d | Type::dt; - else if (args_types & Type::f) - ret_type = Type::f; - else - ret_type = Type::d | Type::f | Type::dt | Type::i; - FuncRet r(ret_type, ""); - if (no_indent) - { - DB::WriteBufferFromOwnString buf; - formatAST(*ch, buf); - r.value = buf.str(); - } - return r; - } - return FuncRet(); -} -FuncRet likeFunc(DB::ASTPtr ch, std::map & columns) -{ - auto x = std::dynamic_pointer_cast(ch); - if (x) - { - std::set indents = {}; - std::set values = {}; - ColumnType type_value = Type::s; - for (auto & arg : x->arguments->children) - { - auto ident = std::dynamic_pointer_cast(arg); - if (ident) - indents.insert(ident->name()); - auto literal = std::dynamic_pointer_cast(arg); - if (literal) - { - std::string value = applyVisitor(DB::FieldVisitorToString(), literal->value); - std::string example{}; - for (size_t i = 0; i != value.size(); ++i) /// NOLINT - { - if (value[i] == '%') - example += randomString(rng() % 10); - else if (value[i] == '_') - example += randomString(1); - else - example += value[i]; - } - values.insert(example); - } - } - for (const auto & indent : indents) - { - auto c = Column(indent); - c.type = type_value; - c.values.insert(values.begin(), values.end()); - if (columns.contains(indent)) - columns[indent].merge(c); - else - columns[indent] = c; - } - FuncRet r(Type::b, ""); - return r; - } - return FuncRet(); -} - -FuncRet simpleFunc(DB::ASTPtr ch, std::map & columns) -{ - auto x = std::dynamic_pointer_cast(ch); - if (x) - { - std::set indents = {}; - std::set values = {}; - ColumnType type_value = Type::all; - bool is_array = false; - bool no_indent = true; - if (func_to_param_type.contains(boost::algorithm::to_lower_copy(x->name))) - { - type_value &= func_to_param_type[boost::algorithm::to_lower_copy(x->name)]; - is_array = func_to_param_type[boost::algorithm::to_lower_copy(x->name)] & Type::a; - } - for (const auto & arg : x->arguments->children) - { - ColumnType type = Type::all; - std::string value; - auto ident = std::dynamic_pointer_cast(arg); - if (ident) - { - no_indent = false; - indents.insert(ident->name()); - } - auto literal = std::dynamic_pointer_cast(arg); - if (literal) - { - value = applyVisitor(DB::FieldVisitorToString(), literal->value); - type = type_cast(literal->value.getType()); - is_array |= type & Type::a; - } - auto subfunc = std::dynamic_pointer_cast(arg); - if (subfunc) - { - FuncHandler f; - auto arg_func_name = std::dynamic_pointer_cast(arg)->name; - if (handlers.contains(arg_func_name)) - f = handlers[arg_func_name]; - else - f = handlers[""]; - FuncRet ret = f(arg, columns); - is_array |= ret.is_array; - type = ret.type; - value = ret.value; - if (value.empty()) - no_indent = false; - } - if (!value.empty()) - { - if (type == Type::i) - { - values.insert(value); - values.insert(value + " + " + randomInteger(1, 10)); - values.insert(value + " - " + randomInteger(1, 10)); - } - if 
(type == Type::f) - { - values.insert(value); - values.insert(value + " + " + randomFloat(1, 10)); - values.insert(value + " - " + randomFloat(1, 10)); - } - if (type & Type::s || type & Type::d || type & Type::dt) - { - if (type == Type::s) - type = time_type(value); - if (type == Type::s) - values.insert(value); - if (type & Type::d) - { - values.insert(value); - values.insert("toDate(" + value + ") + " + randomInteger(1, 10)); - values.insert("toDate(" + value + ") - " + randomInteger(1, 10)); - } - else if (type & Type::dt) - { - values.insert(value); - values.insert( - "toDateTime(" + value + ") + " + randomInteger(1, 10000)); - values.insert( - "toDateTime(" + value + ") - " + randomInteger(1, 10000)); - } - } - } - if (func_args_same_types.contains(boost::algorithm::to_lower_copy(x->name))) - type_value &= type; - } - for (const auto & indent : indents) - { - auto c = Column(indent); - c.type = type_value; - c.is_array = is_array; - if (func_args_same_types.contains( - boost::algorithm::to_lower_copy(x->name))) - c.values = values; - for (const auto & ind : indents) - if (ind != indent) - c.equals.insert(std::make_pair("", ind)); - - if (columns.contains(indent)) - columns[indent].merge(c); - else - columns[indent] = c; - } - if (func_to_return_type.contains(boost::algorithm::to_lower_copy(x->name))) - { - if (no_indent) - { - DB::WriteBufferFromOwnString buf; - formatAST(*ch, buf); - auto r = func_to_return_type[boost::algorithm::to_lower_copy(x->name)]; - r.value = buf.str(); - return r; - } - return func_to_return_type[boost::algorithm::to_lower_copy(x->name)]; - } - else if (func_to_param_type.contains( - boost::algorithm::to_lower_copy(x->name))) - { - if (no_indent) - { - DB::WriteBufferFromOwnString buf; - formatAST(*ch, buf); - return FuncRet( - func_to_param_type[boost::algorithm::to_lower_copy(x->name)], - buf.str()); - } - return FuncRet( - func_to_param_type[boost::algorithm::to_lower_copy(x->name)], - ""); - } - } - return FuncRet(); -} - -void processFunc(DB::ASTPtr ch, std::map & columns) -{ - auto x = std::dynamic_pointer_cast(ch); - if (x) - { - FuncHandler f; - auto arg_func_name = x->name; - if (handlers.contains(arg_func_name)) - f = handlers[arg_func_name]; - else - f = handlers[""]; - f(ch, columns); - } - else - { - for (const auto & child : (*ch).children) - processFunc(child, columns); - } -} - - -std::set getIndent(DB::ASTPtr ch) -{ - if (!ch) - return {}; - - std::set ret = {}; - auto x = std::dynamic_pointer_cast(ch); - if (x) - ret.insert(x->name()); - for (const auto & child : (*ch).children) - { - auto child_ind = getIndent(child); - ret.insert(child_ind.begin(), child_ind.end()); - } - return ret; -} - - -std::set getSelectIndent( - DB::ASTPtr asp, - std::set & column_alias) -{ - std::set ret = {}; - for (auto & ch : asp->children) - { - auto alias = getAlias(ch); - auto columns = getIndent(ch); - if (alias.empty()) - column_alias.insert(alias); - ret.insert(columns.begin(), columns.end()); - } - return ret; -} - - -std::set -connectedEqualityFind( - const Column & now, - std::map & columns_descriptions, - std::set & visited) -{ - std::set result; - for (const auto & column : now.equals) - if (!visited.contains(column)) - { - visited.insert(column); - auto sub_r = connectedEqualityFind( - columns_descriptions[column.first + "." 
+ column.second], - columns_descriptions, - visited); - result.insert(sub_r.begin(), sub_r.end()); - } - result.insert(now.name); - return result; -} - - -std::map -unificateColumns( - std::map columns_descriptions, - const TableList & all_tables) -{ - for (auto & column : columns_descriptions) - { - std::set changed_equals; - for (const auto & eq : column.second.equals) - { - std::string t, c; - std::tie(t, c) = all_tables.getTable(eq.second); - changed_equals.insert(std::make_pair(t, c)); - } - column.second.equals = changed_equals; - } - std::map result; - for (auto & column : columns_descriptions) - { - std::string t, c; - std::tie(t, c) = all_tables.getTable(column.first); - column.second.name = std::make_pair(t, c); - result[t + "." + c].merge(column.second); - } - std::set visited; - for (auto & column : result) - if (!visited.contains(column.second.name)) - { - auto equal = connectedEqualityFind( - result[column.second.name.first + "." + column.second.name.second], - result, - visited); - for (const auto & c : equal) - result[c.first + "." + c.second].equals = equal; - } - for (auto & column : result) - for (const auto & e : column.second.equals) - column.second.merge(result[e.first + "." + e.second]); - - for (auto & column : result) - { - column.second.unifyType(); - if (column.second.generateValues()) - for (const auto & e : column.second.equals) - result[e.first + "." + e.second].merge(column.second); - - } - return result; -} - -std::vector getSelect(DB::ASTPtr vertex) -{ - auto z = std::dynamic_pointer_cast(vertex); - std::vector result; - if (z) - { - result.push_back(vertex); - return result; - } - - for (const auto & child : (*vertex).children) - { - auto v = getSelect(child); - result.insert(result.end(), v.begin(), v.end()); - } - return result; -} - - -void parseSelectQuery(DB::ASTPtr ast, TableList & all_tables) -{ - if (!ast) - throw std::runtime_error("Bad ASTPtr in parseSelectQuery" + StackTrace().toString()); - - auto select_ast = std::dynamic_pointer_cast(ast); - if (!select_ast) - { - std::cerr << "not select query"; - return; - } - std::set columns = {}; - - auto x = select_ast->tables(); - if (!x) - throw std::runtime_error("There is no tables in query. 
Nothing to generate."); - - for (auto & child : x->children) - { - auto ch = std::dynamic_pointer_cast(child); - auto table_expression_ast = std::dynamic_pointer_cast(ch->table_expression); - if (table_expression_ast && table_expression_ast->database_and_table_name) - { - auto table_name = *(getIndent(table_expression_ast->database_and_table_name).begin()); - all_tables.addTable(table_name); - auto alias = getAlias(ch); - if (!alias.empty()) - all_tables.aliases[alias] = table_name; - } - if (table_expression_ast && table_expression_ast->subquery) - { - for (const auto & select : getSelect(table_expression_ast->subquery)) - { - TableList local; - parseSelectQuery(select, local); - all_tables.merge(local); - } - } - - if (ch->table_join) - { - auto jch = std::dynamic_pointer_cast(ch->table_join); - if (jch->using_expression_list) - { - auto join_columns = getIndent(jch->using_expression_list); - columns.insert(join_columns.begin(), join_columns.end()); - } - else if (jch->on_expression) - { - auto join_columns = getIndent(jch->on_expression); - columns.insert(join_columns.begin(), join_columns.end()); - } - } - } - - std::set column_aliases; - auto select_columns = getSelectIndent(select_ast->select(), column_aliases); - columns.insert(select_columns.begin(), select_columns.end()); - - auto where_columns = getIndent(select_ast->where()); - columns.insert(where_columns.begin(), where_columns.end()); - - auto groupby_columns = getIndent(select_ast->groupBy()); - columns.insert(groupby_columns.begin(), groupby_columns.end()); - - auto orderby_columns = getIndent(select_ast->orderBy()); - columns.insert(orderby_columns.begin(), orderby_columns.end()); - - auto having_columns = getIndent(select_ast->having()); - columns.insert(having_columns.begin(), having_columns.end()); - - std::map columns_descriptions; - processFunc(ast, columns_descriptions); - - for (const auto & column : columns) - if (!column_aliases.contains(column)) - { - if (!columns_descriptions.contains(column)) - columns_descriptions[column] = Column(column); - all_tables.addColumn(column); - } - - columns_descriptions = unificateColumns(columns_descriptions, all_tables); - for (auto & column : columns_descriptions) - all_tables.addDescription(column.second); -} - - -TableList getTablesFromSelect(std::vector queries) -{ - TableList result; - for (std::string & query : queries) - { - DB::ParserQueryWithOutput parser(query.data() + query.size()); - DB::ASTPtr ast = parseQuery(parser, query.data(), query.data() + query.size(), "", 0, 0); - for (auto & select : getSelect(ast)) - { - TableList local; - parseSelectQuery(select, local); - result.merge(local); - } - } - return result; -} - -int main(int argc, const char *argv[]) -{ - try - { - po::options_description desc("Allowed options"); - desc.add_options() - ("help,h", "Display greeting and allowed options.") - ("input,i", po::value(), "Input filename.") - ("output,o", po::value(), "Output filename."); - - po::variables_map vm; - po::store(po::parse_command_line(argc, argv, desc), vm); - po::notify(vm); - - if (vm.count("help") || vm.count("h")) - { - std::cout << "Hello! It is datasets generator for ClickHouse's queries." << std::endl; - std::cout << "Put some query as an input and it will produce queries for table creating and filling." << std::endl; - std::cout << "After that your query could be executed on this tables." 
<< std::endl; - std::cout << desc << std::endl; - return 1; - } - if (vm.count("input")) - if (!freopen(vm["input"].as().c_str(), "r", stdin)) - std::cout << "Error while input." << std::endl; - if (vm.count("output")) - if (!freopen(vm["output"].as().c_str(), "w", stdout)) - std::cout << "Error while output." << std::endl; - if (vm.empty()) - std::cout << "Copy your queries (with semicolons) here, press Enter and Ctrl+D." << std::endl; - } - catch (...) - { - std::cerr << "Got error while parse command line arguments: " << DB::getCurrentExceptionMessage(true) << std::endl; - throw; - } - - handlers["plus"] = arithmeticFunc; - handlers["minus"] = arithmeticFunc; - handlers["like"] = likeFunc; - handlers["array"] = arrayFunc; - handlers["in"] = inFunc; - handlers[""] = simpleFunc; - - std::vector queries; - std::string in; - std::string query{}; - while (getline(std::cin, in)) - { - /// Skip comments - if (in.find("--") != std::string::npos) - continue; - - query += in + " "; - - if (in.find(';') != std::string::npos) - { - queries.push_back(query); - query = ""; - } - } - - try - { - auto result = getTablesFromSelect(queries); - - for (auto & table : result.tables) - { - std::cout << table.second.createQuery(); - std::cout << table.second.insertQuery(); - } - - for (auto & q: queries) - std::cout << q << std::endl; - } - catch (std::string & e) - { - std::cerr << "Exception: " << e << std::endl; - } -} diff --git a/utils/iotest/CMakeLists.txt b/utils/iotest/CMakeLists.txt deleted file mode 100644 index 356986eb493..00000000000 --- a/utils/iotest/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ - -clickhouse_add_executable (iotest iotest.cpp ${SRCS}) -target_link_libraries (iotest PRIVATE clickhouse_common_io) - -clickhouse_add_executable (iotest_nonblock iotest_nonblock.cpp ${SRCS}) -target_link_libraries (iotest_nonblock PRIVATE clickhouse_common_io) - -clickhouse_add_executable (iotest_aio iotest_aio.cpp ${SRCS}) -target_link_libraries (iotest_aio PRIVATE clickhouse_common_io) diff --git a/utils/iotest/iotest.cpp b/utils/iotest/iotest.cpp deleted file mode 100644 index 7a1f35ddd52..00000000000 --- a/utils/iotest/iotest.cpp +++ /dev/null @@ -1,197 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include -#include -#include - - -namespace DB -{ - namespace ErrorCodes - { - extern const int CANNOT_OPEN_FILE; - extern const int CANNOT_CLOSE_FILE; - extern const int CANNOT_READ_FROM_FILE_DESCRIPTOR; - extern const int CANNOT_WRITE_TO_FILE_DESCRIPTOR; - } -} - - -enum Mode -{ - MODE_NONE = 0, - MODE_READ = 1, - MODE_WRITE = 2, - MODE_ALIGNED = 4, - MODE_DIRECT = 8, - MODE_SYNC = 16, -}; - - -void thread(int fd, int mode, size_t min_offset, size_t max_offset, size_t block_size, size_t count) -{ - using namespace DB; - - Memory<> direct_buf(block_size, ::getPageSize()); - std::vector simple_buf(block_size); - - char * buf; - if ((mode & MODE_DIRECT)) - buf = direct_buf.data(); - else - buf = simple_buf.data(); - - pcg64 rng(randomSeed()); - - for (size_t i = 0; i < count; ++i) - { - uint64_t rand_result1 = rng(); - uint64_t rand_result2 = rng(); - uint64_t rand_result3 = rng(); - - size_t rand_result = rand_result1 ^ (rand_result2 << 22) ^ (rand_result3 << 43); - size_t offset; - if ((mode & MODE_DIRECT) || (mode & MODE_ALIGNED)) - offset = min_offset + rand_result % ((max_offset - min_offset) / block_size) * block_size; - else - offset = min_offset + rand_result % (max_offset - min_offset - 
block_size + 1); - - if (mode & MODE_READ) - { - if (static_cast(block_size) != pread(fd, buf, block_size, offset)) - throwFromErrno("Cannot read", ErrorCodes::CANNOT_READ_FROM_FILE_DESCRIPTOR); - } - else - { - if (static_cast(block_size) != pwrite(fd, buf, block_size, offset)) - throwFromErrno("Cannot write", ErrorCodes::CANNOT_WRITE_TO_FILE_DESCRIPTOR); - } - } -} - - -int mainImpl(int argc, char ** argv) -{ - using namespace DB; - - const char * file_name = nullptr; - int mode = MODE_NONE; - UInt64 min_offset = 0; - UInt64 max_offset = 0; - UInt64 block_size = 0; - UInt64 threads = 0; - UInt64 count = 0; - - if (argc != 8) - { - std::cerr << "Usage: " << argv[0] << " file_name (r|w)[a][d][s] min_offset max_offset block_size threads count" << std::endl << - "a - aligned, d - direct, s - sync" << std::endl; - return 1; - } - - file_name = argv[1]; - min_offset = parse(argv[3]); - max_offset = parse(argv[4]); - block_size = parse(argv[5]); - threads = parse(argv[6]); - count = parse(argv[7]); - - for (int i = 0; argv[2][i]; ++i) - { - char c = argv[2][i]; - switch (c) - { - case 'r': - mode |= MODE_READ; - break; - case 'w': - mode |= MODE_WRITE; - break; - case 'a': - mode |= MODE_ALIGNED; - break; - case 'd': - mode |= MODE_DIRECT; - break; - case 's': - mode |= MODE_SYNC; - break; - default: - throw Poco::Exception("Invalid mode"); - } - } - - ThreadPool pool(threads); - - #ifndef OS_DARWIN - int fd = open(file_name, ((mode & MODE_READ) ? O_RDONLY : O_WRONLY) | ((mode & MODE_DIRECT) ? O_DIRECT : 0) | ((mode & MODE_SYNC) ? O_SYNC : 0)); - #else - int fd = open(file_name, ((mode & MODE_READ) ? O_RDONLY : O_WRONLY) | ((mode & MODE_SYNC) ? O_SYNC : 0)); - #endif - if (-1 == fd) - throwFromErrno("Cannot open file", ErrorCodes::CANNOT_OPEN_FILE); - #ifdef OS_DARWIN - if (mode & MODE_DIRECT) - if (fcntl(fd, F_NOCACHE, 1) == -1) - throwFromErrno("Cannot open file", ErrorCodes::CANNOT_CLOSE_FILE); - #endif - Stopwatch watch; - - for (size_t i = 0; i < threads; ++i) - pool.scheduleOrThrowOnError([=]{ thread(fd, mode, min_offset, max_offset, block_size, count); }); - pool.wait(); - - #if defined(OS_DARWIN) - fsync(fd); - #else - fdatasync(fd); - #endif - - watch.stop(); - - if (0 != close(fd)) - throwFromErrno("Cannot close file", ErrorCodes::CANNOT_CLOSE_FILE); - - std::cout << std::fixed << std::setprecision(2) - << "Done " << count << " * " << threads << " ops"; - if (mode & MODE_ALIGNED) - std::cout << " (aligned)"; - if (mode & MODE_DIRECT) - std::cout << " (direct)"; - if (mode & MODE_SYNC) - std::cout << " (sync)"; - std::cout << " in " << watch.elapsedSeconds() << " sec." - << ", " << count * threads / watch.elapsedSeconds() << " ops/sec." - << ", " << count * threads * block_size / watch.elapsedSeconds() / 1000000 << " MB/sec." 
- << std::endl; - - return 0; -} - - -int main(int argc, char ** argv) -{ - try - { - return mainImpl(argc, argv); - } - catch (const Poco::Exception & e) - { - std::cerr << e.what() << ", " << e.message() << std::endl; - return 1; - } -} diff --git a/utils/iotest/iotest_aio.cpp b/utils/iotest/iotest_aio.cpp deleted file mode 100644 index c0cf002ce58..00000000000 --- a/utils/iotest/iotest_aio.cpp +++ /dev/null @@ -1,203 +0,0 @@ -#if !defined(OS_LINUX) -int main(int, char **) { return 0; } -#else - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -namespace DB -{ - namespace ErrorCodes - { - extern const int CANNOT_OPEN_FILE; - extern const int CANNOT_CLOSE_FILE; - extern const int CANNOT_IO_SUBMIT; - extern const int CANNOT_IO_GETEVENTS; - } -} - - -enum Mode -{ - MODE_READ = 1, - MODE_WRITE = 2, -}; - - -void thread(int fd, int mode, size_t min_offset, size_t max_offset, size_t block_size, size_t buffers_count, size_t count) -{ - using namespace DB; - - AIOContext ctx; - - std::vector> buffers(buffers_count); - for (size_t i = 0; i < buffers_count; ++i) - buffers[i] = Memory<>(block_size, ::getPageSize()); - - pcg64_fast rng(randomSeed()); - - size_t in_progress = 0; - size_t blocks_sent = 0; - std::vector buffer_used(buffers_count, false); - std::vector iocbs(buffers_count); - std::vector query_cbs; - std::vector events(buffers_count); - - while (blocks_sent < count || in_progress > 0) - { - /// Prepare queries. - query_cbs.clear(); - for (size_t i = 0; i < buffers_count; ++i) - { - if (blocks_sent >= count || in_progress >= buffers_count) - break; - - if (buffer_used[i]) - continue; - - buffer_used[i] = true; - ++blocks_sent; - ++in_progress; - - char * buf = buffers[i].data(); - - uint64_t rand_result1 = rng(); - uint64_t rand_result2 = rng(); - uint64_t rand_result3 = rng(); - - size_t rand_result = rand_result1 ^ (rand_result2 << 22) ^ (rand_result3 << 43); - size_t offset = min_offset + rand_result % ((max_offset - min_offset) / block_size) * block_size; - - iocb & cb = iocbs[i]; - memset(&cb, 0, sizeof(cb)); - cb.aio_buf = reinterpret_cast(buf); - cb.aio_fildes = fd; - cb.aio_nbytes = block_size; - cb.aio_offset = offset; - cb.aio_data = static_cast(i); - - if (mode == MODE_READ) - { - cb.aio_lio_opcode = IOCB_CMD_PREAD; - } - else - { - cb.aio_lio_opcode = IOCB_CMD_PWRITE; - } - - query_cbs.push_back(&cb); - } - - /// Send queries. - if (io_submit(ctx.ctx, query_cbs.size(), query_cbs.data()) < 0) - throwFromErrno("io_submit failed", ErrorCodes::CANNOT_IO_SUBMIT); - - /// Receive answers. If we have something else to send, then receive at least one answer (after that send them), otherwise wait all answers. - memset(events.data(), 0, buffers_count * sizeof(events[0])); - int evs = io_getevents(ctx.ctx, (blocks_sent < count ? 
1 : in_progress), buffers_count, events.data(), nullptr); - if (evs < 0) - throwFromErrno("io_getevents failed", ErrorCodes::CANNOT_IO_GETEVENTS); - - for (int i = 0; i < evs; ++i) - { - int b = static_cast(events[i].data); - if (events[i].res != static_cast(block_size)) - throw Poco::Exception("read/write error"); - --in_progress; - buffer_used[b] = false; - } - } -} - - -int mainImpl(int argc, char ** argv) -{ - using namespace DB; - - const char * file_name = nullptr; - int mode = MODE_READ; - UInt64 min_offset = 0; - UInt64 max_offset = 0; - UInt64 block_size = 0; - UInt64 buffers_count = 0; - UInt64 threads_count = 0; - UInt64 count = 0; - - if (argc != 9) - { - std::cerr << "Usage: " << argv[0] << " file_name r|w min_offset max_offset block_size threads buffers count" << std::endl; - return 1; - } - - file_name = argv[1]; - if (argv[2][0] == 'w') - mode = MODE_WRITE; - min_offset = parse(argv[3]); - max_offset = parse(argv[4]); - block_size = parse(argv[5]); - threads_count = parse(argv[6]); - buffers_count = parse(argv[7]); - count = parse(argv[8]); - - int fd = open(file_name, ((mode == MODE_READ) ? O_RDONLY : O_WRONLY) | O_DIRECT); - if (-1 == fd) - throwFromErrno("Cannot open file", ErrorCodes::CANNOT_OPEN_FILE); - - ThreadPool pool(threads_count); - - Stopwatch watch; - - for (size_t i = 0; i < threads_count; ++i) - pool.scheduleOrThrowOnError([=]{ thread(fd, mode, min_offset, max_offset, block_size, buffers_count, count); }); - pool.wait(); - - watch.stop(); - - if (0 != close(fd)) - throwFromErrno("Cannot close file", ErrorCodes::CANNOT_CLOSE_FILE); - - std::cout << std::fixed << std::setprecision(2) - << "Done " << count << " * " << threads_count << " ops"; - std::cout << " in " << watch.elapsedSeconds() << " sec." - << ", " << count * threads_count / watch.elapsedSeconds() << " ops/sec." - << ", " << count * threads_count * block_size / watch.elapsedSeconds() / 1000000 << " MB/sec." 
- << std::endl; - - return 0; -} - - -int main(int argc, char ** argv) -{ - try - { - return mainImpl(argc, argv); - } - catch (const Poco::Exception & e) - { - std::cerr << e.what() << ", " << e.message() << std::endl; - return 1; - } -} -#endif diff --git a/utils/iotest/iotest_nonblock.cpp b/utils/iotest/iotest_nonblock.cpp deleted file mode 100644 index 33fab4d04e6..00000000000 --- a/utils/iotest/iotest_nonblock.cpp +++ /dev/null @@ -1,177 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#if defined (OS_LINUX) -# include -#endif - - -namespace DB -{ - namespace ErrorCodes - { - extern const int CANNOT_OPEN_FILE; - extern const int CANNOT_CLOSE_FILE; - extern const int CANNOT_READ_FROM_FILE_DESCRIPTOR; - extern const int CANNOT_WRITE_TO_FILE_DESCRIPTOR; - extern const int CANNOT_FSYNC; - extern const int SYSTEM_ERROR; - } -} - - -enum Mode -{ - MODE_READ, - MODE_WRITE, -}; - - -int mainImpl(int argc, char ** argv) -{ - using namespace DB; - - const char * file_name = nullptr; - Mode mode = MODE_READ; - UInt64 min_offset = 0; - UInt64 max_offset = 0; - UInt64 block_size = 0; - UInt64 descriptors = 0; - UInt64 count = 0; - - if (argc != 8) - { - std::cerr << "Usage: " << argv[0] << " file_name r|w min_offset max_offset block_size descriptors count" << std::endl; - return 1; - } - - file_name = argv[1]; - min_offset = parse(argv[3]); - max_offset = parse(argv[4]); - block_size = parse(argv[5]); - descriptors = parse(argv[6]); - count = parse(argv[7]); - - if (!strcmp(argv[2], "r")) - mode = MODE_READ; - else if (!strcmp(argv[2], "w")) - mode = MODE_WRITE; - else - throw Poco::Exception("Invalid mode"); - - std::vector fds(descriptors); - for (size_t i = 0; i < descriptors; ++i) - { - fds[i] = open(file_name, O_SYNC | ((mode == MODE_READ) ? O_RDONLY : O_WRONLY)); - if (-1 == fds[i]) - throwFromErrno("Cannot open file", ErrorCodes::CANNOT_OPEN_FILE); - } - - std::vector buf(block_size); - - pcg64 rng(randomSeed()); - - Stopwatch watch; - - std::vector polls(descriptors); - - for (size_t i = 0; i < descriptors; ++i) - { - polls[i].fd = fds[i]; - polls[i].events = (mode == MODE_READ) ? 
POLLIN : POLLOUT; - polls[i].revents = 0; - } - - size_t ops = 0; - while (ops < count) - { - if (poll(polls.data(), descriptors, -1) <= 0) - throwFromErrno("poll failed", ErrorCodes::SYSTEM_ERROR); - for (size_t i = 0; i < descriptors; ++i) - { - if (!polls[i].revents) - continue; - - if (polls[i].revents != polls[i].events) - throw Poco::Exception("revents indicates error"); - polls[i].revents = 0; - ++ops; - - uint64_t rand_result1 = rng(); - uint64_t rand_result2 = rng(); - uint64_t rand_result3 = rng(); - - size_t rand_result = rand_result1 ^ (rand_result2 << 22) ^ (rand_result3 << 43); - size_t offset; - offset = min_offset + rand_result % ((max_offset - min_offset) / block_size) * block_size; - - if (mode == MODE_READ) - { - if (static_cast(block_size) != pread(fds[i], buf.data(), block_size, offset)) - throwFromErrno("Cannot read", ErrorCodes::CANNOT_READ_FROM_FILE_DESCRIPTOR); - } - else - { - if (static_cast(block_size) != pwrite(fds[i], buf.data(), block_size, offset)) - throwFromErrno("Cannot write", ErrorCodes::CANNOT_WRITE_TO_FILE_DESCRIPTOR); - } - } - } - - for (size_t i = 0; i < descriptors; ++i) - { -#if defined(OS_DARWIN) - if (fsync(fds[i])) - throwFromErrno("Cannot fsync", ErrorCodes::CANNOT_FSYNC); -#else - if (fdatasync(fds[i])) - throwFromErrno("Cannot fdatasync", ErrorCodes::CANNOT_FSYNC); -#endif - } - - watch.stop(); - - for (size_t i = 0; i < descriptors; ++i) - { - if (0 != close(fds[i])) - throwFromErrno("Cannot close file", ErrorCodes::CANNOT_CLOSE_FILE); - } - - std::cout << std::fixed << std::setprecision(2) - << "Done " << count << " ops" << " in " << watch.elapsedSeconds() << " sec." - << ", " << count / watch.elapsedSeconds() << " ops/sec." - << ", " << count * block_size / watch.elapsedSeconds() / 1000000 << " MB/sec." 
-        << std::endl;
-
-    return 0;
-}
-
-
-int main(int argc, char ** argv)
-{
-    try
-    {
-        return mainImpl(argc, argv);
-    }
-    catch (const Poco::Exception & e)
-    {
-        std::cerr << e.what() << ", " << e.message() << std::endl;
-        return 1;
-    }
-}
diff --git a/utils/keeper-data-dumper/main.cpp b/utils/keeper-data-dumper/main.cpp
index 0762c740ac1..dd3c3a4e2ad 100644
--- a/utils/keeper-data-dumper/main.cpp
+++ b/utils/keeper-data-dumper/main.cpp
@@ -63,7 +63,7 @@ int main(int argc, char *argv[])
     SnapshotsQueue snapshots_queue{1};
     CoordinationSettingsPtr settings = std::make_shared();
     KeeperContextPtr keeper_context = std::make_shared();
-    auto state_machine = std::make_shared(queue, snapshots_queue, argv[1], settings, keeper_context);
+    auto state_machine = std::make_shared(queue, snapshots_queue, argv[1], settings, keeper_context, nullptr);
     state_machine->init();
     size_t last_commited_index = state_machine->last_commit_index();

diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv
index 65ec5ddec01..47dbec5a5f8 100644
--- a/utils/list-versions/version_date.tsv
+++ b/utils/list-versions/version_date.tsv
@@ -1,12 +1,18 @@
+v22.10.2.11-stable 2022-11-01
+v22.10.1.1877-stable 2022-10-26
+v22.9.4.32-stable 2022-10-26
 v22.9.3.18-stable 2022-09-30
 v22.9.2.7-stable 2022-09-23
 v22.9.1.2603-stable 2022-09-22
+v22.8.8.3-lts 2022-10-27
+v22.8.7.34-lts 2022-10-26
 v22.8.6.71-lts 2022-09-30
 v22.8.5.29-lts 2022-09-13
 v22.8.4.7-lts 2022-08-31
 v22.8.3.13-lts 2022-08-29
 v22.8.2.11-lts 2022-08-23
 v22.8.1.2097-lts 2022-08-18
+v22.7.7.24-stable 2022-10-26
 v22.7.6.74-stable 2022-09-30
 v22.7.5.13-stable 2022-08-29
 v22.7.4.16-stable 2022-08-23
@@ -31,6 +37,7 @@ v22.4.5.9-stable 2022-05-06
 v22.4.4.7-stable 2022-04-29
 v22.4.3.3-stable 2022-04-26
 v22.4.2.1-stable 2022-04-22
+v22.3.14.23-lts 2022-10-28
 v22.3.13.80-lts 2022-09-30
 v22.3.12.19-lts 2022-08-29
 v22.3.11.12-lts 2022-08-10
diff --git a/utils/package/CMakeLists.txt b/utils/package/CMakeLists.txt
deleted file mode 100644
index 8c8a09adc0f..00000000000
--- a/utils/package/CMakeLists.txt
+++ /dev/null
@@ -1 +0,0 @@
-add_subdirectory (arch)
diff --git a/utils/package/arch/CMakeLists.txt b/utils/package/arch/CMakeLists.txt
deleted file mode 100644
index 4ee754fec56..00000000000
--- a/utils/package/arch/CMakeLists.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-include ("${ClickHouse_SOURCE_DIR}/cmake/version.cmake")
-configure_file (PKGBUILD.in PKGBUILD)
diff --git a/utils/package/arch/PKGBUILD.in b/utils/package/arch/PKGBUILD.in
deleted file mode 100644
index 4e068e8b8a2..00000000000
--- a/utils/package/arch/PKGBUILD.in
+++ /dev/null
@@ -1,33 +0,0 @@
-pkgname=clickhouse
-pkgver=${VERSION_STRING}
-pkgrel=1
-pkgdesc='An open-source column-oriented database management system that allows generating analytical data reports in real time'
-arch=('x86_64')
-url='https://clickhouse.com/'
-license=('Apache')
-
-package() {
-    install -dm 755 $pkgdir/usr/lib/tmpfiles.d
-    install -dm 755 $pkgdir/usr/lib/sysusers.d
-    install -Dm 644 ${CMAKE_CURRENT_SOURCE_DIR}/clickhouse.tmpfiles $pkgdir/usr/lib/tmpfiles.d/clickhouse.conf
-    install -Dm 644 ${CMAKE_CURRENT_SOURCE_DIR}/clickhouse.sysusers $pkgdir/usr/lib/sysusers.d/clickhouse.conf
-    install -dm 755 $pkgdir/etc/clickhouse-server/config.d
-    install -Dm 644 ${CMAKE_CURRENT_SOURCE_DIR}/logging.xml $pkgdir/etc/clickhouse-server/config.d/logging.xml
-    # This code was requisited from kmeaw@ https://aur.archlinux.org/packages/clickhouse/ .
-    SRC=${ClickHouse_SOURCE_DIR}
-    BIN=${ClickHouse_BINARY_DIR}
-    mkdir -p $pkgdir/etc/clickhouse-server/ $pkgdir/etc/clickhouse-client/
-    mkdir -p $pkgdir/usr/bin/
-    mkdir -p $pkgdir/usr/lib/systemd/system
-    ln -s clickhouse-client $pkgdir/usr/bin/clickhouse-server
-    cp $SRC/programs/server/config.xml $SRC/programs/server/users.xml $pkgdir/etc/clickhouse-server/
-    cp $BIN/programs/clickhouse $pkgdir/usr/bin/clickhouse-client
-    patchelf --remove-rpath $pkgdir/usr/bin/clickhouse-client
-    patchelf --replace-needed libz.so.1 libz-ng.so.1 $pkgdir/usr/bin/clickhouse-client
-    cp $SRC/programs/client/clickhouse-client.xml $pkgdir/etc/clickhouse-client/config.xml
-    compiler="libclickhouse-compiler.so"
-    if ! pacman -Q clang | grep '^clang 7'; then
-        compiler=""
-    fi
-    cp $SRC/debian/clickhouse-server.service $pkgdir/usr/lib/systemd/system
-}
diff --git a/utils/package/arch/README.md b/utils/package/arch/README.md
deleted file mode 100644
index 0db5aac8080..00000000000
--- a/utils/package/arch/README.md
+++ /dev/null
@@ -1,17 +0,0 @@
-### Build Arch Linux package
-
-From binary directory:
-
-```
-make
-cd utils/package/arch
-makepkg
-```
-
-### Install and start ClickHouse server
-
-```
-pacman -U clickhouse-*.pkg.tar.xz
-systemctl enable clickhouse-server
-systemctl start clickhouse-server
-```
diff --git a/utils/package/arch/clickhouse.sysusers b/utils/package/arch/clickhouse.sysusers
deleted file mode 100644
index 4381c52c4f2..00000000000
--- a/utils/package/arch/clickhouse.sysusers
+++ /dev/null
@@ -1,3 +0,0 @@
-u clickhouse - "ClickHouse user" /nonexistent /bin/false
-g clickhouse - "ClickHouse group"
-m clickhouse clickhouse
diff --git a/utils/package/arch/clickhouse.tmpfiles b/utils/package/arch/clickhouse.tmpfiles
deleted file mode 100644
index 631aa895f2f..00000000000
--- a/utils/package/arch/clickhouse.tmpfiles
+++ /dev/null
@@ -1 +0,0 @@
-d /var/lib/clickhouse 0700 clickhouse clickhouse
diff --git a/utils/package/arch/logging.xml b/utils/package/arch/logging.xml
deleted file mode 100644
index c7a78442424..00000000000
--- a/utils/package/arch/logging.xml
+++ /dev/null
@@ -1,6 +0,0 @@
-
-
-
-
-
-
diff --git a/utils/self-extracting-executable/compressor.cpp b/utils/self-extracting-executable/compressor.cpp
index d8b4fdbb038..0c0c85838da 100644
--- a/utils/self-extracting-executable/compressor.cpp
+++ b/utils/self-extracting-executable/compressor.cpp
@@ -356,7 +356,7 @@ int compressFiles(const char* out_name, const char* exec, char* filenames[], int
     return 0;
 }

-int copy_decompressor(int input_fd, int decompressor_size, int output_fd)
+int copy_decompressor(int input_fd, ssize_t decompressor_size, int output_fd)
 {
     const ssize_t buf_size = 1ul<<19;
     auto buf_memory = std::make_unique(buf_size);
@@ -411,7 +411,7 @@ int copy_decompressor_self(const char *self, int output_fd)
     }

     char * end = nullptr;
-    int decompressor_size = strtol(size_str, &end, 10);
+    ssize_t decompressor_size = strtol(size_str, &end, 10);
     if (*end != 0)
     {
         std::cerr << "Error: unable to extract decompressor" << std::endl;
@@ -519,7 +519,7 @@ int main(int argc, char* argv[])
         if (p[0] != 0)
         {
             char * end = nullptr;
-            level = strtol(p, &end, 10);
+            level = static_cast(strtol(p, &end, 10));
             if (*end != 0)
             {
                 std::cerr << "Error: level [" << p << "] is not valid" << std::endl;
diff --git a/utils/self-extracting-executable/decompressor.cpp b/utils/self-extracting-executable/decompressor.cpp
index c997526d38d..be25d315d68 100644
--- a/utils/self-extracting-executable/decompressor.cpp
+++ b/utils/self-extracting-executable/decompressor.cpp
@@ -329,7 +329,7 @@ int decompressFiles(int input_fd, char * path, char * name, bool & have_compress

 int read_exe_path(char *exe, size_t buf_sz)
 {
-    uint32_t size = buf_sz;
+    uint32_t size = static_cast(buf_sz);
     char apple[size];
     if (_NSGetExecutablePath(apple, &size) != 0)
         return 1;
@@ -514,7 +514,7 @@ int main(int/* argc*/, char* argv[])
         return 1;
     }

-    if (chmod(self, decompressed_umask))
+    if (chmod(self, static_cast(decompressed_umask)))
     {
         perror("chmod");
         return 1;
diff --git a/utils/zookeeper-adjust-block-numbers-to-parts/CMakeLists.txt b/utils/zookeeper-adjust-block-numbers-to-parts/CMakeLists.txt
deleted file mode 100644
index b63373bacf7..00000000000
--- a/utils/zookeeper-adjust-block-numbers-to-parts/CMakeLists.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-clickhouse_add_executable (zookeeper-adjust-block-numbers-to-parts main.cpp ${SRCS})
-target_compile_options(zookeeper-adjust-block-numbers-to-parts PRIVATE -Wno-format)
-target_link_libraries (zookeeper-adjust-block-numbers-to-parts PRIVATE clickhouse_aggregate_functions dbms clickhouse_common_zookeeper boost::program_options)
diff --git a/utils/zookeeper-adjust-block-numbers-to-parts/main.cpp b/utils/zookeeper-adjust-block-numbers-to-parts/main.cpp
deleted file mode 100644
index 7736921a9c6..00000000000
--- a/utils/zookeeper-adjust-block-numbers-to-parts/main.cpp
+++ /dev/null
@@ -1,286 +0,0 @@
-#include
-#include
-#include
-#include
-#include
-#include
-
-#include
-#include
-
-
-std::vector getAllShards(zkutil::ZooKeeper & zk, const std::string & root)
-{
-    return zk.getChildren(root);
-}
-
-
-std::vector removeNotExistingShards(zkutil::ZooKeeper & zk, const std::string & root, const std::vector & shards)
-{
-    auto existing_shards = getAllShards(zk, root);
-    std::vector filtered_shards;
-    filtered_shards.reserve(shards.size());
-    for (const auto & shard : shards)
-        if (std::find(existing_shards.begin(), existing_shards.end(), shard) == existing_shards.end())
-            std::cerr << "Shard " << shard << " not found." << std::endl;
-        else
-            filtered_shards.emplace_back(shard);
-    return filtered_shards;
-}
-
-
-std::vector getAllTables(zkutil::ZooKeeper & zk, const std::string & root, const std::string & shard)
-{
-    return zk.getChildren(root + "/" + shard);
-}
-
-
-std::vector removeNotExistingTables(zkutil::ZooKeeper & zk, const std::string & root, const std::string & shard, const std::vector & tables)
-{
-    auto existing_tables = getAllTables(zk, root, shard);
-    std::vector filtered_tables;
-    filtered_tables.reserve(tables.size());
-    for (const auto & table : tables)
-        if (std::find(existing_tables.begin(), existing_tables.end(), table) == existing_tables.end())
-            std::cerr << "\tTable " << table << " not found on shard " << shard << "."
<< std::endl; - else - filtered_tables.emplace_back(table); - return filtered_tables; -} - - -Int64 getMaxBlockNumberForPartition(zkutil::ZooKeeper & zk, - const std::string & replica_path, - const std::string & partition_name, - const DB::MergeTreeDataFormatVersion & format_version) -{ - auto replicas_path = replica_path + "/replicas"; - auto replica_hosts = zk.getChildren(replicas_path); - Int64 max_block_num = 0; - for (const auto & replica_host : replica_hosts) - { - auto parts = zk.getChildren(replicas_path + "/" + replica_host + "/parts"); - for (const auto & part : parts) - { - try - { - auto info = DB::MergeTreePartInfo::fromPartName(part, format_version); - if (info.partition_id == partition_name) - max_block_num = std::max(info.max_block, max_block_num); - } - catch (const DB::Exception & ex) - { - std::cerr << ex.displayText() << ", Part " << part << "skipped." << std::endl; - } - } - } - return max_block_num; -} - - -Int64 getCurrentBlockNumberForPartition(zkutil::ZooKeeper & zk, const std::string & part_path) -{ - Coordination::Stat stat; - zk.get(part_path, &stat); - - /// References: - /// https://stackoverflow.com/a/10347910 - /// https://bowenli86.github.io/2016/07/07/distributed%20system/zookeeper/How-does-ZooKeeper-s-persistent-sequential-id-work/ - return (stat.cversion + stat.numChildren) / 2; -} - - -std::unordered_map getPartitionsNeedAdjustingBlockNumbers( - zkutil::ZooKeeper & zk, const std::string & root, const std::vector & shards, const std::vector & tables) -{ - std::unordered_map result; - - std::vector use_shards = shards.empty() ? getAllShards(zk, root) : removeNotExistingShards(zk, root, shards); - - for (const auto & shard : use_shards) - { - std::cout << "Shard: " << shard << std::endl; - std::vector use_tables = tables.empty() ? getAllTables(zk, root, shard) : removeNotExistingTables(zk, root, shard, tables); - - for (const auto & table : use_tables) - { - std::cout << "\tTable: " << table << std::endl; - std::string table_path = root + "/" + shard + "/" + table; - std::string blocks_path = table_path + "/block_numbers"; - - std::vector partitions; - DB::MergeTreeDataFormatVersion format_version; - try - { - format_version = DB::ReplicatedMergeTreeTableMetadata::parse(zk.get(table_path + "/metadata")).data_format_version; - partitions = zk.getChildren(blocks_path); - } - catch (const DB::Exception & ex) - { - std::cerr << ex.displayText() << ", table " << table << " skipped." << std::endl; - continue; - } - - for (const auto & partition : partitions) - { - try - { - std::string part_path = blocks_path + "/" + partition; - Int64 partition_max_block = getMaxBlockNumberForPartition(zk, table_path, partition, format_version); - Int64 current_block_number = getCurrentBlockNumberForPartition(zk, part_path); - if (current_block_number < partition_max_block + 1) - { - std::cout << "\t\tPartition: " << partition << ": current block_number: " << current_block_number - << ", max block number: " << partition_max_block << ". Adjusting is required." << std::endl; - result.emplace(part_path, partition_max_block); - } - } - catch (const DB::Exception & ex) - { - std::cerr << ex.displayText() << ", partition " << partition << " skipped." 
<< std::endl; - } - } - } - } - return result; -} - - -void setCurrentBlockNumber(zkutil::ZooKeeper & zk, const std::string & path, Int64 new_current_block_number) -{ - Int64 current_block_number = getCurrentBlockNumberForPartition(zk, path); - - auto create_ephemeral_nodes = [&](size_t count) - { - std::string block_prefix = path + "/block-"; - Coordination::Requests requests; - requests.reserve(count); - for (size_t i = 0; i != count; ++i) - requests.emplace_back(zkutil::makeCreateRequest(block_prefix, "", zkutil::CreateMode::EphemeralSequential)); - auto responses = zk.multi(requests); - - std::vector paths_created; - paths_created.reserve(responses.size()); - for (const auto & response : responses) - { - const auto * create_response = dynamic_cast(response.get()); - if (!create_response) - { - std::cerr << "\tCould not create ephemeral node " << block_prefix << std::endl; - return false; - } - paths_created.emplace_back(create_response->path_created); - } - - std::sort(paths_created.begin(), paths_created.end()); - for (const auto & path_created : paths_created) - { - Int64 number = DB::parse(path_created.c_str() + block_prefix.size(), path_created.size() - block_prefix.size()); - if (number != current_block_number) - { - char suffix[11] = ""; - size_t size = sprintf(suffix, "%010lld", current_block_number); - std::string expected_path = block_prefix + std::string(suffix, size); - std::cerr << "\t" << path_created << ": Ephemeral node has been created with an unexpected path (expected something like " - << expected_path << ")." << std::endl; - return false; - } - std::cout << "\t" << path_created << std::endl; - ++current_block_number; - } - - return true; - }; - - if (current_block_number >= new_current_block_number) - return; - - std::cout << "Creating ephemeral sequential nodes:" << std::endl; - create_ephemeral_nodes(1); /// Firstly try to create just a single node. - - /// Create other nodes in batches of 50 nodes. - while (current_block_number + 50 <= new_current_block_number) // NOLINT: clang-tidy thinks that the loop is infinite - create_ephemeral_nodes(50); - - create_ephemeral_nodes(new_current_block_number - current_block_number); -} - - -int main(int argc, char ** argv) -try -{ - /// Parse the command line. - namespace po = boost::program_options; - po::options_description desc("Allowed options"); - desc.add_options() - ("help,h", "show help") - ("zookeeper,z", po::value(), "Addresses of ZooKeeper instances, comma-separated. Example: example01e.clickhouse.com:2181") - ("path,p", po::value(), "[optional] Path of replica queue to insert node (without trailing slash). By default it's /clickhouse/tables") - ("shard,s", po::value(), "[optional] Shards to process, comma-separated. If not specified then the utility will process all the shards.") - ("table,t", po::value(), "[optional] Tables to process, comma-separated. If not specified then the utility will process all the tables.") - ("dry-run", "[optional] Specify if you want this utility just to analyze block numbers without any changes."); - - po::variables_map options; - po::store(po::parse_command_line(argc, argv, desc), options); - - auto show_usage = [&] - { - std::cout << "Usage: " << std::endl; - std::cout << " " << argv[0] << " [options]" << std::endl; - std::cout << desc << std::endl; - }; - - if (options.count("help") || (argc == 1)) - { - std::cout << "This utility adjusts the /block_numbers zookeeper nodes to the correct block number in partition." 
<< std::endl; - std::cout << "It might be useful when incorrect block numbers stored in zookeeper don't allow you to insert data into a table or drop/detach a partition." << std::endl; - show_usage(); - return 0; - } - - if (!options.count("zookeeper")) - { - std::cerr << "Option --zookeeper should be set." << std::endl; - show_usage(); - return 1; - } - - std::string root = options.count("path") ? options.at("path").as() : "/clickhouse/tables"; - - std::vector shards, tables; - if (options.count("shard")) - boost::split(shards, options.at("shard").as(), boost::algorithm::is_any_of(",")); - if (options.count("table")) - boost::split(tables, options.at("table").as(), boost::algorithm::is_any_of(",")); - - /// Check if the adjusting of the block numbers is required. - std::cout << "Checking if adjusting of the block numbers is required:" << std::endl; - zkutil::ZooKeeper zookeeper(options.at("zookeeper").as()); - auto part_paths_with_max_block_numbers = getPartitionsNeedAdjustingBlockNumbers(zookeeper, root, shards, tables); - - if (part_paths_with_max_block_numbers.empty()) - { - std::cout << "No adjusting required." << std::endl; - return 0; - } - - std::cout << "Required adjusting of " << part_paths_with_max_block_numbers.size() << " block numbers." << std::endl; - - /// Adjust the block numbers. - if (options.count("dry-run")) - { - std::cout << "This is a dry-run, exiting." << std::endl; - return 0; - } - - std::cout << std::endl << "Adjusting the block numbers:" << std::endl; - for (const auto & [part_path, max_block_number] : part_paths_with_max_block_numbers) - setCurrentBlockNumber(zookeeper, part_path, max_block_number + 1); - - return 0; -} -catch (...) -{ - std::cerr << DB::getCurrentExceptionMessage(true) << '\n'; - throw; -} diff --git a/utils/zookeeper-create-entry-to-download-part/CMakeLists.txt b/utils/zookeeper-create-entry-to-download-part/CMakeLists.txt deleted file mode 100644 index 4c7a9ba9560..00000000000 --- a/utils/zookeeper-create-entry-to-download-part/CMakeLists.txt +++ /dev/null @@ -1,2 +0,0 @@ -clickhouse_add_executable (zookeeper-create-entry-to-download-part main.cpp ${SRCS}) -target_link_libraries (zookeeper-create-entry-to-download-part PRIVATE dbms clickhouse_common_zookeeper boost::program_options) diff --git a/utils/zookeeper-create-entry-to-download-part/main.cpp b/utils/zookeeper-create-entry-to-download-part/main.cpp deleted file mode 100644 index b92857929b7..00000000000 --- a/utils/zookeeper-create-entry-to-download-part/main.cpp +++ /dev/null @@ -1,47 +0,0 @@ -#include -#include -#include -#include - - -int main(int argc, char ** argv) -try -{ - boost::program_options::options_description desc("Allowed options"); - desc.add_options() - ("help,h", "produce help message") - ("address,a", boost::program_options::value()->required(), - "addresses of ZooKeeper instances, comma separated. Example: example01e.clickhouse.com:2181") - ("path,p", boost::program_options::value()->required(), "path of replica queue to insert node (without trailing slash)") - ("name,n", boost::program_options::value()->required(), "name of part to download") - ; - - boost::program_options::variables_map options; - boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), options); - - if (options.count("help")) - { - std::cout << "Insert log entry to replication queue to download part from any replica." 
<< std::endl; - std::cout << "Usage: " << argv[0] << " [options]" << std::endl; - std::cout << desc << std::endl; - return 1; - } - - std::string path = options.at("path").as(); - std::string name = options.at("name").as(); - - zkutil::ZooKeeper zookeeper(options.at("address").as()); - - DB::ReplicatedMergeTreeLogEntry entry; - entry.type = DB::ReplicatedMergeTreeLogEntry::MERGE_PARTS; - entry.source_parts = {name}; - entry.new_part_name = name; - - zookeeper.create(path + "/queue-", entry.toString(), zkutil::CreateMode::PersistentSequential); - return 0; -} -catch (...) -{ - std::cerr << DB::getCurrentExceptionMessage(true) << '\n'; - throw; -}