diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml index c90df6e57b7..cbd3bd7bec4 100644 --- a/.github/workflows/backport_branches.yml +++ b/.github/workflows/backport_branches.yml @@ -12,11 +12,10 @@ jobs: PythonUnitTests: runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Python unit tests run: | cd "$GITHUB_WORKSPACE/tests/ci" @@ -24,34 +23,32 @@ jobs: DockerHubPushAarch64: runs-on: [self-hosted, style-checker-aarch64] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Images check run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_images_check.py --suffix aarch64 - name: Upload images files to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: changed_images_aarch64 path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json DockerHubPushAmd64: runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Images check run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_images_check.py --suffix amd64 - name: Upload images files to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: changed_images_amd64 path: ${{ runner.temp }}/docker_images_check/changed_images_amd64.json @@ -59,18 +56,17 @@ jobs: needs: [DockerHubPushAmd64, DockerHubPushAarch64] runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Download changed aarch64 images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images_aarch64 path: ${{ runner.temp }} - name: Download changed amd64 images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images_amd64 path: ${{ runner.temp }} @@ -79,7 +75,7 @@ jobs: cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_manifests_merge.py --suffix amd64 --suffix aarch64 - name: Upload images files to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: changed_images path: ${{ runner.temp }}/changed_images.json @@ -94,13 +90,12 @@ jobs: REPO_COPY=${{runner.temp}}/compatibility_check/ClickHouse REPORTS_PATH=${{runner.temp}}/reports_dir EOF - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - name: CompatibilityCheck @@ -132,28 +127,25 @@ jobs: BUILD_NAME=package_release EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: 
name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true + submodules: true fetch-depth: 0 # For a proper version and performance artifacts - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -177,28 +169,25 @@ jobs: BUILD_NAME=package_aarch64 EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true + submodules: true fetch-depth: 0 # For a proper version and performance artifacts - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -222,26 +211,24 @@ jobs: BUILD_NAME=package_asan EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -265,26 +252,24 @@ jobs: BUILD_NAME=package_tsan EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 
--init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -308,26 +293,24 @@ jobs: BUILD_NAME=package_debug EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -351,28 +334,25 @@ jobs: BUILD_NAME=binary_darwin EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true + submodules: true fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -396,28 +376,25 @@ jobs: BUILD_NAME=binary_darwin_aarch64 EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true + submodules: true fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -436,12 +413,10 
@@ jobs: - BuilderDebAarch64 runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true fetch-depth: 0 # It MUST BE THE SAME for all dependencies and the job itself - name: Check docker clickhouse/clickhouse-server building run: | @@ -477,14 +452,13 @@ jobs: NEEDS_DATA_PATH=${{runner.temp}}/needs.json EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Report Builder run: | sudo rm -fr "$TEMP_PATH" @@ -516,14 +490,13 @@ jobs: NEEDS_DATA_PATH=${{runner.temp}}/needs.json EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Report Builder run: | sudo rm -fr "$TEMP_PATH" @@ -556,14 +529,13 @@ jobs: KILL_TIMEOUT=10800 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -594,14 +566,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -635,14 +606,13 @@ jobs: REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Stress test run: | sudo rm -fr "$TEMP_PATH" @@ -672,14 +642,13 @@ jobs: REPO_COPY=${{runner.temp}}/integration_tests_release/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -706,11 +675,10 @@ jobs: - CompatibilityCheck runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr 
"$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Finish label run: | cd "$GITHUB_WORKSPACE/tests/ci" diff --git a/.github/workflows/cherry_pick.yml b/.github/workflows/cherry_pick.yml index 3e6f9e76c56..065e584182b 100644 --- a/.github/workflows/cherry_pick.yml +++ b/.github/workflows/cherry_pick.yml @@ -28,8 +28,9 @@ jobs: REPO_TEAM=core EOF - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}} fetch-depth: 0 - name: Cherry pick diff --git a/.github/workflows/docs_check.yml b/.github/workflows/docs_check.yml index 5c912ebd359..a513eb9216d 100644 --- a/.github/workflows/docs_check.yml +++ b/.github/workflows/docs_check.yml @@ -21,11 +21,10 @@ jobs: CheckLabels: runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -rf "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Labels check run: | cd "$GITHUB_WORKSPACE/tests/ci" @@ -34,17 +33,16 @@ jobs: needs: CheckLabels runs-on: [self-hosted, style-checker-aarch64] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Images check run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_images_check.py --suffix aarch64 - name: Upload images files to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: changed_images_aarch64 path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json @@ -52,17 +50,16 @@ jobs: needs: CheckLabels runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Images check run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_images_check.py --suffix amd64 - name: Upload images files to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: changed_images_amd64 path: ${{ runner.temp }}/docker_images_check/changed_images_amd64.json @@ -70,18 +67,17 @@ jobs: needs: [DockerHubPushAmd64, DockerHubPushAarch64] runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Download changed aarch64 images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images_aarch64 path: ${{ runner.temp }} - name: Download changed amd64 images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images_amd64 path: ${{ runner.temp }} @@ -90,7 +86,7 @@ jobs: cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_manifests_merge.py --suffix amd64 --suffix aarch64 - name: Upload images files to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: changed_images path: ${{ runner.temp }}/changed_images.json @@ -110,15 +106,14 
@@ jobs: - name: Download changed images # even if artifact does not exist, e.g. on `do not test` label or failed Docker job continue-on-error: true - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.TEMP_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Style Check run: | cd "$GITHUB_WORKSPACE/tests/ci" @@ -140,15 +135,14 @@ jobs: REPO_COPY=${{runner.temp}}/docs_check/ClickHouse EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.TEMP_PATH }} - - name: Clear repository - run: | - sudo rm -rf "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Docs Check run: | cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" @@ -167,11 +161,10 @@ jobs: - DocsCheck runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Finish label run: | cd "$GITHUB_WORKSPACE/tests/ci" diff --git a/.github/workflows/docs_release.yml b/.github/workflows/docs_release.yml index 1b43138852b..fc4b9d88c3e 100644 --- a/.github/workflows/docs_release.yml +++ b/.github/workflows/docs_release.yml @@ -23,34 +23,32 @@ jobs: DockerHubPushAarch64: runs-on: [self-hosted, style-checker-aarch64] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Images check run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_images_check.py --suffix aarch64 - name: Upload images files to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: changed_images_aarch64 path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json DockerHubPushAmd64: runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Images check run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_images_check.py --suffix amd64 - name: Upload images files to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: changed_images_amd64 path: ${{ runner.temp }}/docker_images_check/changed_images_amd64.json @@ -58,18 +56,17 @@ jobs: needs: [DockerHubPushAmd64, DockerHubPushAarch64] runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Download changed aarch64 images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images_aarch64 path: ${{ runner.temp }} - name: Download changed amd64 images - uses: actions/download-artifact@v2 + uses: 
actions/download-artifact@v3 with: name: changed_images_amd64 path: ${{ runner.temp }} @@ -78,7 +75,7 @@ jobs: cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_manifests_merge.py --suffix amd64 --suffix aarch64 - name: Upload images files to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: changed_images path: ${{ runner.temp }}/changed_images.json @@ -97,13 +94,12 @@ jobs: ${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}} RCSK EOF - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.TEMP_PATH }} diff --git a/.github/workflows/jepsen.yml b/.github/workflows/jepsen.yml index 5afc066065e..e67df15c4d3 100644 --- a/.github/workflows/jepsen.yml +++ b/.github/workflows/jepsen.yml @@ -19,12 +19,10 @@ jobs: TEMP_PATH=${{runner.temp}}/keeper_jepsen REPO_COPY=${{runner.temp}}/keeper_jepsen/ClickHouse EOF - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true fetch-depth: 0 - name: Jepsen Test run: | @@ -50,12 +48,10 @@ jobs: # TEMP_PATH=${{runner.temp}}/server_jepsen # REPO_COPY=${{runner.temp}}/server_jepsen/ClickHouse # EOF - # - name: Clear repository - # run: | - # sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" # - name: Check out repository code - # uses: actions/checkout@v2 + # uses: ClickHouse/checkout@v1 # with: + # clear-repository: true # fetch-depth: 0 # - name: Jepsen Test # run: | diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 3eca97441f5..cd4ee482702 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -12,11 +12,10 @@ jobs: PythonUnitTests: runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Python unit tests run: | cd "$GITHUB_WORKSPACE/tests/ci" @@ -24,34 +23,32 @@ jobs: DockerHubPushAarch64: runs-on: [self-hosted, style-checker-aarch64] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Images check run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_images_check.py --suffix aarch64 - name: Upload images files to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: changed_images_aarch64 path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json DockerHubPushAmd64: runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Images check run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_images_check.py --suffix amd64 - name: Upload images files to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 
with: name: changed_images_amd64 path: ${{ runner.temp }}/docker_images_check/changed_images_amd64.json @@ -59,18 +56,17 @@ jobs: needs: [DockerHubPushAmd64, DockerHubPushAarch64, PythonUnitTests] runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Download changed aarch64 images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images_aarch64 path: ${{ runner.temp }} - name: Download changed amd64 images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images_amd64 path: ${{ runner.temp }} @@ -79,7 +75,7 @@ jobs: cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_manifests_merge.py --suffix amd64 --suffix aarch64 - name: Upload images files to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: changed_images path: ${{ runner.temp }}/changed_images.json @@ -96,15 +92,14 @@ jobs: - name: Download changed images # even if artifact does not exist, e.g. on `do not test` label or failed Docker job continue-on-error: true - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.TEMP_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Style Check run: | cd "$GITHUB_WORKSPACE/tests/ci" @@ -126,13 +121,12 @@ jobs: REPO_COPY=${{runner.temp}}/compatibility_check/ClickHouse REPORTS_PATH=${{runner.temp}}/reports_dir EOF - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - name: CompatibilityCheck @@ -147,38 +141,6 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" - SharedBuildSmokeTest: - needs: [BuilderDebShared] - runs-on: [self-hosted, style-checker] - steps: - - name: Set envs - run: | - cat >> "$GITHUB_ENV" << 'EOF' - TEMP_PATH=${{runner.temp}}/split_build_check - REPO_COPY=${{runner.temp}}/split_build_check/ClickHouse - REPORTS_PATH=${{runner.temp}}/reports_dir - EOF - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - - name: Check out repository code - uses: actions/checkout@v2 - - name: Download json reports - uses: actions/download-artifact@v2 - with: - path: ${{ env.REPORTS_PATH }} - - name: Shared build check - run: | - sudo rm -fr "$TEMP_PATH" - mkdir -p "$TEMP_PATH" - cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 split_build_smoke_check.py - - name: Cleanup - if: always() - run: | - docker ps --quiet | xargs --no-run-if-empty docker kill ||: - docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: - sudo rm -fr "$TEMP_PATH" ######################################################################################### #################################### ORDINARY BUILDS 
#################################### ######################################################################################### @@ -196,28 +158,25 @@ jobs: BUILD_NAME=package_release EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true + submodules: true fetch-depth: 0 # For a proper version and performance artifacts - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -241,24 +200,24 @@ jobs: BUILD_NAME=package_aarch64 EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ runner.temp }}/images_path - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true + submodules: true fetch-depth: 0 # For a proper version and performance artifacts - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ runner.temp }}/build_check/${{ env.BUILD_URLS }}.json @@ -282,28 +241,25 @@ jobs: BUILD_NAME=binary_release EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true + submodules: true fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -327,26 +283,24 @@ jobs: BUILD_NAME=package_asan EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code 
- uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -370,26 +324,24 @@ jobs: BUILD_NAME=package_ubsan EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -413,26 +365,24 @@ jobs: BUILD_NAME=package_tsan EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -456,26 +406,24 @@ jobs: BUILD_NAME=package_msan EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 
with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -499,26 +447,24 @@ jobs: BUILD_NAME=package_debug EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -531,49 +477,6 @@ jobs: ########################################################################################## ##################################### SPECIAL BUILDS ##################################### ########################################################################################## - BuilderDebShared: - needs: [DockerHubPush] - runs-on: [self-hosted, builder] - steps: - - name: Set envs - run: | - cat >> "$GITHUB_ENV" << 'EOF' - TEMP_PATH=${{runner.temp}}/build_check - IMAGES_PATH=${{runner.temp}}/images_path - REPO_COPY=${{runner.temp}}/build_check/ClickHouse - CACHES_PATH=${{runner.temp}}/../ccaches - BUILD_NAME=binary_shared - EOF - - name: Download changed images - uses: actions/download-artifact@v2 - with: - name: changed_images - path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - - name: Check out repository code - uses: actions/checkout@v2 - - name: Build - run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 - sudo rm -fr "$TEMP_PATH" - mkdir -p "$TEMP_PATH" - cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - - name: Upload build URLs to artifacts - if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 - with: - name: ${{ env.BUILD_URLS }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - - name: Cleanup - if: always() - run: | - docker ps --quiet | xargs --no-run-if-empty docker kill ||: - docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: - sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderBinClangTidy: needs: [DockerHubPush] runs-on: [self-hosted, builder] @@ -588,26 +491,24 @@ jobs: BUILD_NAME=binary_tidy EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" 
"$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -631,28 +532,25 @@ jobs: BUILD_NAME=binary_darwin EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true + submodules: true fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -676,28 +574,25 @@ jobs: BUILD_NAME=binary_aarch64 EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true + submodules: true fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -721,28 +616,25 @@ jobs: BUILD_NAME=binary_freebsd EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true + submodules: true fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -766,28 +658,25 @@ jobs: 
BUILD_NAME=binary_darwin_aarch64 EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true + submodules: true fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -811,28 +700,25 @@ jobs: BUILD_NAME=binary_ppc64le EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true + submodules: true fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -856,28 +742,25 @@ jobs: BUILD_NAME=binary_amd64_compat EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true + submodules: true fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -901,28 +784,25 @@ jobs: BUILD_NAME=binary_aarch64_v80compat EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: 
actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true + submodules: true fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -941,12 +821,10 @@ jobs: - BuilderDebAarch64 runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true fetch-depth: 0 # It MUST BE THE SAME for all dependencies and the job itself - name: Check docker clickhouse/clickhouse-server building run: | @@ -986,14 +864,13 @@ jobs: NEEDS_DATA_PATH=${{runner.temp}}/needs.json EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Report Builder run: | sudo rm -fr "$TEMP_PATH" @@ -1015,12 +892,10 @@ jobs: - BuilderBinDarwin - BuilderBinDarwinAarch64 - BuilderBinFreeBSD - # - BuilderBinGCC - BuilderBinPPC64 - BuilderBinAmd64Compat - BuilderBinAarch64V80Compat - BuilderBinClangTidy - - BuilderDebShared runs-on: [self-hosted, style-checker] if: ${{ success() || failure() }} steps: @@ -1033,14 +908,13 @@ jobs: NEEDS_DATA_PATH=${{runner.temp}}/needs.json EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Report Builder run: | sudo rm -fr "$TEMP_PATH" @@ -1064,11 +938,10 @@ jobs: - BuilderDebAarch64 runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Mark Commit Release Ready run: | cd "$GITHUB_WORKSPACE/tests/ci" @@ -1090,14 +963,13 @@ jobs: KILL_TIMEOUT=10800 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1125,14 +997,13 @@ jobs: KILL_TIMEOUT=10800 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: 
| - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1162,14 +1033,13 @@ jobs: RUN_BY_HASH_TOTAL=2 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1199,14 +1069,13 @@ jobs: RUN_BY_HASH_TOTAL=2 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1234,14 +1103,13 @@ jobs: KILL_TIMEOUT=10800 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1269,14 +1137,13 @@ jobs: KILL_TIMEOUT=10800 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1306,14 +1173,13 @@ jobs: RUN_BY_HASH_TOTAL=2 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1343,14 +1209,13 @@ jobs: RUN_BY_HASH_TOTAL=2 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1380,14 +1245,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm 
-fr "$TEMP_PATH" @@ -1417,14 +1281,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1454,14 +1317,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1489,14 +1351,13 @@ jobs: KILL_TIMEOUT=10800 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1526,14 +1387,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1563,14 +1423,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1600,14 +1459,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1637,14 +1495,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1674,14 +1531,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear 
repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1711,14 +1567,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1749,14 +1604,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1784,14 +1638,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1819,14 +1672,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1854,14 +1706,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1889,14 +1740,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1924,14 +1774,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | 
sudo rm -fr "$TEMP_PATH" @@ -1959,14 +1808,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1996,14 +1844,13 @@ jobs: REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Stress test run: | sudo rm -fr "$TEMP_PATH" @@ -2034,14 +1881,13 @@ jobs: REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Stress test run: | sudo rm -fr "$TEMP_PATH" @@ -2068,14 +1914,13 @@ jobs: REPO_COPY=${{runner.temp}}/stress_memory/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Stress test run: | sudo rm -fr "$TEMP_PATH" @@ -2102,14 +1947,13 @@ jobs: REPO_COPY=${{runner.temp}}/stress_undefined/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Stress test run: | sudo rm -fr "$TEMP_PATH" @@ -2136,14 +1980,13 @@ jobs: REPO_COPY=${{runner.temp}}/stress_debug/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Stress test run: | sudo rm -fr "$TEMP_PATH" @@ -2175,14 +2018,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -2211,14 +2053,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: 
Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -2247,14 +2088,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -2283,14 +2123,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -2319,14 +2158,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -2355,14 +2193,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -2391,14 +2228,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -2427,14 +2263,13 @@ jobs: RUN_BY_HASH_TOTAL=2 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -2463,14 +2298,13 @@ jobs: RUN_BY_HASH_TOTAL=2 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir 
"$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -2500,14 +2334,13 @@ jobs: REPO_COPY=${{runner.temp}}/ast_fuzzer_asan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Fuzzer run: | sudo rm -fr "$TEMP_PATH" @@ -2534,14 +2367,13 @@ jobs: REPO_COPY=${{runner.temp}}/ast_fuzzer_tsan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Fuzzer run: | sudo rm -fr "$TEMP_PATH" @@ -2568,14 +2400,13 @@ jobs: REPO_COPY=${{runner.temp}}/ast_fuzzer_ubsan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Fuzzer run: | sudo rm -fr "$TEMP_PATH" @@ -2602,14 +2433,13 @@ jobs: REPO_COPY=${{runner.temp}}/ast_fuzzer_msan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Fuzzer run: | sudo rm -fr "$TEMP_PATH" @@ -2636,14 +2466,13 @@ jobs: REPO_COPY=${{runner.temp}}/ast_fuzzer_debug/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Fuzzer run: | sudo rm -fr "$TEMP_PATH" @@ -2673,14 +2502,13 @@ jobs: REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Unit test run: | sudo rm -fr "$TEMP_PATH" @@ -2707,14 +2535,13 @@ jobs: REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out 
repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Unit test run: | sudo rm -fr "$TEMP_PATH" @@ -2728,40 +2555,6 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" - # UnitTestsReleaseGCC: - # needs: [BuilderBinGCC] - # runs-on: [self-hosted, fuzzer-unit-tester] - # steps: - # - name: Set envs - # run: | - # cat >> "$GITHUB_ENV" << 'EOF' - # TEMP_PATH=${{runner.temp}}/unit_tests_asan - # REPORTS_PATH=${{runner.temp}}/reports_dir - # CHECK_NAME=Unit tests (release-gcc) - # REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse - # EOF - # - name: Download json reports - # uses: actions/download-artifact@v2 - # with: - # path: ${{ env.REPORTS_PATH }} - # - name: Clear repository - # run: | - # sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - # - name: Check out repository code - # uses: actions/checkout@v2 - # - name: Unit test - # run: | - # sudo rm -fr "$TEMP_PATH" - # mkdir -p "$TEMP_PATH" - # cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - # cd "$REPO_COPY/tests/ci" - # python3 unit_tests_check.py "$CHECK_NAME" - # - name: Cleanup - # if: always() - # run: | - # docker ps --quiet | xargs --no-run-if-empty docker kill ||: - # docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: - # sudo rm -fr "$TEMP_PATH" UnitTestsTsan: needs: [BuilderDebTsan] runs-on: [self-hosted, fuzzer-unit-tester] @@ -2775,14 +2568,13 @@ jobs: REPO_COPY=${{runner.temp}}/unit_tests_tsan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Unit test run: | sudo rm -fr "$TEMP_PATH" @@ -2809,14 +2601,13 @@ jobs: REPO_COPY=${{runner.temp}}/unit_tests_msan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Unit test run: | sudo rm -fr "$TEMP_PATH" @@ -2843,14 +2634,13 @@ jobs: REPO_COPY=${{runner.temp}}/unit_tests_ubsan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Unit test run: | sudo rm -fr "$TEMP_PATH" @@ -2882,14 +2672,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Performance Comparison run: | sudo rm -fr "$TEMP_PATH" @@ -2918,14 +2707,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - 
name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Performance Comparison run: | sudo rm -fr "$TEMP_PATH" @@ -2954,14 +2742,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Performance Comparison run: | sudo rm -fr "$TEMP_PATH" @@ -2990,14 +2777,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Performance Comparison run: | sudo rm -fr "$TEMP_PATH" @@ -3026,14 +2812,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Performance Comparison run: | sudo rm -fr "$TEMP_PATH" @@ -3062,14 +2847,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Performance Comparison run: | sudo rm -fr "$TEMP_PATH" @@ -3098,14 +2882,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Performance Comparison run: | sudo rm -fr "$TEMP_PATH" @@ -3134,14 +2917,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Performance Comparison run: | sudo rm -fr "$TEMP_PATH" @@ -3171,14 +2953,13 @@ jobs: REPO_COPY=${{runner.temp}}/sqlancer_release/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - 
name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: SQLancer run: | sudo rm -fr "$TEMP_PATH" @@ -3205,14 +2986,13 @@ jobs: REPO_COPY=${{runner.temp}}/sqlancer_debug/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: SQLancer run: | sudo rm -fr "$TEMP_PATH" @@ -3286,16 +3066,14 @@ jobs: - UnitTestsMsan - UnitTestsUBsan - UnitTestsReleaseClang - - SharedBuildSmokeTest - SQLancerTestRelease - SQLancerTestDebug runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Finish label run: | cd "$GITHUB_WORKSPACE/tests/ci" diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 9ebbe4e090d..415d1b8fdc4 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -16,34 +16,32 @@ jobs: DockerHubPushAarch64: runs-on: [self-hosted, style-checker-aarch64] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Images check run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_images_check.py --suffix aarch64 --all - name: Upload images files to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: changed_images_aarch64 path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json DockerHubPushAmd64: runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Images check run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_images_check.py --suffix amd64 --all - name: Upload images files to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: changed_images_amd64 path: ${{ runner.temp }}/docker_images_check/changed_images_amd64.json @@ -51,18 +49,17 @@ jobs: needs: [DockerHubPushAmd64, DockerHubPushAarch64] runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Download changed aarch64 images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images_aarch64 path: ${{ runner.temp }} - name: Download changed amd64 images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images_amd64 path: ${{ runner.temp }} @@ -71,7 +68,7 @@ jobs: cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_manifests_merge.py --suffix amd64 --suffix aarch64 - name: Upload images 
files to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: changed_images path: ${{ runner.temp }}/changed_images.json @@ -90,22 +87,17 @@ jobs: EOF echo "COVERITY_TOKEN=${{ secrets.COVERITY_TOKEN }}" >> "$GITHUB_ENV" - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - id: coverity-checkout - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: - fetch-depth: 0 # otherwise we will have no info about contributors + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" @@ -134,8 +126,10 @@ jobs: CC: clang-15 CXX: clang++-15 steps: - - uses: actions/checkout@v2 + - name: Check out repository code + uses: ClickHouse/checkout@v1 with: + clear-repository: true fetch-depth: 0 # Shallow clones should be disabled for a better relevancy of analysis submodules: true - name: Set up JDK 11 diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index a81f52a9371..aecf3799a5d 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -26,11 +26,10 @@ jobs: # Run the first check always, even if the CI is cancelled if: ${{ always() }} steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Labels check run: | cd "$GITHUB_WORKSPACE/tests/ci" @@ -39,11 +38,10 @@ jobs: needs: CheckLabels runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Python unit tests run: | cd "$GITHUB_WORKSPACE/tests/ci" @@ -52,17 +50,16 @@ jobs: needs: CheckLabels runs-on: [self-hosted, style-checker-aarch64] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Images check run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_images_check.py --suffix aarch64 - name: Upload images files to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: changed_images_aarch64 path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json @@ -70,17 +67,16 @@ jobs: needs: CheckLabels runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Images check run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_images_check.py --suffix amd64 - name: Upload images files to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: changed_images_amd64 path: ${{ runner.temp 
}}/docker_images_check/changed_images_amd64.json @@ -88,18 +84,17 @@ jobs: needs: [DockerHubPushAmd64, DockerHubPushAarch64, PythonUnitTests] runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Download changed aarch64 images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images_aarch64 path: ${{ runner.temp }} - name: Download changed amd64 images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images_amd64 path: ${{ runner.temp }} @@ -108,7 +103,7 @@ jobs: cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_manifests_merge.py --suffix amd64 --suffix aarch64 - name: Upload images files to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: changed_images path: ${{ runner.temp }}/changed_images.json @@ -129,15 +124,14 @@ jobs: - name: Download changed images # even if artifact does not exist, e.g. on `do not test` label or failed Docker job continue-on-error: true - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.TEMP_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Style Check run: | cd "$GITHUB_WORKSPACE/tests/ci" @@ -159,16 +153,12 @@ jobs: REPO_COPY=${{runner.temp}}/fasttest/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches EOF - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" - mkdir "$GITHUB_WORKSPACE" - sudo rm -fr "$TEMP_PATH" - mkdir -p "$TEMP_PATH" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.TEMP_PATH }} @@ -193,13 +183,12 @@ jobs: REPO_COPY=${{runner.temp}}/compatibility_check/ClickHouse REPORTS_PATH=${{runner.temp}}/reports_dir EOF - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - name: CompatibilityCheck @@ -214,38 +203,6 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" - SharedBuildSmokeTest: - needs: [BuilderDebShared] - runs-on: [self-hosted, style-checker] - steps: - - name: Set envs - run: | - cat >> "$GITHUB_ENV" << 'EOF' - TEMP_PATH=${{runner.temp}}/split_build_check - REPO_COPY=${{runner.temp}}/split_build_check/ClickHouse - REPORTS_PATH=${{runner.temp}}/reports_dir - EOF - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - - name: Check out repository code - uses: actions/checkout@v2 - - name: Download json reports - uses: actions/download-artifact@v2 - with: - path: ${{ env.REPORTS_PATH }} - - name: Shared build check - run: | - sudo rm -fr "$TEMP_PATH" - 
mkdir -p "$TEMP_PATH" - cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 split_build_smoke_check.py - - name: Cleanup - if: always() - run: | - docker ps --quiet | xargs --no-run-if-empty docker kill ||: - docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: - sudo rm -fr "$TEMP_PATH" ######################################################################################### #################################### ORDINARY BUILDS #################################### ######################################################################################### @@ -263,28 +220,25 @@ jobs: BUILD_NAME=package_release EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true fetch-depth: 0 # for performance artifact + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -308,26 +262,24 @@ jobs: BUILD_NAME=binary_release EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -351,28 +303,25 @@ jobs: BUILD_NAME=package_aarch64 EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ runner.temp }}/images_path - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true + submodules: true fetch-depth: 0 # for performance artifact - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: 
actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -396,26 +345,24 @@ jobs: BUILD_NAME=package_asan EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -439,26 +386,24 @@ jobs: BUILD_NAME=package_ubsan EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -482,26 +427,24 @@ jobs: BUILD_NAME=package_tsan EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -525,26 +468,24 @@ jobs: BUILD_NAME=package_msan EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 
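# In the builder jobs the same swap also absorbs submodule handling: the explicit
# git -C "$GITHUB_WORKSPACE" submodule sync and submodule update --single-branch
# --depth=1 --init --jobs=10 commands drop out of the Build step, and the checkout
# action is asked to do the work via submodules: true. A sketch of the resulting
# checkout step, using only inputs that appear in these hunks (fetch-depth: 0 is kept
# only by the jobs that need full history, e.g. for the performance artifact):
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
         with:
+          clear-repository: true
           fetch-depth: 0 # for performance artifact
+          submodules: true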
+ with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -568,26 +509,24 @@ jobs: BUILD_NAME=package_debug EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -600,49 +539,6 @@ jobs: ########################################################################################## ##################################### SPECIAL BUILDS ##################################### ########################################################################################## - BuilderDebShared: - needs: [DockerHubPush, FastTest, StyleCheck] - runs-on: [self-hosted, builder] - steps: - - name: Set envs - run: | - cat >> "$GITHUB_ENV" << 'EOF' - TEMP_PATH=${{runner.temp}}/build_check - IMAGES_PATH=${{runner.temp}}/images_path - REPO_COPY=${{runner.temp}}/build_check/ClickHouse - CACHES_PATH=${{runner.temp}}/../ccaches - BUILD_NAME=binary_shared - EOF - - name: Download changed images - uses: actions/download-artifact@v2 - with: - name: changed_images - path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - - name: Check out repository code - uses: actions/checkout@v2 - - name: Build - run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 - sudo rm -fr "$TEMP_PATH" - mkdir -p "$TEMP_PATH" - cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - - name: Upload build URLs to artifacts - if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 - with: - name: ${{ env.BUILD_URLS }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - - name: Cleanup - if: always() - run: | - docker ps --quiet | xargs --no-run-if-empty docker kill ||: - docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: - sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderBinClangTidy: needs: [DockerHubPush, FastTest, StyleCheck] runs-on: [self-hosted, builder] @@ -657,26 +553,24 @@ jobs: BUILD_NAME=binary_tidy EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: 
actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -700,26 +594,24 @@ jobs: BUILD_NAME=binary_darwin EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -743,26 +635,24 @@ jobs: BUILD_NAME=binary_aarch64 EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -786,26 +676,24 @@ jobs: BUILD_NAME=binary_freebsd EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" 
"$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -829,26 +717,24 @@ jobs: BUILD_NAME=binary_darwin_aarch64 EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -872,26 +758,24 @@ jobs: BUILD_NAME=binary_ppc64le EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -915,26 +799,24 @@ jobs: BUILD_NAME=binary_amd64_compat EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -958,26 +840,24 @@ jobs: BUILD_NAME=binary_aarch64_v80compat EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - 
name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -996,12 +876,10 @@ jobs: - BuilderDebAarch64 runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true fetch-depth: 0 # It MUST BE THE SAME for all dependencies and the job itself - name: Check docker clickhouse/clickhouse-server building run: | @@ -1040,14 +918,13 @@ jobs: NEEDS_DATA_PATH=${{runner.temp}}/needs.json EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Report Builder run: | sudo rm -fr "$TEMP_PATH" @@ -1069,12 +946,10 @@ jobs: - BuilderBinDarwin - BuilderBinDarwinAarch64 - BuilderBinFreeBSD - # - BuilderBinGCC - BuilderBinPPC64 - BuilderBinAmd64Compat - BuilderBinAarch64V80Compat - BuilderBinClangTidy - - BuilderDebShared runs-on: [self-hosted, style-checker] if: ${{ success() || failure() }} steps: @@ -1087,14 +962,13 @@ jobs: NEEDS_DATA_PATH=${{runner.temp}}/needs.json EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Report Builder run: | sudo rm -fr "$TEMP_PATH" @@ -1127,14 +1001,13 @@ jobs: KILL_TIMEOUT=10800 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1164,14 +1037,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1201,14 +1073,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: 
Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1238,14 +1109,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1275,14 +1145,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1310,14 +1179,13 @@ jobs: KILL_TIMEOUT=10800 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1347,14 +1215,13 @@ jobs: RUN_BY_HASH_TOTAL=2 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1384,14 +1251,13 @@ jobs: RUN_BY_HASH_TOTAL=2 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1421,14 +1287,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1458,14 +1323,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" 
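# The remaining change repeated through these test hunks is a plain major-version bump
# of the artifact actions (actions/upload-artifact and actions/download-artifact move
# from v2 to v3); their inputs are left untouched, as in this representative excerpt:
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}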
- name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1495,14 +1359,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1532,14 +1395,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1569,14 +1431,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1606,14 +1467,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1643,14 +1503,13 @@ jobs: RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1680,14 +1539,13 @@ jobs: RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1717,14 +1575,13 @@ jobs: RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1754,14 +1611,13 @@ jobs: 
RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1791,14 +1647,13 @@ jobs: RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1826,14 +1681,13 @@ jobs: KILL_TIMEOUT=10800 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1863,14 +1717,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1900,14 +1753,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1937,14 +1789,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1974,14 +1825,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2011,14 +1861,13 @@ jobs: RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr 
"$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2048,14 +1897,13 @@ jobs: RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2085,14 +1933,13 @@ jobs: RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2122,14 +1969,13 @@ jobs: RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2159,14 +2005,13 @@ jobs: RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2196,14 +2041,13 @@ jobs: RUN_BY_HASH_TOTAL=2 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2233,14 +2077,13 @@ jobs: RUN_BY_HASH_TOTAL=2 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2270,14 +2113,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr 
"$TEMP_PATH" @@ -2307,14 +2149,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2344,14 +2185,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2381,14 +2221,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2418,14 +2257,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2455,14 +2293,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2492,14 +2329,13 @@ jobs: RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2529,14 +2365,13 @@ jobs: RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2566,14 +2401,13 @@ jobs: RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear 
repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Functional test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -2603,14 +2437,13 @@ jobs:
           RUN_BY_HASH_TOTAL=5
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Functional test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -2640,14 +2473,13 @@ jobs:
           RUN_BY_HASH_TOTAL=5
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Functional test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -2675,14 +2507,13 @@ jobs:
           KILL_TIMEOUT=3600
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Functional test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -2698,7 +2529,7 @@ jobs:
           sudo rm -fr "$TEMP_PATH"
   TestsBugfixCheck:
     needs: [CheckLabels, StyleCheck]
-    runs-on: [self-hosted, stress-tester]
+    runs-on: [self-hosted, func-tester]
     steps:
       - name: Set envs
         run: |
@@ -2710,14 +2541,13 @@ jobs:
           REPO_COPY=${{runner.temp}}/tests_bugfix_check/ClickHouse
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Bugfix test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -2735,7 +2565,7 @@ jobs:
           python3 functional_test_check.py "Stateless $CHECK_NAME" "$KILL_TIMEOUT" \
             --validate-bugfix --post-commit-status=file || echo 'ignore exit code'

-          python3 bugfix_validate_check.py "${TEMP_PATH}/stateless/post_commit_status.tsv" "${TEMP_PATH}/integration/post_commit_status.tsv"
+          python3 bugfix_validate_check.py "${TEMP_PATH}/stateless/functional_commit_status.tsv" "${TEMP_PATH}/integration/integration_commit_status.tsv"
       - name: Cleanup
         if: always()
         run: |
@@ -2759,14 +2589,13 @@ jobs:
           KILL_TIMEOUT=3600
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Functional test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -2794,14 +2623,13 @@ jobs:
           KILL_TIMEOUT=3600
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{
env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2829,14 +2657,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2864,14 +2691,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2899,14 +2725,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2934,14 +2759,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2969,14 +2793,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -3006,14 +2829,13 @@ jobs: REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Stress test run: | sudo rm -fr "$TEMP_PATH" @@ -3044,14 +2866,13 @@ jobs: REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: 
actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Stress test run: | sudo rm -fr "$TEMP_PATH" @@ -3078,14 +2899,13 @@ jobs: REPO_COPY=${{runner.temp}}/stress_memory/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Stress test run: | sudo rm -fr "$TEMP_PATH" @@ -3112,14 +2932,13 @@ jobs: REPO_COPY=${{runner.temp}}/stress_undefined/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Stress test run: | sudo rm -fr "$TEMP_PATH" @@ -3146,14 +2965,13 @@ jobs: REPO_COPY=${{runner.temp}}/stress_debug/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Stress test run: | sudo rm -fr "$TEMP_PATH" @@ -3183,14 +3001,13 @@ jobs: REPO_COPY=${{runner.temp}}/ast_fuzzer_asan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Fuzzer run: | sudo rm -fr "$TEMP_PATH" @@ -3217,14 +3034,13 @@ jobs: REPO_COPY=${{runner.temp}}/ast_fuzzer_tsan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Fuzzer run: | sudo rm -fr "$TEMP_PATH" @@ -3251,14 +3067,13 @@ jobs: REPO_COPY=${{runner.temp}}/ast_fuzzer_ubsan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Fuzzer run: | sudo rm -fr "$TEMP_PATH" @@ -3285,14 +3100,13 @@ jobs: REPO_COPY=${{runner.temp}}/ast_fuzzer_msan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: 
ClickHouse/checkout@v1 + with: + clear-repository: true - name: Fuzzer run: | sudo rm -fr "$TEMP_PATH" @@ -3319,14 +3133,13 @@ jobs: REPO_COPY=${{runner.temp}}/ast_fuzzer_debug/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Fuzzer run: | sudo rm -fr "$TEMP_PATH" @@ -3358,14 +3171,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3394,14 +3206,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3430,14 +3241,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3466,14 +3276,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3502,14 +3311,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3538,14 +3346,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3574,14 +3381,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: 
actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3610,14 +3416,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3646,14 +3451,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3682,14 +3486,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3718,14 +3521,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3754,14 +3556,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3790,14 +3591,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3826,14 +3626,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out 
repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3862,14 +3661,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3898,14 +3696,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3932,14 +3729,13 @@ jobs: REPO_COPY=${{runner.temp}}/integration_tests_asan_flaky_check/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3969,14 +3765,13 @@ jobs: REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Unit test run: | sudo rm -fr "$TEMP_PATH" @@ -4003,14 +3798,13 @@ jobs: REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Unit test run: | sudo rm -fr "$TEMP_PATH" @@ -4037,14 +3831,13 @@ jobs: REPO_COPY=${{runner.temp}}/unit_tests_tsan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Unit test run: | sudo rm -fr "$TEMP_PATH" @@ -4071,14 +3864,13 @@ jobs: REPO_COPY=${{runner.temp}}/unit_tests_msan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: 
ClickHouse/checkout@v1 + with: + clear-repository: true - name: Unit test run: | sudo rm -fr "$TEMP_PATH" @@ -4105,14 +3897,13 @@ jobs: REPO_COPY=${{runner.temp}}/unit_tests_ubsan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Unit test run: | sudo rm -fr "$TEMP_PATH" @@ -4144,14 +3935,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Performance Comparison run: | sudo rm -fr "$TEMP_PATH" @@ -4180,14 +3970,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Performance Comparison run: | sudo rm -fr "$TEMP_PATH" @@ -4216,14 +4005,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Performance Comparison run: | sudo rm -fr "$TEMP_PATH" @@ -4252,14 +4040,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Performance Comparison run: | sudo rm -fr "$TEMP_PATH" @@ -4288,14 +4075,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Performance Comparison run: | sudo rm -fr "$TEMP_PATH" @@ -4324,14 +4110,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Performance Comparison run: | sudo rm -fr "$TEMP_PATH" @@ -4360,14 +4145,13 @@ jobs: RUN_BY_HASH_TOTAL=4 
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Performance Comparison
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -4396,14 +4180,13 @@ jobs:
           RUN_BY_HASH_TOTAL=4
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Performance Comparison
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -4433,14 +4216,13 @@ jobs:
           REPO_COPY=${{runner.temp}}/sqlancer_release/ClickHouse
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: SQLancer
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -4467,14 +4249,13 @@ jobs:
           REPO_COPY=${{runner.temp}}/sqlancer_debug/ClickHouse
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: SQLancer
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -4593,18 +4374,16 @@ jobs:
       - UnitTestsMsan
       - UnitTestsUBsan
       - UnitTestsReleaseClang
-      - SharedBuildSmokeTest
       - CompatibilityCheck
       - IntegrationTestsFlakyCheck
       - SQLancerTestRelease
       - SQLancerTestDebug
     runs-on: [self-hosted, style-checker]
     steps:
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Finish label
         run: |
           cd "$GITHUB_WORKSPACE/tests/ci"
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 0b0f125d641..9200e5e87b8 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -20,7 +20,7 @@ jobs:
           REPO_COPY=${{runner.temp}}/release_packages/ClickHouse
           EOF
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
         with:
           # Always use the most recent script version
           ref: master
@@ -50,12 +50,10 @@ jobs:
   DockerServerImages:
     runs-on: [self-hosted, style-checker]
     steps:
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
         with:
+          clear-repository: true
           fetch-depth: 0 # otherwise we will have no version info
       - name: Check docker clickhouse/clickhouse-server building
         run: |
diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml
index bf35ca76fc6..251087f33a5 100644
--- a/.github/workflows/release_branches.yml
+++ b/.github/workflows/release_branches.yml
@@ -15,34 +15,32 @@ jobs:
   DockerHubPushAarch64:
     runs-on: [self-hosted, style-checker-aarch64]
     steps:
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Images check
         run: |
           cd "$GITHUB_WORKSPACE/tests/ci"
           python3 docker_images_check.py --suffix aarch64
       - name: Upload images files to artifacts
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v3
         with:
           name: changed_images_aarch64
           path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json
   DockerHubPushAmd64:
     runs-on: [self-hosted, style-checker]
     steps:
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Images check
         run: |
           cd "$GITHUB_WORKSPACE/tests/ci"
           python3 docker_images_check.py --suffix amd64
       - name: Upload images files to artifacts
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v3
         with:
           name: changed_images_amd64
           path: ${{ runner.temp }}/docker_images_check/changed_images_amd64.json
@@ -50,18 +48,17 @@ jobs:
     needs: [DockerHubPushAmd64, DockerHubPushAarch64]
     runs-on: [self-hosted, style-checker]
     steps:
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Download changed aarch64 images
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           name: changed_images_aarch64
           path: ${{ runner.temp }}
       - name: Download changed amd64 images
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           name: changed_images_amd64
           path: ${{ runner.temp }}
@@ -85,13 +82,12 @@ jobs:
           REPO_COPY=${{runner.temp}}/compatibility_check/ClickHouse
           REPORTS_PATH=${{runner.temp}}/reports_dir
           EOF
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
       - name: CompatibilityCheck
@@ -123,28 +119,25 @@ jobs:
           BUILD_NAME=package_release
           EOF
       - name: Download changed images
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           name: changed_images
           path: ${{ env.IMAGES_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
         with:
+          clear-repository: true
+          submodules: true
           fetch-depth: 0 # otherwise we will have no info about contributors
       - name: Build
         run: |
-          git -C "$GITHUB_WORKSPACE" submodule sync
-          git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10
           sudo rm -fr "$TEMP_PATH"
           mkdir -p "$TEMP_PATH"
           cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
           cd
"$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -168,24 +161,24 @@ jobs: BUILD_NAME=package_aarch64 EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ runner.temp }}/images_path - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: - fetch-depth: 0 # otherwise we will have no info about contributors + clear-repository: true + submodules: true + fetch-depth: 0 # For a proper version and performance artifacts - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ runner.temp }}/build_check/${{ env.BUILD_URLS }}.json @@ -209,26 +202,24 @@ jobs: BUILD_NAME=package_asan EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -252,26 +243,24 @@ jobs: BUILD_NAME=package_ubsan EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -295,26 +284,24 @@ jobs: BUILD_NAME=package_tsan EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository 
- run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -338,26 +325,24 @@ jobs: BUILD_NAME=package_msan EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -381,26 +366,24 @@ jobs: BUILD_NAME=package_debug EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -424,28 +407,25 @@ jobs: BUILD_NAME=binary_darwin EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true + submodules: true fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 
build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -469,28 +449,25 @@ jobs: BUILD_NAME=binary_darwin_aarch64 EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true + submodules: true fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -509,12 +486,10 @@ jobs: - BuilderDebAarch64 runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true fetch-depth: 0 # It MUST BE THE SAME for all dependencies and the job itself - name: Check docker clickhouse/clickhouse-server building run: | @@ -553,14 +528,13 @@ jobs: NEEDS_DATA_PATH=${{runner.temp}}/needs.json EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Report Builder run: | sudo rm -fr "$TEMP_PATH" @@ -592,14 +566,13 @@ jobs: NEEDS_DATA_PATH=${{runner.temp}}/needs.json EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Report Builder run: | sudo rm -fr "$TEMP_PATH" @@ -623,11 +596,10 @@ jobs: - BuilderDebAarch64 runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Mark Commit Release Ready run: | cd "$GITHUB_WORKSPACE/tests/ci" @@ -649,14 +621,13 @@ jobs: KILL_TIMEOUT=10800 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: 
ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -684,14 +655,13 @@ jobs: KILL_TIMEOUT=10800 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -721,14 +691,13 @@ jobs: RUN_BY_HASH_TOTAL=2 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -758,14 +727,13 @@ jobs: RUN_BY_HASH_TOTAL=2 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -795,14 +763,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -832,14 +799,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -869,14 +835,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -904,14 +869,13 @@ jobs: KILL_TIMEOUT=10800 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -941,14 +905,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: 
actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -978,14 +941,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1015,14 +977,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1052,14 +1013,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1089,14 +1049,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1126,14 +1085,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1164,14 +1122,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1199,14 +1156,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: 
ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1234,14 +1190,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1269,14 +1224,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1304,14 +1258,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1339,14 +1292,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1374,14 +1326,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1411,14 +1362,13 @@ jobs: REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Stress test run: | sudo rm -fr "$TEMP_PATH" @@ -1449,14 +1399,13 @@ jobs: REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Stress test run: | sudo rm -fr "$TEMP_PATH" @@ -1483,14 +1432,13 @@ jobs: 
REPO_COPY=${{runner.temp}}/stress_memory/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Stress test run: | sudo rm -fr "$TEMP_PATH" @@ -1517,14 +1465,13 @@ jobs: REPO_COPY=${{runner.temp}}/stress_undefined/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Stress test run: | sudo rm -fr "$TEMP_PATH" @@ -1551,14 +1498,13 @@ jobs: REPO_COPY=${{runner.temp}}/stress_debug/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Stress test run: | sudo rm -fr "$TEMP_PATH" @@ -1590,14 +1536,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -1626,14 +1571,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -1662,14 +1606,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -1698,14 +1641,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -1734,14 +1676,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: 
${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -1770,14 +1711,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -1806,14 +1746,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -1842,14 +1781,13 @@ jobs: RUN_BY_HASH_TOTAL=2 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -1878,14 +1816,13 @@ jobs: RUN_BY_HASH_TOTAL=2 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -1944,11 +1881,10 @@ jobs: - CompatibilityCheck runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Finish label run: | cd "$GITHUB_WORKSPACE/tests/ci" diff --git a/.github/workflows/tags_stable.yml b/.github/workflows/tags_stable.yml index f8cfa1137cc..f5b42e9c882 100644 --- a/.github/workflows/tags_stable.yml +++ b/.github/workflows/tags_stable.yml @@ -34,7 +34,7 @@ jobs: run: | echo "GITHUB_TAG=${GITHUB_REF#refs/tags/}" >> "$GITHUB_ENV" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: ref: master fetch-depth: 0 diff --git a/.github/workflows/woboq.yml b/.github/workflows/woboq.yml index b928a4a8d3d..363652c9f33 100644 --- a/.github/workflows/woboq.yml +++ b/.github/workflows/woboq.yml @@ -21,12 +21,10 @@ jobs: REPO_COPY=${{runner.temp}}/codebrowser/ClickHouse IMAGES_PATH=${{runner.temp}}/images_path EOF - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true submodules: 'true' - 
name: Codebrowser run: | diff --git a/.gitmodules b/.gitmodules index 0805b6d5492..406e8a7e11e 100644 --- a/.gitmodules +++ b/.gitmodules @@ -104,13 +104,13 @@ url = https://github.com/ClickHouse/aws-sdk-cpp.git [submodule "aws-c-event-stream"] path = contrib/aws-c-event-stream - url = https://github.com/ClickHouse/aws-c-event-stream.git + url = https://github.com/awslabs/aws-c-event-stream.git [submodule "aws-c-common"] path = contrib/aws-c-common url = https://github.com/ClickHouse/aws-c-common.git [submodule "aws-checksums"] path = contrib/aws-checksums - url = https://github.com/ClickHouse/aws-checksums.git + url = https://github.com/awslabs/aws-checksums.git [submodule "contrib/curl"] path = contrib/curl url = https://github.com/curl/curl.git @@ -294,3 +294,33 @@ [submodule "contrib/libdivide"] path = contrib/libdivide url = https://github.com/ridiculousfish/libdivide.git +[submodule "contrib/aws-crt-cpp"] + path = contrib/aws-crt-cpp + url = https://github.com/ClickHouse/aws-crt-cpp.git +[submodule "contrib/aws-c-io"] + path = contrib/aws-c-io + url = https://github.com/ClickHouse/aws-c-io.git +[submodule "contrib/aws-c-mqtt"] + path = contrib/aws-c-mqtt + url = https://github.com/awslabs/aws-c-mqtt.git +[submodule "contrib/aws-c-auth"] + path = contrib/aws-c-auth + url = https://github.com/awslabs/aws-c-auth.git +[submodule "contrib/aws-c-cal"] + path = contrib/aws-c-cal + url = https://github.com/ClickHouse/aws-c-cal.git +[submodule "contrib/aws-c-sdkutils"] + path = contrib/aws-c-sdkutils + url = https://github.com/awslabs/aws-c-sdkutils.git +[submodule "contrib/aws-c-http"] + path = contrib/aws-c-http + url = https://github.com/awslabs/aws-c-http.git +[submodule "contrib/aws-c-s3"] + path = contrib/aws-c-s3 + url = https://github.com/awslabs/aws-c-s3.git +[submodule "contrib/aws-c-compression"] + path = contrib/aws-c-compression + url = https://github.com/awslabs/aws-c-compression.git +[submodule "contrib/aws-s2n-tls"] + path = contrib/aws-s2n-tls + url = https://github.com/aws/s2n-tls.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 99997db96a1..ab976612401 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -73,22 +73,7 @@ message (STATUS "CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}") string (TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_UC) -option(USE_STATIC_LIBRARIES "Disable to use shared libraries" ON) -# DEVELOPER ONLY. -# Faster linking if turned on. -option(SPLIT_SHARED_LIBRARIES "Keep all internal libraries as separate .so files" OFF) - -if (USE_STATIC_LIBRARIES AND SPLIT_SHARED_LIBRARIES) - message(FATAL_ERROR "SPLIT_SHARED_LIBRARIES=1 must not be used together with USE_STATIC_LIBRARIES=1") -endif() - -if (NOT USE_STATIC_LIBRARIES AND SPLIT_SHARED_LIBRARIES) - set(BUILD_SHARED_LIBS 1 CACHE INTERNAL "") -endif () - -if (USE_STATIC_LIBRARIES) - list(REVERSE CMAKE_FIND_LIBRARY_SUFFIXES) -endif () +list(REVERSE CMAKE_FIND_LIBRARY_SUFFIXES) option (ENABLE_FUZZING "Fuzzy testing using libfuzzer" OFF) @@ -171,7 +156,7 @@ option(ENABLE_TESTS "Provide unit_test_dbms target with Google.Test unit tests" option(ENABLE_EXAMPLES "Build all example programs in 'examples' subdirectories" OFF) option(ENABLE_BENCHMARKS "Build all benchmark programs in 'benchmarks' subdirectories" OFF) -if (OS_LINUX AND (ARCH_AMD64 OR ARCH_AARCH64) AND USE_STATIC_LIBRARIES AND NOT SPLIT_SHARED_LIBRARIES AND NOT USE_MUSL) +if (OS_LINUX AND (ARCH_AMD64 OR ARCH_AARCH64) AND NOT USE_MUSL) # Only for Linux, x86_64 or aarch64. option(GLIBC_COMPATIBILITY "Enable compatibility with older glibc libraries." 
ON) elseif(GLIBC_COMPATIBILITY) @@ -377,15 +362,15 @@ set (DEBUG_INFO_FLAGS "-g -gdwarf-4") set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${COMPILER_FLAGS}") set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -O3 ${DEBUG_INFO_FLAGS} ${CMAKE_CXX_FLAGS_ADD}") -set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 ${DEBUG_INFO_FLAGS} -fno-inline ${CMAKE_CXX_FLAGS_ADD}") +set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 ${DEBUG_INFO_FLAGS} ${CMAKE_CXX_FLAGS_ADD}") set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${COMPILER_FLAGS} ${CMAKE_C_FLAGS_ADD}") set (CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -O3 ${DEBUG_INFO_FLAGS} ${CMAKE_C_FLAGS_ADD}") -set (CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -O0 ${DEBUG_INFO_FLAGS} -fno-inline ${CMAKE_C_FLAGS_ADD}") +set (CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -O0 ${DEBUG_INFO_FLAGS} ${CMAKE_C_FLAGS_ADD}") set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} ${COMPILER_FLAGS} ${CMAKE_ASM_FLAGS_ADD}") set (CMAKE_ASM_FLAGS_RELWITHDEBINFO "${CMAKE_ASM_FLAGS_RELWITHDEBINFO} -O3 ${DEBUG_INFO_FLAGS} ${CMAKE_ASM_FLAGS_ADD}") -set (CMAKE_ASM_FLAGS_DEBUG "${CMAKE_ASM_FLAGS_DEBUG} -O0 ${DEBUG_INFO_FLAGS} -fno-inline ${CMAKE_ASM_FLAGS_ADD}") +set (CMAKE_ASM_FLAGS_DEBUG "${CMAKE_ASM_FLAGS_DEBUG} -O0 ${DEBUG_INFO_FLAGS} ${CMAKE_ASM_FLAGS_ADD}") if (COMPILER_CLANG) if (OS_DARWIN) @@ -467,22 +452,13 @@ endif () set (CMAKE_POSTFIX_VARIABLE "CMAKE_${CMAKE_BUILD_TYPE_UC}_POSTFIX") -if (USE_STATIC_LIBRARIES) - set (CMAKE_POSITION_INDEPENDENT_CODE OFF) - if (OS_LINUX AND NOT ARCH_AARCH64) - # Slightly more efficient code can be generated - # It's disabled for ARM because otherwise ClickHouse cannot run on Android. - set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -fno-pie") - set (CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -fno-pie") - set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -no-pie -Wl,-no-pie") - endif () -else () - set (CMAKE_POSITION_INDEPENDENT_CODE ON) - # This is required for clang on Arch linux, that uses PIE by default. - # See enable-SSP-and-PIE-by-default.patch [1]. - # - # [1]: https://github.com/archlinux/svntogit-packages/blob/6e681aa860e65ad46a1387081482eb875c2200f2/trunk/enable-SSP-and-PIE-by-default.patch - set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -no-pie") +set (CMAKE_POSITION_INDEPENDENT_CODE OFF) +if (OS_LINUX AND NOT ARCH_AARCH64) + # Slightly more efficient code can be generated + # It's disabled for ARM because otherwise ClickHouse cannot run on Android. + set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -fno-pie") + set (CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -fno-pie") + set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -no-pie -Wl,-no-pie") endif () if (ENABLE_TESTS) @@ -504,10 +480,7 @@ else () set (CLICKHOUSE_ETC_DIR "${CMAKE_INSTALL_PREFIX}/etc") endif () -message (STATUS - "Building for: ${CMAKE_SYSTEM} ${CMAKE_SYSTEM_PROCESSOR} ${CMAKE_LIBRARY_ARCHITECTURE} ; - USE_STATIC_LIBRARIES=${USE_STATIC_LIBRARIES} - SPLIT_SHARED_LIBRARIES=${SPLIT_SHARED_LIBRARIES}") +message (STATUS "Building for: ${CMAKE_SYSTEM} ${CMAKE_SYSTEM_PROCESSOR} ${CMAKE_LIBRARY_ARCHITECTURE}") include (GNUInstallDirs) @@ -553,7 +526,7 @@ macro (clickhouse_add_executable target) # - _je_zone_register due to JEMALLOC_PRIVATE_NAMESPACE=je_ under OS X. 
# - but jemalloc-cmake does not run private_namespace.sh # so symbol name should be _zone_register - if (ENABLE_JEMALLOC AND USE_STATIC_LIBRARIES AND OS_DARWIN) + if (ENABLE_JEMALLOC AND OS_DARWIN) set_property(TARGET ${target} APPEND PROPERTY LINK_OPTIONS -u_zone_register) endif() endif() diff --git a/LICENSE b/LICENSE index 8b0ac080f01..65c5df824c6 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,4 @@ -Copyright 2016-2022 ClickHouse, Inc. +Copyright 2016-2023 ClickHouse, Inc. Apache License Version 2.0, January 2004 @@ -188,7 +188,7 @@ Copyright 2016-2022 ClickHouse, Inc. same "printed page" as the copyright notice for easier identification within third-party archives. - Copyright 2016-2022 ClickHouse, Inc. + Copyright 2016-2023 ClickHouse, Inc. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/base/base/BorrowedObjectPool.h b/base/base/BorrowedObjectPool.h index bb4c9cd1c21..05a23d5835e 100644 --- a/base/base/BorrowedObjectPool.h +++ b/base/base/BorrowedObjectPool.h @@ -10,7 +10,7 @@ #include /** Pool for limited size objects that cannot be used from different threads simultaneously. - * The main use case is to have fixed size of objects that can be reused in difference threads during their lifetime + * The main use case is to have fixed size of objects that can be reused in different threads during their lifetime * and have to be initialized on demand. * Two main properties of pool are allocated objects size and borrowed objects size. * Allocated objects size is size of objects that are currently allocated by the pool. diff --git a/base/base/CMakeLists.txt b/base/base/CMakeLists.txt index d788bd6f092..5d9f1de8309 100644 --- a/base/base/CMakeLists.txt +++ b/base/base/CMakeLists.txt @@ -8,16 +8,13 @@ set (SRCS getPageSize.cpp getThreadId.cpp JSON.cpp - LineReader.cpp mremap.cpp phdr_cache.cpp preciseExp10.cpp - setTerminalEcho.cpp shift10.cpp sleep.cpp terminalColors.cpp errnoToString.cpp - ReplxxLineReader.cpp StringRef.cpp safeExit.cpp throwError.cpp @@ -40,17 +37,8 @@ else () target_compile_definitions(common PUBLIC WITH_COVERAGE=0) endif () -# FIXME: move libraries for line reading out from base -if (TARGET ch_rust::skim) - target_link_libraries(common PUBLIC ch_rust::skim) -endif() - target_include_directories(common PUBLIC .. "${CMAKE_CURRENT_BINARY_DIR}/..") -if (OS_DARWIN AND NOT USE_STATIC_LIBRARIES) - target_link_libraries(common PUBLIC -Wl,-U,_inside_main) -endif() - target_link_libraries (common PUBLIC ch_contrib::cityhash diff --git a/base/base/setTerminalEcho.cpp b/base/base/setTerminalEcho.cpp deleted file mode 100644 index 759dca19119..00000000000 --- a/base/base/setTerminalEcho.cpp +++ /dev/null @@ -1,28 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include - - -void setTerminalEcho(bool enable) -{ - /// Obtain terminal attributes, - /// toggle the ECHO flag - /// and set them back. 
- - struct termios tty{}; - - if (0 != tcgetattr(STDIN_FILENO, &tty)) - throw std::runtime_error(std::string("setTerminalEcho failed get: ") + errnoToString()); - - if (enable) - tty.c_lflag |= ECHO; - else - tty.c_lflag &= ~ECHO; - - if (0 != tcsetattr(STDIN_FILENO, TCSANOW, &tty)) - throw std::runtime_error(std::string("setTerminalEcho failed set: ") + errnoToString()); -} diff --git a/base/base/setTerminalEcho.h b/base/base/setTerminalEcho.h deleted file mode 100644 index 98e8f5a87e3..00000000000 --- a/base/base/setTerminalEcho.h +++ /dev/null @@ -1,4 +0,0 @@ -#pragma once - -/// Enable or disable echoing of typed characters. Throws std::runtime_error on error. -void setTerminalEcho(bool enable); diff --git a/base/glibc-compatibility/CMakeLists.txt b/base/glibc-compatibility/CMakeLists.txt index 7aa9a3b4a61..0539f0c231d 100644 --- a/base/glibc-compatibility/CMakeLists.txt +++ b/base/glibc-compatibility/CMakeLists.txt @@ -37,7 +37,7 @@ if (GLIBC_COMPATIBILITY) target_include_directories(glibc-compatibility PRIVATE libcxxabi ${musl_arch_include_dir}) - if (( NOT USE_STATIC_LIBRARIES AND NOT USE_STATIC_LIBRARIES ) OR ENABLE_OPENSSL_DYNAMIC) + if (ENABLE_OPENSSL_DYNAMIC) target_compile_options(glibc-compatibility PRIVATE -fPIC) endif () diff --git a/cmake/cpu_features.cmake b/cmake/cpu_features.cmake index a554992caf3..b0ff2349957 100644 --- a/cmake/cpu_features.cmake +++ b/cmake/cpu_features.cmake @@ -102,6 +102,11 @@ elseif (ARCH_AMD64) SET(ENABLE_AVX512_FOR_SPEC_OP 0) endif() + # ClickHouse can be cross-compiled (e.g. on an ARM host for x86) but it is also possible to build ClickHouse on x86 w/o AVX for x86 w/ + # AVX. We only check that the compiler can emit certain SIMD instructions, we don't care if the host system is able to run the binary. + # Therefore, use check_cxx_source_compiles (= does the code compile+link?) instead of check_cxx_source_runs (= does the code + # compile+link+run). 
+ set (TEST_FLAG "-mssse3") set (CMAKE_REQUIRED_FLAGS "${TEST_FLAG} -O0") check_cxx_source_compiles(" diff --git a/cmake/sanitize.cmake b/cmake/sanitize.cmake index 3e3bb7ec2b2..cf7f7606618 100644 --- a/cmake/sanitize.cmake +++ b/cmake/sanitize.cmake @@ -25,7 +25,7 @@ if (SANITIZE) if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${ASAN_FLAGS}") endif() - if (USE_STATIC_LIBRARIES AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libasan") endif () if (COMPILER_GCC) @@ -50,7 +50,7 @@ if (SANITIZE) if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=memory") endif() - if (USE_STATIC_LIBRARIES AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libmsan") endif () @@ -71,7 +71,7 @@ if (SANITIZE) if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=thread") endif() - if (USE_STATIC_LIBRARIES AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libtsan") endif () if (COMPILER_GCC) @@ -103,7 +103,7 @@ if (SANITIZE) if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=undefined") endif() - if (USE_STATIC_LIBRARIES AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libubsan") endif () if (COMPILER_GCC) diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 6f80059498e..13c4722e149 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -115,12 +115,25 @@ endif() add_contrib (llvm-project-cmake llvm-project) add_contrib (libfuzzer-cmake llvm-project) add_contrib (libxml2-cmake libxml2) -add_contrib (aws-s3-cmake + +add_contrib (aws-cmake aws + aws-c-auth + aws-c-cal aws-c-common + aws-c-compression aws-c-event-stream + aws-c-http + aws-c-io + aws-c-mqtt + aws-c-s3 + aws-c-sdkutils + aws-s2n-tls aws-checksums + aws-crt-cpp + aws-cmake ) + add_contrib (base64-cmake base64) add_contrib (simdjson-cmake simdjson) add_contrib (rapidjson-cmake rapidjson) diff --git a/contrib/arrow-cmake/CMakeLists.txt b/contrib/arrow-cmake/CMakeLists.txt index c289c88ef7b..ae6f270a768 100644 --- a/contrib/arrow-cmake/CMakeLists.txt +++ b/contrib/arrow-cmake/CMakeLists.txt @@ -78,23 +78,14 @@ set(FLATBUFFERS_BINARY_DIR "${ClickHouse_BINARY_DIR}/contrib/flatbuffers") set(FLATBUFFERS_INCLUDE_DIR "${FLATBUFFERS_SRC_DIR}/include") # set flatbuffers CMake options -if (USE_STATIC_LIBRARIES) - set(FLATBUFFERS_BUILD_FLATLIB ON CACHE BOOL "Enable the build of the flatbuffers library") - set(FLATBUFFERS_BUILD_SHAREDLIB OFF CACHE BOOL "Disable the build of the flatbuffers shared library") -else () - set(FLATBUFFERS_BUILD_SHAREDLIB ON CACHE BOOL "Enable the build of the flatbuffers shared library") - set(FLATBUFFERS_BUILD_FLATLIB OFF CACHE BOOL "Disable the build of the flatbuffers library") -endif () +set(FLATBUFFERS_BUILD_FLATLIB ON CACHE BOOL "Enable the build of the flatbuffers library") +set(FLATBUFFERS_BUILD_SHAREDLIB OFF CACHE BOOL "Disable the build of the flatbuffers shared library") set(FLATBUFFERS_BUILD_TESTS OFF CACHE BOOL "Skip flatbuffers tests") add_subdirectory(${FLATBUFFERS_SRC_DIR} 
"${FLATBUFFERS_BINARY_DIR}") add_library(_flatbuffers INTERFACE) -if(USE_STATIC_LIBRARIES) - target_link_libraries(_flatbuffers INTERFACE flatbuffers) -else() - target_link_libraries(_flatbuffers INTERFACE flatbuffers_shared) -endif() +target_link_libraries(_flatbuffers INTERFACE flatbuffers) target_include_directories(_flatbuffers INTERFACE ${FLATBUFFERS_INCLUDE_DIR}) # === hdfs diff --git a/contrib/aws b/contrib/aws index 00b03604543..4a12641211d 160000 --- a/contrib/aws +++ b/contrib/aws @@ -1 +1 @@ -Subproject commit 00b03604543367d7e310cb0993973fdcb723ea79 +Subproject commit 4a12641211d4dbc8e2fdb2dd0f1eea0927db9252 diff --git a/contrib/aws-c-auth b/contrib/aws-c-auth new file mode 160000 index 00000000000..30df6c407e2 --- /dev/null +++ b/contrib/aws-c-auth @@ -0,0 +1 @@ +Subproject commit 30df6c407e2df43bd244e2c34c9b4a4b87372bfb diff --git a/contrib/aws-c-cal b/contrib/aws-c-cal new file mode 160000 index 00000000000..85dd7664b78 --- /dev/null +++ b/contrib/aws-c-cal @@ -0,0 +1 @@ +Subproject commit 85dd7664b786a389c6fb1a6f031ab4bb2282133d diff --git a/contrib/aws-c-common b/contrib/aws-c-common index 736a82d1697..324fd1d973c 160000 --- a/contrib/aws-c-common +++ b/contrib/aws-c-common @@ -1 +1 @@ -Subproject commit 736a82d1697c108b04a277e66438a7f4e19b6857 +Subproject commit 324fd1d973ccb25c813aa747bf1759cfde5121c5 diff --git a/contrib/aws-c-compression b/contrib/aws-c-compression new file mode 160000 index 00000000000..b517b7decd0 --- /dev/null +++ b/contrib/aws-c-compression @@ -0,0 +1 @@ +Subproject commit b517b7decd0dac30be2162f5186c250221c53aff diff --git a/contrib/aws-c-event-stream b/contrib/aws-c-event-stream index 3bc33662f9c..39bfa94a14b 160000 --- a/contrib/aws-c-event-stream +++ b/contrib/aws-c-event-stream @@ -1 +1 @@ -Subproject commit 3bc33662f9ccff4f4cbcf9509cc78c26e022fde0 +Subproject commit 39bfa94a14b7126bf0c1330286ef8db452d87e66 diff --git a/contrib/aws-c-http b/contrib/aws-c-http new file mode 160000 index 00000000000..2c5a2a7d555 --- /dev/null +++ b/contrib/aws-c-http @@ -0,0 +1 @@ +Subproject commit 2c5a2a7d5556600b9782ffa6c9d7e09964df1abc diff --git a/contrib/aws-c-io b/contrib/aws-c-io new file mode 160000 index 00000000000..5d32c453560 --- /dev/null +++ b/contrib/aws-c-io @@ -0,0 +1 @@ +Subproject commit 5d32c453560d0823df521a686bf7fbacde7f9be3 diff --git a/contrib/aws-c-mqtt b/contrib/aws-c-mqtt new file mode 160000 index 00000000000..882c689561a --- /dev/null +++ b/contrib/aws-c-mqtt @@ -0,0 +1 @@ +Subproject commit 882c689561a3db1466330ccfe3b63637e0a575d3 diff --git a/contrib/aws-c-s3 b/contrib/aws-c-s3 new file mode 160000 index 00000000000..a41255ece72 --- /dev/null +++ b/contrib/aws-c-s3 @@ -0,0 +1 @@ +Subproject commit a41255ece72a7c887bba7f9d998ca3e14f4c8a1b diff --git a/contrib/aws-c-sdkutils b/contrib/aws-c-sdkutils new file mode 160000 index 00000000000..25bf5cf225f --- /dev/null +++ b/contrib/aws-c-sdkutils @@ -0,0 +1 @@ +Subproject commit 25bf5cf225f977c3accc6a05a0a7a181ef2a4a30 diff --git a/contrib/aws-checksums b/contrib/aws-checksums index 519d6d90938..48e7c0e0147 160000 --- a/contrib/aws-checksums +++ b/contrib/aws-checksums @@ -1 +1 @@ -Subproject commit 519d6d9093819b6cf89ffff589a27ef8f83d0f65 +Subproject commit 48e7c0e01479232f225c8044d76c84e74192889d diff --git a/contrib/aws-cmake/AwsFeatureTests.cmake b/contrib/aws-cmake/AwsFeatureTests.cmake new file mode 100644 index 00000000000..54727e08d6b --- /dev/null +++ b/contrib/aws-cmake/AwsFeatureTests.cmake @@ -0,0 +1,114 @@ +# Copyright Amazon.com, Inc. or its affiliates. 
All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0. + +include(CheckCSourceRuns) + +option(USE_CPU_EXTENSIONS "Whenever possible, use functions optimized for CPUs with specific extensions (ex: SSE, AVX)." ON) + +# In the current (11/2/21) state of mingw64, the packaged gcc is not capable of emitting properly aligned avx2 instructions under certain circumstances. +# This leads to crashes for windows builds using mingw64 when invoking the avx2-enabled versions of certain functions. Until we can find a better +# work-around, disable avx2 (and all other extensions) in mingw builds. +# +# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=54412 +# +if (MINGW) + message(STATUS "MINGW detected! Disabling avx2 and other CPU extensions") + set(USE_CPU_EXTENSIONS OFF) +endif() + +if(NOT CMAKE_CROSSCOMPILING) + check_c_source_runs(" + #include + bool foo(int a, int b, int *c) { + return __builtin_mul_overflow(a, b, c); + } + + int main() { + int out; + if (foo(1, 2, &out)) { + return 0; + } + + return 0; + }" AWS_HAVE_GCC_OVERFLOW_MATH_EXTENSIONS) + + if (USE_CPU_EXTENSIONS) + check_c_source_runs(" + int main() { + int foo = 42; + _mulx_u32(1, 2, &foo); + return foo != 2; + }" AWS_HAVE_MSVC_MULX) + endif() + +endif() + +check_c_source_compiles(" + #include + #if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) + int main() { + return 0; + } + #else + it's not windows desktop + #endif +" AWS_HAVE_WINAPI_DESKTOP) + +check_c_source_compiles(" + int main() { +#if !(defined(__x86_64__) || defined(__i386__) || defined(_M_X64) || defined(_M_IX86)) +# error \"not intel\" +#endif + return 0; + } +" AWS_ARCH_INTEL) + +check_c_source_compiles(" + int main() { +#if !(defined(__aarch64__) || defined(_M_ARM64)) +# error \"not arm64\" +#endif + return 0; + } +" AWS_ARCH_ARM64) + +check_c_source_compiles(" + int main() { +#if !(defined(__arm__) || defined(_M_ARM)) +# error \"not arm\" +#endif + return 0; + } +" AWS_ARCH_ARM32) + +check_c_source_compiles(" +int main() { + int foo = 42, bar = 24; + __asm__ __volatile__(\"\":\"=r\"(foo):\"r\"(bar):\"memory\"); +}" AWS_HAVE_GCC_INLINE_ASM) + +check_c_source_compiles(" +#include +int main() { +#ifdef __linux__ + getauxval(AT_HWCAP); + getauxval(AT_HWCAP2); +#endif + return 0; +}" AWS_HAVE_AUXV) + +string(REGEX MATCH "^(aarch64|arm)" ARM_CPU "${CMAKE_SYSTEM_PROCESSOR}") +if(NOT LEGACY_COMPILER_SUPPORT OR ARM_CPU) + check_c_source_compiles(" + #include + int main() { + backtrace(NULL, 0); + return 0; + }" AWS_HAVE_EXECINFO) +endif() + +check_c_source_compiles(" +#include +int main() { + return 1; +}" AWS_HAVE_LINUX_IF_LINK_H) diff --git a/contrib/aws-cmake/AwsSIMD.cmake b/contrib/aws-cmake/AwsSIMD.cmake new file mode 100644 index 00000000000..bd6f4064e78 --- /dev/null +++ b/contrib/aws-cmake/AwsSIMD.cmake @@ -0,0 +1,74 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0. 
+ +include(CheckCCompilerFlag) +include(CheckIncludeFile) + +if (USE_CPU_EXTENSIONS) + if (MSVC) + check_c_compiler_flag("/arch:AVX2" HAVE_M_AVX2_FLAG) + if (HAVE_M_AVX2_FLAG) + set(AVX2_CFLAGS "/arch:AVX2") + endif() + else() + check_c_compiler_flag(-mavx2 HAVE_M_AVX2_FLAG) + if (HAVE_M_AVX2_FLAG) + set(AVX2_CFLAGS "-mavx -mavx2") + endif() + endif() + + + cmake_push_check_state() + set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} ${AVX2_CFLAGS}") + + check_c_source_compiles(" + #include + #include + #include + + int main() { + __m256i vec; + memset(&vec, 0, sizeof(vec)); + + _mm256_shuffle_epi8(vec, vec); + _mm256_set_epi32(1,2,3,4,5,6,7,8); + _mm256_permutevar8x32_epi32(vec, vec); + + return 0; + }" HAVE_AVX2_INTRINSICS) + + check_c_source_compiles(" + #include + #include + + int main() { + __m256i vec; + memset(&vec, 0, sizeof(vec)); + return (int)_mm256_extract_epi64(vec, 2); + }" HAVE_MM256_EXTRACT_EPI64) + + cmake_pop_check_state() +endif() # USE_CPU_EXTENSIONS + +macro(simd_add_definition_if target definition) + if(${definition}) + target_compile_definitions(${target} PRIVATE -D${definition}) + endif(${definition}) +endmacro(simd_add_definition_if) + +# Configure private preprocessor definitions for SIMD-related features +# Does not set any processor feature codegen flags +function(simd_add_definitions target) + simd_add_definition_if(${target} HAVE_AVX2_INTRINSICS) + simd_add_definition_if(${target} HAVE_MM256_EXTRACT_EPI64) +endfunction(simd_add_definitions) + +# Adds source files only if AVX2 is supported. These files will be built with +# avx2 intrinsics enabled. +# Usage: simd_add_source_avx2(target file1.c file2.c ...) +function(simd_add_source_avx2 target) + foreach(file ${ARGN}) + target_sources(${target} PRIVATE ${file}) + set_source_files_properties(${file} PROPERTIES COMPILE_FLAGS "${AVX2_CFLAGS}") + endforeach() +endfunction(simd_add_source_avx2) diff --git a/contrib/aws-cmake/AwsThreadAffinity.cmake b/contrib/aws-cmake/AwsThreadAffinity.cmake new file mode 100644 index 00000000000..9e53481272c --- /dev/null +++ b/contrib/aws-cmake/AwsThreadAffinity.cmake @@ -0,0 +1,50 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0. + +include(CheckSymbolExists) + +# Check if the platform supports setting thread affinity +# (important for hitting full NIC entitlement on NUMA architectures) +function(aws_set_thread_affinity_method target) + + # Non-POSIX, Android, and Apple platforms do not support thread affinity. + if (NOT UNIX OR ANDROID OR APPLE) + target_compile_definitions(${target} PRIVATE + -DAWS_AFFINITY_METHOD=AWS_AFFINITY_METHOD_NONE) + return() + endif() + + cmake_push_check_state() + list(APPEND CMAKE_REQUIRED_DEFINITIONS -D_GNU_SOURCE) + list(APPEND CMAKE_REQUIRED_LIBRARIES pthread) + + set(headers "pthread.h") + # BSDs put nonportable pthread declarations in a separate header. + if(CMAKE_SYSTEM_NAME MATCHES BSD) + set(headers "${headers};pthread_np.h") + endif() + + # Using pthread attrs is the preferred method, but is glibc-specific. + check_symbol_exists(pthread_attr_setaffinity_np "${headers}" USE_PTHREAD_ATTR_SETAFFINITY) + if (USE_PTHREAD_ATTR_SETAFFINITY) + target_compile_definitions(${target} PRIVATE + -DAWS_AFFINITY_METHOD=AWS_AFFINITY_METHOD_PTHREAD_ATTR) + return() + endif() + + # This method is still nonportable, but is supported by musl and BSDs. 
+ check_symbol_exists(pthread_setaffinity_np "${headers}" USE_PTHREAD_SETAFFINITY) + if (USE_PTHREAD_SETAFFINITY) + target_compile_definitions(${target} PRIVATE + -DAWS_AFFINITY_METHOD=AWS_AFFINITY_METHOD_PTHREAD) + return() + endif() + + # If we got here, we expected thread affinity support but didn't find it. + # We still build with degraded NUMA performance, but show a warning. + message(WARNING "No supported method for setting thread affinity") + target_compile_definitions(${target} PRIVATE + -DAWS_AFFINITY_METHOD=AWS_AFFINITY_METHOD_NONE) + + cmake_pop_check_state() +endfunction() diff --git a/contrib/aws-cmake/AwsThreadName.cmake b/contrib/aws-cmake/AwsThreadName.cmake new file mode 100644 index 00000000000..a67416b4f83 --- /dev/null +++ b/contrib/aws-cmake/AwsThreadName.cmake @@ -0,0 +1,61 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0. + +include(CheckSymbolExists) + +# Check how the platform supports setting thread name +function(aws_set_thread_name_method target) + + if (WINDOWS) + # On Windows we do a runtime check, instead of compile-time check + return() + elseif (APPLE) + # All Apple platforms we support have the same function, so no need for compile-time check. + return() + endif() + + cmake_push_check_state() + list(APPEND CMAKE_REQUIRED_DEFINITIONS -D_GNU_SOURCE) + list(APPEND CMAKE_REQUIRED_LIBRARIES pthread) + + # The start of the test program + set(c_source_start " + #define _GNU_SOURCE + #include + + #if defined(__FreeBSD__) || defined(__NETBSD__) + #include + #endif + + int main() { + pthread_t thread_id; + ") + + # The end of the test program + set(c_source_end "}") + + # pthread_setname_np() usually takes 2 args + check_c_source_compiles(" + ${c_source_start} + pthread_setname_np(thread_id, \"asdf\"); + ${c_source_end}" + PTHREAD_SETNAME_TAKES_2ARGS) + if (PTHREAD_SETNAME_TAKES_2ARGS) + target_compile_definitions(${target} PRIVATE -DAWS_PTHREAD_SETNAME_TAKES_2ARGS) + return() + endif() + + # But on NetBSD it takes 3! + check_c_source_compiles(" + ${c_source_start} + pthread_setname_np(thread_id, \"asdf\", NULL); + ${c_source_end} + " PTHREAD_SETNAME_TAKES_3ARGS) + if (PTHREAD_SETNAME_TAKES_3ARGS) + target_compile_definitions(${target} PRIVATE -DAWS_PTHREAD_SETNAME_TAKES_3ARGS) + return() + endif() + + # And on many older/weirder platforms it's just not supported + cmake_pop_check_state() +endfunction() diff --git a/contrib/aws-cmake/CMakeLists.txt b/contrib/aws-cmake/CMakeLists.txt new file mode 100644 index 00000000000..52533cd6483 --- /dev/null +++ b/contrib/aws-cmake/CMakeLists.txt @@ -0,0 +1,376 @@ +set(ENABLE_AWS_S3_DEFAULT OFF) + +if(ENABLE_LIBRARIES AND (OS_LINUX OR OS_DARWIN) AND TARGET OpenSSL::Crypto) + set(ENABLE_AWS_S3_DEFAULT ON) +endif() + +option(ENABLE_AWS_S3 "Enable AWS S3" ${ENABLE_AWS_S3_DEFAULT}) + +if(ENABLE_AWS_S3) + if(NOT TARGET OpenSSL::Crypto) + message (${RECONFIGURE_MESSAGE_LEVEL} "Can't use AWS SDK without OpenSSL") + elseif(NOT (OS_LINUX OR OS_DARWIN)) + message (${RECONFIGURE_MESSAGE_LEVEL} "Can't use AWS SDK with platform ${CMAKE_SYSTEM_NAME}") + endif() +endif() + +if(NOT ENABLE_AWS_S3) + message(STATUS "Not using AWS S3") + return() +endif() + + +# Utilities. 
+include("${ClickHouse_SOURCE_DIR}/contrib/aws-cmake/AwsFeatureTests.cmake") +include("${ClickHouse_SOURCE_DIR}/contrib/aws-cmake/AwsThreadAffinity.cmake") +include("${ClickHouse_SOURCE_DIR}/contrib/aws-cmake/AwsThreadName.cmake") +include("${ClickHouse_SOURCE_DIR}/contrib/aws-cmake/AwsSIMD.cmake") + + +# Gather sources and options. +set(AWS_SOURCES) +set(AWS_PUBLIC_INCLUDES) +set(AWS_PRIVATE_INCLUDES) +set(AWS_PUBLIC_COMPILE_DEFS) +set(AWS_PRIVATE_COMPILE_DEFS) +set(AWS_PRIVATE_LIBS) + +if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG") + list(APPEND AWS_PRIVATE_COMPILE_DEFS "-DDEBUG_BUILD") +endif() + +set(ENABLE_OPENSSL_ENCRYPTION ON) +if (ENABLE_OPENSSL_ENCRYPTION) + list(APPEND AWS_PRIVATE_COMPILE_DEFS "-DENABLE_OPENSSL_ENCRYPTION") +endif() + +set(USE_S2N ON) +if (USE_S2N) + list(APPEND AWS_PRIVATE_COMPILE_DEFS "-DUSE_S2N") +endif() + + +# Directories. +SET(AWS_SDK_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws") +SET(AWS_SDK_CORE_DIR "${AWS_SDK_DIR}/aws-cpp-sdk-core") +SET(AWS_SDK_S3_DIR "${AWS_SDK_DIR}/aws-cpp-sdk-s3") + +SET(AWS_AUTH_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws-c-auth") +SET(AWS_CAL_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws-c-cal") +SET(AWS_CHECKSUMS_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws-checksums") +SET(AWS_COMMON_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws-c-common") +SET(AWS_COMPRESSION_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws-c-compression") +SET(AWS_CRT_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws-crt-cpp") +SET(AWS_EVENT_STREAM_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws-c-event-stream") +SET(AWS_HTTP_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws-c-http") +SET(AWS_IO_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws-c-io") +SET(AWS_MQTT_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws-c-mqtt") +SET(AWS_S2N_TLS_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws-s2n-tls") +SET(AWS_S3_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws-c-s3") +SET(AWS_SDKUTILS_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws-c-sdkutils") + + +# aws-cpp-sdk-core +file(GLOB AWS_SDK_CORE_SRC + "${AWS_SDK_CORE_DIR}/source/*.cpp" + "${AWS_SDK_CORE_DIR}/source/auth/*.cpp" + "${AWS_SDK_CORE_DIR}/source/auth/bearer-token-provider/*.cpp" + "${AWS_SDK_CORE_DIR}/source/auth/signer/*.cpp" + "${AWS_SDK_CORE_DIR}/source/auth/signer-provider/*.cpp" + "${AWS_SDK_CORE_DIR}/source/client/*.cpp" + "${AWS_SDK_CORE_DIR}/source/config/*.cpp" + "${AWS_SDK_CORE_DIR}/source/config/defaults/*.cpp" + "${AWS_SDK_CORE_DIR}/source/endpoint/*.cpp" + "${AWS_SDK_CORE_DIR}/source/endpoint/internal/*.cpp" + "${AWS_SDK_CORE_DIR}/source/external/cjson/*.cpp" + "${AWS_SDK_CORE_DIR}/source/external/tinyxml2/*.cpp" + "${AWS_SDK_CORE_DIR}/source/http/*.cpp" + "${AWS_SDK_CORE_DIR}/source/http/standard/*.cpp" + "${AWS_SDK_CORE_DIR}/source/internal/*.cpp" + "${AWS_SDK_CORE_DIR}/source/monitoring/*.cpp" + "${AWS_SDK_CORE_DIR}/source/utils/*.cpp" + "${AWS_SDK_CORE_DIR}/source/utils/base64/*.cpp" + "${AWS_SDK_CORE_DIR}/source/utils/crypto/*.cpp" + "${AWS_SDK_CORE_DIR}/source/utils/crypto/openssl/*.cpp" + "${AWS_SDK_CORE_DIR}/source/utils/crypto/factory/*.cpp" + "${AWS_SDK_CORE_DIR}/source/utils/event/*.cpp" + "${AWS_SDK_CORE_DIR}/source/utils/json/*.cpp" + "${AWS_SDK_CORE_DIR}/source/utils/logging/*.cpp" + "${AWS_SDK_CORE_DIR}/source/utils/memory/*.cpp" + "${AWS_SDK_CORE_DIR}/source/utils/memory/stl/*.cpp" + "${AWS_SDK_CORE_DIR}/source/utils/stream/*.cpp" + "${AWS_SDK_CORE_DIR}/source/utils/threading/*.cpp" + "${AWS_SDK_CORE_DIR}/source/utils/xml/*.cpp" +) + +if(OS_LINUX OR OS_DARWIN) + file(GLOB AWS_SDK_CORE_NET_SRC "${AWS_SDK_CORE_DIR}/source/net/linux-shared/*.cpp") + file(GLOB 
AWS_SDK_CORE_PLATFORM_SRC "${AWS_SDK_CORE_DIR}/source/platform/linux-shared/*.cpp") +else() + file(GLOB AWS_SDK_CORE_NET_SRC "${AWS_SDK_CORE_DIR}/source/net/*.cpp") + set(AWS_SDK_CORE_PLATFORM_SRC) +endif() + +OPTION(USE_AWS_MEMORY_MANAGEMENT "Aws memory management" OFF) +configure_file("${AWS_SDK_CORE_DIR}/include/aws/core/SDKConfig.h.in" + "${CMAKE_CURRENT_BINARY_DIR}/include/aws/core/SDKConfig.h" @ONLY) + +list(APPEND AWS_PUBLIC_COMPILE_DEFS "-DAWS_SDK_VERSION_MAJOR=1") +list(APPEND AWS_PUBLIC_COMPILE_DEFS "-DAWS_SDK_VERSION_MINOR=10") +list(APPEND AWS_PUBLIC_COMPILE_DEFS "-DAWS_SDK_VERSION_PATCH=36") + +list(APPEND AWS_SOURCES ${AWS_SDK_CORE_SRC} ${AWS_SDK_CORE_NET_SRC} ${AWS_SDK_CORE_PLATFORM_SRC}) + +list(APPEND AWS_PUBLIC_INCLUDES + "${AWS_SDK_CORE_DIR}/include/" + "${CMAKE_CURRENT_BINARY_DIR}/include" +) + + +# aws-cpp-sdk-s3 +file(GLOB AWS_SDK_S3_SRC + "${AWS_SDK_S3_DIR}/source/*.cpp" + "${AWS_SDK_S3_DIR}/source/model/*.cpp" +) + +list(APPEND AWS_SOURCES ${AWS_SDK_S3_SRC}) +list(APPEND AWS_PUBLIC_INCLUDES "${AWS_SDK_S3_DIR}/include/") + + +# aws-c-auth +file(GLOB AWS_AUTH_SRC + "${AWS_AUTH_DIR}/source/*.c" +) + +list(APPEND AWS_SOURCES ${AWS_AUTH_SRC}) +list(APPEND AWS_PUBLIC_INCLUDES "${AWS_AUTH_DIR}/include/") + + +# aws-c-cal +file(GLOB AWS_CAL_SRC + "${AWS_CAL_DIR}/source/*.c" +) + +if (ENABLE_OPENSSL_ENCRYPTION) + file(GLOB AWS_CAL_OS_SRC + "${AWS_CAL_DIR}/source/unix/*.c" + ) + list(APPEND AWS_PRIVATE_LIBS OpenSSL::Crypto) +endif() + +list(APPEND AWS_SOURCES ${AWS_CAL_SRC} ${AWS_CAL_OS_SRC}) +list(APPEND AWS_PRIVATE_INCLUDES "${AWS_CAL_DIR}/include/") + + +# aws-c-event-stream +file(GLOB AWS_EVENT_STREAM_SRC + "${AWS_EVENT_STREAM_DIR}/source/*.c" +) + +list(APPEND AWS_SOURCES ${AWS_EVENT_STREAM_SRC}) +list(APPEND AWS_PRIVATE_INCLUDES "${AWS_EVENT_STREAM_DIR}/include/") + + +# aws-c-common +file(GLOB AWS_COMMON_SRC + "${AWS_COMMON_DIR}/source/*.c" + "${AWS_COMMON_DIR}/source/external/*.c" + "${AWS_COMMON_DIR}/source/posix/*.c" +) + +file(GLOB AWS_COMMON_ARCH_SRC + "${AWS_COMMON_DIR}/source/arch/generic/*.c" +) + +if (AWS_ARCH_INTEL) + file(GLOB AWS_COMMON_ARCH_SRC + "${AWS_COMMON_DIR}/source/arch/intel/cpuid.c" + "${AWS_COMMON_DIR}/source/arch/intel/asm/*.c" + ) +elseif (AWS_ARCH_ARM64 OR AWS_ARCH_ARM32) + if (AWS_HAVE_AUXV) + file(GLOB AWS_COMMON_ARCH_SRC + "${AWS_COMMON_DIR}/source/arch/arm/asm/*.c" + ) + endif() +endif() + +set(AWS_COMMON_AVX2_SRC) +if (HAVE_AVX2_INTRINSICS) + list(APPEND AWS_PRIVATE_COMPILE_DEFS "-DUSE_SIMD_ENCODING") + set(AWS_COMMON_AVX2_SRC "${AWS_COMMON_DIR}/source/arch/intel/encoding_avx2.c") + set_source_files_properties(${AWS_COMMON_AVX2_SRC} PROPERTIES COMPILE_FLAGS "${AVX2_CFLAGS}") +endif() + +configure_file("${AWS_COMMON_DIR}/include/aws/common/config.h.in" + "${CMAKE_CURRENT_BINARY_DIR}/include/aws/common/config.h" @ONLY) + +list(APPEND AWS_SOURCES ${AWS_COMMON_SRC} ${AWS_COMMON_ARCH_SRC} ${AWS_COMMON_AVX2_SRC}) + +list(APPEND AWS_PUBLIC_INCLUDES + "${AWS_COMMON_DIR}/include/" + "${CMAKE_CURRENT_BINARY_DIR}/include" +) + + +# aws-checksums +file(GLOB AWS_CHECKSUMS_SRC + "${AWS_CHECKSUMS_DIR}/source/*.c" + "${AWS_CHECKSUMS_DIR}/source/intel/*.c" + "${AWS_CHECKSUMS_DIR}/source/intel/asm/*.c" + "${AWS_CHECKSUMS_DIR}/source/arm/*.c" +) + +if(AWS_ARCH_INTEL AND AWS_HAVE_GCC_INLINE_ASM) + file(GLOB AWS_CHECKSUMS_ARCH_SRC + "${AWS_CHECKSUMS_DIR}/source/intel/asm/*.c" + ) +endif() + +if (AWS_ARCH_ARM64) + file(GLOB AWS_CHECKSUMS_ARCH_SRC + "${AWS_CHECKSUMS_DIR}/source/arm/*.c" + ) + 
set_source_files_properties("${AWS_CHECKSUMS_DIR}/source/arm/crc32c_arm.c" PROPERTIES COMPILE_FLAGS -march=armv8-a+crc) +elseif (AWS_ARCH_ARM32) + if (AWS_ARM32_CRC) + file(GLOB AWS_CHECKSUMS_ARCH_SRC + "${AWS_CHECKSUMS_DIR}/source/arm/*.c" + "${AWS_CHECKSUMS_DIR}/source/arm/asm/*.c" + ) + set_source_files_properties(source/arm/crc32c_arm.c PROPERTIES COMPILE_FLAGS -march=armv8-a+crc) + endif() +endif() + +list(APPEND AWS_SOURCES ${AWS_CHECKSUMS_SRC} ${AWS_CHECKSUMS_ARCH_SRC}) +list(APPEND AWS_PRIVATE_INCLUDES "${AWS_CHECKSUMS_DIR}/include/") + + +# aws-c-io +file(GLOB AWS_IO_SRC + "${AWS_IO_DIR}/source/*.c" +) + +if (OS_LINUX) + file(GLOB AWS_IO_OS_SRC + "${AWS_IO_DIR}/source/linux/*.c" + "${AWS_IO_DIR}/source/posix/*.c" + ) +elseif (OS_DARWIN) + file(GLOB AWS_IO_OS_SRC + "${AWS_IO_DIR}/source/bsd/*.c" + "${AWS_IO_DIR}/source/posix/*.c" + ) +endif() + +set(AWS_IO_TLS_SRC) +if (USE_S2N) + file(GLOB AWS_IO_TLS_SRC + "${AWS_IO_DIR}/source/s2n/*.c" + ) +endif() + +list(APPEND AWS_SOURCES ${AWS_IO_SRC} ${AWS_IO_OS_SRC} ${AWS_IO_TLS_SRC}) +list(APPEND AWS_PUBLIC_INCLUDES "${AWS_IO_DIR}/include/") + + +# aws-s2n-tls +if (USE_S2N) + file(GLOB AWS_S2N_TLS_SRC + "${AWS_S2N_TLS_DIR}/crypto/*.c" + "${AWS_S2N_TLS_DIR}/error/*.c" + "${AWS_S2N_TLS_DIR}/stuffer/*.c" + "${AWS_S2N_TLS_DIR}/pq-crypto/*.c" + "${AWS_S2N_TLS_DIR}/pq-crypto/kyber_r3/*.c" + "${AWS_S2N_TLS_DIR}/tls/*.c" + "${AWS_S2N_TLS_DIR}/tls/extensions/*.c" + "${AWS_S2N_TLS_DIR}/utils/*.c" + ) + + list(APPEND AWS_SOURCES ${AWS_S2N_TLS_SRC}) + + list(APPEND AWS_PRIVATE_INCLUDES + "${AWS_S2N_TLS_DIR}/" + "${AWS_S2N_TLS_DIR}/api/" + ) +endif() + + +# aws-crt-cpp +file(GLOB AWS_CRT_SRC + "${AWS_CRT_DIR}/source/*.cpp" + "${AWS_CRT_DIR}/source/auth/*.cpp" + "${AWS_CRT_DIR}/source/crypto/*.cpp" + "${AWS_CRT_DIR}/source/endpoints/*.cpp" + "${AWS_CRT_DIR}/source/external/*.cpp" + "${AWS_CRT_DIR}/source/http/*.cpp" + "${AWS_CRT_DIR}/source/io/*.cpp" +) + +list(APPEND AWS_SOURCES ${AWS_CRT_SRC}) +list(APPEND AWS_PUBLIC_INCLUDES "${AWS_CRT_DIR}/include/") + + +# aws-c-mqtt +file(GLOB AWS_MQTT_SRC + "${AWS_MQTT_DIR}/source/*.c" +) + +list(APPEND AWS_SOURCES ${AWS_MQTT_SRC}) +list(APPEND AWS_PUBLIC_INCLUDES "${AWS_MQTT_DIR}/include/") + + +# aws-c-http +file(GLOB AWS_HTTP_SRC + "${AWS_HTTP_DIR}/source/*.c" +) + +list(APPEND AWS_SOURCES ${AWS_HTTP_SRC}) +list(APPEND AWS_PRIVATE_INCLUDES "${AWS_HTTP_DIR}/include/") + + +# aws-c-compression +file(GLOB AWS_COMPRESSION_SRC + "${AWS_COMPRESSION_DIR}/source/*.c" +) + +list(APPEND AWS_SOURCES ${AWS_COMPRESSION_SRC}) +list(APPEND AWS_PRIVATE_INCLUDES "${AWS_COMPRESSION_DIR}/include/") + + +# aws-c-s3 +file(GLOB AWS_S3_SRC + "${AWS_S3_DIR}/source/*.c" +) + +list(APPEND AWS_SOURCES ${AWS_S3_SRC}) +list(APPEND AWS_PRIVATE_INCLUDES "${AWS_S3_DIR}/include/") + + +# aws-c-sdkutils +file(GLOB AWS_SDKUTILS_SRC + "${AWS_SDKUTILS_DIR}/source/*.c" +) + +list(APPEND AWS_SOURCES ${AWS_SDKUTILS_SRC}) +list(APPEND AWS_PUBLIC_INCLUDES "${AWS_SDKUTILS_DIR}/include/") + + +# Add library. +add_library(_aws ${AWS_SOURCES}) + +target_include_directories(_aws SYSTEM BEFORE PUBLIC ${AWS_PUBLIC_INCLUDES}) +target_include_directories(_aws SYSTEM BEFORE PRIVATE ${AWS_PRIVATE_INCLUDES}) +target_compile_definitions(_aws PUBLIC ${AWS_PUBLIC_COMPILE_DEFS}) +target_compile_definitions(_aws PRIVATE ${AWS_PRIVATE_COMPILE_DEFS}) +target_link_libraries(_aws PRIVATE ${AWS_PRIVATE_LIBS}) + +aws_set_thread_affinity_method(_aws) +aws_set_thread_name_method(_aws) + +# The library is large - avoid bloat. 
+if (OMIT_HEAVY_DEBUG_SYMBOLS) + target_compile_options (_aws PRIVATE -g0) +endif() + +add_library(ch_contrib::aws_s3 ALIAS _aws) diff --git a/contrib/aws-crt-cpp b/contrib/aws-crt-cpp new file mode 160000 index 00000000000..ec0bea288f4 --- /dev/null +++ b/contrib/aws-crt-cpp @@ -0,0 +1 @@ +Subproject commit ec0bea288f451d884c0d80d534bc5c66241c39a4 diff --git a/contrib/aws-s2n-tls b/contrib/aws-s2n-tls new file mode 160000 index 00000000000..15d534e8a9c --- /dev/null +++ b/contrib/aws-s2n-tls @@ -0,0 +1 @@ +Subproject commit 15d534e8a9ca1eda6bacee514e37d08b4f38a526 diff --git a/contrib/aws-s3-cmake/CMakeLists.txt b/contrib/aws-s3-cmake/CMakeLists.txt deleted file mode 100644 index eabed601722..00000000000 --- a/contrib/aws-s3-cmake/CMakeLists.txt +++ /dev/null @@ -1,122 +0,0 @@ -if(NOT OS_FREEBSD) - option(ENABLE_S3 "Enable S3" ${ENABLE_LIBRARIES}) -elseif(ENABLE_S3) - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't use S3 on FreeBSD") -endif() - -if(NOT ENABLE_S3) - message(STATUS "Not using S3") - return() -endif() - -SET(AWS_S3_LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws/aws-cpp-sdk-s3") -SET(AWS_CORE_LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws/aws-cpp-sdk-core") -SET(AWS_CHECKSUMS_LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws-checksums") -SET(AWS_COMMON_LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws-c-common") -SET(AWS_EVENT_STREAM_LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws-c-event-stream") - -OPTION(USE_AWS_MEMORY_MANAGEMENT "Aws memory management" OFF) -configure_file("${AWS_CORE_LIBRARY_DIR}/include/aws/core/SDKConfig.h.in" - "${CMAKE_CURRENT_BINARY_DIR}/include/aws/core/SDKConfig.h" @ONLY) - -configure_file("${AWS_COMMON_LIBRARY_DIR}/include/aws/common/config.h.in" - "${CMAKE_CURRENT_BINARY_DIR}/include/aws/common/config.h" @ONLY) - - -file(GLOB AWS_CORE_SOURCES - "${AWS_CORE_LIBRARY_DIR}/source/*.cpp" - "${AWS_CORE_LIBRARY_DIR}/source/auth/*.cpp" - "${AWS_CORE_LIBRARY_DIR}/source/client/*.cpp" - "${AWS_CORE_LIBRARY_DIR}/source/http/*.cpp" - "${AWS_CORE_LIBRARY_DIR}/source/http/standard/*.cpp" - "${AWS_CORE_LIBRARY_DIR}/source/config/*.cpp" - "${AWS_CORE_LIBRARY_DIR}/source/external/cjson/*.cpp" - "${AWS_CORE_LIBRARY_DIR}/source/external/tinyxml2/*.cpp" - "${AWS_CORE_LIBRARY_DIR}/source/internal/*.cpp" - "${AWS_CORE_LIBRARY_DIR}/source/monitoring/*.cpp" - "${AWS_CORE_LIBRARY_DIR}/source/net/*.cpp" - "${AWS_CORE_LIBRARY_DIR}/source/linux-shared/*.cpp" - "${AWS_CORE_LIBRARY_DIR}/source/platform/linux-shared/*.cpp" - "${AWS_CORE_LIBRARY_DIR}/source/utils/*.cpp" - "${AWS_CORE_LIBRARY_DIR}/source/utils/base64/*.cpp" - "${AWS_CORE_LIBRARY_DIR}/source/utils/event/*.cpp" - "${AWS_CORE_LIBRARY_DIR}/source/utils/crypto/*.cpp" - "${AWS_CORE_LIBRARY_DIR}/source/utils/crypto/openssl/*.cpp" - "${AWS_CORE_LIBRARY_DIR}/source/utils/crypto/factory/*.cpp" - "${AWS_CORE_LIBRARY_DIR}/source/utils/json/*.cpp" - "${AWS_CORE_LIBRARY_DIR}/source/utils/logging/*.cpp" - "${AWS_CORE_LIBRARY_DIR}/source/utils/memory/*.cpp" - "${AWS_CORE_LIBRARY_DIR}/source/utils/memory/stl/*.cpp" - "${AWS_CORE_LIBRARY_DIR}/source/utils/stream/*.cpp" - "${AWS_CORE_LIBRARY_DIR}/source/utils/threading/*.cpp" - "${AWS_CORE_LIBRARY_DIR}/source/utils/xml/*.cpp" -) - -file(GLOB AWS_S3_SOURCES - "${AWS_S3_LIBRARY_DIR}/source/*.cpp" -) - -file(GLOB AWS_S3_MODEL_SOURCES - "${AWS_S3_LIBRARY_DIR}/source/model/*.cpp" -) - -file(GLOB AWS_EVENT_STREAM_SOURCES - "${AWS_EVENT_STREAM_LIBRARY_DIR}/source/*.c" -) - -file(GLOB AWS_COMMON_SOURCES - "${AWS_COMMON_LIBRARY_DIR}/source/*.c" - 
"${AWS_COMMON_LIBRARY_DIR}/source/posix/*.c" -) - -file(GLOB AWS_CHECKSUMS_SOURCES - "${AWS_CHECKSUMS_LIBRARY_DIR}/source/*.c" - "${AWS_CHECKSUMS_LIBRARY_DIR}/source/intel/*.c" - "${AWS_CHECKSUMS_LIBRARY_DIR}/source/arm/*.c" -) - -file(GLOB S3_UNIFIED_SRC - ${AWS_EVENT_STREAM_SOURCES} - ${AWS_COMMON_SOURCES} - ${AWS_S3_SOURCES} - ${AWS_S3_MODEL_SOURCES} - ${AWS_CORE_SOURCES} -) - -set(S3_INCLUDES - "${AWS_COMMON_LIBRARY_DIR}/include/" - "${AWS_EVENT_STREAM_LIBRARY_DIR}/include/" - "${AWS_S3_LIBRARY_DIR}/include/" - "${AWS_CORE_LIBRARY_DIR}/include/" - "${CMAKE_CURRENT_BINARY_DIR}/include/" -) - -add_library(_aws_s3_checksums ${AWS_CHECKSUMS_SOURCES}) -target_include_directories(_aws_s3_checksums SYSTEM PUBLIC "${AWS_CHECKSUMS_LIBRARY_DIR}/include/") -if(CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG") - target_compile_definitions(_aws_s3_checksums PRIVATE "-DDEBUG_BUILD") -endif() -set_target_properties(_aws_s3_checksums PROPERTIES LINKER_LANGUAGE C) -set_property(TARGET _aws_s3_checksums PROPERTY C_STANDARD 99) - -add_library(_aws_s3 ${S3_UNIFIED_SRC}) - -target_compile_definitions(_aws_s3 PUBLIC "AWS_SDK_VERSION_MAJOR=1") -target_compile_definitions(_aws_s3 PUBLIC "AWS_SDK_VERSION_MINOR=7") -target_compile_definitions(_aws_s3 PUBLIC "AWS_SDK_VERSION_PATCH=231") -target_include_directories(_aws_s3 SYSTEM BEFORE PUBLIC ${S3_INCLUDES}) - -if (TARGET OpenSSL::SSL) - target_compile_definitions(_aws_s3 PUBLIC -DENABLE_OPENSSL_ENCRYPTION) - target_link_libraries(_aws_s3 PRIVATE OpenSSL::Crypto OpenSSL::SSL) -endif() - -target_link_libraries(_aws_s3 PRIVATE _aws_s3_checksums) - -# The library is large - avoid bloat. -if (OMIT_HEAVY_DEBUG_SYMBOLS) - target_compile_options (_aws_s3 PRIVATE -g0) - target_compile_options (_aws_s3_checksums PRIVATE -g0) -endif() - -add_library(ch_contrib::aws_s3 ALIAS _aws_s3) diff --git a/contrib/boringssl-cmake/CMakeLists.txt b/contrib/boringssl-cmake/CMakeLists.txt index 9a1f667f52e..828919476a7 100644 --- a/contrib/boringssl-cmake/CMakeLists.txt +++ b/contrib/boringssl-cmake/CMakeLists.txt @@ -139,13 +139,6 @@ if(NOT OPENSSL_NO_ASM) endif() endif() -if(BUILD_SHARED_LIBS) - add_definitions(-DBORINGSSL_SHARED_LIBRARY) - # Enable position-independent code globally. This is needed because - # some library targets are OBJECT libraries. 
- set(CMAKE_POSITION_INDEPENDENT_CODE TRUE) -endif() - set( CRYPTO_ios_aarch64_SOURCES diff --git a/contrib/c-ares-cmake/CMakeLists.txt b/contrib/c-ares-cmake/CMakeLists.txt index 4b1170f9dd1..63e145bec18 100644 --- a/contrib/c-ares-cmake/CMakeLists.txt +++ b/contrib/c-ares-cmake/CMakeLists.txt @@ -63,13 +63,8 @@ SET(SRCS "${LIBRARY_DIR}/src/lib/windows_port.c" ) -if (USE_STATIC_LIBRARIES) - add_library(_c-ares STATIC ${SRCS}) - target_compile_definitions(_c-ares PUBLIC CARES_STATICLIB) -else() - add_library(_c-ares SHARED ${SRCS}) - target_compile_definitions(_c-ares PUBLIC CARES_BUILDING_LIBRARY) -endif() +add_library(_c-ares STATIC ${SRCS}) +target_compile_definitions(_c-ares PUBLIC CARES_STATICLIB) target_compile_definitions(_c-ares PRIVATE HAVE_CONFIG_H=1) diff --git a/contrib/cctz b/contrib/cctz index 5c8528fb35e..7c78edd52b4 160000 --- a/contrib/cctz +++ b/contrib/cctz @@ -1 +1 @@ -Subproject commit 5c8528fb35e89ee0b3a7157490423fba0d4dd7b5 +Subproject commit 7c78edd52b4d65acc103c2f195818ffcabe6fe0d diff --git a/contrib/libunwind-cmake/CMakeLists.txt b/contrib/libunwind-cmake/CMakeLists.txt index 155853a0bca..733f99d07f5 100644 --- a/contrib/libunwind-cmake/CMakeLists.txt +++ b/contrib/libunwind-cmake/CMakeLists.txt @@ -43,7 +43,10 @@ set_target_properties(unwind PROPERTIES FOLDER "contrib/libunwind-cmake") target_include_directories(unwind SYSTEM BEFORE PUBLIC $) target_compile_definitions(unwind PRIVATE -D_LIBUNWIND_NO_HEAP=1 -D_DEBUG -D_LIBUNWIND_IS_NATIVE_ONLY) -target_compile_options(unwind PRIVATE -fno-exceptions -funwind-tables -fno-sanitize=all $<$:-nostdinc++ -fno-rtti>) + +# We should enable optimizations (otherwise it will be too slow in debug) +# and disable sanitizers (otherwise infinite loop may happen) +target_compile_options(unwind PRIVATE -O3 -fno-exceptions -funwind-tables -fno-sanitize=all $<$:-nostdinc++ -fno-rtti>) check_c_compiler_flag(-Wunused-but-set-variable HAVE_WARNING_UNUSED_BUT_SET_VARIABLE) if (HAVE_WARNING_UNUSED_BUT_SET_VARIABLE) diff --git a/contrib/libuv-cmake/CMakeLists.txt b/contrib/libuv-cmake/CMakeLists.txt index ba6bc746c59..7ca2cef2251 100644 --- a/contrib/libuv-cmake/CMakeLists.txt +++ b/contrib/libuv-cmake/CMakeLists.txt @@ -136,11 +136,6 @@ add_library(ch_contrib::uv ALIAS _uv) target_compile_definitions(_uv PRIVATE ${uv_defines}) target_include_directories(_uv SYSTEM PUBLIC ${SOURCE_DIR}/include PRIVATE ${SOURCE_DIR}/src) target_link_libraries(_uv ${uv_libraries}) -if (NOT USE_STATIC_LIBRARIES) - target_compile_definitions(_uv - INTERFACE USING_UV_SHARED=1 - PRIVATE BUILDING_UV_SHARED=1) -endif() if(UNIX) # Now for some gibbering horrors from beyond the stars... diff --git a/contrib/llvm-project-cmake/CMakeLists.txt b/contrib/llvm-project-cmake/CMakeLists.txt index 7af4a23bc9d..8759c16ac3e 100644 --- a/contrib/llvm-project-cmake/CMakeLists.txt +++ b/contrib/llvm-project-cmake/CMakeLists.txt @@ -6,8 +6,6 @@ endif() option (ENABLE_EMBEDDED_COMPILER "Enable support for 'compile_expressions' option for query execution" ${ENABLE_EMBEDDED_COMPILER_DEFAULT}) -# If USE_STATIC_LIBRARIES=0 was passed to CMake, we'll still build LLVM statically to keep complexity minimal. 
- if (NOT ENABLE_EMBEDDED_COMPILER) message(STATUS "Not using LLVM") return() diff --git a/contrib/sentry-native-cmake/CMakeLists.txt b/contrib/sentry-native-cmake/CMakeLists.txt index 520fa176b91..377f955f856 100644 --- a/contrib/sentry-native-cmake/CMakeLists.txt +++ b/contrib/sentry-native-cmake/CMakeLists.txt @@ -1,4 +1,4 @@ -if (NOT OS_FREEBSD AND NOT SPLIT_SHARED_LIBRARIES AND NOT (OS_DARWIN AND COMPILER_CLANG)) +if (NOT OS_FREEBSD AND NOT (OS_DARWIN AND COMPILER_CLANG)) option (ENABLE_SENTRY "Enable Sentry" ${ENABLE_LIBRARIES}) else() option (ENABLE_SENTRY "Enable Sentry" OFF) @@ -51,11 +51,7 @@ endif() add_library(_sentry ${SRCS}) -if(BUILD_SHARED_LIBS) - target_compile_definitions(_sentry PRIVATE SENTRY_BUILD_SHARED) -else() - target_compile_definitions(_sentry PUBLIC SENTRY_BUILD_STATIC) -endif() +target_compile_definitions(_sentry PUBLIC SENTRY_BUILD_STATIC) target_link_libraries(_sentry PRIVATE ch_contrib::curl pthread) target_include_directories(_sentry PUBLIC "${SRC_DIR}/include" PRIVATE "${SRC_DIR}/src") diff --git a/contrib/sysroot b/contrib/sysroot index e9fb375d0a1..f0081b2649b 160000 --- a/contrib/sysroot +++ b/contrib/sysroot @@ -1 +1 @@ -Subproject commit e9fb375d0a1e5ebfd74c043f088f2342552103f8 +Subproject commit f0081b2649b94837855f3bc7d05ef326b100bad8 diff --git a/docker/images.json b/docker/images.json index 8339205b52f..bc8e9924955 100644 --- a/docker/images.json +++ b/docker/images.json @@ -2,7 +2,6 @@ "docker/packager/binary": { "name": "clickhouse/binary-builder", "dependent": [ - "docker/test/split_build_smoke_test", "docker/test/codebrowser" ] }, @@ -55,10 +54,6 @@ "name": "clickhouse/stress-test", "dependent": [] }, - "docker/test/split_build_smoke_test": { - "name": "clickhouse/split-build-smoke-test", - "dependent": [] - }, "docker/test/codebrowser": { "name": "clickhouse/codebrowser", "dependent": [] diff --git a/docker/packager/binary/build.sh b/docker/packager/binary/build.sh index 436fcbe921c..912366fb4ff 100755 --- a/docker/packager/binary/build.sh +++ b/docker/packager/binary/build.sh @@ -107,8 +107,6 @@ fi mv ./programs/clickhouse* /output [ -x ./programs/self-extracting/clickhouse ] && mv ./programs/self-extracting/clickhouse /output mv ./src/unit_tests_dbms /output ||: # may not exist for some binary builds -find . -name '*.so' -print -exec mv '{}' /output \; -find . -name '*.so.*' -print -exec mv '{}' /output \; prepare_combined_output () { local OUTPUT @@ -165,7 +163,7 @@ then ) fi -# May be set for split build or for performance test. +# May be set for performance test. 
if [ "" != "$COMBINED_OUTPUT" ] then prepare_combined_output /output diff --git a/docker/packager/packager b/docker/packager/packager index 716071fcac6..e097c003cf9 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -100,12 +100,11 @@ def run_docker_image_with_env( subprocess.check_call(cmd, shell=True) -def is_release_build(build_type, package_type, sanitizer, shared_libraries): +def is_release_build(build_type, package_type, sanitizer): return ( build_type == "" and package_type == "deb" and sanitizer == "" - and not shared_libraries ) @@ -116,7 +115,6 @@ def parse_env_variables( package_type, cache, distcc_hosts, - shared_libraries, clang_tidy, version, author, @@ -218,7 +216,7 @@ def parse_env_variables( cmake_flags.append("-DCMAKE_INSTALL_PREFIX=/usr") cmake_flags.append("-DCMAKE_INSTALL_SYSCONFDIR=/etc") cmake_flags.append("-DCMAKE_INSTALL_LOCALSTATEDIR=/var") - if is_release_build(build_type, package_type, sanitizer, shared_libraries): + if is_release_build(build_type, package_type, sanitizer): cmake_flags.append("-DSPLIT_DEBUG_SYMBOLS=ON") result.append("WITH_PERFORMANCE=1") if is_cross_arm: @@ -231,12 +229,10 @@ def parse_env_variables( cmake_flags.append(f"-DCMAKE_C_COMPILER={cc}") cmake_flags.append(f"-DCMAKE_CXX_COMPILER={cxx}") - # Create combined output archive for shared library build and for performance tests. + # Create combined output archive for performance tests. if package_type == "coverity": result.append("COMBINED_OUTPUT=coverity") result.append('COVERITY_TOKEN="$COVERITY_TOKEN"') - elif shared_libraries: - result.append("COMBINED_OUTPUT=shared_build") if sanitizer: result.append(f"SANITIZER={sanitizer}") @@ -285,15 +281,6 @@ def parse_env_variables( result.append("BINARY_OUTPUT=tests") cmake_flags.append("-DENABLE_TESTS=1") - if shared_libraries: - cmake_flags.append("-DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1") - # We can't always build utils because it requires too much space, but - # we have to build them at least in some way in CI. The shared library - # build is probably the least heavy disk-wise. - cmake_flags.append("-DENABLE_UTILS=1") - # utils are not included into clickhouse-bundle, so build everything - build_target = "all" - if clang_tidy: cmake_flags.append("-DENABLE_CLANG_TIDY=1") cmake_flags.append("-DENABLE_TESTS=1") @@ -371,7 +358,6 @@ if __name__ == "__main__": default="", ) - parser.add_argument("--shared-libraries", action="store_true") parser.add_argument("--clang-tidy", action="store_true") parser.add_argument("--cache", choices=("ccache", "distcc", ""), default="") parser.add_argument( @@ -424,7 +410,6 @@ if __name__ == "__main__": args.package_type, args.cache, args.distcc_hosts, - args.shared_libraries, args.clang_tidy, args.version, args.author, diff --git a/docker/server/entrypoint.sh b/docker/server/entrypoint.sh index 52f4f67281e..dfd8f2e3e54 100755 --- a/docker/server/entrypoint.sh +++ b/docker/server/entrypoint.sh @@ -80,7 +80,7 @@ do done # if clickhouse user is defined - create it (user "default" already exists out of box) -if [ -n "$CLICKHOUSE_USER" ] && [ "$CLICKHOUSE_USER" != "default" ] || [ -n "$CLICKHOUSE_PASSWORD" ]; then +if [ -n "$CLICKHOUSE_USER" ] && [ "$CLICKHOUSE_USER" != "default" ] || [ -n "$CLICKHOUSE_PASSWORD" ] || [ "$CLICKHOUSE_ACCESS_MANAGEMENT" != "0" ]; then echo "$0: create new user '$CLICKHOUSE_USER' instead 'default'" cat < /etc/clickhouse-server/users.d/default-user.xml @@ -120,8 +120,8 @@ if [ -n "$(ls /docker-entrypoint-initdb.d/)" ] || [ -n "$CLICKHOUSE_DB" ]; then pid="$!" 
# check if clickhouse is ready to accept connections - # will try to send ping clickhouse via http_port (max 12 retries by default, with 1 sec timeout and 1 sec delay between retries) - tries=${CLICKHOUSE_INIT_TIMEOUT:-12} + # will try to send ping clickhouse via http_port (max 1000 retries by default, with 1 sec timeout and 1 sec delay between retries) + tries=${CLICKHOUSE_INIT_TIMEOUT:-1000} while ! wget --spider --no-check-certificate -T 1 -q "$URL" 2>/dev/null; do if [ "$tries" -le "0" ]; then echo >&2 'ClickHouse init process failed.' diff --git a/docker/test/fuzzer/query-fuzzer-tweaks-users.xml b/docker/test/fuzzer/query-fuzzer-tweaks-users.xml index 2f09573f942..1771a03f8a9 100644 --- a/docker/test/fuzzer/query-fuzzer-tweaks-users.xml +++ b/docker/test/fuzzer/query-fuzzer-tweaks-users.xml @@ -2,6 +2,7 @@ 10 + + + + diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh index bd539ca978b..164f2e28d76 100755 --- a/docker/test/fuzzer/run-fuzzer.sh +++ b/docker/test/fuzzer/run-fuzzer.sh @@ -51,7 +51,6 @@ function clone ) ls -lath ||: - } function wget_with_retry @@ -75,6 +74,7 @@ function download ./clickhouse ||: ln -s ./clickhouse ./clickhouse-server ln -s ./clickhouse ./clickhouse-client + ln -s ./clickhouse ./clickhouse-local # clickhouse-server is in the current dir export PATH="$PWD:$PATH" @@ -91,6 +91,12 @@ function configure cp -av --dereference "$script_dir"/query-fuzzer-tweaks-users.xml db/users.d cp -av --dereference "$script_dir"/allow-nullable-key.xml db/config.d + cat > db/config.d/max_server_memory_usage_to_ram_ratio.xml < + 0.75 + +EOL + cat > db/config.d/core.xml < @@ -151,7 +157,7 @@ function fuzz mkdir -p /var/run/clickhouse-server # NOTE: we use process substitution here to preserve keep $! as a pid of clickhouse-server - clickhouse-server --config-file db/config.xml --pid-file /var/run/clickhouse-server/clickhouse-server.pid -- --path db 2>&1 | pigz > server.log.gz & + clickhouse-server --config-file db/config.xml --pid-file /var/run/clickhouse-server/clickhouse-server.pid -- --path db > server.log 2>&1 & server_pid=$! kill -0 $server_pid @@ -256,12 +262,21 @@ quit if [ "$server_died" == 1 ] then # The server has died. - task_exit_code=210 - echo "failure" > status.txt - if ! zgrep --text -ao "Received signal.*\|Logical error.*\|Assertion.*failed\|Failed assertion.*\|.*runtime error: .*\|.*is located.*\|SUMMARY: AddressSanitizer:.*\|SUMMARY: MemorySanitizer:.*\|SUMMARY: ThreadSanitizer:.*\|.*_LIBCPP_ASSERT.*" server.log.gz > description.txt + if ! grep --text -ao "Received signal.*\|Logical error.*\|Assertion.*failed\|Failed assertion.*\|.*runtime error: .*\|.*is located.*\|SUMMARY: AddressSanitizer:.*\|SUMMARY: MemorySanitizer:.*\|SUMMARY: ThreadSanitizer:.*\|.*_LIBCPP_ASSERT.*" server.log > description.txt then echo "Lost connection to server. See the logs." > description.txt fi + + if grep -E --text 'Sanitizer: (out-of-memory|failed to allocate)' description.txt + then + # OOM of sanitizer is not a problem we can handle - treat it as success, but preserve the description. 
+ task_exit_code=0 + echo "success" > status.txt + else + task_exit_code=210 + echo "failure" > status.txt + fi + elif [ "$fuzzer_exit_code" == "143" ] || [ "$fuzzer_exit_code" == "0" ] then # Variants of a normal run: @@ -327,24 +342,28 @@ case "$stage" in time fuzz ;& "report") + CORE_LINK='' if [ -f core.gz ]; then CORE_LINK='core.gz' fi + +grep --text -F '' server.log > fatal.log ||: + +pigz server.log + cat > report.html < AST Fuzzer for PR #${PR_TO_TEST} @ ${SHA_TO_TEST} @@ -352,17 +371,32 @@ th { cursor: pointer; }
-

AST Fuzzer for PR #${PR_TO_TEST} @ ${SHA_TO_TEST}

+

AST Fuzzer for PR #${PR_TO_TEST} @ ${SHA_TO_TEST}

- - + + + + + + + + + + + + +
Test nameTest statusDescription
AST Fuzzer$(cat status.txt)$(cat description.txt)
Test nameTest statusDescription
AST Fuzzer$(cat status.txt)$( + clickhouse-local --input-format RawBLOB --output-format RawBLOB --query "SELECT encodeXMLComponent(*) FROM table" < description.txt || cat description.txt + )
$( + clickhouse-local --input-format RawBLOB --output-format RawBLOB --query "SELECT encodeXMLComponent(*) FROM table" < fatal.log || cat fatal.log + )
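
The report-generation change above stops embedding raw log text in the HTML report: the failure description and the extracted fatal messages are first passed through `clickhouse-local` with `encodeXMLComponent`, and the raw file is used as a fallback if that fails. A minimal stand-alone sketch of the same escaping step (the helper name and the sample input file are illustrative, not part of the patch):

```bash
#!/bin/bash
# Escape an arbitrary text file so it can be embedded into an HTML report.
# Mirrors the fallback used above: if clickhouse-local fails for any reason,
# emit the raw file instead of breaking report generation.
escape_for_html() {
    local src=$1
    clickhouse-local \
        --input-format RawBLOB --output-format RawBLOB \
        --query "SELECT encodeXMLComponent(*) FROM table" < "$src" \
        || cat "$src"
}

escape_for_html description.txt   # e.g. the fuzzer failure description
```
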
diff --git a/docker/test/integration/runner/Dockerfile b/docker/test/integration/runner/Dockerfile index 84c04dd03ec..ccfd63c8ed0 100644 --- a/docker/test/integration/runner/Dockerfile +++ b/docker/test/integration/runner/Dockerfile @@ -83,6 +83,7 @@ RUN python3 -m pip install \ pytest \ pytest-order==1.0.0 \ pytest-timeout \ + pytest-random \ pytest-xdist \ pytest-repeat \ pytz \ diff --git a/docker/test/performance-comparison/perf.py b/docker/test/performance-comparison/perf.py index cb23372d31f..76e4c5ad8c1 100755 --- a/docker/test/performance-comparison/perf.py +++ b/docker/test/performance-comparison/perf.py @@ -297,6 +297,7 @@ if not args.use_existing_tables: # Let's sync the data to avoid writeback affects performance os.system("sync") +reportStageEnd("sync") # By default, test all queries. queries_to_run = range(0, len(test_queries)) diff --git a/docker/test/split_build_smoke_test/Dockerfile b/docker/test/split_build_smoke_test/Dockerfile deleted file mode 100644 index 5f84eb42216..00000000000 --- a/docker/test/split_build_smoke_test/Dockerfile +++ /dev/null @@ -1,9 +0,0 @@ -# rebuild in #33610 -# docker build -t clickhouse/split-build-smoke-test . -ARG FROM_TAG=latest -FROM clickhouse/binary-builder:$FROM_TAG - -COPY run.sh /run.sh -COPY process_split_build_smoke_test_result.py / - -CMD /run.sh diff --git a/docker/test/split_build_smoke_test/process_split_build_smoke_test_result.py b/docker/test/split_build_smoke_test/process_split_build_smoke_test_result.py deleted file mode 100755 index b5bc82e6818..00000000000 --- a/docker/test/split_build_smoke_test/process_split_build_smoke_test_result.py +++ /dev/null @@ -1,64 +0,0 @@ -#!/usr/bin/env python3 - -import os -import logging -import argparse -import csv - -RESULT_LOG_NAME = "run.log" - - -def process_result(result_folder): - - status = "success" - description = "Server started and responded" - summary = [("Smoke test", "OK")] - with open(os.path.join(result_folder, RESULT_LOG_NAME), "r") as run_log: - lines = run_log.read().split("\n") - if not lines or lines[0].strip() != "OK": - status = "failure" - logging.info("Lines is not ok: %s", str("\n".join(lines))) - summary = [("Smoke test", "FAIL")] - description = "Server failed to respond, see result in logs" - - result_logs = [] - server_log_path = os.path.join(result_folder, "clickhouse-server.log") - stderr_log_path = os.path.join(result_folder, "stderr.log") - client_stderr_log_path = os.path.join(result_folder, "clientstderr.log") - - if os.path.exists(server_log_path): - result_logs.append(server_log_path) - - if os.path.exists(stderr_log_path): - result_logs.append(stderr_log_path) - - if os.path.exists(client_stderr_log_path): - result_logs.append(client_stderr_log_path) - - return status, description, summary, result_logs - - -def write_results(results_file, status_file, results, status): - with open(results_file, "w") as f: - out = csv.writer(f, delimiter="\t") - out.writerows(results) - with open(status_file, "w") as f: - out = csv.writer(f, delimiter="\t") - out.writerow(status) - - -if __name__ == "__main__": - logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s") - parser = argparse.ArgumentParser( - description="ClickHouse script for parsing results of split build smoke test" - ) - parser.add_argument("--in-results-dir", default="/test_output/") - parser.add_argument("--out-results-file", default="/test_output/test_results.tsv") - parser.add_argument("--out-status-file", default="/test_output/check_status.tsv") - args = parser.parse_args() - - 
state, description, test_results, logs = process_result(args.in_results_dir) - logging.info("Result parsed") - status = (state, description) - write_results(args.out_results_file, args.out_status_file, test_results, status) - logging.info("Result written") diff --git a/docker/test/split_build_smoke_test/run.sh b/docker/test/split_build_smoke_test/run.sh deleted file mode 100755 index b565d7a481e..00000000000 --- a/docker/test/split_build_smoke_test/run.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/bash - -set -x - -install_and_run_server() { - mkdir /unpacked - tar -xzf /package_folder/shared_build.tgz -C /unpacked --strip 1 - LD_LIBRARY_PATH=/unpacked /unpacked/clickhouse-server --config /unpacked/config/config.xml >/test_output/stderr.log 2>&1 & -} - -run_client() { - for i in {1..100}; do - sleep 1 - LD_LIBRARY_PATH=/unpacked /unpacked/clickhouse-client --query "select 'OK'" > /test_output/run.log 2> /test_output/clientstderr.log && break - [[ $i == 100 ]] && echo 'FAIL' - done -} - -install_and_run_server -run_client -mv /var/log/clickhouse-server/clickhouse-server.log /test_output/clickhouse-server.log -/process_split_build_smoke_test_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv diff --git a/docker/test/stateless/setup_minio.sh b/docker/test/stateless/setup_minio.sh index a1de7f2d6ed..c756ce4669d 100755 --- a/docker/test/stateless/setup_minio.sh +++ b/docker/test/stateless/setup_minio.sh @@ -1,90 +1,151 @@ #!/bin/bash -USAGE='Usage for local run: +set -euxf -o pipefail -./docker/test/stateless/setup_minio.sh { stateful | stateless } ./tests/ +export MINIO_ROOT_USER=${MINIO_ROOT_USER:-clickhouse} +export MINIO_ROOT_PASSWORD=${MINIO_ROOT_PASSWORD:-clickhouse} -' +usage() { + echo $"Usage: $0 (default path: /usr/share/clickhouse-test)" + exit 1 +} -set -e -x -a -u - -TEST_TYPE="$1" -shift - -case $TEST_TYPE in - stateless) QUERY_DIR=0_stateless ;; - stateful) QUERY_DIR=1_stateful ;; - *) echo "unknown test type $TEST_TYPE"; echo "${USAGE}"; exit 1 ;; -esac - -ls -lha - -mkdir -p ./minio_data - -if [ ! -f ./minio ]; then - MINIO_SERVER_VERSION=${MINIO_SERVER_VERSION:-2022-09-07T22-25-02Z} - MINIO_CLIENT_VERSION=${MINIO_CLIENT_VERSION:-2022-08-28T20-08-11Z} - case $(uname -m) in - x86_64) BIN_ARCH=amd64 ;; - aarch64) BIN_ARCH=arm64 ;; - *) echo "unknown architecture $(uname -m)"; exit 1 ;; - esac - echo 'MinIO binary not found, downloading...' - - BINARY_TYPE=$(uname -s | tr '[:upper:]' '[:lower:]') - - wget "https://dl.min.io/server/minio/release/${BINARY_TYPE}-${BIN_ARCH}/archive/minio.RELEASE.${MINIO_SERVER_VERSION}" -O ./minio \ - && wget "https://dl.min.io/client/mc/release/${BINARY_TYPE}-${BIN_ARCH}/archive/mc.RELEASE.${MINIO_CLIENT_VERSION}" -O ./mc \ - && chmod +x ./mc ./minio -fi - -MINIO_ROOT_USER=${MINIO_ROOT_USER:-clickhouse} -MINIO_ROOT_PASSWORD=${MINIO_ROOT_PASSWORD:-clickhouse} - -./minio --version -./minio server --address ":11111" ./minio_data & - -i=0 -while ! curl -v --silent http://localhost:11111 2>&1 | grep AccessDenied -do - if [[ $i == 60 ]]; then - echo "Failed to setup minio" - exit 0 +check_arg() { + local query_dir + if [ ! $# -eq 1 ]; then + if [ ! 
$# -eq 2 ]; then + echo "ERROR: need either one or two arguments, (default path: /usr/share/clickhouse-test)" + usage + fi fi - echo "Trying to connect to minio" - sleep 1 - i=$((i + 1)) -done + case "$1" in + stateless) + query_dir="0_stateless" + ;; + stateful) + query_dir="1_stateful" + ;; + *) + echo "unknown test type ${test_type}" + usage + ;; + esac + echo ${query_dir} +} -lsof -i :11111 +find_arch() { + local arch + case $(uname -m) in + x86_64) + arch="amd64" + ;; + aarch64) + arch="arm64" + ;; + *) + echo "unknown architecture $(uname -m)"; + exit 1 + ;; + esac + echo ${arch} +} -sleep 5 +find_os() { + local os + os=$(uname -s | tr '[:upper:]' '[:lower:]') + echo "${os}" +} -./mc alias set clickminio http://localhost:11111 clickhouse clickhouse -./mc admin user add clickminio test testtest -./mc admin policy set clickminio readwrite user=test -./mc mb clickminio/test -if [ "$TEST_TYPE" = "stateless" ]; then - ./mc policy set public clickminio/test -fi +download_minio() { + local os + local arch + local minio_server_version=${MINIO_SERVER_VERSION:-2022-09-07T22-25-02Z} + local minio_client_version=${MINIO_CLIENT_VERSION:-2022-08-28T20-08-11Z} + os=$(find_os) + arch=$(find_arch) + wget "https://dl.min.io/server/minio/release/${os}-${arch}/archive/minio.RELEASE.${minio_server_version}" -O ./minio + wget "https://dl.min.io/client/mc/release/${os}-${arch}/archive/mc.RELEASE.${minio_client_version}" -O ./mc + chmod +x ./mc ./minio +} -# Upload data to Minio. By default after unpacking all tests will in -# /usr/share/clickhouse-test/queries +start_minio() { + mkdir -p ./minio_data + ./minio --version + ./minio server --address ":11111" ./minio_data & + wait_for_it + lsof -i :11111 + sleep 5 +} -TEST_PATH=${1:-/usr/share/clickhouse-test} -MINIO_DATA_PATH=${TEST_PATH}/queries/${QUERY_DIR}/data_minio +setup_minio() { + local test_type=$1 + ./mc alias set clickminio http://localhost:11111 clickhouse clickhouse + ./mc admin user add clickminio test testtest + ./mc admin policy set clickminio readwrite user=test + ./mc mb clickminio/test + if [ "$test_type" = "stateless" ]; then + ./mc policy set public clickminio/test + fi +} -# Iterating over globs will cause redudant FILE variale to be a path to a file, not a filename -# shellcheck disable=SC2045 -for FILE in $(ls "${MINIO_DATA_PATH}"); do - echo "$FILE"; - ./mc cp "${MINIO_DATA_PATH}"/"$FILE" clickminio/test/"$FILE"; -done +# uploads data to minio, by default after unpacking all tests +# will be in /usr/share/clickhouse-test/queries +upload_data() { + local query_dir=$1 + local test_path=$2 + local data_path=${test_path}/queries/${query_dir}/data_minio -mkdir -p ~/.aws -cat <> ~/.aws/credentials + # iterating over globs will cause redundant file variable to be + # a path to a file, not a filename + # shellcheck disable=SC2045 + for file in $(ls "${data_path}"); do + echo "${file}"; + ./mc cp "${data_path}"/"${file}" clickminio/test/"${file}"; + done +} + +setup_aws_credentials() { + local minio_root_user=${MINIO_ROOT_USER:-clickhouse} + local minio_root_password=${MINIO_ROOT_PASSWORD:-clickhouse} + mkdir -p ~/.aws + cat <> ~/.aws/credentials [default] -aws_access_key_id=${MINIO_ROOT_USER} -aws_secret_access_key=${MINIO_ROOT_PASSWORD} +aws_access_key_id=${minio_root_user} +aws_secret_access_key=${minio_root_password} EOT +} + +wait_for_it() { + local counter=0 + local max_counter=60 + local url="http://localhost:11111" + local params=( + --silent + --verbose + ) + while ! 
curl "${params[@]}" "${url}" 2>&1 | grep AccessDenied + do + if [[ ${counter} == "${max_counter}" ]]; then + echo "failed to setup minio" + exit 0 + fi + echo "trying to connect to minio" + sleep 1 + counter=$((counter + 1)) + done +} + +main() { + local query_dir + query_dir=$(check_arg "$@") + if [ ! -f ./minio ]; then + download_minio + fi + start_minio + setup_minio "$1" + upload_data "${query_dir}" "${2:-/usr/share/clickhouse-test}" + setup_aws_credentials +} + +main "$@" \ No newline at end of file diff --git a/docker/test/stress/README.md b/docker/test/stress/README.md index 96807b9f9a6..c22721fd7da 100644 --- a/docker/test/stress/README.md +++ b/docker/test/stress/README.md @@ -1,6 +1,6 @@ -Allow to run simple ClickHouse stress test in Docker from debian packages. +Allows to run simple ClickHouse stress test in Docker from debian packages. Actually it runs multiple copies of clickhouse-test (functional tests). -This allows to find problems like segmentation fault which cause shutdown of server. +This allows to find problems like failed assertions and memory safety issues. Usage: ``` diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 01e0f5b4897..dc2b184dc5a 100644 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -11,31 +11,6 @@ set -x # core.COMM.PID-TID sysctl kernel.core_pattern='core.%e.%p-%P' -# Thread Fuzzer allows to check more permutations of possible thread scheduling -# and find more potential issues. -# Temporarily disable ThreadFuzzer with tsan because of https://github.com/google/sanitizers/issues/1540 -is_tsan_build=$(clickhouse local -q "select value like '% -fsanitize=thread %' from system.build_options where name='CXX_FLAGS'") -if [ "$is_tsan_build" -eq "0" ]; then - export THREAD_FUZZER_CPU_TIME_PERIOD_US=1000 - export THREAD_FUZZER_SLEEP_PROBABILITY=0.1 - export THREAD_FUZZER_SLEEP_TIME_US=100000 - - export THREAD_FUZZER_pthread_mutex_lock_BEFORE_MIGRATE_PROBABILITY=1 - export THREAD_FUZZER_pthread_mutex_lock_AFTER_MIGRATE_PROBABILITY=1 - export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_MIGRATE_PROBABILITY=1 - export THREAD_FUZZER_pthread_mutex_unlock_AFTER_MIGRATE_PROBABILITY=1 - - export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_PROBABILITY=0.001 - export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_PROBABILITY=0.001 - export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_PROBABILITY=0.001 - export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_PROBABILITY=0.001 - export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_TIME_US=10000 - - export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_TIME_US=10000 - export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_TIME_US=10000 - export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_TIME_US=10000 -fi - function install_packages() { @@ -54,7 +29,7 @@ function configure() # we mount tests folder from repo to /usr/share ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test - ln -s /usr/share/clickhouse-test/ci/download_release_packets.py /usr/bin/download_release_packets + ln -s /usr/share/clickhouse-test/ci/download_release_packages.py /usr/bin/download_release_packages ln -s /usr/share/clickhouse-test/ci/get_previous_release_tag.py /usr/bin/get_previous_release_tag # avoid too slow startup @@ -78,6 +53,7 @@ function configure() local total_mem total_mem=$(awk '/MemTotal/ { print $(NF-1) }' /proc/meminfo) # KiB total_mem=$(( total_mem*1024 )) # bytes + # Set maximum memory usage as half of total memory (less chance of OOM). 
# # But not via max_server_memory_usage but via max_memory_usage_for_user, @@ -90,16 +66,17 @@ function configure() # max_server_memory_usage will be hard limit, and queries that should be # executed regardless memory limits will use max_memory_usage_for_user=0, # instead of relying on max_untracked_memory - local max_server_mem - max_server_mem=$((total_mem*75/100)) # 75% - echo "Setting max_server_memory_usage=$max_server_mem" + + max_server_memory_usage_to_ram_ratio=0.5 + echo "Setting max_server_memory_usage_to_ram_ratio to ${max_server_memory_usage_to_ram_ratio}" cat > /etc/clickhouse-server/config.d/max_server_memory_usage.xml < - ${max_server_mem} + ${max_server_memory_usage_to_ram_ratio} EOL + local max_users_mem - max_users_mem=$((total_mem*50/100)) # 50% + max_users_mem=$((total_mem*30/100)) # 30% echo "Setting max_memory_usage_for_user=$max_users_mem" cat > /etc/clickhouse-server/users.d/max_memory_usage_for_user.xml < @@ -123,6 +100,29 @@ EOL $PWD EOL + + # Let OOM killer terminate other processes before clickhouse-server: + cat > /etc/clickhouse-server/config.d/oom_score.xml < + -1000 + +EOL + + # Analyzer is not yet ready for testing + cat > /etc/clickhouse-server/users.d/no_analyzer.xml < + + + + + + + + + + +EOL + } function stop() @@ -210,6 +210,31 @@ quit install_packages package_folder +# Thread Fuzzer allows to check more permutations of possible thread scheduling +# and find more potential issues. +# Temporarily disable ThreadFuzzer with tsan because of https://github.com/google/sanitizers/issues/1540 +is_tsan_build=$(clickhouse local -q "select value like '% -fsanitize=thread %' from system.build_options where name='CXX_FLAGS'") +if [ "$is_tsan_build" -eq "0" ]; then + export THREAD_FUZZER_CPU_TIME_PERIOD_US=1000 + export THREAD_FUZZER_SLEEP_PROBABILITY=0.1 + export THREAD_FUZZER_SLEEP_TIME_US=100000 + + export THREAD_FUZZER_pthread_mutex_lock_BEFORE_MIGRATE_PROBABILITY=1 + export THREAD_FUZZER_pthread_mutex_lock_AFTER_MIGRATE_PROBABILITY=1 + export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_MIGRATE_PROBABILITY=1 + export THREAD_FUZZER_pthread_mutex_unlock_AFTER_MIGRATE_PROBABILITY=1 + + export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_PROBABILITY=0.001 + export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_PROBABILITY=0.001 + export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_PROBABILITY=0.001 + export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_PROBABILITY=0.001 + export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_TIME_US=10000 + + export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_TIME_US=10000 + export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_TIME_US=10000 + export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_TIME_US=10000 +fi + export ZOOKEEPER_FAULT_INJECTION=1 configure @@ -334,219 +359,228 @@ zgrep -Fa "########################################" /test_output/* > /dev/null zgrep -Fa " received signal " /test_output/gdb.log > /dev/null \ && echo -e 'Found signal in gdb.log\tFAIL' >> /test_output/test_results.tsv -echo -e "Backward compatibility check\n" +if [ "$DISABLE_BC_CHECK" -ne "1" ]; then + echo -e "Backward compatibility check\n" -echo "Get previous release tag" -previous_release_tag=$(clickhouse-client --version | grep -o "[0-9]*\.[0-9]*\.[0-9]*\.[0-9]*" | get_previous_release_tag) -echo $previous_release_tag + echo "Get previous release tag" + previous_release_tag=$(clickhouse-client --version | grep -o "[0-9]*\.[0-9]*\.[0-9]*\.[0-9]*" | get_previous_release_tag) + echo $previous_release_tag -echo "Clone previous release repository" 
-git clone https://github.com/ClickHouse/ClickHouse.git --no-tags --progress --branch=$previous_release_tag --no-recurse-submodules --depth=1 previous_release_repository + echo "Clone previous release repository" + git clone https://github.com/ClickHouse/ClickHouse.git --no-tags --progress --branch=$previous_release_tag --no-recurse-submodules --depth=1 previous_release_repository -echo "Download previous release server" -mkdir previous_release_package_folder + echo "Download clickhouse-server from the previous release" + mkdir previous_release_package_folder -echo $previous_release_tag | download_release_packets && echo -e 'Download script exit code\tOK' >> /test_output/test_results.tsv \ - || echo -e 'Download script failed\tFAIL' >> /test_output/test_results.tsv + echo $previous_release_tag | download_release_packages && echo -e 'Download script exit code\tOK' >> /test_output/test_results.tsv \ + || echo -e 'Download script failed\tFAIL' >> /test_output/test_results.tsv -mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.clean.log -for table in query_log trace_log -do - clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | pigz > /test_output/$table.tsv.gz ||: -done - -tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||: - -# Check if we cloned previous release repository successfully -if ! [ "$(ls -A previous_release_repository/tests/queries)" ] -then - echo -e "Backward compatibility check: Failed to clone previous release tests\tFAIL" >> /test_output/test_results.tsv -elif ! [ "$(ls -A previous_release_package_folder/clickhouse-common-static_*.deb && ls -A previous_release_package_folder/clickhouse-server_*.deb)" ] -then - echo -e "Backward compatibility check: Failed to download previous release packets\tFAIL" >> /test_output/test_results.tsv -else - echo -e "Successfully cloned previous release tests\tOK" >> /test_output/test_results.tsv - echo -e "Successfully downloaded previous release packets\tOK" >> /test_output/test_results.tsv - - # Uninstall current packages - dpkg --remove clickhouse-client - dpkg --remove clickhouse-server - dpkg --remove clickhouse-common-static-dbg - dpkg --remove clickhouse-common-static - - rm -rf /var/lib/clickhouse/* - - # Make BC check more funny by forcing Ordinary engine for system database - mkdir /var/lib/clickhouse/metadata - echo "ATTACH DATABASE system ENGINE=Ordinary" > /var/lib/clickhouse/metadata/system.sql - - # Install previous release packages - install_packages previous_release_package_folder - - # Start server from previous release - # Previous version may not be ready for fault injections - export ZOOKEEPER_FAULT_INJECTION=0 - configure - - # Avoid "Setting s3_check_objects_after_upload is neither a builtin setting..." 
- rm -f /etc/clickhouse-server/users.d/enable_blobs_check.xml ||: - rm -f /etc/clickhouse-server/users.d/marks.xml ||: - - # Remove s3 related configs to avoid "there is no disk type `cache`" - rm -f /etc/clickhouse-server/config.d/storage_conf.xml ||: - rm -f /etc/clickhouse-server/config.d/azure_storage_conf.xml ||: - - # Turn on after 22.12 - rm -f /etc/clickhouse-server/config.d/compressed_marks_and_index.xml ||: - # it uses recently introduced settings which previous versions may not have - rm -f /etc/clickhouse-server/users.d/insert_keeper_retries.xml ||: - - start - - clickhouse-client --query="SELECT 'Server version: ', version()" - - # Install new package before running stress test because we should use new - # clickhouse-client and new clickhouse-test. - # - # But we should leave old binary in /usr/bin/ and debug symbols in - # /usr/lib/debug/usr/bin (if any) for gdb and internal DWARF parser, so it - # will print sane stacktraces and also to avoid possible crashes. - # - # FIXME: those files can be extracted directly from debian package, but - # actually better solution will be to use different PATH instead of playing - # games with files from packages. - mv /usr/bin/clickhouse previous_release_package_folder/ - mv /usr/lib/debug/usr/bin/clickhouse.debug previous_release_package_folder/ - install_packages package_folder - mv /usr/bin/clickhouse package_folder/ - mv /usr/lib/debug/usr/bin/clickhouse.debug package_folder/ - mv previous_release_package_folder/clickhouse /usr/bin/ - mv previous_release_package_folder/clickhouse.debug /usr/lib/debug/usr/bin/clickhouse.debug - - mkdir tmp_stress_output - - ./stress --test-cmd="/usr/bin/clickhouse-test --queries=\"previous_release_repository/tests/queries\"" --backward-compatibility-check --output-folder tmp_stress_output --global-time-limit=1200 \ - && echo -e 'Backward compatibility check: Test script exit code\tOK' >> /test_output/test_results.tsv \ - || echo -e 'Backward compatibility check: Test script failed\tFAIL' >> /test_output/test_results.tsv - rm -rf tmp_stress_output - - clickhouse-client --query="SELECT 'Tables count:', count() FROM system.tables" - - stop 1 - mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.backward.stress.log - - # Start new server - mv package_folder/clickhouse /usr/bin/ - mv package_folder/clickhouse.debug /usr/lib/debug/usr/bin/clickhouse.debug - export ZOOKEEPER_FAULT_INJECTION=1 - configure - start 500 - clickhouse-client --query "SELECT 'Backward compatibility check: Server successfully started', 'OK'" >> /test_output/test_results.tsv \ - || (echo -e 'Backward compatibility check: Server failed to start\tFAIL' >> /test_output/test_results.tsv \ - && grep -a ".*Application" /var/log/clickhouse-server/clickhouse-server.log >> /test_output/bc_check_application_errors.txt) - - clickhouse-client --query="SELECT 'Server version: ', version()" - - # Let the server run for a while before checking log. - sleep 60 - - stop - mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.backward.clean.log - - # Error messages (we should ignore some errors) - # FIXME https://github.com/ClickHouse/ClickHouse/issues/38643 ("Unknown index: idx.") - # FIXME https://github.com/ClickHouse/ClickHouse/issues/39174 ("Cannot parse string 'Hello' as UInt64") - # FIXME Not sure if it's expected, but some tests from BC check may not be finished yet when we restarting server. 
- # Let's just ignore all errors from queries ("} TCPHandler: Code:", "} executeQuery: Code:") - # FIXME https://github.com/ClickHouse/ClickHouse/issues/39197 ("Missing columns: 'v3' while processing query: 'v3, k, v1, v2, p'") - # NOTE Incompatibility was introduced in https://github.com/ClickHouse/ClickHouse/pull/39263, it's expected - # ("This engine is deprecated and is not supported in transactions", "[Queue = DB::MergeMutateRuntimeQueue]: Code: 235. DB::Exception: Part") - # FIXME https://github.com/ClickHouse/ClickHouse/issues/39174 - bad mutation does not indicate backward incompatibility - echo "Check for Error messages in server log:" - zgrep -Fav -e "Code: 236. DB::Exception: Cancelled merging parts" \ - -e "Code: 236. DB::Exception: Cancelled mutating parts" \ - -e "REPLICA_IS_ALREADY_ACTIVE" \ - -e "REPLICA_ALREADY_EXISTS" \ - -e "ALL_REPLICAS_LOST" \ - -e "DDLWorker: Cannot parse DDL task query" \ - -e "RaftInstance: failed to accept a rpc connection due to error 125" \ - -e "UNKNOWN_DATABASE" \ - -e "NETWORK_ERROR" \ - -e "UNKNOWN_TABLE" \ - -e "ZooKeeperClient" \ - -e "KEEPER_EXCEPTION" \ - -e "DirectoryMonitor" \ - -e "TABLE_IS_READ_ONLY" \ - -e "Code: 1000, e.code() = 111, Connection refused" \ - -e "UNFINISHED" \ - -e "NETLINK_ERROR" \ - -e "Renaming unexpected part" \ - -e "PART_IS_TEMPORARILY_LOCKED" \ - -e "and a merge is impossible: we didn't find" \ - -e "found in queue and some source parts for it was lost" \ - -e "is lost forever." \ - -e "Unknown index: idx." \ - -e "Cannot parse string 'Hello' as UInt64" \ - -e "} TCPHandler: Code:" \ - -e "} executeQuery: Code:" \ - -e "Missing columns: 'v3' while processing query: 'v3, k, v1, v2, p'" \ - -e "This engine is deprecated and is not supported in transactions" \ - -e "[Queue = DB::MergeMutateRuntimeQueue]: Code: 235. DB::Exception: Part" \ - -e "The set of parts restored in place of" \ - -e "(ReplicatedMergeTreeAttachThread): Initialization failed. Error" \ - -e "Code: 269. 
DB::Exception: Destination table is myself" \ - -e "Coordination::Exception: Connection loss" \ - -e "MutateFromLogEntryTask" \ - -e "No connection to ZooKeeper, cannot get shared table ID" \ - -e "Session expired" \ - /var/log/clickhouse-server/clickhouse-server.backward.clean.log | zgrep -Fa "" > /test_output/bc_check_error_messages.txt \ - && echo -e 'Backward compatibility check: Error message in clickhouse-server.log (see bc_check_error_messages.txt)\tFAIL' >> /test_output/test_results.tsv \ - || echo -e 'Backward compatibility check: No Error messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv - - # Remove file bc_check_error_messages.txt if it's empty - [ -s /test_output/bc_check_error_messages.txt ] || rm /test_output/bc_check_error_messages.txt - - # Sanitizer asserts - zgrep -Fa "==================" /var/log/clickhouse-server/stderr.log >> /test_output/tmp - zgrep -Fa "WARNING" /var/log/clickhouse-server/stderr.log >> /test_output/tmp - zgrep -Fav -e "ASan doesn't fully support makecontext/swapcontext functions" -e "DB::Exception" /test_output/tmp > /dev/null \ - && echo -e 'Backward compatibility check: Sanitizer assert (in stderr.log)\tFAIL' >> /test_output/test_results.tsv \ - || echo -e 'Backward compatibility check: No sanitizer asserts\tOK' >> /test_output/test_results.tsv - rm -f /test_output/tmp - - # OOM - zgrep -Fa " Application: Child process was terminated by signal 9" /var/log/clickhouse-server/clickhouse-server.backward.*.log > /dev/null \ - && echo -e 'Backward compatibility check: OOM killer (or signal 9) in clickhouse-server.log\tFAIL' >> /test_output/test_results.tsv \ - || echo -e 'Backward compatibility check: No OOM messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv - - # Logical errors - echo "Check for Logical errors in server log:" - zgrep -Fa -A20 "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-server/clickhouse-server.backward.*.log > /test_output/bc_check_logical_errors.txt \ - && echo -e 'Backward compatibility check: Logical error thrown (see clickhouse-server.log or bc_check_logical_errors.txt)\tFAIL' >> /test_output/test_results.tsv \ - || echo -e 'Backward compatibility check: No logical errors\tOK' >> /test_output/test_results.tsv - - # Remove file bc_check_logical_errors.txt if it's empty - [ -s /test_output/bc_check_logical_errors.txt ] || rm /test_output/bc_check_logical_errors.txt - - # Crash - zgrep -Fa "########################################" /var/log/clickhouse-server/clickhouse-server.backward.*.log > /dev/null \ - && echo -e 'Backward compatibility check: Killed by signal (in clickhouse-server.log)\tFAIL' >> /test_output/test_results.tsv \ - || echo -e 'Backward compatibility check: Not crashed\tOK' >> /test_output/test_results.tsv - - # It also checks for crash without stacktrace (printed by watchdog) - echo "Check for Fatal message in server log:" - zgrep -Fa " " /var/log/clickhouse-server/clickhouse-server.backward.*.log > /test_output/bc_check_fatal_messages.txt \ - && echo -e 'Backward compatibility check: Fatal message in clickhouse-server.log (see bc_check_fatal_messages.txt)\tFAIL' >> /test_output/test_results.tsv \ - || echo -e 'Backward compatibility check: No fatal messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv - - # Remove file bc_check_fatal_messages.txt if it's empty - [ -s /test_output/bc_check_fatal_messages.txt ] || rm /test_output/bc_check_fatal_messages.txt - - tar -chf /test_output/coordination.backward.tar 
/var/lib/clickhouse/coordination ||: + mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.clean.log for table in query_log trace_log do - clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | pigz > /test_output/$table.backward.tsv.gz ||: + clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | pigz > /test_output/$table.tsv.gz ||: done + + tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||: + + # Check if we cloned previous release repository successfully + if ! [ "$(ls -A previous_release_repository/tests/queries)" ] + then + echo -e "Backward compatibility check: Failed to clone previous release tests\tFAIL" >> /test_output/test_results.tsv + elif ! [ "$(ls -A previous_release_package_folder/clickhouse-common-static_*.deb && ls -A previous_release_package_folder/clickhouse-server_*.deb)" ] + then + echo -e "Backward compatibility check: Failed to download previous release packages\tFAIL" >> /test_output/test_results.tsv + else + echo -e "Successfully cloned previous release tests\tOK" >> /test_output/test_results.tsv + echo -e "Successfully downloaded previous release packages\tOK" >> /test_output/test_results.tsv + + # Uninstall current packages + dpkg --remove clickhouse-client + dpkg --remove clickhouse-server + dpkg --remove clickhouse-common-static-dbg + dpkg --remove clickhouse-common-static + + rm -rf /var/lib/clickhouse/* + + # Make BC check more funny by forcing Ordinary engine for system database + mkdir /var/lib/clickhouse/metadata + echo "ATTACH DATABASE system ENGINE=Ordinary" > /var/lib/clickhouse/metadata/system.sql + + # Install previous release packages + install_packages previous_release_package_folder + + # Start server from previous release + # Previous version may not be ready for fault injections + export ZOOKEEPER_FAULT_INJECTION=0 + configure + + # Avoid "Setting s3_check_objects_after_upload is neither a builtin setting..." + rm -f /etc/clickhouse-server/users.d/enable_blobs_check.xml ||: + rm -f /etc/clickhouse-server/users.d/marks.xml ||: + + # Remove s3 related configs to avoid "there is no disk type `cache`" + rm -f /etc/clickhouse-server/config.d/storage_conf.xml ||: + rm -f /etc/clickhouse-server/config.d/azure_storage_conf.xml ||: + + # Turn on after 22.12 + rm -f /etc/clickhouse-server/config.d/compressed_marks_and_index.xml ||: + # it uses recently introduced settings which previous versions may not have + rm -f /etc/clickhouse-server/users.d/insert_keeper_retries.xml ||: + + start + + clickhouse-client --query="SELECT 'Server version: ', version()" + + # Install new package before running stress test because we should use new + # clickhouse-client and new clickhouse-test. + # + # But we should leave old binary in /usr/bin/ and debug symbols in + # /usr/lib/debug/usr/bin (if any) for gdb and internal DWARF parser, so it + # will print sane stacktraces and also to avoid possible crashes. + # + # FIXME: those files can be extracted directly from debian package, but + # actually better solution will be to use different PATH instead of playing + # games with files from packages. 
+ mv /usr/bin/clickhouse previous_release_package_folder/ + mv /usr/lib/debug/usr/bin/clickhouse.debug previous_release_package_folder/ + install_packages package_folder + mv /usr/bin/clickhouse package_folder/ + mv /usr/lib/debug/usr/bin/clickhouse.debug package_folder/ + mv previous_release_package_folder/clickhouse /usr/bin/ + mv previous_release_package_folder/clickhouse.debug /usr/lib/debug/usr/bin/clickhouse.debug + + mkdir tmp_stress_output + + ./stress --test-cmd="/usr/bin/clickhouse-test --queries=\"previous_release_repository/tests/queries\"" --backward-compatibility-check --output-folder tmp_stress_output --global-time-limit=1200 \ + && echo -e 'Backward compatibility check: Test script exit code\tOK' >> /test_output/test_results.tsv \ + || echo -e 'Backward compatibility check: Test script failed\tFAIL' >> /test_output/test_results.tsv + rm -rf tmp_stress_output + + # We experienced deadlocks in this command in very rare cases. Let's debug it: + timeout 10m clickhouse-client --query="SELECT 'Tables count:', count() FROM system.tables" || + ( + echo "thread apply all backtrace (on select tables count)" >> /test_output/gdb.log + timeout 30m gdb -batch -ex 'thread apply all backtrace' -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log + clickhouse stop --force + ) + + stop 1 + mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.backward.stress.log + + # Start new server + mv package_folder/clickhouse /usr/bin/ + mv package_folder/clickhouse.debug /usr/lib/debug/usr/bin/clickhouse.debug + # Disable fault injections on start (we don't test them here, and it can lead to tons of requests in case of huge number of tables). + export ZOOKEEPER_FAULT_INJECTION=0 + configure + start 500 + clickhouse-client --query "SELECT 'Backward compatibility check: Server successfully started', 'OK'" >> /test_output/test_results.tsv \ + || (echo -e 'Backward compatibility check: Server failed to start\tFAIL' >> /test_output/test_results.tsv \ + && grep -a ".*Application" /var/log/clickhouse-server/clickhouse-server.log >> /test_output/bc_check_application_errors.txt) + + clickhouse-client --query="SELECT 'Server version: ', version()" + + # Let the server run for a while before checking log. + sleep 60 + + stop + mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.backward.dirty.log + + # Error messages (we should ignore some errors) + # FIXME https://github.com/ClickHouse/ClickHouse/issues/38643 ("Unknown index: idx.") + # FIXME https://github.com/ClickHouse/ClickHouse/issues/39174 ("Cannot parse string 'Hello' as UInt64") + # FIXME Not sure if it's expected, but some tests from BC check may not be finished yet when we restarting server. + # Let's just ignore all errors from queries ("} TCPHandler: Code:", "} executeQuery: Code:") + # FIXME https://github.com/ClickHouse/ClickHouse/issues/39197 ("Missing columns: 'v3' while processing query: 'v3, k, v1, v2, p'") + # NOTE Incompatibility was introduced in https://github.com/ClickHouse/ClickHouse/pull/39263, it's expected + # ("This engine is deprecated and is not supported in transactions", "[Queue = DB::MergeMutateRuntimeQueue]: Code: 235. DB::Exception: Part") + # FIXME https://github.com/ClickHouse/ClickHouse/issues/39174 - bad mutation does not indicate backward incompatibility + echo "Check for Error messages in server log:" + zgrep -Fav -e "Code: 236. 
DB::Exception: Cancelled merging parts" \ + -e "Code: 236. DB::Exception: Cancelled mutating parts" \ + -e "REPLICA_IS_ALREADY_ACTIVE" \ + -e "REPLICA_ALREADY_EXISTS" \ + -e "ALL_REPLICAS_LOST" \ + -e "DDLWorker: Cannot parse DDL task query" \ + -e "RaftInstance: failed to accept a rpc connection due to error 125" \ + -e "UNKNOWN_DATABASE" \ + -e "NETWORK_ERROR" \ + -e "UNKNOWN_TABLE" \ + -e "ZooKeeperClient" \ + -e "KEEPER_EXCEPTION" \ + -e "DirectoryMonitor" \ + -e "TABLE_IS_READ_ONLY" \ + -e "Code: 1000, e.code() = 111, Connection refused" \ + -e "UNFINISHED" \ + -e "NETLINK_ERROR" \ + -e "Renaming unexpected part" \ + -e "PART_IS_TEMPORARILY_LOCKED" \ + -e "and a merge is impossible: we didn't find" \ + -e "found in queue and some source parts for it was lost" \ + -e "is lost forever." \ + -e "Unknown index: idx." \ + -e "Cannot parse string 'Hello' as UInt64" \ + -e "} TCPHandler: Code:" \ + -e "} executeQuery: Code:" \ + -e "Missing columns: 'v3' while processing query: 'v3, k, v1, v2, p'" \ + -e "This engine is deprecated and is not supported in transactions" \ + -e "[Queue = DB::MergeMutateRuntimeQueue]: Code: 235. DB::Exception: Part" \ + -e "The set of parts restored in place of" \ + -e "(ReplicatedMergeTreeAttachThread): Initialization failed. Error" \ + -e "Code: 269. DB::Exception: Destination table is myself" \ + -e "Coordination::Exception: Connection loss" \ + -e "MutateFromLogEntryTask" \ + -e "No connection to ZooKeeper, cannot get shared table ID" \ + -e "Session expired" \ + /var/log/clickhouse-server/clickhouse-server.backward.dirty.log | zgrep -Fa "" > /test_output/bc_check_error_messages.txt \ + && echo -e 'Backward compatibility check: Error message in clickhouse-server.log (see bc_check_error_messages.txt)\tFAIL' >> /test_output/test_results.tsv \ + || echo -e 'Backward compatibility check: No Error messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv + + # Remove file bc_check_error_messages.txt if it's empty + [ -s /test_output/bc_check_error_messages.txt ] || rm /test_output/bc_check_error_messages.txt + + # Sanitizer asserts + zgrep -Fa "==================" /var/log/clickhouse-server/stderr.log >> /test_output/tmp + zgrep -Fa "WARNING" /var/log/clickhouse-server/stderr.log >> /test_output/tmp + zgrep -Fav -e "ASan doesn't fully support makecontext/swapcontext functions" -e "DB::Exception" /test_output/tmp > /dev/null \ + && echo -e 'Backward compatibility check: Sanitizer assert (in stderr.log)\tFAIL' >> /test_output/test_results.tsv \ + || echo -e 'Backward compatibility check: No sanitizer asserts\tOK' >> /test_output/test_results.tsv + rm -f /test_output/tmp + + # OOM + zgrep -Fa " Application: Child process was terminated by signal 9" /var/log/clickhouse-server/clickhouse-server.backward.*.log > /dev/null \ + && echo -e 'Backward compatibility check: OOM killer (or signal 9) in clickhouse-server.log\tFAIL' >> /test_output/test_results.tsv \ + || echo -e 'Backward compatibility check: No OOM messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv + + # Logical errors + echo "Check for Logical errors in server log:" + zgrep -Fa -A20 "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-server/clickhouse-server.backward.*.log > /test_output/bc_check_logical_errors.txt \ + && echo -e 'Backward compatibility check: Logical error thrown (see clickhouse-server.log or bc_check_logical_errors.txt)\tFAIL' >> /test_output/test_results.tsv \ + || echo -e 'Backward compatibility check: No logical errors\tOK' >> 
/test_output/test_results.tsv + + # Remove file bc_check_logical_errors.txt if it's empty + [ -s /test_output/bc_check_logical_errors.txt ] || rm /test_output/bc_check_logical_errors.txt + + # Crash + zgrep -Fa "########################################" /var/log/clickhouse-server/clickhouse-server.backward.*.log > /dev/null \ + && echo -e 'Backward compatibility check: Killed by signal (in clickhouse-server.log)\tFAIL' >> /test_output/test_results.tsv \ + || echo -e 'Backward compatibility check: Not crashed\tOK' >> /test_output/test_results.tsv + + # It also checks for crash without stacktrace (printed by watchdog) + echo "Check for Fatal message in server log:" + zgrep -Fa " " /var/log/clickhouse-server/clickhouse-server.backward.*.log > /test_output/bc_check_fatal_messages.txt \ + && echo -e 'Backward compatibility check: Fatal message in clickhouse-server.log (see bc_check_fatal_messages.txt)\tFAIL' >> /test_output/test_results.tsv \ + || echo -e 'Backward compatibility check: No fatal messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv + + # Remove file bc_check_fatal_messages.txt if it's empty + [ -s /test_output/bc_check_fatal_messages.txt ] || rm /test_output/bc_check_fatal_messages.txt + + tar -chf /test_output/coordination.backward.tar /var/lib/clickhouse/coordination ||: + for table in query_log trace_log + do + clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | pigz > /test_output/$table.backward.tsv.gz ||: + done + fi fi dmesg -T > /test_output/dmesg.log diff --git a/docker/test/stress/stress b/docker/test/stress/stress index a0ec86f7fbe..d1860e9e14b 100755 --- a/docker/test/stress/stress +++ b/docker/test/stress/stress @@ -14,9 +14,6 @@ def get_options(i, backward_compatibility_check): if 0 < i: options.append("--order=random") - if i % 3 == 1: - options.append("--db-engine=Ordinary") - if i % 3 == 2 and not backward_compatibility_check: options.append( '''--db-engine="Replicated('/test/db/test_{}', 's1', 'r1')"'''.format(i) diff --git a/docker/test/testflows/runner/Dockerfile b/docker/test/testflows/runner/Dockerfile deleted file mode 100644 index bfc3ed5e39f..00000000000 --- a/docker/test/testflows/runner/Dockerfile +++ /dev/null @@ -1,82 +0,0 @@ -# docker build -t clickhouse/testflows-runner . -FROM ubuntu:20.04 - -# ARG for quick switch to a given ubuntu mirror -ARG apt_archive="http://archive.ubuntu.com" -RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list - -RUN apt-get update \ - && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \ - ca-certificates \ - bash \ - btrfs-progs \ - e2fsprogs \ - iptables \ - xfsprogs \ - tar \ - pigz \ - wget \ - git \ - iproute2 \ - cgroupfs-mount \ - python3-pip \ - tzdata \ - libicu-dev \ - bsdutils \ - curl \ - liblua5.1-dev \ - luajit \ - libssl-dev \ - libcurl4-openssl-dev \ - gdb \ - && rm -rf \ - /var/lib/apt/lists/* \ - /var/cache/debconf \ - /tmp/* \ - && apt-get clean - -ENV TZ=Europe/Moscow -RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone - -RUN pip3 install urllib3 testflows==1.7.20 docker-compose==1.29.2 docker==5.0.0 dicttoxml kazoo tzlocal==2.1 pytz python-dateutil numpy - -ENV DOCKER_CHANNEL stable -ENV DOCKER_VERSION 20.10.6 - -# Architecture of the image when BuildKit/buildx is used -ARG TARGETARCH - -# Install docker -RUN arch=${TARGETARCH:-amd64} \ - && case $arch in \ - amd64) rarch=x86_64 ;; \ - arm64) rarch=aarch64 ;; \ - esac \ - && set -eux \ - && if ! 
wget -nv -O docker.tgz "https://download.docker.com/linux/static/${DOCKER_CHANNEL}/${rarch}/docker-${DOCKER_VERSION}.tgz"; then \ - echo >&2 "error: failed to download 'docker-${DOCKER_VERSION}' from '${DOCKER_CHANNEL}' for '${rarch}'" \ - && exit 1; \ - fi \ - && tar --extract \ - --file docker.tgz \ - --strip-components 1 \ - --directory /usr/local/bin/ \ - && rm docker.tgz \ - && dockerd --version \ - && docker --version - -COPY modprobe.sh /usr/local/bin/modprobe -COPY dockerd-entrypoint.sh /usr/local/bin/ -COPY process_testflows_result.py /usr/local/bin/ - -RUN set -x \ - && addgroup --system dockremap \ - && adduser --system dockremap \ - && adduser dockremap dockremap \ - && echo 'dockremap:165536:65536' >> /etc/subuid \ - && echo 'dockremap:165536:65536' >> /etc/subgid - -VOLUME /var/lib/docker -EXPOSE 2375 -ENTRYPOINT ["dockerd-entrypoint.sh"] -CMD ["sh", "-c", "python3 regression.py --no-color -o new-fails --local --clickhouse-binary-path ${CLICKHOUSE_TESTS_SERVER_BIN_PATH} --log test.log ${TESTFLOWS_OPTS}; cat test.log | tfs report results --format json > results.json; /usr/local/bin/process_testflows_result.py || echo -e 'failure\tCannot parse results' > check_status.tsv; find * -type f | grep _instances | grep clickhouse-server | xargs -n1 tar -rvf clickhouse_logs.tar; gzip -9 clickhouse_logs.tar"] diff --git a/docker/test/testflows/runner/dockerd-entrypoint.sh b/docker/test/testflows/runner/dockerd-entrypoint.sh deleted file mode 100755 index d310ee583bf..00000000000 --- a/docker/test/testflows/runner/dockerd-entrypoint.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash -set -e - -echo "Configure to use Yandex dockerhub-proxy" -mkdir -p /etc/docker/ -cat > /etc/docker/daemon.json << EOF -{ - "insecure-registries" : ["dockerhub-proxy.dockerhub-proxy-zone:5000"], - "registry-mirrors" : ["http://dockerhub-proxy.dockerhub-proxy-zone:5000"] -} -EOF - -# In case of test hung it is convenient to use pytest --pdb to debug it, -# and on hung you can simply press Ctrl-C and it will spawn a python pdb, -# but on SIGINT dockerd will exit, so ignore it to preserve the daemon. -trap '' INT -dockerd --host=unix:///var/run/docker.sock --host=tcp://0.0.0.0:2375 &>/var/log/somefile & - -set +e -reties=0 -while true; do - docker info &>/dev/null && break - reties=$((reties+1)) - if [[ $reties -ge 100 ]]; then # 10 sec max - echo "Can't start docker daemon, timeout exceeded." >&2 - exit 1; - fi - sleep 0.1 -done -set -e - -echo "Start tests" -export CLICKHOUSE_TESTS_SERVER_BIN_PATH=/clickhouse -export CLICKHOUSE_TESTS_CLIENT_BIN_PATH=/clickhouse -export CLICKHOUSE_TESTS_BASE_CONFIG_DIR=/clickhouse-config -export CLICKHOUSE_ODBC_BRIDGE_BINARY_PATH=/clickhouse-odbc-bridge - -cd /ClickHouse/tests/testflows -exec "$@" diff --git a/docker/test/testflows/runner/modprobe.sh b/docker/test/testflows/runner/modprobe.sh deleted file mode 100755 index cb6a527736b..00000000000 --- a/docker/test/testflows/runner/modprobe.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/sh -set -eu - -# "modprobe" without modprobe -# https://twitter.com/lucabruno/status/902934379835662336 - -# this isn't 100% fool-proof, but it'll have a much higher success rate than simply using the "real" modprobe - -# Docker often uses "modprobe -va foo bar baz" -# so we ignore modules that start with "-" -for module; do - if [ "${module#-}" = "$module" ]; then - ip link show "$module" || true - lsmod | grep "$module" || true - fi -done - -# remove /usr/local/... 
from PATH so we can exec the real modprobe as a last resort -export PATH='/usr/sbin:/usr/bin:/sbin:/bin' -exec modprobe "$@" diff --git a/docker/test/testflows/runner/process_testflows_result.py b/docker/test/testflows/runner/process_testflows_result.py deleted file mode 100755 index 8bfc4ac0b0f..00000000000 --- a/docker/test/testflows/runner/process_testflows_result.py +++ /dev/null @@ -1,71 +0,0 @@ -#!/usr/bin/env python3 - -import os -import logging -import argparse -import csv -import json - - -def process_result(result_folder): - json_path = os.path.join(result_folder, "results.json") - if not os.path.exists(json_path): - return "success", "No testflows in branch", None, [] - - test_binary_log = os.path.join(result_folder, "test.log") - with open(json_path) as source: - results = json.loads(source.read()) - - total_tests = 0 - total_ok = 0 - total_fail = 0 - total_other = 0 - test_results = [] - for test in results["tests"]: - test_name = test["test"]["test_name"] - test_result = test["result"]["result_type"].upper() - test_time = str(test["result"]["message_rtime"]) - total_tests += 1 - if test_result == "OK": - total_ok += 1 - elif test_result == "FAIL" or test_result == "ERROR": - total_fail += 1 - else: - total_other += 1 - - test_results.append((test_name, test_result, test_time)) - if total_fail != 0: - status = "failure" - else: - status = "success" - - description = "failed: {}, passed: {}, other: {}".format( - total_fail, total_ok, total_other - ) - return status, description, test_results, [json_path, test_binary_log] - - -def write_results(results_file, status_file, results, status): - with open(results_file, "w") as f: - out = csv.writer(f, delimiter="\t") - out.writerows(results) - with open(status_file, "w") as f: - out = csv.writer(f, delimiter="\t") - out.writerow(status) - - -if __name__ == "__main__": - logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s") - parser = argparse.ArgumentParser( - description="ClickHouse script for parsing results of Testflows tests" - ) - parser.add_argument("--in-results-dir", default="./") - parser.add_argument("--out-results-file", default="./test_results.tsv") - parser.add_argument("--out-status-file", default="./check_status.tsv") - args = parser.parse_args() - - state, description, test_results, logs = process_result(args.in_results_dir) - logging.info("Result parsed") - status = (state, description) - write_results(args.out_results_file, args.out_status_file, test_results, status) - logging.info("Result written") diff --git a/docs/en/development/continuous-integration.md b/docs/en/development/continuous-integration.md index 677fb81efdd..ef3efa75d66 100644 --- a/docs/en/development/continuous-integration.md +++ b/docs/en/development/continuous-integration.md @@ -118,7 +118,6 @@ Builds ClickHouse in various configurations for use in further steps. You have t - **Compiler**: `gcc-9` or `clang-10` (or `clang-10-xx` for other architectures e.g. `clang-10-freebsd`). - **Build type**: `Debug` or `RelWithDebInfo` (cmake). - **Sanitizer**: `none` (without sanitizers), `address` (ASan), `memory` (MSan), `undefined` (UBSan), or `thread` (TSan). -- **Split** `splitted` is a [split build](../development/build.md#split-build) - **Status**: `success` or `fail` - **Build log**: link to the building and files copying log, useful when build failed. - **Build time**. @@ -130,7 +129,6 @@ Builds ClickHouse in various configurations for use in further steps. You have t - `clickhouse`: Main built binary. 
- `clickhouse-odbc-bridge` - `unit_tests_dbms`: GoogleTest binary with ClickHouse unit tests. - - `shared_build.tgz`: build with shared libraries. - `performance.tgz`: Special package for performance tests. @@ -169,16 +167,6 @@ concurrency-related errors. If it fails: of error. -## Split Build Smoke Test - -Checks that the server build in [split build](../development/developer-instruction.md#split-build) -configuration can start and run simple queries. If it fails: - - * Fix other test errors first; - * Build the server in [split build](../development/developer-instruction.md#split-build) configuration - locally and check whether it can start and run `select 1`. - - ## Compatibility Check Checks that `clickhouse` binary runs on distributions with old libc versions. If it fails, ask a maintainer for help. diff --git a/docs/en/engines/table-engines/integrations/kafka.md b/docs/en/engines/table-engines/integrations/kafka.md index 88a0d08ebbd..7de856716fb 100644 --- a/docs/en/engines/table-engines/integrations/kafka.md +++ b/docs/en/engines/table-engines/integrations/kafka.md @@ -34,7 +34,14 @@ SETTINGS [kafka_max_block_size = 0,] [kafka_skip_broken_messages = N,] [kafka_commit_every_batch = 0,] - [kafka_thread_per_consumer = 0] + [kafka_client_id = '',] + [kafka_poll_timeout_ms = 0,] + [kafka_poll_max_batch_size = 0,] + [kafka_flush_interval_ms = 0,] + [kafka_thread_per_consumer = 0,] + [kafka_handle_error_mode = 'default',] + [kafka_commit_on_select = false,] + [kafka_max_rows_per_message = 1]; ``` Required parameters: @@ -46,13 +53,20 @@ Required parameters: Optional parameters: -- `kafka_row_delimiter` — Delimiter character, which ends the message. +- `kafka_row_delimiter` — Delimiter character, which ends the message. **This setting is deprecated and is no longer used, not left for compatibility reasons.** - `kafka_schema` — Parameter that must be used if the format requires a schema definition. For example, [Cap’n Proto](https://capnproto.org/) requires the path to the schema file and the name of the root `schema.capnp:Message` object. -- `kafka_num_consumers` — The number of consumers per table. Default: `1`. Specify more consumers if the throughput of one consumer is insufficient. The total number of consumers should not exceed the number of partitions in the topic, since only one consumer can be assigned per partition, and must not be greater than the number of physical cores on the server where ClickHouse is deployed. -- `kafka_max_block_size` — The maximum batch size (in messages) for poll (default: `max_block_size`). -- `kafka_skip_broken_messages` — Kafka message parser tolerance to schema-incompatible messages per block. Default: `0`. If `kafka_skip_broken_messages = N` then the engine skips *N* Kafka messages that cannot be parsed (a message equals a row of data). -- `kafka_commit_every_batch` — Commit every consumed and handled batch instead of a single commit after writing a whole block (default: `0`). -- `kafka_thread_per_consumer` — Provide independent thread for each consumer (default: `0`). When enabled, every consumer flush the data independently, in parallel (otherwise — rows from several consumers squashed to form one block). +- `kafka_num_consumers` — The number of consumers per table. Specify more consumers if the throughput of one consumer is insufficient. 
The total number of consumers should not exceed the number of partitions in the topic, since only one consumer can be assigned per partition, and must not be greater than the number of physical cores on the server where ClickHouse is deployed. Default: `1`.
+- `kafka_max_block_size` — The maximum batch size (in messages) for poll. Default: [max_insert_block_size](../../../operations/settings/settings.md#setting-max_insert_block_size).
+- `kafka_skip_broken_messages` — Kafka message parser tolerance to schema-incompatible messages per block. If `kafka_skip_broken_messages = N` then the engine skips *N* Kafka messages that cannot be parsed (a message equals a row of data). Default: `0`.
+- `kafka_commit_every_batch` — Commit every consumed and handled batch instead of a single commit after writing a whole block. Default: `0`.
+- `kafka_client_id` — Client identifier. Empty by default.
+- `kafka_poll_timeout_ms` — Timeout for a single poll from Kafka. Default: [stream_poll_timeout_ms](../../../operations/settings/settings.md#stream_poll_timeout_ms).
+- `kafka_poll_max_batch_size` — Maximum number of messages to be polled in a single Kafka poll. Default: [max_block_size](../../../operations/settings/settings.md#setting-max_block_size).
+- `kafka_flush_interval_ms` — Timeout for flushing data from Kafka. Default: [stream_flush_interval_ms](../../../operations/settings/settings.md#stream-flush-interval-ms).
+- `kafka_thread_per_consumer` — Provide an independent thread for each consumer. When enabled, every consumer flushes its data independently, in parallel (otherwise, rows from several consumers are squashed to form one block). Default: `0`.
+- `kafka_handle_error_mode` — How to handle errors for the Kafka engine. Possible values: `default`, `stream`.
+- `kafka_commit_on_select` — Commit messages when a select query is made. Default: `false`.
+- `kafka_max_rows_per_message` — The maximum number of rows written in one Kafka message for row-based formats. Default: `1`.

Examples:

@@ -94,7 +108,7 @@ Do not use this method in new projects. If possible, switch old projects to the

``` sql
Kafka(kafka_broker_list, kafka_topic_list, kafka_group_name, kafka_format
-  [, kafka_row_delimiter, kafka_schema, kafka_num_consumers, kafka_skip_broken_messages])
+  [, kafka_row_delimiter, kafka_schema, kafka_num_consumers, kafka_max_block_size, kafka_skip_broken_messages, kafka_commit_every_batch, kafka_client_id, kafka_poll_timeout_ms, kafka_poll_max_batch_size, kafka_flush_interval_ms, kafka_thread_per_consumer, kafka_handle_error_mode, kafka_commit_on_select, kafka_max_rows_per_message]);
```

@@ -193,6 +207,14 @@ Example:

- `_headers.name` — Array of message's headers keys.
- `_headers.value` — Array of message's headers values.

+## Data formats support {#data-formats-support}
+
+Kafka engine supports all [formats](../../../interfaces/formats.md) supported in ClickHouse.
+The number of rows in one Kafka message depends on whether the format is row-based or block-based:
+
+- For row-based formats the number of rows in one Kafka message can be controlled by the setting `kafka_max_rows_per_message`.
+- For block-based formats we cannot divide a block into smaller parts, but the number of rows in one block can be controlled by the general setting [max_block_size](../../../operations/settings/settings.md#setting-max_block_size).
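As a minimal sketch of how the settings documented above fit together, the following `CREATE TABLE` uses a few of them. The broker address, topic, and consumer group below are illustrative placeholders, not values taken from this patch.

``` sql
CREATE TABLE kafka_queue_example
(
    timestamp UInt64,
    level String,
    message String
)
ENGINE = Kafka
SETTINGS
    kafka_broker_list = 'localhost:9092',      -- placeholder broker address
    kafka_topic_list = 'logs',                 -- placeholder topic
    kafka_group_name = 'clickhouse_example',   -- placeholder consumer group
    kafka_format = 'JSONEachRow',
    kafka_client_id = 'clickhouse-example',    -- optional client identifier (see above)
    kafka_num_consumers = 2,
    kafka_max_rows_per_message = 1;            -- only affects row-based output formats
```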
+ **See Also** - [Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns) diff --git a/docs/en/engines/table-engines/integrations/nats.md b/docs/en/engines/table-engines/integrations/nats.md index 90b30dc8295..35ad9068be8 100644 --- a/docs/en/engines/table-engines/integrations/nats.md +++ b/docs/en/engines/table-engines/integrations/nats.md @@ -37,8 +37,10 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] [nats_max_block_size = N,] [nats_flush_interval_ms = N,] [nats_username = 'user',] - [nats_password = 'password'] - [redis_password = 'clickhouse'] + [nats_password = 'password',] + [nats_token = 'clickhouse',] + [nats_startup_connect_tries = '5'] + [nats_max_rows_per_message = 1] ``` Required parameters: @@ -49,7 +51,7 @@ Required parameters: Optional parameters: -- `nats_row_delimiter` – Delimiter character, which ends the message. +- `nats_row_delimiter` – Delimiter character, which ends the message. **This setting is deprecated and is no longer used, not left for compatibility reasons.** - `nats_schema` – Parameter that must be used if the format requires a schema definition. For example, [Cap’n Proto](https://capnproto.org/) requires the path to the schema file and the name of the root `schema.capnp:Message` object. - `nats_num_consumers` – The number of consumers per table. Default: `1`. Specify more consumers if the throughput of one consumer is insufficient. - `nats_queue_group` – Name for queue group of NATS subscribers. Default is the table name. @@ -57,11 +59,13 @@ Optional parameters: - `nats_reconnect_wait` – Amount of time in milliseconds to sleep between each reconnect attempt. Default: `5000`. - `nats_server_list` - Server list for connection. Can be specified to connect to NATS cluster. - `nats_skip_broken_messages` - NATS message parser tolerance to schema-incompatible messages per block. Default: `0`. If `nats_skip_broken_messages = N` then the engine skips *N* RabbitMQ messages that cannot be parsed (a message equals a row of data). -- `nats_max_block_size` - Number of row collected by poll(s) for flushing data from NATS. -- `nats_flush_interval_ms` - Timeout for flushing data read from NATS. +- `nats_max_block_size` - Number of row collected by poll(s) for flushing data from NATS. Default: [max_insert_block_size](../../../operations/settings/settings.md#setting-max_insert_block_size). +- `nats_flush_interval_ms` - Timeout for flushing data read from NATS. Default: [stream_flush_interval_ms](../../../operations/settings/settings.md#stream-flush-interval-ms). - `nats_username` - NATS username. - `nats_password` - NATS password. - `nats_token` - NATS auth token. +- `nats_startup_connect_tries` - Number of connect tries at startup. Default: `5`. +- `nats_max_rows_per_message` — The maximum number of rows written in one NATS message for row-based formats. (default : `1`). SSL connection: @@ -159,6 +163,14 @@ If you want to change the target table by using `ALTER`, we recommend disabling ## Virtual Columns {#virtual-columns} -- `_subject` - NATS message subject. +- `_subject` - NATS message subject. + +## Data formats support {#data-formats-support} + +NATS engine supports all [formats](../../../interfaces/formats.md) supported in ClickHouse. +The number of rows in one NATS message depends on whether the format is row-based or block-based: + +- For row-based formats the number of rows in one NATS message can be controlled by setting `nats_max_rows_per_message`. 
+- For block-based formats we cannot divide block into smaller parts, but the number of rows in one block can be controlled by general setting [max_block_size](../../../operations/settings/settings.md#setting-max_block_size). [Original article](https://clickhouse.com/docs/en/engines/table-engines/integrations/nats/) diff --git a/docs/en/engines/table-engines/integrations/rabbitmq.md b/docs/en/engines/table-engines/integrations/rabbitmq.md index 9227e5cdbfd..2e5a45931f8 100644 --- a/docs/en/engines/table-engines/integrations/rabbitmq.md +++ b/docs/en/engines/table-engines/integrations/rabbitmq.md @@ -37,8 +37,16 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] [rabbitmq_persistent = 0,] [rabbitmq_skip_broken_messages = N,] [rabbitmq_max_block_size = N,] - [rabbitmq_flush_interval_ms = N] - [rabbitmq_queue_settings_list = 'x-dead-letter-exchange=my-dlx,x-max-length=10,x-overflow=reject-publish'] + [rabbitmq_flush_interval_ms = N,] + [rabbitmq_queue_settings_list = 'x-dead-letter-exchange=my-dlx,x-max-length=10,x-overflow=reject-publish',] + [rabbitmq_queue_consume = false,] + [rabbitmq_address = '',] + [rabbitmq_vhost = '/',] + [rabbitmq_queue_consume = false,] + [rabbitmq_username = '',] + [rabbitmq_password = '',] + [rabbitmq_commit_on_select = false,] + [rabbitmq_max_rows_per_message = 1] ``` Required parameters: @@ -49,19 +57,27 @@ Required parameters: Optional parameters: -- `rabbitmq_exchange_type` – The type of RabbitMQ exchange: `direct`, `fanout`, `topic`, `headers`, `consistent_hash`. Default: `fanout`. -- `rabbitmq_routing_key_list` – A comma-separated list of routing keys. -- `rabbitmq_row_delimiter` – Delimiter character, which ends the message. -- `rabbitmq_schema` – Parameter that must be used if the format requires a schema definition. For example, [Cap’n Proto](https://capnproto.org/) requires the path to the schema file and the name of the root `schema.capnp:Message` object. -- `rabbitmq_num_consumers` – The number of consumers per table. Default: `1`. Specify more consumers if the throughput of one consumer is insufficient. -- `rabbitmq_num_queues` – Total number of queues. Default: `1`. Increasing this number can significantly improve performance. -- `rabbitmq_queue_base` - Specify a hint for queue names. Use cases of this setting are described below. -- `rabbitmq_deadletter_exchange` - Specify name for a [dead letter exchange](https://www.rabbitmq.com/dlx.html). You can create another table with this exchange name and collect messages in cases when they are republished to dead letter exchange. By default dead letter exchange is not specified. -- `rabbitmq_persistent` - If set to 1 (true), in insert query delivery mode will be set to 2 (marks messages as 'persistent'). Default: `0`. -- `rabbitmq_skip_broken_messages` – RabbitMQ message parser tolerance to schema-incompatible messages per block. Default: `0`. If `rabbitmq_skip_broken_messages = N` then the engine skips *N* RabbitMQ messages that cannot be parsed (a message equals a row of data). -- `rabbitmq_max_block_size` -- `rabbitmq_flush_interval_ms` -- `rabbitmq_queue_settings_list` - allows to set RabbitMQ settings when creating a queue. Available settings: `x-max-length`, `x-max-length-bytes`, `x-message-ttl`, `x-expires`, `x-priority`, `x-max-priority`, `x-overflow`, `x-dead-letter-exchange`, `x-queue-type`. The `durable` setting is enabled automatically for the queue. +- `rabbitmq_exchange_type` – The type of RabbitMQ exchange: `direct`, `fanout`, `topic`, `headers`, `consistent_hash`. 
Default: `fanout`.
+- `rabbitmq_routing_key_list` – A comma-separated list of routing keys.
+- `rabbitmq_row_delimiter` – Delimiter character, which ends the message. **This setting is deprecated and is no longer used, not left for compatibility reasons.**
+- `rabbitmq_schema` – Parameter that must be used if the format requires a schema definition. For example, [Cap’n Proto](https://capnproto.org/) requires the path to the schema file and the name of the root `schema.capnp:Message` object.
+- `rabbitmq_num_consumers` – The number of consumers per table. Specify more consumers if the throughput of one consumer is insufficient. Default: `1`.
+- `rabbitmq_num_queues` – Total number of queues. Increasing this number can significantly improve performance. Default: `1`.
+- `rabbitmq_queue_base` - Specify a hint for queue names. Use cases of this setting are described below.
+- `rabbitmq_deadletter_exchange` - Specify the name of a [dead letter exchange](https://www.rabbitmq.com/dlx.html). You can create another table with this exchange name and collect messages in cases when they are republished to the dead letter exchange. By default the dead letter exchange is not specified.
+- `rabbitmq_persistent` - If set to 1 (true), the delivery mode in insert queries will be set to 2 (marks messages as 'persistent'). Default: `0`.
+- `rabbitmq_skip_broken_messages` – RabbitMQ message parser tolerance to schema-incompatible messages per block. If `rabbitmq_skip_broken_messages = N` then the engine skips *N* RabbitMQ messages that cannot be parsed (a message equals a row of data). Default: `0`.
+- `rabbitmq_max_block_size` - Number of rows collected before flushing data from RabbitMQ. Default: [max_insert_block_size](../../../operations/settings/settings.md#setting-max_insert_block_size).
+- `rabbitmq_flush_interval_ms` - Timeout for flushing data from RabbitMQ. Default: [stream_flush_interval_ms](../../../operations/settings/settings.md#stream-flush-interval-ms).
+- `rabbitmq_queue_settings_list` - Allows setting RabbitMQ settings when creating a queue. Available settings: `x-max-length`, `x-max-length-bytes`, `x-message-ttl`, `x-expires`, `x-priority`, `x-max-priority`, `x-overflow`, `x-dead-letter-exchange`, `x-queue-type`. The `durable` setting is enabled automatically for the queue.
+- `rabbitmq_address` - Address for connection. Use either this setting or `rabbitmq_host_port`.
+- `rabbitmq_vhost` - RabbitMQ vhost. Default: `'/'`.
+- `rabbitmq_queue_consume` - Use user-defined queues and do not make any RabbitMQ setup: declaring exchanges, queues, bindings. Default: `false`.
+- `rabbitmq_username` - RabbitMQ username.
+- `rabbitmq_password` - RabbitMQ password.
+- `rabbitmq_commit_on_select` - Commit messages when a select query is made. Default: `false`.
+- `rabbitmq_max_rows_per_message` — The maximum number of rows written in one RabbitMQ message for row-based formats. Default: `1`.
+
SSL connection:

@@ -166,11 +182,20 @@ Example:

## Virtual Columns {#virtual-columns}

-- `_exchange_name` - RabbitMQ exchange name.
-- `_channel_id` - ChannelID, on which consumer, who received the message, was declared.
-- `_delivery_tag` - DeliveryTag of the received message. Scoped per channel.
-- `_redelivered` - `redelivered` flag of the message.
-- `_message_id` - messageID of the received message; non-empty if was set, when message was published.
-- `_timestamp` - timestamp of the received message; non-empty if was set, when message was published.
+- `_exchange_name` - RabbitMQ exchange name.
+- `_channel_id` - ChannelID, on which consumer, who received the message, was declared. +- `_delivery_tag` - DeliveryTag of the received message. Scoped per channel. +- `_redelivered` - `redelivered` flag of the message. +- `_message_id` - messageID of the received message; non-empty if was set, when message was published. +- `_timestamp` - timestamp of the received message; non-empty if was set, when message was published. + +## Data formats support {#data-formats-support} + +RabbitMQ engine supports all [formats](../../../interfaces/formats.md) supported in ClickHouse. +The number of rows in one RabbitMQ message depends on whether the format is row-based or block-based: + +- For row-based formats the number of rows in one RabbitMQ message can be controlled by setting `rabbitmq_max_rows_per_message`. +- For block-based formats we cannot divide block into smaller parts, but the number of rows in one block can be controlled by general setting [max_block_size](../../../operations/settings/settings.md#setting-max_block_size). + [Original article](https://clickhouse.com/docs/en/engines/table-engines/integrations/rabbitmq/) diff --git a/docs/en/interfaces/cli.md b/docs/en/interfaces/cli.md index 4f07f99fb26..e3b40d83efe 100644 --- a/docs/en/interfaces/cli.md +++ b/docs/en/interfaces/cli.md @@ -2,11 +2,10 @@ slug: /en/interfaces/cli sidebar_position: 17 sidebar_label: Command-Line Client +title: Command-Line Client --- import ConnectionDetails from '@site/docs/en/_snippets/_gather_your_details_native.md'; -# Command-line Client - ## clickhouse-client ClickHouse provides a native command-line client: `clickhouse-client`. The client supports command-line options and configuration files. For more information, see [Configuring](#interfaces_cli_configuration). diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index b7ef859f974..3fe26fa8eff 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -85,7 +85,7 @@ The supported formats are: | [MySQLDump](#mysqldump) | ✔ | ✗ | -You can control some format processing parameters with the ClickHouse settings. For more information read the [Settings](../operations/settings/settings.md) section. +You can control some format processing parameters with the ClickHouse settings. For more information read the [Settings](/docs/en/operations/settings/settings.md) section. ## TabSeparated {#tabseparated} @@ -148,12 +148,12 @@ Only a small set of symbols are escaped. You can easily stumble onto a string va Arrays are written as a list of comma-separated values in square brackets. Number items in the array are formatted as normally. `Date` and `DateTime` types are written in single quotes. Strings are written in single quotes with the same escaping rules as above. -[NULL](../sql-reference/syntax.md) is formatted according to setting [format_tsv_null_representation](../operations/settings/settings.md#format_tsv_null_representation) (default value is `\N`). +[NULL](/docs/en/sql-reference/syntax.md) is formatted according to setting [format_tsv_null_representation](/docs/en/operations/settings/settings.md/#format_tsv_null_representation) (default value is `\N`). In input data, ENUM values can be represented as names or as ids. First, we try to match the input value to the ENUM name. If we fail and the input value is a number, we try to match this number to ENUM id. 
-If input data contains only ENUM ids, it's recommended to enable the setting [input_format_tsv_enum_as_number](../operations/settings/settings.md#input_format_tsv_enum_as_number) to optimize ENUM parsing. +If input data contains only ENUM ids, it's recommended to enable the setting [input_format_tsv_enum_as_number](/docs/en/operations/settings/settings.md/#input_format_tsv_enum_as_number) to optimize ENUM parsing. -Each element of [Nested](../sql-reference/data-types/nested-data-structures/nested.md) structures is represented as array. +Each element of [Nested](/docs/en/sql-reference/data-types/nested-data-structures/nested.md) structures is represented as an array. For example: @@ -183,12 +183,12 @@ SELECT * FROM nestedt FORMAT TSV ### TabSeparated format settings {#tabseparated-format-settings} -- [format_tsv_null_representation](../operations/settings/settings.md#format_tsv_null_representation) - custom NULL representation in TSV format. Default value - `\N`. -- [input_format_tsv_empty_as_default](../operations/settings/settings.md#input_format_tsv_empty_as_default) - treat empty fields in TSV input as default values. Default value - `false`. For complex default expressions [input_format_defaults_for_omitted_fields](../operations/settings/settings.md#input_format_defaults_for_omitted_fields) must be enabled too. -- [input_format_tsv_enum_as_number](../operations/settings/settings.md#input_format_tsv_enum_as_number) - treat inserted enum values in TSV formats as enum indices. Default value - `false`. -- [input_format_tsv_use_best_effort_in_schema_inference](../operations/settings/settings.md#input_format_tsv_use_best_effort_in_schema_inference) - use some tweaks and heuristics to infer schema in TSV format. If disabled, all fields will be inferred as Strings. Default value - `true`. -- [output_format_tsv_crlf_end_of_line](../operations/settings/settings.md#output_format_tsv_crlf_end_of_line) - if it is set true, end of line in TSV output format will be `\r\n` instead of `\n`. Default value - `false`. -- [input_format_tsv_skip_first_lines](../operations/settings/settings.md#input_format_tsv_skip_first_lines) - skip specified number of lines at the beginning of data. Default value - `0`. +- [format_tsv_null_representation](/docs/en/operations/settings/settings.md/#format_tsv_null_representation) - custom NULL representation in TSV format. Default value - `\N`. +- [input_format_tsv_empty_as_default](/docs/en/operations/settings/settings.md/#input_format_tsv_empty_as_default) - treat empty fields in TSV input as default values. Default value - `false`. For complex default expressions [input_format_defaults_for_omitted_fields](/docs/en/operations/settings/settings.md/#input_format_defaults_for_omitted_fields) must be enabled too. +- [input_format_tsv_enum_as_number](/docs/en/operations/settings/settings.md/#input_format_tsv_enum_as_number) - treat inserted enum values in TSV formats as enum indices. Default value - `false`. +- [input_format_tsv_use_best_effort_in_schema_inference](/docs/en/operations/settings/settings.md/#input_format_tsv_use_best_effort_in_schema_inference) - use some tweaks and heuristics to infer schema in TSV format. If disabled, all fields will be inferred as Strings. Default value - `true`. +- [output_format_tsv_crlf_end_of_line](/docs/en/operations/settings/settings.md/#output_format_tsv_crlf_end_of_line) - if it is set true, end of line in TSV output format will be `\r\n` instead of `\n`. Default value - `false`. 
+- [input_format_tsv_skip_first_lines](/docs/en/operations/settings/settings.md/#input_format_tsv_skip_first_lines) - skip specified number of lines at the beginning of data. Default value - `0`. ## TabSeparatedRaw {#tabseparatedraw} @@ -204,8 +204,8 @@ Differs from the `TabSeparated` format in that the column names are written in t During parsing, the first row is expected to contain the column names. You can use column names to determine their position and to check their correctness. :::warning -If setting [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) is set to 1, -the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1. +If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings.md/#input_format_with_names_use_header) is set to 1, +the columns from the input data will be mapped to the columns of the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. ::: @@ -216,10 +216,10 @@ This format is also available under the name `TSVWithNames`. Differs from the `TabSeparated` format in that the column names are written to the first row, while the column types are in the second row. :::warning -If setting [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) is set to 1, -the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1. +If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings.md/#input_format_with_names_use_header) is set to 1, +the columns from the input data will be mapped to the columns in the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. -If setting [input_format_with_types_use_header](../operations/settings/settings.md#input_format_with_types_use_header) is set to 1, +If setting [input_format_with_types_use_header](/docs/en/operations/settings/settings.md/#input_format_with_types_use_header) is set to 1, the types from input data will be compared with the types of the corresponding columns from the table. Otherwise, the second row will be skipped. ::: @@ -245,7 +245,7 @@ This format allows specifying a custom format string with placeholders for value It uses settings `format_template_resultset`, `format_template_row`, `format_template_rows_between_delimiter` and some settings of other formats (e.g. `output_format_json_quote_64bit_integers` when using `JSON` escaping, see further) -Setting `format_template_row` specifies path to file, which contains format string for rows with the following syntax: +Setting `format_template_row` specifies the path to the file containing format strings for rows with the following syntax: `delimiter_1${column_1:serializeAs_1}delimiter_2${column_2:serializeAs_2} ... 
delimiter_N`, @@ -253,10 +253,10 @@ where `delimiter_i` is a delimiter between values (`$` symbol can be escaped as `column_i` is a name or index of a column whose values are to be selected or inserted (if empty, then column will be skipped), `serializeAs_i` is an escaping rule for the column values. The following escaping rules are supported: -- `CSV`, `JSON`, `XML` (similarly to the formats of the same names) -- `Escaped` (similarly to `TSV`) -- `Quoted` (similarly to `Values`) -- `Raw` (without escaping, similarly to `TSVRaw`) +- `CSV`, `JSON`, `XML` (similar to the formats of the same names) +- `Escaped` (similar to `TSV`) +- `Quoted` (similar to `Values`) +- `Raw` (without escaping, similar to `TSVRaw`) - `None` (no escaping rule, see further) If an escaping rule is omitted, then `None` will be used. `XML` is suitable only for output. @@ -269,9 +269,9 @@ the values of `SearchPhrase`, `c` and `price` columns, which are escaped as `Quo `Search phrase: 'bathroom interior design', count: 2166, ad price: $3;` -The `format_template_rows_between_delimiter` setting specifies delimiter between rows, which is printed (or expected) after every row except the last one (`\n` by default) +The `format_template_rows_between_delimiter` setting specifies the delimiter between rows, which is printed (or expected) after every row except the last one (`\n` by default) -Setting `format_template_resultset` specifies the path to file, which contains a format string for resultset. Format string for resultset has the same syntax as a format string for row and allows to specify a prefix, a suffix and a way to print some additional information. It contains the following placeholders instead of column names: +Setting `format_template_resultset` specifies the path to the file, which contains a format string for resultset. Format string for resultset has the same syntax as a format string for row and allows to specify a prefix, a suffix and a way to print some additional information. It contains the following placeholders instead of column names: - `data` is the rows with data in `format_template_row` format, separated by `format_template_rows_between_delimiter`. This placeholder must be the first placeholder in the format string. - `totals` is the row with total values in `format_template_row` format (when using WITH TOTALS) @@ -284,8 +284,8 @@ Setting `format_template_resultset` specifies the path to file, which contains a - `bytes_read` is the number of bytes (uncompressed) has been read The placeholders `data`, `totals`, `min` and `max` must not have escaping rule specified (or `None` must be specified explicitly). The remaining placeholders may have any escaping rule specified. -If the `format_template_resultset` setting is an empty string, `${data}` is used as default value. -For insert queries format allows skipping some columns or some fields if prefix or suffix (see example). +If the `format_template_resultset` setting is an empty string, `${data}` is used as the default value. +For insert queries format allows skipping some columns or fields if prefix or suffix (see example). Select example: @@ -373,8 +373,8 @@ All delimiters in the input data must be strictly equal to delimiters in specifi ## TemplateIgnoreSpaces {#templateignorespaces} This format is suitable only for input. -Similar to `Template`, but skips whitespace characters between delimiters and values in the input stream. However, if format strings contain whitespace characters, these characters will be expected in the input stream. 
Also allows to specify empty placeholders (`${}` or `${:None}`) to split some delimiter into separate parts to ignore spaces between them. Such placeholders are used only for skipping whitespace characters. -It’s possible to read `JSON` using this format, if values of columns have the same order in all rows. For example, the following request can be used for inserting data from output example of format [JSON](#json): +Similar to `Template`, but skips whitespace characters between delimiters and values in the input stream. However, if format strings contain whitespace characters, these characters will be expected in the input stream. Also allows specifying empty placeholders (`${}` or `${:None}`) to split some delimiter into separate parts to ignore spaces between them. Such placeholders are used only for skipping whitespace characters. +It’s possible to read `JSON` using this format if the values of columns have the same order in all rows. For example, the following request can be used for inserting data from its output example of format [JSON](#json): ``` sql INSERT INTO table_name SETTINGS @@ -411,7 +411,7 @@ SearchPhrase=curtain designs count()=1064 SearchPhrase=baku count()=1000 ``` -[NULL](../sql-reference/syntax.md) is formatted as `\N`. +[NULL](/docs/en/sql-reference/syntax.md) is formatted as `\N`. ``` sql SELECT * FROM t_null FORMAT TSKV @@ -427,49 +427,49 @@ Both data output and parsing are supported in this format. For parsing, any orde Parsing allows the presence of the additional field `tskv` without the equal sign or a value. This field is ignored. -During import, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1. +During import, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings.md/#input_format_skip_unknown_fields) is set to 1. ## CSV {#csv} Comma Separated Values format ([RFC](https://tools.ietf.org/html/rfc4180)). -When formatting, rows are enclosed in double-quotes. A double quote inside a string is output as two double quotes in a row. There are no other rules for escaping characters. Date and date-time are enclosed in double-quotes. Numbers are output without quotes. Values are separated by a delimiter character, which is `,` by default. The delimiter character is defined in the setting [format_csv_delimiter](../operations/settings/settings.md#format_csv_delimiter). Rows are separated using the Unix line feed (LF). Arrays are serialized in CSV as follows: first, the array is serialized to a string as in TabSeparated format, and then the resulting string is output to CSV in double-quotes. Tuples in CSV format are serialized as separate columns (that is, their nesting in the tuple is lost). +When formatting, rows are enclosed in double quotes. A double quote inside a string is output as two double quotes in a row. There are no other rules for escaping characters. Date and date-time are enclosed in double quotes. Numbers are output without quotes. Values are separated by a delimiter character, which is `,` by default. The delimiter character is defined in the setting [format_csv_delimiter](/docs/en/operations/settings/settings.md/#format_csv_delimiter). Rows are separated using the Unix line feed (LF). Arrays are serialized in CSV as follows: first, the array is serialized to a string as in TabSeparated format, and then the resulting string is output to CSV in double quotes. 
Tuples in CSV format are serialized as separate columns (that is, their nesting in the tuple is lost). ``` bash $ clickhouse-client --format_csv_delimiter="|" --query="INSERT INTO test.csv FORMAT CSV" < data.csv ``` -\*By default, the delimiter is `,`. See the [format_csv_delimiter](../operations/settings/settings.md#format_csv_delimiter) setting for more information. +\*By default, the delimiter is `,`. See the [format_csv_delimiter](/docs/en/operations/settings/settings.md/#format_csv_delimiter) setting for more information. When parsing, all values can be parsed either with or without quotes. Both double and single quotes are supported. Rows can also be arranged without quotes. In this case, they are parsed up to the delimiter character or line feed (CR or LF). In violation of the RFC, when parsing rows without quotes, the leading and trailing spaces and tabs are ignored. For the line feed, Unix (LF), Windows (CR LF) and Mac OS Classic (CR LF) types are all supported. -`NULL` is formatted according to setting [format_csv_null_representation](../operations/settings/settings.md#format_csv_null_representation) (default value is `\N`). +`NULL` is formatted according to setting [format_csv_null_representation](/docs/en/operations/settings/settings.md/#format_csv_null_representation) (default value is `\N`). -In input data, ENUM values can be represented as names or as ids. First, we try to match the input value to the ENUM name. If we fail and the input value is a number, we try to match this number to ENUM id. -If input data contains only ENUM ids, it's recommended to enable the setting [input_format_csv_enum_as_number](../operations/settings/settings.md#input_format_csv_enum_as_number) to optimize ENUM parsing. +In input data, ENUM values can be represented as names or as ids. First, we try to match the input value to the ENUM name. If we fail and the input value is a number, we try to match this number to the ENUM id. +If input data contains only ENUM ids, it's recommended to enable the setting [input_format_csv_enum_as_number](/docs/en/operations/settings/settings.md/#input_format_csv_enum_as_number) to optimize ENUM parsing. The CSV format supports the output of totals and extremes the same way as `TabSeparated`. ### CSV format settings {#csv-format-settings} -- [format_csv_delimiter](../operations/settings/settings.md#format_csv_delimiter) - the character to be considered as a delimiter in CSV data. Default value - `,`. -- [format_csv_allow_single_quotes](../operations/settings/settings.md#format_csv_allow_single_quotes) - allow strings in single quotes. Default value - `true`. -- [format_csv_allow_double_quotes](../operations/settings/settings.md#format_csv_allow_double_quotes) - allow strings in double quotes. Default value - `true`. -- [format_csv_null_representation](../operations/settings/settings.md#format_tsv_null_representation) - custom NULL representation in CSV format. Default value - `\N`. -- [input_format_csv_empty_as_default](../operations/settings/settings.md#input_format_csv_empty_as_default) - treat empty fields in CSV input as default values. Default value - `true`. For complex default expressions [input_format_defaults_for_omitted_fields](../operations/settings/settings.md#input_format_defaults_for_omitted_fields) must be enabled too. -- [input_format_csv_enum_as_number](../operations/settings/settings.md#input_format_csv_enum_as_number) - treat inserted enum values in CSV formats as enum indices. Default value - `false`. 
-- [input_format_csv_use_best_effort_in_schema_inference](../operations/settings/settings.md#input_format_csv_use_best_effort_in_schema_inference) - use some tweaks and heuristics to infer schema in CSV format. If disabled, all fields will be inferred as Strings. Default value - `true`. -- [input_format_csv_arrays_as_nested_csv](../operations/settings/settings.md#input_format_csv_arrays_as_nested_csv) - when reading Array from CSV, expect that its elements were serialized in nested CSV and then put into string. Default value - `false`. -- [output_format_csv_crlf_end_of_line](../operations/settings/settings.md#output_format_csv_crlf_end_of_line) - if it is set true, end of line in CSV output format will be `\r\n` instead of `\n`. Default value - `false`. -- [input_format_csv_skip_first_lines](../operations/settings/settings.md#input_format_csv_skip_first_lines) - skip specified number of lines at the beginning of data. Default value - `0`. +- [format_csv_delimiter](/docs/en/operations/settings/settings.md/#format_csv_delimiter) - the character to be considered as a delimiter in CSV data. Default value - `,`. +- [format_csv_allow_single_quotes](/docs/en/operations/settings/settings.md/#format_csv_allow_single_quotes) - allow strings in single quotes. Default value - `true`. +- [format_csv_allow_double_quotes](/docs/en/operations/settings/settings.md/#format_csv_allow_double_quotes) - allow strings in double quotes. Default value - `true`. +- [format_csv_null_representation](/docs/en/operations/settings/settings.md/#format_tsv_null_representation) - custom NULL representation in CSV format. Default value - `\N`. +- [input_format_csv_empty_as_default](/docs/en/operations/settings/settings.md/#input_format_csv_empty_as_default) - treat empty fields in CSV input as default values. Default value - `true`. For complex default expressions, [input_format_defaults_for_omitted_fields](/docs/en/operations/settings/settings.md/#input_format_defaults_for_omitted_fields) must be enabled too. +- [input_format_csv_enum_as_number](/docs/en/operations/settings/settings.md/#input_format_csv_enum_as_number) - treat inserted enum values in CSV formats as enum indices. Default value - `false`. +- [input_format_csv_use_best_effort_in_schema_inference](/docs/en/operations/settings/settings.md/#input_format_csv_use_best_effort_in_schema_inference) - use some tweaks and heuristics to infer schema in CSV format. If disabled, all fields will be inferred as Strings. Default value - `true`. +- [input_format_csv_arrays_as_nested_csv](/docs/en/operations/settings/settings.md/#input_format_csv_arrays_as_nested_csv) - when reading Array from CSV, expect that its elements were serialized in nested CSV and then put into string. Default value - `false`. +- [output_format_csv_crlf_end_of_line](/docs/en/operations/settings/settings.md/#output_format_csv_crlf_end_of_line) - if it is set to true, end of line in CSV output format will be `\r\n` instead of `\n`. Default value - `false`. +- [input_format_csv_skip_first_lines](/docs/en/operations/settings/settings.md/#input_format_csv_skip_first_lines) - skip the specified number of lines at the beginning of data. Default value - `0`. ## CSVWithNames {#csvwithnames} Also prints the header row with column names, similar to [TabSeparatedWithNames](#tabseparatedwithnames). 
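For illustration, a constant query makes the extra header row easy to see; the values follow the CSV quoting rules described above.

``` sql
-- Expected output: a header row with the column names, then the data row, e.g.
-- "x","s"
-- 1,"hello"
SELECT 1 AS x, 'hello' AS s FORMAT CSVWithNames
```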
:::warning -If setting [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) is set to 1, -the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1. +If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings.md/#input_format_with_names_use_header) is set to 1, +the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. ::: @@ -478,16 +478,16 @@ Otherwise, the first row will be skipped. Also prints two header rows with column names and types, similar to [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes). :::warning -If setting [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) is set to 1, -the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1. +If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings.md/#input_format_with_names_use_header) is set to 1, +the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. -If setting [input_format_with_types_use_header](../operations/settings/settings.md#input_format_with_types_use_header) is set to 1, +If setting [input_format_with_types_use_header](/docs/en/operations/settings/settings.md/#input_format_with_types_use_header) is set to 1, the types from input data will be compared with the types of the corresponding columns from the table. Otherwise, the second row will be skipped. ::: ## CustomSeparated {#format-customseparated} -Similar to [Template](#format-template), but it prints or reads all names and types of columns and uses escaping rule from [format_custom_escaping_rule](../operations/settings/settings.md#format_custom_escaping_rule) setting and delimiters from [format_custom_field_delimiter](../operations/settings/settings.md#format_custom_field_delimiter), [format_custom_row_before_delimiter](../operations/settings/settings.md#format_custom_row_before_delimiter), [format_custom_row_after_delimiter](../operations/settings/settings.md#format_custom_row_after_delimiter), [format_custom_row_between_delimiter](../operations/settings/settings.md#format_custom_row_between_delimiter), [format_custom_result_before_delimiter](../operations/settings/settings.md#format_custom_result_before_delimiter) and [format_custom_result_after_delimiter](../operations/settings/settings.md#format_custom_result_after_delimiter) settings, not from format strings. 
+Similar to [Template](#format-template), but it prints or reads all names and types of columns and uses escaping rule from [format_custom_escaping_rule](/docs/en/operations/settings/settings.md/#format_custom_escaping_rule) setting and delimiters from [format_custom_field_delimiter](/docs/en/operations/settings/settings.md/#format_custom_field_delimiter), [format_custom_row_before_delimiter](/docs/en/operations/settings/settings.md/#format_custom_row_before_delimiter), [format_custom_row_after_delimiter](/docs/en/operations/settings/settings.md/#format_custom_row_after_delimiter), [format_custom_row_between_delimiter](/docs/en/operations/settings/settings.md/#format_custom_row_between_delimiter), [format_custom_result_before_delimiter](/docs/en/operations/settings/settings.md/#format_custom_result_before_delimiter) and [format_custom_result_after_delimiter](/docs/en/operations/settings/settings.md/#format_custom_result_after_delimiter) settings, not from format strings. There is also `CustomSeparatedIgnoreSpaces` format, which is similar to [TemplateIgnoreSpaces](#templateignorespaces). @@ -496,8 +496,8 @@ There is also `CustomSeparatedIgnoreSpaces` format, which is similar to [Templat Also prints the header row with column names, similar to [TabSeparatedWithNames](#tabseparatedwithnames). :::warning -If setting [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) is set to 1, -the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1. +If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings.md/#input_format_with_names_use_header) is set to 1, +the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. ::: @@ -506,10 +506,10 @@ Otherwise, the first row will be skipped. Also prints two header rows with column names and types, similar to [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes). :::warning -If setting [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) is set to 1, -the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1. +If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings.md/#input_format_with_names_use_header) is set to 1, +the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. 
-If setting [input_format_with_types_use_header](../operations/settings/settings.md#input_format_with_types_use_header) is set to 1, +If setting [input_format_with_types_use_header](/docs/en/operations/settings/settings.md/#input_format_with_types_use_header) is set to 1, the types from input data will be compared with the types of the corresponding columns from the table. Otherwise, the second row will be skipped. ::: @@ -531,15 +531,15 @@ INSERT INTO table (x, y, z) VALUES (6, 7, 'Hello'), (7, 8, 'Hello'); INSERT INTO table (x, y, z) VALUES (8, 9, 'Hello'), (9, 10, 'Hello'); ``` -To read data output by this format ypu can use [MySQLDump](#mysqldump) input format. +To read data output by this format you can use [MySQLDump](#mysqldump) input format. ### SQLInsert format settings {#sqlinsert-format-settings} -- [output_format_sql_insert_max_batch_size](../operations/settings/settings.md#output_format_sql_insert_max_batch_size) - The maximum number of rows in one INSERT statement. Default value - `65505`. -- [output_format_sql_insert_table_name](../operations/settings/settings.md#output_format_sql_insert_table_name) - The name of table in the output INSERT query. Default value - `'table'`. -- [output_format_sql_insert_include_column_names](../operations/settings/settings.md#output_format_sql_insert_include_column_names) - Include column names in INSERT query. Default value - `true`. -- [output_format_sql_insert_use_replace](../operations/settings/settings.md#output_format_sql_insert_use_replace) - Use REPLACE statement instead of INSERT. Default value - `false`. -- [output_format_sql_insert_quote_names](../operations/settings/settings.md#output_format_sql_insert_quote_names) - Quote column names with "\`" characters . Default value - `true`. +- [output_format_sql_insert_max_batch_size](/docs/en/operations/settings/settings.md/#output_format_sql_insert_max_batch_size) - The maximum number of rows in one INSERT statement. Default value - `65505`. +- [output_format_sql_insert_table_name](/docs/en/operations/settings/settings.md/#output_format_sql_insert_table_name) - The name of the table in the output INSERT query. Default value - `'table'`. +- [output_format_sql_insert_include_column_names](/docs/en/operations/settings/settings.md/#output_format_sql_insert_include_column_names) - Include column names in INSERT query. Default value - `true`. +- [output_format_sql_insert_use_replace](/docs/en/operations/settings/settings.md/#output_format_sql_insert_use_replace) - Use REPLACE statement instead of INSERT. Default value - `false`. +- [output_format_sql_insert_quote_names](/docs/en/operations/settings/settings.md/#output_format_sql_insert_quote_names) - Quote column names with "\`" characters. Default value - `true`. ## JSON {#json} @@ -599,7 +599,7 @@ SELECT SearchPhrase, count() AS c FROM test.hits GROUP BY SearchPhrase WITH TOTA } ``` -The JSON is compatible with JavaScript. To ensure this, some characters are additionally escaped: the slash `/` is escaped as `\/`; alternative line breaks `U+2028` and `U+2029`, which break some browsers, are escaped as `\uXXXX`. ASCII control characters are escaped: backspace, form feed, line feed, carriage return, and horizontal tab are replaced with `\b`, `\f`, `\n`, `\r`, `\t` , as well as the remaining bytes in the 00-1F range using `\uXXXX` sequences. Invalid UTF-8 sequences are changed to the replacement character � so the output text will consist of valid UTF-8 sequences. 
For compatibility with JavaScript, Int64 and UInt64 integers are enclosed in double-quotes by default. To remove the quotes, you can set the configuration parameter [output_format_json_quote_64bit_integers](../operations/settings/settings.md#output_format_json_quote_64bit_integers) to 0. +The JSON is compatible with JavaScript. To ensure this, some characters are additionally escaped: the slash `/` is escaped as `\/`; alternative line breaks `U+2028` and `U+2029`, which break some browsers, are escaped as `\uXXXX`. ASCII control characters are escaped: backspace, form feed, line feed, carriage return, and horizontal tab are replaced with `\b`, `\f`, `\n`, `\r`, `\t` , as well as the remaining bytes in the 00-1F range using `\uXXXX` sequences. Invalid UTF-8 sequences are changed to the replacement character � so the output text will consist of valid UTF-8 sequences. For compatibility with JavaScript, Int64 and UInt64 integers are enclosed in double quotes by default. To remove the quotes, you can set the configuration parameter [output_format_json_quote_64bit_integers](/docs/en/operations/settings/settings.md/#output_format_json_quote_64bit_integers) to 0. `rows` – The total number of output rows. @@ -610,14 +610,14 @@ If the query contains GROUP BY, rows_before_limit_at_least is the exact number o `extremes` – Extreme values (when extremes are set to 1). -ClickHouse supports [NULL](../sql-reference/syntax.md), which is displayed as `null` in the JSON output. To enable `+nan`, `-nan`, `+inf`, `-inf` values in output, set the [output_format_json_quote_denormals](../operations/settings/settings.md#output_format_json_quote_denormals) to 1. +ClickHouse supports [NULL](/docs/en/sql-reference/syntax.md), which is displayed as `null` in the JSON output. To enable `+nan`, `-nan`, `+inf`, `-inf` values in output, set the [output_format_json_quote_denormals](/docs/en/operations/settings/settings.md/#output_format_json_quote_denormals) to 1. **See Also** - [JSONEachRow](#jsoneachrow) format -- [output_format_json_array_of_rows](../operations/settings/settings.md#output_format_json_array_of_rows) setting +- [output_format_json_array_of_rows](/docs/en/operations/settings/settings.md/#output_format_json_array_of_rows) setting -For JSON input format, if setting [input_format_json_validate_types_from_metadata](../operations/settings/settings.md#input_format_json_validate_types_from_metadata) is set to 1, +For JSON input format, if setting [input_format_json_validate_types_from_metadata](/docs/en/operations/settings/settings.md/#input_format_json_validate_types_from_metadata) is set to 1, the types from metadata in input data will be compared with the types of the corresponding columns from the table. ## JSONStrings {#jsonstrings} @@ -690,8 +690,8 @@ Example: } ``` -During import, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1. -Columns that are not present in the block will be filled with default values (you can use [input_format_defaults_for_omitted_fields](../operations/settings/settings.md#input_format_defaults_for_omitted_fields) setting here) +During import, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings.md/#input_format_skip_unknown_fields) is set to 1. 
+Columns that are not present in the block will be filled with default values (you can use the [input_format_defaults_for_omitted_fields](/docs/en/operations/settings/settings.md/#input_format_defaults_for_omitted_fields) setting here) ## JSONColumnsWithMetadata {#jsoncolumnsmonoblock} @@ -739,14 +739,14 @@ Example: } ``` -For JSONColumnsWithMetadata input format, if setting [input_format_json_validate_types_from_metadata](../operations/settings/settings.md#input_format_json_validate_types_from_metadata) is set to 1, +For JSONColumnsWithMetadata input format, if setting [input_format_json_validate_types_from_metadata](/docs/en/operations/settings/settings.md/#input_format_json_validate_types_from_metadata) is set to 1, the types from metadata in input data will be compared with the types of the corresponding columns from the table. ## JSONAsString {#jsonasstring} In this format, a single JSON object is interpreted as a single value. If the input has several JSON objects (comma separated), they are interpreted as separate rows. If the input data is enclosed in square brackets, it is interpreted as an array of JSONs. -This format can only be parsed for table with a single field of type [String](../sql-reference/data-types/string.md). The remaining columns must be set to [DEFAULT](../sql-reference/statements/create/table.md#default) or [MATERIALIZED](../sql-reference/statements/create/table.md#materialized), or omitted. Once you collect whole JSON object to string you can use [JSON functions](../sql-reference/functions/json-functions.md) to process it. +This format can only be parsed for a table with a single field of type [String](/docs/en/sql-reference/data-types/string.md). The remaining columns must be set to [DEFAULT](/docs/en/sql-reference/statements/create/table.md/#default) or [MATERIALIZED](/docs/en/sql-reference/statements/create/table.md/#materialized), or omitted. Once you collect the whole JSON object to string you can use [JSON functions](/docs/en/sql-reference/functions/json-functions.md) to process it. **Examples** @@ -891,7 +891,7 @@ Example: ] ``` -Columns that are not present in the block will be filled with default values (you can use [input_format_defaults_for_omitted_fields](../operations/settings/settings.md#input_format_defaults_for_omitted_fields) setting here) +Columns that are not present in the block will be filled with default values (you can use [input_format_defaults_for_omitted_fields](/docs/en/operations/settings/settings.md/#input_format_defaults_for_omitted_fields) setting here) ## JSONEachRow {#jsoneachrow} @@ -905,7 +905,7 @@ Example: {"num":44,"str":"hello","arr":[0,1,2,3]} ``` -While importing data columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1. +While importing data columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings.md/#input_format_skip_unknown_fields) is set to 1. ## JSONStringsEachRow {#jsonstringseachrow} @@ -960,8 +960,8 @@ Differs from `JSONEachRow`/`JSONStringsEachRow` in that ClickHouse will also yie Differs from `JSONCompactEachRow` format in that it also prints the header row with column names, similar to [TabSeparatedWithNames](#tabseparatedwithnames). 
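As a small illustration, the first output line carries the column names as a JSON array, followed by one compact JSON array per data row.

``` sql
-- Expected output resembles:
-- ["num", "str"]
-- [1, "hello"]
SELECT 1 AS num, 'hello' AS str FORMAT JSONCompactEachRowWithNames
```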
:::warning -If setting [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) is set to 1, -the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1. +If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings.md/#input_format_with_names_use_header) is set to 1, +the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. ::: @@ -970,10 +970,10 @@ Otherwise, the first row will be skipped. Differs from `JSONCompactEachRow` format in that it also prints two header rows with column names and types, similar to [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes). :::warning -If setting [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) is set to 1, -the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1. +If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings.md/#input_format_with_names_use_header) is set to 1, +the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. -If setting [input_format_with_types_use_header](../operations/settings/settings.md#input_format_with_types_use_header) is set to 1, +If setting [input_format_with_types_use_header](/docs/en/operations/settings/settings.md/#input_format_with_types_use_header) is set to 1, the types from input data will be compared with the types of the corresponding columns from the table. Otherwise, the second row will be skipped. ::: @@ -982,8 +982,8 @@ the types from input data will be compared with the types of the corresponding c Differs from `JSONCompactStringsEachRow` in that in that it also prints the header row with column names, similar to [TabSeparatedWithNames](#tabseparatedwithnames). :::warning -If setting [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) is set to 1, -the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1. +If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings.md/#input_format_with_names_use_header) is set to 1, +the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. 
::: @@ -992,10 +992,10 @@ Otherwise, the first row will be skipped. Differs from `JSONCompactStringsEachRow` in that it also prints two header rows with column names and types, similar to [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes). :::warning -If setting [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) is set to 1, -the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1. +If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings.md/#input_format_with_names_use_header) is set to 1, +the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. -If setting [input_format_with_types_use_header](../operations/settings/settings.md#input_format_with_types_use_header) is set to 1, +If setting [input_format_with_types_use_header](/docs/en/operations/settings/settings.md/#input_format_with_types_use_header) is set to 1, the types from input data will be compared with the types of the corresponding columns from the table. Otherwise, the second row will be skipped. ::: @@ -1009,7 +1009,7 @@ the types from input data will be compared with the types of the corresponding c ## JSONObjectEachRow {#jsonobjecteachrow} -In this format, all data is represented as a single JSON Object, each row is represented as separate field of this object similar to JSONEachRow format. +In this format, all data is represented as a single JSON Object, each row is represented as a separate field of this object similar to JSONEachRow format. Example: @@ -1021,12 +1021,12 @@ Example: } ``` -To use object name as column value you can use special setting [format_json_object_each_row_column_for_object_name](../operations/settings/settings.md#format_json_object_each_row_column_for_object_name). Value of this setting is set to the name of a column, that is used as JSON key for a row in resulting object. +To use an object name as a column value you can use the special setting [format_json_object_each_row_column_for_object_name](/docs/en/operations/settings/settings.md/#format_json_object_each_row_column_for_object_name). The value of this setting is set to the name of a column, that is used as JSON key for a row in the resulting object. Examples: For output: -Let's say we have table `test` with two columns: +Let's say we have the table `test` with two columns: ``` ┌─object_name─┬─number─┐ │ first_obj │ 1 │ @@ -1051,7 +1051,7 @@ The output: For input: -Let's say we stored output from previous example in a file with name `data.json`: +Let's say we stored output from the previous example in a file named `data.json`: ```sql select * from file('data.json', JSONObjectEachRow, 'object_name String, number UInt64') settings format_json_object_each_row_column_for_object_name='object_name' ``` @@ -1093,9 +1093,9 @@ ClickHouse ignores spaces between elements and commas after the objects. You can **Omitted values processing** -ClickHouse substitutes omitted values with the default values for the corresponding [data types](../sql-reference/data-types/index.md). 
+ClickHouse substitutes omitted values with the default values for the corresponding [data types](/docs/en/sql-reference/data-types/index.md). -If `DEFAULT expr` is specified, ClickHouse uses different substitution rules depending on the [input_format_defaults_for_omitted_fields](../operations/settings/settings.md#input_format_defaults_for_omitted_fields) setting. +If `DEFAULT expr` is specified, ClickHouse uses different substitution rules depending on the [input_format_defaults_for_omitted_fields](/docs/en/operations/settings/settings.md/#input_format_defaults_for_omitted_fields) setting. Consider the following table: @@ -1140,7 +1140,7 @@ Any set of bytes can be output in the strings. Use the `JSONEachRow` format if y ### Usage of Nested Structures {#jsoneachrow-nested} -If you have a table with [Nested](../sql-reference/data-types/nested-data-structures/nested.md) data type columns, you can insert JSON data with the same structure. Enable this feature with the [input_format_import_nested_json](../operations/settings/settings.md#input_format_import_nested_json) setting. +If you have a table with [Nested](/docs/en/sql-reference/data-types/nested-data-structures/nested.md) data type columns, you can insert JSON data with the same structure. Enable this feature with the [input_format_import_nested_json](/docs/en/operations/settings/settings.md/#input_format_import_nested_json) setting. For example, consider the following table: @@ -1154,7 +1154,7 @@ As you can see in the `Nested` data type description, ClickHouse treats each com INSERT INTO json_each_row_nested FORMAT JSONEachRow {"n.s": ["abc", "def"], "n.i": [1, 23]} ``` -To insert data as a hierarchical JSON object, set [input_format_import_nested_json=1](../operations/settings/settings.md#input_format_import_nested_json). +To insert data as a hierarchical JSON object, set [input_format_import_nested_json=1](/docs/en/operations/settings/settings.md/#input_format_import_nested_json). ``` json { @@ -1199,18 +1199,18 @@ SELECT * FROM json_each_row_nested ### JSON formats settings {#json-formats-settings} -- [input_format_import_nested_json](../operations/settings/settings.md#input_format_import_nested_json) - map nested JSON data to nested tables (it works for JSONEachRow format). Default value - `false`. -- [input_format_json_read_bools_as_numbers](../operations/settings/settings.md#input_format_json_read_bools_as_numbers) - allow to parse bools as numbers in JSON input formats. Default value - `true`. -- [input_format_json_read_numbers_as_strings](../operations/settings/settings.md#input_format_json_read_numbers_as_strings) - allow to parse numbers as strings in JSON input formats. Default value - `false`. -- [input_format_json_read_objects_as_strings](../operations/settings/settings.md#input_format_json_read_objects_as_strings) - allow to parse JSON objects as strings in JSON input formats. Default value - `false`. -- [output_format_json_quote_64bit_integers](../operations/settings/settings.md#output_format_json_quote_64bit_integers) - controls quoting of 64-bit integers in JSON output format. Default value - `true`. -- [output_format_json_quote_64bit_floats](../operations/settings/settings.md#output_format_json_quote_64bit_floats) - controls quoting of 64-bit floats in JSON output format. Default value - `false`. -- [output_format_json_quote_denormals](../operations/settings/settings.md#output_format_json_quote_denormals) - enables '+nan', '-nan', '+inf', '-inf' outputs in JSON output format. Default value - `false`. 
-- [output_format_json_quote_decimals](../operations/settings/settings.md#output_format_json_quote_decimals) - controls quoting of decimals in JSON output format. Default value - `false`. -- [output_format_json_escape_forward_slashes](../operations/settings/settings.md#output_format_json_escape_forward_slashes) - controls escaping forward slashes for string outputs in JSON output format. Default value - `true`. -- [output_format_json_named_tuples_as_objects](../operations/settings/settings.md#output_format_json_named_tuples_as_objects) - serialize named tuple columns as JSON objects. Default value - `false`. -- [output_format_json_array_of_rows](../operations/settings/settings.md#output_format_json_array_of_rows) - output a JSON array of all rows in JSONEachRow(Compact) format. Default value - `false`. -- [output_format_json_validate_utf8](../operations/settings/settings.md#output_format_json_validate_utf8) - enables validation of UTF-8 sequences in JSON output formats (note that it doesn't impact formats JSON/JSONCompact/JSONColumnsWithMetadata, they always validate utf8). Default value - `false`. +- [input_format_import_nested_json](/docs/en/operations/settings/settings.md/#input_format_import_nested_json) - map nested JSON data to nested tables (it works for JSONEachRow format). Default value - `false`. +- [input_format_json_read_bools_as_numbers](/docs/en/operations/settings/settings.md/#input_format_json_read_bools_as_numbers) - allow to parse bools as numbers in JSON input formats. Default value - `true`. +- [input_format_json_read_numbers_as_strings](/docs/en/operations/settings/settings.md/#input_format_json_read_numbers_as_strings) - allow to parse numbers as strings in JSON input formats. Default value - `false`. +- [input_format_json_read_objects_as_strings](/docs/en/operations/settings/settings.md/#input_format_json_read_objects_as_strings) - allow to parse JSON objects as strings in JSON input formats. Default value - `false`. +- [output_format_json_quote_64bit_integers](/docs/en/operations/settings/settings.md/#output_format_json_quote_64bit_integers) - controls quoting of 64-bit integers in JSON output format. Default value - `true`. +- [output_format_json_quote_64bit_floats](/docs/en/operations/settings/settings.md/#output_format_json_quote_64bit_floats) - controls quoting of 64-bit floats in JSON output format. Default value - `false`. +- [output_format_json_quote_denormals](/docs/en/operations/settings/settings.md/#output_format_json_quote_denormals) - enables '+nan', '-nan', '+inf', '-inf' outputs in JSON output format. Default value - `false`. +- [output_format_json_quote_decimals](/docs/en/operations/settings/settings.md/#output_format_json_quote_decimals) - controls quoting of decimals in JSON output format. Default value - `false`. +- [output_format_json_escape_forward_slashes](/docs/en/operations/settings/settings.md/#output_format_json_escape_forward_slashes) - controls escaping forward slashes for string outputs in JSON output format. Default value - `true`. +- [output_format_json_named_tuples_as_objects](/docs/en/operations/settings/settings.md/#output_format_json_named_tuples_as_objects) - serialize named tuple columns as JSON objects. Default value - `false`. +- [output_format_json_array_of_rows](/docs/en/operations/settings/settings.md/#output_format_json_array_of_rows) - output a JSON array of all rows in JSONEachRow(Compact) format. Default value - `false`. 
+- [output_format_json_validate_utf8](/docs/en/operations/settings/settings.md/#output_format_json_validate_utf8) - enables validation of UTF-8 sequences in JSON output formats (note that it doesn't impact formats JSON/JSONCompact/JSONColumnsWithMetadata, they always validate utf8). Default value - `false`. ## BSONEachRow {#bsoneachrow} @@ -1221,49 +1221,49 @@ For output it uses the following correspondence between ClickHouse types and BSO | ClickHouse type | BSON Type | |-----------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------| -| [Bool](../sql-reference/data-types/boolean.md) | `\x08` boolean | -| [Int8/UInt8](../sql-reference/data-types/int-uint.md) | `\x10` int32 | -| [Int16UInt16](../sql-reference/data-types/int-uint.md) | `\x10` int32 | -| [Int32](../sql-reference/data-types/int-uint.md) | `\x10` int32 | -| [UInt32](../sql-reference/data-types/int-uint.md) | `\x12` int64 | -| [Int64/UInt64](../sql-reference/data-types/int-uint.md) | `\x12` int64 | -| [Float32/Float64](../sql-reference/data-types/float.md) | `\x01` double | -| [Date](../sql-reference/data-types/date.md)/[Date32](../sql-reference/data-types/date32.md) | `\x10` int32 | -| [DateTime](../sql-reference/data-types/datetime.md) | `\x12` int64 | -| [DateTime64](../sql-reference/data-types/datetime64.md) | `\x09` datetime | -| [Decimal32](../sql-reference/data-types/decimal.md) | `\x10` int32 | -| [Decimal64](../sql-reference/data-types/decimal.md) | `\x12` int64 | -| [Decimal128](../sql-reference/data-types/decimal.md) | `\x05` binary, `\x00` binary subtype, size = 16 | -| [Decimal256](../sql-reference/data-types/decimal.md) | `\x05` binary, `\x00` binary subtype, size = 32 | -| [Int128/UInt128](../sql-reference/data-types/int-uint.md) | `\x05` binary, `\x00` binary subtype, size = 16 | -| [Int256/UInt256](../sql-reference/data-types/int-uint.md) | `\x05` binary, `\x00` binary subtype, size = 32 | -| [String](../sql-reference/data-types/string.md)/[FixedString](../sql-reference/data-types/fixedstring.md) | `\x05` binary, `\x00` binary subtype or \x02 string if setting output_format_bson_string_as_string is enabled | -| [UUID](../sql-reference/data-types/uuid.md) | `\x05` binary, `\x04` uuid subtype, size = 16 | -| [Array](../sql-reference/data-types/array.md) | `\x04` array | -| [Tuple](../sql-reference/data-types/tuple.md) | `\x04` array | -| [Named Tuple](../sql-reference/data-types/tuple.md) | `\x03` document | -| [Map](../sql-reference/data-types/map.md) (with String keys) | `\x03` document | +| [Bool](/docs/en/sql-reference/data-types/boolean.md) | `\x08` boolean | +| [Int8/UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `\x10` int32 | +| [Int16UInt16](/docs/en/sql-reference/data-types/int-uint.md) | `\x10` int32 | +| [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `\x10` int32 | +| [UInt32](/docs/en/sql-reference/data-types/int-uint.md) | `\x12` int64 | +| [Int64/UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `\x12` int64 | +| [Float32/Float64](/docs/en/sql-reference/data-types/float.md) | `\x01` double | +| [Date](/docs/en/sql-reference/data-types/date.md)/[Date32](/docs/en/sql-reference/data-types/date32.md) | `\x10` int32 | +| [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `\x12` int64 | +| [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | `\x09` datetime | +| 
[Decimal32](/docs/en/sql-reference/data-types/decimal.md) | `\x10` int32 | +| [Decimal64](/docs/en/sql-reference/data-types/decimal.md) | `\x12` int64 | +| [Decimal128](/docs/en/sql-reference/data-types/decimal.md) | `\x05` binary, `\x00` binary subtype, size = 16 | +| [Decimal256](/docs/en/sql-reference/data-types/decimal.md) | `\x05` binary, `\x00` binary subtype, size = 32 | +| [Int128/UInt128](/docs/en/sql-reference/data-types/int-uint.md) | `\x05` binary, `\x00` binary subtype, size = 16 | +| [Int256/UInt256](/docs/en/sql-reference/data-types/int-uint.md) | `\x05` binary, `\x00` binary subtype, size = 32 | +| [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `\x05` binary, `\x00` binary subtype or \x02 string if setting output_format_bson_string_as_string is enabled | +| [UUID](/docs/en/sql-reference/data-types/uuid.md) | `\x05` binary, `\x04` uuid subtype, size = 16 | +| [Array](/docs/en/sql-reference/data-types/array.md) | `\x04` array | +| [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `\x04` array | +| [Named Tuple](/docs/en/sql-reference/data-types/tuple.md) | `\x03` document | +| [Map](/docs/en/sql-reference/data-types/map.md) (with String keys) | `\x03` document | For input it uses the following correspondence between BSON types and ClickHouse types: | BSON Type | ClickHouse Type | |------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `\x01` double | [Float32/Float64](../sql-reference/data-types/float.md) | -| `\x02` string | [String](../sql-reference/data-types/string.md)/[FixedString](../sql-reference/data-types/fixedstring.md) | -| `\x03` document | [Map](../sql-reference/data-types/map.md)/[Named Tuple](../sql-reference/data-types/tuple.md) | -| `\x04` array | [Array](../sql-reference/data-types/array.md)/[Tuple](../sql-reference/data-types/tuple.md) | -| `\x05` binary, `\x00` binary subtype | [String](../sql-reference/data-types/string.md)/[FixedString](../sql-reference/data-types/fixedstring.md) | -| `\x05` binary, `\x02` old binary subtype | [String](../sql-reference/data-types/string.md)/[FixedString](../sql-reference/data-types/fixedstring.md) | -| `\x05` binary, `\x03` old uuid subtype | [UUID](../sql-reference/data-types/uuid.md) | -| `\x05` binary, `\x04` uuid subtype | [UUID](../sql-reference/data-types/uuid.md) | -| `\x07` ObjectId | [String](../sql-reference/data-types/string.md)/[FixedString](../sql-reference/data-types/fixedstring.md) | -| `\x08` boolean | [Bool](../sql-reference/data-types/boolean.md) | -| `\x09` datetime | [DateTime64](../sql-reference/data-types/datetime64.md) | -| `\x0A` null value | [NULL](../sql-reference/data-types/nullable.md) | -| `\x0D` JavaScript code | [String](../sql-reference/data-types/string.md)/[FixedString](../sql-reference/data-types/fixedstring.md) | -| `\x0E` symbol | [String](../sql-reference/data-types/string.md)/[FixedString](../sql-reference/data-types/fixedstring.md) | -| `\x10` int32 | [Int32/UInt32](../sql-reference/data-types/int-uint.md)/[Decimal32](../sql-reference/data-types/decimal.md) | -| `\x12` int64 | [Int64/UInt64](../sql-reference/data-types/int-uint.md)/[Decimal64](../sql-reference/data-types/decimal.md)/[DateTime64](../sql-reference/data-types/datetime64.md) | +| `\x01` double | [Float32/Float64](/docs/en/sql-reference/data-types/float.md) | +| `\x02` string | 
[String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | +| `\x03` document | [Map](/docs/en/sql-reference/data-types/map.md)/[Named Tuple](/docs/en/sql-reference/data-types/tuple.md) | +| `\x04` array | [Array](/docs/en/sql-reference/data-types/array.md)/[Tuple](/docs/en/sql-reference/data-types/tuple.md) | +| `\x05` binary, `\x00` binary subtype | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | +| `\x05` binary, `\x02` old binary subtype | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | +| `\x05` binary, `\x03` old uuid subtype | [UUID](/docs/en/sql-reference/data-types/uuid.md) | +| `\x05` binary, `\x04` uuid subtype | [UUID](/docs/en/sql-reference/data-types/uuid.md) | +| `\x07` ObjectId | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | +| `\x08` boolean | [Bool](/docs/en/sql-reference/data-types/boolean.md) | +| `\x09` datetime | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | +| `\x0A` null value | [NULL](/docs/en/sql-reference/data-types/nullable.md) | +| `\x0D` JavaScript code | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | +| `\x0E` symbol | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | +| `\x10` int32 | [Int32/UInt32](/docs/en/sql-reference/data-types/int-uint.md)/[Decimal32](/docs/en/sql-reference/data-types/decimal.md) | +| `\x12` int64 | [Int64/UInt64](/docs/en/sql-reference/data-types/int-uint.md)/[Decimal64](/docs/en/sql-reference/data-types/decimal.md)/[DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | Other BSON types are not supported. Also, it performs conversion between different integer types (for example, you can insert BSON int32 value into ClickHouse UInt8). Big integers and decimals (Int128/UInt128/Int256/UInt256/Decimal128/Decimal256) can be parsed from BSON Binary value with `\x00` binary subtype. In this case this format will validate that the size of binary data equals the size of expected value. @@ -1272,8 +1272,8 @@ Note: this format don't work properly on Big-Endian platforms. ### BSON format settings {#bson-format-settings} -- [output_format_bson_string_as_string](../operations/settings/settings.md#output_format_bson_string_as_string) - use BSON String type instead of Binary for String columns. Default value - `false`. -- [input_format_bson_skip_fields_with_unsupported_types_in_schema_inference](../operations/settings/settings.md#input_format_bson_skip_fields_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for format BSONEachRow. Default value - `false`. +- [output_format_bson_string_as_string](/docs/en/operations/settings/settings.md/#output_format_bson_string_as_string) - use BSON String type instead of Binary for String columns. Default value - `false`. +- [input_format_bson_skip_fields_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings.md/#input_format_bson_skip_fields_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for format BSONEachRow. Default value - `false`. 
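For example, a minimal round-trip sketch (the table name `test_bson` and the file name `data.bson` are assumptions for illustration; `INTO OUTFILE` and `FROM INFILE` run client-side in clickhouse-client or clickhouse-local):

``` sql
-- Write String columns as BSON strings instead of binary (see the setting above).
SET output_format_bson_string_as_string = 1;

-- Export: each row becomes one BSON document.
SELECT * FROM test_bson INTO OUTFILE 'data.bson' FORMAT BSONEachRow;

-- Import: BSON fields are matched to table columns by name,
-- using the type correspondence listed in the tables above.
INSERT INTO test_bson FROM INFILE 'data.bson' FORMAT BSONEachRow;
```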
## Native {#native} @@ -1292,7 +1292,7 @@ Outputs data as Unicode-art tables, also using ANSI-escape sequences for setting A full grid of the table is drawn, and each row occupies two lines in the terminal. Each result block is output as a separate table. This is necessary so that blocks can be output without buffering results (buffering would be necessary in order to pre-calculate the visible width of all the values). -[NULL](../sql-reference/syntax.md) is output as `ᴺᵁᴸᴸ`. +[NULL](/docs/en/sql-reference/syntax.md) is output as `ᴺᵁᴸᴸ`. Example (shown for the [PrettyCompact](#prettycompact) format): @@ -1406,12 +1406,12 @@ Differs from [PrettySpaceNoEscapes](#prettyspacenoescapes) in that up to 10,000 ## Pretty formats settings {#pretty-formats-settings} -- [output_format_pretty_max_rows](../operations/settings/settings.md#output_format_pretty_max_rows) - rows limit for Pretty formats. Default value - `10000`. -- [output_format_pretty_max_column_pad_width](../operations/settings/settings.md#output_format_pretty_max_column_pad_width) - maximum width to pad all values in a column in Pretty formats. Default value - `250`. -- [output_format_pretty_max_value_width](../operations/settings/settings.md#output_format_pretty_max_value_width) - Maximum width of value to display in Pretty formats. If greater - it will be cut. Default value - `10000`. -- [output_format_pretty_color](../operations/settings/settings.md#output_format_pretty_color) - use ANSI escape sequences to paint colors in Pretty formats. Default value - `true`. -- [output_format_pretty_grid_charset](../operations/settings/settings.md#output_format_pretty_grid_charset) - Charset for printing grid borders. Available charsets: ASCII, UTF-8. Default value - `UTF-8`. -- [output_format_pretty_row_numbers](../operations/settings/settings.md#output_format_pretty_row_numbers) - Add row numbers before each row for pretty output format. Default value - `false`. +- [output_format_pretty_max_rows](/docs/en/operations/settings/settings.md/#output_format_pretty_max_rows) - rows limit for Pretty formats. Default value - `10000`. +- [output_format_pretty_max_column_pad_width](/docs/en/operations/settings/settings.md/#output_format_pretty_max_column_pad_width) - maximum width to pad all values in a column in Pretty formats. Default value - `250`. +- [output_format_pretty_max_value_width](/docs/en/operations/settings/settings.md/#output_format_pretty_max_value_width) - Maximum width of value to display in Pretty formats. If greater - it will be cut. Default value - `10000`. +- [output_format_pretty_color](/docs/en/operations/settings/settings.md/#output_format_pretty_color) - use ANSI escape sequences to paint colors in Pretty formats. Default value - `true`. +- [output_format_pretty_grid_charset](/docs/en/operations/settings/settings.md/#output_format_pretty_grid_charset) - Charset for printing grid borders. Available charsets: ASCII, UTF-8. Default value - `UTF-8`. +- [output_format_pretty_row_numbers](/docs/en/operations/settings/settings.md/#output_format_pretty_row_numbers) - Add row numbers before each row for pretty output format. Default value - `false`. ## RowBinary {#rowbinary} @@ -1426,7 +1426,7 @@ FixedString is represented simply as a sequence of bytes. Array is represented as a varint length (unsigned [LEB128](https://en.wikipedia.org/wiki/LEB128)), followed by successive elements of the array. 

-For [NULL](../sql-reference/syntax.md#null-literal) support, an additional byte containing 1 or 0 is added before each [Nullable](../sql-reference/data-types/nullable.md) value. If 1, then the value is `NULL` and this byte is interpreted as a separate value. If 0, the value after the byte is not `NULL`. +For [NULL](/docs/en/sql-reference/syntax.md/#null-literal) support, an additional byte containing 1 or 0 is added before each [Nullable](/docs/en/sql-reference/data-types/nullable.md) value. If 1, then the value is `NULL` and this byte is interpreted as a separate value. If 0, the value after the byte is not `NULL`. ## RowBinaryWithNames {#rowbinarywithnames} @@ -1436,8 +1436,8 @@ Similar to [RowBinary](#rowbinary), but with added header: - N `String`s specifying column names :::warning -If setting [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) is set to 1, -the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1. +If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings.md/#input_format_with_names_use_header) is set to 1, +the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. ::: @@ -1450,20 +1450,20 @@ Similar to [RowBinary](#rowbinary), but with added header: - N `String`s specifying column types :::warning -If setting [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) is set to 1, -the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1. +If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings.md/#input_format_with_names_use_header) is set to 1, +the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. -If setting [input_format_with_types_use_header](../operations/settings/settings.md#input_format_with_types_use_header) is set to 1, +If setting [input_format_with_types_use_header](/docs/en/operations/settings/settings.md/#input_format_with_types_use_header) is set to 1, the types from input data will be compared with the types of the corresponding columns from the table. Otherwise, the second row will be skipped. ::: ## RowBinary format settings {#row-binary-format-settings} -- [format_binary_max_string_size](../operations/settings/settings.md#format_binary_max_string_size) - The maximum allowed size for String in RowBinary format. Default value - `1GiB`. +- [format_binary_max_string_size](/docs/en/operations/settings/settings.md/#format_binary_max_string_size) - The maximum allowed size for String in RowBinary format. Default value - `1GiB`. ## Values {#data-format-values} -Prints every row in brackets. 
Rows are separated by commas. There is no comma after the last row. The values inside the brackets are also comma-separated. Numbers are output in a decimal format without quotes. Arrays are output in square brackets. Strings, dates, and dates with times are output in quotes. Escaping rules and parsing are similar to the [TabSeparated](#tabseparated) format. During formatting, extra spaces aren’t inserted, but during parsing, they are allowed and skipped (except for spaces inside array values, which are not allowed). [NULL](../sql-reference/syntax.md) is represented as `NULL`.
+Prints every row in brackets. Rows are separated by commas. There is no comma after the last row. The values inside the brackets are also comma-separated. Numbers are output in a decimal format without quotes. Arrays are output in square brackets. Strings, dates, and dates with times are output in quotes. Escaping rules and parsing are similar to the [TabSeparated](#tabseparated) format. During formatting, extra spaces aren’t inserted, but during parsing, they are allowed and skipped (except for spaces inside array values, which are not allowed). [NULL](/docs/en/sql-reference/syntax.md) is represented as `NULL`.

The minimum set of characters that you need to escape when passing data in Values format: single quotes and backslashes.

@@ -1471,16 +1471,16 @@ This is the format that is used in `INSERT INTO t VALUES ...`, but you can also

## Values format settings {#values-format-settings}

-- [input_format_values_interpret_expressions](../operations/settings/settings.md#input_format_values_interpret_expressions) - if the field could not be parsed by streaming parser, run SQL parser and try to interpret it as SQL expression. Default value - `true`.
-- [input_format_values_deduce_templates_of_expressions](../operations/settings/settings.md#input_format_values_deduce_templates_of_expressions) -if the field could not be parsed by streaming parser, run SQL parser, deduce template of the SQL expression, try to parse all rows using template and then interpret expression for all rows. Default value - `true`.
-- [input_format_values_accurate_types_of_literals](../operations/settings/settings.md#input_format_values_accurate_types_of_literals) - when parsing and interpreting expressions using template, check actual type of literal to avoid possible overflow and precision issues. Default value - `true`.
+- [input_format_values_interpret_expressions](/docs/en/operations/settings/settings.md/#input_format_values_interpret_expressions) - if the field could not be parsed by the streaming parser, run the SQL parser and try to interpret it as a SQL expression. Default value - `true`.
+- [input_format_values_deduce_templates_of_expressions](/docs/en/operations/settings/settings.md/#input_format_values_deduce_templates_of_expressions) - if the field could not be parsed by the streaming parser, run the SQL parser, deduce a template of the SQL expression, try to parse all rows using the template, and then interpret the expression for all rows. Default value - `true`.
+- [input_format_values_accurate_types_of_literals](/docs/en/operations/settings/settings.md/#input_format_values_accurate_types_of_literals) - when parsing and interpreting expressions using the template, check the actual type of the literal to avoid possible overflow and precision issues. Default value - `true`.

## Vertical {#vertical}

Prints each value on a separate line with the column name specified. This format is convenient for printing just one or a few rows if each row consists of a large number of columns.
-[NULL](../sql-reference/syntax.md) is output as `ᴺᵁᴸᴸ`. +[NULL](/docs/en/sql-reference/syntax.md) is output as `ᴺᵁᴸᴸ`. Example: @@ -1593,27 +1593,27 @@ See also [Format Schema](#formatschema). ### Data Types Matching {#data_types-matching-capnproto} -The table below shows supported data types and how they match ClickHouse [data types](../sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries. +The table below shows supported data types and how they match ClickHouse [data types](/docs/en/sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries. | CapnProto data type (`INSERT`) | ClickHouse data type | CapnProto data type (`SELECT`) | |--------------------------------|-----------------------------------------------------------|--------------------------------| -| `UINT8`, `BOOL` | [UInt8](../sql-reference/data-types/int-uint.md) | `UINT8` | -| `INT8` | [Int8](../sql-reference/data-types/int-uint.md) | `INT8` | -| `UINT16` | [UInt16](../sql-reference/data-types/int-uint.md), [Date](../sql-reference/data-types/date.md) | `UINT16` | -| `INT16` | [Int16](../sql-reference/data-types/int-uint.md) | `INT16` | -| `UINT32` | [UInt32](../sql-reference/data-types/int-uint.md), [DateTime](../sql-reference/data-types/datetime.md) | `UINT32` | -| `INT32` | [Int32](../sql-reference/data-types/int-uint.md) | `INT32` | -| `UINT64` | [UInt64](../sql-reference/data-types/int-uint.md) | `UINT64` | -| `INT64` | [Int64](../sql-reference/data-types/int-uint.md), [DateTime64](../sql-reference/data-types/datetime.md) | `INT64` | -| `FLOAT32` | [Float32](../sql-reference/data-types/float.md) | `FLOAT32` | -| `FLOAT64` | [Float64](../sql-reference/data-types/float.md) | `FLOAT64` | -| `TEXT, DATA` | [String](../sql-reference/data-types/string.md), [FixedString](../sql-reference/data-types/fixedstring.md) | `TEXT, DATA` | -| `union(T, Void), union(Void, T)` | [Nullable(T)](../sql-reference/data-types/date.md) | `union(T, Void), union(Void, T)` | -| `ENUM` | [Enum(8\|16)](../sql-reference/data-types/enum.md) | `ENUM` | -| `LIST` | [Array](../sql-reference/data-types/array.md) | `LIST` | -| `STRUCT` | [Tuple](../sql-reference/data-types/tuple.md) | `STRUCT` | +| `UINT8`, `BOOL` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `UINT8` | +| `INT8` | [Int8](/docs/en/sql-reference/data-types/int-uint.md) | `INT8` | +| `UINT16` | [UInt16](/docs/en/sql-reference/data-types/int-uint.md), [Date](/docs/en/sql-reference/data-types/date.md) | `UINT16` | +| `INT16` | [Int16](/docs/en/sql-reference/data-types/int-uint.md) | `INT16` | +| `UINT32` | [UInt32](/docs/en/sql-reference/data-types/int-uint.md), [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `UINT32` | +| `INT32` | [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `INT32` | +| `UINT64` | [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `UINT64` | +| `INT64` | [Int64](/docs/en/sql-reference/data-types/int-uint.md), [DateTime64](/docs/en/sql-reference/data-types/datetime.md) | `INT64` | +| `FLOAT32` | [Float32](/docs/en/sql-reference/data-types/float.md) | `FLOAT32` | +| `FLOAT64` | [Float64](/docs/en/sql-reference/data-types/float.md) | `FLOAT64` | +| `TEXT, DATA` | [String](/docs/en/sql-reference/data-types/string.md), [FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `TEXT, DATA` | +| `union(T, Void), union(Void, T)` | [Nullable(T)](/docs/en/sql-reference/data-types/date.md) | `union(T, Void), union(Void, T)` | +| `ENUM` | [Enum(8\|16)](/docs/en/sql-reference/data-types/enum.md) | `ENUM` | +| 
`LIST` | [Array](/docs/en/sql-reference/data-types/array.md) | `LIST` | +| `STRUCT` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `STRUCT` | -For working with `Enum` in CapnProto format use the [format_capn_proto_enum_comparising_mode](../operations/settings/settings.md#format_capn_proto_enum_comparising_mode) setting. +For working with `Enum` in CapnProto format use the [format_capn_proto_enum_comparising_mode](/docs/en/operations/settings/settings.md/#format_capn_proto_enum_comparising_mode) setting. Arrays can be nested and can have a value of the `Nullable` type as an argument. `Tuple` type also can be nested. @@ -1644,10 +1644,10 @@ $ clickhouse-client --query = "SELECT * FROM test.hits FORMAT CapnProto SETTINGS Expose metrics in [Prometheus text-based exposition format](https://prometheus.io/docs/instrumenting/exposition_formats/#text-based-format). The output table should have a proper structure. -Columns `name` ([String](../sql-reference/data-types/string.md)) and `value` (number) are required. -Rows may optionally contain `help` ([String](../sql-reference/data-types/string.md)) and `timestamp` (number). -Column `type` ([String](../sql-reference/data-types/string.md)) is either `counter`, `gauge`, `histogram`, `summary`, `untyped` or empty. -Each metric value may also have some `labels` ([Map(String, String)](../sql-reference/data-types/map.md)). +Columns `name` ([String](/docs/en/sql-reference/data-types/string.md)) and `value` (number) are required. +Rows may optionally contain `help` ([String](/docs/en/sql-reference/data-types/string.md)) and `timestamp` (number). +Column `type` ([String](/docs/en/sql-reference/data-types/string.md)) is either `counter`, `gauge`, `histogram`, `summary`, `untyped` or empty. +Each metric value may also have some `labels` ([Map(String, String)](/docs/en/sql-reference/data-types/map.md)). Several consequent rows may refer to the one metric with different labels. The table should be sorted by metric name (e.g., with `ORDER BY name`). There's special requirements for labels for `histogram` and `summary`, see [Prometheus doc](https://prometheus.io/docs/instrumenting/exposition_formats/#histograms-and-summaries) for the details. Special rules applied to row with labels `{'count':''}` and `{'sum':''}`, they'll be converted to `_count` and `_sum` respectively. @@ -1759,7 +1759,7 @@ message MessageType { ``` ClickHouse tries to find a column named `x.y.z` (or `x_y_z` or `X.y_Z` and so on). -Nested messages are suitable to input or output a [nested data structures](../sql-reference/data-types/nested-data-structures/nested.md). +Nested messages are suitable to input or output a [nested data structures](/docs/en/sql-reference/data-types/nested-data-structures/nested.md). Default values defined in a protobuf schema like this @@ -1771,7 +1771,7 @@ message MessageType { } ``` -are not applied; the [table defaults](../sql-reference/statements/create/table.md#create-default-values) are used instead of them. +are not applied; the [table defaults](/docs/en/sql-reference/statements/create/table.md/#create-default-values) are used instead of them. ClickHouse inputs and outputs protobuf messages in the `length-delimited` format. It means before every message should be written its length as a [varint](https://developers.google.com/protocol-buffers/docs/encoding#varints). 
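To make the notes above concrete, here is a hedged sketch (the table `test.persons`, the schema file `schemafile.proto`, and its `Person` message are assumed names, not from the original): a column-level DEFAULT fills fields that are missing from incoming messages, and the schema is passed through the `format_schema` setting.

``` sql
-- Assumed table: `phone` has a table-level DEFAULT, which is what ClickHouse
-- uses when the corresponding field is absent from a Protobuf message
-- (defaults declared in the .proto schema itself are not applied).
CREATE TABLE test.persons
(
    `name`  String,
    `phone` String DEFAULT 'unknown'
)
ENGINE = MergeTree
ORDER BY name;

-- Read length-delimited Protobuf messages, matching fields to columns by name.
-- `schemafile:Person` refers to the assumed schemafile.proto and its Person message;
-- the binary messages themselves are piped in, e.g. via clickhouse-client.
INSERT INTO test.persons
SETTINGS format_schema = 'schemafile:Person'
FORMAT Protobuf
```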
@@ -1789,25 +1789,25 @@ ClickHouse Avro format supports reading and writing [Avro data files](https://av ### Data Types Matching {#data_types-matching} -The table below shows supported data types and how they match ClickHouse [data types](../sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries. +The table below shows supported data types and how they match ClickHouse [data types](/docs/en/sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries. -| Avro data type `INSERT` | ClickHouse data type | Avro data type `SELECT` | -|---------------------------------------------|-----------------------------------------------------------------------------------------------------------------------|------------------------------| -| `boolean`, `int`, `long`, `float`, `double` | [Int(8\|16\|32)](../sql-reference/data-types/int-uint.md), [UInt(8\|16\|32)](../sql-reference/data-types/int-uint.md) | `int` | -| `boolean`, `int`, `long`, `float`, `double` | [Int64](../sql-reference/data-types/int-uint.md), [UInt64](../sql-reference/data-types/int-uint.md) | `long` | -| `boolean`, `int`, `long`, `float`, `double` | [Float32](../sql-reference/data-types/float.md) | `float` | -| `boolean`, `int`, `long`, `float`, `double` | [Float64](../sql-reference/data-types/float.md) | `double` | -| `bytes`, `string`, `fixed`, `enum` | [String](../sql-reference/data-types/string.md) | `bytes` or `string` \* | -| `bytes`, `string`, `fixed` | [FixedString(N)](../sql-reference/data-types/fixedstring.md) | `fixed(N)` | -| `enum` | [Enum(8\|16)](../sql-reference/data-types/enum.md) | `enum` | -| `array(T)` | [Array(T)](../sql-reference/data-types/array.md) | `array(T)` | -| `union(null, T)`, `union(T, null)` | [Nullable(T)](../sql-reference/data-types/date.md) | `union(null, T)` | -| `null` | [Nullable(Nothing)](../sql-reference/data-types/special-data-types/nothing.md) | `null` | -| `int (date)` \** | [Date](../sql-reference/data-types/date.md) | `int (date)` \** | -| `long (timestamp-millis)` \** | [DateTime64(3)](../sql-reference/data-types/datetime.md) | `long (timestamp-millis)` \* | -| `long (timestamp-micros)` \** | [DateTime64(6)](../sql-reference/data-types/datetime.md) | `long (timestamp-micros)` \* | +| Avro data type `INSERT` | ClickHouse data type | Avro data type `SELECT` | +|---------------------------------------------|----------------------------------------------------------------------------------------------------|------------------------------| +| `boolean`, `int`, `long`, `float`, `double` | [Int(8\|16\|32)](/docs/en/sql-reference/data-types/int-uint.md), [UInt(8\|16\|32)](/docs/en/sql-reference/data-types/int-uint.md) | `int` | +| `boolean`, `int`, `long`, `float`, `double` | [Int64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `long` | +| `boolean`, `int`, `long`, `float`, `double` | [Float32](/docs/en/sql-reference/data-types/float.md) | `float` | +| `boolean`, `int`, `long`, `float`, `double` | [Float64](/docs/en/sql-reference/data-types/float.md) | `double` | +| `bytes`, `string`, `fixed`, `enum` | [String](/docs/en/sql-reference/data-types/string.md) | `bytes` or `string` \* | +| `bytes`, `string`, `fixed` | [FixedString(N)](/docs/en/sql-reference/data-types/fixedstring.md) | `fixed(N)` | +| `enum` | [Enum(8\|16)](/docs/en/sql-reference/data-types/enum.md) | `enum` | +| `array(T)` | [Array(T)](/docs/en/sql-reference/data-types/array.md) | `array(T)` | +| `union(null, T)`, `union(T, null)` | 
[Nullable(T)](/docs/en/sql-reference/data-types/date.md) | `union(null, T)` | +| `null` | [Nullable(Nothing)](/docs/en/sql-reference/data-types/special-data-types/nothing.md) | `null` | +| `int (date)` \** | [Date](/docs/en/sql-reference/data-types/date.md) | `int (date)` \** | +| `long (timestamp-millis)` \** | [DateTime64(3)](/docs/en/sql-reference/data-types/datetime.md) | `long (timestamp-millis)` \* | +| `long (timestamp-micros)` \** | [DateTime64(6)](/docs/en/sql-reference/data-types/datetime.md) | `long (timestamp-micros)` \* | -\* `bytes` is default, controlled by [output_format_avro_string_column_pattern](../operations/settings/settings.md#output_format_avro_string_column_pattern) +\* `bytes` is default, controlled by [output_format_avro_string_column_pattern](/docs/en/operations/settings/settings.md/#output_format_avro_string_column_pattern) \** [Avro logical types](https://avro.apache.org/docs/current/spec.html#Logical+Types) Unsupported Avro data types: `record` (non-root), `map` @@ -1827,9 +1827,9 @@ The root schema of input Avro file must be of `record` type. To find the correspondence between table columns and fields of Avro schema ClickHouse compares their names. This comparison is case-sensitive. Unused fields are skipped. -Data types of ClickHouse table columns can differ from the corresponding fields of the Avro data inserted. When inserting data, ClickHouse interprets data types according to the table above and then [casts](../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) the data to corresponding column type. +Data types of ClickHouse table columns can differ from the corresponding fields of the Avro data inserted. When inserting data, ClickHouse interprets data types according to the table above and then [casts](/docs/en/sql-reference/functions/type-conversion-functions.md/#type_conversion_function-cast) the data to corresponding column type. -While importing data, when field is not found in schema and setting [input_format_avro_allow_missing_fields](../operations/settings/settings.md#input_format_avro_allow_missing_fields) is enabled, default value will be used instead of error. +While importing data, when field is not found in schema and setting [input_format_avro_allow_missing_fields](/docs/en/operations/settings/settings.md/#input_format_avro_allow_missing_fields) is enabled, default value will be used instead of error. ### Selecting Data {#selecting-data-1} @@ -1844,7 +1844,7 @@ Column names must: - start with `[A-Za-z_]` - subsequently contain only `[A-Za-z0-9_]` -Output Avro file compression and sync interval can be configured with [output_format_avro_codec](../operations/settings/settings.md#output_format_avro_codec) and [output_format_avro_sync_interval](../operations/settings/settings.md#output_format_avro_sync_interval) respectively. +Output Avro file compression and sync interval can be configured with [output_format_avro_codec](/docs/en/operations/settings/settings.md/#output_format_avro_codec) and [output_format_avro_sync_interval](/docs/en/operations/settings/settings.md/#output_format_avro_sync_interval) respectively. ## AvroConfluent {#data-format-avro-confluent} @@ -1854,7 +1854,7 @@ Each Avro message embeds a schema id that can be resolved to the actual schema w Schemas are cached once resolved. -Schema Registry URL is configured with [format_avro_schema_registry_url](../operations/settings/settings.md#format_avro_schema_registry_url). 
+Schema Registry URL is configured with [format_avro_schema_registry_url](/docs/en/operations/settings/settings.md/#format_avro_schema_registry_url). ### Data Types Matching {#data_types-matching-1} @@ -1862,7 +1862,7 @@ Same as [Avro](#data-format-avro). ### Usage {#usage} -To quickly verify schema resolution you can use [kafkacat](https://github.com/edenhill/kafkacat) with [clickhouse-local](../operations/utilities/clickhouse-local.md): +To quickly verify schema resolution you can use [kafkacat](https://github.com/edenhill/kafkacat) with [clickhouse-local](/docs/en/operations/utilities/clickhouse-local.md): ``` bash $ kafkacat -b kafka-broker -C -t topic1 -o beginning -f '%s' -c 3 | clickhouse-local --input-format AvroConfluent --format_avro_schema_registry_url 'http://schema-registry' -S "field1 Int64, field2 String" -q 'select * from table' @@ -1871,7 +1871,7 @@ $ kafkacat -b kafka-broker -C -t topic1 -o beginning -f '%s' -c 3 | clickhouse- 3 c ``` -To use `AvroConfluent` with [Kafka](../engines/table-engines/integrations/kafka.md): +To use `AvroConfluent` with [Kafka](/docs/en/engines/table-engines/integrations/kafka.md): ``` sql CREATE TABLE topic1_stream @@ -1903,36 +1903,36 @@ Setting `format_avro_schema_registry_url` needs to be configured in `users.xml` ### Data Types Matching {#data-types-matching-parquet} -The table below shows supported data types and how they match ClickHouse [data types](../sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries. +The table below shows supported data types and how they match ClickHouse [data types](/docs/en/sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries. | Parquet data type (`INSERT`) | ClickHouse data type | Parquet data type (`SELECT`) | -|------------------------------|-----------------------------------------------------------|------------------------------| -| `UINT8`, `BOOL` | [UInt8](../sql-reference/data-types/int-uint.md) | `UINT8` | -| `INT8` | [Int8](../sql-reference/data-types/int-uint.md) | `INT8` | -| `UINT16` | [UInt16](../sql-reference/data-types/int-uint.md) | `UINT16` | -| `INT16` | [Int16](../sql-reference/data-types/int-uint.md) | `INT16` | -| `UINT32` | [UInt32](../sql-reference/data-types/int-uint.md) | `UINT32` | -| `INT32` | [Int32](../sql-reference/data-types/int-uint.md) | `INT32` | -| `UINT64` | [UInt64](../sql-reference/data-types/int-uint.md) | `UINT64` | -| `INT64` | [Int64](../sql-reference/data-types/int-uint.md) | `INT64` | -| `FLOAT`, `HALF_FLOAT` | [Float32](../sql-reference/data-types/float.md) | `FLOAT` | -| `DOUBLE` | [Float64](../sql-reference/data-types/float.md) | `DOUBLE` | -| `DATE32` | [Date](../sql-reference/data-types/date.md) | `UINT16` | -| `DATE64`, `TIMESTAMP` | [DateTime](../sql-reference/data-types/datetime.md) | `UINT32` | -| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `BINARY` | -| — | [FixedString](../sql-reference/data-types/fixedstring.md) | `BINARY` | -| `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | `DECIMAL` | -| `LIST` | [Array](../sql-reference/data-types/array.md) | `LIST` | -| `STRUCT` | [Tuple](../sql-reference/data-types/tuple.md) | `STRUCT` | -| `MAP` | [Map](../sql-reference/data-types/map.md) | `MAP` | +|------------------------------|-----------------------------------------------------------|----------------------------| +| `BOOL` | [Bool](/docs/en/sql-reference/data-types/boolean.md) | `BOOL` | +| `UINT8`, `BOOL` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `UINT8` | +| `INT8` | 
[Int8](/docs/en/sql-reference/data-types/int-uint.md) | `INT8` | +| `UINT16` | [UInt16](/docs/en/sql-reference/data-types/int-uint.md) | `UINT16` | +| `INT16` | [Int16](/docs/en/sql-reference/data-types/int-uint.md) | `INT16` | +| `UINT32` | [UInt32](/docs/en/sql-reference/data-types/int-uint.md) | `UINT32` | +| `INT32` | [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `INT32` | +| `UINT64` | [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `UINT64` | +| `INT64` | [Int64](/docs/en/sql-reference/data-types/int-uint.md) | `INT64` | +| `FLOAT` | [Float32](/docs/en/sql-reference/data-types/float.md) | `FLOAT` | +| `DOUBLE` | [Float64](/docs/en/sql-reference/data-types/float.md) | `DOUBLE` | +| `DATE` | [Date32](/docs/en/sql-reference/data-types/date.md) | `DATE` | +| `TIME (ms)` | [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `UINT32` | +| `TIMESTAMP`, `TIME (us, ns)` | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | `TIMESTAMP` | +| `STRING`, `BINARY` | [String](/docs/en/sql-reference/data-types/string.md) | `BINARY` | +| — | [FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `BINARY` | +| `DECIMAL` | [Decimal](/docs/en/sql-reference/data-types/decimal.md) | `DECIMAL` | +| `LIST` | [Array](/docs/en/sql-reference/data-types/array.md) | `LIST` | +| `STRUCT` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `STRUCT` | +| `MAP` | [Map](/docs/en/sql-reference/data-types/map.md) | `MAP` | Arrays can be nested and can have a value of the `Nullable` type as an argument. `Tuple` and `Map` types also can be nested. -ClickHouse supports configurable precision of `Decimal` type. The `INSERT` query treats the Parquet `DECIMAL` type as the ClickHouse `Decimal128` type. +Unsupported Parquet data types: `FIXED_SIZE_BINARY`, `JSON`, `UUID`, `ENUM`. -Unsupported Parquet data types: `TIME32`, `FIXED_SIZE_BINARY`, `JSON`, `UUID`, `ENUM`. - -Data types of ClickHouse table columns can differ from the corresponding fields of the Parquet data inserted. When inserting data, ClickHouse interprets data types according to the table above and then [cast](../sql-reference/functions/type-conversion-functions/#type_conversion_function-cast) the data to that data type which is set for the ClickHouse table column. +Data types of ClickHouse table columns can differ from the corresponding fields of the Parquet data inserted. When inserting data, ClickHouse interprets data types according to the table above and then [cast](/docs/en/sql-reference/functions/type-conversion-functions/#type_conversion_function-cast) the data to that data type which is set for the ClickHouse table column. ### Inserting and Selecting Data {#inserting-and-selecting-data-parquet} @@ -1948,16 +1948,16 @@ You can select data from a ClickHouse table and save them into some file in the $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Parquet" > {some_file.pq} ``` -To exchange data with Hadoop, you can use [HDFS table engine](../engines/table-engines/integrations/hdfs.md). +To exchange data with Hadoop, you can use [HDFS table engine](/docs/en/engines/table-engines/integrations/hdfs.md). ### Parquet format settings {#parquet-format-settings} -- [output_format_parquet_row_group_size](../operations/settings/settings.md#output_format_parquet_row_group_size) - row group size in rows while data output. Default value - `1000000`. 
-- [output_format_parquet_string_as_string](../operations/settings/settings.md#output_format_parquet_string_as_string) - use Parquet String type instead of Binary for String columns. Default value - `false`. -- [input_format_parquet_import_nested](../operations/settings/settings.md#input_format_parquet_import_nested) - allow inserting array of structs into [Nested](../sql-reference/data-types/nested-data-structures/nested.md) table in Parquet input format. Default value - `false`. -- [input_format_parquet_case_insensitive_column_matching](../operations/settings/settings.md#input_format_parquet_case_insensitive_column_matching) - ignore case when matching Parquet columns with ClickHouse columns. Default value - `false`. -- [input_format_parquet_allow_missing_columns](../operations/settings/settings.md#input_format_parquet_allow_missing_columns) - allow missing columns while reading Parquet data. Default value - `false`. -- [input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference](../operations/settings/settings.md#input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Parquet format. Default value - `false`. +- [output_format_parquet_row_group_size](/docs/en/operations/settings/settings.md/#output_format_parquet_row_group_size) - row group size in rows while data output. Default value - `1000000`. +- [output_format_parquet_string_as_string](/docs/en/operations/settings/settings.md/#output_format_parquet_string_as_string) - use Parquet String type instead of Binary for String columns. Default value - `false`. +- [input_format_parquet_import_nested](/docs/en/operations/settings/settings.md/#input_format_parquet_import_nested) - allow inserting array of structs into [Nested](/docs/en/sql-reference/data-types/nested-data-structures/nested.md) table in Parquet input format. Default value - `false`. +- [input_format_parquet_case_insensitive_column_matching](/docs/en/operations/settings/settings.md/#input_format_parquet_case_insensitive_column_matching) - ignore case when matching Parquet columns with ClickHouse columns. Default value - `false`. +- [input_format_parquet_allow_missing_columns](/docs/en/operations/settings/settings.md/#input_format_parquet_allow_missing_columns) - allow missing columns while reading Parquet data. Default value - `false`. +- [input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings.md/#input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Parquet format. Default value - `false`. ## Arrow {#data-format-arrow} @@ -1967,39 +1967,39 @@ To exchange data with Hadoop, you can use [HDFS table engine](../engines/table-e ### Data Types Matching {#data-types-matching-arrow} -The table below shows supported data types and how they match ClickHouse [data types](../sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries. +The table below shows supported data types and how they match ClickHouse [data types](/docs/en/sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries. 
-| Arrow data type (`INSERT`) | ClickHouse data type | Arrow data type (`SELECT`) | -|----------------------------|-----------------------------------------------------|----------------------------| -| `UINT8`, `BOOL` | [UInt8](../sql-reference/data-types/int-uint.md) | `UINT8` | -| `INT8` | [Int8](../sql-reference/data-types/int-uint.md) | `INT8` | -| `UINT16` | [UInt16](../sql-reference/data-types/int-uint.md) | `UINT16` | -| `INT16` | [Int16](../sql-reference/data-types/int-uint.md) | `INT16` | -| `UINT32` | [UInt32](../sql-reference/data-types/int-uint.md) | `UINT32` | -| `INT32` | [Int32](../sql-reference/data-types/int-uint.md) | `INT32` | -| `UINT64` | [UInt64](../sql-reference/data-types/int-uint.md) | `UINT64` | -| `INT64` | [Int64](../sql-reference/data-types/int-uint.md) | `INT64` | -| `FLOAT`, `HALF_FLOAT` | [Float32](../sql-reference/data-types/float.md) | `FLOAT32` | -| `DOUBLE` | [Float64](../sql-reference/data-types/float.md) | `FLOAT64` | -| `DATE32` | [Date](../sql-reference/data-types/date.md) | `UINT16` | -| `DATE64`, `TIMESTAMP` | [DateTime](../sql-reference/data-types/datetime.md) | `UINT32` | -| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `BINARY` | -| `STRING`, `BINARY` | [FixedString](../sql-reference/data-types/fixedstring.md) | `BINARY` | -| `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | `DECIMAL` | -| `DECIMAL256` | [Decimal256](../sql-reference/data-types/decimal.md)| `DECIMAL256` | -| `LIST` | [Array](../sql-reference/data-types/array.md) | `LIST` | -| `STRUCT` | [Tuple](../sql-reference/data-types/tuple.md) | `STRUCT` | -| `MAP` | [Map](../sql-reference/data-types/map.md) | `MAP` | +| Arrow data type (`INSERT`) | ClickHouse data type | Arrow data type (`SELECT`) | +|---------------------------------|-----------------------------------------------------------|----------------------------| +| `BOOL` | [Bool](/docs/en/sql-reference/data-types/boolean.md) | `BOOL` | +| `UINT8`, `BOOL` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `UINT8` | +| `INT8` | [Int8](/docs/en/sql-reference/data-types/int-uint.md) | `INT8` | +| `UINT16` | [UInt16](/docs/en/sql-reference/data-types/int-uint.md) | `UINT16` | +| `INT16` | [Int16](/docs/en/sql-reference/data-types/int-uint.md) | `INT16` | +| `UINT32` | [UInt32](/docs/en/sql-reference/data-types/int-uint.md) | `UINT32` | +| `INT32` | [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `INT32` | +| `UINT64` | [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `UINT64` | +| `INT64` | [Int64](/docs/en/sql-reference/data-types/int-uint.md) | `INT64` | +| `FLOAT`, `HALF_FLOAT` | [Float32](/docs/en/sql-reference/data-types/float.md) | `FLOAT32` | +| `DOUBLE` | [Float64](/docs/en/sql-reference/data-types/float.md) | `FLOAT64` | +| `DATE32` | [Date32](/docs/en/sql-reference/data-types/date32.md) | `UINT16` | +| `DATE64` | [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `UINT32` | +| `TIMESTAMP`, `TIME32`, `TIME64` | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | `UINT32` | +| `STRING`, `BINARY` | [String](/docs/en/sql-reference/data-types/string.md) | `BINARY` | +| `STRING`, `BINARY` | [FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `BINARY` | +| `DECIMAL` | [Decimal](/docs/en/sql-reference/data-types/decimal.md) | `DECIMAL` | +| `DECIMAL256` | [Decimal256](/docs/en/sql-reference/data-types/decimal.md) | `DECIMAL256` | +| `LIST` | [Array](/docs/en/sql-reference/data-types/array.md) | `LIST` | +| `STRUCT` | 
[Tuple](/docs/en/sql-reference/data-types/tuple.md) | `STRUCT` | +| `MAP` | [Map](/docs/en/sql-reference/data-types/map.md) | `MAP` | Arrays can be nested and can have a value of the `Nullable` type as an argument. `Tuple` and `Map` types also can be nested. -The `DICTIONARY` type is supported for `INSERT` queries, and for `SELECT` queries there is an [output_format_arrow_low_cardinality_as_dictionary](../operations/settings/settings.md#output-format-arrow-low-cardinality-as-dictionary) setting that allows to output [LowCardinality](../sql-reference/data-types/lowcardinality.md) type as a `DICTIONARY` type. +The `DICTIONARY` type is supported for `INSERT` queries, and for `SELECT` queries there is an [output_format_arrow_low_cardinality_as_dictionary](/docs/en/operations/settings/settings.md/#output-format-arrow-low-cardinality-as-dictionary) setting that allows to output [LowCardinality](/docs/en/sql-reference/data-types/lowcardinality.md) type as a `DICTIONARY` type. -ClickHouse supports configurable precision of the `Decimal` type. The `INSERT` query treats the Arrow `DECIMAL` type as the ClickHouse `Decimal128` type. +Unsupported Arrow data types: `FIXED_SIZE_BINARY`, `JSON`, `UUID`, `ENUM`. -Unsupported Arrow data types: `TIME32`, `FIXED_SIZE_BINARY`, `JSON`, `UUID`, `ENUM`. - -The data types of ClickHouse table columns do not have to match the corresponding Arrow data fields. When inserting data, ClickHouse interprets data types according to the table above and then [casts](../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) the data to the data type set for the ClickHouse table column. +The data types of ClickHouse table columns do not have to match the corresponding Arrow data fields. When inserting data, ClickHouse interprets data types according to the table above and then [casts](/docs/en/sql-reference/functions/type-conversion-functions.md/#type_conversion_function-cast) the data to the data type set for the ClickHouse table column. ### Inserting Data {#inserting-data-arrow} @@ -2019,12 +2019,12 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Arrow" > {filenam ### Arrow format settings {#parquet-format-settings} -- [output_format_arrow_low_cardinality_as_dictionary](../operations/settings/settings.md#output_format_arrow_low_cardinality_as_dictionary) - enable output ClickHouse LowCardinality type as Dictionary Arrow type. Default value - `false`. -- [output_format_arrow_string_as_string](../operations/settings/settings.md#output_format_arrow_string_as_string) - use Arrow String type instead of Binary for String columns. Default value - `false`. -- [input_format_arrow_import_nested](../operations/settings/settings.md#input_format_arrow_import_nested) - allow inserting array of structs into Nested table in Arrow input format. Default value - `false`. -- [input_format_arrow_case_insensitive_column_matching](../operations/settings/settings.md#input_format_arrow_case_insensitive_column_matching) - ignore case when matching Arrow columns with ClickHouse columns. Default value - `false`. -- [input_format_arrow_allow_missing_columns](../operations/settings/settings.md#input_format_arrow_allow_missing_columns) - allow missing columns while reading Arrow data. Default value - `false`. 
-- [input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference](../operations/settings/settings.md#input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Arrow format. Default value - `false`. +- [output_format_arrow_low_cardinality_as_dictionary](/docs/en/operations/settings/settings.md/#output_format_arrow_low_cardinality_as_dictionary) - enable output ClickHouse LowCardinality type as Dictionary Arrow type. Default value - `false`. +- [output_format_arrow_string_as_string](/docs/en/operations/settings/settings.md/#output_format_arrow_string_as_string) - use Arrow String type instead of Binary for String columns. Default value - `false`. +- [input_format_arrow_import_nested](/docs/en/operations/settings/settings.md/#input_format_arrow_import_nested) - allow inserting array of structs into Nested table in Arrow input format. Default value - `false`. +- [input_format_arrow_case_insensitive_column_matching](/docs/en/operations/settings/settings.md/#input_format_arrow_case_insensitive_column_matching) - ignore case when matching Arrow columns with ClickHouse columns. Default value - `false`. +- [input_format_arrow_allow_missing_columns](/docs/en/operations/settings/settings.md/#input_format_arrow_allow_missing_columns) - allow missing columns while reading Arrow data. Default value - `false`. +- [input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings.md/#input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Arrow format. Default value - `false`. ## ArrowStream {#data-format-arrow-stream} @@ -2036,35 +2036,30 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Arrow" > {filenam ### Data Types Matching {#data-types-matching-orc} -The table below shows supported data types and how they match ClickHouse [data types](../sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries. +The table below shows supported data types and how they match ClickHouse [data types](/docs/en/sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries. 
-| ORC data type (`INSERT`) | ClickHouse data type | ORC data type (`SELECT`) | -|--------------------------|-----------------------------------------------------|--------------------------| -| `UINT8`, `BOOL` | [UInt8](../sql-reference/data-types/int-uint.md) | `UINT8` | -| `INT8` | [Int8](../sql-reference/data-types/int-uint.md) | `INT8` | -| `UINT16` | [UInt16](../sql-reference/data-types/int-uint.md) | `UINT16` | -| `INT16` | [Int16](../sql-reference/data-types/int-uint.md) | `INT16` | -| `UINT32` | [UInt32](../sql-reference/data-types/int-uint.md) | `UINT32` | -| `INT32` | [Int32](../sql-reference/data-types/int-uint.md) | `INT32` | -| `UINT64` | [UInt64](../sql-reference/data-types/int-uint.md) | `UINT64` | -| `INT64` | [Int64](../sql-reference/data-types/int-uint.md) | `INT64` | -| `FLOAT`, `HALF_FLOAT` | [Float32](../sql-reference/data-types/float.md) | `FLOAT` | -| `DOUBLE` | [Float64](../sql-reference/data-types/float.md) | `DOUBLE` | -| `DATE32` | [Date](../sql-reference/data-types/date.md) | `DATE32` | -| `DATE64`, `TIMESTAMP` | [DateTime](../sql-reference/data-types/datetime.md) | `TIMESTAMP` | -| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `BINARY` | -| `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | `DECIMAL` | -| `LIST` | [Array](../sql-reference/data-types/array.md) | `LIST` | -| `STRUCT` | [Tuple](../sql-reference/data-types/tuple.md) | `STRUCT` | -| `MAP` | [Map](../sql-reference/data-types/map.md) | `MAP` | +| ORC data type (`INSERT`) | ClickHouse data type | ORC data type (`SELECT`) | +|---------------------------------------|---------------------------------------------------------|--------------------------| +| `Boolean` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `Boolean` | +| `Tinyint` | [Int8](/docs/en/sql-reference/data-types/int-uint.md) | `Tinyint` | +| `Smallint` | [Int16](/docs/en/sql-reference/data-types/int-uint.md) | `Smallint` | +| `Int` | [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `Int` | +| `Bigint` | [Int64](/docs/en/sql-reference/data-types/int-uint.md) | `Bigint` | +| `Float` | [Float32](/docs/en/sql-reference/data-types/float.md) | `Float` | +| `Double` | [Float64](/docs/en/sql-reference/data-types/float.md) | `Double` | +| `Decimal` | [Decimal](/docs/en/sql-reference/data-types/decimal.md) | `Decimal` | +| `Date` | [Date32](/docs/en/sql-reference/data-types/date32.md) | `Date` | +| `Timestamp` | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | `Timestamp` | +| `String`, `Char`, `Varchar`, `Binary` | [String](/docs/en/sql-reference/data-types/string.md) | `Binary` | +| `List` | [Array](/docs/en/sql-reference/data-types/array.md) | `List` | +| `Struct` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `Struct` | +| `Map` | [Map](/docs/en/sql-reference/data-types/map.md) | `Map` | + +Other types are not supported. Arrays can be nested and can have a value of the `Nullable` type as an argument. `Tuple` and `Map` types also can be nested. -ClickHouse supports configurable precision of the `Decimal` type. The `INSERT` query treats the ORC `DECIMAL` type as the ClickHouse `Decimal128` type. - -Unsupported ORC data types: `TIME32`, `FIXED_SIZE_BINARY`, `JSON`, `UUID`, `ENUM`. - -The data types of ClickHouse table columns do not have to match the corresponding ORC data fields. 
When inserting data, ClickHouse interprets data types according to the table above and then [casts](../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) the data to the data type set for the ClickHouse table column. +The data types of ClickHouse table columns do not have to match the corresponding ORC data fields. When inserting data, ClickHouse interprets data types according to the table above and then [casts](/docs/en/sql-reference/functions/type-conversion-functions.md/#type_conversion_function-cast) the data to the data type set for the ClickHouse table column. ### Inserting Data {#inserting-data-orc} @@ -2084,18 +2079,18 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT ORC" > {filename. ### Arrow format settings {#parquet-format-settings} -- [output_format_arrow_string_as_string](../operations/settings/settings.md#output_format_arrow_string_as_string) - use Arrow String type instead of Binary for String columns. Default value - `false`. -- [input_format_arrow_import_nested](../operations/settings/settings.md#input_format_arrow_import_nested) - allow inserting array of structs into Nested table in Arrow input format. Default value - `false`. -- [input_format_arrow_case_insensitive_column_matching](../operations/settings/settings.md#input_format_arrow_case_insensitive_column_matching) - ignore case when matching Arrow columns with ClickHouse columns. Default value - `false`. -- [input_format_arrow_allow_missing_columns](../operations/settings/settings.md#input_format_arrow_allow_missing_columns) - allow missing columns while reading Arrow data. Default value - `false`. -- [input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference](../operations/settings/settings.md#input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Arrow format. Default value - `false`. +- [output_format_arrow_string_as_string](/docs/en/operations/settings/settings.md/#output_format_arrow_string_as_string) - use Arrow String type instead of Binary for String columns. Default value - `false`. +- [input_format_arrow_import_nested](/docs/en/operations/settings/settings.md/#input_format_arrow_import_nested) - allow inserting array of structs into Nested table in Arrow input format. Default value - `false`. +- [input_format_arrow_case_insensitive_column_matching](/docs/en/operations/settings/settings.md/#input_format_arrow_case_insensitive_column_matching) - ignore case when matching Arrow columns with ClickHouse columns. Default value - `false`. +- [input_format_arrow_allow_missing_columns](/docs/en/operations/settings/settings.md/#input_format_arrow_allow_missing_columns) - allow missing columns while reading Arrow data. Default value - `false`. +- [input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings.md/#input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Arrow format. Default value - `false`. -To exchange data with Hadoop, you can use [HDFS table engine](../engines/table-engines/integrations/hdfs.md). +To exchange data with Hadoop, you can use [HDFS table engine](/docs/en/engines/table-engines/integrations/hdfs.md). ## LineAsString {#lineasstring} -In this format, every line of input data is interpreted as a single string value. 
This format can only be parsed for table with a single field of type [String](../sql-reference/data-types/string.md). The remaining columns must be set to [DEFAULT](../sql-reference/statements/create/table.md#default) or [MATERIALIZED](../sql-reference/statements/create/table.md#materialized), or omitted.
+In this format, every line of input data is interpreted as a single string value. This format can only be parsed for a table with a single field of type [String](/docs/en/sql-reference/data-types/string.md). The remaining columns must be set to [DEFAULT](/docs/en/sql-reference/statements/create/table.md/#default) or [MATERIALIZED](/docs/en/sql-reference/statements/create/table.md/#materialized), or omitted.

**Example**

@@ -2122,9 +2117,9 @@ Each line of imported data is parsed according to the regular expression.

When working with the `Regexp` format, you can use the following settings:

-- `format_regexp` — [String](../sql-reference/data-types/string.md). Contains regular expression in the [re2](https://github.com/google/re2/wiki/Syntax) format.
+- `format_regexp` — [String](/docs/en/sql-reference/data-types/string.md). Contains a regular expression in the [re2](https://github.com/google/re2/wiki/Syntax) format.

-- `format_regexp_escaping_rule` — [String](../sql-reference/data-types/string.md). The following escaping rules are supported:
+- `format_regexp_escaping_rule` — [String](/docs/en/sql-reference/data-types/string.md). The following escaping rules are supported:

- CSV (similarly to [CSV](#csv))
- JSON (similarly to [JSONEachRow](#jsoneachrow))
@@ -2132,17 +2127,17 @@ When working with the `Regexp` format, you can use the following settings:
- Quoted (similarly to [Values](#data-format-values))
- Raw (extracts subpatterns as a whole, no escaping rules, similarly to [TSVRaw](#tabseparatedraw))

-- `format_regexp_skip_unmatched` — [UInt8](../sql-reference/data-types/int-uint.md). Defines the need to throw an exception in case the `format_regexp` expression does not match the imported data. Can be set to `0` or `1`.
+- `format_regexp_skip_unmatched` — [UInt8](/docs/en/sql-reference/data-types/int-uint.md). Defines whether to throw an exception when the `format_regexp` expression does not match the imported data. Can be set to `0` or `1`.

**Usage**

-The regular expression from [format_regexp](../operations/settings/settings.md#format_regexp) setting is applied to every line of imported data. The number of subpatterns in the regular expression must be equal to the number of columns in imported dataset.
+The regular expression from the [format_regexp](/docs/en/operations/settings/settings.md/#format_regexp) setting is applied to every line of imported data. The number of subpatterns in the regular expression must be equal to the number of columns in the imported dataset.

Lines of the imported data must be separated by newline character `'\n'` or DOS-style newline `"\r\n"`.

-The content of every matched subpattern is parsed with the method of corresponding data type, according to [format_regexp_escaping_rule](../operations/settings/settings.md#format_regexp_escaping_rule) setting.
+The content of every matched subpattern is parsed with the method of the corresponding data type, according to the [format_regexp_escaping_rule](/docs/en/operations/settings/settings.md/#format_regexp_escaping_rule) setting.

-If the regular expression does not match the line and [format_regexp_skip_unmatched](../operations/settings/settings.md#format_regexp_escaping_rule) is set to 1, the line is silently skipped.
Otherwise, exception is thrown.
+If the regular expression does not match the line and [format_regexp_skip_unmatched](/docs/en/operations/settings/settings.md/#format_regexp_skip_unmatched) is set to 1, the line is silently skipped. Otherwise, an exception is thrown.

**Example**

@@ -2190,25 +2185,25 @@ e.g. `schemafile.proto:MessageType`.
If the file has the standard extension for the format (for example, `.proto` for `Protobuf`),
it can be omitted and in this case, the format schema looks like `schemafile:MessageType`.

-If you input or output data via the [client](../interfaces/cli.md) in the [interactive mode](../interfaces/cli.md#cli_usage), the file name specified in the format schema
+If you input or output data via the [client](/docs/en/interfaces/cli.md) in the [interactive mode](/docs/en/interfaces/cli.md/#cli_usage), the file name specified in the format schema
can contain an absolute path or a path relative to the current directory on the client.
-If you use the client in the [batch mode](../interfaces/cli.md#cli_usage), the path to the schema must be relative due to security reasons.
+If you use the client in the [batch mode](/docs/en/interfaces/cli.md/#cli_usage), the path to the schema must be relative for security reasons.

-If you input or output data via the [HTTP interface](../interfaces/http.md) the file name specified in the format schema
-should be located in the directory specified in [format_schema_path](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-format_schema_path)
+If you input or output data via the [HTTP interface](/docs/en/interfaces/http.md), the file name specified in the format schema
+should be located in the directory specified in [format_schema_path](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-format_schema_path)
in the server configuration.

## Skipping Errors {#skippingerrors}

-Some formats such as `CSV`, `TabSeparated`, `TSKV`, `JSONEachRow`, `Template`, `CustomSeparated` and `Protobuf` can skip broken row if parsing error occurred and continue parsing from the beginning of next row. See [input_format_allow_errors_num](../operations/settings/settings.md#input_format_allow_errors_num) and
-[input_format_allow_errors_ratio](../operations/settings/settings.md#input_format_allow_errors_ratio) settings.
+Some formats such as `CSV`, `TabSeparated`, `TSKV`, `JSONEachRow`, `Template`, `CustomSeparated` and `Protobuf` can skip a broken row if a parsing error occurs and continue parsing from the beginning of the next row. See [input_format_allow_errors_num](/docs/en/operations/settings/settings.md/#input_format_allow_errors_num) and
+[input_format_allow_errors_ratio](/docs/en/operations/settings/settings.md/#input_format_allow_errors_ratio) settings.

Limitations:
- In case of parsing error `JSONEachRow` skips all data until the new line (or EOF), so rows must be delimited by `\n` to count errors correctly.
- `Template` and `CustomSeparated` use delimiter after the last column and delimiter between rows to find the beginning of next row, so skipping errors works only if at least one of them is not empty.

## RawBLOB {#rawblob}

-In this format, all input data is read to a single value. It is possible to parse only a table with a single field of type [String](../sql-reference/data-types/string.md) or similar.
+In this format, all input data is read to a single value.
It is possible to parse only a table with a single field of type [String](/docs/en/sql-reference/data-types/string.md) or similar. The result is output in binary format without delimiters and escaping. If more than one value is output, the format is ambiguous, and it will be impossible to read the data back. Below is a comparison of the formats `RawBLOB` and [TabSeparatedRaw](#tabseparatedraw). @@ -2255,18 +2250,18 @@ ClickHouse supports reading and writing [MessagePack](https://msgpack.org/) data | MessagePack data type (`INSERT`) | ClickHouse data type | MessagePack data type (`SELECT`) | |--------------------------------------------------------------------|-----------------------------------------------------------|------------------------------------| -| `uint N`, `positive fixint` | [UIntN](../sql-reference/data-types/int-uint.md) | `uint N` | -| `int N` | [IntN](../sql-reference/data-types/int-uint.md) | `int N` | -| `bool` | [UInt8](../sql-reference/data-types/int-uint.md) | `uint 8` | -| `fixstr`, `str 8`, `str 16`, `str 32`, `bin 8`, `bin 16`, `bin 32` | [String](../sql-reference/data-types/string.md) | `bin 8`, `bin 16`, `bin 32` | -| `fixstr`, `str 8`, `str 16`, `str 32`, `bin 8`, `bin 16`, `bin 32` | [FixedString](../sql-reference/data-types/fixedstring.md) | `bin 8`, `bin 16`, `bin 32` | -| `float 32` | [Float32](../sql-reference/data-types/float.md) | `float 32` | -| `float 64` | [Float64](../sql-reference/data-types/float.md) | `float 64` | -| `uint 16` | [Date](../sql-reference/data-types/date.md) | `uint 16` | -| `uint 32` | [DateTime](../sql-reference/data-types/datetime.md) | `uint 32` | -| `uint 64` | [DateTime64](../sql-reference/data-types/datetime.md) | `uint 64` | -| `fixarray`, `array 16`, `array 32` | [Array](../sql-reference/data-types/array.md) | `fixarray`, `array 16`, `array 32` | -| `fixmap`, `map 16`, `map 32` | [Map](../sql-reference/data-types/map.md) | `fixmap`, `map 16`, `map 32` | +| `uint N`, `positive fixint` | [UIntN](/docs/en/sql-reference/data-types/int-uint.md) | `uint N` | +| `int N`, `negative fixint` | [IntN](/docs/en/sql-reference/data-types/int-uint.md) | `int N` | +| `bool` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `uint 8` | +| `fixstr`, `str 8`, `str 16`, `str 32`, `bin 8`, `bin 16`, `bin 32` | [String](/docs/en/sql-reference/data-types/string.md) | `bin 8`, `bin 16`, `bin 32` | +| `fixstr`, `str 8`, `str 16`, `str 32`, `bin 8`, `bin 16`, `bin 32` | [FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `bin 8`, `bin 16`, `bin 32` | +| `float 32` | [Float32](/docs/en/sql-reference/data-types/float.md) | `float 32` | +| `float 64` | [Float64](/docs/en/sql-reference/data-types/float.md) | `float 64` | +| `uint 16` | [Date](/docs/en/sql-reference/data-types/date.md) | `uint 16` | +| `uint 32` | [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `uint 32` | +| `uint 64` | [DateTime64](/docs/en/sql-reference/data-types/datetime.md) | `uint 64` | +| `fixarray`, `array 16`, `array 32` | [Array](/docs/en/sql-reference/data-types/array.md) | `fixarray`, `array 16`, `array 32` | +| `fixmap`, `map 16`, `map 32` | [Map](/docs/en/sql-reference/data-types/map.md) | `fixmap`, `map 16`, `map 32` | Example: @@ -2280,17 +2275,17 @@ $ clickhouse-client --query="SELECT * FROM msgpack FORMAT MsgPack" > tmp_msgpack ### MsgPack format settings {#msgpack-format-settings} -- [input_format_msgpack_number_of_columns](../operations/settings/settings.md#input_format_msgpack_number_of_columns) - the number of columns in inserted 
MsgPack data. Used for automatic schema inference from data. Default value - `0`. -- [output_format_msgpack_uuid_representation](../operations/settings/settings.md#output_format_msgpack_uuid_representation) - the way how to output UUID in MsgPack format. Default value - `EXT`. +- [input_format_msgpack_number_of_columns](/docs/en/operations/settings/settings.md/#input_format_msgpack_number_of_columns) - the number of columns in inserted MsgPack data. Used for automatic schema inference from data. Default value - `0`. +- [output_format_msgpack_uuid_representation](/docs/en/operations/settings/settings.md/#output_format_msgpack_uuid_representation) - the way how to output UUID in MsgPack format. Default value - `EXT`. ## MySQLDump {#mysqldump} ClickHouse supports reading MySQL [dumps](https://dev.mysql.com/doc/refman/8.0/en/mysqldump.html). It reads all data from INSERT queries belonging to one table in dump. If there are more than one table, by default it reads data from the first one. -You can specify the name of the table from which to read data from using [input_format_mysql_dump_table_name](../operations/settings/settings.md#input_format_mysql_dump_table_name) settings. -If setting [input_format_mysql_dump_map_columns](../operations/settings/settings.md#input_format_mysql_dump_map_columns) is set to 1 and +You can specify the name of the table from which to read data from using [input_format_mysql_dump_table_name](/docs/en/operations/settings/settings.md/#input_format_mysql_dump_table_name) settings. +If setting [input_format_mysql_dump_map_columns](/docs/en/operations/settings/settings.md/#input_format_mysql_dump_map_columns) is set to 1 and dump contains CREATE query for specified table or column names in INSERT query the columns from input data will be mapped to the columns from the table by their names, -columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1. +columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings.md/#input_format_skip_unknown_fields) is set to 1. This format supports schema inference: if the dump contains CREATE query for the specified table, the structure is extracted from it, otherwise schema is inferred from the data of INSERT queries. Examples: @@ -2349,3 +2344,5 @@ Query id: 17d59664-ebce-4053-bb79-d46a516fb590 │ 3 │ └───┘ ``` + +[Original article](https://clickhouse.com/docs/en/interfaces/formats) diff --git a/docs/en/interfaces/schema-inference.md b/docs/en/interfaces/schema-inference.md new file mode 100644 index 00000000000..394e6906a23 --- /dev/null +++ b/docs/en/interfaces/schema-inference.md @@ -0,0 +1,1573 @@ +--- +slug: /en/interfaces/schema-inference +sidebar_position: 21 +sidebar_label: Schema inference +title: Automatic schema inference from input data +--- + +ClickHouse can automatically determine the structure of input data in almost all supported [Input formats](formats.md). +This document will describe when schema inference is used, how it works with different input formats and which settings +can control it. + +## Usage {#usage} + +Schema inference is used when ClickHouse needs to read the data in a specific data format and the structure is unknown. + +## Table functions [file](../sql-reference/table-functions/file.md), [s3](../sql-reference/table-functions/s3.md), [url](../sql-reference/table-functions/url.md), [hdfs](../sql-reference/table-functions/hdfs.md). 
+ +These table functions have the optional argument `structure` with the structure of input data. If this argument is not specified or set to `auto`, the structure will be inferred from the data. + +**Example:** + +Let's say we have a file `hobbies.jsonl` in JSONEachRow format in the `user_files` directory with this content: +```json +{"id" : 1, "age" : 25, "name" : "Josh", "hobbies" : ["football", "cooking", "music"]} +{"id" : 2, "age" : 19, "name" : "Alan", "hobbies" : ["tennis", "art"]} +{"id" : 3, "age" : 32, "name" : "Lana", "hobbies" : ["fitness", "reading", "shopping"]} +{"id" : 4, "age" : 47, "name" : "Brayan", "hobbies" : ["movies", "skydiving"]} +``` + +ClickHouse can read this data without you specifying its structure: +```sql +SELECT * FROM file('hobbies.jsonl') +``` +```response +┌─id─┬─age─┬─name───┬─hobbies──────────────────────────┐ +│ 1 │ 25 │ Josh │ ['football','cooking','music'] │ +│ 2 │ 19 │ Alan │ ['tennis','art'] │ +│ 3 │ 32 │ Lana │ ['fitness','reading','shopping'] │ +│ 4 │ 47 │ Brayan │ ['movies','skydiving'] │ +└────┴─────┴────────┴──────────────────────────────────┘ +``` + +Note: the format `JSONEachRow` was automatically determined by the file extension `.jsonl`. + +You can see an automatically determined structure using the `DESCRIBE` query: +```sql +DESCRIBE file('hobbies.jsonl') +``` +```response +┌─name────┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ id │ Nullable(Int64) │ │ │ │ │ │ +│ age │ Nullable(Int64) │ │ │ │ │ │ +│ name │ Nullable(String) │ │ │ │ │ │ +│ hobbies │ Array(Nullable(String)) │ │ │ │ │ │ +└─────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +## Table engines [File](../engines/table-engines/special/file.md), [S3](../engines/table-engines/integrations/s3.md), [URL](../engines/table-engines/special/url.md), [HDFS](../engines/table-engines/integrations/hdfs.md) + +If the list of columns is not specified in `CREATE TABLE` query, the structure of the table will be inferred automatically from the data. + +**Example:** + +Let's use the file `hobbies.jsonl`. We can create a table with engine `File` with the data from this file: +```sql +CREATE TABLE hobbies ENGINE=File(JSONEachRow, 'hobbies.jsonl') +``` +```response +Ok. +``` +```sql +SELECT * FROM hobbies +``` +```response +┌─id─┬─age─┬─name───┬─hobbies──────────────────────────┐ +│ 1 │ 25 │ Josh │ ['football','cooking','music'] │ +│ 2 │ 19 │ Alan │ ['tennis','art'] │ +│ 3 │ 32 │ Lana │ ['fitness','reading','shopping'] │ +│ 4 │ 47 │ Brayan │ ['movies','skydiving'] │ +└────┴─────┴────────┴──────────────────────────────────┘ +``` +```sql +DESCRIBE TABLE hobbies +``` +```response +┌─name────┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ id │ Nullable(Int64) │ │ │ │ │ │ +│ age │ Nullable(Int64) │ │ │ │ │ │ +│ name │ Nullable(String) │ │ │ │ │ │ +│ hobbies │ Array(Nullable(String)) │ │ │ │ │ │ +└─────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +## clickhouse-local + +`clickhouse-local` has an optional parameter `-S/--structure` with the structure of input data. If this parameter is not specified or set to `auto`, the structure will be inferred from the data. + +**Example:** + +Let's use the file `hobbies.jsonl`. 
We can query the data from this file using `clickhouse-local`: +```shell +clickhouse-local --file='hobbies.jsonl' --table='hobbies' --query='DESCRIBE TABLE hobbies' +``` +```response +id Nullable(Int64) +age Nullable(Int64) +name Nullable(String) +hobbies Array(Nullable(String)) +``` +```shell +clickhouse-local --file='hobbies.jsonl' --table='hobbies' --query='SELECT * FROM hobbies' +``` +```response +1 25 Josh ['football','cooking','music'] +2 19 Alan ['tennis','art'] +3 32 Lana ['fitness','reading','shopping'] +4 47 Brayan ['movies','skydiving'] +``` + +# Using structure from insertion table {#using-structure-from-insertion-table} + +When table functions `file/s3/url/hdfs` are used to insert data into a table, +there is an option to use the structure from the insertion table instead of extracting it from the data. +It can improve insertion performance because schema inference can take some time. Also, it will be helpful when the table has an optimized schema, so +no conversions between types will be performed. + +There is a special setting [use_structure_from_insertion_table_in_table_functions](/docs/en/operations/settings/settings.md/#use_structure_from_insertion_table_in_table_functions) +that controls this behaviour. It has 3 possible values: +- 0 - table function will extract the structure from the data. +- 1 - table function will use the structure from the insertion table. +- 2 - ClickHouse will automatically determine if it's possible to use the structure from the insertion table or use schema inference. Default value. + +**Example 1:** + +Let's create table `hobbies1` with the next structure: +```sql +CREATE TABLE hobbies1 +( + `id` UInt64, + `age` LowCardinality(UInt8), + `name` String, + `hobbies` Array(String) +) +ENGINE = MergeTree +ORDER BY id; +``` + +And insert data from the file `hobbies.jsonl`: + +```sql +INSERT INTO hobbies1 SELECT * FROM file(hobbies.jsonl) +``` + +In this case, all columns from the file are inserted into the table without changes, so ClickHouse will use the structure from the insertion table instead of schema inference. + +**Example 2:** + +Let's create table `hobbies2` with the next structure: +```sql +CREATE TABLE hobbies2 +( + `id` UInt64, + `age` LowCardinality(UInt8), + `hobbies` Array(String) +) + ENGINE = MergeTree +ORDER BY id; +``` + +And insert data from the file `hobbies.jsonl`: + +```sql +INSERT INTO hobbies2 SELECT id, age, hobbies FROM file(hobbies.jsonl) +``` + +In this case, all columns in the `SELECT` query are present in the table, so ClickHouse will use the structure from the insertion table. +Note that it will work only for input formats that support reading a subset of columns like JSONEachRow, TSKV, Parquet, etc. (so it won't work for example for TSV format). + +**Example 3:** + +Let's create table `hobbies3` with the next structure: + +```sql +CREATE TABLE hobbies3 +( + `identifier` UInt64, + `age` LowCardinality(UInt8), + `hobbies` Array(String) +) + ENGINE = MergeTree +ORDER BY identifier; +``` + +And insert data from the file `hobbies.jsonl`: + +```sql +INSERT INTO hobbies3 SELECT id, age, hobbies FROM file(hobbies.jsonl) +``` + +In this case, column `id` is used in the `SELECT` query, but the table doesn't have this column (it has a column with the name `identifier`), +so ClickHouse cannot use the structure from the insertion table, and schema inference will be used. 
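+If you want to control this behaviour explicitly instead of relying on the automatic choice, you can set [use_structure_from_insertion_table_in_table_functions](/docs/en/operations/settings/settings.md/#use_structure_from_insertion_table_in_table_functions) for a particular query. A minimal sketch, reusing the `hobbies2` table and the `hobbies.jsonl` file from Example 2, where both behaviours would work:
+
+```sql
+-- Force schema inference from the file, even though the structure could be taken from the insertion table
+INSERT INTO hobbies2 SELECT id, age, hobbies FROM file(hobbies.jsonl)
+SETTINGS use_structure_from_insertion_table_in_table_functions = 0;
+
+-- Force using the structure of the insertion table and skip schema inference
+INSERT INTO hobbies2 SELECT id, age, hobbies FROM file(hobbies.jsonl)
+SETTINGS use_structure_from_insertion_table_in_table_functions = 1;
+```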
+ +**Example 4:** + +Let's create table `hobbies4` with the next structure: + +```sql +CREATE TABLE hobbies4 +( + `id` UInt64, + `any_hobby` Nullable(String) +) + ENGINE = MergeTree +ORDER BY id; +``` + +And insert data from the file `hobbies.jsonl`: + +```sql +INSERT INTO hobbies4 SELECT id, empty(hobbies) ? NULL : hobbies[1] FROM file(hobbies.jsonl) +``` + +In this case, there are some operations performed on the column `hobbies` in the `SELECT` query to insert it into the table, so ClickHouse cannot use the structure from the insertion table, and schema inference will be used. + +# Schema inference cache {#schema-inference-cache} + +For most input formats schema inference reads some data to determine its structure and this process can take some time. +To prevent inferring the same schema every time ClickHouse read the data from the same file, the inferred schema is cached and when accessing the same file again, ClickHouse will use the schema from the cache. + +There are special settings that control this cache: +- `schema_inference_cache_max_elements_for_{file/s3/hdfs/url}` - the maximum number of cached schemas for the corresponding table function. The default value is `4096`. These settings should be set in the server config. +- `use_cache_for_{file,s3,hdfs,url}_schema_inference` - allows turning on/off using cache for schema inference. These settings can be used in queries. + +The schema of the file can be changed by modifying the data or by changing format settings. +For this reason, the schema inference cache identifies the schema by file source, format name, used format settings, and the last modification time of the file. + +Note: some files accessed by url in `url` table function may not contain information about the last modification time; for this case, there is a special setting +`schema_inference_cache_require_modification_time_for_url`. Disabling this setting allows the use of the schema from cache without the last modification time for such files. + +There is also a system table [schema_inference_cache](../operations/system-tables/schema_inference_cache.md) with all current schemas in cache and system query `SYSTEM DROP SCHEMA CACHE [FOR File/S3/URL/HDFS]` +that allows cleaning the schema cache for all sources, or for a specific source. + +**Examples:** + +Let's try to infer the structure of a sample dataset from s3 `github-2022.ndjson.gz` and see how the schema inference cache works: + +```sql +DESCRIBE TABLE s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/github/github-2022.ndjson.gz') +SETTINGS allow_experimental_object_type = 1 +``` +```response +┌─name───────┬─type─────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ type │ Nullable(String) │ │ │ │ │ │ +│ actor │ Object(Nullable('json')) │ │ │ │ │ │ +│ repo │ Object(Nullable('json')) │ │ │ │ │ │ +│ created_at │ Nullable(String) │ │ │ │ │ │ +│ payload │ Object(Nullable('json')) │ │ │ │ │ │ +└────────────┴──────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ + +5 rows in set. Elapsed: 0.601 sec. 
+``` +```sql +DESCRIBE TABLE s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/github/github-2022.ndjson.gz') +SETTINGS allow_experimental_object_type = 1 +``` +```response +┌─name───────┬─type─────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ type │ Nullable(String) │ │ │ │ │ │ +│ actor │ Object(Nullable('json')) │ │ │ │ │ │ +│ repo │ Object(Nullable('json')) │ │ │ │ │ │ +│ created_at │ Nullable(String) │ │ │ │ │ │ +│ payload │ Object(Nullable('json')) │ │ │ │ │ │ +└────────────┴──────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ + +5 rows in set. Elapsed: 0.059 sec. +``` + +As you can see, the second query succeeded almost instantly. + +Let's try to change some settings that can affect inferred schema: + +```sql +DESCRIBE TABLE s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/github/github-2022.ndjson.gz') +SETTINGS input_format_json_read_objects_as_strings = 1 + +┌─name───────┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ type │ Nullable(String) │ │ │ │ │ │ +│ actor │ Nullable(String) │ │ │ │ │ │ +│ repo │ Nullable(String) │ │ │ │ │ │ +│ created_at │ Nullable(String) │ │ │ │ │ │ +│ payload │ Nullable(String) │ │ │ │ │ │ +└────────────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ + +5 rows in set. Elapsed: 0.611 sec +``` + +As you can see, the schema from the cache was not used for the same file, because the setting that can affect inferred schema was changed. + +Let's check the content of `system.schema_inference_cache` table: + +```sql +SELECT schema, format, source FROM system.schema_inference_cache WHERE storage='S3' +``` +```response +┌─schema──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─format─┬─source───────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ type Nullable(String), actor Object(Nullable('json')), repo Object(Nullable('json')), created_at Nullable(String), payload Object(Nullable('json')) │ NDJSON │ datasets-documentation.s3.eu-west-3.amazonaws.com443/datasets-documentation/github/github-2022.ndjson.gz │ +│ type Nullable(String), actor Nullable(String), repo Nullable(String), created_at Nullable(String), payload Nullable(String) │ NDJSON │ datasets-documentation.s3.eu-west-3.amazonaws.com443/datasets-documentation/github/github-2022.ndjson.gz │ +└─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴────────┴──────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +``` + +As you can see, there are two different schemas for the same file. + +We can clear the schema cache using a system query: +```sql +SYSTEM DROP SCHEMA CACHE FOR S3 +``` +```response +Ok. +``` +```sql +SELECT count() FROM system.schema_inference_cache WHERE storage='S3' +``` +```response +┌─count()─┐ +│ 0 │ +└─────────┘ +``` + +# Text formats {#text-formats} + +For text formats, ClickHouse reads the data row by row, extracts column values according to the format, +and then uses some recursive parsers and heuristics to determine the type for each value. 
The maximum number of rows read from the data in schema inference +is controlled by the setting `input_format_max_rows_to_read_for_schema_inference` with default value 25000. +By default, all inferred types are [Nullable](../sql-reference/data-types/nullable.md), but you can change this by setting `schema_inference_make_columns_nullable` (see examples in the [settings](#settings-for-text-formats) section). + +## JSON formats {#json-formats} + +In JSON formats ClickHouse parses values according to the JSON specification and then tries to find the most appropriate data type for them. + +Let's see how it works, what types can be inferred and what specific settings can be used in JSON formats. + +**Examples** + +Here and further, the [format](../sql-reference/table-functions/format.md) table function will be used in examples. + +Integers, Floats, Bools, Strings: +```sql +DESC format(JSONEachRow, '{"int" : 42, "float" : 42.42, "string" : "Hello, World!"}'); +``` +```response +┌─name───┬─type──────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ int │ Nullable(Int64) │ │ │ │ │ │ +│ float │ Nullable(Float64) │ │ │ │ │ │ +│ bool │ Nullable(Bool) │ │ │ │ │ │ +│ string │ Nullable(String) │ │ │ │ │ │ +└────────┴───────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +Dates, DateTimes: + +```sql +DESC format(JSONEachRow, '{"date" : "2022-01-01", "datetime" : "2022-01-01 00:00:00"}') +``` +```response +┌─name─────┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ date │ Nullable(Date) │ │ │ │ │ │ +│ datetime │ Nullable(DateTime64(9)) │ │ │ │ │ │ +└──────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +Arrays: +```sql +DESC format(JSONEachRow, '{"arr" : [1, 2, 3], "nested_arrays" : [[1, 2, 3], [4, 5, 6], []]}') +``` +```response +┌─name──────────┬─type──────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ arr │ Array(Nullable(Int64)) │ │ │ │ │ │ +│ nested_arrays │ Array(Array(Nullable(Int64))) │ │ │ │ │ │ +└───────────────┴───────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +If an array contains `null`, ClickHouse will use types from the other array elements: +```sql +DESC format(JSONEachRow, '{"arr" : [null, 42, null]}') +``` +```response +┌─name─┬─type───────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ arr │ Array(Nullable(Int64)) │ │ │ │ │ │ +└──────┴────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +Tuples: + +In JSON formats we treat Arrays with elements of different types as Tuples. 
+```sql +DESC format(JSONEachRow, '{"tuple" : [1, "Hello, World!", [1, 2, 3]]}') +``` +```response +┌─name──┬─type─────────────────────────────────────────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ tuple │ Tuple(Nullable(Int64), Nullable(String), Array(Nullable(Int64))) │ │ │ │ │ │ +└───────┴──────────────────────────────────────────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +If some values are `null` or empty, we use types of corresponding values from the other rows: +```sql +DESC format(JSONEachRow, $$ + {"tuple" : [1, null, null]} + {"tuple" : [null, "Hello, World!", []]} + {"tuple" : [null, null, [1, 2, 3]]} + $$) +``` +```response +┌─name──┬─type─────────────────────────────────────────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ tuple │ Tuple(Nullable(Int64), Nullable(String), Array(Nullable(Int64))) │ │ │ │ │ │ +└───────┴──────────────────────────────────────────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +Maps: + +In JSON we can read objects with values of the same type as Map type. +```sql +DESC format(JSONEachRow, '{"map" : {"key1" : 42, "key2" : 24, "key3" : 4}}') +``` +```response +┌─name─┬─type─────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ map │ Map(String, Nullable(Int64)) │ │ │ │ │ │ +└──────┴──────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +JSON Object type (if setting `allow_experimental_object_type` is enabled): + +```sql +SET allow_experimental_object_type = 1 +DESC format(JSONEachRow, $$ + {"obj" : {"key1" : 42}} + {"obj" : {"key2" : "Hello, World!"}} + {"obj" : {"key1" : 24, "key3" : {"a" : 42, "b" : null}}} + $$) +``` +```response +┌─name─┬─type─────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ obj │ Object(Nullable('json')) │ │ │ │ │ │ +└──────┴──────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +Nested complex types: +```sql +DESC format(JSONEachRow, '{"value" : [[[42, 24], []], {"key1" : 42, "key2" : 24}]}') +``` +```response +┌─name──┬─type───────────────────────────────────────────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ value │ Tuple(Array(Array(Nullable(Int64))), Map(String, Nullable(Int64))) │ │ │ │ │ │ +└───────┴────────────────────────────────────────────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +If ClickHouse cannot determine the type, because the data contains only nulls, an exception will be thrown: +```sql +DESC format(JSONEachRow, '{"arr" : [null, null]}') +``` +```response +Code: 652. DB::Exception: Received from localhost:9000. DB::Exception: +Cannot determine type for column 'arr' by first 1 rows of data, +most likely this column contains only Nulls or empty Arrays/Maps. +... +``` + +### JSON settings {#json-settings} + +#### input_format_json_read_objects_as_strings + +Enabling this setting allows reading nested JSON objects as strings. +This setting can be used to read nested JSON objects without using JSON object type. + +This setting is enabled by default. 
+ +```sql +SET input_format_json_read_objects_as_strings = 1; +DESC format(JSONEachRow, $$ + {"obj" : {"key1" : 42, "key2" : [1,2,3,4]}} + {"obj" : {"key3" : {"nested_key" : 1}}} + $$) +``` +```response +┌─name─┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ obj │ Nullable(String) │ │ │ │ │ │ +└──────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +#### input_format_json_try_infer_numbers_from_strings + +Enabling this setting allows inferring numbers from string values. + +This setting is enabled by default. + +**Example:** + +```sql +SET input_format_json_try_infer_numbers_from_strings = 1; +DESC format(JSONEachRow, $$ + {"value" : "42"} + {"value" : "424242424242"} + $$) +``` +```reponse +┌─name──┬─type────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ value │ Nullable(Int64) │ │ │ │ │ │ +└───────┴─────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +#### input_format_json_read_numbers_as_strings + +Enabling this setting allows reading numeric values as strings. + +This setting is disabled by default. + +**Example** + +```sql +SET input_format_json_read_numbers_as_strings = 1; +DESC format(JSONEachRow, $$ + {"value" : 1055} + {"value" : "unknown"} + $$) +``` +```response +┌─name──┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ value │ Nullable(String) │ │ │ │ │ │ +└───────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +#### input_format_json_read_bools_as_numbers + +Enabling this setting allows reading Bool values as numbers. + +This setting is enabled by default. + +**Example:** + +```sql +SET input_format_json_read_bools_as_numbers = 1; +DESC format(JSONEachRow, $$ + {"value" : true} + {"value" : 42} + $$) +``` +```response +┌─name──┬─type────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ value │ Nullable(Int64) │ │ │ │ │ │ +└───────┴─────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +## CSV {#csv} + +In CSV format ClickHouse extracts column values from the row according to delimiters. ClickHouse expects all types except numbers and strings to be enclosed in double quotes. If the value is in double quotes, ClickHouse tries to parse +the data inside quotes using the recursive parser and then tries to find the most appropriate data type for it. If the value is not in double quotes, ClickHouse tries to parse it as a number, +and if the value is not a number, ClickHouse treats it as a string. + +If you don't want ClickHouse to try to determine complex types using some parsers and heuristics, you can disable setting `input_format_csv_use_best_effort_in_schema_inference` +and ClickHouse will treat all columns as Strings. 
+ +**Examples:** + +Integers, Floats, Bools, Strings: +```sql +DESC format(CSV, '42,42.42,true,"Hello,World!"') +``` +```response +┌─name─┬─type──────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ c1 │ Nullable(Int64) │ │ │ │ │ │ +│ c2 │ Nullable(Float64) │ │ │ │ │ │ +│ c3 │ Nullable(Bool) │ │ │ │ │ │ +│ c4 │ Nullable(String) │ │ │ │ │ │ +└──────┴───────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +Strings without quotes: +```sql +DESC format(CSV, 'Hello world!,World hello!') +``` +```response +┌─name─┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ c1 │ Nullable(String) │ │ │ │ │ │ +│ c2 │ Nullable(String) │ │ │ │ │ │ +└──────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +Dates, DateTimes: + +```sql +DESC format(CSV, '"2020-01-01","2020-01-01 00:00:00"') +``` +```response +┌─name─┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ c1 │ Nullable(Date) │ │ │ │ │ │ +│ c2 │ Nullable(DateTime64(9)) │ │ │ │ │ │ +└──────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +Arrays: +```sql +DESC format(CSV, '"[1,2,3]","[[1, 2], [], [3, 4]]"') +``` +```response +┌─name─┬─type──────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ c1 │ Array(Nullable(Int64)) │ │ │ │ │ │ +│ c2 │ Array(Array(Nullable(Int64))) │ │ │ │ │ │ +└──────┴───────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` +```sql +DESC format(CSV, $$"['Hello', 'world']","[['Abc', 'Def'], []]"$$) +``` +```response +┌─name─┬─type───────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ c1 │ Array(Nullable(String)) │ │ │ │ │ │ +│ c2 │ Array(Array(Nullable(String))) │ │ │ │ │ │ +└──────┴────────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +If an array contains null, ClickHouse will use types from the other array elements: +```sql +DESC format(CSV, '"[NULL, 42, NULL]"') +``` +```response +┌─name─┬─type───────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ c1 │ Array(Nullable(Int64)) │ │ │ │ │ │ +└──────┴────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +Maps: +```sql +DESC format(CSV, $$"{'key1' : 42, 'key2' : 24}"$$) +``` +```response +┌─name─┬─type─────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ c1 │ Map(String, Nullable(Int64)) │ │ │ │ │ │ +└──────┴──────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +Nested Arrays and Maps: +```sql +DESC format(CSV, $$"[{'key1' : [[42, 42], []], 'key2' : [[null], [42]]}]"$$) +``` +```response +┌─name─┬─type──────────────────────────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ c1 │ Array(Map(String, Array(Array(Nullable(Int64))))) │ │ │ │ │ │ +└──────┴───────────────────────────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +If ClickHouse cannot 
determine the type inside quotes, because the data contains only nulls, ClickHouse will treat it as String: +```sql +DESC format(CSV, '"[NULL, NULL]"') +``` +```response +┌─name─┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ c1 │ Nullable(String) │ │ │ │ │ │ +└──────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +Example with disabled setting `input_format_csv_use_best_effort_in_schema_inference`: +```sql +SET input_format_csv_use_best_effort_in_schema_inference = 0 +DESC format(CSV, '"[1,2,3]",42.42,Hello World!') +``` +```response +┌─name─┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ c1 │ Nullable(String) │ │ │ │ │ │ +│ c2 │ Nullable(String) │ │ │ │ │ │ +│ c3 │ Nullable(String) │ │ │ │ │ │ +└──────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +## TSV/TSKV {#tsv-tskv} + +In TSV/TSKV formats ClickHouse extracts column value from the row according to tabular delimiters and then parses extracted value using +the recursive parser to determine the most appropriate type. If the type cannot be determined, ClickHouse treats this value as String. + +If you don't want ClickHouse to try to determine complex types using some parsers and heuristics, you can disable setting `input_format_tsv_use_best_effort_in_schema_inference` +and ClickHouse will treat all columns as Strings. + + +**Examples:** + +Integers, Floats, Bools, Strings: +```sql +DESC format(TSV, '42 42.42 true Hello,World!') +``` +```response +┌─name─┬─type──────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ c1 │ Nullable(Int64) │ │ │ │ │ │ +│ c2 │ Nullable(Float64) │ │ │ │ │ │ +│ c3 │ Nullable(Bool) │ │ │ │ │ │ +│ c4 │ Nullable(String) │ │ │ │ │ │ +└──────┴───────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` +```sql +DESC format(TSKV, 'int=42 float=42.42 bool=true string=Hello,World!\n') +``` +```response +┌─name───┬─type──────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ int │ Nullable(Int64) │ │ │ │ │ │ +│ float │ Nullable(Float64) │ │ │ │ │ │ +│ bool │ Nullable(Bool) │ │ │ │ │ │ +│ string │ Nullable(String) │ │ │ │ │ │ +└────────┴───────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +Dates, DateTimes: + +```sql +DESC format(TSV, '2020-01-01 2020-01-01 00:00:00') +``` +```response +┌─name─┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ c1 │ Nullable(Date) │ │ │ │ │ │ +│ c2 │ Nullable(DateTime64(9)) │ │ │ │ │ │ +└──────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +Arrays: +```sql +DESC format(TSV, '[1,2,3] [[1, 2], [], [3, 4]]') +``` +```response +┌─name─┬─type──────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ c1 │ Array(Nullable(Int64)) │ │ │ │ │ │ +│ c2 │ Array(Array(Nullable(Int64))) │ │ │ │ │ │ +└──────┴───────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` +```sql +DESC format(TSV, '[''Hello'', ''world''] [[''Abc'', ''Def''], []]') +``` +```response 
+┌─name─┬─type───────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ c1 │ Array(Nullable(String)) │ │ │ │ │ │ +│ c2 │ Array(Array(Nullable(String))) │ │ │ │ │ │ +└──────┴────────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +If an array contains null, ClickHouse will use types from the other array elements: +```sql +DESC format(TSV, '[NULL, 42, NULL]') +``` +```response +┌─name─┬─type───────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ c1 │ Array(Nullable(Int64)) │ │ │ │ │ │ +└──────┴────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +Tuples: +```sql +DESC format(TSV, $$(42, 'Hello, world!')$$) +``` +```response +┌─name─┬─type─────────────────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ c1 │ Tuple(Nullable(Int64), Nullable(String)) │ │ │ │ │ │ +└──────┴──────────────────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +Maps: +```sql +DESC format(TSV, $${'key1' : 42, 'key2' : 24}$$) +``` +```response +┌─name─┬─type─────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ c1 │ Map(String, Nullable(Int64)) │ │ │ │ │ │ +└──────┴──────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +Nested Arrays, Tuples and Maps: +```sql +DESC format(TSV, $$[{'key1' : [(42, 'Hello'), (24, NULL)], 'key2' : [(NULL, ','), (42, 'world!')]}]$$) +``` +```response +┌─name─┬─type────────────────────────────────────────────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ c1 │ Array(Map(String, Array(Tuple(Nullable(Int64), Nullable(String))))) │ │ │ │ │ │ +└──────┴─────────────────────────────────────────────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +If ClickHouse cannot determine the type, because the data contains only nulls, ClickHouse will treat it as String: +```sql +DESC format(TSV, '[NULL, NULL]') +``` +```response +┌─name─┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ c1 │ Nullable(String) │ │ │ │ │ │ +└──────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +Example with disabled setting `input_format_tsv_use_best_effort_in_schema_inference`: +```sql +SET input_format_tsv_use_best_effort_in_schema_inference = 0 +DESC format(TSV, '[1,2,3] 42.42 Hello World!') +``` +```response +┌─name─┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ c1 │ Nullable(String) │ │ │ │ │ │ +│ c2 │ Nullable(String) │ │ │ │ │ │ +│ c3 │ Nullable(String) │ │ │ │ │ │ +└──────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +## Values {#values} + +In Values format ClickHouse extracts column value from the row and then parses it using +the recursive parser similar to how literals are parsed. 
+ +**Examples:** + +Integers, Floats, Bools, Strings: +```sql +DESC format(Values, $$(42, 42.42, true, 'Hello,World!')$$) +``` +```response +┌─name─┬─type──────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ c1 │ Nullable(Int64) │ │ │ │ │ │ +│ c2 │ Nullable(Float64) │ │ │ │ │ │ +│ c3 │ Nullable(Bool) │ │ │ │ │ │ +│ c4 │ Nullable(String) │ │ │ │ │ │ +└──────┴───────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +Dates, DateTimes: + +```sql +DESC format(Values, $$('2020-01-01', '2020-01-01 00:00:00')$$) +``` +```response +┌─name─┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ c1 │ Nullable(Date) │ │ │ │ │ │ +│ c2 │ Nullable(DateTime64(9)) │ │ │ │ │ │ +└──────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +Arrays: +```sql +DESC format(Values, '([1,2,3], [[1, 2], [], [3, 4]])') +``` +```response +┌─name─┬─type──────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ c1 │ Array(Nullable(Int64)) │ │ │ │ │ │ +│ c2 │ Array(Array(Nullable(Int64))) │ │ │ │ │ │ +└──────┴───────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +If an array contains null, ClickHouse will use types from the other array elements: +```sql +DESC format(Values, '([NULL, 42, NULL])') +``` +```response +┌─name─┬─type───────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ c1 │ Array(Nullable(Int64)) │ │ │ │ │ │ +└──────┴────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +Tuples: +```sql +DESC format(Values, $$((42, 'Hello, world!'))$$) +``` +```response +┌─name─┬─type─────────────────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ c1 │ Tuple(Nullable(Int64), Nullable(String)) │ │ │ │ │ │ +└──────┴──────────────────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +Maps: +```sql +DESC format(Values, $$({'key1' : 42, 'key2' : 24})$$) +``` +```response +┌─name─┬─type─────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ c1 │ Map(String, Nullable(Int64)) │ │ │ │ │ │ +└──────┴──────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +Nested Arrays, Tuples and Maps: +```sql +DESC format(Values, $$([{'key1' : [(42, 'Hello'), (24, NULL)], 'key2' : [(NULL, ','), (42, 'world!')]}])$$) +``` +```response +┌─name─┬─type────────────────────────────────────────────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ c1 │ Array(Map(String, Array(Tuple(Nullable(Int64), Nullable(String))))) │ │ │ │ │ │ +└──────┴─────────────────────────────────────────────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +If ClickHouse cannot determine the type, because the data contains only nulls, an exception will be thrown: +```sql +DESC format(Values, '([NULL, NULL])') +``` +```response +Code: 652. DB::Exception: Received from localhost:9000. 
DB::Exception: +Cannot determine type for column 'c1' by first 1 rows of data, +most likely this column contains only Nulls or empty Arrays/Maps. +... +``` + +## CustomSeparated {#custom-separated} + +In CustomSeparated format ClickHouse first extracts all column values from the row according to the specified delimiters and then tries to infer +the data type for each value according to the escaping rule. + +**Example** + +```sql +SET format_custom_row_before_delimiter = '<row_before_delimiter>', + format_custom_row_after_delimiter = '<row_after_delimiter>\n', + format_custom_row_between_delimiter = '<row_between_delimiter>\n', + format_custom_result_before_delimiter = '<result_before_delimiter>\n', + format_custom_result_after_delimiter = '<result_after_delimiter>\n', + format_custom_field_delimiter = '<field_delimiter>', + format_custom_escaping_rule = 'Quoted' + +DESC format(CustomSeparated, $$<result_before_delimiter> +<row_before_delimiter>42.42<field_delimiter>'Some string 1'<field_delimiter>[1, NULL, 3]<row_after_delimiter> +<row_between_delimiter> +<row_before_delimiter>NULL<field_delimiter>'Some string 3'<field_delimiter>[1, 2, NULL]<row_after_delimiter> +<result_after_delimiter> +$$) +``` +```response +┌─name─┬─type───────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ c1 │ Nullable(Float64) │ │ │ │ │ │ +│ c2 │ Nullable(String) │ │ │ │ │ │ +│ c3 │ Array(Nullable(Int64)) │ │ │ │ │ │ +└──────┴────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +## Template {#template} + +In Template format ClickHouse first extracts all column values from the row according to the specified template and then tries to infer the +data type for each value according to its escaping rule. + +**Example** + +Let's say we have a file `resultset_format` with the following content: +``` +<result_before_delimiter> +${data} +``` + +And a file `row_format` with the following content: +``` +<row_before_delimiter>${column_1:CSV}<field_delimiter_1>${column_2:Quoted}<field_delimiter_2>${column_3:JSON}<row_after_delimiter> +``` + +Then we can make the following queries: + +```sql +SET format_template_rows_between_delimiter = '<row_between_delimiter>\n', + format_template_row = 'row_format', + format_template_resultset = 'resultset_format' + +DESC format(Template, $$<result_before_delimiter> +<row_before_delimiter>42.42<field_delimiter_1>'Some string 1'<field_delimiter_2>[1, null, 2]<row_after_delimiter> +<row_between_delimiter> +<row_before_delimiter>\N<field_delimiter_1>'Some string 3'<field_delimiter_2>[1, 2, null]<row_after_delimiter> +<row_between_delimiter> +$$) +``` +```response +┌─name─────┬─type───────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ column_1 │ Nullable(Float64) │ │ │ │ │ │ +│ column_2 │ Nullable(String) │ │ │ │ │ │ +│ column_3 │ Array(Nullable(Int64)) │ │ │ │ │ │ +└──────────┴────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +## Regexp {#regexp} + +Similar to Template, in Regexp format ClickHouse first extracts all column values from the row according to the specified regular expression and then tries to infer the +data type for each value according to the specified escaping rule.
+ +**Example** + +```sql +SET format_regexp = '^Line: value_1=(.+?), value_2=(.+?), value_3=(.+?)', + format_regexp_escaping_rule = 'CSV' + +DESC format(Regexp, $$Line: value_1=42, value_2="Some string 1", value_3="[1, NULL, 3]" +Line: value_1=2, value_2="Some string 2", value_3="[4, 5, NULL]"$$) +``` +```response +┌─name─┬─type───────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ c1 │ Nullable(Int64) │ │ │ │ │ │ +│ c2 │ Nullable(String) │ │ │ │ │ │ +│ c3 │ Array(Nullable(Int64)) │ │ │ │ │ │ +└──────┴────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +## Settings for text formats {#settings-for-text-formats} + +### input_format_max_rows_to_read_for_schema_inference + +This setting controls the maximum number of rows to be read during schema inference. +The more rows are read, the more time is spent on schema inference, but the greater the chance to +correctly determine the types (especially when the data contains a lot of nulls). + +Default value: `25000`. + +### column_names_for_schema_inference + +The list of column names to use in schema inference for formats without explicit column names. Specified names will be used instead of the default `c1,c2,c3,...`. The format: `column1,column2,column3,...`. + +**Example** + +```sql +DESC format(TSV, 'Hello, World! 42 [1, 2, 3]') settings column_names_for_schema_inference = 'str,int,arr' +``` +```response +┌─name─┬─type───────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ str │ Nullable(String) │ │ │ │ │ │ +│ int │ Nullable(Int64) │ │ │ │ │ │ +│ arr │ Array(Nullable(Int64)) │ │ │ │ │ │ +└──────┴────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +### schema_inference_hints + +The list of column names and types to use in schema inference instead of automatically determined types. The format: 'column_name1 column_type1, column_name2 column_type2, ...'. +This setting can be used to specify the types of columns that could not be determined automatically or for optimizing the schema. + +**Example** + +```sql +DESC format(JSONEachRow, '{"id" : 1, "age" : 25, "name" : "Josh", "status" : null, "hobbies" : ["football", "cooking"]}') +SETTINGS schema_inference_hints = 'age LowCardinality(UInt8), status Nullable(String)' +``` +```response +┌─name────┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ id │ Nullable(Int64) │ │ │ │ │ │ +│ age │ LowCardinality(UInt8) │ │ │ │ │ │ +│ name │ Nullable(String) │ │ │ │ │ │ +│ status │ Nullable(String) │ │ │ │ │ │ +│ hobbies │ Array(Nullable(String)) │ │ │ │ │ │ +└─────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +### schema_inference_make_columns_nullable + +Controls making inferred types `Nullable` in schema inference for formats without information about nullability. +If the setting is enabled, all inferred types will be `Nullable`; if disabled, the inferred type will be `Nullable` only if the column contains `NULL` in a sample that is parsed during schema inference. + +Enabled by default.
+ +**Examples** + +```sql +SET schema_inference_make_columns_nullable = 1 +DESC format(JSONEachRow, $$ + {"id" : 1, "age" : 25, "name" : "Josh", "status" : null, "hobbies" : ["football", "cooking"]} + {"id" : 2, "age" : 19, "name" : "Alan", "status" : "married", "hobbies" : ["tennis", "art"]} + $$) +``` +```response +┌─name────┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ id │ Nullable(Int64) │ │ │ │ │ │ +│ age │ Nullable(Int64) │ │ │ │ │ │ +│ name │ Nullable(String) │ │ │ │ │ │ +│ status │ Nullable(String) │ │ │ │ │ │ +│ hobbies │ Array(Nullable(String)) │ │ │ │ │ │ +└─────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` +```sql +SET schema_inference_make_columns_nullable = 0 +DESC format(JSONEachRow, $$ + {"id" : 1, "age" : 25, "name" : "Josh", "status" : null, "hobbies" : ["football", "cooking"]} + {"id" : 2, "age" : 19, "name" : "Alan", "status" : "married", "hobbies" : ["tennis", "art"]} + $$) +``` +```response + +┌─name────┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ id │ Int64 │ │ │ │ │ │ +│ age │ Int64 │ │ │ │ │ │ +│ name │ String │ │ │ │ │ │ +│ status │ Nullable(String) │ │ │ │ │ │ +│ hobbies │ Array(String) │ │ │ │ │ │ +└─────────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +### input_format_try_infer_integers + +If enabled, ClickHouse will try to infer integers instead of floats in schema inference for text formats. +If all numbers in the column from sample data are integers, the result type will be `Int64`, if at least one number is float, the result type will be `Float64`. +If the sample data contains only integers and at least one integer is positive and overflows `Int64`, ClickHouse will infer `UInt64`. + +Enabled by default. 
+ +**Examples** + +```sql +SET input_format_try_infer_integers = 0 +DESC format(JSONEachRow, $$ + {"number" : 1} + {"number" : 2} + $$) +``` +```response +┌─name───┬─type──────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ number │ Nullable(Float64) │ │ │ │ │ │ +└────────┴───────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` +```sql +SET input_format_try_infer_integers = 1 +DESC format(JSONEachRow, $$ + {"number" : 1} + {"number" : 2} + $$) +``` +```response +┌─name───┬─type────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ number │ Nullable(Int64) │ │ │ │ │ │ +└────────┴─────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` +```sql +DESC format(JSONEachRow, $$ + {"number" : 1} + {"number" : 18446744073709551615} + $$) +``` +```response +┌─name───┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ number │ Nullable(UInt64) │ │ │ │ │ │ +└────────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` +```sql +DESC format(JSONEachRow, $$ + {"number" : 1} + {"number" : 2.2} + $$) +``` +```response +┌─name───┬─type──────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ number │ Nullable(Float64) │ │ │ │ │ │ +└────────┴───────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +### input_format_try_infer_datetimes + +If enabled, ClickHouse will try to infer type `DateTime64` from string fields in schema inference for text formats. +If all fields from a column in sample data were successfully parsed as datetimes, the result type will be `DateTime64(9)`, +if at least one field was not parsed as datetime, the result type will be `String`. + +Enabled by default. 
+ +**Examples** + +```sql +SET input_format_try_infer_datetimes = 0 +DESC format(JSONEachRow, $$ + {"datetime" : "2021-01-01 00:00:00.000"} + {"datetime" : "2022-01-01 00:00:00.000"} + $$) +``` +```response +┌─name─────┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ datetime │ Nullable(String) │ │ │ │ │ │ +└──────────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` +```sql +SET input_format_try_infer_datetimes = 1 +DESC format(JSONEachRow, $$ + {"datetime" : "2021-01-01 00:00:00.000"} + {"datetime" : "2022-01-01 00:00:00.000"} + $$) +``` +```response +┌─name─────┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ datetime │ Nullable(DateTime64(9)) │ │ │ │ │ │ +└──────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` +```sql +DESC format(JSONEachRow, $$ + {"datetime" : "2021-01-01 00:00:00.000"} + {"datetime" : "unknown"} + $$) +``` +```response +┌─name─────┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ datetime │ Nullable(String) │ │ │ │ │ │ +└──────────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +Note: Parsing datetimes during schema inference respect setting [date_time_input_format](/docs/en/operations/settings/settings.md#date_time_input_format) + +### input_format_try_infer_dates + +If enabled, ClickHouse will try to infer type `Date` from string fields in schema inference for text formats. +If all fields from a column in sample data were successfully parsed as dates, the result type will be `Date`, +if at least one field was not parsed as date, the result type will be `String`. + +Enabled by default. + +**Examples** + +```sql +SET input_format_try_infer_datetimes = 0, input_format_try_infer_dates = 0 +DESC format(JSONEachRow, $$ + {"date" : "2021-01-01"} + {"date" : "2022-01-01"} + $$) +``` +```response +┌─name─┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ date │ Nullable(String) │ │ │ │ │ │ +└──────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` +```sql +SET input_format_try_infer_dates = 1 +DESC format(JSONEachRow, $$ + {"date" : "2021-01-01"} + {"date" : "2022-01-01"} + $$) +``` +```response +┌─name─┬─type───────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ date │ Nullable(Date) │ │ │ │ │ │ +└──────┴────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` +```sql +DESC format(JSONEachRow, $$ + {"date" : "2021-01-01"} + {"date" : "unknown"} + $$) +``` +```response +┌─name─┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ date │ Nullable(String) │ │ │ │ │ │ +└──────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +# Self describing formats {#self-describing-formats} + +Self-describing formats contain information about the structure of the data in the data itself, +it can be some header with a description, a binary type tree, or some kind of table. 
+To automatically infer a schema from files in such formats, ClickHouse reads a part of the data containing +information about the types and converts it into a schema of the ClickHouse table. + +## Formats with -WithNamesAndTypes suffix {#formats-with-names-and-types} + +ClickHouse supports some text formats with the suffix -WithNamesAndTypes. This suffix means that the data contains two additional rows with column names and types before the actual data. +While schema inference for such formats, ClickHouse reads the first two rows and extracts column names and types. + +**Example** + +```sql +DESC format(TSVWithNamesAndTypes, +$$num str arr +UInt8 String Array(UInt8) +42 Hello, World! [1,2,3] +$$) +``` +```response +┌─name─┬─type─────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ num │ UInt8 │ │ │ │ │ │ +│ str │ String │ │ │ │ │ │ +│ arr │ Array(UInt8) │ │ │ │ │ │ +└──────┴──────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +## JSON formats with metadata {#json-with-metadata} + +Some JSON input formats ([JSON](formats.md#json), [JSONCompact](formats.md#json-compact), [JSONColumnsWithMetadata](formats.md#jsoncolumnswithmetadata)) contain metadata with column names and types. +In schema inference for such formats, ClickHouse reads this metadata. + +**Example** +```sql +DESC format(JSON, $$ +{ + "meta": + [ + { + "name": "num", + "type": "UInt8" + }, + { + "name": "str", + "type": "String" + }, + { + "name": "arr", + "type": "Array(UInt8)" + } + ], + + "data": + [ + { + "num": 42, + "str": "Hello, World", + "arr": [1,2,3] + } + ], + + "rows": 1, + + "statistics": + { + "elapsed": 0.005723915, + "rows_read": 1, + "bytes_read": 1 + } +} +$$) +``` +```response +┌─name─┬─type─────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ num │ UInt8 │ │ │ │ │ │ +│ str │ String │ │ │ │ │ │ +│ arr │ Array(UInt8) │ │ │ │ │ │ +└──────┴──────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +## Avro {#avro} + +In Avro format ClickHouse reads its schema from the data and converts it to ClickHouse schema using the following type matches: + +| Avro data type | ClickHouse data type | +|------------------------------------|--------------------------------------------------------------------------------| +| `boolean` | [Bool](../sql-reference/data-types/boolean.md) | +| `int` | [Int32](../sql-reference/data-types/int-uint.md) | +| `long` | [Int64](../sql-reference/data-types/int-uint.md) | +| `float` | [Float32](../sql-reference/data-types/float.md) | +| `double` | [Float64](../sql-reference/data-types/float.md) | +| `bytes`, `string` | [String](../sql-reference/data-types/string.md) | +| `fixed` | [FixedString(N)](../sql-reference/data-types/fixedstring.md) | +| `enum` | [Enum](../sql-reference/data-types/enum.md) | +| `array(T)` | [Array(T)](../sql-reference/data-types/array.md) | +| `union(null, T)`, `union(T, null)` | [Nullable(T)](../sql-reference/data-types/date.md) | +| `null` | [Nullable(Nothing)](../sql-reference/data-types/special-data-types/nothing.md) | + +Other Avro types are not supported. 
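+
+For example, the inferred schema can be inspected with the `file` table function (the file `data.avro` in `user_files_path`, with a `long` field `id` and a `string` field `name`, is a hypothetical illustration, not part of the shipped examples):
+
+```sql
+-- Hypothetical file: "id" (Avro long) and "name" (Avro string) would be mapped
+-- to Int64 and String according to the table above.
+DESC file('data.avro', 'Avro');
+```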
+ +## Parquet {#parquet} + +In Parquet format ClickHouse reads its schema from the data and converts it to ClickHouse schema using the following type matches: + +| Parquet data type | ClickHouse data type | +|------------------------------|---------------------------------------------------------| +| `BOOL` | [Bool](../sql-reference/data-types/boolean.md) | +| `UINT8` | [UInt8](../sql-reference/data-types/int-uint.md) | +| `INT8` | [Int8](../sql-reference/data-types/int-uint.md) | +| `UINT16` | [UInt16](../sql-reference/data-types/int-uint.md) | +| `INT16` | [Int16](../sql-reference/data-types/int-uint.md) | +| `UINT32` | [UInt32](../sql-reference/data-types/int-uint.md) | +| `INT32` | [Int32](../sql-reference/data-types/int-uint.md) | +| `UINT64` | [UInt64](../sql-reference/data-types/int-uint.md) | +| `INT64` | [Int64](../sql-reference/data-types/int-uint.md) | +| `FLOAT` | [Float32](../sql-reference/data-types/float.md) | +| `DOUBLE` | [Float64](../sql-reference/data-types/float.md) | +| `DATE` | [Date32](../sql-reference/data-types/date32.md) | +| `TIME (ms)` | [DateTime](../sql-reference/data-types/datetime.md) | +| `TIMESTAMP`, `TIME (us, ns)` | [DateTime64](../sql-reference/data-types/datetime64.md) | +| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | +| `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | +| `LIST` | [Array](../sql-reference/data-types/array.md) | +| `STRUCT` | [Tuple](../sql-reference/data-types/tuple.md) | +| `MAP` | [Map](../sql-reference/data-types/map.md) | + +Other Parquet types are not supported. By default, all inferred types are inside `Nullable`, but it can be changed using the setting `schema_inference_make_columns_nullable`. + +## Arrow {#arrow} + +In Arrow format ClickHouse reads its schema from the data and converts it to ClickHouse schema using the following type matches: + +| Arrow data type | ClickHouse data type | +|---------------------------------|---------------------------------------------------------| +| `BOOL` | [Bool](../sql-reference/data-types/boolean.md) | +| `UINT8` | [UInt8](../sql-reference/data-types/int-uint.md) | +| `INT8` | [Int8](../sql-reference/data-types/int-uint.md) | +| `UINT16` | [UInt16](../sql-reference/data-types/int-uint.md) | +| `INT16` | [Int16](../sql-reference/data-types/int-uint.md) | +| `UINT32` | [UInt32](../sql-reference/data-types/int-uint.md) | +| `INT32` | [Int32](../sql-reference/data-types/int-uint.md) | +| `UINT64` | [UInt64](../sql-reference/data-types/int-uint.md) | +| `INT64` | [Int64](../sql-reference/data-types/int-uint.md) | +| `FLOAT`, `HALF_FLOAT` | [Float32](../sql-reference/data-types/float.md) | +| `DOUBLE` | [Float64](../sql-reference/data-types/float.md) | +| `DATE32` | [Date32](../sql-reference/data-types/date32.md) | +| `DATE64` | [DateTime](../sql-reference/data-types/datetime.md) | +| `TIMESTAMP`, `TIME32`, `TIME64` | [DateTime64](../sql-reference/data-types/datetime64.md) | +| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | +| `DECIMAL128`, `DECIMAL256` | [Decimal](../sql-reference/data-types/decimal.md) | +| `LIST` | [Array](../sql-reference/data-types/array.md) | +| `STRUCT` | [Tuple](../sql-reference/data-types/tuple.md) | +| `MAP` | [Map](../sql-reference/data-types/map.md) | + +Other Arrow types are not supported. By default, all inferred types are inside `Nullable`, but it can be changed using the setting `schema_inference_make_columns_nullable`. 
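+
+For example, the effect of `schema_inference_make_columns_nullable` can be checked on an Arrow file with the `file` table function (the file name `data.arrow` and its columns are assumptions used only for illustration):
+
+```sql
+-- Hypothetical file: with the default settings an Arrow INT64 column is inferred
+-- as Nullable(Int64); with the setting disabled it is inferred as Int64.
+DESC file('data.arrow', 'Arrow')
+SETTINGS schema_inference_make_columns_nullable = 0;
+```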
+ +## ORC {#orc} + +In ORC format ClickHouse reads its schema from the data and converts it to ClickHouse schema using the following type matches: + +| ORC data type | ClickHouse data type | +|--------------------------------------|---------------------------------------------------------| +| `Boolean` | [Bool](../sql-reference/data-types/boolean.md) | +| `Tinyint` | [Int8](../sql-reference/data-types/int-uint.md) | +| `Smallint` | [Int16](../sql-reference/data-types/int-uint.md) | +| `Int` | [Int32](../sql-reference/data-types/int-uint.md) | +| `Bigint` | [Int64](../sql-reference/data-types/int-uint.md) | +| `Float` | [Float32](../sql-reference/data-types/float.md) | +| `Double` | [Float64](../sql-reference/data-types/float.md) | +| `Date` | [Date32](../sql-reference/data-types/date32.md) | +| `Timestamp` | [DateTime64](../sql-reference/data-types/datetime64.md) | +| `String`, `Char`, `Varchar`,`BINARY` | [String](../sql-reference/data-types/string.md) | +| `Decimal` | [Decimal](../sql-reference/data-types/decimal.md) | +| `List` | [Array](../sql-reference/data-types/array.md) | +| `Struct` | [Tuple](../sql-reference/data-types/tuple.md) | +| `Map` | [Map](../sql-reference/data-types/map.md) | + +Other ORC types are not supported. By default, all inferred types are inside `Nullable`, but it can be changed using the setting `schema_inference_make_columns_nullable`. + +## Native {#native} + +Native format is used inside ClickHouse and contains the schema in the data. +In schema inference, ClickHouse reads the schema from the data without any transformations. + +# Formats with external schema {#formats-with-external-schema} + +Such formats require a schema describing the data in a separate file in a specific schema language. +To automatically infer a schema from files in such formats, ClickHouse reads external schema from a separate file and transforms it to a ClickHouse table schema. 
+ +# Protobuf {#protobuf} + +In schema inference for Protobuf format ClickHouse uses the following type matches: + +| Protobuf data type | ClickHouse data type | +|-------------------------------|---------------------------------------------------| +| `bool` | [UInt8](../sql-reference/data-types/int-uint.md) | +| `float` | [Float32](../sql-reference/data-types/float.md) | +| `double` | [Float64](../sql-reference/data-types/float.md) | +| `int32`, `sint32`, `sfixed32` | [Int32](../sql-reference/data-types/int-uint.md) | +| `int64`, `sint64`, `sfixed64` | [Int64](../sql-reference/data-types/int-uint.md) | +| `uint32`, `fixed32` | [UInt32](../sql-reference/data-types/int-uint.md) | +| `uint64`, `fixed64` | [UInt64](../sql-reference/data-types/int-uint.md) | +| `string`, `bytes` | [String](../sql-reference/data-types/string.md) | +| `enum` | [Enum](../sql-reference/data-types/enum.md) | +| `repeated T` | [Array(T)](../sql-reference/data-types/array.md) | +| `message`, `group` | [Tuple](../sql-reference/data-types/tuple.md) | + +# CapnProto {#capnproto} + +In schema inference for CapnProto format ClickHouse uses the following type matches: + +| CapnProto data type | ClickHouse data type | +|------------------------------------|--------------------------------------------------------| +| `Bool` | [UInt8](../sql-reference/data-types/int-uint.md) | +| `Int8` | [Int8](../sql-reference/data-types/int-uint.md) | +| `UInt8` | [UInt8](../sql-reference/data-types/int-uint.md) | +| `Int16` | [Int16](../sql-reference/data-types/int-uint.md) | +| `UInt16` | [UInt16](../sql-reference/data-types/int-uint.md) | +| `Int32` | [Int32](../sql-reference/data-types/int-uint.md) | +| `UInt32` | [UInt32](../sql-reference/data-types/int-uint.md) | +| `Int64` | [Int64](../sql-reference/data-types/int-uint.md) | +| `UInt64` | [UInt64](../sql-reference/data-types/int-uint.md) | +| `Float32` | [Float32](../sql-reference/data-types/float.md) | +| `Float64` | [Float64](../sql-reference/data-types/float.md) | +| `Text`, `Data` | [String](../sql-reference/data-types/string.md) | +| `enum` | [Enum](../sql-reference/data-types/enum.md) | +| `List` | [Array](../sql-reference/data-types/array.md) | +| `struct` | [Tuple](../sql-reference/data-types/tuple.md) | +| `union(T, Void)`, `union(Void, T)` | [Nullable(T)](../sql-reference/data-types/nullable.md) | + +# Strong-typed binary formats {#strong-typed-binary-formats} + +In such formats, each serialized value contains information about its type (and possibly about its name), but there is no information about the whole table. +In schema inference for such formats, ClickHouse reads data row by row (up to `input_format_max_rows_to_read_for_schema_inference` rows) and extracts +the type (and possibly name) for each value from the data and then converts these types to ClickHouse types. + +## MsgPack {msgpack} + +In MsgPack format there is no delimiter between rows, to use schema inference for this format you should specify the number of columns in the table +using the setting `input_format_msgpack_number_of_columns`. 
ClickHouse uses the following type matches: + +| MessagePack data type (`INSERT`) | ClickHouse data type | +|--------------------------------------------------------------------|-----------------------------------------------------------| +| `int N`, `uint N`, `negative fixint`, `positive fixint` | [Int64](../sql-reference/data-types/int-uint.md) | +| `bool` | [UInt8](../sql-reference/data-types/int-uint.md) | +| `fixstr`, `str 8`, `str 16`, `str 32`, `bin 8`, `bin 16`, `bin 32` | [String](../sql-reference/data-types/string.md) | +| `float 32` | [Float32](../sql-reference/data-types/float.md) | +| `float 64` | [Float64](../sql-reference/data-types/float.md) | +| `uint 16` | [Date](../sql-reference/data-types/date.md) | +| `uint 32` | [DateTime](../sql-reference/data-types/datetime.md) | +| `uint 64` | [DateTime64](../sql-reference/data-types/datetime.md) | +| `fixarray`, `array 16`, `array 32` | [Array](../sql-reference/data-types/array.md) | +| `fixmap`, `map 16`, `map 32` | [Map](../sql-reference/data-types/map.md) | + +By default, all inferred types are inside `Nullable`, but it can be changed using the setting `schema_inference_make_columns_nullable`. + +## BSONEachRow {#bsoneachrow} + +In BSONEachRow each row of data is presented as a BSON document. In schema inference ClickHouse reads BSON documents one by one and extracts +values, names, and types from the data and then transforms these types to ClickHouse types using the following type matches: + +| BSON Type | ClickHouse type | +|-----------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------| +| `\x08` boolean | [Bool](../sql-reference/data-types/boolean.md) | +| `\x10` int32 | [Int32](../sql-reference/data-types/int-uint.md) | +| `\x12` int64 | [Int64](../sql-reference/data-types/int-uint.md) | +| `\x01` double | [Float64](../sql-reference/data-types/float.md) | +| `\x09` datetime | [DateTime64](../sql-reference/data-types/datetime64.md) | +| `\x05` binary with`\x00` binary subtype, `\x02` string, `\x0E` symbol, `\x0D` JavaScript code | [String](../sql-reference/data-types/string.md) | +| `\x07` ObjectId, | [FixedString(12)](../sql-reference/data-types/fixedstring.md) | +| `\x05` binary with `\x04` uuid subtype, size = 16 | [UUID](../sql-reference/data-types/uuid.md) | +| `\x04` array | [Array](../sql-reference/data-types/array.md)/[Tuple](../sql-reference/data-types/tuple.md) (if nested types are different) | +| `\x03` document | [Named Tuple](../sql-reference/data-types/tuple.md)/[Map](../sql-reference/data-types/map.md) (with String keys) | + +By default, all inferred types are inside `Nullable`, but it can be changed using the setting `schema_inference_make_columns_nullable`. + +# Formats with constant schema {#formats-with-constant-schema} + +Data in such formats always have the same schema. + +## LineAsString {#line-as-string} + +In this format, ClickHouse reads the whole line from the data into a single column with `String` data type. The inferred type for this format is always `String` and the column name is `line`. 
+ +**Example** + +```sql +DESC format(LineAsString, 'Hello\nworld!') +``` +```response +┌─name─┬─type───┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ line │ String │ │ │ │ │ │ +└──────┴────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +## JSONAsString {#json-as-string} + +In this format, ClickHouse reads the whole JSON object from the data into a single column with `String` data type. The inferred type for this format is always `String` and the column name is `json`. + +**Example** + +```sql +DESC format(JSONAsString, '{"x" : 42, "y" : "Hello, World!"}') +``` +```response +┌─name─┬─type───┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ json │ String │ │ │ │ │ │ +└──────┴────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +## JSONAsObject {#json-as-object} + +In this format, ClickHouse reads the whole JSON object from the data into a single column with `Object('json')` data type. The inferred type for this format is always `Object('json')` and the column name is `json`. + +Note: This format works only if `allow_experimental_object_type` is enabled. + +**Example** + +```sql +DESC format(JSONAsObject, '{"x" : 42, "y" : "Hello, World!"}') SETTINGS allow_experimental_object_type=1 +``` +```response +┌─name─┬─type───────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ json │ Object('json') │ │ │ │ │ │ +└──────┴────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +[Original article](https://clickhouse.com/docs/en/interfaces/schema-inference) diff --git a/docs/en/operations/backup.md b/docs/en/operations/backup.md index 061d95c1152..4feb434d762 100644 --- a/docs/en/operations/backup.md +++ b/docs/en/operations/backup.md @@ -9,6 +9,29 @@ slug: /en/operations/backup - [Backup/restore using an S3 disk](#backuprestore-using-an-s3-disk) - [Alternatives](#alternatives) +## Command summary + +```bash + BACKUP|RESTORE + TABLE [db.]table_name [AS [db.]table_name_in_backup] + [PARTITION[S] partition_expr [,...]] | + DICTIONARY [db.]dictionary_name [AS [db.]name_in_backup] | + DATABASE database_name [AS database_name_in_backup] + [EXCEPT TABLES ...] | + TEMPORARY TABLE table_name [AS table_name_in_backup] | + VIEW view_name [AS view_name_in_backup] + ALL TEMPORARY TABLES [EXCEPT ...] | + ALL DATABASES [EXCEPT ...] } [,...] + [ON CLUSTER 'cluster_name'] + TO|FROM File('<path>/<filename>') | Disk('<disk_name>', '<path>/') | S3('<S3 endpoint>/<path>', '<Access key ID>', '<Secret access key>') + [SETTINGS base_backup = File('<path>/<filename>') | Disk(...) | S3('<S3 endpoint>/<path>', '<Access key ID>', '<Secret access key>')] + +``` + +:::note ALL +`ALL` is only applicable to the `RESTORE` command. +::: + ## Background While [replication](../engines/table-engines/mergetree-family/replication.md) provides protection from hardware failures, it does not protect against human errors: accidental deletion of data, deletion of the wrong table or a table on the wrong cluster, and software bugs that result in incorrect data processing or data corruption. In many cases mistakes like these will affect all replicas. ClickHouse has built-in safeguards to prevent some types of mistakes — for example, by default [you can’t just drop tables with a MergeTree-like engine containing more than 50 Gb of data](server-configuration-parameters/settings.md#max-table-size-to-drop). However, these safeguards do not cover all possible cases and can be circumvented.
diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md index 338ecf9ffd3..9f99570fb22 100644 --- a/docs/en/operations/settings/merge-tree-settings.md +++ b/docs/en/operations/settings/merge-tree-settings.md @@ -127,6 +127,13 @@ Default value: 100000. A large number of parts in a table reduces performance of ClickHouse queries and increases ClickHouse boot time. Most often this is a consequence of an incorrect design (mistakes when choosing a partitioning strategy - too small partitions). +## simultaneous_parts_removal_limit {#simultaneous-parts-removal-limit} + +If there are a lot of outdated parts cleanup thread will try to delete up to `simultaneous_parts_removal_limit` parts during one iteration. +`simultaneous_parts_removal_limit` set to `0` means unlimited. + +Default value: 0. + ## replicated_deduplication_window {#replicated-deduplication-window} The number of most recently inserted blocks for which ClickHouse Keeper stores hash sums to check for duplicates. diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 35d6f47852a..895e071a560 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -1011,6 +1011,12 @@ The default value is 7500. The smaller the value, the more often data is flushed into the table. Setting the value too low leads to poor performance. +## stream_poll_timeout_ms {#stream_poll_timeout_ms} + +Timeout for polling data from/to streaming storages. + +Default value: 500. + ## load_balancing {#settings-load_balancing} Specifies the algorithm of replicas selection that is used for distributed query processing. @@ -3625,7 +3631,7 @@ z IPv4 Controls making inferred types `Nullable` in schema inference for formats without information about nullability. If the setting is enabled, the inferred type will be `Nullable` only if column contains `NULL` in a sample that is parsed during schema inference. -Default value: `false`. +Default value: `true`. ## input_format_try_infer_integers {#input_format_try_infer_integers} diff --git a/docs/en/operations/system-tables/schema_inference_cache.md b/docs/en/operations/system-tables/schema_inference_cache.md new file mode 100644 index 00000000000..31b0671dc34 --- /dev/null +++ b/docs/en/operations/system-tables/schema_inference_cache.md @@ -0,0 +1,70 @@ +--- +slug: /en/operations/system-tables/schema_inference_cache +--- +# Schema inference cache + +Contains information about all cached file schemas. + +Columns: +- `storage` ([String](/docs/en/sql-reference/data-types/string.md)) — Storage name: File, URL, S3 or HDFS. +- `source` ([String](/docs/en/sql-reference/data-types/string.md)) — File source. +- `format` ([String](/docs/en/sql-reference/data-types/string.md)) — Format name. +- `additional_format_info` ([String](/docs/en/sql-reference/data-types/string.md)) - Additional information required to identify the schema. For example, format specific settings. +- `registration_time` ([DateTime](/docs/en/sql-reference/data-types/datetime.md)) — Timestamp when schema was added in cache. +- `schema` ([String](/docs/en/sql-reference/data-types/string.md)) - Cached schema. 
+ +**Example** + +Let's say we have a file `data.jsonl` with this content: +```json +{"id" : 1, "age" : 25, "name" : "Josh", "hobbies" : ["football", "cooking", "music"]} +{"id" : 2, "age" : 19, "name" : "Alan", "hobbies" : ["tennis", "art"]} +{"id" : 3, "age" : 32, "name" : "Lana", "hobbies" : ["fitness", "reading", "shopping"]} +{"id" : 4, "age" : 47, "name" : "Brayan", "hobbies" : ["movies", "skydiving"]} +``` + +:::tip +Place `data.jsonl` in the `user_files_path` directory. You can find this by looking +in your ClickHouse configuration files. The default is: +``` +/var/lib/clickhouse/user_files/ +``` +::: + +Open `clickhouse-client` and run the `DESCRIBE` query: + +```sql +DESCRIBE file('data.jsonl') SETTINGS input_format_try_infer_integers=0; +``` + +```response +┌─name────┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ id │ Nullable(Float64) │ │ │ │ │ │ +│ age │ Nullable(Float64) │ │ │ │ │ │ +│ name │ Nullable(String) │ │ │ │ │ │ +│ hobbies │ Array(Nullable(String)) │ │ │ │ │ │ +└─────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +Let's see the content of the `system.schema_inference_cache` table: + +```sql +SELECT * +FROM system.schema_inference_cache +FORMAT Vertical +``` +```response +Row 1: +────── +storage: File +source: /home/droscigno/user_files/data.jsonl +format: JSONEachRow +additional_format_info: schema_inference_hints=, max_rows_to_read_for_schema_inference=25000, schema_inference_make_columns_nullable=true, try_infer_integers=false, try_infer_dates=true, try_infer_datetimes=true, try_infer_numbers_from_strings=true, read_bools_as_numbers=true, try_infer_objects=false +registration_time: 2022-12-29 17:49:52 +schema: id Nullable(Float64), age Nullable(Float64), name Nullable(String), hobbies Array(Nullable(String)) +``` + + +**See also** +- [Automatic schema inference from input data](/docs/en/interfaces/schema-inference.md) + diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparraylast.md b/docs/en/sql-reference/aggregate-functions/reference/grouparraylast.md new file mode 100644 index 00000000000..1b9681dc852 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparraylast.md @@ -0,0 +1,41 @@ +--- +slug: /en/sql-reference/aggregate-functions/reference/grouparraylast +sidebar_position: 110 +--- + +# groupArrayLast + +Syntax: `groupArrayLast(max_size)(x)` + +Creates an array of last argument values. +For example, `groupArrayLast(1)(x)` is equivalent to `[anyLast (x)]`. + +In some cases, you can still rely on the order of execution. This applies to cases when `SELECT` comes from a subquery that uses `ORDER BY`. 
+ +**Example** + +Query: + +```sql +select groupArrayLast(2)(number+1) numbers from numbers(10) +``` + +Result: + +```text +┌─numbers─┐ +│ [9,10] │ +└─────────┘ +``` + +In compare to `groupArray`: + +```sql +select groupArray(2)(number+1) numbers from numbers(10) +``` + +```text +┌─numbers─┐ +│ [1,2] │ +└─────────┘ +``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/index.md b/docs/en/sql-reference/aggregate-functions/reference/index.md index ee17c37100c..40161145419 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/index.md +++ b/docs/en/sql-reference/aggregate-functions/reference/index.md @@ -32,6 +32,7 @@ ClickHouse-specific aggregate functions: - [topK](../../../sql-reference/aggregate-functions/reference/topk.md) - [topKWeighted](../../../sql-reference/aggregate-functions/reference/topkweighted.md) - [groupArray](../../../sql-reference/aggregate-functions/reference/grouparray.md) +- [groupArrayLast](../../../sql-reference/aggregate-functions/reference/grouparraylast.md) - [groupUniqArray](../../../sql-reference/aggregate-functions/reference/groupuniqarray.md) - [groupArrayInsertAt](../../../sql-reference/aggregate-functions/reference/grouparrayinsertat.md) - [groupArrayMovingAvg](../../../sql-reference/aggregate-functions/reference/grouparraymovingavg.md) diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md index c5d48945649..897945a6d9d 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md @@ -825,6 +825,23 @@ Setting fields: The `table` or `where` fields cannot be used together with the `query` field. And either one of the `table` or `query` fields must be declared. ::: +## Null + +A special source that can be used to create dummy (empty) dictionaries. Such dictionaries can useful for tests or with setups with separated data and query nodes at nodes with Distributed tables. + +``` sql +CREATE DICTIONARY null_dict ( + id UInt64, + val UInt8, + default_val UInt8 DEFAULT 123, + nullable_val Nullable(UInt8) +) +PRIMARY KEY id +SOURCE(NULL()) +LAYOUT(FLAT()) +LIFETIME(0); +``` + ## Related Content -- [Using dictionaries to accelerate queries](https://clickhouse.com/blog/faster-queries-dictionaries-clickhouse) \ No newline at end of file +- [Using dictionaries to accelerate queries](https://clickhouse.com/blog/faster-queries-dictionaries-clickhouse) diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index be8e26daa87..89fa72de8bf 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -410,35 +410,35 @@ Converts a date with time to a certain fixed date, while preserving the time. ## toRelativeYearNum -Converts a date with time or date to the number of the year, starting from a certain fixed point in the past. +Converts a date or date with time to the number of the year, starting from a certain fixed point in the past. ## toRelativeQuarterNum -Converts a date with time or date to the number of the quarter, starting from a certain fixed point in the past. +Converts a date or date with time to the number of the quarter, starting from a certain fixed point in the past. 
## toRelativeMonthNum -Converts a date with time or date to the number of the month, starting from a certain fixed point in the past. +Converts a date or date with time to the number of the month, starting from a certain fixed point in the past. ## toRelativeWeekNum -Converts a date with time or date to the number of the week, starting from a certain fixed point in the past. +Converts a date or date with time to the number of the week, starting from a certain fixed point in the past. ## toRelativeDayNum -Converts a date with time or date to the number of the day, starting from a certain fixed point in the past. +Converts a date or date with time to the number of the day, starting from a certain fixed point in the past. ## toRelativeHourNum -Converts a date with time or date to the number of the hour, starting from a certain fixed point in the past. +Converts a date or date with time to the number of the hour, starting from a certain fixed point in the past. ## toRelativeMinuteNum -Converts a date with time or date to the number of the minute, starting from a certain fixed point in the past. +Converts a date or date with time to the number of the minute, starting from a certain fixed point in the past. ## toRelativeSecondNum -Converts a date with time or date to the number of the second, starting from a certain fixed point in the past. +Converts a date or date with time to the number of the second, starting from a certain fixed point in the past. ## toISOYear @@ -517,6 +517,154 @@ SELECT toDate('2016-12-27') AS date, toYearWeek(date) AS yearWeek0, toYearWeek(d └────────────┴───────────┴───────────┴───────────┘ ``` +## age + +Returns the `unit` component of the difference between `startdate` and `enddate`. The difference is calculated using a precision of 1 second. +E.g. the difference between `2021-12-29` and `2022-01-01` is 3 days for `day` unit, 0 months for `month` unit, 0 years for `year` unit. + + +**Syntax** + +``` sql +age('unit', startdate, enddate, [timezone]) +``` + +**Arguments** + +- `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md). + Possible values: + + - `second` (possible abbreviations: `ss`, `s`) + - `minute` (possible abbreviations: `mi`, `n`) + - `hour` (possible abbreviations: `hh`, `h`) + - `day` (possible abbreviations: `dd`, `d`) + - `week` (possible abbreviations: `wk`, `ww`) + - `month` (possible abbreviations: `mm`, `m`) + - `quarter` (possible abbreviations: `qq`, `q`) + - `year` (possible abbreviations: `yyyy`, `yy`) + +- `startdate` — The first time value to subtract (the subtrahend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). + +- `enddate` — The second time value to subtract from (the minuend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). + +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). If specified, it is applied to both `startdate` and `enddate`. If not specified, timezones of `startdate` and `enddate` are used. If they are not the same, the result is unspecified. [String](../../sql-reference/data-types/string.md). 
+ +**Returned value** + +Difference between `enddate` and `startdate` expressed in `unit`. + +Type: [Int](../../sql-reference/data-types/int-uint.md). + +**Example** + +Query: + +``` sql +SELECT age('hour', toDateTime('2018-01-01 22:30:00'), toDateTime('2018-01-02 23:00:00')); +``` + +Result: + +``` text +┌─age('hour', toDateTime('2018-01-01 22:30:00'), toDateTime('2018-01-02 23:00:00'))─┐ +│ 24 │ +└───────────────────────────────────────────────────────────────────────────────────┘ +``` + +Query: + +``` sql +SELECT + toDate('2022-01-01') AS e, + toDate('2021-12-29') AS s, + age('day', s, e) AS day_age, + age('month', s, e) AS month__age, + age('year', s, e) AS year_age; +``` + +Result: + +``` text +┌──────────e─┬──────────s─┬─day_age─┬─month__age─┬─year_age─┐ +│ 2022-01-01 │ 2021-12-29 │ 3 │ 0 │ 0 │ +└────────────┴────────────┴─────────┴────────────┴──────────┘ +``` + + +## date\_diff + +Returns the count of the specified `unit` boundaries crossed between the `startdate` and `enddate`. +The difference is calculated using relative units, e.g. the difference between `2021-12-29` and `2022-01-01` is 3 days for day unit (see [toRelativeDayNum](#torelativedaynum)), 1 month for month unit (see [toRelativeMonthNum](#torelativemonthnum)), 1 year for year unit (see [toRelativeYearNum](#torelativeyearnum)). + +**Syntax** + +``` sql +date_diff('unit', startdate, enddate, [timezone]) +``` + +Aliases: `dateDiff`, `DATE_DIFF`. + +**Arguments** + +- `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md). + Possible values: + + - `second` (possible abbreviations: `ss`, `s`) + - `minute` (possible abbreviations: `mi`, `n`) + - `hour` (possible abbreviations: `hh`, `h`) + - `day` (possible abbreviations: `dd`, `d`) + - `week` (possible abbreviations: `wk`, `ww`) + - `month` (possible abbreviations: `mm`, `m`) + - `quarter` (possible abbreviations: `qq`, `q`) + - `year` (possible abbreviations: `yyyy`, `yy`) + +- `startdate` — The first time value to subtract (the subtrahend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). + +- `enddate` — The second time value to subtract from (the minuend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). + +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). If specified, it is applied to both `startdate` and `enddate`. If not specified, timezones of `startdate` and `enddate` are used. If they are not the same, the result is unspecified. [String](../../sql-reference/data-types/string.md). + +**Returned value** + +Difference between `enddate` and `startdate` expressed in `unit`. + +Type: [Int](../../sql-reference/data-types/int-uint.md). 
+ +**Example** + +Query: + +``` sql +SELECT dateDiff('hour', toDateTime('2018-01-01 22:00:00'), toDateTime('2018-01-02 23:00:00')); +``` + +Result: + +``` text +┌─dateDiff('hour', toDateTime('2018-01-01 22:00:00'), toDateTime('2018-01-02 23:00:00'))─┐ +│ 25 │ +└────────────────────────────────────────────────────────────────────────────────────────┘ +``` + +Query: + +``` sql +SELECT + toDate('2022-01-01') AS e, + toDate('2021-12-29') AS s, + dateDiff('day', s, e) AS day_diff, + dateDiff('month', s, e) AS month__diff, + dateDiff('year', s, e) AS year_diff; +``` + +Result: + +``` text +┌──────────e─┬──────────s─┬─day_diff─┬─month__diff─┬─year_diff─┐ +│ 2022-01-01 │ 2021-12-29 │ 3 │ 1 │ 1 │ +└────────────┴────────────┴──────────┴─────────────┴───────────┘ +``` + ## date\_trunc Truncates date and time data to the specified part of date. @@ -637,80 +785,6 @@ Result: └───────────────────────────────────────────────┘ ``` -## date\_diff - -Returns the difference between two dates or dates with time values. -The difference is calculated using relative units, e.g. the difference between `2022-01-01` and `2021-12-29` is 3 days for day unit (see [toRelativeDayNum](#torelativedaynum)), 1 month for month unit (see [toRelativeMonthNum](#torelativemonthnum)), 1 year for year unit (see [toRelativeYearNum](#torelativeyearnum)). - -**Syntax** - -``` sql -date_diff('unit', startdate, enddate, [timezone]) -``` - -Aliases: `dateDiff`, `DATE_DIFF`. - -**Arguments** - -- `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md). - Possible values: - - - `second` - - `minute` - - `hour` - - `day` - - `week` - - `month` - - `quarter` - - `year` - -- `startdate` — The first time value to subtract (the subtrahend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). - -- `enddate` — The second time value to subtract from (the minuend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). - -- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). If specified, it is applied to both `startdate` and `enddate`. If not specified, timezones of `startdate` and `enddate` are used. If they are not the same, the result is unspecified. [String](../../sql-reference/data-types/string.md). - -**Returned value** - -Difference between `enddate` and `startdate` expressed in `unit`. - -Type: [Int](../../sql-reference/data-types/int-uint.md). 
- -**Example** - -Query: - -``` sql -SELECT dateDiff('hour', toDateTime('2018-01-01 22:00:00'), toDateTime('2018-01-02 23:00:00')); -``` - -Result: - -``` text -┌─dateDiff('hour', toDateTime('2018-01-01 22:00:00'), toDateTime('2018-01-02 23:00:00'))─┐ -│ 25 │ -└────────────────────────────────────────────────────────────────────────────────────────┘ -``` - -Query: - -``` sql -SELECT - toDate('2022-01-01') AS e, - toDate('2021-12-29') AS s, - dateDiff('day', s, e) AS day_diff, - dateDiff('month', s, e) AS month__diff, - dateDiff('year', s, e) AS year_diff; -``` - -Result: - -``` text -┌──────────e─┬──────────s─┬─day_diff─┬─month__diff─┬─year_diff─┐ -│ 2022-01-01 │ 2021-12-29 │ 3 │ 1 │ 1 │ -└────────────┴────────────┴──────────┴─────────────┴───────────┘ -``` - ## date\_sub Subtracts the time interval or date interval from the provided date or date with time. @@ -1085,6 +1159,8 @@ SELECT timeSlots(toDateTime64('1980-12-12 21:01:02.1234', 4, 'UTC'), toDecimal64 Formats a Time according to the given Format string. Format is a constant expression, so you cannot have multiple formats for a single result column. +formatDateTime uses MySQL datetime format style, refer to https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_date-format. + **Syntax** ``` sql @@ -1158,6 +1234,64 @@ Result: └─────────────────────────────────────────────────────────────────────┘ ``` +**See Also** + +- [formatDateTimeInJodaSyntax](##formatDateTimeInJodaSyntax) + + +## formatDateTimeInJodaSyntax + +Similar to formatDateTime, except that it formats datetime in Joda style instead of MySQL style. Refer to https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html. + + +**Replacement fields** + +Using replacement fields, you can define a pattern for the resulting string. + + +| Placeholder | Description | Presentation | Examples | +| ----------- | ----------- | ------------- | -------- | +| G | era | text | AD | +| C | century of era (>=0) | number | 20 | +| Y | year of era (>=0) | year | 1996 | +| x | weekyear(not supported yet) | year | 1996 | +| w | week of weekyear(not supported yet) | number | 27 | +| e | day of week | number | 2 | +| E | day of week | text | Tuesday; Tue | +| y | year | year | 1996 | +| D | day of year | number | 189 | +| M | month of year | month | July; Jul; 07 | +| d | day of month | number | 10 | +| a | halfday of day | text | PM | +| K | hour of halfday (0~11) | number | 0 | +| h | clockhour of halfday (1~12) | number | 12 | +| H | hour of day (0~23) | number | 0 | +| k | clockhour of day (1~24) | number | 24 | +| m | minute of hour | number | 30 | +| s | second of minute | number | 55 | +| S | fraction of second(not supported yet) | number | 978 | +| z | time zone(short name not supported yet) | text | Pacific Standard Time; PST | +| Z | time zone offset/id(not supported yet) | zone | -0800; -08:00; America/Los_Angeles | +| ' | escape for text | delimiter| | +| '' | single quote | literal | ' | + +**Example** + +Query: + +``` sql +SELECT formatDateTimeInJodaSyntax(toDateTime('2010-01-04 12:34:56'), 'yyyy-MM-dd HH:mm:ss') +``` + +Result: + +``` +┌─formatDateTimeInJodaSyntax(toDateTime('2010-01-04 12:34:56'), 'yyyy-MM-dd HH:mm:ss')─┐ +│ 2010-01-04 12:34:56 │ +└─────────────────────────────────────────────────────────────────────────────────────────┘ +``` + + ## dateName Returns specified part of date. @@ -1241,6 +1375,8 @@ Result: Function converts Unix timestamp to a calendar date and a time of a day. 
When there is only a single argument of [Integer](../../sql-reference/data-types/int-uint.md) type, it acts in the same way as [toDateTime](../../sql-reference/functions/type-conversion-functions.md#todatetime) and return [DateTime](../../sql-reference/data-types/datetime.md) type. +FROM_UNIXTIME uses MySQL datetime format style, refer to https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_date-format. + Alias: `fromUnixTimestamp`. **Example:** @@ -1273,6 +1409,28 @@ SELECT FROM_UNIXTIME(1234334543, '%Y-%m-%d %R:%S') AS DateTime; └─────────────────────┘ ``` +**See Also** + +- [fromUnixTimestampInJodaSyntax](##fromUnixTimestampInJodaSyntax) + + +## fromUnixTimestampInJodaSyntax +Similar to FROM_UNIXTIME, except that it formats time in Joda style instead of MySQL style. Refer to https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html. + +**Example:** +Query: +``` sql +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'yyyy-MM-dd HH:mm:ss', 'UTC'); +``` + +Result: +``` +┌─fromUnixTimestampInJodaSyntax(1669804872, 'yyyy-MM-dd HH:mm:ss', 'UTC')─┐ +│ 2022-11-30 10:41:12 │ +└────────────────────────────────────────────────────────────────────────────┘ +``` + + ## toModifiedJulianDay Converts a [Proleptic Gregorian calendar](https://en.wikipedia.org/wiki/Proleptic_Gregorian_calendar) date in text form `YYYY-MM-DD` to a [Modified Julian Day](https://en.wikipedia.org/wiki/Julian_day#Variants) number in Int32. This function supports date from `0000-01-01` to `9999-12-31`. It raises an exception if the argument cannot be parsed as a date, or the date is invalid. diff --git a/docs/en/sql-reference/functions/random-functions.md b/docs/en/sql-reference/functions/random-functions.md index 4efa2131eb6..f0c0d3e4802 100644 --- a/docs/en/sql-reference/functions/random-functions.md +++ b/docs/en/sql-reference/functions/random-functions.md @@ -68,6 +68,483 @@ Result: └────────────┴────────────┴──────────────┴────────────────┴─────────────────┴──────────────────────┘ ``` +# Functions for Generating Random Numbers based on Distributions + +:::note +These functions are available starting from 22.10. +::: + + + +## randUniform + +Return random number based on [continuous uniform distribution](https://en.wikipedia.org/wiki/Continuous_uniform_distribution) in a specified range from `min` to `max`. + +**Syntax** + +``` sql +randUniform(min, max) +``` + +**Arguments** + +- `min` - `Float64` - min value of the range, +- `max` - `Float64` - max value of the range. + +**Returned value** + +- Pseudo-random number. + +Type: [Float64](/docs/en/sql-reference/data-types/float.md). + +**Example** + +Query: + +``` sql +SELECT randUniform(5.5, 10) FROM numbers(5) +``` + +Result: + +``` text +┌─randUniform(5.5, 10)─┐ +│ 8.094978491443102 │ +│ 7.3181248914450885 │ +│ 7.177741903868262 │ +│ 6.483347380953762 │ +│ 6.122286382885112 │ +└──────────────────────┘ +``` + + + +## randNormal + +Return random number based on [normal distribution](https://en.wikipedia.org/wiki/Normal_distribution). + +**Syntax** + +``` sql +randNormal(meam, variance) +``` + +**Arguments** + +- `meam` - `Float64` mean value of distribution, +- `variance` - `Float64` - [variance](https://en.wikipedia.org/wiki/Variance). + +**Returned value** + +- Pseudo-random number. + +Type: [Float64](/docs/en/sql-reference/data-types/float.md). 
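+
+Beyond the per-row example below, a quick way to eyeball the shape of the distribution is to bucket a large number of samples and render them with the `bar` function. This is only an illustrative sketch — the bucket width and the bar scale are arbitrary choices here:
+
+``` sql
+SELECT
+    floor(randNormal(10, 2)) AS bucket, -- group samples into unit-wide buckets
+    count() AS c,
+    bar(c, 0, 50000) AS chart           -- draw a simple text histogram
+FROM numbers(100000)
+GROUP BY bucket
+ORDER BY bucket
+```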
+ +**Example** + +Query: + +``` sql +SELECT randNormal(10, 2) FROM numbers(5) +``` + +Result: + +``` text +┌──randNormal(10, 2)─┐ +│ 13.389228911709653 │ +│ 8.622949707401295 │ +│ 10.801887062682981 │ +│ 4.5220192605895315 │ +│ 10.901239123982567 │ +└────────────────────┘ +``` + + + +## randLogNormal + +Return random number based on [log-normal distribution](https://en.wikipedia.org/wiki/Log-normal_distribution). + +**Syntax** + +``` sql +randLogNormal(meam, variance) +``` + +**Arguments** + +- `meam` - `Float64` mean value of distribution, +- `variance` - `Float64` - [variance](https://en.wikipedia.org/wiki/Variance). + +**Returned value** + +- Pseudo-random number. + +Type: [Float64](/docs/en/sql-reference/data-types/float.md). + +**Example** + +Query: + +``` sql +SELECT randLogNormal(100, 5) FROM numbers(5) +``` + +Result: + +``` text +┌─randLogNormal(100, 5)─┐ +│ 1.295699673937363e48 │ +│ 9.719869109186684e39 │ +│ 6.110868203189557e42 │ +│ 9.912675872925529e39 │ +│ 2.3564708490552458e42 │ +└───────────────────────┘ +``` + + + +## randBinomial + +Return random number based on [binomial distribution](https://en.wikipedia.org/wiki/Binomial_distribution). + +**Syntax** + +``` sql +randBinomial(experiments, probability) +``` + +**Arguments** + +- `experiments` - `UInt64` number of experiments, +- `probability` - `Float64` - probability of success in each experiment (values in `0...1` range only). + +**Returned value** + +- Pseudo-random number. + +Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). + +**Example** + +Query: + +``` sql +SELECT randBinomial(100, .75) FROM numbers(5) +``` + +Result: + +``` text +┌─randBinomial(100, 0.75)─┐ +│ 74 │ +│ 78 │ +│ 76 │ +│ 77 │ +│ 80 │ +└─────────────────────────┘ +``` + + + +## randNegativeBinomial + +Return random number based on [negative binomial distribution](https://en.wikipedia.org/wiki/Negative_binomial_distribution). + +**Syntax** + +``` sql +randNegativeBinomial(experiments, probability) +``` + +**Arguments** + +- `experiments` - `UInt64` number of experiments, +- `probability` - `Float64` - probability of failure in each experiment (values in `0...1` range only). + +**Returned value** + +- Pseudo-random number. + +Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). + +**Example** + +Query: + +``` sql +SELECT randNegativeBinomial(100, .75) FROM numbers(5) +``` + +Result: + +``` text +┌─randNegativeBinomial(100, 0.75)─┐ +│ 33 │ +│ 32 │ +│ 39 │ +│ 40 │ +│ 50 │ +└─────────────────────────────────┘ +``` + + + +## randPoisson + +Return random number based on [Poisson distribution](https://en.wikipedia.org/wiki/Poisson_distribution). + +**Syntax** + +``` sql +randPoisson(n) +``` + +**Arguments** + +- `n` - `UInt64` mean number of occurrences. + +**Returned value** + +- Pseudo-random number. + +Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). + +**Example** + +Query: + +``` sql +SELECT randPoisson(10) FROM numbers(5) +``` + +Result: + +``` text +┌─randPoisson(10)─┐ +│ 8 │ +│ 8 │ +│ 7 │ +│ 10 │ +│ 6 │ +└─────────────────┘ +``` + + + +## randBernoulli + +Return random number based on [Bernoulli distribution](https://en.wikipedia.org/wiki/Bernoulli_distribution). + +**Syntax** + +``` sql +randBernoulli(probability) +``` + +**Arguments** + +- `probability` - `Float64` - probability of success (values in `0...1` range only). + +**Returned value** + +- Pseudo-random number. + +Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). 
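+
+Because the function returns either 0 or 1, averaging many draws is a quick sanity check that the configured probability is honored (a sketch — the result fluctuates around the given probability; the per-row example follows below):
+
+``` sql
+SELECT avg(randBernoulli(0.75)) FROM numbers(100000)
+```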
+ +**Example** + +Query: + +``` sql +SELECT randBernoulli(.75) FROM numbers(5) +``` + +Result: + +``` text +┌─randBernoulli(0.75)─┐ +│ 1 │ +│ 1 │ +│ 0 │ +│ 1 │ +│ 1 │ +└─────────────────────┘ +``` + + + +## randExponential + +Return random number based on [exponential distribution](https://en.wikipedia.org/wiki/Exponential_distribution). + +**Syntax** + +``` sql +randExponential(lambda) +``` + +**Arguments** + +- `lambda` - `Float64` lambda value. + +**Returned value** + +- Pseudo-random number. + +Type: [Float64](/docs/en/sql-reference/data-types/float.md). + +**Example** + +Query: + +``` sql +SELECT randExponential(1/10) FROM numbers(5) +``` + +Result: + +``` text +┌─randExponential(divide(1, 10))─┐ +│ 44.71628934340778 │ +│ 4.211013337903262 │ +│ 10.809402553207766 │ +│ 15.63959406553284 │ +│ 1.8148392319860158 │ +└────────────────────────────────┘ +``` + + + +## randChiSquared + +Return random number based on [Chi-square distribution](https://en.wikipedia.org/wiki/Chi-squared_distribution) - a distribution of a sum of the squares of k independent standard normal random variables. + +**Syntax** + +``` sql +randChiSquared(degree_of_freedom) +``` + +**Arguments** + +- `degree_of_freedom` - `Float64` degree of freedom. + +**Returned value** + +- Pseudo-random number. + +Type: [Float64](/docs/en/sql-reference/data-types/float.md). + +**Example** + +Query: + +``` sql +SELECT randChiSquared(10) FROM numbers(5) +``` + +Result: + +``` text +┌─randChiSquared(10)─┐ +│ 10.015463656521543 │ +│ 9.621799919882768 │ +│ 2.71785015634699 │ +│ 11.128188665931908 │ +│ 4.902063104425469 │ +└────────────────────┘ +``` + + + +## randStudentT + +Return random number based on [Student's t-distribution](https://en.wikipedia.org/wiki/Student%27s_t-distribution). + +**Syntax** + +``` sql +randStudentT(degree_of_freedom) +``` + +**Arguments** + +- `degree_of_freedom` - `Float64` degree of freedom. + +**Returned value** + +- Pseudo-random number. + +Type: [Float64](/docs/en/sql-reference/data-types/float.md). + +**Example** + +Query: + +``` sql +SELECT randStudentT(10) FROM numbers(5) +``` + +Result: + +``` text +┌─────randStudentT(10)─┐ +│ 1.2217309938538725 │ +│ 1.7941971681200541 │ +│ -0.28192176076784664 │ +│ 0.2508897721303792 │ +│ -2.7858432909761186 │ +└──────────────────────┘ +``` + + + +## randFisherF + +Return random number based on [F-distribution](https://en.wikipedia.org/wiki/F-distribution). + +**Syntax** + +``` sql +randFisherF(d1, d2) +``` + +**Arguments** + +- `d1` - `Float64` d1 degree of freedom in `X = (S1 / d1) / (S2 / d2)`, +- `d2` - `Float64` d2 degree of freedom in `X = (S1 / d1) / (S2 / d2)`, + +**Returned value** + +- Pseudo-random number. + +Type: [Float64](/docs/en/sql-reference/data-types/float.md). 
+ +**Example** + +Query: + +``` sql +SELECT randFisherF(10, 3) FROM numbers(5) +``` + +Result: + +``` text +┌──randFisherF(10, 3)─┐ +│ 7.286287504216609 │ +│ 0.26590779413050386 │ +│ 0.22207610901168987 │ +│ 0.7953362728449572 │ +│ 0.19278885985221572 │ +└─────────────────────┘ +``` + + + + # Random Functions for Working with Strings ## randomString diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index d82728b9721..bae45de07e9 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -14,7 +14,7 @@ ClickHouse has the [same behavior as C++ programs](https://en.cppreference.com/w ## toInt(8\|16\|32\|64\|128\|256) -Converts an input value to the [Int](../../sql-reference/data-types/int-uint.md) data type. This function family includes: +Converts an input value to the [Int](/docs/en/sql-reference/data-types/int-uint.md) data type. This function family includes: - `toInt8(expr)` — Results in the `Int8` data type. - `toInt16(expr)` — Results in the `Int16` data type. @@ -25,7 +25,7 @@ Converts an input value to the [Int](../../sql-reference/data-types/int-uint.md) **Arguments** -- `expr` — [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a number or a string with the decimal representation of a number. Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. +- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions) returning a number or a string with the decimal representation of a number. Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. **Returned value** @@ -33,7 +33,7 @@ Integer value in the `Int8`, `Int16`, `Int32`, `Int64`, `Int128` or `Int256` dat Functions use [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning they truncate fractional digits of numbers. -The behavior of functions for the [NaN and Inf](../../sql-reference/data-types/float.md#data_type-float-nan-inf) arguments is undefined. Remember about [numeric conversions issues](#numeric-conversion-issues), when using the functions. +The behavior of functions for the [NaN and Inf](/docs/en/sql-reference/data-types/float.md/#data_type-float-nan-inf) arguments is undefined. Remember about [numeric conversions issues](#numeric-conversion-issues), when using the functions. **Example** @@ -114,7 +114,7 @@ Result: ## toUInt(8\|16\|32\|64\|256) -Converts an input value to the [UInt](../../sql-reference/data-types/int-uint.md) data type. This function family includes: +Converts an input value to the [UInt](/docs/en/sql-reference/data-types/int-uint.md) data type. This function family includes: - `toUInt8(expr)` — Results in the `UInt8` data type. - `toUInt16(expr)` — Results in the `UInt16` data type. @@ -124,7 +124,7 @@ Converts an input value to the [UInt](../../sql-reference/data-types/int-uint.md **Arguments** -- `expr` — [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a number or a string with the decimal representation of a number. Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. +- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions) returning a number or a string with the decimal representation of a number. 
Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. **Returned value** @@ -132,7 +132,7 @@ Integer value in the `UInt8`, `UInt16`, `UInt32`, `UInt64` or `UInt256` data typ Functions use [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning they truncate fractional digits of numbers. -The behavior of functions for negative arguments and for the [NaN and Inf](../../sql-reference/data-types/float.md#data_type-float-nan-inf) arguments is undefined. If you pass a string with a negative number, for example `'-32'`, ClickHouse raises an exception. Remember about [numeric conversions issues](#numeric-conversion-issues), when using the functions. +The behavior of functions for negative arguments and for the [NaN and Inf](/docs/en/sql-reference/data-types/float.md/#data_type-float-nan-inf) arguments is undefined. If you pass a string with a negative number, for example `'-32'`, ClickHouse raises an exception. Remember about [numeric conversions issues](#numeric-conversion-issues), when using the functions. **Example** @@ -166,7 +166,111 @@ Result: ## toDate -Alias: `DATE`. +Converts the argument to `Date` data type. + +If the argument is `DateTime` or `DateTime64`, it truncates it, leaving the date component of the DateTime: +```sql +SELECT + now() AS x, + toDate(x) +``` +```response +┌───────────────────x─┬─toDate(now())─┐ +│ 2022-12-30 13:44:17 │ 2022-12-30 │ +└─────────────────────┴───────────────┘ +``` + +If the argument is a string, it is parsed as Date or DateTime. If it was parsed as DateTime, the date component is being used: +```sql +SELECT + toDate('2022-12-30') AS x, + toTypeName(x) +``` +```response +┌──────────x─┬─toTypeName(toDate('2022-12-30'))─┐ +│ 2022-12-30 │ Date │ +└────────────┴──────────────────────────────────┘ + +1 row in set. Elapsed: 0.001 sec. +``` +```sql +SELECT + toDate('2022-12-30 01:02:03') AS x, + toTypeName(x) +``` +```response +┌──────────x─┬─toTypeName(toDate('2022-12-30 01:02:03'))─┐ +│ 2022-12-30 │ Date │ +└────────────┴───────────────────────────────────────────┘ +``` + +If the argument is a number and it looks like a UNIX timestamp (is greater than 65535), it is interpreted as a DateTime, then truncated to Date in the current timezone. The timezone argument can be specified as a second argument of the function. The truncation to Date depends on the timezone: + +```sql +SELECT + now() AS current_time, + toUnixTimestamp(current_time) AS ts, + toDateTime(ts) AS time_Amsterdam, + toDateTime(ts, 'Pacific/Apia') AS time_Samoa, + toDate(time_Amsterdam) AS date_Amsterdam, + toDate(time_Samoa) AS date_Samoa, + toDate(ts) AS date_Amsterdam_2, + toDate(ts, 'Pacific/Apia') AS date_Samoa_2 +``` +```response +Row 1: +────── +current_time: 2022-12-30 13:51:54 +ts: 1672404714 +time_Amsterdam: 2022-12-30 13:51:54 +time_Samoa: 2022-12-31 01:51:54 +date_Amsterdam: 2022-12-30 +date_Samoa: 2022-12-31 +date_Amsterdam_2: 2022-12-30 +date_Samoa_2: 2022-12-31 +``` + +The example above demonstrates how the same UNIX timestamp can be interpreted as different dates in different time zones. + +If the argument is a number and it is smaller than 65536, it is interpreted as the number of days since 1970-01-01 (a UNIX day) and converted to Date. It corresponds to the internal numeric representation of the `Date` data type. Example: + +```sql +SELECT toDate(12345) +``` +```response +┌─toDate(12345)─┐ +│ 2003-10-20 │ +└───────────────┘ +``` + +This conversion does not depend on timezones. 
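+
+A small round-trip sketch (this assumes the usual numeric conversion of `Date`, which yields the internal day number back):
+
+```sql
+SELECT
+    toDate(12345) AS d,        -- interpreted as days since 1970-01-01
+    toUInt16(d) AS day_number  -- should give 12345 back
+```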
+ +If the argument does not fit in the range of the Date type, it results in an implementation-defined behavior, that can saturate to the maximum supported date or overflow: +```sql +SELECT toDate(10000000000.) +``` +```response +┌─toDate(10000000000.)─┐ +│ 2106-02-07 │ +└──────────────────────┘ +``` + +The function `toDate` can be also written in alternative forms: + +```sql +SELECT + now() AS time, + toDate(time), + DATE(time), + CAST(time, 'Date') +``` +```response +┌────────────────time─┬─toDate(now())─┬─DATE(now())─┬─CAST(now(), 'Date')─┐ +│ 2022-12-30 13:54:58 │ 2022-12-30 │ 2022-12-30 │ 2022-12-30 │ +└─────────────────────┴───────────────┴─────────────┴─────────────────────┘ +``` + +Have a nice day working with dates and times. ## toDateOrZero @@ -184,7 +288,7 @@ Alias: `DATE`. ## toDate32 -Converts the argument to the [Date32](../../sql-reference/data-types/date32.md) data type. If the value is outside the range returns the border values supported by `Date32`. If the argument has [Date](../../sql-reference/data-types/date.md) type, borders of `Date` are taken into account. +Converts the argument to the [Date32](/docs/en/sql-reference/data-types/date32.md) data type. If the value is outside the range, `toDate32` returns the border values supported by `Date32`. If the argument has [Date](/docs/en/sql-reference/data-types/date.md) type, borders of `Date` are taken into account. **Syntax** @@ -194,13 +298,13 @@ toDate32(expr) **Arguments** -- `expr` — The value. [String](../../sql-reference/data-types/string.md), [UInt32](../../sql-reference/data-types/int-uint.md) or [Date](../../sql-reference/data-types/date.md). +- `expr` — The value. [String](/docs/en/sql-reference/data-types/string.md), [UInt32](/docs/en/sql-reference/data-types/int-uint.md) or [Date](/docs/en/sql-reference/data-types/date.md). **Returned value** - A calendar date. -Type: [Date32](../../sql-reference/data-types/date32.md). +Type: [Date32](/docs/en/sql-reference/data-types/date32.md). **Example** @@ -242,7 +346,7 @@ SELECT toDate32(toDate('1899-01-01')) AS value, toTypeName(value); ## toDate32OrZero -The same as [toDate32](#todate32) but returns the min value of [Date32](../../sql-reference/data-types/date32.md) if invalid argument is received. +The same as [toDate32](#todate32) but returns the min value of [Date32](/docs/en/sql-reference/data-types/date32.md) if an invalid argument is received. **Example** @@ -262,7 +366,7 @@ Result: ## toDate32OrNull -The same as [toDate32](#todate32) but returns `NULL` if invalid argument is received. +The same as [toDate32](#todate32) but returns `NULL` if an invalid argument is received. **Example** @@ -282,7 +386,7 @@ Result: ## toDate32OrDefault -Converts the argument to the [Date32](../../sql-reference/data-types/date32.md) data type. If the value is outside the range returns the lower border value supported by `Date32`. If the argument has [Date](../../sql-reference/data-types/date.md) type, borders of `Date` are taken into account. Returns default value if invalid argument is received. +Converts the argument to the [Date32](/docs/en/sql-reference/data-types/date32.md) data type. If the value is outside the range, `toDate32OrDefault` returns the lower border value supported by `Date32`. If the argument has [Date](/docs/en/sql-reference/data-types/date.md) type, borders of `Date` are taken into account. Returns default value if an invalid argument is received. 
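+
+As a quick comparison (a sketch, separate from the reference example below), the three fallback variants differ only in what they return for an unparsable input:
+
+``` sql
+SELECT
+    toDate32OrZero('not-a-date') AS or_zero,  -- minimum of Date32
+    toDate32OrNull('not-a-date') AS or_null,  -- NULL
+    toDate32OrDefault('not-a-date', toDate32('2020-01-01')) AS or_default;
+```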
**Example** @@ -304,7 +408,7 @@ Result: ## toDateTime64 -Converts the argument to the [DateTime64](../../sql-reference/data-types/datetime64.md) data type. +Converts the argument to the [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) data type. **Syntax** @@ -314,7 +418,7 @@ toDateTime64(expr, scale, [timezone]) **Arguments** -- `expr` — The value. [String](../../sql-reference/data-types/string.md), [UInt32](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [DateTime](../../sql-reference/data-types/datetime.md). +- `expr` — The value. [String](/docs/en/sql-reference/data-types/string.md), [UInt32](/docs/en/sql-reference/data-types/int-uint.md), [Float](/docs/en/sql-reference/data-types/float.md) or [DateTime](/docs/en/sql-reference/data-types/datetime.md). - `scale` - Tick size (precision): 10-precision seconds. Valid range: [ 0 : 9 ]. - `timezone` - Time zone of the specified datetime64 object. @@ -322,7 +426,7 @@ toDateTime64(expr, scale, [timezone]) - A calendar date and time of day, with sub-second precision. -Type: [DateTime64](../../sql-reference/data-types/datetime64.md). +Type: [DateTime64](/docs/en/sql-reference/data-types/datetime64.md). **Example** @@ -378,7 +482,7 @@ SELECT toDateTime64('2019-01-01 00:00:00', 3, 'Asia/Istanbul') AS value, toTypeN ## toDecimal(32\|64\|128\|256) -Converts `value` to the [Decimal](../../sql-reference/data-types/decimal.md) data type with precision of `S`. The `value` can be a number or a string. The `S` (scale) parameter specifies the number of decimal places. +Converts `value` to the [Decimal](/docs/en/sql-reference/data-types/decimal.md) data type with precision of `S`. The `value` can be a number or a string. The `S` (scale) parameter specifies the number of decimal places. - `toDecimal32(value, S)` - `toDecimal64(value, S)` @@ -387,7 +491,7 @@ Converts `value` to the [Decimal](../../sql-reference/data-types/decimal.md) dat ## toDecimal(32\|64\|128\|256)OrNull -Converts an input string to a [Nullable(Decimal(P,S))](../../sql-reference/data-types/decimal.md) data type value. This family of functions include: +Converts an input string to a [Nullable(Decimal(P,S))](/docs/en/sql-reference/data-types/decimal.md) data type value. This family of functions includes: - `toDecimal32OrNull(expr, S)` — Results in `Nullable(Decimal32(S))` data type. - `toDecimal64OrNull(expr, S)` — Results in `Nullable(Decimal64(S))` data type. @@ -398,7 +502,7 @@ These functions should be used instead of `toDecimal*()` functions, if you prefe **Arguments** -- `expr` — [Expression](../../sql-reference/syntax.md#syntax-expressions), returns a value in the [String](../../sql-reference/data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. +- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions), returns a value in the [String](/docs/en/sql-reference/data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. - `S` — Scale, the number of decimal places in the resulting value. **Returned value** @@ -441,7 +545,7 @@ Result: ## toDecimal(32\|64\|128\|256)OrDefault -Converts an input string to a [Decimal(P,S)](../../sql-reference/data-types/decimal.md) data type value. This family of functions include: +Converts an input string to a [Decimal(P,S)](/docs/en/sql-reference/data-types/decimal.md) data type value. 
This family of functions includes: - `toDecimal32OrDefault(expr, S)` — Results in `Decimal32(S)` data type. - `toDecimal64OrDefault(expr, S)` — Results in `Decimal64(S)` data type. @@ -452,7 +556,7 @@ These functions should be used instead of `toDecimal*()` functions, if you prefe **Arguments** -- `expr` — [Expression](../../sql-reference/syntax.md#syntax-expressions), returns a value in the [String](../../sql-reference/data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. +- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions), returns a value in the [String](/docs/en/sql-reference/data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. - `S` — Scale, the number of decimal places in the resulting value. **Returned value** @@ -494,7 +598,7 @@ Result: ## toDecimal(32\|64\|128\|256)OrZero -Converts an input value to the [Decimal(P,S)](../../sql-reference/data-types/decimal.md) data type. This family of functions include: +Converts an input value to the [Decimal(P,S)](/docs/en/sql-reference/data-types/decimal.md) data type. This family of functions includes: - `toDecimal32OrZero( expr, S)` — Results in `Decimal32(S)` data type. - `toDecimal64OrZero( expr, S)` — Results in `Decimal64(S)` data type. @@ -505,7 +609,7 @@ These functions should be used instead of `toDecimal*()` functions, if you prefe **Arguments** -- `expr` — [Expression](../../sql-reference/syntax.md#syntax-expressions), returns a value in the [String](../../sql-reference/data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. +- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions), returns a value in the [String](/docs/en/sql-reference/data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. - `S` — Scale, the number of decimal places in the resulting value. **Returned value** @@ -564,7 +668,7 @@ YYYY-MM-DD hh:mm:ss As an exception, if converting from UInt32, Int32, UInt64, or Int64 numeric types to Date, and if the number is greater than or equal to 65536, the number is interpreted as a Unix timestamp (and not as the number of days) and is rounded to the date. This allows support for the common occurrence of writing ‘toDate(unix_timestamp)’, which otherwise would be an error and would require writing the more cumbersome ‘toDate(toDateTime(unix_timestamp))’. -Conversion between a date and date with time is performed the natural way: by adding a null time or dropping the time. +Conversion between a date and a date with time is performed the natural way: by adding a null time or dropping the time. Conversion between numeric types uses the same rules as assignments between different numeric types in C++. @@ -643,15 +747,15 @@ These functions accept a string and interpret the bytes placed at the beginning ## reinterpretAsString -This function accepts a number or date or date with time, and returns a string containing bytes representing the corresponding value in host order (little endian). Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a string that is one byte long. +This function accepts a number or date or date with time and returns a string containing bytes representing the corresponding value in host order (little endian). Null bytes are dropped from the end. 
For example, a UInt32 type value of 255 is a string that is one byte long. ## reinterpretAsFixedString -This function accepts a number or date or date with time, and returns a FixedString containing bytes representing the corresponding value in host order (little endian). Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a FixedString that is one byte long. +This function accepts a number or date or date with time and returns a FixedString containing bytes representing the corresponding value in host order (little endian). Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a FixedString that is one byte long. ## reinterpretAsUUID -Accepts 16 bytes string and returns UUID containing bytes representing the corresponding value in network byte order (big-endian). If the string isn't long enough, the function works as if the string is padded with the necessary number of null bytes to the end. If the string longer than 16 bytes, the extra bytes at the end are ignored. +Accepts 16 bytes string and returns UUID containing bytes representing the corresponding value in network byte order (big-endian). If the string isn't long enough, the function works as if the string is padded with the necessary number of null bytes to the end. If the string is longer than 16 bytes, the extra bytes at the end are ignored. **Syntax** @@ -661,11 +765,11 @@ reinterpretAsUUID(fixed_string) **Arguments** -- `fixed_string` — Big-endian byte string. [FixedString](../../sql-reference/data-types/fixedstring.md#fixedstring). +- `fixed_string` — Big-endian byte string. [FixedString](/docs/en/sql-reference/data-types/fixedstring.md/#fixedstring). **Returned value** -- The UUID type value. [UUID](../../sql-reference/data-types/uuid.md#uuid-data-type). +- The UUID type value. [UUID](/docs/en/sql-reference/data-types/uuid.md/#uuid-data-type). **Examples** @@ -718,7 +822,7 @@ reinterpret(x, type) **Arguments** - `x` — Any type. -- `type` — Destination type. [String](../../sql-reference/data-types/string.md). +- `type` — Destination type. [String](/docs/en/sql-reference/data-types/string.md). **Returned value** @@ -757,7 +861,7 @@ x::t **Arguments** - `x` — A value to convert. May be of any type. -- `T` — The name of the target data type. [String](../../sql-reference/data-types/string.md). +- `T` — The name of the target data type. [String](/docs/en/sql-reference/data-types/string.md). - `t` — The target data type. **Returned value** @@ -806,9 +910,9 @@ Result: └─────────────────────┴─────────────────────┴────────────┴─────────────────────┴───────────────────────────┘ ``` -Conversion to FixedString(N) only works for arguments of type [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). +Conversion to FixedString(N) only works for arguments of type [String](/docs/en/sql-reference/data-types/string.md) or [FixedString](/docs/en/sql-reference/data-types/fixedstring.md). -Type conversion to [Nullable](../../sql-reference/data-types/nullable.md) and back is supported. +Type conversion to [Nullable](/docs/en/sql-reference/data-types/nullable.md) and back is supported. **Example** @@ -844,7 +948,7 @@ Result: **See also** -- [cast_keep_nullable](../../operations/settings/settings.md#cast_keep_nullable) setting +- [cast_keep_nullable](/docs/en/operations/settings/settings.md/#cast_keep_nullable) setting ## accurateCast(x, T) @@ -882,7 +986,7 @@ Code: 70. DB::Exception: Received from localhost:9000. 
DB::Exception: Value in c ## accurateCastOrNull(x, T) -Converts input value `x` to the specified data type `T`. Always returns [Nullable](../../sql-reference/data-types/nullable.md) type and returns [NULL](../../sql-reference/syntax.md#null-literal) if the casted value is not representable in the target type. +Converts input value `x` to the specified data type `T`. Always returns [Nullable](/docs/en/sql-reference/data-types/nullable.md) type and returns [NULL](/docs/en/sql-reference/syntax.md/#null-literal) if the casted value is not representable in the target type. **Syntax** @@ -991,7 +1095,7 @@ Result: ## toInterval(Year\|Quarter\|Month\|Week\|Day\|Hour\|Minute\|Second) -Converts a Number type argument to an [Interval](../../sql-reference/data-types/special-data-types/interval.md) data type. +Converts a Number type argument to an [Interval](/docs/en/sql-reference/data-types/special-data-types/interval.md) data type. **Syntax** @@ -1039,7 +1143,7 @@ Result: ## parseDateTimeBestEffort ## parseDateTime32BestEffort -Converts a date and time in the [String](../../sql-reference/data-types/string.md) representation to [DateTime](../../sql-reference/data-types/datetime.md#data_type-datetime) data type. +Converts a date and time in the [String](/docs/en/sql-reference/data-types/string.md) representation to [DateTime](/docs/en/sql-reference/data-types/datetime.md/#data_type-datetime) data type. The function parses [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601), [RFC 1123 - 5.2.14 RFC-822 Date and Time Specification](https://tools.ietf.org/html/rfc1123#page-55), ClickHouse’s and some other date and time formats. @@ -1051,8 +1155,8 @@ parseDateTimeBestEffort(time_string [, time_zone]) **Arguments** -- `time_string` — String containing a date and time to convert. [String](../../sql-reference/data-types/string.md). -- `time_zone` — Time zone. The function parses `time_string` according to the time zone. [String](../../sql-reference/data-types/string.md). +- `time_string` — String containing a date and time to convert. [String](/docs/en/sql-reference/data-types/string.md). +- `time_zone` — Time zone. The function parses `time_string` according to the time zone. [String](/docs/en/sql-reference/data-types/string.md). **Supported non-standard formats** @@ -1175,7 +1279,7 @@ Same as [parseDateTimeBestEffortUS](#parsedatetimebesteffortUS) function except ## parseDateTime64BestEffort -Same as [parseDateTimeBestEffort](#parsedatetimebesteffort) function but also parse milliseconds and microseconds and returns [DateTime](../../sql-reference/functions/type-conversion-functions.md#data_type-datetime) data type. +Same as [parseDateTimeBestEffort](#parsedatetimebesteffort) function but also parse milliseconds and microseconds and returns [DateTime](/docs/en/sql-reference/functions/type-conversion-functions.md/#data_type-datetime) data type. **Syntax** @@ -1185,13 +1289,13 @@ parseDateTime64BestEffort(time_string [, precision [, time_zone]]) **Parameters** -- `time_string` — String containing a date or date with time to convert. [String](../../sql-reference/data-types/string.md). -- `precision` — Required precision. `3` — for milliseconds, `6` — for microseconds. Default — `3`. Optional. [UInt8](../../sql-reference/data-types/int-uint.md). -- `time_zone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](../../sql-reference/data-types/string.md). 
+- `time_string` — String containing a date or date with time to convert. [String](/docs/en/sql-reference/data-types/string.md). +- `precision` — Required precision. `3` — for milliseconds, `6` — for microseconds. Default — `3`. Optional. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](/docs/en/sql-reference/data-types/string.md). **Returned value** -- `time_string` converted to the [DateTime](../../sql-reference/data-types/datetime.md) data type. +- `time_string` converted to the [DateTime](/docs/en/sql-reference/data-types/datetime.md) data type. **Examples** @@ -1242,7 +1346,7 @@ Same as for [parseDateTime64BestEffort](#parsedatetime64besteffort), except that ## toLowCardinality -Converts input parameter to the [LowCardinality](../../sql-reference/data-types/lowcardinality.md) version of same data type. +Converts input parameter to the [LowCardinality](/docs/en/sql-reference/data-types/lowcardinality.md) version of same data type. To convert data from the `LowCardinality` data type use the [CAST](#type_conversion_function-cast) function. For example, `CAST(x as String)`. @@ -1254,7 +1358,7 @@ toLowCardinality(expr) **Arguments** -- `expr` — [Expression](../../sql-reference/syntax.md#syntax-expressions) resulting in one of the [supported data types](../../sql-reference/data-types/index.md#data_types). +- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions) resulting in one of the [supported data types](/docs/en/sql-reference/data-types/index.md/#data_types). **Returned values** @@ -1388,12 +1492,12 @@ formatRow(format, x, y, ...) **Arguments** -- `format` — Text format. For example, [CSV](../../interfaces/formats.md#csv), [TSV](../../interfaces/formats.md#tabseparated). +- `format` — Text format. For example, [CSV](/docs/en/interfaces/formats.md/#csv), [TSV](/docs/en/interfaces/formats.md/#tabseparated). - `x`,`y`, ... — Expressions. **Returned value** -- A formatted string (for text formats it's usually terminated with the new line character). +- A formatted string. (for text formats it's usually terminated with the new line character). **Example** @@ -1417,9 +1521,39 @@ Result: └──────────────────────────────────┘ ``` +**Note**: If format contains suffix/prefix, it will be written in each row. + +**Example** + +Query: + +``` sql +SELECT formatRow('CustomSeparated', number, 'good') +FROM numbers(3) +SETTINGS format_custom_result_before_delimiter='\n', format_custom_result_after_delimiter='' +``` + +Result: + +``` text +┌─formatRow('CustomSeparated', number, 'good')─┐ +│ +0 good + │ +│ +1 good + │ +│ +2 good + │ +└──────────────────────────────────────────────┘ +``` + +Note: Only row-based formats are supported in this function. + ## formatRowNoNewline -Converts arbitrary expressions into a string via given format. The function trims the last `\n` if any. +Converts arbitrary expressions into a string via given format. Differs from formatRow in that this function trims the last `\n` if any. **Syntax** @@ -1429,7 +1563,7 @@ formatRowNoNewline(format, x, y, ...) **Arguments** -- `format` — Text format. For example, [CSV](../../interfaces/formats.md#csv), [TSV](../../interfaces/formats.md#tabseparated). +- `format` — Text format. For example, [CSV](/docs/en/interfaces/formats.md/#csv), [TSV](/docs/en/interfaces/formats.md/#tabseparated). 
- `x`,`y`, ... — Expressions. **Returned value** @@ -1457,7 +1591,7 @@ Result: ## snowflakeToDateTime -Extracts time from [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) as [DateTime](../data-types/datetime.md) format. +Extracts time from [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) as [DateTime](/docs/en/sql-reference/data-types/datetime.md) format. **Syntax** @@ -1467,12 +1601,12 @@ snowflakeToDateTime(value [, time_zone]) **Parameters** -- `value` — Snowflake ID. [Int64](../data-types/int-uint.md). -- `time_zone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](../../sql-reference/data-types/string.md). +- `value` — Snowflake ID. [Int64](/docs/en/sql-reference/data-types/int-uint.md). +- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](/docs/en/sql-reference/data-types/string.md). **Returned value** -- Input value converted to the [DateTime](../data-types/datetime.md) data type. +- Input value converted to the [DateTime](/docs/en/sql-reference/data-types/datetime.md) data type. **Example** @@ -1493,7 +1627,7 @@ Result: ## snowflakeToDateTime64 -Extracts time from [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) as [DateTime64](../data-types/datetime64.md) format. +Extracts time from [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) as [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) format. **Syntax** @@ -1503,12 +1637,12 @@ snowflakeToDateTime64(value [, time_zone]) **Parameters** -- `value` — Snowflake ID. [Int64](../data-types/int-uint.md). -- `time_zone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](../../sql-reference/data-types/string.md). +- `value` — Snowflake ID. [Int64](/docs/en/sql-reference/data-types/int-uint.md). +- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](/docs/en/sql-reference/data-types/string.md). **Returned value** -- Input value converted to the [DateTime64](../data-types/datetime64.md) data type. +- Input value converted to the [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) data type. **Example** @@ -1529,7 +1663,7 @@ Result: ## dateTimeToSnowflake -Converts [DateTime](../data-types/datetime.md) value to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the giving time. +Converts [DateTime](/docs/en/sql-reference/data-types/datetime.md) value to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the giving time. **Syntax** @@ -1539,11 +1673,11 @@ dateTimeToSnowflake(value) **Parameters** -- `value` — Date and time. [DateTime](../../sql-reference/data-types/datetime.md). +- `value` — Date and time. [DateTime](/docs/en/sql-reference/data-types/datetime.md). **Returned value** -- Input value converted to the [Int64](../data-types/int-uint.md) data type as the first Snowflake ID at that time. +- Input value converted to the [Int64](/docs/en/sql-reference/data-types/int-uint.md) data type as the first Snowflake ID at that time. 
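+
+As a round-trip sketch (assuming the same time zone is used on both sides and the value lies within the Snowflake epoch), converting a `DateTime` to a Snowflake ID and extracting the time again should land on the same second:
+
+``` sql
+WITH now('UTC') AS dt
+SELECT snowflakeToDateTime(dateTimeToSnowflake(dt), 'UTC') = dt;
+```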
**Example** @@ -1563,7 +1697,7 @@ Result: ## dateTime64ToSnowflake -Convert [DateTime64](../data-types/datetime64.md) to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the giving time. +Convert [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the giving time. **Syntax** @@ -1573,11 +1707,11 @@ dateTime64ToSnowflake(value) **Parameters** -- `value` — Date and time. [DateTime64](../../sql-reference/data-types/datetime64.md). +- `value` — Date and time. [DateTime64](/docs/en/sql-reference/data-types/datetime64.md). **Returned value** -- Input value converted to the [Int64](../data-types/int-uint.md) data type as the first Snowflake ID at that time. +- Input value converted to the [Int64](/docs/en/sql-reference/data-types/int-uint.md) data type as the first Snowflake ID at that time. **Example** diff --git a/docs/en/sql-reference/statements/select/from.md b/docs/en/sql-reference/statements/select/from.md index 3013a173c16..b751384cb72 100644 --- a/docs/en/sql-reference/statements/select/from.md +++ b/docs/en/sql-reference/statements/select/from.md @@ -21,12 +21,11 @@ Subquery is another `SELECT` query that may be specified in parenthesis inside ` When `FINAL` is specified, ClickHouse fully merges the data before returning the result and thus performs all data transformations that happen during merges for the given table engine. -It is applicable when selecting data from tables that use the [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md)-engine family. Also supported for: +It is applicable when selecting data from ReplacingMergeTree, SummingMergeTree, AggregatingMergeTree, CollapsingMergeTree and VersionedCollapsingMergeTree tables. -- [Replicated](../../../engines/table-engines/mergetree-family/replication.md) versions of `MergeTree` engines. -- [View](../../../engines/table-engines/special/view.md), [Buffer](../../../engines/table-engines/special/buffer.md), [Distributed](../../../engines/table-engines/special/distributed.md), and [MaterializedView](../../../engines/table-engines/special/materializedview.md) engines that operate over other engines, provided they were created over `MergeTree`-engine tables. +`SELECT` queries with `FINAL` are executed in parallel. The [max_final_threads](../../../operations/settings/settings.md#max-final-threads) setting limits the number of threads used. -Now `SELECT` queries with `FINAL` are executed in parallel and slightly faster. But there are drawbacks (see below). The [max_final_threads](../../../operations/settings/settings.md#max-final-threads) setting limits the number of threads used. +There are drawbacks to using `FINAL` (see below). ### Drawbacks diff --git a/docs/ru/development/developer-instruction.md b/docs/ru/development/developer-instruction.md index 0713fe42f38..80472178ae2 100644 --- a/docs/ru/development/developer-instruction.md +++ b/docs/ru/development/developer-instruction.md @@ -169,12 +169,6 @@ sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" cmake -D CMAKE_BUILD_TYPE=Debug .. -В случае использования на разработческой машине старого HDD или SSD, а также при желании использовать меньше места для артефактов сборки можно использовать следующую команду: -```bash -cmake -DUSE_DEBUG_HELPERS=1 -DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1 .. 
-``` -При этом надо учесть, что получаемые в результате сборки исполнимые файлы будут динамически слинкованы с библиотеками, и поэтому фактически станут непереносимыми на другие компьютеры (либо для этого нужно будет предпринять значительно больше усилий по сравнению со статической сборкой). Плюсом же в данном случае является значительно меньшее время сборки (это проявляется не на первой сборке, а на последующих, после внесения изменений в исходный код - тратится меньшее время на линковку по сравнению со статической сборкой) и значительно меньшее использование места на жёстком диске (экономия более, чем в 3 раза по сравнению со статической сборкой). Для целей разработки, когда планируются только отладочные запуски на том же компьютере, где осуществлялась сборка, это может быть наиболее удобным вариантом. - Вы можете изменить вариант сборки, выполнив новую команду в директории build. Запустите ninja для сборки: diff --git a/docs/ru/sql-reference/functions/date-time-functions.md b/docs/ru/sql-reference/functions/date-time-functions.md index f430f5cae51..8fbcaf9568b 100644 --- a/docs/ru/sql-reference/functions/date-time-functions.md +++ b/docs/ru/sql-reference/functions/date-time-functions.md @@ -424,23 +424,23 @@ WITH toDateTime64('2020-01-01 10:20:30.999', 3) AS dt64 SELECT toStartOfSecond(d ## toRelativeYearNum {#torelativeyearnum} -Переводит дату-с-временем или дату в номер года, начиная с некоторого фиксированного момента в прошлом. +Переводит дату или дату-с-временем в номер года, начиная с некоторого фиксированного момента в прошлом. ## toRelativeQuarterNum {#torelativequarternum} -Переводит дату-с-временем или дату в номер квартала, начиная с некоторого фиксированного момента в прошлом. +Переводит дату или дату-с-временем в номер квартала, начиная с некоторого фиксированного момента в прошлом. ## toRelativeMonthNum {#torelativemonthnum} -Переводит дату-с-временем или дату в номер месяца, начиная с некоторого фиксированного момента в прошлом. +Переводит дату или дату-с-временем в номер месяца, начиная с некоторого фиксированного момента в прошлом. ## toRelativeWeekNum {#torelativeweeknum} -Переводит дату-с-временем или дату в номер недели, начиная с некоторого фиксированного момента в прошлом. +Переводит дату или дату-с-временем в номер недели, начиная с некоторого фиксированного момента в прошлом. ## toRelativeDayNum {#torelativedaynum} -Переводит дату-с-временем или дату в номер дня, начиная с некоторого фиксированного момента в прошлом. +Переводит дату или дату-с-временем в номер дня, начиная с некоторого фиксированного момента в прошлом. ## toRelativeHourNum {#torelativehournum} @@ -456,7 +456,7 @@ WITH toDateTime64('2020-01-01 10:20:30.999', 3) AS dt64 SELECT toStartOfSecond(d ## toISOYear {#toisoyear} -Переводит дату-с-временем или дату в число типа UInt16, содержащее номер ISO года. ISO год отличается от обычного года, потому что в соответствии с [ISO 8601:1988](https://en.wikipedia.org/wiki/ISO_8601) ISO год начинается необязательно первого января. +Переводит дату или дату-с-временем в число типа UInt16, содержащее номер ISO года. ISO год отличается от обычного года, потому что в соответствии с [ISO 8601:1988](https://en.wikipedia.org/wiki/ISO_8601) ISO год начинается необязательно первого января. **Пример** @@ -479,7 +479,7 @@ SELECT ## toISOWeek {#toisoweek} -Переводит дату-с-временем или дату в число типа UInt8, содержащее номер ISO недели. +Переводит дату или дату-с-временем в число типа UInt8, содержащее номер ISO недели. 
Начало ISO года отличается от начала обычного года, потому что в соответствии с [ISO 8601:1988](https://en.wikipedia.org/wiki/ISO_8601) первая неделя года - это неделя с четырьмя или более днями в этом году. 1 Января 2017 г. - воскресение, т.е. первая ISO неделя 2017 года началась в понедельник 2 января, поэтому 1 января 2017 это последняя неделя 2016 года. @@ -503,7 +503,7 @@ SELECT ``` ## toWeek(date\[, mode\]\[, timezone\]) {#toweek} -Переводит дату-с-временем или дату в число UInt8, содержащее номер недели. Второй аргументам mode задает режим, начинается ли неделя с воскресенья или с понедельника и должно ли возвращаемое значение находиться в диапазоне от 0 до 53 или от 1 до 53. Если аргумент mode опущен, то используется режим 0. +Переводит дату или дату-с-временем в число UInt8, содержащее номер недели. Второй аргументам mode задает режим, начинается ли неделя с воскресенья или с понедельника и должно ли возвращаемое значение находиться в диапазоне от 0 до 53 или от 1 до 53. Если аргумент mode опущен, то используется режим 0. `toISOWeek() ` эквивалентно `toWeek(date,3)`. @@ -569,6 +569,132 @@ SELECT toDate('2016-12-27') AS date, toYearWeek(date) AS yearWeek0, toYearWeek(d └────────────┴───────────┴───────────┴───────────┘ ``` +## age + +Вычисляет компонент `unit` разницы между `startdate` и `enddate`. Разница вычисляется с точностью в 1 секунду. +Например, разница между `2021-12-29` и `2022-01-01` 3 дня для единицы `day`, 0 месяцев для единицы `month`, 0 лет для единицы `year`. + +**Синтаксис** + +``` sql +age('unit', startdate, enddate, [timezone]) +``` + +**Аргументы** + +- `unit` — единица измерения времени, в которой будет выражено возвращаемое значение функции. [String](../../sql-reference/data-types/string.md). + Возможные значения: + + - `second` (возможные сокращения: `ss`, `s`) + - `minute` (возможные сокращения: `mi`, `n`) + - `hour` (возможные сокращения: `hh`, `h`) + - `day` (возможные сокращения: `dd`, `d`) + - `week` (возможные сокращения: `wk`, `ww`) + - `month` (возможные сокращения: `mm`, `m`) + - `quarter` (возможные сокращения: `qq`, `q`) + - `year` (возможные сокращения: `yyyy`, `yy`) + +- `startdate` — первая дата или дата со временем, которая вычитается из `enddate`. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) или [DateTime64](../../sql-reference/data-types/datetime64.md). + +- `enddate` — вторая дата или дата со временем, из которой вычитается `startdate`. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) или [DateTime64](../../sql-reference/data-types/datetime64.md). + +- `timezone` — [часовой пояс](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (необязательно). Если этот аргумент указан, то он применяется как для `startdate`, так и для `enddate`. Если этот аргумент не указан, то используются часовые пояса аргументов `startdate` и `enddate`. Если часовые пояса аргументов `startdate` и `enddate` не совпадают, то результат не определен. [String](../../sql-reference/data-types/string.md). + +**Возвращаемое значение** + +Разница между `enddate` и `startdate`, выраженная в `unit`. + +Тип: [Int](../../sql-reference/data-types/int-uint.md). 
+ +**Пример** + +Запрос: + +``` sql +SELECT age('hour', toDateTime('2018-01-01 22:30:00'), toDateTime('2018-01-02 23:00:00')); +``` + +Результат: + +``` text +┌─age('hour', toDateTime('2018-01-01 22:30:00'), toDateTime('2018-01-02 23:00:00'))─┐ +│ 24 │ +└───────────────────────────────────────────────────────────────────────────────────┘ +``` + +Запрос: + +``` sql +SELECT + toDate('2022-01-01') AS e, + toDate('2021-12-29') AS s, + age('day', s, e) AS day_age, + age('month', s, e) AS month__age, + age('year', s, e) AS year_age; +``` + +Результат: + +``` text +┌──────────e─┬──────────s─┬─day_age─┬─month__age─┬─year_age─┐ +│ 2022-01-01 │ 2021-12-29 │ 3 │ 0 │ 0 │ +└────────────┴────────────┴─────────┴────────────┴──────────┘ +``` + +## date\_diff {#date_diff} + +Вычисляет разницу указанных границ `unit` пересекаемых между `startdate` и `enddate`. + +**Синтаксис** + +``` sql +date_diff('unit', startdate, enddate, [timezone]) +``` + +Синонимы: `dateDiff`, `DATE_DIFF`. + +**Аргументы** + +- `unit` — единица измерения времени, в которой будет выражено возвращаемое значение функции. [String](../../sql-reference/data-types/string.md). + Возможные значения: + + - `second` (возможные сокращения: `ss`, `s`) + - `minute` (возможные сокращения: `mi`, `n`) + - `hour` (возможные сокращения: `hh`, `h`) + - `day` (возможные сокращения: `dd`, `d`) + - `week` (возможные сокращения: `wk`, `ww`) + - `month` (возможные сокращения: `mm`, `m`) + - `quarter` (возможные сокращения: `qq`, `q`) + - `year` (возможные сокращения: `yyyy`, `yy`) + +- `startdate` — первая дата или дата со временем, которая вычитается из `enddate`. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) или [DateTime64](../../sql-reference/data-types/datetime64.md). + +- `enddate` — вторая дата или дата со временем, из которой вычитается `startdate`. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) или [DateTime64](../../sql-reference/data-types/datetime64.md). + +- `timezone` — [часовой пояс](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (необязательно). Если этот аргумент указан, то он применяется как для `startdate`, так и для `enddate`. Если этот аргумент не указан, то используются часовые пояса аргументов `startdate` и `enddate`. Если часовые пояса аргументов `startdate` и `enddate` не совпадают, то результат не определен. [String](../../sql-reference/data-types/string.md). + +**Возвращаемое значение** + +Разница между `enddate` и `startdate`, выраженная в `unit`. + +Тип: [Int](../../sql-reference/data-types/int-uint.md). + +**Пример** + +Запрос: + +``` sql +SELECT dateDiff('hour', toDateTime('2018-01-01 22:00:00'), toDateTime('2018-01-02 23:00:00')); +``` + +Результат: + +``` text +┌─dateDiff('hour', toDateTime('2018-01-01 22:00:00'), toDateTime('2018-01-02 23:00:00'))─┐ +│ 25 │ +└────────────────────────────────────────────────────────────────────────────────────────┘ +``` + ## date_trunc {#date_trunc} Отсекает от даты и времени части, меньшие чем указанная часть. @@ -689,60 +815,6 @@ SELECT date_add(YEAR, 3, toDate('2018-01-01')); └───────────────────────────────────────────────┘ ``` -## date\_diff {#date_diff} - -Вычисляет разницу между двумя значениями дат или дат со временем. 
- -**Синтаксис** - -``` sql -date_diff('unit', startdate, enddate, [timezone]) -``` - -Синонимы: `dateDiff`, `DATE_DIFF`. - -**Аргументы** - -- `unit` — единица измерения времени, в которой будет выражено возвращаемое значение функции. [String](../../sql-reference/data-types/string.md). - Возможные значения: - - - `second` - - `minute` - - `hour` - - `day` - - `week` - - `month` - - `quarter` - - `year` - -- `startdate` — первая дата или дата со временем, которая вычитается из `enddate`. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) или [DateTime64](../../sql-reference/data-types/datetime64.md). - -- `enddate` — вторая дата или дата со временем, из которой вычитается `startdate`. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) или [DateTime64](../../sql-reference/data-types/datetime64.md). - -- `timezone` — [часовой пояс](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (необязательно). Если этот аргумент указан, то он применяется как для `startdate`, так и для `enddate`. Если этот аргумент не указан, то используются часовые пояса аргументов `startdate` и `enddate`. Если часовые пояса аргументов `startdate` и `enddate` не совпадают, то результат не определен. [String](../../sql-reference/data-types/string.md). - -**Возвращаемое значение** - -Разница между `enddate` и `startdate`, выраженная в `unit`. - -Тип: [Int](../../sql-reference/data-types/int-uint.md). - -**Пример** - -Запрос: - -``` sql -SELECT dateDiff('hour', toDateTime('2018-01-01 22:00:00'), toDateTime('2018-01-02 23:00:00')); -``` - -Результат: - -``` text -┌─dateDiff('hour', toDateTime('2018-01-01 22:00:00'), toDateTime('2018-01-02 23:00:00'))─┐ -│ 25 │ -└────────────────────────────────────────────────────────────────────────────────────────┘ -``` - ## date\_sub {#date_sub} Вычитает интервал времени или даты из указанной даты или даты со временем. diff --git a/docs/ru/sql-reference/functions/type-conversion-functions.md b/docs/ru/sql-reference/functions/type-conversion-functions.md index 0f6f61173b6..c2beb55fee1 100644 --- a/docs/ru/sql-reference/functions/type-conversion-functions.md +++ b/docs/ru/sql-reference/functions/type-conversion-functions.md @@ -1316,7 +1316,7 @@ formatRow(format, x, y, ...) **Возвращаемое значение** -- Отформатированная строка (в текстовых форматах обычно с завершающим переводом строки). +- Отформатированная строка. (в текстовых форматах обычно с завершающим переводом строки). **Пример** @@ -1340,9 +1340,39 @@ FROM numbers(3); └──────────────────────────────────┘ ``` +**Примечание**: если формат содержит префикс/суффикс, то он будет записан в каждой строке. + +**Пример** + +Запрос: + +``` sql +SELECT formatRow('CustomSeparated', number, 'good') +FROM numbers(3) +SETTINGS format_custom_result_before_delimiter='\n', format_custom_result_after_delimiter='' +``` + +Результат: + +``` text +┌─formatRow('CustomSeparated', number, 'good')─┐ +│ +0 good + │ +│ +1 good + │ +│ +2 good + │ +└──────────────────────────────────────────────┘ +``` + +**Примечание**: данная функция поддерживает только строковые форматы вывода. + ## formatRowNoNewline {#formatrownonewline} -Преобразует произвольные выражения в строку заданного формата. При этом удаляет лишние переводы строк `\n`, если они появились. 
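Since the wording of the `formatRowNoNewline` description is replaced right here, a small illustration of the difference it describes may help. This is a sketch only, not output taken from the documentation build; it reuses the `formatRow('CSV', number, 'good')` call shown earlier in this file:

``` sql
-- formatRow() keeps the row delimiter of the chosen format (a trailing '\n' for CSV);
-- formatRowNoNewline() returns the same row with that single trailing '\n' removed.
SELECT
    formatRow('CSV', number, 'good')          AS with_delimiter,
    formatRowNoNewline('CSV', number, 'good') AS without_delimiter
FROM numbers(1);
```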
+Преобразует произвольные выражения в строку заданного формата. Отличается от функции formatRow тем, что удаляет лишний перевод строки `\n` а конце, если он есть. **Синтаксис** diff --git a/docs/zh/development/continuous-integration.md b/docs/zh/development/continuous-integration.md index 67b1eb228f1..e0c8b41147a 100644 --- a/docs/zh/development/continuous-integration.md +++ b/docs/zh/development/continuous-integration.md @@ -85,7 +85,6 @@ git push - **Build type**: `Debug` or `RelWithDebInfo` (cmake). - **Sanitizer**: `none` (without sanitizers), `address` (ASan), `memory` (MSan), `undefined` (UBSan), or `thread` (TSan). - **Bundled**: `bundled` 构建使用来自 `contrib` 库, 而 `unbundled` 构建使用系统库. -- **Splitted**: `splitted` is a [split build](https://clickhouse.com/docs/en/development/build/#split-build) - **Status**: `成功` 或 `失败` - **Build log**: 链接到构建和文件复制日志, 当构建失败时很有用. - **Build time**. @@ -97,7 +96,6 @@ git push - `clickhouse`: Main built binary. - `clickhouse-odbc-bridge` - `unit_tests_dbms`: 带有 ClickHouse 单元测试的 GoogleTest 二进制文件. - - `shared_build.tgz`: 使用共享库构建. - `performance.tgz`: 用于性能测试的特殊包. ## 特殊构建检查 {#special-buildcheck} @@ -123,14 +121,6 @@ git push of error. ``` -## 冒烟测试 {#split-build-smoke-test} -检查[拆分构建](./build.md#split-build)配置中的服务器构建是否可以启动并运行简单查询.如果失败: -``` -* Fix other test errors first; -* Build the server in [split build](./build.md#split-build) configuration - locally and check whether it can start and run `select 1`. -``` - ## 兼容性检查 {#compatibility-check} 检查`clickhouse`二进制文件是否可以在带有旧libc版本的发行版上运行.如果失败, 请向维护人员寻求帮助. diff --git a/docs/zh/engines/table-engines/log-family/index.md b/docs/zh/engines/table-engines/log-family/index.md index 56776522445..1b24984f75f 100644 --- a/docs/zh/engines/table-engines/log-family/index.md +++ b/docs/zh/engines/table-engines/log-family/index.md @@ -11,7 +11,7 @@ sidebar_position: 29 这系列的引擎有: - [StripeLog](stripelog.md) -- [日志](log.md) +- [Log](log.md) - [TinyLog](tinylog.md) ## 共同属性 {#table_engines-log-engine-family-common-properties} diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt index 5b6c5b26633..9e4f66562ca 100644 --- a/programs/CMakeLists.txt +++ b/programs/CMakeLists.txt @@ -13,12 +13,6 @@ option (ENABLE_CLICKHOUSE_SERVER "Server mode (main mode)" ${ENABLE_CLICKHOUSE_A option (ENABLE_CLICKHOUSE_CLIENT "Client mode (interactive tui/shell that connects to the server)" ${ENABLE_CLICKHOUSE_ALL}) -# Don't create self-extracting clickhouse for split build -if (ENABLE_CLICKHOUSE_SELF_EXTRACTING AND SPLIT_SHARED_LIBRARIES) - message (STATUS "Self-extracting on split build is not supported") - unset (ENABLE_CLICKHOUSE_SELF_EXTRACTING CACHE) -endif () - # https://clickhouse.com/docs/en/operations/utilities/clickhouse-local/ option (ENABLE_CLICKHOUSE_LOCAL "Local files fast processing mode" ${ENABLE_CLICKHOUSE_ALL}) @@ -173,10 +167,6 @@ else() message(STATUS "ClickHouse keeper-converter mode: OFF") endif() -if(NOT (USE_STATIC_LIBRARIES OR SPLIT_SHARED_LIBRARIES)) - set(CLICKHOUSE_ONE_SHARED ON) -endif() - if (ENABLE_CLICKHOUSE_DISKS) message(STATUS "Clickhouse disks mode: ON") else() @@ -192,11 +182,7 @@ endif() configure_file (config_tools.h.in ${CONFIG_INCLUDE_PATH}/config_tools.h) macro(clickhouse_target_link_split_lib target name) - if(NOT CLICKHOUSE_ONE_SHARED) - target_link_libraries(${target} PRIVATE clickhouse-${name}-lib) - else() - target_link_libraries(${target} PRIVATE clickhouse-lib) - endif() + target_link_libraries(${target} PRIVATE clickhouse-${name}-lib) endmacro() macro(clickhouse_program_add_library name) @@ -208,18 
+194,16 @@ macro(clickhouse_program_add_library name) set(CLICKHOUSE_${name_uc}_LINK ${CLICKHOUSE_${name_uc}_LINK} PARENT_SCOPE) set(CLICKHOUSE_${name_uc}_INCLUDE ${CLICKHOUSE_${name_uc}_INCLUDE} PARENT_SCOPE) - if(NOT CLICKHOUSE_ONE_SHARED) - add_library(clickhouse-${name}-lib ${CLICKHOUSE_${name_uc}_SOURCES}) + add_library(clickhouse-${name}-lib ${CLICKHOUSE_${name_uc}_SOURCES}) - set(_link ${CLICKHOUSE_${name_uc}_LINK}) # can't use ${} in if() - if(_link) - target_link_libraries(clickhouse-${name}-lib ${CLICKHOUSE_${name_uc}_LINK}) - endif() + set(_link ${CLICKHOUSE_${name_uc}_LINK}) # can't use ${} in if() + if(_link) + target_link_libraries(clickhouse-${name}-lib ${CLICKHOUSE_${name_uc}_LINK}) + endif() - set(_include ${CLICKHOUSE_${name_uc}_INCLUDE}) # can't use ${} in if() - if (_include) - target_include_directories(clickhouse-${name}-lib ${CLICKHOUSE_${name_uc}_INCLUDE}) - endif() + set(_include ${CLICKHOUSE_${name_uc}_INCLUDE}) # can't use ${} in if() + if (_include) + target_include_directories(clickhouse-${name}-lib ${CLICKHOUSE_${name_uc}_INCLUDE}) endif() endmacro() @@ -263,68 +247,8 @@ if (ENABLE_CLICKHOUSE_SELF_EXTRACTING) add_subdirectory (self-extracting) endif () -if (CLICKHOUSE_ONE_SHARED) - add_library(clickhouse-lib SHARED - ${CLICKHOUSE_SERVER_SOURCES} - ${CLICKHOUSE_CLIENT_SOURCES} - ${CLICKHOUSE_LOCAL_SOURCES} - ${CLICKHOUSE_BENCHMARK_SOURCES} - ${CLICKHOUSE_COPIER_SOURCES} - ${CLICKHOUSE_EXTRACT_FROM_CONFIG_SOURCES} - ${CLICKHOUSE_COMPRESSOR_SOURCES} - ${CLICKHOUSE_FORMAT_SOURCES} - ${CLICKHOUSE_OBFUSCATOR_SOURCES} - ${CLICKHOUSE_GIT_IMPORT_SOURCES} - ${CLICKHOUSE_ODBC_BRIDGE_SOURCES} - ${CLICKHOUSE_KEEPER_SOURCES} - ${CLICKHOUSE_KEEPER_CONVERTER_SOURCES} - ${CLICKHOUSE_STATIC_FILES_DISK_UPLOADER_SOURCES} - ${CLICKHOUSE_SU_SOURCES}) - - target_link_libraries(clickhouse-lib - ${CLICKHOUSE_SERVER_LINK} - ${CLICKHOUSE_CLIENT_LINK} - ${CLICKHOUSE_LOCAL_LINK} - ${CLICKHOUSE_BENCHMARK_LINK} - ${CLICKHOUSE_COPIER_LINK} - ${CLICKHOUSE_EXTRACT_FROM_CONFIG_LINK} - ${CLICKHOUSE_COMPRESSOR_LINK} - ${CLICKHOUSE_FORMAT_LINK} - ${CLICKHOUSE_OBFUSCATOR_LINK} - ${CLICKHOUSE_GIT_IMPORT_LINK} - ${CLICKHOUSE_ODBC_BRIDGE_LINK} - ${CLICKHOUSE_KEEPER_LINK} - ${CLICKHOUSE_KEEPER_CONVERTER_LINK} - ${CLICKHOUSE_STATIC_FILES_DISK_UPLOADER_LINK} - ${CLICKHOUSE_SU_LINK}) - - target_include_directories(clickhouse-lib - ${CLICKHOUSE_SERVER_INCLUDE} - ${CLICKHOUSE_CLIENT_INCLUDE} - ${CLICKHOUSE_LOCAL_INCLUDE} - ${CLICKHOUSE_BENCHMARK_INCLUDE} - ${CLICKHOUSE_COPIER_INCLUDE} - ${CLICKHOUSE_EXTRACT_FROM_CONFIG_INCLUDE} - ${CLICKHOUSE_COMPRESSOR_INCLUDE} - ${CLICKHOUSE_FORMAT_INCLUDE} - ${CLICKHOUSE_OBFUSCATOR_INCLUDE} - ${CLICKHOUSE_GIT_IMPORT_INCLUDE} - ${CLICKHOUSE_ODBC_BRIDGE_INCLUDE} - ${CLICKHOUSE_KEEPER_INCLUDE} - ${CLICKHOUSE_KEEPER_CONVERTER_INCLUDE}) - - set_target_properties(clickhouse-lib PROPERTIES SOVERSION ${VERSION_MAJOR}.${VERSION_MINOR} VERSION ${VERSION_SO} OUTPUT_NAME clickhouse DEBUG_POSTFIX "") - install (TARGETS clickhouse-lib LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT clickhouse) -endif() - clickhouse_add_executable (clickhouse main.cpp) -if (NOT USE_STATIC_LIBRARIES AND SPLIT_SHARED_LIBRARIES) - # Shared split (dev) build: In CI, the server is run with custom LD_LIBRARY_PATH. This makes the harmful env check re-execute the - # process in a clean environment but as in CI the containing directory is not included in DT_RUNPATH/DT_RPATH, the server won't come up. 
- target_compile_definitions(clickhouse PRIVATE DISABLE_HARMFUL_ENV_VAR_CHECK) -endif () - # A library that prevent usage of several functions from libc. if (ARCH_AMD64 AND OS_LINUX AND NOT OS_ANDROID) set (HARMFUL_LIB harmful) diff --git a/programs/benchmark/Benchmark.cpp b/programs/benchmark/Benchmark.cpp index 017b28fe082..26099b352a3 100644 --- a/programs/benchmark/Benchmark.cpp +++ b/programs/benchmark/Benchmark.cpp @@ -683,7 +683,7 @@ int mainEntryClickHouseBenchmark(int argc, char ** argv) ("confidence", value()->default_value(5), "set the level of confidence for T-test [0=80%, 1=90%, 2=95%, 3=98%, 4=99%, 5=99.5%(default)") ("query_id", value()->default_value(""), "") ("max-consecutive-errors", value()->default_value(0), "set number of allowed consecutive errors") - ("continue_on_errors", "continue testing even if a query fails") + ("ignore-error,continue_on_errors", "continue testing even if a query fails") ("reconnect", "establish new connection for every query") ("client-side-time", "display the time including network communication instead of server-side time; note that for server versions before 22.8 we always display client-side time") ; @@ -738,7 +738,7 @@ int mainEntryClickHouseBenchmark(int argc, char ** argv) options["query_id"].as(), options["query"].as(), options["max-consecutive-errors"].as(), - options.count("continue_on_errors"), + options.count("ignore-error"), options.count("reconnect"), options.count("client-side-time"), print_stacktrace, diff --git a/programs/benchmark/CMakeLists.txt b/programs/benchmark/CMakeLists.txt index 3fa8deb6bd9..ad211399bb5 100644 --- a/programs/benchmark/CMakeLists.txt +++ b/programs/benchmark/CMakeLists.txt @@ -10,6 +10,4 @@ set (CLICKHOUSE_BENCHMARK_LINK clickhouse_program_add(benchmark) -if(NOT CLICKHOUSE_ONE_SHARED) - target_link_libraries (clickhouse-benchmark-lib PRIVATE clickhouse-client-lib) -endif() +target_link_libraries (clickhouse-benchmark-lib PRIVATE clickhouse-client-lib) diff --git a/programs/clickhouse-split-helper b/programs/clickhouse-split-helper deleted file mode 100755 index 14a86f76097..00000000000 --- a/programs/clickhouse-split-helper +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/sh - -# Helper for split build mode. -# Allows to run commands like -# clickhouse client -# clickhouse server -# ... 
- -set -e -CMD=$1 -shift -clickhouse-$CMD $* diff --git a/programs/client/CMakeLists.txt b/programs/client/CMakeLists.txt index d212da59908..e160355ef7b 100644 --- a/programs/client/CMakeLists.txt +++ b/programs/client/CMakeLists.txt @@ -13,6 +13,10 @@ set (CLICKHOUSE_CLIENT_LINK string_utils ) +if (TARGET ch_rust::skim) + list(APPEND CLICKHOUSE_CLIENT_LINK PRIVATE ch_rust::skim) +endif() + # Always use internal readpassphrase list(APPEND CLICKHOUSE_CLIENT_LINK PRIVATE readpassphrase) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 9923b8b365a..af1a019e1f8 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -30,9 +30,10 @@ #include #include -#include -#include #include +#include +#include +#include #include #include @@ -41,6 +42,8 @@ #include #include +#include + #include #include @@ -827,6 +830,20 @@ bool Client::processWithFuzzing(const String & full_query) WriteBufferFromOStream ast_buf(std::cout, 4096); formatAST(*query, ast_buf, false /*highlight*/); ast_buf.next(); + if (const auto * insert = query->as()) + { + /// For inserts with data it's really useful to have the data itself available in the logs, as formatAST doesn't print it + if (insert->hasInlinedData()) + { + String bytes; + { + auto read_buf = getReadBufferFromASTInsertQuery(query); + WriteBufferFromString write_buf(bytes); + copyData(*read_buf, write_buf); + } + std::cout << std::endl << bytes; + } + } std::cout << std::endl << std::endl; try diff --git a/programs/install/Install.cpp b/programs/install/Install.cpp index de85572d5c6..d568012bb26 100644 --- a/programs/install/Install.cpp +++ b/programs/install/Install.cpp @@ -1051,18 +1051,12 @@ namespace return pid; } - int stop(const fs::path & pid_file, bool force, bool do_not_kill, unsigned max_tries) + bool sendSignalAndWaitForStop(const fs::path & pid_file, int signal, unsigned max_tries, unsigned wait_ms, const char * signal_name) { - if (force && do_not_kill) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Specified flags are incompatible"); - int pid = isRunning(pid_file); if (!pid) - return 0; - - int signal = force ? SIGKILL : SIGTERM; - const char * signal_name = force ? "kill" : "terminate"; + return true; if (0 == kill(pid, signal)) fmt::print("Sent {} signal to process with pid {}.\n", signal_name, pid); @@ -1078,46 +1072,51 @@ namespace fmt::print("Server stopped\n"); break; } - sleepForSeconds(1); + sleepForMilliseconds(wait_ms); } - if (try_num == max_tries) + return try_num < max_tries; + } + + int stop(const fs::path & pid_file, bool force, bool do_not_kill, unsigned max_tries) + { + if (force && do_not_kill) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Specified flags are incompatible"); + + int signal = force ? SIGKILL : SIGTERM; + const char * signal_name = force ? "kill" : "terminate"; + + if (sendSignalAndWaitForStop(pid_file, signal, max_tries, 1000, signal_name)) + return 0; + + int pid = isRunning(pid_file); + if (!pid) + return 0; + + if (do_not_kill) { - if (do_not_kill) - { - fmt::print("Process (pid = {}) is still running. Will not try to kill it.\n", pid); - return 1; - } - - fmt::print("Will terminate forcefully (pid = {}).\n", pid); - if (0 == kill(pid, 9)) - fmt::print("Sent kill signal (pid = {}).\n", pid); - else - throwFromErrno("Cannot send kill signal", ErrorCodes::SYSTEM_ERROR); - - /// Wait for the process (100 seconds). 
- constexpr size_t num_kill_check_tries = 1000; - constexpr size_t kill_check_delay_ms = 100; - for (size_t i = 0; i < num_kill_check_tries; ++i) - { - fmt::print("Waiting for server to be killed\n"); - if (!isRunning(pid_file)) - { - fmt::print("Server exited\n"); - break; - } - sleepForMilliseconds(kill_check_delay_ms); - } - - if (isRunning(pid_file)) - { - throw Exception(ErrorCodes::CANNOT_KILL, - "The server process still exists after {} tries (delay: {} ms)", - num_kill_check_tries, kill_check_delay_ms); - } + fmt::print("Process (pid = {}) is still running. Will not try to kill it.\n", pid); + return 1; } - return 0; + /// Send termination signal again, the server will receive it and immediately terminate. + fmt::print("Will send the termination signal again to force the termination (pid = {}).\n", pid); + if (sendSignalAndWaitForStop(pid_file, signal, std::min(10U, max_tries), 1000, signal_name)) + return 0; + + /// Send kill signal. Total wait is 100 seconds. + constexpr size_t num_kill_check_tries = 1000; + constexpr size_t kill_check_delay_ms = 100; + fmt::print("Will terminate forcefully (pid = {}).\n", pid); + if (sendSignalAndWaitForStop(pid_file, SIGKILL, num_kill_check_tries, kill_check_delay_ms, signal_name)) + return 0; + + if (!isRunning(pid_file)) + return 0; + + throw Exception(ErrorCodes::CANNOT_KILL, + "The server process still exists after {} tries (delay: {} ms)", + num_kill_check_tries, kill_check_delay_ms); } } diff --git a/programs/local/CMakeLists.txt b/programs/local/CMakeLists.txt index ad4406156c4..6943af48ab9 100644 --- a/programs/local/CMakeLists.txt +++ b/programs/local/CMakeLists.txt @@ -14,8 +14,10 @@ set (CLICKHOUSE_LOCAL_LINK clickhouse_program_add(local) -if(NOT CLICKHOUSE_ONE_SHARED) - target_link_libraries(clickhouse-local-lib PRIVATE clickhouse-server-lib) +target_link_libraries(clickhouse-local-lib PRIVATE clickhouse-server-lib) + +if (TARGET ch_rust::skim) + target_link_libraries(clickhouse-local-lib PRIVATE ch_rust::skim) endif() # Always use internal readpassphrase diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 1614fb1a8b4..8ce87558630 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -207,7 +207,7 @@ void LocalServer::tryInitPath() global_context->setPath(path); - global_context->setTemporaryStorage(path + "tmp", "", 0); + global_context->setTemporaryStoragePath(path + "tmp/", 0); global_context->setFlagsPath(path + "flags"); global_context->setUserFilesPath(""); // user's files are everywhere diff --git a/programs/main.cpp b/programs/main.cpp index f40bafc7027..389eae92091 100644 --- a/programs/main.cpp +++ b/programs/main.cpp @@ -345,7 +345,7 @@ struct Checker ; -#if !defined(DISABLE_HARMFUL_ENV_VAR_CHECK) && !defined(USE_MUSL) +#if !defined(USE_MUSL) /// NOTE: We will migrate to full static linking or our own dynamic loader to make this code obsolete. void checkHarmfulEnvironmentVariables(char ** argv) { @@ -457,7 +457,7 @@ int main(int argc_, char ** argv_) /// Note: we forbid dlopen in our code. 
updatePHDRCache(); -#if !defined(DISABLE_HARMFUL_ENV_VAR_CHECK) && !defined(USE_MUSL) +#if !defined(USE_MUSL) checkHarmfulEnvironmentVariables(argv_); #endif diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index f98b33c7f87..a4ddd8553c6 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -205,46 +205,6 @@ int mainEntryClickHouseServer(int argc, char ** argv) namespace { -void setupTmpPath(Poco::Logger * log, const std::string & path) -try -{ - LOG_DEBUG(log, "Setting up {} to store temporary data in it", path); - - fs::create_directories(path); - - /// Clearing old temporary files. - fs::directory_iterator dir_end; - size_t unknown_files = 0; - for (fs::directory_iterator it(path); it != dir_end; ++it) - { - if (it->is_regular_file() && startsWith(it->path().filename(), "tmp")) - { - LOG_DEBUG(log, "Removing old temporary file {}", it->path().string()); - fs::remove(it->path()); - } - else - { - unknown_files++; - if (unknown_files < 100) - LOG_DEBUG(log, "Found unknown {} {} in temporary path", - it->is_regular_file() ? "file" : (it->is_directory() ? "directory" : "element"), - it->path().string()); - } - } - - if (unknown_files) - LOG_DEBUG(log, "Found {} unknown files in temporary path", unknown_files); -} -catch (...) -{ - DB::tryLogCurrentException( - log, - fmt::format( - "Caught exception while setup temporary path: {}. It is ok to skip this exception as cleaning old temporary files is not " - "necessary", - path)); -} - size_t waitServersToFinish(std::vector & servers, size_t seconds_to_wait) { const size_t sleep_max_ms = 1000 * seconds_to_wait; @@ -460,6 +420,33 @@ void Server::createServer( } } + +#if defined(OS_LINUX) +namespace +{ + +void setOOMScore(int value, Poco::Logger * log) +{ + try + { + std::string value_string = std::to_string(value); + DB::WriteBufferFromFile buf("/proc/self/oom_score_adj"); + buf.write(value_string.c_str(), value_string.size()); + buf.next(); + buf.close(); + } + catch (const Poco::Exception & e) + { + LOG_WARNING(log, "Failed to adjust OOM score: '{}'.", e.displayText()); + return; + } + LOG_INFO(log, "Set OOM score adjustment to {}", value); +} + +} +#endif + + void Server::uninitialize() { logger().information("shutting down"); @@ -743,6 +730,13 @@ try global_context->addWarningMessage("Server was built with sanitizer. It will work slowly."); #endif + const auto memory_amount = getMemoryAmount(); + + LOG_INFO(log, "Available RAM: {}; physical cores: {}; logical cores: {}.", + formatReadableSizeWithBinarySuffix(memory_amount), + getNumberOfPhysicalCPUCores(), // on ARM processors it can show only enabled at current moment cores + std::thread::hardware_concurrency()); + sanityChecks(*this); // Initialize global thread pool. Do it before we fetch configs from zookeeper @@ -816,8 +810,6 @@ try Settings::checkNoSettingNamesAtTopLevel(config(), config_path); - const auto memory_amount = getMemoryAmount(); - #if defined(OS_LINUX) std::string executable_path = getExecutablePath(); @@ -916,6 +908,21 @@ try } } } + + int default_oom_score = 0; + +#if !defined(NDEBUG) + /// In debug version on Linux, increase oom score so that clickhouse is killed + /// first, instead of some service. Use a carefully chosen random score of 555: + /// the maximum is 1000, and chromium uses 300 for its tab processes. Ignore + /// whatever errors that occur, because it's just a debugging aid and we don't + /// care if it breaks. 
+ default_oom_score = 555; +#endif + + int oom_score = config().getInt("oom_score", default_oom_score); + if (oom_score) + setOOMScore(oom_score, log); #endif global_context->setRemoteHostFilter(config()); @@ -1013,13 +1020,21 @@ try LOG_TRACE(log, "Initialized DateLUT with time zone '{}'.", DateLUT::instance().getTimeZone()); /// Storage with temporary data for processing of heavy queries. + if (auto temporary_policy = config().getString("tmp_policy", ""); !temporary_policy.empty()) + { + size_t max_size = config().getUInt64("max_temporary_data_on_disk_size", 0); + global_context->setTemporaryStoragePolicy(temporary_policy, max_size); + } + else if (auto temporary_cache = config().getString("temporary_data_in_cache", ""); !temporary_cache.empty()) + { + size_t max_size = config().getUInt64("max_temporary_data_on_disk_size", 0); + global_context->setTemporaryStorageInCache(temporary_cache, max_size); + } + else { std::string temporary_path = config().getString("tmp_path", path / "tmp/"); - std::string temporary_policy = config().getString("tmp_policy", ""); size_t max_size = config().getUInt64("max_temporary_data_on_disk_size", 0); - const VolumePtr & volume = global_context->setTemporaryStorage(temporary_path, temporary_policy, max_size); - for (const DiskPtr & disk : volume->getDisks()) - setupTmpPath(log, disk->getPath()); + global_context->setTemporaryStoragePath(temporary_path, max_size); } /** Directory with 'flags': files indicating temporary settings for the server set by system administrator. @@ -1076,8 +1091,8 @@ try bool continue_if_corrupted = config().getBool("merge_tree_metadata_cache.continue_if_corrupted", false); try { - LOG_DEBUG( - log, "Initializing merge tree metadata cache lru_cache_size:{} continue_if_corrupted:{}", size, continue_if_corrupted); + LOG_DEBUG(log, "Initializing MergeTree metadata cache, lru_cache_size: {} continue_if_corrupted: {}", + ReadableSize(size), continue_if_corrupted); global_context->initializeMergeTreeMetadataCache(path_str + "/" + "rocksdb", size); } catch (...) @@ -1426,7 +1441,7 @@ try } catch (...) { - tryLogCurrentException(log); + tryLogCurrentException(log, "Caught exception while setting up access control."); throw; } @@ -1750,13 +1765,6 @@ try main_config_reloader->start(); access_control.startPeriodicReloading(); - { - LOG_INFO(log, "Available RAM: {}; physical cores: {}; logical cores: {}.", - formatReadableSizeWithBinarySuffix(memory_amount), - getNumberOfPhysicalCPUCores(), // on ARM processors it can show only enabled at current moment cores - std::thread::hardware_concurrency()); - } - /// try to load dictionaries immediately, throw on error and die try { diff --git a/programs/server/config.d/graphite.xml b/programs/server/config.d/graphite.xml new file mode 120000 index 00000000000..69a0411e243 --- /dev/null +++ b/programs/server/config.d/graphite.xml @@ -0,0 +1 @@ +../../../tests/config/config.d/graphite.xml \ No newline at end of file diff --git a/programs/server/config.xml b/programs/server/config.xml index 0cbc3d9339e..5f4a9fb983f 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -1464,4 +1464,8 @@ I don't recommend to change this setting. false --> + + diff --git a/rust/CMakeLists.txt b/rust/CMakeLists.txt index bf62fcbb151..1f11423a557 100644 --- a/rust/CMakeLists.txt +++ b/rust/CMakeLists.txt @@ -39,5 +39,21 @@ function(clickhouse_import_crate) corrosion_import_crate(NO_STD ${ARGN}) endfunction() -add_subdirectory (BLAKE3) -add_subdirectory (skim) +# Add crate from the build directory. 
+# +# Our crates has configuration files: +# - config for cargo (see config.toml.in) +# - and possibly config for build (build.rs.in) +# +# And to avoid overlaps different builds for one source directory, crate will +# be copied from source directory to the binary directory. +file(COPY ".cargo" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}") +function(add_rust_subdirectory src) + set(dst "${CMAKE_CURRENT_BINARY_DIR}/${src}") + message(STATUS "Copy ${src} to ${dst}") + file(COPY "${src}" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}") + add_subdirectory("${dst}" "${dst}") +endfunction() + +add_rust_subdirectory (BLAKE3) +add_rust_subdirectory (skim) diff --git a/rust/skim/CMakeLists.txt b/rust/skim/CMakeLists.txt index e626dd5742e..1e7a43aba7c 100644 --- a/rust/skim/CMakeLists.txt +++ b/rust/skim/CMakeLists.txt @@ -35,15 +35,7 @@ add_custom_command(OUTPUT ${ffi_binding_final_path} DEPENDS cargo-build__ch_rust_skim_rust) add_library(_ch_rust_skim_ffi ${ffi_binding_final_path}) -if (USE_STATIC_LIBRARIES OR NOT SPLIT_SHARED_LIBRARIES) - # static -else() - if (OS_DARWIN) - target_link_libraries(_ch_rust_skim_ffi PRIVATE -Wl,-undefined,dynamic_lookup) - else() - target_link_libraries(_ch_rust_skim_ffi PRIVATE -Wl,--unresolved-symbols=ignore-all) - endif() -endif() + # cxx bridge compiles such bindings set_target_properties(_ch_rust_skim_ffi PROPERTIES COMPILE_FLAGS "${CXXBRIDGE_CXXFLAGS}") diff --git a/rust/skim/build.rs.in b/rust/skim/build.rs.in index 0135c93222f..f0dd49e4130 100644 --- a/rust/skim/build.rs.in +++ b/rust/skim/build.rs.in @@ -5,4 +5,5 @@ fn main() { } build.compile("skim"); println!("cargo:rerun-if-changed=src/lib.rs"); + println!("cargo:rerun-if-changed=.cargo/config.toml"); } diff --git a/rust/skim/include/skim.h b/rust/skim/include/skim.h index 12cd257567b..8148474eba3 100644 --- a/rust/skim/include/skim.h +++ b/rust/skim/include/skim.h @@ -87,4 +87,4 @@ private: } // namespace cxxbridge1 } // namespace rust -::rust::String skim(::std::vector<::std::string> const &words); +::rust::String skim(::std::string const &prefix, ::std::vector<::std::string> const &words); diff --git a/rust/skim/src/lib.rs b/rust/skim/src/lib.rs index 29160329287..90f39cc8382 100644 --- a/rust/skim/src/lib.rs +++ b/rust/skim/src/lib.rs @@ -5,7 +5,7 @@ use cxx::{CxxString, CxxVector}; #[cxx::bridge] mod ffi { extern "Rust" { - fn skim(words: &CxxVector) -> Result; + fn skim(prefix: &CxxString, words: &CxxVector) -> Result; } } @@ -18,7 +18,7 @@ impl SkimItem for Item { } } -fn skim(words: &CxxVector) -> Result { +fn skim(prefix: &CxxString, words: &CxxVector) -> Result { // Let's check is terminal available. To avoid panic. 
if let Err(err) = TermInfo::from_env() { return Err(format!("{}", err)); @@ -26,6 +26,7 @@ fn skim(words: &CxxVector) -> Result { let options = SkimOptionsBuilder::default() .height(Some("30%")) + .query(Some(prefix.to_str().unwrap())) .tac(true) .tiebreak(Some("-score".to_string())) .build() diff --git a/src/AggregateFunctions/AggregateFunctionAggThrow.cpp b/src/AggregateFunctions/AggregateFunctionAggThrow.cpp index 432b1f39f84..359c6051abb 100644 --- a/src/AggregateFunctions/AggregateFunctionAggThrow.cpp +++ b/src/AggregateFunctions/AggregateFunctionAggThrow.cpp @@ -49,14 +49,16 @@ private: public: AggregateFunctionThrow(const DataTypes & argument_types_, const Array & parameters_, Float64 throw_probability_) - : IAggregateFunctionDataHelper(argument_types_, parameters_), throw_probability(throw_probability_) {} + : IAggregateFunctionDataHelper(argument_types_, parameters_, createResultType()) + , throw_probability(throw_probability_) + {} String getName() const override { return "aggThrow"; } - DataTypePtr getReturnType() const override + static DataTypePtr createResultType() { return std::make_shared(); } diff --git a/src/AggregateFunctions/AggregateFunctionAnalysisOfVariance.h b/src/AggregateFunctions/AggregateFunctionAnalysisOfVariance.h index e891fb191f6..da060ceb18e 100644 --- a/src/AggregateFunctions/AggregateFunctionAnalysisOfVariance.h +++ b/src/AggregateFunctions/AggregateFunctionAnalysisOfVariance.h @@ -37,10 +37,10 @@ class AggregateFunctionAnalysisOfVariance final : public IAggregateFunctionDataH { public: explicit AggregateFunctionAnalysisOfVariance(const DataTypes & arguments, const Array & params) - : IAggregateFunctionDataHelper(arguments, params) + : IAggregateFunctionDataHelper(arguments, params, createResultType()) {} - DataTypePtr getReturnType() const override + DataTypePtr createResultType() const { DataTypes types {std::make_shared>(), std::make_shared>() }; Strings names {"f_statistic", "p_value"}; diff --git a/src/AggregateFunctions/AggregateFunctionArgMinMax.h b/src/AggregateFunctions/AggregateFunctionArgMinMax.h index decb572b019..568b70fe77e 100644 --- a/src/AggregateFunctions/AggregateFunctionArgMinMax.h +++ b/src/AggregateFunctions/AggregateFunctionArgMinMax.h @@ -38,7 +38,6 @@ template class AggregateFunctionArgMinMax final : public IAggregateFunctionDataHelper> { private: - const DataTypePtr & type_res; const DataTypePtr & type_val; const SerializationPtr serialization_res; const SerializationPtr serialization_val; @@ -47,10 +46,9 @@ private: public: AggregateFunctionArgMinMax(const DataTypePtr & type_res_, const DataTypePtr & type_val_) - : Base({type_res_, type_val_}, {}) - , type_res(this->argument_types[0]) + : Base({type_res_, type_val_}, {}, type_res_) , type_val(this->argument_types[1]) - , serialization_res(type_res->getDefaultSerialization()) + , serialization_res(type_res_->getDefaultSerialization()) , serialization_val(type_val->getDefaultSerialization()) { if (!type_val->isComparable()) @@ -63,11 +61,6 @@ public: return StringRef(Data::ValueData_t::name()) == StringRef("min") ? 
"argMin" : "argMax"; } - DataTypePtr getReturnType() const override - { - return type_res; - } - void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override { if (this->data(place).value.changeIfBetter(*columns[1], row_num, arena)) diff --git a/src/AggregateFunctions/AggregateFunctionArray.h b/src/AggregateFunctions/AggregateFunctionArray.h index c6e29e77318..c0e676c33e7 100644 --- a/src/AggregateFunctions/AggregateFunctionArray.h +++ b/src/AggregateFunctions/AggregateFunctionArray.h @@ -30,7 +30,7 @@ private: public: AggregateFunctionArray(AggregateFunctionPtr nested_, const DataTypes & arguments, const Array & params_) - : IAggregateFunctionHelper(arguments, params_) + : IAggregateFunctionHelper(arguments, params_, createResultType(nested_)) , nested_func(nested_), num_arguments(arguments.size()) { assert(parameters == nested_func->getParameters()); @@ -44,9 +44,9 @@ public: return nested_func->getName() + "Array"; } - DataTypePtr getReturnType() const override + static DataTypePtr createResultType(const AggregateFunctionPtr & nested_) { - return nested_func->getReturnType(); + return nested_->getResultType(); } const IAggregateFunction & getBaseAggregateFunctionWithSameStateRepresentation() const override diff --git a/src/AggregateFunctions/AggregateFunctionAvg.h b/src/AggregateFunctions/AggregateFunctionAvg.h index ee46a40023d..a86c7d042fc 100644 --- a/src/AggregateFunctions/AggregateFunctionAvg.h +++ b/src/AggregateFunctions/AggregateFunctionAvg.h @@ -10,6 +10,7 @@ #include #include #include +#include #include "config.h" @@ -83,10 +84,20 @@ public: using Fraction = AvgFraction; explicit AggregateFunctionAvgBase(const DataTypes & argument_types_, - UInt32 num_scale_ = 0, UInt32 denom_scale_ = 0) - : Base(argument_types_, {}), num_scale(num_scale_), denom_scale(denom_scale_) {} + UInt32 num_scale_ = 0, UInt32 denom_scale_ = 0) + : Base(argument_types_, {}, createResultType()) + , num_scale(num_scale_) + , denom_scale(denom_scale_) + {} - DataTypePtr getReturnType() const override { return std::make_shared>(); } + AggregateFunctionAvgBase(const DataTypes & argument_types_, const DataTypePtr & result_type_, + UInt32 num_scale_ = 0, UInt32 denom_scale_ = 0) + : Base(argument_types_, {}, result_type_) + , num_scale(num_scale_) + , denom_scale(denom_scale_) + {} + + DataTypePtr createResultType() const { return std::make_shared>(); } bool allocatesMemoryInArena() const override { return false; } @@ -135,7 +146,7 @@ public: for (const auto & argument : this->argument_types) can_be_compiled &= canBeNativeType(*argument); - auto return_type = getReturnType(); + auto return_type = this->getResultType(); can_be_compiled &= canBeNativeType(*return_type); return can_be_compiled; diff --git a/src/AggregateFunctions/AggregateFunctionBitwise.h b/src/AggregateFunctions/AggregateFunctionBitwise.h index b8d3bc79007..6c94a72bf32 100644 --- a/src/AggregateFunctions/AggregateFunctionBitwise.h +++ b/src/AggregateFunctions/AggregateFunctionBitwise.h @@ -97,11 +97,12 @@ class AggregateFunctionBitwise final : public IAggregateFunctionDataHelper>({type}, {}) {} + : IAggregateFunctionDataHelper>({type}, {}, createResultType()) + {} String getName() const override { return Data::name(); } - DataTypePtr getReturnType() const override + static DataTypePtr createResultType() { return std::make_shared>(); } @@ -137,7 +138,7 @@ public: bool isCompilable() const override { - auto return_type = getReturnType(); + auto return_type = this->getResultType(); return 
canBeNativeType(*return_type); } @@ -151,7 +152,7 @@ public: { llvm::IRBuilder<> & b = static_cast &>(builder); - auto * return_type = toNativeType(b, getReturnType()); + auto * return_type = toNativeType(b, this->getResultType()); auto * value_ptr = aggregate_data_ptr; auto * value = b.CreateLoad(return_type, value_ptr); @@ -166,7 +167,7 @@ public: { llvm::IRBuilder<> & b = static_cast &>(builder); - auto * return_type = toNativeType(b, getReturnType()); + auto * return_type = toNativeType(b, this->getResultType()); auto * value_dst_ptr = aggregate_data_dst_ptr; auto * value_dst = b.CreateLoad(return_type, value_dst_ptr); @@ -183,7 +184,7 @@ public: { llvm::IRBuilder<> & b = static_cast &>(builder); - auto * return_type = toNativeType(b, getReturnType()); + auto * return_type = toNativeType(b, this->getResultType()); auto * value_ptr = aggregate_data_ptr; return b.CreateLoad(return_type, value_ptr); diff --git a/src/AggregateFunctions/AggregateFunctionBoundingRatio.h b/src/AggregateFunctions/AggregateFunctionBoundingRatio.h index 34e3fa2f747..8fca88889b8 100644 --- a/src/AggregateFunctions/AggregateFunctionBoundingRatio.h +++ b/src/AggregateFunctions/AggregateFunctionBoundingRatio.h @@ -112,7 +112,7 @@ public: } explicit AggregateFunctionBoundingRatio(const DataTypes & arguments) - : IAggregateFunctionDataHelper(arguments, {}) + : IAggregateFunctionDataHelper(arguments, {}, std::make_shared()) { const auto * x_arg = arguments.at(0).get(); const auto * y_arg = arguments.at(1).get(); @@ -122,11 +122,6 @@ public: ErrorCodes::BAD_ARGUMENTS); } - DataTypePtr getReturnType() const override - { - return std::make_shared(); - } - bool allocatesMemoryInArena() const override { return false; } void add(AggregateDataPtr __restrict place, const IColumn ** columns, const size_t row_num, Arena *) const override diff --git a/src/AggregateFunctions/AggregateFunctionCategoricalInformationValue.cpp b/src/AggregateFunctions/AggregateFunctionCategoricalInformationValue.cpp index 93b5de0c5ab..65dce832789 100644 --- a/src/AggregateFunctions/AggregateFunctionCategoricalInformationValue.cpp +++ b/src/AggregateFunctions/AggregateFunctionCategoricalInformationValue.cpp @@ -46,9 +46,9 @@ private: } public: - AggregateFunctionCategoricalIV(const DataTypes & arguments_, const Array & params_) : - IAggregateFunctionHelper{arguments_, params_}, - category_count{arguments_.size() - 1} + AggregateFunctionCategoricalIV(const DataTypes & arguments_, const Array & params_) + : IAggregateFunctionHelper{arguments_, params_, createResultType()} + , category_count{arguments_.size() - 1} { // notice: argument types has been checked before } @@ -121,7 +121,7 @@ public: buf.readStrict(place, sizeOfData()); } - DataTypePtr getReturnType() const override + static DataTypePtr createResultType() { return std::make_shared( std::make_shared>()); diff --git a/src/AggregateFunctions/AggregateFunctionCount.h b/src/AggregateFunctions/AggregateFunctionCount.h index 6e2c86f065b..91409463409 100644 --- a/src/AggregateFunctions/AggregateFunctionCount.h +++ b/src/AggregateFunctions/AggregateFunctionCount.h @@ -39,11 +39,13 @@ namespace ErrorCodes class AggregateFunctionCount final : public IAggregateFunctionDataHelper { public: - explicit AggregateFunctionCount(const DataTypes & argument_types_) : IAggregateFunctionDataHelper(argument_types_, {}) {} + explicit AggregateFunctionCount(const DataTypes & argument_types_) + : IAggregateFunctionDataHelper(argument_types_, {}, createResultType()) + {} String getName() const override { return "count"; } 
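Stepping back from the individual hunks: the pattern repeated across these AggregateFunctions headers is that the virtual `getReturnType()` override is removed and the result type is instead computed once (typically by a static `createResultType()`) and passed to the `IAggregateFunction*Helper` base constructor. Nothing changes at the query level, because the result type of an aggregate function is already fully determined by the function and its argument types; a hedged sketch of how that looks from SQL:

``` sql
-- The result type depends only on the function name and the argument types,
-- so it can be fixed at construction time instead of being recomputed in a virtual getter.
SELECT
    toTypeName(count())     AS count_type, -- expected UInt64
    toTypeName(sum(number)) AS sum_type,   -- expected UInt64
    toTypeName(avg(number)) AS avg_type    -- expected Float64
FROM numbers(10);
```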
- DataTypePtr getReturnType() const override + static DataTypePtr createResultType() { return std::make_shared(); } @@ -167,7 +169,7 @@ public: { llvm::IRBuilder<> & b = static_cast &>(builder); - auto * return_type = toNativeType(b, getReturnType()); + auto * return_type = toNativeType(b, this->getResultType()); auto * count_value_ptr = aggregate_data_ptr; auto * count_value = b.CreateLoad(return_type, count_value_ptr); @@ -180,7 +182,7 @@ public: { llvm::IRBuilder<> & b = static_cast &>(builder); - auto * return_type = toNativeType(b, getReturnType()); + auto * return_type = toNativeType(b, this->getResultType()); auto * count_value_dst_ptr = aggregate_data_dst_ptr; auto * count_value_dst = b.CreateLoad(return_type, count_value_dst_ptr); @@ -197,7 +199,7 @@ public: { llvm::IRBuilder<> & b = static_cast &>(builder); - auto * return_type = toNativeType(b, getReturnType()); + auto * return_type = toNativeType(b, this->getResultType()); auto * count_value_ptr = aggregate_data_ptr; return b.CreateLoad(return_type, count_value_ptr); @@ -214,7 +216,7 @@ class AggregateFunctionCountNotNullUnary final { public: AggregateFunctionCountNotNullUnary(const DataTypePtr & argument, const Array & params) - : IAggregateFunctionDataHelper({argument}, params) + : IAggregateFunctionDataHelper({argument}, params, createResultType()) { if (!argument->isNullable()) throw Exception("Logical error: not Nullable data type passed to AggregateFunctionCountNotNullUnary", ErrorCodes::LOGICAL_ERROR); @@ -222,7 +224,7 @@ public: String getName() const override { return "count"; } - DataTypePtr getReturnType() const override + static DataTypePtr createResultType() { return std::make_shared(); } @@ -311,7 +313,7 @@ public: { llvm::IRBuilder<> & b = static_cast &>(builder); - auto * return_type = toNativeType(b, getReturnType()); + auto * return_type = toNativeType(b, this->getResultType()); auto * is_null_value = b.CreateExtractValue(values[0], {1}); auto * increment_value = b.CreateSelect(is_null_value, llvm::ConstantInt::get(return_type, 0), llvm::ConstantInt::get(return_type, 1)); @@ -327,7 +329,7 @@ public: { llvm::IRBuilder<> & b = static_cast &>(builder); - auto * return_type = toNativeType(b, getReturnType()); + auto * return_type = toNativeType(b, this->getResultType()); auto * count_value_dst_ptr = aggregate_data_dst_ptr; auto * count_value_dst = b.CreateLoad(return_type, count_value_dst_ptr); @@ -344,7 +346,7 @@ public: { llvm::IRBuilder<> & b = static_cast &>(builder); - auto * return_type = toNativeType(b, getReturnType()); + auto * return_type = toNativeType(b, this->getResultType()); auto * count_value_ptr = aggregate_data_ptr; return b.CreateLoad(return_type, count_value_ptr); diff --git a/src/AggregateFunctions/AggregateFunctionDeltaSum.h b/src/AggregateFunctions/AggregateFunctionDeltaSum.h index 36d0ef55346..199d2706d3a 100644 --- a/src/AggregateFunctions/AggregateFunctionDeltaSum.h +++ b/src/AggregateFunctions/AggregateFunctionDeltaSum.h @@ -31,7 +31,7 @@ class AggregationFunctionDeltaSum final { public: AggregationFunctionDeltaSum(const DataTypes & arguments, const Array & params) - : IAggregateFunctionDataHelper, AggregationFunctionDeltaSum>{arguments, params} + : IAggregateFunctionDataHelper, AggregationFunctionDeltaSum>{arguments, params, createResultType()} {} AggregationFunctionDeltaSum() @@ -40,7 +40,7 @@ public: String getName() const override { return "deltaSum"; } - DataTypePtr getReturnType() const override { return std::make_shared>(); } + static DataTypePtr createResultType() { return 
std::make_shared>(); } bool allocatesMemoryInArena() const override { return false; } diff --git a/src/AggregateFunctions/AggregateFunctionDeltaSumTimestamp.h b/src/AggregateFunctions/AggregateFunctionDeltaSumTimestamp.h index a311910de7f..5ca07bb0bdf 100644 --- a/src/AggregateFunctions/AggregateFunctionDeltaSumTimestamp.h +++ b/src/AggregateFunctions/AggregateFunctionDeltaSumTimestamp.h @@ -38,7 +38,7 @@ public: : IAggregateFunctionDataHelper< AggregationFunctionDeltaSumTimestampData, AggregationFunctionDeltaSumTimestamp - >{arguments, params} + >{arguments, params, createResultType()} {} AggregationFunctionDeltaSumTimestamp() @@ -52,7 +52,7 @@ public: String getName() const override { return "deltaSumTimestamp"; } - DataTypePtr getReturnType() const override { return std::make_shared>(); } + static DataTypePtr createResultType() { return std::make_shared>(); } void NO_SANITIZE_UNDEFINED ALWAYS_INLINE add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override { diff --git a/src/AggregateFunctions/AggregateFunctionDistinct.h b/src/AggregateFunctions/AggregateFunctionDistinct.h index 2d7362ba4cc..e09e0ef621d 100644 --- a/src/AggregateFunctions/AggregateFunctionDistinct.h +++ b/src/AggregateFunctions/AggregateFunctionDistinct.h @@ -168,7 +168,7 @@ private: public: AggregateFunctionDistinct(AggregateFunctionPtr nested_func_, const DataTypes & arguments, const Array & params_) - : IAggregateFunctionDataHelper(arguments, params_) + : IAggregateFunctionDataHelper(arguments, params_, nested_func_->getResultType()) , nested_func(nested_func_) , arguments_num(arguments.size()) { @@ -255,11 +255,6 @@ public: return nested_func->getName() + "Distinct"; } - DataTypePtr getReturnType() const override - { - return nested_func->getReturnType(); - } - bool allocatesMemoryInArena() const override { return true; diff --git a/src/AggregateFunctions/AggregateFunctionEntropy.h b/src/AggregateFunctions/AggregateFunctionEntropy.h index a51dd0537bf..9321b5c5825 100644 --- a/src/AggregateFunctions/AggregateFunctionEntropy.h +++ b/src/AggregateFunctions/AggregateFunctionEntropy.h @@ -92,14 +92,14 @@ private: public: explicit AggregateFunctionEntropy(const DataTypes & argument_types_) - : IAggregateFunctionDataHelper, AggregateFunctionEntropy>(argument_types_, {}) + : IAggregateFunctionDataHelper, AggregateFunctionEntropy>(argument_types_, {}, createResultType()) , num_args(argument_types_.size()) { } String getName() const override { return "entropy"; } - DataTypePtr getReturnType() const override + static DataTypePtr createResultType() { return std::make_shared>(); } diff --git a/src/AggregateFunctions/AggregateFunctionExponentialMovingAverage.cpp b/src/AggregateFunctions/AggregateFunctionExponentialMovingAverage.cpp index 2c055c37cca..bb48b3416be 100644 --- a/src/AggregateFunctions/AggregateFunctionExponentialMovingAverage.cpp +++ b/src/AggregateFunctions/AggregateFunctionExponentialMovingAverage.cpp @@ -29,7 +29,7 @@ private: public: AggregateFunctionExponentialMovingAverage(const DataTypes & argument_types_, const Array & params) - : IAggregateFunctionDataHelper(argument_types_, params) + : IAggregateFunctionDataHelper(argument_types_, params, createResultType()) { if (params.size() != 1) throw Exception{"Aggregate function " + getName() + " requires exactly one parameter: half decay time.", @@ -43,7 +43,7 @@ public: return "exponentialMovingAverage"; } - DataTypePtr getReturnType() const override + static DataTypePtr createResultType() { return std::make_shared>(); 
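The `AggregateFunctionFactory.cpp` hunk in the next file diff below adds an `is_window_function` check so that window functions are no longer wrapped in the implicit `Null` combinator when an argument is Nullable; they are expected to handle NULLs themselves. An illustrative query of the affected shape (a sketch only; the exact pre-change behaviour is not shown in this diff):

``` sql
-- lagInFrame is registered through the aggregate function factory but is only usable
-- as a window function; with a Nullable argument it now bypasses the Null combinator.
SELECT
    number,
    lagInFrame(toNullable(number)) OVER (ORDER BY number) AS prev
FROM numbers(3);
```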
} diff --git a/src/AggregateFunctions/AggregateFunctionFactory.cpp b/src/AggregateFunctions/AggregateFunctionFactory.cpp index a8385ad8b59..38cc355b857 100644 --- a/src/AggregateFunctions/AggregateFunctionFactory.cpp +++ b/src/AggregateFunctions/AggregateFunctionFactory.cpp @@ -72,9 +72,12 @@ AggregateFunctionPtr AggregateFunctionFactory::get( { auto types_without_low_cardinality = convertLowCardinalityTypesToNested(argument_types); - /// If one of the types is Nullable, we apply aggregate function combinator "Null". - - if (std::any_of(types_without_low_cardinality.begin(), types_without_low_cardinality.end(), + /// If one of the types is Nullable, we apply aggregate function combinator "Null" if it's not window function. + /// Window functions are not real aggregate functions. Applying combinators doesn't make sense for them, + /// they must handle the nullability themselves + auto properties = tryGetPropertiesImpl(name); + bool is_window_function = properties.has_value() && properties->is_window_function; + if (!is_window_function && std::any_of(types_without_low_cardinality.begin(), types_without_low_cardinality.end(), [](const auto & type) { return type->isNullable(); })) { AggregateFunctionCombinatorPtr combinator = AggregateFunctionCombinatorFactory::instance().tryFindSuffix("Null"); diff --git a/src/AggregateFunctions/AggregateFunctionFlameGraph.cpp b/src/AggregateFunctions/AggregateFunctionFlameGraph.cpp deleted file mode 100644 index 5fc6b21926e..00000000000 --- a/src/AggregateFunctions/AggregateFunctionFlameGraph.cpp +++ /dev/null @@ -1,647 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace DB -{ -namespace ErrorCodes -{ - extern const int FUNCTION_NOT_ALLOWED; - extern const int NOT_IMPLEMENTED; - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int ILLEGAL_TYPE_OF_ARGUMENT; -} - -struct AggregateFunctionFlameGraphTree -{ - struct ListNode; - - struct TreeNode - { - TreeNode * parent = nullptr; - ListNode * children = nullptr; - UInt64 ptr = 0; - size_t allocated = 0; - }; - - struct ListNode - { - ListNode * next = nullptr; - TreeNode * child = nullptr; - }; - - TreeNode root; - - static ListNode * createChild(TreeNode * parent, UInt64 ptr, Arena * arena) - { - - ListNode * list_node = reinterpret_cast(arena->alloc(sizeof(ListNode))); - TreeNode * tree_node = reinterpret_cast(arena->alloc(sizeof(TreeNode))); - - list_node->child = tree_node; - list_node->next = nullptr; - - tree_node->parent =parent; - tree_node->children = nullptr; - tree_node->ptr = ptr; - tree_node->allocated = 0; - - return list_node; - } - - TreeNode * find(const UInt64 * stack, size_t stack_size, Arena * arena) - { - TreeNode * node = &root; - for (size_t i = 0; i < stack_size; ++i) - { - UInt64 ptr = stack[i]; - if (ptr == 0) - break; - - if (!node->children) - { - node->children = createChild(node, ptr, arena); - node = node->children->child; - } - else - { - ListNode * list = node->children; - while (list->child->ptr != ptr && list->next) - list = list->next; - - if (list->child->ptr != ptr) - { - list->next = createChild(node, ptr, arena); - list = list->next; - } - - node = list->child; - } - } - - return node; - } - - static void append(DB::PaddedPODArray & values, DB::PaddedPODArray & offsets, std::vector & frame) - { - UInt64 prev = offsets.empty() ? 
0 : offsets.back(); - offsets.push_back(prev + frame.size()); - for (UInt64 val : frame) - values.push_back(val); - } - - struct Trace - { - using Frames = std::vector; - - Frames frames; - - /// The total number of bytes allocated for traces with the same prefix. - size_t allocated_total = 0; - /// This counter is relevant in case we want to filter some traces with small amount of bytes. - /// It shows the total number of bytes for *filtered* traces with the same prefix. - /// This is the value which is used in flamegraph. - size_t allocated_self = 0; - }; - - using Traces = std::vector; - - Traces dump(size_t max_depth, size_t min_bytes) const - { - Traces traces; - Trace::Frames frames; - std::vector allocated_total; - std::vector allocated_self; - std::vector nodes; - - nodes.push_back(root.children); - allocated_total.push_back(root.allocated); - allocated_self.push_back(root.allocated); - - while (!nodes.empty()) - { - if (nodes.back() == nullptr) - { - traces.push_back({frames, allocated_total.back(), allocated_self.back()}); - - nodes.pop_back(); - allocated_total.pop_back(); - allocated_self.pop_back(); - - /// We don't have root's frame so framers are empty in the end. - if (!frames.empty()) - frames.pop_back(); - - continue; - } - - TreeNode * current = nodes.back()->child; - nodes.back() = nodes.back()->next; - - bool enough_bytes = current->allocated >= min_bytes; - bool enough_depth = max_depth == 0 || nodes.size() < max_depth; - - if (enough_bytes) - { - frames.push_back(current->ptr); - allocated_self.back() -= current->allocated; - - if (enough_depth) - { - allocated_total.push_back(current->allocated); - allocated_self.push_back(current->allocated); - nodes.push_back(current->children); - } - else - { - traces.push_back({frames, current->allocated, current->allocated}); - frames.pop_back(); - } - } - } - - return traces; - } -}; - -static void insertData(DB::PaddedPODArray & chars, DB::PaddedPODArray & offsets, const char * pos, size_t length) -{ - const size_t old_size = chars.size(); - const size_t new_size = old_size + length + 1; - - chars.resize(new_size); - if (length) - memcpy(chars.data() + old_size, pos, length); - chars[old_size + length] = 0; - offsets.push_back(new_size); -} - -/// Split str by line feed and write as separate row to ColumnString. 
-static void fillColumn(DB::PaddedPODArray & chars, DB::PaddedPODArray & offsets, const std::string & str) -{ - size_t start = 0; - size_t end = 0; - size_t size = str.size(); - - while (end < size) - { - if (str[end] == '\n') - { - insertData(chars, offsets, str.data() + start, end - start); - start = end + 1; - } - - ++end; - } - - if (start < end) - insertData(chars, offsets, str.data() + start, end - start); -} - -void dumpFlameGraph( - const AggregateFunctionFlameGraphTree::Traces & traces, - DB::PaddedPODArray & chars, - DB::PaddedPODArray & offsets) -{ - DB::WriteBufferFromOwnString out; - - std::unordered_map mapping; - -#if defined(__ELF__) && !defined(OS_FREEBSD) - auto symbol_index_ptr = DB::SymbolIndex::instance(); - const DB::SymbolIndex & symbol_index = *symbol_index_ptr; -#endif - - for (const auto & trace : traces) - { - if (trace.allocated_self == 0) - continue; - - for (size_t i = 0; i < trace.frames.size(); ++i) - { - if (i) - out << ";"; - - const void * ptr = reinterpret_cast(trace.frames[i]); - -#if defined(__ELF__) && !defined(OS_FREEBSD) - if (const auto * symbol = symbol_index.findSymbol(ptr)) - writeString(demangle(symbol->name), out); - else - DB::writePointerHex(ptr, out); -#else - DB::writePointerHex(ptr, out); -#endif - } - - out << ' ' << trace.allocated_self << "\n"; - } - - fillColumn(chars, offsets, out.str()); -} - -struct AggregateFunctionFlameGraphData -{ - struct Entry - { - AggregateFunctionFlameGraphTree::TreeNode * trace; - UInt64 size; - Entry * next = nullptr; - }; - - struct Pair - { - Entry * allocation = nullptr; - Entry * deallocation = nullptr; - }; - - using Entries = HashMap; - - AggregateFunctionFlameGraphTree tree; - Entries entries; - Entry * free_list = nullptr; - - Entry * alloc(Arena * arena) - { - if (free_list) - { - auto * res = free_list; - free_list = free_list->next; - return res; - } - - return reinterpret_cast(arena->alloc(sizeof(Entry))); - } - - void release(Entry * entry) - { - entry->next = free_list; - free_list = entry; - } - - static void track(Entry * allocation) - { - auto * node = allocation->trace; - while (node) - { - node->allocated += allocation->size; - node = node->parent; - } - } - - static void untrack(Entry * allocation) - { - auto * node = allocation->trace; - while (node) - { - node->allocated -= allocation->size; - node = node->parent; - } - } - - static Entry * tryFindMatchAndRemove(Entry *& list, UInt64 size) - { - if (!list) - return nullptr; - - if (list->size == size) - { - Entry * entry = list; - list = list->next; - return entry; - } - else - { - Entry * parent = list; - while (parent->next && parent->next->size != size) - parent = parent->next; - - if (parent->next && parent->next->size == size) - { - Entry * entry = parent->next; - parent->next = entry->next; - return entry; - } - - return nullptr; - } - } - - void add(UInt64 ptr, Int64 size, const UInt64 * stack, size_t stack_size, Arena * arena) - { - /// In case if argument is nullptr, only track allocations. 
- if (ptr == 0) - { - if (size > 0) - { - auto * node = tree.find(stack, stack_size, arena); - Entry entry{.trace = node, .size = UInt64(size)}; - track(&entry); - } - - return; - } - - auto & place = entries[ptr]; - if (size > 0) - { - if (auto * deallocation = tryFindMatchAndRemove(place.deallocation, size)) - { - release(deallocation); - } - else - { - auto * node = tree.find(stack, stack_size, arena); - - auto * allocation = alloc(arena); - allocation->size = UInt64(size); - allocation->trace = node; - - track(allocation); - - allocation->next = place.allocation; - place.allocation = allocation; - } - } - else if (size < 0) - { - UInt64 abs_size = -size; - if (auto * allocation = tryFindMatchAndRemove(place.allocation, abs_size)) - { - untrack(allocation); - release(allocation); - } - else - { - auto * deallocation = alloc(arena); - deallocation->size = abs_size; - - deallocation->next = place.deallocation; - place.deallocation = deallocation; - } - } - } - - void merge(const AggregateFunctionFlameGraphTree & other_tree, Arena * arena) - { - AggregateFunctionFlameGraphTree::Trace::Frames frames; - std::vector nodes; - - nodes.push_back(other_tree.root.children); - - while (!nodes.empty()) - { - if (nodes.back() == nullptr) - { - nodes.pop_back(); - - /// We don't have root's frame so framers are empty in the end. - if (!frames.empty()) - frames.pop_back(); - - continue; - } - - AggregateFunctionFlameGraphTree::TreeNode * current = nodes.back()->child; - nodes.back() = nodes.back()->next; - - frames.push_back(current->ptr); - - if (current->children) - nodes.push_back(current->children); - else - { - if (current->allocated) - add(0, current->allocated, frames.data(), frames.size(), arena); - - frames.pop_back(); - } - } - } - - void merge(const AggregateFunctionFlameGraphData & other, Arena * arena) - { - AggregateFunctionFlameGraphTree::Trace::Frames frames; - for (const auto & entry : other.entries) - { - for (auto * allocation = entry.value.second.allocation; allocation; allocation = allocation->next) - { - frames.clear(); - const auto * node = allocation->trace; - while (node->ptr) - { - frames.push_back(node->ptr); - node = node->parent; - } - - std::reverse(frames.begin(), frames.end()); - add(entry.value.first, allocation->size, frames.data(), frames.size(), arena); - untrack(allocation); - } - - for (auto * deallocation = entry.value.second.deallocation; deallocation; deallocation = deallocation->next) - { - add(entry.value.first, -Int64(deallocation->size), nullptr, 0, arena); - } - } - - merge(other.tree, arena); - } - - void dumpFlameGraph( - DB::PaddedPODArray & chars, - DB::PaddedPODArray & offsets, - size_t max_depth, size_t min_bytes) const - { - DB::dumpFlameGraph(tree.dump(max_depth, min_bytes), chars, offsets); - } -}; - -/// Aggregate function which builds a flamegraph using the list of stacktraces. -/// The output is an array of strings which can be used by flamegraph.pl util. -/// See https://github.com/brendangregg/FlameGraph -/// -/// Syntax: flameGraph(traces, [size = 1], [ptr = 0]) -/// - trace : Array(UInt64), a stacktrace -/// - size : Int64, an allocation size (for memory profiling) -/// - ptr : UInt64, an allocation address -/// In case if ptr != 0, a flameGraph will map allocations (size > 0) and deallocations (size < 0) with the same size and ptr. -/// Only allocations which were not freed are shown. Not mapped deallocations are ignored. 
-/// -/// Usage: -/// -/// * Build a flamegraph based on CPU query profiler -/// set query_profiler_cpu_time_period_ns=10000000; -/// SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; -/// clickhouse client --allow_introspection_functions=1 -/// -q "select arrayJoin(flameGraph(arrayReverse(trace))) from system.trace_log where trace_type = 'CPU' and query_id = 'xxx'" -/// | ~/dev/FlameGraph/flamegraph.pl > flame_cpu.svg -/// -/// * Build a flamegraph based on memory query profiler, showing all allocations -/// set memory_profiler_sample_probability=1, max_untracked_memory=1; -/// SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; -/// clickhouse client --allow_introspection_functions=1 -/// -q "select arrayJoin(flameGraph(trace, size)) from system.trace_log where trace_type = 'MemorySample' and query_id = 'xxx'" -/// | ~/dev/FlameGraph/flamegraph.pl --countname=bytes --color=mem > flame_mem.svg -/// -/// * Build a flamegraph based on memory query profiler, showing allocations which were not deallocated in query context -/// set memory_profiler_sample_probability=1, max_untracked_memory=1, use_uncompressed_cache=1, merge_tree_max_rows_to_use_cache=100000000000, merge_tree_max_bytes_to_use_cache=1000000000000; -/// SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; -/// clickhouse client --allow_introspection_functions=1 -/// -q "select arrayJoin(flameGraph(trace, size, ptr)) from system.trace_log where trace_type = 'MemorySample' and query_id = 'xxx'" -/// | ~/dev/FlameGraph/flamegraph.pl --countname=bytes --color=mem > flame_mem_untracked.svg -/// -/// * Build a flamegraph based on memory query profiler, showing active allocations at the fixed point of time -/// set memory_profiler_sample_probability=1, max_untracked_memory=1; -/// SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; -/// 1. Memory usage per second -/// select event_time, m, formatReadableSize(max(s) as m) from (select event_time, sum(size) over (order by event_time) as s from system.trace_log where query_id = 'xxx' and trace_type = 'MemorySample') group by event_time order by event_time; -/// 2. Find a time point with maximal memory usage -/// select argMax(event_time, s), max(s) from (select event_time, sum(size) over (order by event_time) as s from system.trace_log where query_id = 'xxx' and trace_type = 'MemorySample'); -/// 3. Fix active allocations at fixed point of time -/// clickhouse client --allow_introspection_functions=1 -/// -q "select arrayJoin(flameGraph(trace, size, ptr)) from (select * from system.trace_log where trace_type = 'MemorySample' and query_id = 'xxx' and event_time <= 'yyy' order by event_time)" -/// | ~/dev/FlameGraph/flamegraph.pl --countname=bytes --color=mem > flame_mem_time_point_pos.svg -/// 4. 
Find deallocations at fixed point of time -/// clickhouse client --allow_introspection_functions=1 -/// -q "select arrayJoin(flameGraph(trace, -size, ptr)) from (select * from system.trace_log where trace_type = 'MemorySample' and query_id = 'xxx' and event_time > 'yyy' order by event_time desc)" -/// | ~/dev/FlameGraph/flamegraph.pl --countname=bytes --color=mem > flame_mem_time_point_neg.svg -class AggregateFunctionFlameGraph final : public IAggregateFunctionDataHelper -{ -public: - explicit AggregateFunctionFlameGraph(const DataTypes & argument_types_) - : IAggregateFunctionDataHelper(argument_types_, {}) - {} - - String getName() const override { return "flameGraph"; } - - DataTypePtr getReturnType() const override - { - return std::make_shared(std::make_shared()); - } - - bool allocatesMemoryInArena() const override { return true; } - - void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override - { - const auto * trace = typeid_cast(columns[0]); - - const auto & trace_offsets = trace->getOffsets(); - const auto & trace_values = typeid_cast(&trace->getData())->getData(); - UInt64 prev_offset = 0; - if (row_num) - prev_offset = trace_offsets[row_num - 1]; - UInt64 trace_size = trace_offsets[row_num] - prev_offset; - - Int64 allocated = 1; - if (argument_types.size() >= 2) - { - const auto & sizes = typeid_cast(columns[1])->getData(); - allocated = sizes[row_num]; - } - - UInt64 ptr = 0; - if (argument_types.size() >= 3) - { - const auto & ptrs = typeid_cast(columns[2])->getData(); - ptr = ptrs[row_num]; - } - - this->data(place).add(ptr, allocated, trace_values.data() + prev_offset, trace_size, arena); - } - - void addManyDefaults( - AggregateDataPtr __restrict /*place*/, - const IColumn ** /*columns*/, - size_t /*length*/, - Arena * /*arena*/) const override - { - } - - void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override - { - this->data(place).merge(this->data(rhs), arena); - } - - void serialize(ConstAggregateDataPtr __restrict, WriteBuffer &, std::optional /* version */) const override - { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Serialization for function flameGraph is not implemented."); - } - - void deserialize(AggregateDataPtr __restrict, ReadBuffer &, std::optional /* version */, Arena *) const override - { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Deserialization for function flameGraph is not implemented."); - } - - void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override - { - auto & array = assert_cast(to); - auto & str = assert_cast(array.getData()); - - this->data(place).dumpFlameGraph(str.getChars(), str.getOffsets(), 0, 0); - - array.getOffsets().push_back(str.size()); - } -}; - -static void check(const std::string & name, const DataTypes & argument_types, const Array & params) -{ - assertNoParameters(name, params); - - if (argument_types.empty() || argument_types.size() > 3) - throw Exception( - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Aggregate function {} requires 1 to 3 arguments : trace, [size = 1], [ptr = 0]", - name); - - auto ptr_type = std::make_shared(); - auto trace_type = std::make_shared(ptr_type); - auto size_type = std::make_shared(); - - if (!argument_types[0]->equals(*trace_type)) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "First argument (trace) for function {} must be Array(UInt64), but it has type {}", - name, argument_types[0]->getName()); - - if (argument_types.size() 
>= 2 && !argument_types[1]->equals(*size_type)) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Second argument (size) for function {} must be Int64, but it has type {}", - name, argument_types[1]->getName()); - - if (argument_types.size() >= 3 && !argument_types[2]->equals(*ptr_type)) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Third argument (ptr) for function {} must be UInt64, but it has type {}", - name, argument_types[2]->getName()); -} - -AggregateFunctionPtr createAggregateFunctionFlameGraph(const std::string & name, const DataTypes & argument_types, const Array & params, const Settings * settings) -{ - if (!settings->allow_introspection_functions) - throw Exception(ErrorCodes::FUNCTION_NOT_ALLOWED, - "Introspection functions are disabled, because setting 'allow_introspection_functions' is set to 0"); - - check(name, argument_types, params); - return std::make_shared(argument_types); -} - -void registerAggregateFunctionFlameGraph(AggregateFunctionFactory & factory) -{ - AggregateFunctionProperties properties = { .returns_default_when_only_null = true, .is_order_dependent = true }; - - factory.registerFunction("flameGraph", { createAggregateFunctionFlameGraph, properties }); -} - -} diff --git a/src/AggregateFunctions/AggregateFunctionForEach.h b/src/AggregateFunctions/AggregateFunctionForEach.h index c91c4dd7c86..69102424bf7 100644 --- a/src/AggregateFunctions/AggregateFunctionForEach.h +++ b/src/AggregateFunctions/AggregateFunctionForEach.h @@ -107,7 +107,7 @@ private: public: AggregateFunctionForEach(AggregateFunctionPtr nested_, const DataTypes & arguments, const Array & params_) - : IAggregateFunctionDataHelper(arguments, params_) + : IAggregateFunctionDataHelper(arguments, params_, createResultType(nested_)) , nested_func(nested_), num_arguments(arguments.size()) { nested_size_of_data = nested_func->sizeOfData(); @@ -125,9 +125,9 @@ public: return nested_func->getName() + "ForEach"; } - DataTypePtr getReturnType() const override + static DataTypePtr createResultType(AggregateFunctionPtr nested_) { - return std::make_shared(nested_func->getReturnType()); + return std::make_shared(nested_->getResultType()); } bool isVersioned() const override diff --git a/src/AggregateFunctions/AggregateFunctionGroupArray.cpp b/src/AggregateFunctions/AggregateFunctionGroupArray.cpp index 85075d5a4d6..7619904f2f3 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArray.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupArray.cpp @@ -40,15 +40,10 @@ inline AggregateFunctionPtr createAggregateFunctionGroupArrayImpl(const DataType return std::make_shared>(argument_type, parameters, std::forward(args)...); return std::make_shared>(argument_type, parameters, std::forward(args)...); - - // Link list implementation doesn't show noticeable performance improvement - // if (which.idx == TypeIndex::String) - // return std::make_shared>(argument_type, std::forward(args)...); - - // return std::make_shared>(argument_type, std::forward(args)...); } +template AggregateFunctionPtr createAggregateFunctionGroupArray( const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *) { @@ -79,9 +74,13 @@ AggregateFunctionPtr createAggregateFunctionGroupArray( ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); if (!limit_size) - return createAggregateFunctionGroupArrayImpl>(argument_types[0], parameters); + { + if (Tlast) + throw Exception("groupArrayLast make sense only with max_elems (groupArrayLast(max_elems)())", ErrorCodes::BAD_ARGUMENTS); + 
return createAggregateFunctionGroupArrayImpl>(argument_types[0], parameters); + } else - return createAggregateFunctionGroupArrayImpl>(argument_types[0], parameters, max_elems); + return createAggregateFunctionGroupArrayImpl>(argument_types[0], parameters, max_elems); } AggregateFunctionPtr createAggregateFunctionGroupArraySample( @@ -114,7 +113,7 @@ AggregateFunctionPtr createAggregateFunctionGroupArraySample( else seed = thread_local_rng(); - return createAggregateFunctionGroupArrayImpl>(argument_types[0], parameters, max_elems, seed); + return createAggregateFunctionGroupArrayImpl>(argument_types[0], parameters, max_elems, seed); } } @@ -124,8 +123,9 @@ void registerAggregateFunctionGroupArray(AggregateFunctionFactory & factory) { AggregateFunctionProperties properties = { .returns_default_when_only_null = false, .is_order_dependent = true }; - factory.registerFunction("groupArray", { createAggregateFunctionGroupArray, properties }); + factory.registerFunction("groupArray", { createAggregateFunctionGroupArray, properties }); factory.registerFunction("groupArraySample", { createAggregateFunctionGroupArraySample, properties }); + factory.registerFunction("groupArrayLast", { createAggregateFunctionGroupArray, properties }); } } diff --git a/src/AggregateFunctions/AggregateFunctionGroupArray.h b/src/AggregateFunctions/AggregateFunctionGroupArray.h index 89b382de819..97d9c2b4995 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArray.h +++ b/src/AggregateFunctions/AggregateFunctionGroupArray.h @@ -37,24 +37,25 @@ enum class Sampler { NONE, RNG, - DETERMINATOR // TODO }; -template +template struct GroupArrayTrait { static constexpr bool has_limit = Thas_limit; + static constexpr bool last = Tlast; static constexpr Sampler sampler = Tsampler; }; template static constexpr const char * getNameByTrait() { + if (Trait::last) + return "groupArrayLast"; if (Trait::sampler == Sampler::NONE) return "groupArray"; else if (Trait::sampler == Sampler::RNG) return "groupArraySample"; - // else if (Trait::sampler == Sampler::DETERMINATOR) // TODO UNREACHABLE(); } @@ -100,6 +101,8 @@ struct GroupArrayNumericData using Allocator = MixedAlignedArenaAllocator; using Array = PODArray; + // For groupArrayLast() + size_t total_values = 0; Array value; }; @@ -121,7 +124,7 @@ public: explicit GroupArrayNumericImpl( const DataTypePtr & data_type_, const Array & parameters_, UInt64 max_elems_ = std::numeric_limits::max(), UInt64 seed_ = 123456) : IAggregateFunctionDataHelper, GroupArrayNumericImpl>( - {data_type_}, parameters_) + {data_type_}, parameters_, std::make_shared(data_type_)) , max_elems(max_elems_) , seed(seed_) { @@ -129,9 +132,7 @@ public: String getName() const override { return getNameByTrait(); } - DataTypePtr getReturnType() const override { return std::make_shared(this->argument_types[0]); } - - void insert(Data & a, const T & v, Arena * arena) const + void insertWithSampler(Data & a, const T & v, Arena * arena) const { ++a.total_values; if (a.value.size() < max_elems) @@ -153,88 +154,107 @@ public: void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override { + const auto & row_value = assert_cast &>(*columns[0]).getData()[row_num]; + auto & cur_elems = this->data(place); + + ++cur_elems.total_values; + if constexpr (Trait::sampler == Sampler::NONE) { - if (limit_num_elems && this->data(place).value.size() >= max_elems) + if (limit_num_elems && cur_elems.value.size() >= max_elems) + { + if constexpr (Trait::last) + 
cur_elems.value[(cur_elems.total_values - 1) % max_elems] = row_value; return; + } - this->data(place).value.push_back(assert_cast &>(*columns[0]).getData()[row_num], arena); + cur_elems.value.push_back(row_value, arena); } if constexpr (Trait::sampler == Sampler::RNG) { - auto & a = this->data(place); - ++a.total_values; - if (a.value.size() < max_elems) - a.value.push_back(assert_cast &>(*columns[0]).getData()[row_num], arena); + if (cur_elems.value.size() < max_elems) + cur_elems.value.push_back(row_value, arena); else { - UInt64 rnd = a.genRandom(a.total_values); + UInt64 rnd = cur_elems.genRandom(cur_elems.total_values); if (rnd < max_elems) - a.value[rnd] = assert_cast &>(*columns[0]).getData()[row_num]; + cur_elems.value[rnd] = row_value; } } - // TODO - // if constexpr (Trait::sampler == Sampler::DETERMINATOR) } void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override { - if constexpr (Trait::sampler == Sampler::NONE) - { - auto & cur_elems = this->data(place); - auto & rhs_elems = this->data(rhs); + auto & cur_elems = this->data(place); + auto & rhs_elems = this->data(rhs); - if (!limit_num_elems) + if (rhs_elems.value.empty()) + return; + + if constexpr (Trait::last) + mergeNoSamplerLast(cur_elems, rhs_elems, arena); + else if constexpr (Trait::sampler == Sampler::NONE) + mergeNoSampler(cur_elems, rhs_elems, arena); + else if constexpr (Trait::sampler == Sampler::RNG) + mergeWithRNGSampler(cur_elems, rhs_elems, arena); + } + + void mergeNoSamplerLast(Data & cur_elems, const Data & rhs_elems, Arena * arena) const + { + UInt64 new_elements = std::min(static_cast(max_elems), cur_elems.value.size() + rhs_elems.value.size()); + cur_elems.value.resize_exact(new_elements, arena); + for (auto & value : rhs_elems.value) + { + cur_elems.value[cur_elems.total_values % max_elems] = value; + ++cur_elems.total_values; + } + assert(rhs_elems.total_values >= rhs_elems.value.size()); + cur_elems.total_values += rhs_elems.total_values - rhs_elems.value.size(); + } + + void mergeNoSampler(Data & cur_elems, const Data & rhs_elems, Arena * arena) const + { + if (!limit_num_elems) + { + if (rhs_elems.value.size()) + cur_elems.value.insertByOffsets(rhs_elems.value, 0, rhs_elems.value.size(), arena); + } + else + { + UInt64 elems_to_insert = std::min(static_cast(max_elems) - cur_elems.value.size(), rhs_elems.value.size()); + if (elems_to_insert) + cur_elems.value.insertByOffsets(rhs_elems.value, 0, elems_to_insert, arena); + } + } + + void mergeWithRNGSampler(Data & cur_elems, const Data & rhs_elems, Arena * arena) const + { + if (rhs_elems.total_values <= max_elems) + { + for (size_t i = 0; i < rhs_elems.value.size(); ++i) + insertWithSampler(cur_elems, rhs_elems.value[i], arena); + } + else if (cur_elems.total_values <= max_elems) + { + decltype(cur_elems.value) from; + from.swap(cur_elems.value, arena); + cur_elems.value.assign(rhs_elems.value.begin(), rhs_elems.value.end(), arena); + cur_elems.total_values = rhs_elems.total_values; + for (size_t i = 0; i < from.size(); ++i) + insertWithSampler(cur_elems, from[i], arena); + } + else + { + cur_elems.randomShuffle(); + cur_elems.total_values += rhs_elems.total_values; + for (size_t i = 0; i < max_elems; ++i) { - if (rhs_elems.value.size()) - cur_elems.value.insertByOffsets(rhs_elems.value, 0, rhs_elems.value.size(), arena); - } - else - { - UInt64 elems_to_insert = std::min(static_cast(max_elems) - cur_elems.value.size(), rhs_elems.value.size()); - if (elems_to_insert) - 
cur_elems.value.insertByOffsets(rhs_elems.value, 0, elems_to_insert, arena); + UInt64 rnd = cur_elems.genRandom(cur_elems.total_values); + if (rnd < rhs_elems.total_values) + cur_elems.value[i] = rhs_elems.value[i]; } } - - if constexpr (Trait::sampler == Sampler::RNG) - { - if (this->data(rhs).value.empty()) /// rhs state is empty - return; - - auto & a = this->data(place); - auto & b = this->data(rhs); - - if (b.total_values <= max_elems) - { - for (size_t i = 0; i < b.value.size(); ++i) - insert(a, b.value[i], arena); - } - else if (a.total_values <= max_elems) - { - decltype(a.value) from; - from.swap(a.value, arena); - a.value.assign(b.value.begin(), b.value.end(), arena); - a.total_values = b.total_values; - for (size_t i = 0; i < from.size(); ++i) - insert(a, from[i], arena); - } - else - { - a.randomShuffle(); - a.total_values += b.total_values; - for (size_t i = 0; i < max_elems; ++i) - { - UInt64 rnd = a.genRandom(a.total_values); - if (rnd < b.total_values) - a.value[i] = b.value[i]; - } - } - } - - // TODO - // if constexpr (Trait::sampler == Sampler::DETERMINATOR) } void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional /* version */) const override @@ -244,6 +264,9 @@ public: writeVarUInt(size, buf); buf.write(reinterpret_cast(value.data()), size * sizeof(value[0])); + if constexpr (Trait::last) + DB::writeIntBinary(this->data(place).total_values, buf); + if constexpr (Trait::sampler == Sampler::RNG) { DB::writeIntBinary(this->data(place).total_values, buf); @@ -251,9 +274,6 @@ public: rng_buf << this->data(place).rng; DB::writeStringBinary(rng_buf.str(), buf); } - - // TODO - // if constexpr (Trait::sampler == Sampler::DETERMINATOR) } void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional /* version */, Arena * arena) const override @@ -269,9 +289,12 @@ public: auto & value = this->data(place).value; - value.resize(size, arena); + value.resize_exact(size, arena); buf.readStrict(reinterpret_cast(value.data()), size * sizeof(value[0])); + if constexpr (Trait::last) + DB::readIntBinary(this->data(place).total_values, buf); + if constexpr (Trait::sampler == Sampler::RNG) { DB::readIntBinary(this->data(place).total_values, buf); @@ -280,9 +303,6 @@ public: ReadBufferFromString rng_buf(rng_string); rng_buf >> this->data(place).rng; } - - // TODO - // if constexpr (Trait::sampler == Sampler::DETERMINATOR) } void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override @@ -398,6 +418,8 @@ struct GroupArrayGeneralData using Allocator = MixedAlignedArenaAllocator; using Array = PODArray; + // For groupArrayLast() + size_t total_values = 0; Array value; }; @@ -423,7 +445,7 @@ class GroupArrayGeneralImpl final public: GroupArrayGeneralImpl(const DataTypePtr & data_type_, const Array & parameters_, UInt64 max_elems_ = std::numeric_limits::max(), UInt64 seed_ = 123456) : IAggregateFunctionDataHelper, GroupArrayGeneralImpl>( - {data_type_}, parameters_) + {data_type_}, parameters_, std::make_shared(data_type_)) , data_type(this->argument_types[0]) , max_elems(max_elems_) , seed(seed_) @@ -432,9 +454,7 @@ public: String getName() const override { return getNameByTrait(); } - DataTypePtr getReturnType() const override { return std::make_shared(data_type); } - - void insert(Data & a, const Node * v, Arena * arena) const + void insertWithSampler(Data & a, const Node * v, Arena * arena) const { ++a.total_values; if (a.value.size() < max_elems) @@ -456,96 +476,110 @@ public: void add(AggregateDataPtr 
__restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override { + auto & cur_elems = data(place); + + ++cur_elems.total_values; + if constexpr (Trait::sampler == Sampler::NONE) { - if (limit_num_elems && data(place).value.size() >= max_elems) + if (limit_num_elems && cur_elems.value.size() >= max_elems) + { + if (Trait::last) + { + Node * node = Node::allocate(*columns[0], row_num, arena); + cur_elems.value[(cur_elems.total_values - 1) % max_elems] = node; + } return; + } Node * node = Node::allocate(*columns[0], row_num, arena); - data(place).value.push_back(node, arena); + cur_elems.value.push_back(node, arena); } if constexpr (Trait::sampler == Sampler::RNG) { - auto & a = data(place); - ++a.total_values; - if (a.value.size() < max_elems) - a.value.push_back(Node::allocate(*columns[0], row_num, arena), arena); + if (cur_elems.value.size() < max_elems) + cur_elems.value.push_back(Node::allocate(*columns[0], row_num, arena), arena); else { - UInt64 rnd = a.genRandom(a.total_values); + UInt64 rnd = cur_elems.genRandom(cur_elems.total_values); if (rnd < max_elems) - a.value[rnd] = Node::allocate(*columns[0], row_num, arena); + cur_elems.value[rnd] = Node::allocate(*columns[0], row_num, arena); } } - // TODO - // if constexpr (Trait::sampler == Sampler::DETERMINATOR) } void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override { - if constexpr (Trait::sampler == Sampler::NONE) - mergeNoSampler(place, rhs, arena); - else if constexpr (Trait::sampler == Sampler::RNG) - mergeWithRNGSampler(place, rhs, arena); - // TODO - // else if constexpr (Trait::sampler == Sampler::DETERMINATOR) - } + auto & cur_elems = data(place); + auto & rhs_elems = data(rhs); - void ALWAYS_INLINE mergeNoSampler(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const - { - if (data(rhs).value.empty()) /// rhs state is empty + if (rhs_elems.value.empty()) return; + if constexpr (Trait::last) + mergeNoSamplerLast(cur_elems, rhs_elems, arena); + else if constexpr (Trait::sampler == Sampler::NONE) + mergeNoSampler(cur_elems, rhs_elems, arena); + else if constexpr (Trait::sampler == Sampler::RNG) + mergeWithRNGSampler(cur_elems, rhs_elems, arena); + } + + void ALWAYS_INLINE mergeNoSamplerLast(Data & cur_elems, const Data & rhs_elems, Arena * arena) const + { + UInt64 new_elements = std::min(static_cast(max_elems), cur_elems.value.size() + rhs_elems.value.size()); + cur_elems.value.resize_exact(new_elements, arena); + for (auto & value : rhs_elems.value) + { + cur_elems.value[cur_elems.total_values % max_elems] = value->clone(arena); + ++cur_elems.total_values; + } + assert(rhs_elems.total_values >= rhs_elems.value.size()); + cur_elems.total_values += rhs_elems.total_values - rhs_elems.value.size(); + } + + void ALWAYS_INLINE mergeNoSampler(Data & cur_elems, const Data & rhs_elems, Arena * arena) const + { UInt64 new_elems; if (limit_num_elems) { - if (data(place).value.size() >= max_elems) + if (cur_elems.value.size() >= max_elems) return; - - new_elems = std::min(data(rhs).value.size(), static_cast(max_elems) - data(place).value.size()); + new_elems = std::min(rhs_elems.value.size(), static_cast(max_elems) - cur_elems.value.size()); } else - new_elems = data(rhs).value.size(); + new_elems = rhs_elems.value.size(); - auto & a = data(place).value; - auto & b = data(rhs).value; for (UInt64 i = 0; i < new_elems; ++i) - a.push_back(b[i]->clone(arena), arena); + cur_elems.value.push_back(rhs_elems.value[i]->clone(arena), arena); } - 
void ALWAYS_INLINE mergeWithRNGSampler(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const + void ALWAYS_INLINE mergeWithRNGSampler(Data & cur_elems, const Data & rhs_elems, Arena * arena) const { - if (data(rhs).value.empty()) /// rhs state is empty - return; - - auto & a = data(place); - auto & b = data(rhs); - - if (b.total_values <= max_elems) + if (rhs_elems.total_values <= max_elems) { - for (size_t i = 0; i < b.value.size(); ++i) - insert(a, b.value[i], arena); + for (size_t i = 0; i < rhs_elems.value.size(); ++i) + insertWithSampler(cur_elems, rhs_elems.value[i], arena); } - else if (a.total_values <= max_elems) + else if (cur_elems.total_values <= max_elems) { - decltype(a.value) from; - from.swap(a.value, arena); - for (auto & node : b.value) - a.value.push_back(node->clone(arena), arena); - a.total_values = b.total_values; + decltype(cur_elems.value) from; + from.swap(cur_elems.value, arena); + for (auto & node : rhs_elems.value) + cur_elems.value.push_back(node->clone(arena), arena); + cur_elems.total_values = rhs_elems.total_values; for (size_t i = 0; i < from.size(); ++i) - insert(a, from[i], arena); + insertWithSampler(cur_elems, from[i], arena); } else { - a.randomShuffle(); - a.total_values += b.total_values; + cur_elems.randomShuffle(); + cur_elems.total_values += rhs_elems.total_values; for (size_t i = 0; i < max_elems; ++i) { - UInt64 rnd = a.genRandom(a.total_values); - if (rnd < b.total_values) - a.value[i] = b.value[i]->clone(arena); + UInt64 rnd = cur_elems.genRandom(cur_elems.total_values); + if (rnd < rhs_elems.total_values) + cur_elems.value[i] = rhs_elems.value[i]->clone(arena); } } } @@ -558,6 +592,9 @@ public: for (auto & node : value) node->write(buf); + if constexpr (Trait::last) + DB::writeIntBinary(data(place).total_values, buf); + if constexpr (Trait::sampler == Sampler::RNG) { DB::writeIntBinary(data(place).total_values, buf); @@ -565,9 +602,6 @@ public: rng_buf << data(place).rng; DB::writeStringBinary(rng_buf.str(), buf); } - - // TODO - // if constexpr (Trait::sampler == Sampler::DETERMINATOR) } void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional /* version */, Arena * arena) const override @@ -586,10 +620,13 @@ public: auto & value = data(place).value; - value.resize(elems, arena); + value.resize_exact(elems, arena); for (UInt64 i = 0; i < elems; ++i) value[i] = Node::read(buf, arena); + if constexpr (Trait::last) + DB::readIntBinary(data(place).total_values, buf); + if constexpr (Trait::sampler == Sampler::RNG) { DB::readIntBinary(data(place).total_values, buf); @@ -598,9 +635,6 @@ public: ReadBufferFromString rng_buf(rng_string); rng_buf >> data(place).rng; } - - // TODO - // if constexpr (Trait::sampler == Sampler::DETERMINATOR) } void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override @@ -626,224 +660,6 @@ public: bool allocatesMemoryInArena() const override { return true; } }; -template -struct GroupArrayListNodeBase : public GroupArrayNodeBase -{ - Node * next; -}; - -struct GroupArrayListNodeString : public GroupArrayListNodeBase -{ - using Node = GroupArrayListNodeString; - - /// Create node from string - static Node * allocate(const IColumn & column, size_t row_num, Arena * arena) - { - StringRef string = assert_cast(column).getDataAt(row_num); - - Node * node = reinterpret_cast(arena->alignedAlloc(sizeof(Node) + string.size, alignof(Node))); - node->next = nullptr; - node->size = string.size; - memcpy(node->data(), string.data, 
string.size); - - return node; - } - - void insertInto(IColumn & column) { assert_cast(column).insertData(data(), size); } -}; - -struct GroupArrayListNodeGeneral : public GroupArrayListNodeBase -{ - using Node = GroupArrayListNodeGeneral; - - static Node * allocate(const IColumn & column, size_t row_num, Arena * arena) - { - const char * begin = arena->alignedAlloc(sizeof(Node), alignof(Node)); - StringRef value = column.serializeValueIntoArena(row_num, *arena, begin); - - Node * node = reinterpret_cast(const_cast(begin)); - node->next = nullptr; - node->size = value.size; - - return node; - } - - void insertInto(IColumn & column) { column.deserializeAndInsertFromArena(data()); } -}; - - -template -struct GroupArrayGeneralListData -{ - UInt64 elems = 0; - Node * first = nullptr; - Node * last = nullptr; -}; - - -/// Implementation of groupArray for String or any ComplexObject via linked list -/// It has poor performance in case of many small objects -template -class GroupArrayGeneralListImpl final - : public IAggregateFunctionDataHelper, GroupArrayGeneralListImpl> -{ - static constexpr bool limit_num_elems = Trait::has_limit; - using Data = GroupArrayGeneralListData; - static Data & data(AggregateDataPtr __restrict place) { return *reinterpret_cast(place); } - static const Data & data(ConstAggregateDataPtr __restrict place) { return *reinterpret_cast(place); } - - DataTypePtr & data_type; - UInt64 max_elems; - -public: - GroupArrayGeneralListImpl(const DataTypePtr & data_type_, const Array & parameters_, UInt64 max_elems_ = std::numeric_limits::max()) - : IAggregateFunctionDataHelper, GroupArrayGeneralListImpl>({data_type_}, parameters_) - , data_type(this->argument_types[0]) - , max_elems(max_elems_) - { - } - - String getName() const override { return getNameByTrait(); } - - DataTypePtr getReturnType() const override { return std::make_shared(data_type); } - - void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override - { - if (limit_num_elems && data(place).elems >= max_elems) - return; - - Node * node = Node::allocate(*columns[0], row_num, arena); - - if (unlikely(!data(place).first)) - { - data(place).first = node; - data(place).last = node; - } - else - { - data(place).last->next = node; - data(place).last = node; - } - - ++data(place).elems; - } - - void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override - { - /// It is sadly, but rhs's Arena could be destroyed - - if (!data(rhs).first) /// rhs state is empty - return; - - UInt64 new_elems; - UInt64 cur_elems = data(place).elems; - if (limit_num_elems) - { - if (data(place).elems >= max_elems) - return; - - new_elems = std::min(data(place).elems + data(rhs).elems, static_cast(max_elems)); - } - else - { - new_elems = data(place).elems + data(rhs).elems; - } - - Node * p_rhs = data(rhs).first; - Node * p_lhs; - - if (unlikely(!data(place).last)) /// lhs state is empty - { - p_lhs = p_rhs->clone(arena); - data(place).first = data(place).last = p_lhs; - p_rhs = p_rhs->next; - ++cur_elems; - } - else - { - p_lhs = data(place).last; - } - - for (; cur_elems < new_elems; ++cur_elems) - { - Node * p_new = p_rhs->clone(arena); - p_lhs->next = p_new; - p_rhs = p_rhs->next; - p_lhs = p_new; - } - - p_lhs->next = nullptr; - data(place).last = p_lhs; - data(place).elems = new_elems; - } - - void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override - { - writeVarUInt(data(place).elems, buf); - - Node * p = 
data(place).first; - while (p) - { - p->write(buf); - p = p->next; - } - } - - void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena * arena) const override - { - UInt64 elems; - readVarUInt(elems, buf); - data(place).elems = elems; - - if (unlikely(elems == 0)) - return; - - if (unlikely(elems > AGGREGATE_FUNCTION_GROUP_ARRAY_MAX_ARRAY_SIZE)) - throw Exception("Too large array size", ErrorCodes::TOO_LARGE_ARRAY_SIZE); - - if (limit_num_elems && unlikely(elems > max_elems)) - throw Exception("Too large array size, it should not exceed " + toString(max_elems), ErrorCodes::TOO_LARGE_ARRAY_SIZE); - - Node * prev = Node::read(buf, arena); - data(place).first = prev; - - for (UInt64 i = 1; i < elems; ++i) - { - Node * cur = Node::read(buf, arena); - prev->next = cur; - prev = cur; - } - - prev->next = nullptr; - data(place).last = prev; - } - - void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override - { - auto & column_array = assert_cast(to); - - auto & offsets = column_array.getOffsets(); - offsets.push_back(offsets.back() + data(place).elems); - - auto & column_data = column_array.getData(); - - if (std::is_same_v) - { - auto & string_offsets = assert_cast(column_data).getOffsets(); - string_offsets.reserve(string_offsets.size() + data(place).elems); - } - - Node * p = data(place).first; - while (p) - { - p->insertInto(column_data); - p = p->next; - } - } - - bool allocatesMemoryInArena() const override { return true; } -}; - #undef AGGREGATE_FUNCTION_GROUP_ARRAY_MAX_ARRAY_SIZE } diff --git a/src/AggregateFunctions/AggregateFunctionGroupArrayInsertAt.h b/src/AggregateFunctions/AggregateFunctionGroupArrayInsertAt.h index a1a2ce2669b..42fe4083de1 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArrayInsertAt.h +++ b/src/AggregateFunctions/AggregateFunctionGroupArrayInsertAt.h @@ -64,7 +64,7 @@ private: public: AggregateFunctionGroupArrayInsertAtGeneric(const DataTypes & arguments, const Array & params) - : IAggregateFunctionDataHelper(arguments, params) + : IAggregateFunctionDataHelper(arguments, params, std::make_shared(arguments[0])) , type(argument_types[0]) , serialization(type->getDefaultSerialization()) { @@ -101,11 +101,6 @@ public: String getName() const override { return "groupArrayInsertAt"; } - DataTypePtr getReturnType() const override - { - return std::make_shared(type); - } - bool allocatesMemoryInArena() const override { return false; } void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override diff --git a/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.h b/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.h index 40867b1949a..4444de793b4 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.h +++ b/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.h @@ -93,12 +93,15 @@ public: using ColumnResult = ColumnVectorOrDecimal; explicit MovingImpl(const DataTypePtr & data_type_, UInt64 window_size_ = std::numeric_limits::max()) - : IAggregateFunctionDataHelper>({data_type_}, {}) + : IAggregateFunctionDataHelper>({data_type_}, {}, createResultType(data_type_)) , window_size(window_size_) {} String getName() const override { return Data::name; } - DataTypePtr getReturnType() const override { return std::make_shared(getReturnTypeElement()); } + static DataTypePtr createResultType(const DataTypePtr & argument) + { + return std::make_shared(getReturnTypeElement(argument)); + } void NO_SANITIZE_UNDEFINED add(AggregateDataPtr 
__restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override { @@ -183,14 +186,14 @@ public: } private: - auto getReturnTypeElement() const + static auto getReturnTypeElement(const DataTypePtr & argument) { if constexpr (!is_decimal) return std::make_shared>(); else { using Res = DataTypeDecimal; - return std::make_shared(Res::maxPrecision(), getDecimalScale(*this->argument_types.at(0))); + return std::make_shared(Res::maxPrecision(), getDecimalScale(*argument)); } } }; diff --git a/src/AggregateFunctions/AggregateFunctionGroupBitmap.h b/src/AggregateFunctions/AggregateFunctionGroupBitmap.h index dacde67f3ca..5fe3128fa20 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupBitmap.h +++ b/src/AggregateFunctions/AggregateFunctionGroupBitmap.h @@ -19,13 +19,13 @@ class AggregateFunctionBitmap final : public IAggregateFunctionDataHelper>({type}, {}) + : IAggregateFunctionDataHelper>({type}, {}, createResultType()) { } String getName() const override { return Data::name(); } - DataTypePtr getReturnType() const override { return std::make_shared>(); } + static DataTypePtr createResultType() { return std::make_shared>(); } bool allocatesMemoryInArena() const override { return false; } @@ -59,13 +59,13 @@ private: static constexpr size_t STATE_VERSION_1_MIN_REVISION = 54455; public: explicit AggregateFunctionBitmapL2(const DataTypePtr & type) - : IAggregateFunctionDataHelper>({type}, {}) + : IAggregateFunctionDataHelper>({type}, {}, createResultType()) { } String getName() const override { return Policy::name; } - DataTypePtr getReturnType() const override { return std::make_shared>(); } + static DataTypePtr createResultType() { return std::make_shared>(); } bool allocatesMemoryInArena() const override { return false; } diff --git a/src/AggregateFunctions/AggregateFunctionGroupUniqArray.cpp b/src/AggregateFunctions/AggregateFunctionGroupUniqArray.cpp index da934531f96..4589f68280f 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupUniqArray.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupUniqArray.cpp @@ -26,8 +26,8 @@ class AggregateFunctionGroupUniqArrayDate : public AggregateFunctionGroupUniqArr { public: explicit AggregateFunctionGroupUniqArrayDate(const DataTypePtr & argument_type, const Array & parameters_, UInt64 max_elems_ = std::numeric_limits::max()) - : AggregateFunctionGroupUniqArray(argument_type, parameters_, max_elems_) {} - DataTypePtr getReturnType() const override { return std::make_shared(std::make_shared()); } + : AggregateFunctionGroupUniqArray(argument_type, parameters_, createResultType(), max_elems_) {} + static DataTypePtr createResultType() { return std::make_shared(std::make_shared()); } }; template @@ -35,8 +35,8 @@ class AggregateFunctionGroupUniqArrayDateTime : public AggregateFunctionGroupUni { public: explicit AggregateFunctionGroupUniqArrayDateTime(const DataTypePtr & argument_type, const Array & parameters_, UInt64 max_elems_ = std::numeric_limits::max()) - : AggregateFunctionGroupUniqArray(argument_type, parameters_, max_elems_) {} - DataTypePtr getReturnType() const override { return std::make_shared(std::make_shared()); } + : AggregateFunctionGroupUniqArray(argument_type, parameters_, createResultType(), max_elems_) {} + static DataTypePtr createResultType() { return std::make_shared(std::make_shared()); } }; template diff --git a/src/AggregateFunctions/AggregateFunctionGroupUniqArray.h b/src/AggregateFunctions/AggregateFunctionGroupUniqArray.h index 93db1644bd4..f8e426363d8 100644 --- 
a/src/AggregateFunctions/AggregateFunctionGroupUniqArray.h +++ b/src/AggregateFunctions/AggregateFunctionGroupUniqArray.h @@ -50,15 +50,16 @@ private: public: AggregateFunctionGroupUniqArray(const DataTypePtr & argument_type, const Array & parameters_, UInt64 max_elems_ = std::numeric_limits::max()) : IAggregateFunctionDataHelper, - AggregateFunctionGroupUniqArray>({argument_type}, parameters_), + AggregateFunctionGroupUniqArray>({argument_type}, parameters_, std::make_shared(argument_type)), max_elems(max_elems_) {} - String getName() const override { return "groupUniqArray"; } + AggregateFunctionGroupUniqArray(const DataTypePtr & argument_type, const Array & parameters_, const DataTypePtr & result_type_, UInt64 max_elems_ = std::numeric_limits::max()) + : IAggregateFunctionDataHelper, + AggregateFunctionGroupUniqArray>({argument_type}, parameters_, result_type_), + max_elems(max_elems_) {} - DataTypePtr getReturnType() const override - { - return std::make_shared(this->argument_types[0]); - } + + String getName() const override { return "groupUniqArray"; } bool allocatesMemoryInArena() const override { return false; } @@ -153,17 +154,12 @@ class AggregateFunctionGroupUniqArrayGeneric public: AggregateFunctionGroupUniqArrayGeneric(const DataTypePtr & input_data_type_, const Array & parameters_, UInt64 max_elems_ = std::numeric_limits::max()) - : IAggregateFunctionDataHelper>({input_data_type_}, parameters_) + : IAggregateFunctionDataHelper>({input_data_type_}, parameters_, std::make_shared(input_data_type_)) , input_data_type(this->argument_types[0]) , max_elems(max_elems_) {} String getName() const override { return "groupUniqArray"; } - DataTypePtr getReturnType() const override - { - return std::make_shared(input_data_type); - } - bool allocatesMemoryInArena() const override { return true; diff --git a/src/AggregateFunctions/AggregateFunctionHistogram.h b/src/AggregateFunctions/AggregateFunctionHistogram.h index fbd92aa8220..c559b3f115f 100644 --- a/src/AggregateFunctions/AggregateFunctionHistogram.h +++ b/src/AggregateFunctions/AggregateFunctionHistogram.h @@ -307,7 +307,7 @@ private: public: AggregateFunctionHistogram(UInt32 max_bins_, const DataTypes & arguments, const Array & params) - : IAggregateFunctionDataHelper>(arguments, params) + : IAggregateFunctionDataHelper>(arguments, params, createResultType()) , max_bins(max_bins_) { } @@ -316,7 +316,7 @@ public: { return Data::structSize(max_bins); } - DataTypePtr getReturnType() const override + static DataTypePtr createResultType() { DataTypes types; auto mean = std::make_shared>(); diff --git a/src/AggregateFunctions/AggregateFunctionIf.cpp b/src/AggregateFunctions/AggregateFunctionIf.cpp index c32454b10e4..ba4faec3aa1 100644 --- a/src/AggregateFunctions/AggregateFunctionIf.cpp +++ b/src/AggregateFunctions/AggregateFunctionIf.cpp @@ -23,7 +23,7 @@ public: throw Exception("Incorrect number of arguments for aggregate function with " + getName() + " suffix", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - if (!isUInt8(arguments.back())) + if (!isUInt8(arguments.back()) && !arguments.back()->onlyNull()) throw Exception("Illegal type " + arguments.back()->getName() + " of last argument for aggregate function with " + getName() + " suffix", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); @@ -52,6 +52,7 @@ class AggregateFunctionIfNullUnary final private: size_t num_arguments; bool filter_is_nullable = false; + bool filter_is_only_null = false; /// The name of the nested function, including combinators (i.e. 
*If) /// @@ -84,10 +85,8 @@ private: return assert_cast(*filter_column).getData()[row_num] && !filter_null_map[row_num]; } - else - { - return assert_cast(*filter_column).getData()[row_num]; - } + + return assert_cast(*filter_column).getData()[row_num]; } public: @@ -106,10 +105,14 @@ public: "Aggregate function {} require at least one argument", getName()); filter_is_nullable = arguments[num_arguments - 1]->isNullable(); + filter_is_only_null = arguments[num_arguments - 1]->onlyNull(); } void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override { + if (filter_is_only_null) + return; + const ColumnNullable * column = assert_cast(columns[0]); const IColumn * nested_column = &column->getNestedColumn(); if (!column->isNullAt(row_num) && singleFilter(columns, row_num)) @@ -127,6 +130,9 @@ public: Arena * arena, ssize_t) const override { + if (filter_is_only_null) + return; + const ColumnNullable * column = assert_cast(columns[0]); const UInt8 * null_map = column->getNullMapData().data(); const IColumn * columns_param[] = {&column->getNestedColumn()}; @@ -177,6 +183,11 @@ public: #if USE_EMBEDDED_COMPILER + bool isCompilable() const override + { + return canBeNativeType(*this->argument_types.back()) && this->nested_function->isCompilable(); + } + void compileAdd(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, const DataTypes & arguments_types, const std::vector & argument_values) const override { llvm::IRBuilder<> & b = static_cast &>(builder); @@ -224,6 +235,9 @@ class AggregateFunctionIfNullVariadic final : public AggregateFunctionNullBase< serialize_flag, AggregateFunctionIfNullVariadic> { +private: + bool filter_is_only_null = false; + public: String getName() const override @@ -243,6 +257,8 @@ public: for (size_t i = 0; i < number_of_arguments; ++i) is_nullable[i] = arguments[i]->isNullable(); + + filter_is_only_null = arguments.back()->onlyNull(); } static inline bool singleFilter(const IColumn ** columns, size_t row_num, size_t num_arguments) @@ -282,6 +298,9 @@ public: void addBatchSinglePlace( size_t row_begin, size_t row_end, AggregateDataPtr __restrict place, const IColumn ** columns, Arena * arena, ssize_t) const final { + if (filter_is_only_null) + return; + std::unique_ptr final_null_flags = std::make_unique(row_end); const size_t filter_column_num = number_of_arguments - 1; @@ -346,6 +365,11 @@ public: #if USE_EMBEDDED_COMPILER + bool isCompilable() const override + { + return canBeNativeType(*this->argument_types.back()) && this->nested_function->isCompilable(); + } + void compileAdd(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, const DataTypes & arguments_types, const std::vector & argument_values) const override { /// TODO: Check @@ -448,7 +472,7 @@ AggregateFunctionPtr AggregateFunctionIf::getOwnNullAdapter( /// Nullability of the last argument (condition) does not affect the nullability of the result (NULL is processed as false). /// For other arguments it is as usual (at least one is NULL then the result is NULL if possible). 
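/// [Editor's note, not part of the patch] With the filter_is_only_null changes above, a
/// constant NULL condition (type Nullable(Nothing)) is now accepted by the -If combinator
/// and simply filters out every row, in line with the comment above that NULL is processed
/// as false. Illustrative (hypothetical) query showing the expected behaviour:
///     SELECT sumIf(x, NULL) FROM t;  -- no rows are aggregated; the sum of an empty set,
///                                    -- so the expected result is 0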
- bool return_type_is_nullable = !properties.returns_default_when_only_null && getReturnType()->canBeInsideNullable() + bool return_type_is_nullable = !properties.returns_default_when_only_null && getResultType()->canBeInsideNullable() && std::any_of(arguments.begin(), arguments.end() - 1, [](const auto & element) { return element->isNullable(); }); bool need_to_serialize_flag = return_type_is_nullable || properties.returns_default_when_only_null; diff --git a/src/AggregateFunctions/AggregateFunctionIf.h b/src/AggregateFunctions/AggregateFunctionIf.h index ccc4809dd06..b5199a40aeb 100644 --- a/src/AggregateFunctions/AggregateFunctionIf.h +++ b/src/AggregateFunctions/AggregateFunctionIf.h @@ -36,13 +36,13 @@ private: public: AggregateFunctionIf(AggregateFunctionPtr nested, const DataTypes & types, const Array & params_) - : IAggregateFunctionHelper(types, params_) + : IAggregateFunctionHelper(types, params_, nested->getResultType()) , nested_func(nested), num_arguments(types.size()) { if (num_arguments == 0) throw Exception("Aggregate function " + getName() + " require at least one argument", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - if (!isUInt8(types.back())) + if (!isUInt8(types.back()) && !types.back()->onlyNull()) throw Exception("Last argument for aggregate function " + getName() + " must be UInt8", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } @@ -51,11 +51,6 @@ public: return nested_func->getName() + "If"; } - DataTypePtr getReturnType() const override - { - return nested_func->getReturnType(); - } - const IAggregateFunction & getBaseAggregateFunctionWithSameStateRepresentation() const override { return nested_func->getBaseAggregateFunctionWithSameStateRepresentation(); @@ -204,12 +199,16 @@ public: AggregateFunctionPtr getNestedFunction() const override { return nested_func; } + std::unordered_set getArgumentsThatCanBeOnlyNull() const override + { + return {num_arguments - 1}; + } #if USE_EMBEDDED_COMPILER bool isCompilable() const override { - return nested_func->isCompilable(); + return canBeNativeType(*this->argument_types.back()) && nested_func->isCompilable(); } void compileCreate(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr) const override diff --git a/src/AggregateFunctions/AggregateFunctionIntervalLengthSum.h b/src/AggregateFunctions/AggregateFunctionIntervalLengthSum.h index fdde50074aa..5b01da66364 100644 --- a/src/AggregateFunctions/AggregateFunctionIntervalLengthSum.h +++ b/src/AggregateFunctions/AggregateFunctionIntervalLengthSum.h @@ -177,11 +177,11 @@ public: String getName() const override { return "intervalLengthSum"; } explicit AggregateFunctionIntervalLengthSum(const DataTypes & arguments) - : IAggregateFunctionDataHelper>(arguments, {}) + : IAggregateFunctionDataHelper>(arguments, {}, createResultType()) { } - DataTypePtr getReturnType() const override + static DataTypePtr createResultType() { if constexpr (std::is_floating_point_v) return std::make_shared(); diff --git a/src/AggregateFunctions/AggregateFunctionMLMethod.h b/src/AggregateFunctions/AggregateFunctionMLMethod.h index b9d5d835f57..6545ee4fd53 100644 --- a/src/AggregateFunctions/AggregateFunctionMLMethod.h +++ b/src/AggregateFunctions/AggregateFunctionMLMethod.h @@ -309,7 +309,7 @@ public: UInt64 batch_size_, const DataTypes & arguments_types, const Array & params) - : IAggregateFunctionDataHelper>(arguments_types, params) + : IAggregateFunctionDataHelper>(arguments_types, params, createResultType()) , param_num(param_num_) , learning_rate(learning_rate_) , 
l2_reg_coef(l2_reg_coef_) @@ -319,8 +319,7 @@ public: { } - /// This function is called when SELECT linearRegression(...) is called - DataTypePtr getReturnType() const override + static DataTypePtr createResultType() { return std::make_shared(std::make_shared()); } diff --git a/src/AggregateFunctions/AggregateFunctionMannWhitney.h b/src/AggregateFunctions/AggregateFunctionMannWhitney.h index d861eef10ab..6176d6854fc 100644 --- a/src/AggregateFunctions/AggregateFunctionMannWhitney.h +++ b/src/AggregateFunctions/AggregateFunctionMannWhitney.h @@ -133,7 +133,7 @@ private: public: explicit AggregateFunctionMannWhitney(const DataTypes & arguments, const Array & params) - :IAggregateFunctionDataHelper ({arguments}, {}) + : IAggregateFunctionDataHelper ({arguments}, {}, createResultType()) { if (params.size() > 2) throw Exception("Aggregate function " + getName() + " require two parameter or less", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); @@ -174,7 +174,7 @@ public: bool allocatesMemoryInArena() const override { return true; } - DataTypePtr getReturnType() const override + static DataTypePtr createResultType() { DataTypes types { diff --git a/src/AggregateFunctions/AggregateFunctionMap.h b/src/AggregateFunctions/AggregateFunctionMap.h index f60cc71e78e..dc19bf3f71c 100644 --- a/src/AggregateFunctions/AggregateFunctionMap.h +++ b/src/AggregateFunctions/AggregateFunctionMap.h @@ -18,6 +18,7 @@ #include #include #include +#include "DataTypes/Serializations/ISerialization.h" #include "base/types.h" #include #include "AggregateFunctions/AggregateFunctionFactory.h" @@ -104,26 +105,32 @@ public: return nested_func->getDefaultVersion(); } - AggregateFunctionMap(AggregateFunctionPtr nested, const DataTypes & types) : Base(types, nested->getParameters()), nested_func(nested) + AggregateFunctionMap(AggregateFunctionPtr nested, const DataTypes & types) + : Base(types, nested->getParameters(), std::make_shared(DataTypes{getKeyType(types, nested), nested->getResultType()})) + , nested_func(nested) { - if (types.empty()) - throw Exception( - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function " + getName() + " requires at least one argument"); - - if (types.size() > 1) - throw Exception( - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function " + getName() + " requires only one map argument"); - - const auto * map_type = checkAndGetDataType(types[0].get()); - if (!map_type) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function " + getName() + " requires map as argument"); - - key_type = map_type->getKeyType(); + key_type = getKeyType(types, nested_func); } String getName() const override { return nested_func->getName() + "Map"; } - DataTypePtr getReturnType() const override { return std::make_shared(DataTypes{key_type, nested_func->getReturnType()}); } + static DataTypePtr getKeyType(const DataTypes & types, const AggregateFunctionPtr & nested) + { + if (types.empty()) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Aggregate function {}Map requires at least one argument", nested->getName()); + + if (types.size() > 1) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Aggregate function {}Map requires only one map argument", nested->getName()); + + const auto * map_type = checkAndGetDataType(types[0].get()); + if (!map_type) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Aggregate function {}Map requires map as argument", nested->getName()); + + return map_type->getKeyType(); + } void add(AggregateDataPtr 
__restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override { diff --git a/src/AggregateFunctions/AggregateFunctionMaxIntersections.h b/src/AggregateFunctions/AggregateFunctionMaxIntersections.h index d2f553172c9..e78684c9491 100644 --- a/src/AggregateFunctions/AggregateFunctionMaxIntersections.h +++ b/src/AggregateFunctions/AggregateFunctionMaxIntersections.h @@ -62,7 +62,8 @@ private: public: AggregateFunctionIntersectionsMax(AggregateFunctionIntersectionsKind kind_, const DataTypes & arguments) - : IAggregateFunctionDataHelper, AggregateFunctionIntersectionsMax>(arguments, {}), kind(kind_) + : IAggregateFunctionDataHelper, AggregateFunctionIntersectionsMax>(arguments, {}, createResultType(kind_)) + , kind(kind_) { if (!isNativeNumber(arguments[0])) throw Exception{getName() + ": first argument must be represented by integer", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; @@ -81,9 +82,9 @@ public: : "maxIntersectionsPosition"; } - DataTypePtr getReturnType() const override + static DataTypePtr createResultType(AggregateFunctionIntersectionsKind kind_) { - if (kind == AggregateFunctionIntersectionsKind::Count) + if (kind_ == AggregateFunctionIntersectionsKind::Count) return std::make_shared(); else return std::make_shared>(); diff --git a/src/AggregateFunctions/AggregateFunctionMeanZTest.h b/src/AggregateFunctions/AggregateFunctionMeanZTest.h index 7fecff591e6..97925d4e07c 100644 --- a/src/AggregateFunctions/AggregateFunctionMeanZTest.h +++ b/src/AggregateFunctions/AggregateFunctionMeanZTest.h @@ -36,7 +36,7 @@ private: public: AggregateFunctionMeanZTest(const DataTypes & arguments, const Array & params) - : IAggregateFunctionDataHelper>({arguments}, params) + : IAggregateFunctionDataHelper>({arguments}, params, createResultType()) { pop_var_x = params.at(0).safeGet(); pop_var_y = params.at(1).safeGet(); @@ -63,7 +63,7 @@ public: return Data::name; } - DataTypePtr getReturnType() const override + static DataTypePtr createResultType() { DataTypes types { diff --git a/src/AggregateFunctions/AggregateFunctionMerge.h b/src/AggregateFunctions/AggregateFunctionMerge.h index bb2d36eeed1..0cb44259816 100644 --- a/src/AggregateFunctions/AggregateFunctionMerge.h +++ b/src/AggregateFunctions/AggregateFunctionMerge.h @@ -30,7 +30,7 @@ private: public: AggregateFunctionMerge(const AggregateFunctionPtr & nested_, const DataTypePtr & argument, const Array & params_) - : IAggregateFunctionHelper({argument}, params_) + : IAggregateFunctionHelper({argument}, params_, createResultType(nested_)) , nested_func(nested_) { const DataTypeAggregateFunction * data_type = typeid_cast(argument.get()); @@ -45,9 +45,9 @@ public: return nested_func->getName() + "Merge"; } - DataTypePtr getReturnType() const override + static DataTypePtr createResultType(const AggregateFunctionPtr & nested_) { - return nested_func->getReturnType(); + return nested_->getResultType(); } const IAggregateFunction & getBaseAggregateFunctionWithSameStateRepresentation() const override diff --git a/src/AggregateFunctions/AggregateFunctionMinMaxAny.h b/src/AggregateFunctions/AggregateFunctionMinMaxAny.h index a6013f37b9d..314e68f83d9 100644 --- a/src/AggregateFunctions/AggregateFunctionMinMaxAny.h +++ b/src/AggregateFunctions/AggregateFunctionMinMaxAny.h @@ -1222,7 +1222,7 @@ private: public: explicit AggregateFunctionsSingleValue(const DataTypePtr & type) - : IAggregateFunctionDataHelper>({type}, {}) + : IAggregateFunctionDataHelper>({type}, {}, createResultType(type)) , serialization(type->getDefaultSerialization()) { 
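/// [Editor's note, not part of the patch] The constructor change just above follows the
/// pattern used throughout this diff: the result type is computed once by a static
/// createResultType() helper and passed to the base-class constructor, replacing the
/// virtual getReturnType() override; callers then rely on getResultType() from the base
/// class. A minimal sketch of the pattern, with simplified, illustrative names:
///
///     class SomeAggregateFunction final
///         : public IAggregateFunctionDataHelper<SomeData, SomeAggregateFunction>
///     {
///     public:
///         static DataTypePtr createResultType(const DataTypePtr & argument_type)
///         {
///             return std::make_shared<DataTypeArray>(argument_type);
///         }
///
///         explicit SomeAggregateFunction(const DataTypePtr & argument_type)
///             : IAggregateFunctionDataHelper<SomeData, SomeAggregateFunction>(
///                 {argument_type}, {}, createResultType(argument_type))
///         {}
///     };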
if (StringRef(Data::name()) == StringRef("min") @@ -1236,12 +1236,11 @@ public: String getName() const override { return Data::name(); } - DataTypePtr getReturnType() const override + static DataTypePtr createResultType(const DataTypePtr & type_) { - auto result_type = this->argument_types.at(0); if constexpr (Data::is_nullable) - return makeNullable(result_type); - return result_type; + return makeNullable(type_); + return type_; } void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override diff --git a/src/AggregateFunctions/AggregateFunctionNothing.h b/src/AggregateFunctions/AggregateFunctionNothing.h index 13ef407be8b..de8a5868e04 100644 --- a/src/AggregateFunctions/AggregateFunctionNothing.h +++ b/src/AggregateFunctions/AggregateFunctionNothing.h @@ -6,6 +6,7 @@ #include #include #include +#include "DataTypes/IDataType.h" namespace DB @@ -19,16 +20,16 @@ class AggregateFunctionNothing final : public IAggregateFunctionHelper(arguments, params) {} + : IAggregateFunctionHelper(arguments, params, createResultType(arguments)) {} String getName() const override { return "nothing"; } - DataTypePtr getReturnType() const override + static DataTypePtr createResultType(const DataTypes & arguments) { - return argument_types.empty() ? std::make_shared(std::make_shared()) : argument_types.front(); + return arguments.empty() ? std::make_shared(std::make_shared()) : arguments.front(); } bool allocatesMemoryInArena() const override { return false; } diff --git a/src/AggregateFunctions/AggregateFunctionNull.cpp b/src/AggregateFunctions/AggregateFunctionNull.cpp index 01558b56667..f02c97b08b7 100644 --- a/src/AggregateFunctions/AggregateFunctionNull.cpp +++ b/src/AggregateFunctions/AggregateFunctionNull.cpp @@ -29,7 +29,13 @@ public: size_t size = arguments.size(); DataTypes res(size); for (size_t i = 0; i < size; ++i) - res[i] = removeNullable(arguments[i]); + { + /// Nullable(Nothing) is processed separately, don't convert it to Nothing. 
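/// [Editor's note, not part of the patch] Keeping an onlyNull() argument as-is here
/// (instead of removeNullable(), which would turn Nullable(Nothing) into Nothing) lets
/// combinators that explicitly allow an always-NULL argument, such as the -If condition
/// via getArgumentsThatCanBeOnlyNull(), receive it unchanged rather than forcing the whole
/// aggregate onto the "all arguments are NULL" path handled further below.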
+ if (arguments[i]->onlyNull()) + res[i] = arguments[i]; + else + res[i] = removeNullable(arguments[i]); + } return res; } @@ -41,12 +47,16 @@ public: { bool has_nullable_types = false; bool has_null_types = false; - for (const auto & arg_type : arguments) + std::unordered_set arguments_that_can_be_only_null; + if (nested_function) + arguments_that_can_be_only_null = nested_function->getArgumentsThatCanBeOnlyNull(); + + for (size_t i = 0; i < arguments.size(); ++i) { - if (arg_type->isNullable()) + if (arguments[i]->isNullable()) { has_nullable_types = true; - if (arg_type->onlyNull()) + if (arguments[i]->onlyNull() && !arguments_that_can_be_only_null.contains(i)) { has_null_types = true; break; @@ -87,7 +97,7 @@ public: transformed_nested_function->getParameters()); } - bool return_type_is_nullable = !properties.returns_default_when_only_null && nested_function->getReturnType()->canBeInsideNullable(); + bool return_type_is_nullable = !properties.returns_default_when_only_null && nested_function->getResultType()->canBeInsideNullable(); bool serialize_flag = return_type_is_nullable || properties.returns_default_when_only_null; if (arguments.size() == 1) diff --git a/src/AggregateFunctions/AggregateFunctionNull.h b/src/AggregateFunctions/AggregateFunctionNull.h index 26d36b84860..ae5573a5351 100644 --- a/src/AggregateFunctions/AggregateFunctionNull.h +++ b/src/AggregateFunctions/AggregateFunctionNull.h @@ -85,7 +85,8 @@ protected: public: AggregateFunctionNullBase(AggregateFunctionPtr nested_function_, const DataTypes & arguments, const Array & params) - : IAggregateFunctionHelper(arguments, params), nested_function{nested_function_} + : IAggregateFunctionHelper(arguments, params, createResultType(nested_function_)) + , nested_function{nested_function_} { if constexpr (result_is_nullable) prefix_size = nested_function->alignOfData(); @@ -99,12 +100,12 @@ public: return nested_function->getName(); } - DataTypePtr getReturnType() const override + static DataTypePtr createResultType(const AggregateFunctionPtr & nested_function_) { if constexpr (result_is_nullable) - return makeNullable(nested_function->getReturnType()); + return makeNullable(nested_function_->getResultType()); else - return nested_function->getReturnType(); + return nested_function_->getResultType(); } void create(AggregateDataPtr __restrict place) const override @@ -275,7 +276,7 @@ public: { llvm::IRBuilder<> & b = static_cast &>(builder); - auto * return_type = toNativeType(b, this->getReturnType()); + auto * return_type = toNativeType(b, this->getResultType()); llvm::Value * result = nullptr; diff --git a/src/AggregateFunctions/AggregateFunctionOrFill.h b/src/AggregateFunctions/AggregateFunctionOrFill.h index eff4fb2bdc0..eeec630be9a 100644 --- a/src/AggregateFunctions/AggregateFunctionOrFill.h +++ b/src/AggregateFunctions/AggregateFunctionOrFill.h @@ -30,16 +30,14 @@ private: AggregateFunctionPtr nested_function; size_t size_of_data; - DataTypePtr inner_type; bool inner_nullable; public: AggregateFunctionOrFill(AggregateFunctionPtr nested_function_, const DataTypes & arguments, const Array & params) - : IAggregateFunctionHelper{arguments, params} + : IAggregateFunctionHelper{arguments, params, createResultType(nested_function_->getResultType())} , nested_function{nested_function_} , size_of_data {nested_function->sizeOfData()} - , inner_type {nested_function->getReturnType()} - , inner_nullable {inner_type->isNullable()} + , inner_nullable {nested_function->getResultType()->isNullable()} { // nothing } @@ -246,22 +244,22 @@ 
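A side effect visible in the -OrFill hunks above: the cached inner_type member disappears, and the -OrNull / -OrDefault result-type rule is decided from the nested function's result type passed into the constructor. A compact, self-contained sketch of that rule with stand-in types (not ClickHouse code) follows.

```cpp
#include <memory>
#include <string>

struct Type
{
    std::string name;
    bool nullable = false;
};
using TypePtr = std::shared_ptr<const Type>;

TypePtr makeNullable(const TypePtr & t) { return std::make_shared<Type>(Type{t->name, true}); }

// UseNull == true corresponds to -OrNull, false to -OrDefault.
template <bool UseNull>
TypePtr createResultType(const TypePtr & inner_type_)
{
    if constexpr (UseNull)
        return inner_type_->nullable ? inner_type_ : makeNullable(inner_type_);
    else
        return inner_type_;
}

int main()
{
    auto inner = std::make_shared<const Type>(Type{"Float64", false});
    auto or_null = createResultType<true>(inner);      // wrapped: Nullable(Float64)
    auto or_default = createResultType<false>(inner);  // unchanged: Float64
    return (or_null->nullable && !or_default->nullable) ? 0 : 1;
}
```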
public: readChar(place[size_of_data], buf); } - DataTypePtr getReturnType() const override + static DataTypePtr createResultType(const DataTypePtr & inner_type_) { if constexpr (UseNull) { // -OrNull - if (inner_nullable) - return inner_type; + if (inner_type_->isNullable()) + return inner_type_; - return std::make_shared(inner_type); + return std::make_shared(inner_type_); } else { // -OrDefault - return inner_type; + return inner_type_; } } diff --git a/src/AggregateFunctions/AggregateFunctionQuantile.h b/src/AggregateFunctions/AggregateFunctionQuantile.h index 39a9e09dc64..6427d03f089 100644 --- a/src/AggregateFunctions/AggregateFunctionQuantile.h +++ b/src/AggregateFunctions/AggregateFunctionQuantile.h @@ -72,7 +72,7 @@ private: public: AggregateFunctionQuantile(const DataTypes & argument_types_, const Array & params) : IAggregateFunctionDataHelper>( - argument_types_, params) + argument_types_, params, createResultType(argument_types_)) , levels(params, returns_many) , level(levels.levels[0]) , argument_type(this->argument_types[0]) @@ -83,14 +83,14 @@ public: String getName() const override { return Name::name; } - DataTypePtr getReturnType() const override + static DataTypePtr createResultType(const DataTypes & argument_types_) { DataTypePtr res; if constexpr (returns_float) res = std::make_shared>(); else - res = argument_type; + res = argument_types_[0]; if constexpr (returns_many) return std::make_shared(res); diff --git a/src/AggregateFunctions/AggregateFunctionRankCorrelation.h b/src/AggregateFunctions/AggregateFunctionRankCorrelation.h index 4a81c6cda82..4f9ca55f9f5 100644 --- a/src/AggregateFunctions/AggregateFunctionRankCorrelation.h +++ b/src/AggregateFunctions/AggregateFunctionRankCorrelation.h @@ -51,7 +51,7 @@ class AggregateFunctionRankCorrelation : { public: explicit AggregateFunctionRankCorrelation(const DataTypes & arguments) - :IAggregateFunctionDataHelper ({arguments}, {}) + :IAggregateFunctionDataHelper ({arguments}, {}, std::make_shared>()) {} String getName() const override @@ -61,11 +61,6 @@ public: bool allocatesMemoryInArena() const override { return true; } - DataTypePtr getReturnType() const override - { - return std::make_shared>(); - } - void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override { Float64 new_x = columns[0]->getFloat64(row_num); diff --git a/src/AggregateFunctions/AggregateFunctionResample.h b/src/AggregateFunctions/AggregateFunctionResample.h index fe04ada1a77..32458557ac5 100644 --- a/src/AggregateFunctions/AggregateFunctionResample.h +++ b/src/AggregateFunctions/AggregateFunctionResample.h @@ -43,7 +43,7 @@ public: size_t step_, const DataTypes & arguments, const Array & params) - : IAggregateFunctionHelper>{arguments, params} + : IAggregateFunctionHelper>{arguments, params, createResultType(nested_function_)} , nested_function{nested_function_} , last_col{arguments.size() - 1} , begin{begin_} @@ -190,9 +190,9 @@ public: nested_function->deserialize(place + i * size_of_data, buf, version, arena); } - DataTypePtr getReturnType() const override + static DataTypePtr createResultType(const AggregateFunctionPtr & nested_function_) { - return std::make_shared(nested_function->getReturnType()); + return std::make_shared(nested_function_->getResultType()); } template diff --git a/src/AggregateFunctions/AggregateFunctionRetention.h b/src/AggregateFunctions/AggregateFunctionRetention.h index 18d04fb1ea4..744b6d18f97 100644 --- a/src/AggregateFunctions/AggregateFunctionRetention.h +++ 
b/src/AggregateFunctions/AggregateFunctionRetention.h @@ -76,7 +76,7 @@ public: } explicit AggregateFunctionRetention(const DataTypes & arguments) - : IAggregateFunctionDataHelper(arguments, {}) + : IAggregateFunctionDataHelper(arguments, {}, std::make_shared(std::make_shared())) { for (const auto i : collections::range(0, arguments.size())) { @@ -90,12 +90,6 @@ public: events_size = static_cast(arguments.size()); } - - DataTypePtr getReturnType() const override - { - return std::make_shared(std::make_shared()); - } - bool allocatesMemoryInArena() const override { return false; } void add(AggregateDataPtr __restrict place, const IColumn ** columns, const size_t row_num, Arena *) const override diff --git a/src/AggregateFunctions/AggregateFunctionSequenceMatch.h b/src/AggregateFunctions/AggregateFunctionSequenceMatch.h index bcea408d26b..b4889a06e53 100644 --- a/src/AggregateFunctions/AggregateFunctionSequenceMatch.h +++ b/src/AggregateFunctions/AggregateFunctionSequenceMatch.h @@ -126,8 +126,8 @@ template class AggregateFunctionSequenceBase : public IAggregateFunctionDataHelper { public: - AggregateFunctionSequenceBase(const DataTypes & arguments, const Array & params, const String & pattern_) - : IAggregateFunctionDataHelper(arguments, params) + AggregateFunctionSequenceBase(const DataTypes & arguments, const Array & params, const String & pattern_, const DataTypePtr & result_type_) + : IAggregateFunctionDataHelper(arguments, params, result_type_) , pattern(pattern_) { arg_count = arguments.size(); @@ -617,14 +617,12 @@ class AggregateFunctionSequenceMatch final : public AggregateFunctionSequenceBas { public: AggregateFunctionSequenceMatch(const DataTypes & arguments, const Array & params, const String & pattern_) - : AggregateFunctionSequenceBase>(arguments, params, pattern_) {} + : AggregateFunctionSequenceBase>(arguments, params, pattern_, std::make_shared()) {} using AggregateFunctionSequenceBase>::AggregateFunctionSequenceBase; String getName() const override { return "sequenceMatch"; } - DataTypePtr getReturnType() const override { return std::make_shared(); } - bool allocatesMemoryInArena() const override { return false; } void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override @@ -655,14 +653,12 @@ class AggregateFunctionSequenceCount final : public AggregateFunctionSequenceBas { public: AggregateFunctionSequenceCount(const DataTypes & arguments, const Array & params, const String & pattern_) - : AggregateFunctionSequenceBase>(arguments, params, pattern_) {} + : AggregateFunctionSequenceBase>(arguments, params, pattern_, std::make_shared()) {} using AggregateFunctionSequenceBase>::AggregateFunctionSequenceBase; String getName() const override { return "sequenceCount"; } - DataTypePtr getReturnType() const override { return std::make_shared(); } - bool allocatesMemoryInArena() const override { return false; } void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override diff --git a/src/AggregateFunctions/AggregateFunctionSequenceNextNode.h b/src/AggregateFunctions/AggregateFunctionSequenceNextNode.h index 90caaee4d94..487889a0ca4 100644 --- a/src/AggregateFunctions/AggregateFunctionSequenceNextNode.h +++ b/src/AggregateFunctions/AggregateFunctionSequenceNextNode.h @@ -190,7 +190,7 @@ public: SequenceDirection seq_direction_, size_t min_required_args_, UInt64 max_elems_ = std::numeric_limits::max()) - : IAggregateFunctionDataHelper, Self>({data_type_}, parameters_) + : IAggregateFunctionDataHelper, 
Self>({data_type_}, parameters_, data_type_) , seq_base_kind(seq_base_kind_) , seq_direction(seq_direction_) , min_required_args(min_required_args_) @@ -202,8 +202,6 @@ public: String getName() const override { return "sequenceNextNode"; } - DataTypePtr getReturnType() const override { return data_type; } - bool haveSameStateRepresentationImpl(const IAggregateFunction & rhs) const override { return this->getName() == rhs.getName() && this->haveEqualArgumentTypes(rhs); diff --git a/src/AggregateFunctions/AggregateFunctionSimpleLinearRegression.h b/src/AggregateFunctions/AggregateFunctionSimpleLinearRegression.h index 06cdfc5e582..b0d448afb55 100644 --- a/src/AggregateFunctions/AggregateFunctionSimpleLinearRegression.h +++ b/src/AggregateFunctions/AggregateFunctionSimpleLinearRegression.h @@ -99,7 +99,7 @@ public: IAggregateFunctionDataHelper< AggregateFunctionSimpleLinearRegressionData, AggregateFunctionSimpleLinearRegression - > {arguments, params} + > {arguments, params, createResultType()} { // notice: arguments has been checked before } @@ -140,7 +140,7 @@ public: this->data(place).deserialize(buf); } - DataTypePtr getReturnType() const override + static DataTypePtr createResultType() { DataTypes types { diff --git a/src/AggregateFunctions/AggregateFunctionSimpleState.h b/src/AggregateFunctions/AggregateFunctionSimpleState.h index f50c86c684e..3af7d71395a 100644 --- a/src/AggregateFunctions/AggregateFunctionSimpleState.h +++ b/src/AggregateFunctions/AggregateFunctionSimpleState.h @@ -20,28 +20,28 @@ private: public: AggregateFunctionSimpleState(AggregateFunctionPtr nested_, const DataTypes & arguments_, const Array & params_) - : IAggregateFunctionHelper(arguments_, params_) + : IAggregateFunctionHelper(arguments_, params_, createResultType(nested_, params_)) , nested_func(nested_) { } String getName() const override { return nested_func->getName() + "SimpleState"; } - DataTypePtr getReturnType() const override + static DataTypePtr createResultType(const AggregateFunctionPtr & nested_, const Array & params_) { - DataTypeCustomSimpleAggregateFunction::checkSupportedFunctions(nested_func); + DataTypeCustomSimpleAggregateFunction::checkSupportedFunctions(nested_); // Need to make a clone to avoid recursive reference. - auto storage_type_out = DataTypeFactory::instance().get(nested_func->getReturnType()->getName()); + auto storage_type_out = DataTypeFactory::instance().get(nested_->getResultType()->getName()); // Need to make a new function with promoted argument types because SimpleAggregates requires arg_type = return_type. AggregateFunctionProperties properties; auto function - = AggregateFunctionFactory::instance().get(nested_func->getName(), {storage_type_out}, nested_func->getParameters(), properties); + = AggregateFunctionFactory::instance().get(nested_->getName(), {storage_type_out}, nested_->getParameters(), properties); // Need to make a clone because it'll be customized. 
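It is worth spelling out why these new helpers are static and take the nested function (or scale, parameters, and so on) as arguments instead of reading members such as nested_func: they run while the base subobject is being constructed, before the derived object's members are initialized. A minimal, self-contained illustration with stand-in types (not the real ClickHouse classes):

```cpp
#include <memory>
#include <string>

struct Nested { std::string result_type = "UInt64"; };
using NestedPtr = std::shared_ptr<Nested>;

struct Base
{
    explicit Base(std::string result_type_) : result_type(std::move(result_type_)) {}
    std::string result_type;
};

struct MergeLikeCombinator : Base
{
    explicit MergeLikeCombinator(NestedPtr nested_)
        : Base(createResultType(nested_))   // OK: uses the constructor argument
        , nested_func(std::move(nested_))   // members are initialized only after the base
    {}

    // Reading `nested_func` here would touch an uninitialized member during base
    // construction, hence the explicit parameter.
    static std::string createResultType(const NestedPtr & nested_) { return nested_->result_type; }

    NestedPtr nested_func;
};

// Usage: MergeLikeCombinator{std::make_shared<Nested>()}.result_type == "UInt64".
```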
- auto storage_type_arg = DataTypeFactory::instance().get(nested_func->getReturnType()->getName()); + auto storage_type_arg = DataTypeFactory::instance().get(nested_->getResultType()->getName()); DataTypeCustomNamePtr custom_name - = std::make_unique(function, DataTypes{nested_func->getReturnType()}, parameters); + = std::make_unique(function, DataTypes{nested_->getResultType()}, params_); storage_type_arg->setCustomization(std::make_unique(std::move(custom_name), nullptr)); return storage_type_arg; } diff --git a/src/AggregateFunctions/AggregateFunctionSparkbar.h b/src/AggregateFunctions/AggregateFunctionSparkbar.h index f0fbdd2f2e4..882575e2005 100644 --- a/src/AggregateFunctions/AggregateFunctionSparkbar.h +++ b/src/AggregateFunctions/AggregateFunctionSparkbar.h @@ -261,7 +261,7 @@ private: public: AggregateFunctionSparkbar(const DataTypes & arguments, const Array & params) : IAggregateFunctionDataHelper, AggregateFunctionSparkbar>( - arguments, params) + arguments, params, std::make_shared()) { width = params.at(0).safeGet(); if (params.size() == 3) @@ -283,11 +283,6 @@ public: return "sparkbar"; } - DataTypePtr getReturnType() const override - { - return std::make_shared(); - } - void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * /*arena*/) const override { X x = assert_cast *>(columns[0])->getData()[row_num]; diff --git a/src/AggregateFunctions/AggregateFunctionState.h b/src/AggregateFunctions/AggregateFunctionState.h index 20ccb2e543c..625fe1f36bc 100644 --- a/src/AggregateFunctions/AggregateFunctionState.h +++ b/src/AggregateFunctions/AggregateFunctionState.h @@ -23,7 +23,7 @@ private: public: AggregateFunctionState(AggregateFunctionPtr nested_, const DataTypes & arguments_, const Array & params_) - : IAggregateFunctionHelper(arguments_, params_) + : IAggregateFunctionHelper(arguments_, params_, nested_->getStateType()) , nested_func(nested_) {} @@ -32,11 +32,6 @@ public: return nested_func->getName() + "State"; } - DataTypePtr getReturnType() const override - { - return getStateType(); - } - const IAggregateFunction & getBaseAggregateFunctionWithSameStateRepresentation() const override { return nested_func->getBaseAggregateFunctionWithSameStateRepresentation(); diff --git a/src/AggregateFunctions/AggregateFunctionStatistics.h b/src/AggregateFunctions/AggregateFunctionStatistics.h index ad7177a32fa..eb2d66b7e94 100644 --- a/src/AggregateFunctions/AggregateFunctionStatistics.h +++ b/src/AggregateFunctions/AggregateFunctionStatistics.h @@ -115,15 +115,11 @@ class AggregateFunctionVariance final { public: explicit AggregateFunctionVariance(const DataTypePtr & arg) - : IAggregateFunctionDataHelper, AggregateFunctionVariance>({arg}, {}) {} + : IAggregateFunctionDataHelper, AggregateFunctionVariance>({arg}, {}, std::make_shared()) + {} String getName() const override { return Op::name; } - DataTypePtr getReturnType() const override - { - return std::make_shared(); - } - bool allocatesMemoryInArena() const override { return false; } void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override @@ -368,15 +364,11 @@ class AggregateFunctionCovariance final public: explicit AggregateFunctionCovariance(const DataTypes & args) : IAggregateFunctionDataHelper< CovarianceData, - AggregateFunctionCovariance>(args, {}) {} + AggregateFunctionCovariance>(args, {}, std::make_shared()) + {} String getName() const override { return Op::name; } - DataTypePtr getReturnType() const override - { - return 
std::make_shared(); - } - bool allocatesMemoryInArena() const override { return false; } void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override diff --git a/src/AggregateFunctions/AggregateFunctionStatisticsSimple.h b/src/AggregateFunctions/AggregateFunctionStatisticsSimple.h index d57b043b491..9ef62363a75 100644 --- a/src/AggregateFunctions/AggregateFunctionStatisticsSimple.h +++ b/src/AggregateFunctions/AggregateFunctionStatisticsSimple.h @@ -81,12 +81,12 @@ public: using ColVecResult = ColumnVector; explicit AggregateFunctionVarianceSimple(const DataTypes & argument_types_) - : IAggregateFunctionDataHelper>(argument_types_, {}) + : IAggregateFunctionDataHelper>(argument_types_, {}, std::make_shared>()) , src_scale(0) {} AggregateFunctionVarianceSimple(const IDataType & data_type, const DataTypes & argument_types_) - : IAggregateFunctionDataHelper>(argument_types_, {}) + : IAggregateFunctionDataHelper>(argument_types_, {}, std::make_shared>()) , src_scale(getDecimalScale(data_type)) {} @@ -117,11 +117,6 @@ public: UNREACHABLE(); } - DataTypePtr getReturnType() const override - { - return std::make_shared>(); - } - bool allocatesMemoryInArena() const override { return false; } void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override diff --git a/src/AggregateFunctions/AggregateFunctionSum.h b/src/AggregateFunctions/AggregateFunctionSum.h index 4cd0afc8760..14c2838c30d 100644 --- a/src/AggregateFunctions/AggregateFunctionSum.h +++ b/src/AggregateFunctions/AggregateFunctionSum.h @@ -411,23 +411,21 @@ public: } explicit AggregateFunctionSum(const DataTypes & argument_types_) - : IAggregateFunctionDataHelper>(argument_types_, {}) - , scale(0) + : IAggregateFunctionDataHelper>(argument_types_, {}, createResultType(0)) {} AggregateFunctionSum(const IDataType & data_type, const DataTypes & argument_types_) - : IAggregateFunctionDataHelper>(argument_types_, {}) - , scale(getDecimalScale(data_type)) + : IAggregateFunctionDataHelper>(argument_types_, {}, createResultType(getDecimalScale(data_type))) {} - DataTypePtr getReturnType() const override + static DataTypePtr createResultType(UInt32 scale_) { if constexpr (!is_decimal) return std::make_shared>(); else { using DataType = DataTypeDecimal; - return std::make_shared(DataType::maxPrecision(), scale); + return std::make_shared(DataType::maxPrecision(), scale_); } } @@ -548,7 +546,7 @@ public: for (const auto & argument_type : this->argument_types) can_be_compiled &= canBeNativeType(*argument_type); - auto return_type = getReturnType(); + auto return_type = this->getResultType(); can_be_compiled &= canBeNativeType(*return_type); return can_be_compiled; @@ -558,7 +556,7 @@ public: { llvm::IRBuilder<> & b = static_cast &>(builder); - auto * return_type = toNativeType(b, getReturnType()); + auto * return_type = toNativeType(b, this->getResultType()); auto * aggregate_sum_ptr = aggregate_data_ptr; b.CreateStore(llvm::Constant::getNullValue(return_type), aggregate_sum_ptr); @@ -568,7 +566,7 @@ public: { llvm::IRBuilder<> & b = static_cast &>(builder); - auto * return_type = toNativeType(b, getReturnType()); + auto * return_type = toNativeType(b, this->getResultType()); auto * sum_value_ptr = aggregate_data_ptr; auto * sum_value = b.CreateLoad(return_type, sum_value_ptr); @@ -586,7 +584,7 @@ public: { llvm::IRBuilder<> & b = static_cast &>(builder); - auto * return_type = toNativeType(b, getReturnType()); + auto * return_type = toNativeType(b, 
this->getResultType()); auto * sum_value_dst_ptr = aggregate_data_dst_ptr; auto * sum_value_dst = b.CreateLoad(return_type, sum_value_dst_ptr); @@ -602,7 +600,7 @@ public: { llvm::IRBuilder<> & b = static_cast &>(builder); - auto * return_type = toNativeType(b, getReturnType()); + auto * return_type = toNativeType(b, this->getResultType()); auto * sum_value_ptr = aggregate_data_ptr; return b.CreateLoad(return_type, sum_value_ptr); @@ -611,8 +609,6 @@ public: #endif private: - UInt32 scale; - static constexpr auto & castColumnToResult(IColumn & to) { if constexpr (is_decimal) diff --git a/src/AggregateFunctions/AggregateFunctionSumCount.h b/src/AggregateFunctions/AggregateFunctionSumCount.h index f1a5d85bb6c..7058204ed74 100644 --- a/src/AggregateFunctions/AggregateFunctionSumCount.h +++ b/src/AggregateFunctions/AggregateFunctionSumCount.h @@ -14,12 +14,13 @@ public: using Base = AggregateFunctionAvg; explicit AggregateFunctionSumCount(const DataTypes & argument_types_, UInt32 num_scale_ = 0) - : Base(argument_types_, num_scale_), scale(num_scale_) {} + : Base(argument_types_, createResultType(num_scale_), num_scale_) + {} - DataTypePtr getReturnType() const override + static DataTypePtr createResultType(UInt32 num_scale_) { auto second_elem = std::make_shared(); - return std::make_shared(DataTypes{getReturnTypeFirstElement(), std::move(second_elem)}); + return std::make_shared(DataTypes{getReturnTypeFirstElement(num_scale_), std::move(second_elem)}); } void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const final @@ -43,9 +44,7 @@ public: #endif private: - UInt32 scale; - - auto getReturnTypeFirstElement() const + static auto getReturnTypeFirstElement(UInt32 num_scale_) { using FieldType = AvgFieldType; @@ -54,7 +53,7 @@ private: else { using DataType = DataTypeDecimal; - return std::make_shared(DataType::maxPrecision(), scale); + return std::make_shared(DataType::maxPrecision(), num_scale_); } } }; diff --git a/src/AggregateFunctions/AggregateFunctionSumMap.h b/src/AggregateFunctions/AggregateFunctionSumMap.h index 1e32be987ff..4a1088a87bd 100644 --- a/src/AggregateFunctions/AggregateFunctionSumMap.h +++ b/src/AggregateFunctions/AggregateFunctionSumMap.h @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -80,7 +81,7 @@ public: AggregateFunctionMapBase(const DataTypePtr & keys_type_, const DataTypes & values_types_, const DataTypes & argument_types_) - : Base(argument_types_, {} /* parameters */) + : Base(argument_types_, {} /* parameters */, createResultType(keys_type_, values_types_, getName())) , keys_type(keys_type_) , keys_serialization(keys_type->getDefaultSerialization()) , values_types(values_types_) @@ -117,19 +118,22 @@ public: return 0; } - DataTypePtr getReturnType() const override + static DataTypePtr createResultType( + const DataTypePtr & keys_type_, + const DataTypes & values_types_, + const String & name_) { DataTypes types; - types.emplace_back(std::make_shared(keys_type)); + types.emplace_back(std::make_shared(keys_type_)); - for (const auto & value_type : values_types) + for (const auto & value_type : values_types_) { if constexpr (std::is_same_v) { if (!value_type->isSummable()) throw Exception{ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Values for {} cannot be summed, passed type {}", - getName(), value_type->getName()}; + name_, value_type->getName()}; } DataTypePtr result_type; @@ -139,7 +143,7 @@ public: if (value_type->onlyNull()) throw Exception{ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot calculate {} 
of type {}", - getName(), value_type->getName()}; + name_, value_type->getName()}; // Overflow, meaning that the returned type is the same as // the input type. Nulls are skipped. @@ -153,7 +157,7 @@ public: if (!value_type_without_nullable->canBePromoted()) throw Exception{ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Values for {} are expected to be Numeric, Float or Decimal, passed type {}", - getName(), value_type->getName()}; + name_, value_type->getName()}; WhichDataType value_type_to_check(value_type_without_nullable); @@ -424,7 +428,10 @@ public: } bool keepKey(const T & key) const { return static_cast(*this).keepKey(key); } - String getName() const override { return static_cast(*this).getName(); } + String getName() const override { return getNameImpl(); } + +private: + static String getNameImpl() { return Derived::getNameImpl(); } }; template @@ -443,10 +450,10 @@ public: { // The constructor accepts parameters to have a uniform interface with // sumMapFiltered, but this function doesn't have any parameters. - assertNoParameters(getName(), params_); + assertNoParameters(getNameImpl(), params_); } - String getName() const override + static String getNameImpl() { if constexpr (overflow) { @@ -461,6 +468,7 @@ public: bool keepKey(const T &) const { return true; } }; + template class AggregateFunctionSumMapFiltered final : public AggregateFunctionMapBase(keys_to_keep_values)) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} requires an Array as a parameter", - getName()); + getNameImpl()); + + this->parameters = params_; keys_to_keep.reserve(keys_to_keep_values.size()); @@ -501,8 +511,17 @@ public: keys_to_keep.emplace(f.safeGet()); } - String getName() const override - { return overflow ? "sumMapFilteredWithOverflow" : "sumMapFiltered"; } + static String getNameImpl() + { + if constexpr (overflow) + { + return "sumMapFilteredWithOverflow"; + } + else + { + return "sumMapFiltered"; + } + } bool keepKey(const T & key) const { return keys_to_keep.count(key); } }; @@ -606,10 +625,10 @@ public: { // The constructor accepts parameters to have a uniform interface with // sumMapFiltered, but this function doesn't have any parameters. - assertNoParameters(getName(), params_); + assertNoParameters(getNameImpl(), params_); } - String getName() const override { return "minMap"; } + static String getNameImpl() { return "minMap"; } bool keepKey(const T &) const { return true; } }; @@ -630,10 +649,10 @@ public: { // The constructor accepts parameters to have a uniform interface with // sumMapFiltered, but this function doesn't have any parameters. 
- assertNoParameters(getName(), params_); + assertNoParameters(getNameImpl(), params_); } - String getName() const override { return "maxMap"; } + static String getNameImpl() { return "maxMap"; } bool keepKey(const T &) const { return true; } }; diff --git a/src/AggregateFunctions/AggregateFunctionTTest.h b/src/AggregateFunctions/AggregateFunctionTTest.h index b72e7a3cdcb..749e711d4f7 100644 --- a/src/AggregateFunctions/AggregateFunctionTTest.h +++ b/src/AggregateFunctions/AggregateFunctionTTest.h @@ -46,7 +46,7 @@ private: Float64 confidence_level; public: AggregateFunctionTTest(const DataTypes & arguments, const Array & params) - : IAggregateFunctionDataHelper>({arguments}, params) + : IAggregateFunctionDataHelper>({arguments}, params, createResultType(!params.empty())) { if (!params.empty()) { @@ -71,9 +71,9 @@ public: return Data::name; } - DataTypePtr getReturnType() const override + static DataTypePtr createResultType(bool need_confidence_interval_) { - if (need_confidence_interval) + if (need_confidence_interval_) { DataTypes types { diff --git a/src/AggregateFunctions/AggregateFunctionTopK.cpp b/src/AggregateFunctions/AggregateFunctionTopK.cpp index 4ebc80aceb5..b93aa703503 100644 --- a/src/AggregateFunctions/AggregateFunctionTopK.cpp +++ b/src/AggregateFunctions/AggregateFunctionTopK.cpp @@ -31,15 +31,33 @@ namespace template class AggregateFunctionTopKDate : public AggregateFunctionTopK { +public: using AggregateFunctionTopK::AggregateFunctionTopK; - DataTypePtr getReturnType() const override { return std::make_shared(std::make_shared()); } + + AggregateFunctionTopKDate(UInt64 threshold_, UInt64 load_factor, const DataTypes & argument_types_, const Array & params) + : AggregateFunctionTopK( + threshold_, + load_factor, + argument_types_, + params, + std::make_shared(std::make_shared())) + {} }; template class AggregateFunctionTopKDateTime : public AggregateFunctionTopK { +public: using AggregateFunctionTopK::AggregateFunctionTopK; - DataTypePtr getReturnType() const override { return std::make_shared(std::make_shared()); } + + AggregateFunctionTopKDateTime(UInt64 threshold_, UInt64 load_factor, const DataTypes & argument_types_, const Array & params) + : AggregateFunctionTopK( + threshold_, + load_factor, + argument_types_, + params, + std::make_shared(std::make_shared())) + {} }; diff --git a/src/AggregateFunctions/AggregateFunctionTopK.h b/src/AggregateFunctions/AggregateFunctionTopK.h index 98774254695..f1e57608195 100644 --- a/src/AggregateFunctions/AggregateFunctionTopK.h +++ b/src/AggregateFunctions/AggregateFunctionTopK.h @@ -40,14 +40,20 @@ protected: public: AggregateFunctionTopK(UInt64 threshold_, UInt64 load_factor, const DataTypes & argument_types_, const Array & params) - : IAggregateFunctionDataHelper, AggregateFunctionTopK>(argument_types_, params) - , threshold(threshold_), reserved(load_factor * threshold) {} + : IAggregateFunctionDataHelper, AggregateFunctionTopK>(argument_types_, params, createResultType(argument_types_)) + , threshold(threshold_), reserved(load_factor * threshold) + {} + + AggregateFunctionTopK(UInt64 threshold_, UInt64 load_factor, const DataTypes & argument_types_, const Array & params, const DataTypePtr & result_type_) + : IAggregateFunctionDataHelper, AggregateFunctionTopK>(argument_types_, params, result_type_) + , threshold(threshold_), reserved(load_factor * threshold) + {} String getName() const override { return is_weighted ? 
"topKWeighted" : "topK"; } - DataTypePtr getReturnType() const override + static DataTypePtr createResultType(const DataTypes & argument_types_) { - return std::make_shared(this->argument_types[0]); + return std::make_shared(argument_types_[0]); } bool allocatesMemoryInArena() const override { return false; } @@ -126,21 +132,20 @@ private: UInt64 threshold; UInt64 reserved; - DataTypePtr & input_data_type; static void deserializeAndInsert(StringRef str, IColumn & data_to); public: AggregateFunctionTopKGeneric( UInt64 threshold_, UInt64 load_factor, const DataTypes & argument_types_, const Array & params) - : IAggregateFunctionDataHelper>(argument_types_, params) - , threshold(threshold_), reserved(load_factor * threshold), input_data_type(this->argument_types[0]) {} + : IAggregateFunctionDataHelper>(argument_types_, params, createResultType(argument_types_)) + , threshold(threshold_), reserved(load_factor * threshold) {} String getName() const override { return is_weighted ? "topKWeighted" : "topK"; } - DataTypePtr getReturnType() const override + static DataTypePtr createResultType(const DataTypes & argument_types_) { - return std::make_shared(input_data_type); + return std::make_shared(argument_types_[0]); } bool allocatesMemoryInArena() const override diff --git a/src/AggregateFunctions/AggregateFunctionUniq.h b/src/AggregateFunctions/AggregateFunctionUniq.h index 1a98bfc8456..c782b9314fd 100644 --- a/src/AggregateFunctions/AggregateFunctionUniq.h +++ b/src/AggregateFunctions/AggregateFunctionUniq.h @@ -358,17 +358,12 @@ private: public: explicit AggregateFunctionUniq(const DataTypes & argument_types_) - : IAggregateFunctionDataHelper>(argument_types_, {}) + : IAggregateFunctionDataHelper>(argument_types_, {}, std::make_shared()) { } String getName() const override { return Data::getName(); } - DataTypePtr getReturnType() const override - { - return std::make_shared(); - } - bool allocatesMemoryInArena() const override { return false; } /// ALWAYS_INLINE is required to have better code layout for uniqHLL12 function @@ -462,7 +457,7 @@ private: public: explicit AggregateFunctionUniqVariadic(const DataTypes & arguments) - : IAggregateFunctionDataHelper>(arguments, {}) + : IAggregateFunctionDataHelper>(arguments, {}, std::make_shared()) { if (argument_is_tuple) num_args = typeid_cast(*arguments[0]).getElements().size(); @@ -472,11 +467,6 @@ public: String getName() const override { return Data::getName(); } - DataTypePtr getReturnType() const override - { - return std::make_shared(); - } - bool allocatesMemoryInArena() const override { return false; } void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override diff --git a/src/AggregateFunctions/AggregateFunctionUniqCombined.h b/src/AggregateFunctions/AggregateFunctionUniqCombined.h index 47b3081225b..d879e3b3dde 100644 --- a/src/AggregateFunctions/AggregateFunctionUniqCombined.h +++ b/src/AggregateFunctions/AggregateFunctionUniqCombined.h @@ -126,7 +126,8 @@ class AggregateFunctionUniqCombined final { public: AggregateFunctionUniqCombined(const DataTypes & argument_types_, const Array & params_) - : IAggregateFunctionDataHelper, AggregateFunctionUniqCombined>(argument_types_, params_) {} + : IAggregateFunctionDataHelper, AggregateFunctionUniqCombined>(argument_types_, params_, std::make_shared()) + {} String getName() const override { @@ -136,11 +137,6 @@ public: return "uniqCombined"; } - DataTypePtr getReturnType() const override - { - return std::make_shared(); - } - bool 
allocatesMemoryInArena() const override { return false; } void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override @@ -192,7 +188,7 @@ private: public: explicit AggregateFunctionUniqCombinedVariadic(const DataTypes & arguments, const Array & params) : IAggregateFunctionDataHelper, - AggregateFunctionUniqCombinedVariadic>(arguments, params) + AggregateFunctionUniqCombinedVariadic>(arguments, params, std::make_shared()) { if (argument_is_tuple) num_args = typeid_cast(*arguments[0]).getElements().size(); @@ -208,11 +204,6 @@ public: return "uniqCombined"; } - DataTypePtr getReturnType() const override - { - return std::make_shared(); - } - bool allocatesMemoryInArena() const override { return false; } void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override diff --git a/src/AggregateFunctions/AggregateFunctionUniqUpTo.h b/src/AggregateFunctions/AggregateFunctionUniqUpTo.h index 99f36b664d7..377f2580070 100644 --- a/src/AggregateFunctions/AggregateFunctionUniqUpTo.h +++ b/src/AggregateFunctions/AggregateFunctionUniqUpTo.h @@ -174,7 +174,7 @@ private: public: AggregateFunctionUniqUpTo(UInt8 threshold_, const DataTypes & argument_types_, const Array & params_) - : IAggregateFunctionDataHelper, AggregateFunctionUniqUpTo>(argument_types_, params_) + : IAggregateFunctionDataHelper, AggregateFunctionUniqUpTo>(argument_types_, params_, std::make_shared()) , threshold(threshold_) { } @@ -186,11 +186,6 @@ public: String getName() const override { return "uniqUpTo"; } - DataTypePtr getReturnType() const override - { - return std::make_shared(); - } - bool allocatesMemoryInArena() const override { return false; } /// ALWAYS_INLINE is required to have better code layout for uniqUpTo function @@ -235,7 +230,7 @@ private: public: AggregateFunctionUniqUpToVariadic(const DataTypes & arguments, const Array & params, UInt8 threshold_) - : IAggregateFunctionDataHelper, AggregateFunctionUniqUpToVariadic>(arguments, params) + : IAggregateFunctionDataHelper, AggregateFunctionUniqUpToVariadic>(arguments, params, std::make_shared()) , threshold(threshold_) { if (argument_is_tuple) @@ -251,11 +246,6 @@ public: String getName() const override { return "uniqUpTo"; } - DataTypePtr getReturnType() const override - { - return std::make_shared(); - } - bool allocatesMemoryInArena() const override { return false; } void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override diff --git a/src/AggregateFunctions/AggregateFunctionWindowFunnel.h b/src/AggregateFunctions/AggregateFunctionWindowFunnel.h index 8dad9643da5..472f230a24c 100644 --- a/src/AggregateFunctions/AggregateFunctionWindowFunnel.h +++ b/src/AggregateFunctions/AggregateFunctionWindowFunnel.h @@ -221,7 +221,7 @@ public: } AggregateFunctionWindowFunnel(const DataTypes & arguments, const Array & params) - : IAggregateFunctionDataHelper>(arguments, params) + : IAggregateFunctionDataHelper>(arguments, params, std::make_shared()) { events_size = arguments.size() - 1; window = params.at(0).safeGet(); @@ -245,11 +245,6 @@ public: } } - DataTypePtr getReturnType() const override - { - return std::make_shared(); - } - bool allocatesMemoryInArena() const override { return false; } void add(AggregateDataPtr __restrict place, const IColumn ** columns, const size_t row_num, Arena *) const override diff --git a/src/AggregateFunctions/CrossTab.h b/src/AggregateFunctions/CrossTab.h index 1284c210886..5868292c83f 100644 --- 
a/src/AggregateFunctions/CrossTab.h +++ b/src/AggregateFunctions/CrossTab.h @@ -118,7 +118,7 @@ class AggregateFunctionCrossTab : public IAggregateFunctionDataHelper>({arguments}, {}) + : IAggregateFunctionDataHelper>({arguments}, {}, createResultType()) { } @@ -132,7 +132,7 @@ public: return false; } - DataTypePtr getReturnType() const override + static DataTypePtr createResultType() { return std::make_shared>(); } diff --git a/src/AggregateFunctions/IAggregateFunction.h b/src/AggregateFunctions/IAggregateFunction.h index ada00791e69..a5d1887f85e 100644 --- a/src/AggregateFunctions/IAggregateFunction.h +++ b/src/AggregateFunctions/IAggregateFunction.h @@ -10,6 +10,7 @@ #include #include #include +#include #include "config.h" @@ -49,6 +50,7 @@ using ConstAggregateDataPtr = const char *; class IAggregateFunction; using AggregateFunctionPtr = std::shared_ptr; + struct AggregateFunctionProperties; /** Aggregate functions interface. @@ -59,18 +61,18 @@ struct AggregateFunctionProperties; * (which can be created in some memory pool), * and IAggregateFunction is the external interface for manipulating them. */ -class IAggregateFunction : public std::enable_shared_from_this +class IAggregateFunction : public std::enable_shared_from_this, public IResolvedFunction { public: - IAggregateFunction(const DataTypes & argument_types_, const Array & parameters_) - : argument_types(argument_types_), parameters(parameters_) {} + IAggregateFunction(const DataTypes & argument_types_, const Array & parameters_, const DataTypePtr & result_type_) + : result_type(result_type_) + , argument_types(argument_types_) + , parameters(parameters_) + {} /// Get main function name. virtual String getName() const = 0; - /// Get the result type. - virtual DataTypePtr getReturnType() const = 0; - /// Get the data type of internal state. By default it is AggregateFunction(name(params), argument_types...). virtual DataTypePtr getStateType() const; @@ -102,7 +104,7 @@ public: virtual size_t getDefaultVersion() const { return 0; } - virtual ~IAggregateFunction() = default; + ~IAggregateFunction() override = default; /** Data manipulating functions. */ @@ -343,13 +345,22 @@ public: return nullptr; } + /// For most functions if one of arguments is always NULL, we return NULL (it's implemented in combinator Null), + /// but in some functions we can want to process this argument somehow (for example condition argument in If combinator). + /// This method returns the set of argument indexes that can be always NULL, they will be skipped in combinator Null. + virtual std::unordered_set getArgumentsThatCanBeOnlyNull() const + { + return {}; + } + /** Return the nested function if this is an Aggregate Function Combinator. * Otherwise return nullptr. */ virtual AggregateFunctionPtr getNestedFunction() const { return {}; } - const DataTypes & getArgumentTypes() const { return argument_types; } - const Array & getParameters() const { return parameters; } + const DataTypePtr & getResultType() const override { return result_type; } + const DataTypes & getArgumentTypes() const override { return argument_types; } + const Array & getParameters() const override { return parameters; } // Any aggregate function can be calculated over a window, but there are some // window functions such as rank() that require a different interface, e.g. 
@@ -398,6 +409,7 @@ public: #endif protected: + DataTypePtr result_type; DataTypes argument_types; Array parameters; }; @@ -414,8 +426,8 @@ private: } public: - IAggregateFunctionHelper(const DataTypes & argument_types_, const Array & parameters_) - : IAggregateFunction(argument_types_, parameters_) {} + IAggregateFunctionHelper(const DataTypes & argument_types_, const Array & parameters_, const DataTypePtr & result_type_) + : IAggregateFunction(argument_types_, parameters_, result_type_) {} AddFunc getAddressOfAddFunction() const override { return &addFree; } @@ -695,15 +707,15 @@ public: // Derived class can `override` this to flag that DateTime64 is not supported. static constexpr bool DateTime64Supported = true; - IAggregateFunctionDataHelper(const DataTypes & argument_types_, const Array & parameters_) - : IAggregateFunctionHelper(argument_types_, parameters_) + IAggregateFunctionDataHelper(const DataTypes & argument_types_, const Array & parameters_, const DataTypePtr & result_type_) + : IAggregateFunctionHelper(argument_types_, parameters_, result_type_) { /// To prevent derived classes changing the destroy() without updating hasTrivialDestructor() to match it /// Enforce that either both of them are changed or none are - constexpr bool declares_destroy_and_hasTrivialDestructor = + constexpr bool declares_destroy_and_has_trivial_destructor = std::is_same_v == std::is_same_v; - static_assert(declares_destroy_and_hasTrivialDestructor, + static_assert(declares_destroy_and_has_trivial_destructor, "destroy() and hasTrivialDestructor() methods of an aggregate function must be either both overridden or not"); } @@ -824,6 +836,9 @@ struct AggregateFunctionProperties * Some may also name this property as "non-commutative". */ bool is_order_dependent = false; + + /// Indicates if it's actually window function. 
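The new getArgumentsThatCanBeOnlyNull() hook ties back to the AggregateFunctionNull.cpp change earlier in this patch: an argument whose type is Nullable(Nothing) normally forces the whole call to return NULL, but argument indexes reported by the nested function (the comment names the condition argument of the -If combinator as the motivating case) are exempt from that rule. The following self-contained sketch restates the check with simplified stand-in types; it mirrors the loop in the combinator, not the actual classes.

```cpp
#include <cstddef>
#include <unordered_set>
#include <vector>

// Simplified stand-in for an argument's data type.
struct FakeType { bool only_null = false; };

// Returns true when the Null combinator should short-circuit to a constant NULL result:
// some argument is "only NULL" and the nested function did not opt that index out.
// (unordered_set::contains requires C++20, which matches the contains() call in the patch.)
bool returnsNullForOnlyNullArguments(
    const std::vector<FakeType> & arguments,
    const std::unordered_set<size_t> & arguments_that_can_be_only_null)
{
    for (size_t i = 0; i < arguments.size(); ++i)
        if (arguments[i].only_null && !arguments_that_can_be_only_null.contains(i))
            return true;   // corresponds to `has_null_types = true; break;`
    return false;
}
```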
+ bool is_window_function = false; }; diff --git a/src/AggregateFunctions/registerAggregateFunctions.cpp b/src/AggregateFunctions/registerAggregateFunctions.cpp index 08524cc9f97..ecf6ab51367 100644 --- a/src/AggregateFunctions/registerAggregateFunctions.cpp +++ b/src/AggregateFunctions/registerAggregateFunctions.cpp @@ -73,7 +73,6 @@ void registerAggregateFunctionExponentialMovingAverage(AggregateFunctionFactory void registerAggregateFunctionSparkbar(AggregateFunctionFactory &); void registerAggregateFunctionIntervalLengthSum(AggregateFunctionFactory &); void registerAggregateFunctionAnalysisOfVariance(AggregateFunctionFactory &); -void registerAggregateFunctionFlameGraph(AggregateFunctionFactory &); class AggregateFunctionCombinatorFactory; void registerAggregateFunctionCombinatorIf(AggregateFunctionCombinatorFactory &); @@ -159,7 +158,6 @@ void registerAggregateFunctions() registerAggregateFunctionExponentialMovingAverage(factory); registerAggregateFunctionSparkbar(factory); registerAggregateFunctionAnalysisOfVariance(factory); - registerAggregateFunctionFlameGraph(factory); registerWindowFunctions(factory); } diff --git a/src/Analyzer/FunctionNode.cpp b/src/Analyzer/FunctionNode.cpp index ad3959dfe9c..1b32cd5436d 100644 --- a/src/Analyzer/FunctionNode.cpp +++ b/src/Analyzer/FunctionNode.cpp @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -17,6 +18,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + FunctionNode::FunctionNode(String function_name_) : IQueryTreeNode(children_size) , function_name(function_name_) @@ -25,25 +31,41 @@ FunctionNode::FunctionNode(String function_name_) children[arguments_child_index] = std::make_shared(); } -void FunctionNode::resolveAsFunction(FunctionOverloadResolverPtr function_value, DataTypePtr result_type_value) +ColumnsWithTypeAndName FunctionNode::getArgumentTypes() const { - aggregate_function = nullptr; + ColumnsWithTypeAndName argument_types; + for (const auto & arg : getArguments().getNodes()) + { + ColumnWithTypeAndName argument; + argument.type = arg->getResultType(); + if (auto * constant = arg->as()) + argument.column = argument.type->createColumnConst(1, constant->getValue()); + argument_types.push_back(argument); + } + return argument_types; +} + +void FunctionNode::resolveAsFunction(FunctionBasePtr function_value) +{ + function_name = function_value->getName(); function = std::move(function_value); - result_type = std::move(result_type_value); - function_name = function->getName(); + kind = FunctionKind::ORDINARY; } -void FunctionNode::resolveAsAggregateFunction(AggregateFunctionPtr aggregate_function_value, DataTypePtr result_type_value) +void FunctionNode::resolveAsAggregateFunction(AggregateFunctionPtr aggregate_function_value) { - function = nullptr; - aggregate_function = std::move(aggregate_function_value); - result_type = std::move(result_type_value); - function_name = aggregate_function->getName(); + function_name = aggregate_function_value->getName(); + function = std::move(aggregate_function_value); + kind = FunctionKind::AGGREGATE; } -void FunctionNode::resolveAsWindowFunction(AggregateFunctionPtr window_function_value, DataTypePtr result_type_value) +void FunctionNode::resolveAsWindowFunction(AggregateFunctionPtr window_function_value) { - resolveAsAggregateFunction(window_function_value, result_type_value); + if (!hasWindow()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Trying to resolve FunctionNode without window definition as a window function {}", 
window_function_value->getName()); + resolveAsAggregateFunction(window_function_value); + kind = FunctionKind::WINDOW; } void FunctionNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const @@ -63,8 +85,8 @@ void FunctionNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state buffer << ", function_type: " << function_type; - if (result_type) - buffer << ", result_type: " + result_type->getName(); + if (function) + buffer << ", result_type: " + function->getResultType()->getName(); const auto & parameters = getParameters(); if (!parameters.getNodes().empty()) @@ -96,11 +118,19 @@ bool FunctionNode::isEqualImpl(const IQueryTreeNode & rhs) const isWindowFunction() != rhs_typed.isWindowFunction()) return false; - if (result_type && rhs_typed.result_type && !result_type->equals(*rhs_typed.getResultType())) + if (isResolved() != rhs_typed.isResolved()) return false; - else if (result_type && !rhs_typed.result_type) + if (!isResolved()) + return true; + + auto lhs_result_type = getResultType(); + auto rhs_result_type = rhs.getResultType(); + + if (lhs_result_type && rhs_result_type && !lhs_result_type->equals(*rhs_result_type)) return false; - else if (!result_type && rhs_typed.result_type) + else if (lhs_result_type && !rhs_result_type) + return false; + else if (!lhs_result_type && rhs_result_type) return false; return true; @@ -114,7 +144,10 @@ void FunctionNode::updateTreeHashImpl(HashState & hash_state) const hash_state.update(isAggregateFunction()); hash_state.update(isWindowFunction()); - if (result_type) + if (!isResolved()) + return; + + if (auto result_type = getResultType()) { auto result_type_name = result_type->getName(); hash_state.update(result_type_name.size()); @@ -130,8 +163,7 @@ QueryTreeNodePtr FunctionNode::cloneImpl() const * because ordinary functions or aggregate functions must be stateless. */ result_function->function = function; - result_function->aggregate_function = aggregate_function; - result_function->result_type = result_type; + result_function->kind = kind; return result_function; } diff --git a/src/Analyzer/FunctionNode.h b/src/Analyzer/FunctionNode.h index e746cf48581..501d439e55e 100644 --- a/src/Analyzer/FunctionNode.h +++ b/src/Analyzer/FunctionNode.h @@ -1,8 +1,12 @@ #pragma once +#include +#include #include #include #include +#include +#include namespace DB { @@ -15,6 +19,9 @@ namespace ErrorCodes class IFunctionOverloadResolver; using FunctionOverloadResolverPtr = std::shared_ptr; +class IFunctionBase; +using FunctionBasePtr = std::shared_ptr; + class IAggregateFunction; using AggregateFunctionPtr = std::shared_ptr; @@ -39,6 +46,14 @@ using AggregateFunctionPtr = std::shared_ptr; class FunctionNode; using FunctionNodePtr = std::shared_ptr; +enum class FunctionKind +{ + UNKNOWN, + ORDINARY, + AGGREGATE, + WINDOW, +}; + class FunctionNode final : public IQueryTreeNode { public: @@ -101,6 +116,8 @@ public: return children[arguments_child_index]; } + ColumnsWithTypeAndName getArgumentTypes() const; + /// Returns true if function node has window, false otherwise bool hasWindow() const { @@ -129,42 +146,46 @@ public: /** Get non aggregate function. * If function is not resolved nullptr returned. */ - const FunctionOverloadResolverPtr & getFunction() const + FunctionBasePtr getFunction() const { - return function; + if (kind != FunctionKind::ORDINARY) + return {}; + return std::reinterpret_pointer_cast(function); } /** Get aggregate function. * If function is not resolved nullptr returned. 
* If function is resolved as non aggregate function nullptr returned. */ - const AggregateFunctionPtr & getAggregateFunction() const + AggregateFunctionPtr getAggregateFunction() const { - return aggregate_function; + if (kind == FunctionKind::UNKNOWN || kind == FunctionKind::ORDINARY) + return {}; + return std::reinterpret_pointer_cast(function); } /// Is function node resolved bool isResolved() const { - return result_type != nullptr && (function != nullptr || aggregate_function != nullptr); + return function != nullptr; } /// Is function node window function bool isWindowFunction() const { - return getWindowNode() != nullptr; + return hasWindow(); } /// Is function node aggregate function bool isAggregateFunction() const { - return aggregate_function != nullptr && !isWindowFunction(); + return kind == FunctionKind::AGGREGATE; } /// Is function node ordinary function bool isOrdinaryFunction() const { - return function != nullptr; + return kind == FunctionKind::ORDINARY; } /** Resolve function node as non aggregate function. @@ -173,19 +194,19 @@ public: * Assume we have `multiIf` function with single condition, it can be converted to `if` function. * Function name must be updated accordingly. */ - void resolveAsFunction(FunctionOverloadResolverPtr function_value, DataTypePtr result_type_value); + void resolveAsFunction(FunctionBasePtr function_value); /** Resolve function node as aggregate function. * It is important that function name is updated with resolved function name. * Main motivation for this is query tree optimizations. */ - void resolveAsAggregateFunction(AggregateFunctionPtr aggregate_function_value, DataTypePtr result_type_value); + void resolveAsAggregateFunction(AggregateFunctionPtr aggregate_function_value); /** Resolve function node as window function. * It is important that function name is updated with resolved function name. * Main motivation for this is query tree optimizations. 
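For the Analyzer changes in this part of the patch, the practical consequence for query-tree passes is a new resolution order: resolveAsFunction() no longer receives a result type, it derives one by building the function over the node's current argument types, so the argument nodes must be in place before the resolve call (this is exactly why the FunctionToSubcolumnsPass hunks later in the patch move the resolve call after the argument rewrites). The fragment below restates the pattern used by CustomizeFunctionsPass; it is not standalone code, and `function_node`, `function_name`, and `context` are whatever the surrounding pass already has in scope.

```cpp
// 1. Rewrite/prepare the argument nodes first; getArgumentTypes() reads them.
// 2. Only then resolve, letting the built function carry the result type.
auto function = FunctionFactory::instance().get(function_name, context);
function_node.resolveAsFunction(function->build(function_node.getArgumentTypes()));
// FunctionNode::getResultType() now forwards to the resolved function, so no
// DataTypePtr is cached on the node any more.
```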
*/ - void resolveAsWindowFunction(AggregateFunctionPtr window_function_value, DataTypePtr result_type_value); + void resolveAsWindowFunction(AggregateFunctionPtr window_function_value); QueryTreeNodeType getNodeType() const override { @@ -194,12 +215,11 @@ public: DataTypePtr getResultType() const override { - if (!result_type) + if (!function) throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Function node with name '{}' is not resolved", function_name); - - return result_type; + return function->getResultType(); } void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; @@ -215,9 +235,8 @@ protected: private: String function_name; - FunctionOverloadResolverPtr function; - AggregateFunctionPtr aggregate_function; - DataTypePtr result_type; + FunctionKind kind = FunctionKind::UNKNOWN; + IResolvedFunctionPtr function; static constexpr size_t parameters_child_index = 0; static constexpr size_t arguments_child_index = 1; diff --git a/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp index 9b59faacfe0..e4e99c6e947 100644 --- a/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp +++ b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp @@ -147,7 +147,6 @@ public: private: static inline void resolveAggregateFunctionNode(FunctionNode & function_node, const String & aggregate_function_name) { - auto function_result_type = function_node.getResultType(); auto function_aggregate_function = function_node.getAggregateFunction(); AggregateFunctionProperties properties; @@ -156,7 +155,7 @@ private: function_aggregate_function->getParameters(), properties); - function_node.resolveAsAggregateFunction(std::move(aggregate_function), std::move(function_result_type)); + function_node.resolveAsAggregateFunction(std::move(aggregate_function)); } }; diff --git a/src/Analyzer/Passes/CountDistinctPass.cpp b/src/Analyzer/Passes/CountDistinctPass.cpp index 05c31ec28ba..0384055e484 100644 --- a/src/Analyzer/Passes/CountDistinctPass.cpp +++ b/src/Analyzer/Passes/CountDistinctPass.cpp @@ -71,7 +71,7 @@ public: auto result_type = function_node->getResultType(); AggregateFunctionProperties properties; auto aggregate_function = AggregateFunctionFactory::instance().get("count", {}, {}, properties); - function_node->resolveAsAggregateFunction(std::move(aggregate_function), std::move(result_type)); + function_node->resolveAsAggregateFunction(std::move(aggregate_function)); function_node->getArguments().getNodes().clear(); } }; diff --git a/src/Analyzer/Passes/CustomizeFunctionsPass.cpp b/src/Analyzer/Passes/CustomizeFunctionsPass.cpp index 629ab411a55..7eb4a040970 100644 --- a/src/Analyzer/Passes/CustomizeFunctionsPass.cpp +++ b/src/Analyzer/Passes/CustomizeFunctionsPass.cpp @@ -138,7 +138,6 @@ public: static inline void resolveAggregateOrWindowFunctionNode(FunctionNode & function_node, const String & aggregate_function_name) { - auto function_result_type = function_node.getResultType(); auto function_aggregate_function = function_node.getAggregateFunction(); AggregateFunctionProperties properties; @@ -148,16 +147,15 @@ public: properties); if (function_node.isAggregateFunction()) - function_node.resolveAsAggregateFunction(std::move(aggregate_function), std::move(function_result_type)); + function_node.resolveAsAggregateFunction(std::move(aggregate_function)); else if (function_node.isWindowFunction()) - function_node.resolveAsWindowFunction(std::move(aggregate_function), 
std::move(function_result_type)); + function_node.resolveAsWindowFunction(std::move(aggregate_function)); } inline void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const { - auto function_result_type = function_node.getResultType(); auto function = FunctionFactory::instance().get(function_name, context); - function_node.resolveAsFunction(function, std::move(function_result_type)); + function_node.resolveAsFunction(function->build(function_node.getArgumentTypes())); } private: diff --git a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp index b1ecfe2d8fc..0c5a450135f 100644 --- a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp +++ b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp @@ -78,11 +78,11 @@ public: column.name += ".size0"; column.type = std::make_shared(); - resolveOrdinaryFunctionNode(*function_node, "equals"); - function_arguments_nodes.clear(); function_arguments_nodes.push_back(std::make_shared(column, column_source)); function_arguments_nodes.push_back(std::make_shared(static_cast(0))); + + resolveOrdinaryFunctionNode(*function_node, "equals"); } else if (function_name == "notEmpty") { @@ -90,11 +90,11 @@ public: column.name += ".size0"; column.type = std::make_shared(); - resolveOrdinaryFunctionNode(*function_node, "notEquals"); - function_arguments_nodes.clear(); function_arguments_nodes.push_back(std::make_shared(column, column_source)); function_arguments_nodes.push_back(std::make_shared(static_cast(0))); + + resolveOrdinaryFunctionNode(*function_node, "notEquals"); } } else if (column_type.isNullable()) @@ -112,9 +112,9 @@ public: column.name += ".null"; column.type = std::make_shared(); - resolveOrdinaryFunctionNode(*function_node, "not"); - function_arguments_nodes = {std::make_shared(column, column_source)}; + + resolveOrdinaryFunctionNode(*function_node, "not"); } } else if (column_type.isMap()) @@ -182,9 +182,9 @@ public: column.type = data_type_map.getKeyType(); auto has_function_argument = std::make_shared(column, column_source); - resolveOrdinaryFunctionNode(*function_node, "has"); - function_arguments_nodes[0] = std::move(has_function_argument); + + resolveOrdinaryFunctionNode(*function_node, "has"); } } } @@ -192,9 +192,8 @@ public: private: inline void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const { - auto function_result_type = function_node.getResultType(); auto function = FunctionFactory::instance().get(function_name, context); - function_node.resolveAsFunction(function, std::move(function_result_type)); + function_node.resolveAsFunction(function->build(function_node.getArgumentTypes())); } ContextPtr & context; diff --git a/src/Analyzer/Passes/FuseFunctionsPass.cpp b/src/Analyzer/Passes/FuseFunctionsPass.cpp index f7e703cdaa4..f354a7b1ec3 100644 --- a/src/Analyzer/Passes/FuseFunctionsPass.cpp +++ b/src/Analyzer/Passes/FuseFunctionsPass.cpp @@ -59,14 +59,13 @@ private: std::unordered_set names_to_collect; }; -QueryTreeNodePtr createResolvedFunction(const ContextPtr & context, const String & name, const DataTypePtr & result_type, QueryTreeNodes arguments) +QueryTreeNodePtr createResolvedFunction(const ContextPtr & context, const String & name, QueryTreeNodes arguments) { auto function_node = std::make_shared(name); auto function = FunctionFactory::instance().get(name, context); - function_node->resolveAsFunction(std::move(function), result_type); function_node->getArguments().getNodes() = std::move(arguments); - + 
function_node->resolveAsFunction(function->build(function_node->getArgumentTypes())); return function_node; } @@ -74,11 +73,6 @@ FunctionNodePtr createResolvedAggregateFunction(const String & name, const Query { auto function_node = std::make_shared(name); - AggregateFunctionProperties properties; - auto aggregate_function = AggregateFunctionFactory::instance().get(name, {argument->getResultType()}, parameters, properties); - function_node->resolveAsAggregateFunction(aggregate_function, aggregate_function->getReturnType()); - function_node->getArguments().getNodes() = { argument }; - if (!parameters.empty()) { QueryTreeNodes parameter_nodes; @@ -86,18 +80,27 @@ FunctionNodePtr createResolvedAggregateFunction(const String & name, const Query parameter_nodes.emplace_back(std::make_shared(param)); function_node->getParameters().getNodes() = std::move(parameter_nodes); } + function_node->getArguments().getNodes() = { argument }; + + AggregateFunctionProperties properties; + auto aggregate_function = AggregateFunctionFactory::instance().get( + name, + { argument->getResultType() }, + parameters, + properties); + function_node->resolveAsAggregateFunction(aggregate_function); return function_node; } -QueryTreeNodePtr createTupleElementFunction(const ContextPtr & context, const DataTypePtr & result_type, QueryTreeNodePtr argument, UInt64 index) +QueryTreeNodePtr createTupleElementFunction(const ContextPtr & context, QueryTreeNodePtr argument, UInt64 index) { - return createResolvedFunction(context, "tupleElement", result_type, {std::move(argument), std::make_shared(index)}); + return createResolvedFunction(context, "tupleElement", {argument, std::make_shared(index)}); } -QueryTreeNodePtr createArrayElementFunction(const ContextPtr & context, const DataTypePtr & result_type, QueryTreeNodePtr argument, UInt64 index) +QueryTreeNodePtr createArrayElementFunction(const ContextPtr & context, QueryTreeNodePtr argument, UInt64 index) { - return createResolvedFunction(context, "arrayElement", result_type, {std::move(argument), std::make_shared(index)}); + return createResolvedFunction(context, "arrayElement", {argument, std::make_shared(index)}); } void replaceWithSumCount(QueryTreeNodePtr & node, const FunctionNodePtr & sum_count_node, ContextPtr context) @@ -115,20 +118,20 @@ void replaceWithSumCount(QueryTreeNodePtr & node, const FunctionNodePtr & sum_co if (function_name == "sum") { assert(node->getResultType()->equals(*sum_count_result_type->getElement(0))); - node = createTupleElementFunction(context, node->getResultType(), sum_count_node, 1); + node = createTupleElementFunction(context, sum_count_node, 1); } else if (function_name == "count") { assert(node->getResultType()->equals(*sum_count_result_type->getElement(1))); - node = createTupleElementFunction(context, node->getResultType(), sum_count_node, 2); + node = createTupleElementFunction(context, sum_count_node, 2); } else if (function_name == "avg") { - auto sum_result = createTupleElementFunction(context, sum_count_result_type->getElement(0), sum_count_node, 1); - auto count_result = createTupleElementFunction(context, sum_count_result_type->getElement(1), sum_count_node, 2); + auto sum_result = createTupleElementFunction(context, sum_count_node, 1); + auto count_result = createTupleElementFunction(context, sum_count_node, 2); /// To avoid integer division by zero - auto count_float_result = createResolvedFunction(context, "toFloat64", std::make_shared(), {count_result}); - node = createResolvedFunction(context, "divide", 
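For reference, the avg branch of replaceWithSumCount rebuilds avg(x) as divide(tupleElement(sumCount(x), 1), toFloat64(tupleElement(sumCount(x), 2))); per the comment carried over from the pass, the explicit toFloat64 keeps the division well defined for an empty group (count = 0). A tiny numeric illustration in plain C++ terms, not query-tree code:

#include <cmath>
#include <cstdint>
#include <cstdio>

int main()
{
    // sumCount() over an empty group yields (sum = 0, count = 0).
    std::int64_t sum = 0;
    std::uint64_t count = 0;

    // Integer division by zero would be undefined behaviour in C++; converting
    // the operands to floating point first makes the result a well-defined NaN.
    double avg = static_cast<double>(sum) / static_cast<double>(count);
    std::printf("avg over empty group: %f (isnan=%d)\n", avg, std::isnan(avg) ? 1 : 0);
}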
node->getResultType(), {sum_result, count_float_result}); + auto count_float_result = createResolvedFunction(context, "toFloat64", {count_result}); + node = createResolvedFunction(context, "divide", {sum_result, count_float_result}); } else { @@ -238,7 +241,7 @@ void tryFuseQuantiles(QueryTreeNodePtr query_tree_node, ContextPtr context) for (size_t i = 0; i < nodes_set.size(); ++i) { size_t array_index = i + 1; - *nodes[i] = createArrayElementFunction(context, result_array_type->getNestedType(), quantiles_node, array_index); + *nodes[i] = createArrayElementFunction(context, quantiles_node, array_index); } } } diff --git a/src/Analyzer/Passes/IfChainToMultiIfPass.cpp b/src/Analyzer/Passes/IfChainToMultiIfPass.cpp index f400b11765e..020edfe4820 100644 --- a/src/Analyzer/Passes/IfChainToMultiIfPass.cpp +++ b/src/Analyzer/Passes/IfChainToMultiIfPass.cpp @@ -55,8 +55,8 @@ public: return; auto multi_if_function = std::make_shared("multiIf"); - multi_if_function->resolveAsFunction(multi_if_function_ptr, std::make_shared()); multi_if_function->getArguments().getNodes() = std::move(multi_if_arguments); + multi_if_function->resolveAsFunction(multi_if_function_ptr->build(multi_if_function->getArgumentTypes())); node = std::move(multi_if_function); } diff --git a/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp b/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp index 65120632c0c..776fe63c803 100644 --- a/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp +++ b/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp @@ -47,49 +47,64 @@ QueryTreeNodePtr createCastFunction(QueryTreeNodePtr from, DataTypePtr result_ty auto enum_literal_node = std::make_shared(std::move(enum_literal)); auto cast_function = FunctionFactory::instance().get("_CAST", std::move(context)); - QueryTreeNodes arguments{std::move(from), std::move(enum_literal_node)}; + QueryTreeNodes arguments{ std::move(from), std::move(enum_literal_node) }; auto function_node = std::make_shared("_CAST"); - function_node->resolveAsFunction(std::move(cast_function), std::move(result_type)); function_node->getArguments().getNodes() = std::move(arguments); + function_node->resolveAsFunction(cast_function->build(function_node->getArgumentTypes())); + return function_node; } /// if(arg1, arg2, arg3) will be transformed to if(arg1, _CAST(arg2, Enum...), _CAST(arg3, Enum...)) /// where Enum is generated based on the possible values stored in string_values void changeIfArguments( - QueryTreeNodePtr & first, QueryTreeNodePtr & second, const std::set & string_values, const ContextPtr & context) + FunctionNode & if_node, const std::set & string_values, const ContextPtr & context) { auto result_type = getEnumType(string_values); - first = createCastFunction(first, result_type, context); - second = createCastFunction(second, result_type, context); + auto & argument_nodes = if_node.getArguments().getNodes(); + + argument_nodes[1] = createCastFunction(argument_nodes[1], result_type, context); + argument_nodes[2] = createCastFunction(argument_nodes[2], result_type, context); + + auto if_resolver = FunctionFactory::instance().get("if", context); + + if_node.resolveAsFunction(if_resolver->build(if_node.getArgumentTypes())); } /// transform(value, array_from, array_to, default_value) will be transformed to transform(value, array_from, _CAST(array_to, Array(Enum...)), _CAST(default_value, Enum...)) /// where Enum is generated based on the possible values stored in string_values void changeTransformArguments( - QueryTreeNodePtr & array_to, - QueryTreeNodePtr & 
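The string-to-Enum rewrite above wraps the if()/transform() branches in _CAST(..., Enum...), where the enum is generated from the set of string literals the pass has collected. A rough sketch of what such an enum definition could look like, assuming a simple "value = position" numbering (the real getEnumType() also picks Enum8 versus Enum16 by the number of values):

#include <cassert>
#include <set>
#include <string>

// Build an Enum8 definition string from the observed string literals.
std::string makeEnumDefinition(const std::set<std::string> & string_values)
{
    std::string result = "Enum8(";
    int value = 1;
    for (const auto & s : string_values)
    {
        if (value > 1)
            result += ", ";
        result += "'" + s + "' = " + std::to_string(value);
        ++value;
    }
    result += ")";
    return result;
}

int main()
{
    assert(makeEnumDefinition({"apple", "banana"}) == "Enum8('apple' = 1, 'banana' = 2)");
}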
default_value, + FunctionNode & transform_node, const std::set & string_values, const ContextPtr & context) { auto result_type = getEnumType(string_values); + auto & arguments = transform_node.getArguments().getNodes(); + + auto & array_to = arguments[2]; + auto & default_value = arguments[3]; + array_to = createCastFunction(array_to, std::make_shared(result_type), context); default_value = createCastFunction(default_value, std::move(result_type), context); + + auto transform_resolver = FunctionFactory::instance().get("transform", context); + + transform_node.resolveAsFunction(transform_resolver->build(transform_node.getArgumentTypes())); } void wrapIntoToString(FunctionNode & function_node, QueryTreeNodePtr arg, ContextPtr context) { - assert(isString(function_node.getResultType())); - auto to_string_function = FunctionFactory::instance().get("toString", std::move(context)); - QueryTreeNodes arguments{std::move(arg)}; - - function_node.resolveAsFunction(std::move(to_string_function), std::make_shared()); + QueryTreeNodes arguments{ std::move(arg) }; function_node.getArguments().getNodes() = std::move(arguments); + + function_node.resolveAsFunction(to_string_function->build(function_node.getArgumentTypes())); + + assert(isString(function_node.getResultType())); } class ConvertStringsToEnumVisitor : public InDepthQueryTreeVisitor @@ -117,7 +132,8 @@ public: return; auto modified_if_node = function_node->clone(); - auto & argument_nodes = modified_if_node->as()->getArguments().getNodes(); + auto * function_if_node = modified_if_node->as(); + auto & argument_nodes = function_if_node->getArguments().getNodes(); const auto * first_literal = argument_nodes[1]->as(); const auto * second_literal = argument_nodes[2]->as(); @@ -132,7 +148,7 @@ public: string_values.insert(first_literal->getValue().get()); string_values.insert(second_literal->getValue().get()); - changeIfArguments(argument_nodes[1], argument_nodes[2], string_values, context); + changeIfArguments(*function_if_node, string_values, context); wrapIntoToString(*function_node, std::move(modified_if_node), context); return; } @@ -143,7 +159,8 @@ public: return; auto modified_transform_node = function_node->clone(); - auto & argument_nodes = modified_transform_node->as()->getArguments().getNodes(); + auto * function_modified_transform_node = modified_transform_node->as(); + auto & argument_nodes = function_modified_transform_node->getArguments().getNodes(); if (!isString(function_node->getResultType())) return; @@ -176,7 +193,7 @@ public: string_values.insert(literal_default->getValue().get()); - changeTransformArguments(argument_nodes[2], argument_nodes[3], string_values, context); + changeTransformArguments(*function_modified_transform_node, string_values, context); wrapIntoToString(*function_node, std::move(modified_transform_node), context); return; } diff --git a/src/Analyzer/Passes/MultiIfToIfPass.cpp b/src/Analyzer/Passes/MultiIfToIfPass.cpp index 6d2ebac33e6..7e13675bf98 100644 --- a/src/Analyzer/Passes/MultiIfToIfPass.cpp +++ b/src/Analyzer/Passes/MultiIfToIfPass.cpp @@ -27,7 +27,7 @@ public: return; auto result_type = function_node->getResultType(); - function_node->resolveAsFunction(if_function_ptr, std::move(result_type)); + function_node->resolveAsFunction(if_function_ptr->build(function_node->getArgumentTypes())); } private: diff --git a/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp b/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp index cd6aa4d76f4..3580b64497d 100644 --- 
a/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp +++ b/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp @@ -53,12 +53,10 @@ private: static inline void resolveAsCountAggregateFunction(FunctionNode & function_node) { - auto function_result_type = function_node.getResultType(); - AggregateFunctionProperties properties; auto aggregate_function = AggregateFunctionFactory::instance().get("count", {}, {}, properties); - function_node.resolveAsAggregateFunction(std::move(aggregate_function), std::move(function_result_type)); + function_node.resolveAsAggregateFunction(std::move(aggregate_function)); } }; diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 6f56d6fca8e..8d923d2a69d 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -4302,11 +4302,13 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi bool force_grouping_standard_compatibility = scope.context->getSettingsRef().force_grouping_standard_compatibility; auto grouping_function = std::make_shared(force_grouping_standard_compatibility); auto grouping_function_adaptor = std::make_shared(std::move(grouping_function)); - function_node.resolveAsFunction(std::move(grouping_function_adaptor), std::make_shared()); + function_node.resolveAsFunction(grouping_function_adaptor->build({})); return result_projection_names; } } + const auto & settings = scope.context->getSettingsRef(); + if (function_node.isWindowFunction()) { if (!AggregateFunctionFactory::instance().isAggregateFunctionName(function_name)) @@ -4324,10 +4326,14 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi "Window function '{}' does not support lambda arguments", function_name); - AggregateFunctionProperties properties; - auto aggregate_function = AggregateFunctionFactory::instance().get(function_name, argument_types, parameters, properties); + bool need_add_or_null = settings.aggregate_functions_null_for_empty && !function_name.ends_with("OrNull"); - function_node.resolveAsWindowFunction(aggregate_function, aggregate_function->getReturnType()); + AggregateFunctionProperties properties; + auto aggregate_function = need_add_or_null + ? AggregateFunctionFactory::instance().get(function_name + "OrNull", argument_types, parameters, properties) + : AggregateFunctionFactory::instance().get(function_name, argument_types, parameters, properties); + + function_node.resolveAsWindowFunction(aggregate_function); bool window_node_is_identifier = function_node.getWindowNode()->getNodeType() == QueryTreeNodeType::IDENTIFIER; ProjectionName window_projection_name = resolveWindow(function_node.getWindowNode(), scope); @@ -4384,9 +4390,13 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi "Aggregate function '{}' does not support lambda arguments", function_name); + bool need_add_or_null = settings.aggregate_functions_null_for_empty && !function_name.ends_with("OrNull"); + AggregateFunctionProperties properties; - auto aggregate_function = AggregateFunctionFactory::instance().get(function_name, argument_types, parameters, properties); - function_node.resolveAsAggregateFunction(aggregate_function, aggregate_function->getReturnType()); + auto aggregate_function = need_add_or_null + ? 
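The need_add_or_null check introduced above (and repeated for the regular aggregate branch just below) makes the new analyzer honor aggregate_functions_null_for_empty: the function is resolved under its -OrNull name unless it already carries that suffix. A minimal sketch of just the name selection, with the setting passed in explicitly:

#include <cassert>
#include <string>

// Choose the aggregate function name to resolve, given the
// aggregate_functions_null_for_empty setting (simplified standalone sketch).
std::string chooseAggregateFunctionName(const std::string & name, bool null_for_empty)
{
    const bool need_add_or_null = null_for_empty && !name.ends_with("OrNull");
    return need_add_or_null ? name + "OrNull" : name;
}

int main()
{
    assert(chooseAggregateFunctionName("sum", true) == "sumOrNull");
    assert(chooseAggregateFunctionName("sumOrNull", true) == "sumOrNull");  // no double suffix
    assert(chooseAggregateFunctionName("sum", false) == "sum");
}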
AggregateFunctionFactory::instance().get(function_name + "OrNull", argument_types, parameters, properties) + : AggregateFunctionFactory::instance().get(function_name, argument_types, parameters, properties); + function_node.resolveAsAggregateFunction(aggregate_function); return result_projection_names; } @@ -4563,6 +4573,8 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi constant_value = std::make_shared(std::move(column_constant_value), result_type); } } + + function_node.resolveAsFunction(std::move(function_base)); } catch (Exception & e) { @@ -4570,8 +4582,6 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi throw; } - function_node.resolveAsFunction(std::move(function), std::move(result_type)); - if (constant_value) node = std::make_shared(std::move(constant_value), node); diff --git a/src/Analyzer/Passes/SumIfToCountIfPass.cpp b/src/Analyzer/Passes/SumIfToCountIfPass.cpp index 91c277d35b3..7e120b6828d 100644 --- a/src/Analyzer/Passes/SumIfToCountIfPass.cpp +++ b/src/Analyzer/Passes/SumIfToCountIfPass.cpp @@ -81,6 +81,7 @@ public: if (nested_if_function_arguments_nodes.size() != 3) return; + auto & cond_argument = nested_if_function_arguments_nodes[0]; const auto * if_true_condition_constant_node = nested_if_function_arguments_nodes[1]->as(); const auto * if_false_condition_constant_node = nested_if_function_arguments_nodes[2]->as(); @@ -107,8 +108,8 @@ public: return; } - /// Rewrite `sum(if(cond, 0, 1))` into `countIf(not(cond))`. - if (if_true_condition_value == 0 && if_false_condition_value == 1) + /// Rewrite `sum(if(cond, 0, 1))` into `countIf(not(cond))` if condition is not Nullable (otherwise the result can be different). + if (if_true_condition_value == 0 && if_false_condition_value == 1 && !cond_argument->getResultType()->isNullable()) { DataTypePtr not_function_result_type = std::make_shared(); @@ -117,11 +118,12 @@ public: not_function_result_type = makeNullable(not_function_result_type); auto not_function = std::make_shared("not"); - not_function->resolveAsFunction(FunctionFactory::instance().get("not", context), std::move(not_function_result_type)); auto & not_function_arguments = not_function->getArguments().getNodes(); not_function_arguments.push_back(std::move(nested_if_function_arguments_nodes[0])); + not_function->resolveAsFunction(FunctionFactory::instance().get("not", context)->build(not_function->getArgumentTypes())); + function_node_arguments_nodes[0] = std::move(not_function); function_node_arguments_nodes.resize(1); @@ -139,8 +141,7 @@ private: function_node.getAggregateFunction()->getParameters(), properties); - auto function_result_type = function_node.getResultType(); - function_node.resolveAsAggregateFunction(std::move(aggregate_function), std::move(function_result_type)); + function_node.resolveAsAggregateFunction(std::move(aggregate_function)); } ContextPtr & context; diff --git a/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp b/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp index 1716c37228a..37bad70da57 100644 --- a/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp +++ b/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp @@ -76,7 +76,7 @@ public: properties); auto function_result_type = function_node->getResultType(); - function_node->resolveAsAggregateFunction(std::move(aggregate_function), std::move(function_result_type)); + function_node->resolveAsAggregateFunction(std::move(aggregate_function)); } }; diff --git 
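The extra !cond_argument->getResultType()->isNullable() guard above exists because, for a Nullable condition, the two forms most likely disagree: sum(if(cond, 0, 1)) still adds 1 for a NULL condition (the else branch is taken), while countIf(not(cond)) skips that row because not(NULL) is NULL. A small standalone model of that difference, treating NULL as std::nullopt; this is my reading of the comment in the pass, not ClickHouse code:

#include <cassert>
#include <optional>
#include <vector>

int main()
{
    // One row per element; std::nullopt models a NULL condition value.
    std::vector<std::optional<bool>> cond = {true, false, std::nullopt};

    // sum(if(cond, 0, 1)): a NULL condition falls through to the else branch (1).
    int sum_if = 0;
    for (const auto & c : cond)
        sum_if += (c.has_value() && *c) ? 0 : 1;

    // countIf(not(cond)): not(NULL) is NULL, and a NULL predicate is not counted.
    int count_if_not = 0;
    for (const auto & c : cond)
        if (c.has_value() && !*c)
            ++count_if_not;

    assert(sum_if == 2);        // the NULL row contributes 1
    assert(count_if_not == 1);  // the NULL row is not counted
}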
a/src/Analyzer/QueryTreePassManager.cpp b/src/Analyzer/QueryTreePassManager.cpp index ca9d4e3d1e3..06a1fec4698 100644 --- a/src/Analyzer/QueryTreePassManager.cpp +++ b/src/Analyzer/QueryTreePassManager.cpp @@ -21,6 +21,7 @@ #include #include +#include #include #include @@ -44,6 +45,23 @@ namespace class ValidationChecker : public InDepthQueryTreeVisitor { String pass_name; + + void visitColumn(ColumnNode * column) const + { + if (column->getColumnSourceOrNull() == nullptr) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Column {} {} query tree node does not have valid source node after running {} pass", + column->getColumnName(), column->getColumnType(), pass_name); + } + + void visitFunction(FunctionNode * function) const + { + if (!function->isResolved()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Function {} is not resolved after running {} pass", + function->dumpTree(), pass_name); + } + public: explicit ValidationChecker(String pass_name_) : pass_name(std::move(pass_name_)) @@ -51,13 +69,10 @@ public: void visitImpl(QueryTreeNodePtr & node) const { - auto * column = node->as(); - if (!column) - return; - if (column->getColumnSourceOrNull() == nullptr) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Column {} {} query tree node does not have valid source node after running {} pass", - column->getColumnName(), column->getColumnType(), pass_name); + if (auto * column = node->as()) + return visitColumn(column); + else if (auto * function = node->as()) + return visitFunction(function); } }; #endif diff --git a/src/Backups/BackupCoordinationLocal.cpp b/src/Backups/BackupCoordinationLocal.cpp index d4064902a40..e199e43fe01 100644 --- a/src/Backups/BackupCoordinationLocal.cpp +++ b/src/Backups/BackupCoordinationLocal.cpp @@ -188,15 +188,6 @@ std::optional BackupCoordinationLocal::getFileInfo(const SizeAndChecks return it->second; } -std::optional BackupCoordinationLocal::getFileSizeAndChecksum(const String & file_name) const -{ - std::lock_guard lock{mutex}; - auto it = file_names.find(file_name); - if (it == file_names.end()) - return std::nullopt; - return it->second; -} - String BackupCoordinationLocal::getNextArchiveSuffix() { std::lock_guard lock{mutex}; diff --git a/src/Backups/BackupCoordinationLocal.h b/src/Backups/BackupCoordinationLocal.h index aca7f71545b..43145a42bf6 100644 --- a/src/Backups/BackupCoordinationLocal.h +++ b/src/Backups/BackupCoordinationLocal.h @@ -48,7 +48,6 @@ public: std::optional getFileInfo(const String & file_name) const override; std::optional getFileInfo(const SizeAndChecksum & size_and_checksum) const override; - std::optional getFileSizeAndChecksum(const String & file_name) const override; String getNextArchiveSuffix() override; Strings getAllArchiveSuffixes() const override; diff --git a/src/Backups/BackupCoordinationRemote.cpp b/src/Backups/BackupCoordinationRemote.cpp index 8d8cfc4225e..18789802769 100644 --- a/src/Backups/BackupCoordinationRemote.cpp +++ b/src/Backups/BackupCoordinationRemote.cpp @@ -575,15 +575,6 @@ std::optional BackupCoordinationRemote::getFileInfo(const SizeAndCheck return deserializeFileInfo(file_info_str); } -std::optional BackupCoordinationRemote::getFileSizeAndChecksum(const String & file_name) const -{ - auto zk = getZooKeeper(); - String size_and_checksum; - if (!zk->tryGet(zookeeper_path + "/file_names/" + escapeForFileName(file_name), size_and_checksum)) - return std::nullopt; - return deserializeSizeAndChecksum(size_and_checksum); -} - String BackupCoordinationRemote::getNextArchiveSuffix() { auto zk = 
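The ValidationChecker change above extends the per-pass debug validation from "every column has a source" to also "every function node is resolved". A compact standalone sketch of the dispatch pattern it uses, with hypothetical stand-in node classes instead of the real query-tree hierarchy:

#include <stdexcept>
#include <string>

// Minimal stand-ins for the query tree node classes the checker inspects.
struct IQueryTreeNode
{
    virtual ~IQueryTreeNode() = default;
    template <typename T> T * as() { return dynamic_cast<T *>(this); }
};

struct ColumnNode : IQueryTreeNode
{
    std::string name;
    bool has_source = false;
};

struct FunctionNode : IQueryTreeNode
{
    std::string name;
    bool resolved = false;
};

// After every pass, walk the tree and verify the invariants later passes rely on.
void validate(IQueryTreeNode & node, const std::string & pass_name)
{
    if (auto * column = node.as<ColumnNode>())
    {
        if (!column->has_source)
            throw std::logic_error("Column " + column->name + " has no source after pass " + pass_name);
    }
    else if (auto * function = node.as<FunctionNode>())
    {
        if (!function->resolved)
            throw std::logic_error("Function " + function->name + " is not resolved after pass " + pass_name);
    }
}

int main()
{
    FunctionNode fn;
    fn.name = "plus";
    fn.resolved = true;
    validate(fn, "CustomizeFunctions");   // passes
}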
getZooKeeper(); diff --git a/src/Backups/BackupCoordinationRemote.h b/src/Backups/BackupCoordinationRemote.h index 83ddd7b16dc..711fadb539e 100644 --- a/src/Backups/BackupCoordinationRemote.h +++ b/src/Backups/BackupCoordinationRemote.h @@ -51,7 +51,6 @@ public: bool hasFiles(const String & directory) const override; std::optional getFileInfo(const String & file_name) const override; std::optional getFileInfo(const SizeAndChecksum & size_and_checksum) const override; - std::optional getFileSizeAndChecksum(const String & file_name) const override; String getNextArchiveSuffix() override; Strings getAllArchiveSuffixes() const override; diff --git a/src/Backups/BackupFactory.h b/src/Backups/BackupFactory.h index 9057d2cbfae..92a5e16533c 100644 --- a/src/Backups/BackupFactory.h +++ b/src/Backups/BackupFactory.h @@ -34,6 +34,7 @@ public: bool is_internal_backup = false; std::shared_ptr backup_coordination; std::optional backup_uuid; + bool deduplicate_files = true; }; static BackupFactory & instance(); diff --git a/src/Backups/BackupIO_S3.cpp b/src/Backups/BackupIO_S3.cpp index e474fabf232..f2f0a2ef5e3 100644 --- a/src/Backups/BackupIO_S3.cpp +++ b/src/Backups/BackupIO_S3.cpp @@ -80,6 +80,12 @@ namespace throw Exception(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR); return outcome.GetResult().GetContents(); } + + bool isNotFoundError(Aws::S3::S3Errors error) + { + return error == Aws::S3::S3Errors::RESOURCE_NOT_FOUND + || error == Aws::S3::S3Errors::NO_SUCH_KEY; + } } @@ -370,7 +376,7 @@ void BackupWriterS3::removeFile(const String & file_name) request.SetBucket(s3_uri.bucket); request.SetKey(fs::path(s3_uri.key) / file_name); auto outcome = client->DeleteObject(request); - if (!outcome.IsSuccess()) + if (!outcome.IsSuccess() && !isNotFoundError(outcome.GetError().GetErrorType())) throw Exception(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR); } @@ -428,7 +434,7 @@ void BackupWriterS3::removeFilesBatch(const Strings & file_names) request.SetDelete(delkeys); auto outcome = client->DeleteObjects(request); - if (!outcome.IsSuccess()) + if (!outcome.IsSuccess() && !isNotFoundError(outcome.GetError().GetErrorType())) throw Exception(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR); } } diff --git a/src/Backups/BackupImpl.cpp b/src/Backups/BackupImpl.cpp index 6d9ac0bbdad..ec35b8ed07a 100644 --- a/src/Backups/BackupImpl.cpp +++ b/src/Backups/BackupImpl.cpp @@ -167,17 +167,19 @@ BackupImpl::BackupImpl( const ContextPtr & context_, bool is_internal_backup_, const std::shared_ptr & coordination_, - const std::optional & backup_uuid_) + const std::optional & backup_uuid_, + bool deduplicate_files_) : backup_name_for_logging(backup_name_for_logging_) , archive_params(archive_params_) , use_archives(!archive_params.archive_name.empty()) , open_mode(OpenMode::WRITE) , writer(std::move(writer_)) , is_internal_backup(is_internal_backup_) - , coordination(coordination_ ? 
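The isNotFoundError() helper added above makes backup cleanup on S3 idempotent: deleting an object that is already gone is treated as success, while every other error still fails the operation. A standalone sketch of that decision, with a hypothetical error enum standing in for Aws::S3::S3Errors:

#include <cassert>
#include <stdexcept>
#include <string>

enum class S3Error { NONE, NO_SUCH_KEY, RESOURCE_NOT_FOUND, ACCESS_DENIED };

bool isNotFoundError(S3Error error)
{
    return error == S3Error::RESOURCE_NOT_FOUND || error == S3Error::NO_SUCH_KEY;
}

// A delete that races with another cleanup (or is retried) should not fail the backup.
void removeFileTolerant(S3Error outcome, const std::string & key)
{
    if (outcome != S3Error::NONE && !isNotFoundError(outcome))
        throw std::runtime_error("Cannot delete " + key);
}

int main()
{
    removeFileTolerant(S3Error::NONE, "backup/meta.xml");         // ok
    removeFileTolerant(S3Error::NO_SUCH_KEY, "backup/meta.xml");  // ok, already gone
    try
    {
        removeFileTolerant(S3Error::ACCESS_DENIED, "backup/meta.xml");
        assert(false);
    }
    catch (const std::runtime_error &) {}   // genuine errors still propagate
}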
coordination_ : std::make_shared()) + , coordination(coordination_) , uuid(backup_uuid_) , version(CURRENT_BACKUP_VERSION) , base_backup_info(base_backup_info_) + , deduplicate_files(deduplicate_files_) , log(&Poco::Logger::get("BackupImpl")) { open(context_); @@ -287,6 +289,7 @@ void BackupImpl::writeBackupMetadata() Poco::AutoPtr config{new Poco::Util::XMLConfiguration()}; config->setInt("version", CURRENT_BACKUP_VERSION); + config->setBool("deduplicate_files", deduplicate_files); config->setString("timestamp", toString(LocalDateTime{timestamp})); config->setString("uuid", toString(*uuid)); @@ -759,7 +762,7 @@ void BackupImpl::writeFile(const String & file_name, BackupEntryPtr entry) }; /// Empty file, nothing to backup - if (info.size == 0) + if (info.size == 0 && deduplicate_files) { coordination->addFileInfo(info); return; @@ -828,7 +831,7 @@ void BackupImpl::writeFile(const String & file_name, BackupEntryPtr entry) } /// Maybe we have a copy of this file in the backup already. - if (coordination->getFileInfo(std::pair{info.size, info.checksum})) + if (coordination->getFileInfo(std::pair{info.size, info.checksum}) && deduplicate_files) { LOG_TRACE(log, "File {} already exist in current backup, adding reference", adjusted_path); coordination->addFileInfo(info); @@ -861,7 +864,7 @@ void BackupImpl::writeFile(const String & file_name, BackupEntryPtr entry) bool is_data_file_required; coordination->addFileInfo(info, is_data_file_required); - if (!is_data_file_required) + if (!is_data_file_required && deduplicate_files) { LOG_TRACE(log, "File {} doesn't exist in current backup, but we have file with same size and checksum", adjusted_path); return; /// We copy data only if it's a new combination of size & checksum. diff --git a/src/Backups/BackupImpl.h b/src/Backups/BackupImpl.h index 7df4638affa..9fc881bf680 100644 --- a/src/Backups/BackupImpl.h +++ b/src/Backups/BackupImpl.h @@ -47,9 +47,10 @@ public: const std::optional & base_backup_info_, std::shared_ptr writer_, const ContextPtr & context_, - bool is_internal_backup_ = false, - const std::shared_ptr & coordination_ = {}, - const std::optional & backup_uuid_ = {}); + bool is_internal_backup_, + const std::shared_ptr & coordination_, + const std::optional & backup_uuid_, + bool deduplicate_files_); ~BackupImpl() override; @@ -132,6 +133,7 @@ private: String lock_file_name; std::atomic num_files_written = 0; bool writing_finalized = false; + bool deduplicate_files = true; const Poco::Logger * log; }; diff --git a/src/Backups/BackupSettings.cpp b/src/Backups/BackupSettings.cpp index 5266296c248..8c54b29141a 100644 --- a/src/Backups/BackupSettings.cpp +++ b/src/Backups/BackupSettings.cpp @@ -65,6 +65,7 @@ namespace M(String, password) \ M(Bool, structure_only) \ M(Bool, async) \ + M(Bool, deduplicate_files) \ M(UInt64, shard_num) \ M(UInt64, replica_num) \ M(Bool, internal) \ diff --git a/src/Backups/BackupSettings.h b/src/Backups/BackupSettings.h index 5c5f336aa45..2e7717c3afe 100644 --- a/src/Backups/BackupSettings.h +++ b/src/Backups/BackupSettings.h @@ -32,6 +32,9 @@ struct BackupSettings /// Whether the BACKUP command must return immediately without waiting until the backup has completed. bool async = false; + /// Whether the BACKUP will omit similar files (within one backup only). + bool deduplicate_files = true; + /// 1-based shard index to store in the backup. 0 means all shards. /// Can only be used with BACKUP ON CLUSTER. 
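The deduplicate_files flag threaded through BackupImpl above gates the existing (size, checksum) deduplication: with the setting on, a file whose content is already in the backup is stored as a reference; with it off, the data is always copied. A minimal sketch of that writeFile() decision, using a plain map in place of the backup coordination:

#include <cassert>
#include <cstdint>
#include <map>
#include <string>
#include <utility>

using SizeAndChecksum = std::pair<std::uint64_t, std::uint64_t>;

struct BackupWriterSketch
{
    bool deduplicate_files = true;
    std::map<SizeAndChecksum, std::string> stored;   // (size, checksum) -> first file with that content

    bool needsDataCopy(const std::string & file_name, SizeAndChecksum key)
    {
        bool inserted = stored.try_emplace(key, file_name).second;
        if (!inserted && deduplicate_files)
            return false;    // same content already in this backup: add a reference only
        return true;
    }
};

int main()
{
    BackupWriterSketch dedup;
    assert(dedup.needsDataCopy("a.bin", {1024, 0xabcd}));
    assert(!dedup.needsDataCopy("b.bin", {1024, 0xabcd}));   // duplicate content skipped

    BackupWriterSketch plain;
    plain.deduplicate_files = false;
    assert(plain.needsDataCopy("a.bin", {1024, 0xabcd}));
    assert(plain.needsDataCopy("b.bin", {1024, 0xabcd}));    // copied again when dedup is off
}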
size_t shard_num = 0; diff --git a/src/Backups/BackupsWorker.cpp b/src/Backups/BackupsWorker.cpp index affcea94c57..267400ce66d 100644 --- a/src/Backups/BackupsWorker.cpp +++ b/src/Backups/BackupsWorker.cpp @@ -286,6 +286,7 @@ void BackupsWorker::doBackup( backup_create_params.is_internal_backup = backup_settings.internal; backup_create_params.backup_coordination = backup_coordination; backup_create_params.backup_uuid = backup_settings.backup_uuid; + backup_create_params.deduplicate_files = backup_settings.deduplicate_files; BackupMutablePtr backup = BackupFactory::instance().createBackup(backup_create_params); /// Write the backup. diff --git a/src/Backups/IBackupCoordination.h b/src/Backups/IBackupCoordination.h index 5e120218544..7ff911488aa 100644 --- a/src/Backups/IBackupCoordination.h +++ b/src/Backups/IBackupCoordination.h @@ -108,7 +108,6 @@ public: virtual std::optional getFileInfo(const String & file_name) const = 0; virtual std::optional getFileInfo(const SizeAndChecksum & size_and_checksum) const = 0; - virtual std::optional getFileSizeAndChecksum(const String & file_name) const = 0; /// Generates a new archive suffix, e.g. "001", "002", "003", ... virtual String getNextArchiveSuffix() = 0; diff --git a/src/Backups/registerBackupEngineS3.cpp b/src/Backups/registerBackupEngineS3.cpp index 33b0049dc4d..ad190b4f4a0 100644 --- a/src/Backups/registerBackupEngineS3.cpp +++ b/src/Backups/registerBackupEngineS3.cpp @@ -116,7 +116,16 @@ void registerBackupEngineS3(BackupFactory & factory) else { auto writer = std::make_shared(S3::URI{s3_uri}, access_key_id, secret_access_key, params.context); - return std::make_unique(backup_name_for_logging, archive_params, params.base_backup_info, writer, params.context, params.is_internal_backup, params.backup_coordination, params.backup_uuid); + return std::make_unique( + backup_name_for_logging, + archive_params, + params.base_backup_info, + writer, + params.context, + params.is_internal_backup, + params.backup_coordination, + params.backup_uuid, + params.deduplicate_files); } #else throw Exception("S3 support is disabled", ErrorCodes::SUPPORT_IS_DISABLED); diff --git a/src/Backups/registerBackupEnginesFileAndDisk.cpp b/src/Backups/registerBackupEnginesFileAndDisk.cpp index def9c5ba188..49ad51534eb 100644 --- a/src/Backups/registerBackupEnginesFileAndDisk.cpp +++ b/src/Backups/registerBackupEnginesFileAndDisk.cpp @@ -181,7 +181,16 @@ void registerBackupEnginesFileAndDisk(BackupFactory & factory) writer = std::make_shared(path); else writer = std::make_shared(disk, path); - return std::make_unique(backup_name_for_logging, archive_params, params.base_backup_info, writer, params.context, params.is_internal_backup, params.backup_coordination, params.backup_uuid); + return std::make_unique( + backup_name_for_logging, + archive_params, + params.base_backup_info, + writer, + params.context, + params.is_internal_backup, + params.backup_coordination, + params.backup_uuid, + params.deduplicate_files); } }; diff --git a/src/Bridge/IBridge.cpp b/src/Bridge/IBridge.cpp index afaaf11b26a..2d97bba6287 100644 --- a/src/Bridge/IBridge.cpp +++ b/src/Bridge/IBridge.cpp @@ -61,14 +61,8 @@ namespace Poco::Net::SocketAddress socketBindListen(Poco::Net::ServerSocket & socket, const std::string & host, UInt16 port, Poco::Logger * log) { auto address = makeSocketAddress(host, port, log); -#if POCO_VERSION < 0x01080000 - socket.bind(address, /* reuseAddress = */ true); -#else socket.bind(address, /* reuseAddress = */ true, /* reusePort = */ false); -#endif - 
socket.listen(/* backlog = */ 64); - return address; } } diff --git a/src/BridgeHelper/IBridgeHelper.cpp b/src/BridgeHelper/IBridgeHelper.cpp index 7d6ce74c698..3445b655784 100644 --- a/src/BridgeHelper/IBridgeHelper.cpp +++ b/src/BridgeHelper/IBridgeHelper.cpp @@ -2,11 +2,10 @@ #include #include -#include -#include #include #include + namespace fs = std::filesystem; namespace DB @@ -97,9 +96,13 @@ std::unique_ptr IBridgeHelper::startBridgeCommand() LOG_TRACE(getLog(), "Starting {}", serviceAlias()); + /// We will terminate it with the KILL signal instead of the TERM signal, + /// because it's more reliable for arbitrary third-party ODBC drivers. + /// The drivers can spawn threads, install their own signal handlers... we don't care. + ShellCommand::Config command_config(path.string()); command_config.arguments = cmd_args; - command_config.terminate_in_destructor_strategy = ShellCommand::DestructorStrategy(true); + command_config.terminate_in_destructor_strategy = ShellCommand::DestructorStrategy(true, SIGKILL); return ShellCommand::executeDirect(command_config); } diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index d5bcfab1dd0..84b75590706 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -11,11 +11,7 @@ if(COMPILER_PIPE) else() set(MAX_COMPILER_MEMORY 1500) endif() -if(USE_STATIC_LIBRARIES) - set(MAX_LINKER_MEMORY 3500) -else() - set(MAX_LINKER_MEMORY 2500) -endif() +set(MAX_LINKER_MEMORY 3500) include(../cmake/limit_jobs.cmake) include (../cmake/version.cmake) @@ -200,10 +196,6 @@ endif () add_library(clickhouse_common_io ${clickhouse_common_io_headers} ${clickhouse_common_io_sources}) -if (SPLIT_SHARED_LIBRARIES) - target_compile_definitions(clickhouse_common_io PRIVATE SPLIT_SHARED_LIBRARIES) -endif () - add_library (clickhouse_malloc OBJECT Common/malloc.cpp) set_source_files_properties(Common/malloc.cpp PROPERTIES COMPILE_FLAGS "-fno-builtin") @@ -227,18 +219,7 @@ add_subdirectory(Common/Config) set (all_modules) macro(add_object_library name common_path) - if (USE_STATIC_LIBRARIES OR NOT SPLIT_SHARED_LIBRARIES) - add_headers_and_sources(dbms ${common_path}) - else () - list (APPEND all_modules ${name}) - add_headers_and_sources(${name} ${common_path}) - add_library(${name} SHARED ${${name}_sources} ${${name}_headers}) - if (OS_DARWIN) - target_link_libraries (${name} PRIVATE -Wl,-undefined,dynamic_lookup) - else() - target_link_libraries (${name} PRIVATE -Wl,--unresolved-symbols=ignore-all) - endif() - endif () + add_headers_and_sources(dbms ${common_path}) endmacro() add_object_library(clickhouse_access Access) @@ -297,28 +278,12 @@ if (TARGET ch_contrib::nuraft) add_object_library(clickhouse_coordination Coordination) endif() -if (USE_STATIC_LIBRARIES OR NOT SPLIT_SHARED_LIBRARIES) - add_library (dbms STATIC ${dbms_headers} ${dbms_sources}) - target_link_libraries (dbms PRIVATE ch_contrib::libdivide) - if (TARGET ch_contrib::jemalloc) - target_link_libraries (dbms PRIVATE ch_contrib::jemalloc) - endif() - set (all_modules dbms) -else() - add_library (dbms SHARED ${dbms_headers} ${dbms_sources}) - target_link_libraries (dbms PUBLIC ${all_modules}) - target_link_libraries (clickhouse_interpreters PRIVATE ch_contrib::libdivide) - if (TARGET ch_contrib::jemalloc) - target_link_libraries (clickhouse_interpreters PRIVATE ch_contrib::jemalloc) - endif() - list (APPEND all_modules dbms) - # force all split libs to be linked - if (OS_DARWIN) - set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-undefined,error") - else() - set (CMAKE_SHARED_LINKER_FLAGS 
"${CMAKE_SHARED_LINKER_FLAGS} -Wl,--no-as-needed") - endif() -endif () +add_library (dbms STATIC ${dbms_headers} ${dbms_sources}) +target_link_libraries (dbms PRIVATE ch_contrib::libdivide) +if (TARGET ch_contrib::jemalloc) + target_link_libraries (dbms PRIVATE ch_contrib::jemalloc) +endif() +set (all_modules dbms) macro (dbms_target_include_directories) foreach (module ${all_modules}) @@ -332,6 +297,12 @@ macro (dbms_target_link_libraries) endforeach () endmacro () +macro (dbms_target_include_directories) + foreach (module ${all_modules}) + target_include_directories (${module} ${ARGN}) + endforeach () +endmacro () + dbms_target_include_directories (PUBLIC "${ClickHouse_SOURCE_DIR}/src" "${ClickHouse_BINARY_DIR}/src") target_include_directories (clickhouse_common_io PUBLIC "${ClickHouse_SOURCE_DIR}/src" "${ClickHouse_BINARY_DIR}/src") @@ -344,7 +315,7 @@ set_source_files_properties( Common/Elf.cpp Common/Dwarf.cpp Common/SymbolIndex.cpp - PROPERTIES COMPILE_FLAGS "-O3 ${WITHOUT_COVERAGE}") + PROPERTIES COMPILE_FLAGS "-O2 ${WITHOUT_COVERAGE}") target_link_libraries (clickhouse_common_io PRIVATE @@ -390,6 +361,7 @@ if (TARGET ch_contrib::cpuid) endif() dbms_target_link_libraries(PUBLIC ch_contrib::abseil_swiss_tables) +target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::abseil_swiss_tables) # Make dbms depend on roaring instead of clickhouse_common_io so that roaring itself can depend on clickhouse_common_io # That way we we can redirect malloc/free functions avoiding circular dependencies @@ -589,6 +561,11 @@ if (TARGET ch_contrib::annoy) dbms_target_link_libraries(PUBLIC ch_contrib::annoy) endif() +if (TARGET ch_rust::skim) + # Add only -I, library is needed only for clickhouse-client/clickhouse-local + dbms_target_include_directories(PRIVATE $) +endif() + include ("${ClickHouse_SOURCE_DIR}/cmake/add_check.cmake") if (ENABLE_TESTS) diff --git a/src/Client/CMakeLists.txt b/src/Client/CMakeLists.txt index 119414a8a70..83bbe418246 100644 --- a/src/Client/CMakeLists.txt +++ b/src/Client/CMakeLists.txt @@ -1,3 +1,3 @@ if (ENABLE_EXAMPLES) add_subdirectory(examples) -endif() \ No newline at end of file +endif() diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 09e44a3ac09..75d439d26d5 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -1,42 +1,30 @@ #include +#include +#include +#include +#include +#include -#include -#include -#include -#include - -#include "config.h" - +#include +#include +#include +#include #include #include -#include -#include #include -#include #include #include #include #include -#include -#include -#include -#include -#include -#include - -#include "config_version.h" - #include #include #include #include #include #include -#include - -#include -#include -#include "TestTags.h" +#include +#include +#include #include #include @@ -53,26 +41,36 @@ #include #include #include +#include #include #include #include -#include -#include #include #include #include #include #include +#include +#include #include #include #include #include #include -#include #include -#include + +#include +#include + #include +#include +#include +#include +#include + +#include "config_version.h" +#include "config.h" namespace fs = std::filesystem; @@ -1036,7 +1034,13 @@ void ClientBase::onEndOfStream() progress_indication.clearProgressOutput(*tty_buf); if (output_format) + { + /// Do our best to estimate the start of the query so the output format matches the one reported by the server + bool is_running = false; + output_format->setStartTime( + 
clock_gettime_ns(CLOCK_MONOTONIC) - static_cast(progress_indication.elapsedSeconds() * 1000000000), is_running); output_format->finalize(); + } resetOutput(); @@ -1112,6 +1116,8 @@ void ClientBase::onProfileEvents(Block & block) /// Flush all buffers. void ClientBase::resetOutput() { + if (output_format) + output_format->finalize(); output_format.reset(); logs_out_stream.reset(); diff --git a/src/Client/ClientBaseHelpers.h b/src/Client/ClientBaseHelpers.h index 2a79332eb98..adc1c81b3c5 100644 --- a/src/Client/ClientBaseHelpers.h +++ b/src/Client/ClientBaseHelpers.h @@ -4,7 +4,7 @@ #include "config.h" #if USE_REPLXX -# include +# include #endif diff --git a/src/Client/ConnectionParameters.cpp b/src/Client/ConnectionParameters.cpp index 6d538fee307..1a0922b2133 100644 --- a/src/Client/ConnectionParameters.cpp +++ b/src/Client/ConnectionParameters.cpp @@ -9,7 +9,6 @@ #include #include #include -#include #include #include diff --git a/base/base/LineReader.cpp b/src/Client/LineReader.cpp similarity index 99% rename from base/base/LineReader.cpp rename to src/Client/LineReader.cpp index cc632f79638..f49e48be617 100644 --- a/base/base/LineReader.cpp +++ b/src/Client/LineReader.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include @@ -65,6 +65,9 @@ void addNewWords(Words & to, const Words & from, Compare comp) } +namespace DB +{ + replxx::Replxx::completions_t LineReader::Suggest::getCompletions(const String & prefix, size_t prefix_length) { std::string_view last_word; @@ -202,3 +205,5 @@ LineReader::InputStatus LineReader::readOneLine(const String & prompt) trim(input); return INPUT_LINE; } + +} diff --git a/base/base/LineReader.h b/src/Client/LineReader.h similarity index 99% rename from base/base/LineReader.h rename to src/Client/LineReader.h index d4ab327fe00..321cf41b77e 100644 --- a/base/base/LineReader.h +++ b/src/Client/LineReader.h @@ -9,6 +9,9 @@ #include #include +namespace DB +{ + class LineReader { public: @@ -68,3 +71,5 @@ protected: virtual InputStatus readOneLine(const String & prompt); virtual void addToHistory(const String &) {} }; + +} diff --git a/src/Client/QueryFuzzer.cpp b/src/Client/QueryFuzzer.cpp index d5cd4ef1548..018e0c6f130 100644 --- a/src/Client/QueryFuzzer.cpp +++ b/src/Client/QueryFuzzer.cpp @@ -327,9 +327,7 @@ void QueryFuzzer::fuzzOrderByList(IAST * ast) // Add element if (fuzz_rand() % 50 == 0) { - auto pos = list->children.empty() - ? list->children.begin() - : list->children.begin() + fuzz_rand() % list->children.size(); + auto * pos = list->children.empty() ? list->children.begin() : list->children.begin() + fuzz_rand() % list->children.size(); auto col = getRandomColumnLike(); if (col) { @@ -373,9 +371,7 @@ void QueryFuzzer::fuzzColumnLikeExpressionList(IAST * ast) // Add element if (fuzz_rand() % 50 == 0) { - auto pos = impl->children.empty() - ? impl->children.begin() - : impl->children.begin() + fuzz_rand() % impl->children.size(); + auto * pos = impl->children.empty() ? 
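The onEndOfStream() change above reconstructs the query start time for the output format as "now minus elapsed", since the client only tracks how long the query has been running. A small sketch of that arithmetic with standard <chrono> in place of clock_gettime_ns:

#include <cassert>
#include <chrono>
#include <cstdint>

std::uint64_t estimateQueryStartNs(std::uint64_t now_ns, double elapsed_seconds)
{
    return now_ns - static_cast<std::uint64_t>(elapsed_seconds * 1'000'000'000);
}

int main()
{
    const auto now_ns = static_cast<std::uint64_t>(
        std::chrono::duration_cast<std::chrono::nanoseconds>(
            std::chrono::steady_clock::now().time_since_epoch()).count());

    const double elapsed_seconds = 2.5;   // as reported by the progress indication
    const auto start_ns = estimateQueryStartNs(now_ns, elapsed_seconds);
    assert(start_ns < now_ns);
}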
impl->children.begin() : impl->children.begin() + fuzz_rand() % impl->children.size(); auto col = getRandomColumnLike(); if (col) impl->children.insert(pos, col); diff --git a/base/base/ReplxxLineReader.cpp b/src/Client/ReplxxLineReader.cpp similarity index 97% rename from base/base/ReplxxLineReader.cpp rename to src/Client/ReplxxLineReader.cpp index 9e29f7744fa..9fc0fec761d 100644 --- a/base/base/ReplxxLineReader.cpp +++ b/src/Client/ReplxxLineReader.cpp @@ -1,6 +1,10 @@ -#include +#include #include +#include +#include +#include + #include #include #include @@ -108,13 +112,11 @@ void writeRetry(int fd, const std::string & data) } std::string readFile(const std::string & path) { - std::ifstream t(path); - std::string str; - t.seekg(0, std::ios::end); - str.reserve(t.tellg()); - t.seekg(0, std::ios::beg); - str.assign((std::istreambuf_iterator(t)), std::istreambuf_iterator()); - return str; + std::string out; + DB::WriteBufferFromString out_buffer(out); + DB::ReadBufferFromFile in_buffer(path); + DB::copyData(in_buffer, out_buffer); + return out; } /// Simple wrapper for temporary files. @@ -269,6 +271,9 @@ void convertHistoryFile(const std::string & path, replxx::Replxx & rx) } +namespace DB +{ + static bool replxx_last_is_delimiter = false; void ReplxxLineReader::setLastIsDelimiter(bool flag) { @@ -402,10 +407,11 @@ ReplxxLineReader::ReplxxLineReader( words.push_back(hs.get().text()); } + std::string current_query(rx.get_state().text()); std::string new_query; try { - new_query = std::string(skim(words)); + new_query = std::string(skim(current_query, words)); } catch (const std::exception & e) { @@ -507,3 +513,5 @@ void ReplxxLineReader::enableBracketedPaste() bracketed_paste_enabled = true; rx.enable_bracketed_paste(); } + +} diff --git a/base/base/ReplxxLineReader.h b/src/Client/ReplxxLineReader.h similarity index 98% rename from base/base/ReplxxLineReader.h rename to src/Client/ReplxxLineReader.h index 428fbf144c3..d36a1d0f42c 100644 --- a/base/base/ReplxxLineReader.h +++ b/src/Client/ReplxxLineReader.h @@ -1,9 +1,11 @@ #pragma once #include "LineReader.h" - #include +namespace DB +{ + class ReplxxLineReader : public LineReader { public: @@ -36,3 +38,5 @@ private: std::string editor; }; + +} diff --git a/src/Client/Suggest.h b/src/Client/Suggest.h index 25d45f7ffaf..cfe9315879c 100644 --- a/src/Client/Suggest.h +++ b/src/Client/Suggest.h @@ -5,8 +5,8 @@ #include #include #include +#include #include -#include #include diff --git a/src/Columns/ColumnAggregateFunction.cpp b/src/Columns/ColumnAggregateFunction.cpp index f51a0426199..58643f7a9b7 100644 --- a/src/Columns/ColumnAggregateFunction.cpp +++ b/src/Columns/ColumnAggregateFunction.cpp @@ -146,7 +146,7 @@ MutableColumnPtr ColumnAggregateFunction::convertToValues(MutableColumnPtr colum /// insertResultInto may invalidate states, so we must unshare ownership of them column_aggregate_func.ensureOwnership(); - MutableColumnPtr res = func->getReturnType()->createColumn(); + MutableColumnPtr res = func->getResultType()->createColumn(); res->reserve(data.size()); /// If there are references to states in final column, we must hold their ownership diff --git a/src/Columns/ColumnFunction.h b/src/Columns/ColumnFunction.h index 4781406c3b9..257bd1146fd 100644 --- a/src/Columns/ColumnFunction.h +++ b/src/Columns/ColumnFunction.h @@ -13,7 +13,7 @@ namespace ErrorCodes } class IFunctionBase; -using FunctionBasePtr = std::shared_ptr; +using FunctionBasePtr = std::shared_ptr; /** A column containing a lambda expression. 
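The readFile() rewrite above replaces the hand-rolled ifstream/seekg slurp with ClickHouse's own buffer classes (ReadBufferFromFile, WriteBufferFromString, copyData), so file errors are reported the same way as everywhere else in the code base. For comparison, a standard-library-only equivalent of "read a whole file into a string"; this is a sketch, not the code from the patch:

#include <fstream>
#include <sstream>
#include <stdexcept>
#include <string>

std::string readFile(const std::string & path)
{
    std::ifstream in(path, std::ios::binary);
    if (!in)
        throw std::runtime_error("Cannot open " + path);
    std::ostringstream out;
    out << in.rdbuf();   // copy the whole stream into the string buffer
    return out.str();
}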
* Behaves like a constant-column. Contains an expression, but not input or output data. diff --git a/src/Common/AllocationTrace.h b/src/Common/AllocationTrace.h deleted file mode 100644 index 332808c8015..00000000000 --- a/src/Common/AllocationTrace.h +++ /dev/null @@ -1,16 +0,0 @@ -#pragma once -#include - -/// This is a structure which is returned by MemoryTracker. -/// Methods onAlloc/onFree should be called after actual memory allocation if it succeed. -/// For now, it will only collect allocation trace with sample_probability. -struct AllocationTrace -{ - AllocationTrace() = default; - explicit AllocationTrace(double sample_probability_); - - void onAlloc(void * ptr, size_t size) const; - void onFree(void * ptr, size_t size) const; - - double sample_probability = 0; -}; diff --git a/src/Common/Allocator.h b/src/Common/Allocator.h index 8c4f2ef1690..c348eaea006 100644 --- a/src/Common/Allocator.h +++ b/src/Common/Allocator.h @@ -92,10 +92,8 @@ public: void * alloc(size_t size, size_t alignment = 0) { checkSize(size); - auto trace = CurrentMemoryTracker::alloc(size); - void * ptr = allocNoTrack(size, alignment); - trace.onAlloc(ptr, size); - return ptr; + CurrentMemoryTracker::alloc(size); + return allocNoTrack(size, alignment); } /// Free memory range. @@ -105,8 +103,7 @@ public: { checkSize(size); freeNoTrack(buf, size); - auto trace = CurrentMemoryTracker::free(size); - trace.onFree(buf, size); + CurrentMemoryTracker::free(size); } catch (...) { @@ -132,16 +129,13 @@ public: && alignment <= MALLOC_MIN_ALIGNMENT) { /// Resize malloc'd memory region with no special alignment requirement. - auto trace = CurrentMemoryTracker::realloc(old_size, new_size); - trace.onFree(buf, old_size); + CurrentMemoryTracker::realloc(old_size, new_size); void * new_buf = ::realloc(buf, new_size); if (nullptr == new_buf) DB::throwFromErrno(fmt::format("Allocator: Cannot realloc from {} to {}.", ReadableSize(old_size), ReadableSize(new_size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY); buf = new_buf; - trace.onAlloc(buf, new_size); - if constexpr (clear_memory) if (new_size > old_size) memset(reinterpret_cast(buf) + old_size, 0, new_size - old_size); @@ -149,8 +143,7 @@ public: else if (old_size >= MMAP_THRESHOLD && new_size >= MMAP_THRESHOLD) { /// Resize mmap'd memory region. - auto trace = CurrentMemoryTracker::realloc(old_size, new_size); - trace.onFree(buf, old_size); + CurrentMemoryTracker::realloc(old_size, new_size); // On apple and freebsd self-implemented mremap used (common/mremap.h) buf = clickhouse_mremap(buf, old_size, new_size, MREMAP_MAYMOVE, @@ -159,17 +152,14 @@ public: DB::throwFromErrno(fmt::format("Allocator: Cannot mremap memory chunk from {} to {}.", ReadableSize(old_size), ReadableSize(new_size)), DB::ErrorCodes::CANNOT_MREMAP); - trace.onAlloc(buf, new_size); /// No need for zero-fill, because mmap guarantees it. } else if (new_size < MMAP_THRESHOLD) { /// Small allocs that requires a copy. Assume there's enough memory in system. Call CurrentMemoryTracker once. 
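The ConcurrentBoundedQueue::emplace() fix above is a classic wrapper bug: the old code called emplaceImpl() and returned true unconditionally, so a caller could not tell that a finished queue had rejected the element. A minimal standalone model of the bug class and the fix (the real queue is lock-based and bounded; this sketch only shows the return-value propagation):

#include <cassert>
#include <queue>

class BoundedQueueSketch
{
public:
    bool pushImpl(int value)
    {
        if (finished)
            return false;       // element rejected: the queue is closed
        data.push(value);
        return true;
    }

    // Wrong: `pushImpl(value); return true;` would report success even when rejected.
    bool push(int value) { return pushImpl(value); }

    void finish() { finished = true; }

private:
    std::queue<int> data;
    bool finished = false;
};

int main()
{
    BoundedQueueSketch queue;
    assert(queue.push(1));
    queue.finish();
    assert(!queue.push(2));     // the caller must see the failure
}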
- auto trace = CurrentMemoryTracker::realloc(old_size, new_size); - trace.onFree(buf, old_size); + CurrentMemoryTracker::realloc(old_size, new_size); void * new_buf = allocNoTrack(new_size, alignment); - trace.onAlloc(new_buf, new_size); memcpy(new_buf, buf, std::min(old_size, new_size)); freeNoTrack(buf, old_size); buf = new_buf; diff --git a/src/Common/AllocatorWithMemoryTracking.h b/src/Common/AllocatorWithMemoryTracking.h index b43870e05b2..815c326ed62 100644 --- a/src/Common/AllocatorWithMemoryTracking.h +++ b/src/Common/AllocatorWithMemoryTracking.h @@ -30,24 +30,21 @@ struct AllocatorWithMemoryTracking throw std::bad_alloc(); size_t bytes = n * sizeof(T); - auto trace = CurrentMemoryTracker::alloc(bytes); + CurrentMemoryTracker::alloc(bytes); T * p = static_cast(malloc(bytes)); if (!p) throw std::bad_alloc(); - trace.onAlloc(p, bytes); - return p; } void deallocate(T * p, size_t n) noexcept { - size_t bytes = n * sizeof(T); - free(p); - auto trace = CurrentMemoryTracker::free(bytes); - trace.onFree(p, bytes); + + size_t bytes = n * sizeof(T); + CurrentMemoryTracker::free(bytes); } }; diff --git a/src/Common/AsynchronousMetrics.cpp b/src/Common/AsynchronousMetrics.cpp index d4626d317c7..b68fcab2449 100644 --- a/src/Common/AsynchronousMetrics.cpp +++ b/src/Common/AsynchronousMetrics.cpp @@ -2,12 +2,7 @@ #include #include #include -#include #include -#include -#include -#include -#include #include #include #include diff --git a/src/Common/ConcurrentBoundedQueue.h b/src/Common/ConcurrentBoundedQueue.h index fa692a4b273..fd4a2d5790b 100644 --- a/src/Common/ConcurrentBoundedQueue.h +++ b/src/Common/ConcurrentBoundedQueue.h @@ -109,8 +109,7 @@ public: template [[nodiscard]] bool emplace(Args &&... args) { - emplaceImpl(std::nullopt /* timeout in milliseconds */, std::forward(args...)); - return true; + return emplaceImpl(std::nullopt /* timeout in milliseconds */, std::forward(args...)); } /// Returns false if queue is finished and empty diff --git a/src/Common/CurrentMemoryTracker.cpp b/src/Common/CurrentMemoryTracker.cpp index 0147a095185..720df07efb9 100644 --- a/src/Common/CurrentMemoryTracker.cpp +++ b/src/Common/CurrentMemoryTracker.cpp @@ -37,7 +37,7 @@ MemoryTracker * getMemoryTracker() using DB::current_thread; -AllocationTrace CurrentMemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded) +void CurrentMemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded) { #ifdef MEMORY_TRACKER_DEBUG_CHECKS if (unlikely(memory_tracker_always_throw_logical_error_on_allocation)) @@ -55,9 +55,8 @@ AllocationTrace CurrentMemoryTracker::allocImpl(Int64 size, bool throw_if_memory if (will_be > current_thread->untracked_memory_limit) { - auto res = memory_tracker->allocImpl(will_be, throw_if_memory_exceeded); + memory_tracker->allocImpl(will_be, throw_if_memory_exceeded); current_thread->untracked_memory = 0; - return res; } else { @@ -69,40 +68,36 @@ AllocationTrace CurrentMemoryTracker::allocImpl(Int64 size, bool throw_if_memory /// total_memory_tracker only, ignore untracked_memory else { - return memory_tracker->allocImpl(size, throw_if_memory_exceeded); + memory_tracker->allocImpl(size, throw_if_memory_exceeded); } - - return AllocationTrace(memory_tracker->getSampleProbability()); } - - return AllocationTrace(0); } void CurrentMemoryTracker::check() { if (auto * memory_tracker = getMemoryTracker()) - std::ignore = memory_tracker->allocImpl(0, true); + memory_tracker->allocImpl(0, true); } -AllocationTrace CurrentMemoryTracker::alloc(Int64 size) +void 
CurrentMemoryTracker::alloc(Int64 size) { bool throw_if_memory_exceeded = true; - return allocImpl(size, throw_if_memory_exceeded); + allocImpl(size, throw_if_memory_exceeded); } -AllocationTrace CurrentMemoryTracker::allocNoThrow(Int64 size) +void CurrentMemoryTracker::allocNoThrow(Int64 size) { bool throw_if_memory_exceeded = false; - return allocImpl(size, throw_if_memory_exceeded); + allocImpl(size, throw_if_memory_exceeded); } -AllocationTrace CurrentMemoryTracker::realloc(Int64 old_size, Int64 new_size) +void CurrentMemoryTracker::realloc(Int64 old_size, Int64 new_size) { Int64 addition = new_size - old_size; - return addition > 0 ? alloc(addition) : free(-addition); + addition > 0 ? alloc(addition) : free(-addition); } -AllocationTrace CurrentMemoryTracker::free(Int64 size) +void CurrentMemoryTracker::free(Int64 size) { if (auto * memory_tracker = getMemoryTracker()) { @@ -111,20 +106,15 @@ AllocationTrace CurrentMemoryTracker::free(Int64 size) current_thread->untracked_memory -= size; if (current_thread->untracked_memory < -current_thread->untracked_memory_limit) { - Int64 untracked_memory = current_thread->untracked_memory; + memory_tracker->free(-current_thread->untracked_memory); current_thread->untracked_memory = 0; - return memory_tracker->free(-untracked_memory); } } /// total_memory_tracker only, ignore untracked_memory else { - return memory_tracker->free(size); + memory_tracker->free(size); } - - return AllocationTrace(memory_tracker->getSampleProbability()); } - - return AllocationTrace(0); } diff --git a/src/Common/CurrentMemoryTracker.h b/src/Common/CurrentMemoryTracker.h index ba46f458e4a..e125e4cbe4a 100644 --- a/src/Common/CurrentMemoryTracker.h +++ b/src/Common/CurrentMemoryTracker.h @@ -1,20 +1,19 @@ #pragma once #include -#include /// Convenience methods, that use current thread's memory_tracker if it is available. struct CurrentMemoryTracker { /// Call the following functions before calling of corresponding operations with memory allocators. - [[nodiscard]] static AllocationTrace alloc(Int64 size); - [[nodiscard]] static AllocationTrace allocNoThrow(Int64 size); - [[nodiscard]] static AllocationTrace realloc(Int64 old_size, Int64 new_size); + static void alloc(Int64 size); + static void allocNoThrow(Int64 size); + static void realloc(Int64 old_size, Int64 new_size); /// This function should be called after memory deallocation. - [[nodiscard]] static AllocationTrace free(Int64 size); + static void free(Int64 size); static void check(); private: - [[nodiscard]] static AllocationTrace allocImpl(Int64 size, bool throw_if_memory_exceeded); + static void allocImpl(Int64 size, bool throw_if_memory_exceeded); }; diff --git a/src/Common/DateLUTImpl.h b/src/Common/DateLUTImpl.h index 2f8aa487621..84f063f9555 100644 --- a/src/Common/DateLUTImpl.h +++ b/src/Common/DateLUTImpl.h @@ -1204,6 +1204,11 @@ public: return res; } + template + inline DateTimeComponents toDateTimeComponents(DateOrTime v) const + { + return toDateTimeComponents(lut[toLUTIndex(v)].date); + } inline UInt64 toNumYYYYMMDDhhmmss(Time t) const { diff --git a/src/Common/FiberStack.h b/src/Common/FiberStack.h index 91bb632d807..c55608311d0 100644 --- a/src/Common/FiberStack.h +++ b/src/Common/FiberStack.h @@ -57,8 +57,7 @@ public: } /// Do not count guard page in memory usage. 
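The CurrentMemoryTracker revert above keeps the untracked_memory batching scheme: small per-thread allocations are accumulated locally and only pushed to the shared MemoryTracker once they cross untracked_memory_limit, in either direction. A standalone sketch of that batching, with a plain global counter standing in for the atomic tracker and an illustrative 4 MiB limit:

#include <cassert>
#include <cstdint>

struct ThreadMemoryState
{
    std::int64_t untracked_memory = 0;
    std::int64_t untracked_memory_limit = 4 * 1024 * 1024;   // example value
};

std::int64_t global_tracked_amount = 0;    // stand-in for MemoryTracker::amount

void onAlloc(ThreadMemoryState & thread, std::int64_t size)
{
    thread.untracked_memory += size;
    if (thread.untracked_memory > thread.untracked_memory_limit)
    {
        global_tracked_amount += thread.untracked_memory;   // one "expensive" shared update
        thread.untracked_memory = 0;
    }
}

void onFree(ThreadMemoryState & thread, std::int64_t size)
{
    thread.untracked_memory -= size;
    if (thread.untracked_memory < -thread.untracked_memory_limit)
    {
        global_tracked_amount += thread.untracked_memory;   // flush the accumulated (negative) delta
        thread.untracked_memory = 0;
    }
}

int main()
{
    ThreadMemoryState thread;
    onAlloc(thread, 1024);                    // stays thread-local
    assert(global_tracked_amount == 0);
    onAlloc(thread, 8 * 1024 * 1024);         // crosses the limit and is flushed
    assert(global_tracked_amount > 0);
}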
- auto trace = CurrentMemoryTracker::alloc(num_pages * page_size); - trace.onAlloc(vp, num_pages * page_size); + CurrentMemoryTracker::alloc(num_pages * page_size); boost::context::stack_context sctx; sctx.size = num_bytes; @@ -78,7 +77,6 @@ public: ::munmap(vp, sctx.size); /// Do not count guard page in memory usage. - auto trace = CurrentMemoryTracker::free(sctx.size - page_size); - trace.onFree(vp, sctx.size - page_size); + CurrentMemoryTracker::free(sctx.size - page_size); } }; diff --git a/src/Common/HashTable/ClearableHashSet.h b/src/Common/HashTable/ClearableHashSet.h index 371302dd8a9..292f0e4059a 100644 --- a/src/Common/HashTable/ClearableHashSet.h +++ b/src/Common/HashTable/ClearableHashSet.h @@ -48,6 +48,30 @@ struct ClearableHashTableCell : public BaseCell ClearableHashTableCell(const Key & key_, const State & state) : BaseCell(key_, state), version(state.version) {} }; +using StringRefBaseCell = HashSetCellWithSavedHash, ClearableHashSetState>; + +/// specialization for StringRef to allow zero size key (empty string) +template <> +struct ClearableHashTableCell : public StringRefBaseCell +{ + using State = ClearableHashSetState; + using value_type = typename StringRefBaseCell::value_type; + + UInt32 version; + + bool isZero(const State & state) const { return version != state.version; } + static bool isZero(const StringRef & key_, const State & state_) { return StringRefBaseCell::isZero(key_, state_); } + + /// Set the key value to zero. + void setZero() { version = 0; } + + /// Do I need to store the zero key separately (that is, can a zero key be inserted into the hash table). + static constexpr bool need_zero_value_storage = true; + + ClearableHashTableCell() { } //-V730 /// NOLINT + ClearableHashTableCell(const StringRef & key_, const State & state) : StringRefBaseCell(key_, state), version(state.version) { } +}; + template < typename Key, typename Hash = DefaultHash, diff --git a/src/Common/LocalDateTime.h b/src/Common/LocalDateTime.h index a002be17787..1aa4f421ea1 100644 --- a/src/Common/LocalDateTime.h +++ b/src/Common/LocalDateTime.h @@ -31,7 +31,7 @@ private: void init(time_t time, const DateLUTImpl & time_zone) { - DateLUTImpl::DateTimeComponents components = time_zone.toDateTimeComponents(time); + DateLUTImpl::DateTimeComponents components = time_zone.toDateTimeComponents(static_cast(time)); m_year = components.date.year; m_month = components.date.month; diff --git a/src/Common/MemoryTracker.cpp b/src/Common/MemoryTracker.cpp index d0d0d6b8686..27d0adcf24f 100644 --- a/src/Common/MemoryTracker.cpp +++ b/src/Common/MemoryTracker.cpp @@ -1,7 +1,6 @@ #include "MemoryTracker.h" #include -#include #include #include #include @@ -83,53 +82,6 @@ inline std::string_view toDescription(OvercommitResult result) } } -bool shouldTrackAllocation(DB::Float64 probability, void * ptr) -{ - return sipHash64(uintptr_t(ptr)) < std::numeric_limits::max() * probability; -} - -AllocationTrace updateAllocationTrace(AllocationTrace trace, const std::optional & sample_probability) -{ - if (unlikely(sample_probability)) - return AllocationTrace(*sample_probability); - - return trace; -} - -AllocationTrace getAllocationTrace(std::optional & sample_probability) -{ - if (unlikely(sample_probability)) - return AllocationTrace(*sample_probability); - - return AllocationTrace(0); -} - -} - -AllocationTrace::AllocationTrace(double sample_probability_) : sample_probability(sample_probability_) {} - -void AllocationTrace::onAlloc(void * ptr, size_t size) const -{ - if (likely(sample_probability == 0)) - 
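The ClearableHashTableCell<StringRef> specialization above exists so that a zero-size key (the empty string) can still be stored in a clearable hash set, where "cleared" is normally encoded as "version differs from the table's current version". A standalone sketch of the version-based clearing idea, using std::unordered_map purely to keep it short (the real container is an open-addressing table):

#include <cassert>
#include <string>
#include <unordered_map>

class ClearableStringSet
{
public:
    void insert(const std::string & key) { cells[key] = version; }

    bool contains(const std::string & key) const
    {
        auto it = cells.find(key);
        return it != cells.end() && it->second == version;   // stale version means "cleared"
    }

    void clear() { ++version; }   // O(1) logical clear, no cell is touched

private:
    std::unordered_map<std::string, unsigned> cells;
    unsigned version = 1;
};

int main()
{
    ClearableStringSet set;
    set.insert("");            // the empty string must be a valid key,
    set.insert("abc");         // which is what the StringRef specialization enables
    assert(set.contains("") && set.contains("abc"));
    set.clear();
    assert(!set.contains("") && !set.contains("abc"));
}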
return; - - if (sample_probability < 1 && !shouldTrackAllocation(sample_probability, ptr)) - return; - - MemoryTrackerBlockerInThread untrack_lock(VariableContext::Global); - DB::TraceSender::send(DB::TraceType::MemorySample, StackTrace(), {.size = Int64(size), .ptr = ptr}); -} - -void AllocationTrace::onFree(void * ptr, size_t size) const -{ - if (likely(sample_probability == 0)) - return; - - if (sample_probability < 1 && !shouldTrackAllocation(sample_probability, ptr)) - return; - - MemoryTrackerBlockerInThread untrack_lock(VariableContext::Global); - DB::TraceSender::send(DB::TraceType::MemorySample, StackTrace(), {.size = -Int64(size), .ptr = ptr}); } namespace ProfileEvents @@ -183,7 +135,7 @@ void MemoryTracker::logMemoryUsage(Int64 current) const } -AllocationTrace MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryTracker * query_tracker) +void MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryTracker * query_tracker) { if (size < 0) throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Negative size ({}) is passed to MemoryTracker. It is a bug.", size); @@ -202,14 +154,9 @@ AllocationTrace MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceed /// Since the MemoryTrackerBlockerInThread should respect the level, we should go to the next parent. if (auto * loaded_next = parent.load(std::memory_order_relaxed)) - { - MemoryTracker * tracker = level == VariableContext::Process ? this : query_tracker; - return updateAllocationTrace( - loaded_next->allocImpl(size, throw_if_memory_exceeded, tracker), - sample_probability); - } - - return getAllocationTrace(sample_probability); + loaded_next->allocImpl(size, throw_if_memory_exceeded, + level == VariableContext::Process ? this : query_tracker); + return; } /** Using memory_order_relaxed means that if allocations are done simultaneously, @@ -236,6 +183,14 @@ AllocationTrace MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceed allocation_traced = true; } + std::bernoulli_distribution sample(sample_probability); + if (unlikely(sample_probability > 0.0 && sample(thread_local_rng))) + { + MemoryTrackerBlockerInThread untrack_lock(VariableContext::Global); + DB::TraceSender::send(DB::TraceType::MemorySample, StackTrace(), {.size = size}); + allocation_traced = true; + } + std::bernoulli_distribution fault(fault_probability); if (unlikely(fault_probability > 0.0 && fault(thread_local_rng))) { @@ -354,22 +309,16 @@ AllocationTrace MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceed } if (auto * loaded_next = parent.load(std::memory_order_relaxed)) - { - MemoryTracker * tracker = level == VariableContext::Process ? this : query_tracker; - return updateAllocationTrace( - loaded_next->allocImpl(size, throw_if_memory_exceeded, tracker), - sample_probability); - } - - return getAllocationTrace(sample_probability); + loaded_next->allocImpl(size, throw_if_memory_exceeded, + level == VariableContext::Process ? 
this : query_tracker); } void MemoryTracker::adjustWithUntrackedMemory(Int64 untracked_memory) { if (untracked_memory > 0) - std::ignore = allocImpl(untracked_memory, /*throw_if_memory_exceeded*/ false); + allocImpl(untracked_memory, /*throw_if_memory_exceeded*/ false); else - std::ignore = free(-untracked_memory); + free(-untracked_memory); } bool MemoryTracker::updatePeak(Int64 will_be, bool log_memory_usage) @@ -388,7 +337,8 @@ bool MemoryTracker::updatePeak(Int64 will_be, bool log_memory_usage) return false; } -AllocationTrace MemoryTracker::free(Int64 size) + +void MemoryTracker::free(Int64 size) { if (MemoryTrackerBlockerInThread::isBlocked(level)) { @@ -403,9 +353,15 @@ AllocationTrace MemoryTracker::free(Int64 size) /// Since the MemoryTrackerBlockerInThread should respect the level, we should go to the next parent. if (auto * loaded_next = parent.load(std::memory_order_relaxed)) - return updateAllocationTrace(loaded_next->free(size), sample_probability); + loaded_next->free(size); + return; + } - return getAllocationTrace(sample_probability); + std::bernoulli_distribution sample(sample_probability); + if (unlikely(sample_probability > 0.0 && sample(thread_local_rng))) + { + MemoryTrackerBlockerInThread untrack_lock(VariableContext::Global); + DB::TraceSender::send(DB::TraceType::MemorySample, StackTrace(), {.size = -size}); } Int64 accounted_size = size; @@ -433,15 +389,12 @@ AllocationTrace MemoryTracker::free(Int64 size) if (auto * overcommit_tracker_ptr = overcommit_tracker.load(std::memory_order_relaxed)) overcommit_tracker_ptr->tryContinueQueryExecutionAfterFree(accounted_size); - AllocationTrace res = getAllocationTrace(sample_probability); if (auto * loaded_next = parent.load(std::memory_order_relaxed)) - res = updateAllocationTrace(loaded_next->free(size), sample_probability); + loaded_next->free(size); auto metric_loaded = metric.load(std::memory_order_relaxed); if (metric_loaded != CurrentMetrics::end()) CurrentMetrics::sub(metric_loaded, accounted_size); - - return res; } @@ -525,14 +478,3 @@ void MemoryTracker::setOrRaiseProfilerLimit(Int64 value) while ((value == 0 || old_value < value) && !profiler_limit.compare_exchange_weak(old_value, value)) ; } - -double MemoryTracker::getSampleProbability() -{ - if (sample_probability) - return *sample_probability; - - if (auto * loaded_next = parent.load(std::memory_order_relaxed)) - return loaded_next->getSampleProbability(); - - return 0; -} diff --git a/src/Common/MemoryTracker.h b/src/Common/MemoryTracker.h index e1f61b1585a..f6113d31423 100644 --- a/src/Common/MemoryTracker.h +++ b/src/Common/MemoryTracker.h @@ -2,11 +2,9 @@ #include #include -#include #include #include #include -#include #if !defined(NDEBUG) #define MEMORY_TRACKER_DEBUG_CHECKS @@ -67,7 +65,7 @@ private: double fault_probability = 0; /// To randomly sample allocations and deallocations in trace_log. - std::optional sample_probability; + double sample_probability = 0; /// Singly-linked list. All information will be passed to subsequent memory trackers also (it allows to implement trackers hierarchy). /// In terms of tree nodes it is the list of parents. Lifetime of these trackers should "include" lifetime of current tracker. @@ -92,8 +90,8 @@ private: /// allocImpl(...) and free(...) 
should not be used directly friend struct CurrentMemoryTracker; - [[nodiscard]] AllocationTrace allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryTracker * query_tracker = nullptr); - [[nodiscard]] AllocationTrace free(Int64 size); + void allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryTracker * query_tracker = nullptr); + void free(Int64 size); public: static constexpr auto USAGE_EVENT_NAME = "MemoryTrackerUsage"; @@ -148,8 +146,6 @@ public: sample_probability = value; } - double getSampleProbability(); - void setProfilerStep(Int64 value) { profiler_step = value; diff --git a/src/Common/MemoryTrackerBlockerInThread.h b/src/Common/MemoryTrackerBlockerInThread.h index 73794049007..d3882056f54 100644 --- a/src/Common/MemoryTrackerBlockerInThread.h +++ b/src/Common/MemoryTrackerBlockerInThread.h @@ -28,5 +28,4 @@ public: } friend class MemoryTracker; - friend struct AllocationTrace; }; diff --git a/src/Common/ShellCommand.cpp b/src/Common/ShellCommand.cpp index 17ee1b880b3..b9f90c8cbb1 100644 --- a/src/Common/ShellCommand.cpp +++ b/src/Common/ShellCommand.cpp @@ -72,11 +72,11 @@ ShellCommand::~ShellCommand() if (process_terminated_normally) return; - LOG_TRACE(getLogger(), "Will kill shell command pid {} with SIGTERM", pid); + LOG_TRACE(getLogger(), "Will kill shell command pid {} with signal {}", pid, config.terminate_in_destructor_strategy.termination_signal); - int retcode = kill(pid, SIGTERM); + int retcode = kill(pid, config.terminate_in_destructor_strategy.termination_signal); if (retcode != 0) - LOG_WARNING(getLogger(), "Cannot kill shell command pid {} errno '{}'", pid, errnoToString()); + LOG_WARNING(getLogger(), "Cannot kill shell command pid {}, error: '{}'", pid, errnoToString()); } else { diff --git a/src/Common/ShellCommand.h b/src/Common/ShellCommand.h index dfc4a826f62..da65d2ae494 100644 --- a/src/Common/ShellCommand.h +++ b/src/Common/ShellCommand.h @@ -27,18 +27,18 @@ namespace DB class ShellCommand final { public: - ~ShellCommand(); struct DestructorStrategy final { - explicit DestructorStrategy(bool terminate_in_destructor_, size_t wait_for_normal_exit_before_termination_seconds_ = 0) - : terminate_in_destructor(terminate_in_destructor_) + explicit DestructorStrategy(bool terminate_in_destructor_, int termination_signal_, size_t wait_for_normal_exit_before_termination_seconds_ = 0) + : terminate_in_destructor(terminate_in_destructor_), termination_signal(termination_signal_) , wait_for_normal_exit_before_termination_seconds(wait_for_normal_exit_before_termination_seconds_) { } bool terminate_in_destructor; + int termination_signal; /// If terminate in destructor is true, command will wait until send SIGTERM signal to created process size_t wait_for_normal_exit_before_termination_seconds = 0; @@ -64,7 +64,7 @@ public: bool pipe_stdin_only = false; - DestructorStrategy terminate_in_destructor_strategy = DestructorStrategy(false); + DestructorStrategy terminate_in_destructor_strategy = DestructorStrategy(false, 0); }; /// Run the command using /bin/sh -c. diff --git a/src/Common/Stopwatch.h b/src/Common/Stopwatch.h index 32d1fca337d..79f650179e2 100644 --- a/src/Common/Stopwatch.h +++ b/src/Common/Stopwatch.h @@ -40,6 +40,10 @@ public: * Pass CLOCK_MONOTONIC_COARSE, if you need better performance with acceptable cost of several milliseconds of inaccuracy. 
*/ explicit Stopwatch(clockid_t clock_type_ = CLOCK_MONOTONIC) : clock_type(clock_type_) { start(); } + explicit Stopwatch(clockid_t clock_type_, UInt64 start_nanoseconds, bool is_running_) + : start_ns(start_nanoseconds), clock_type(clock_type_), is_running(is_running_) + { + } void start() { start_ns = nanoseconds(); is_running = true; } void stop() { stop_ns = nanoseconds(); is_running = false; } @@ -51,6 +55,8 @@ public: UInt64 elapsedMilliseconds() const { return elapsedNanoseconds() / 1000000UL; } double elapsedSeconds() const { return static_cast(elapsedNanoseconds()) / 1000000000ULL; } + UInt64 getStart() { return start_ns; } + private: UInt64 start_ns = 0; UInt64 stop_ns = 0; diff --git a/src/Common/ThreadPool.cpp b/src/Common/ThreadPool.cpp index b70b1fc5e60..e65b5511e05 100644 --- a/src/Common/ThreadPool.cpp +++ b/src/Common/ThreadPool.cpp @@ -156,9 +156,10 @@ ReturnType ThreadPoolImpl::scheduleImpl(Job job, ssize_t priority, std:: propagate_opentelemetry_tracing_context ? DB::OpenTelemetry::CurrentContext() : DB::OpenTelemetry::TracingContextOnThread()); ++scheduled_jobs; - new_job_or_shutdown.notify_one(); } + new_job_or_shutdown.notify_one(); + return static_cast(true); } diff --git a/src/Common/ThreadStatus.cpp b/src/Common/ThreadStatus.cpp index b62a7af6c71..81650f107a4 100644 --- a/src/Common/ThreadStatus.cpp +++ b/src/Common/ThreadStatus.cpp @@ -188,13 +188,10 @@ void ThreadStatus::updatePerformanceCounters() } } -void ThreadStatus::assertState(const std::initializer_list & permitted_states, const char * description) const +void ThreadStatus::assertState(ThreadState permitted_state, const char * description) const { - for (auto permitted_state : permitted_states) - { - if (getCurrentState() == permitted_state) - return; - } + if (getCurrentState() == permitted_state) + return; if (description) throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected thread state {}: {}", getCurrentState(), description); diff --git a/src/Common/ThreadStatus.h b/src/Common/ThreadStatus.h index 6ec46e3e9dc..cbcd8c3c30a 100644 --- a/src/Common/ThreadStatus.h +++ b/src/Common/ThreadStatus.h @@ -87,10 +87,6 @@ public: LogsLevel client_logs_level = LogsLevel::none; String query; - /// Query without new lines (see toOneLineQuery()) - /// Used to print in case of fatal error - /// (to avoid calling extra code in the fatal error handler) - String one_line_query; UInt64 normalized_query_hash = 0; std::vector finished_threads_counters_memory; @@ -296,7 +292,7 @@ protected: void logToQueryThreadLog(QueryThreadLog & thread_log, const String & current_database, std::chrono::time_point now); - void assertState(const std::initializer_list & permitted_states, const char * description = nullptr) const; + void assertState(ThreadState permitted_state, const char * description = nullptr) const; private: diff --git a/src/Common/TraceSender.cpp b/src/Common/TraceSender.cpp index 91d07367a82..64d7b2b0eaf 100644 --- a/src/Common/TraceSender.cpp +++ b/src/Common/TraceSender.cpp @@ -33,7 +33,6 @@ void TraceSender::send(TraceType trace_type, const StackTrace & stack_trace, Ext + sizeof(TraceType) /// trace type + sizeof(UInt64) /// thread_id + sizeof(Int64) /// size - + sizeof(void *) /// ptr + sizeof(ProfileEvents::Event) /// event + sizeof(ProfileEvents::Count); /// increment @@ -75,7 +74,6 @@ void TraceSender::send(TraceType trace_type, const StackTrace & stack_trace, Ext writePODBinary(trace_type, out); writePODBinary(thread_id, out); writePODBinary(extras.size, out); - writePODBinary(UInt64(extras.ptr), out); 
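The TraceSender hunk around this point keeps two places in sync: the byte count summed from sizeof() terms a few lines up and the run of writePODBinary calls that continues just below, which is why dropping the ptr field removes both a sizeof(void *) term and a write. A minimal sketch of the same invariant, using hypothetical names rather than the actual TraceSender API:

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>

/// Hypothetical record: the size computed up front must match exactly the
/// fields that are subsequently written.
struct Extras { int64_t size; uint64_t event; uint64_t increment; };

constexpr size_t serializedSize()
{
    return sizeof(int64_t)    /// size
         + sizeof(uint64_t)   /// event
         + sizeof(uint64_t);  /// increment
}

inline char * writePOD(const void * src, size_t n, char * dst)
{
    std::memcpy(dst, src, n);
    return dst + n;
}

inline void serialize(const Extras & extras, char * out)
{
    char * pos = out;
    pos = writePOD(&extras.size, sizeof(extras.size), pos);
    pos = writePOD(&extras.event, sizeof(extras.event), pos);
    pos = writePOD(&extras.increment, sizeof(extras.increment), pos);
    assert(static_cast<size_t>(pos - out) == serializedSize());
}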
writePODBinary(extras.event, out); writePODBinary(extras.increment, out); diff --git a/src/Common/TraceSender.h b/src/Common/TraceSender.h index 68ba15ee400..21b44b651dd 100644 --- a/src/Common/TraceSender.h +++ b/src/Common/TraceSender.h @@ -28,9 +28,8 @@ class TraceSender public: struct Extras { - /// size, ptr - for memory tracing is the amount of memory allocated; for other trace types it is 0. + /// size - for memory tracing is the amount of memory allocated; for other trace types it is 0. Int64 size{}; - void * ptr = nullptr; /// Event type and increment for 'ProfileEvent' trace type; for other trace types defaults. ProfileEvents::Event event{ProfileEvents::end()}; ProfileEvents::Count increment{}; diff --git a/src/Common/UnicodeBar.cpp b/src/Common/UnicodeBar.cpp index efc85ad35e4..bad39d8080c 100644 --- a/src/Common/UnicodeBar.cpp +++ b/src/Common/UnicodeBar.cpp @@ -9,6 +9,13 @@ #include +namespace DB +{ + namespace ErrorCodes + { + extern const int LOGICAL_ERROR; + } +} namespace UnicodeBar { @@ -26,36 +33,64 @@ namespace UnicodeBar return (x - min) / (max - min) * max_width; } - size_t getWidthInBytes(double width) + namespace { - return static_cast(ceil(width - 1.0 / 8) * UNICODE_BAR_CHAR_SIZE); + /// We use the following Unicode characters to draw the bar: + /// U+2588 "█" Full block + /// U+2589 "▉" Left seven eighths block + /// U+258A "▊" Left three quarters block + /// U+258B "▋" Left five eighths block + /// U+258C "▌" Left half block + /// U+258D "▍" Left three eighths block + /// U+258E "▎" Left one quarter block + /// U+258F "▏" Left one eighth block + constexpr size_t GRADES_IN_FULL_BAR = 8; + constexpr char FULL_BAR[] = "█"; + constexpr char FRACTIONAL_BARS[] = "▏▎▍▌▋▊▉"; /// 7 elements: 1/8, 2/8, 3/8, 4/8, 5/8, 6/8, 7/8 } - void render(double width, char * dst) + size_t getWidthInBytes(double width) { - size_t floor_width = static_cast(floor(width)); + Int64 int_width = static_cast(width * GRADES_IN_FULL_BAR); + return (int_width / GRADES_IN_FULL_BAR) * UNICODE_BAR_CHAR_SIZE + (int_width % GRADES_IN_FULL_BAR ? 
UNICODE_BAR_CHAR_SIZE : 0); + } + + static char* checkedCopy(const char * src, size_t src_size, char * dst, const char * dst_end) + { + if (dst + src_size > dst_end) + throw DB::Exception( + DB::ErrorCodes::LOGICAL_ERROR, + "Not enough space in buffer for UnicodeBar::render, required: {}, got: {}", + src_size, dst_end - dst); + + memcpy(dst, src, src_size); + return dst + src_size; + } + + void render(double width, char * dst, const char * dst_end) + { + Int64 int_width = static_cast(width * GRADES_IN_FULL_BAR); + size_t floor_width = (int_width / GRADES_IN_FULL_BAR); for (size_t i = 0; i < floor_width; ++i) { - memcpy(dst, "█", UNICODE_BAR_CHAR_SIZE); - dst += UNICODE_BAR_CHAR_SIZE; + dst = checkedCopy(FULL_BAR, UNICODE_BAR_CHAR_SIZE, dst, dst_end); } - size_t remainder = static_cast(floor((width - floor_width) * 8)); + size_t remainder = int_width % GRADES_IN_FULL_BAR; if (remainder) { - memcpy(dst, &"▏▎▍▌▋▋▊▉"[(remainder - 1) * UNICODE_BAR_CHAR_SIZE], UNICODE_BAR_CHAR_SIZE); - dst += UNICODE_BAR_CHAR_SIZE; + dst = checkedCopy(&FRACTIONAL_BARS[(remainder - 1) * UNICODE_BAR_CHAR_SIZE], UNICODE_BAR_CHAR_SIZE, dst, dst_end); } - *dst = 0; + checkedCopy("\0", 1, dst, dst_end); } std::string render(double width) { - std::string res(getWidthInBytes(width), '\0'); - render(width, res.data()); + std::string res(getWidthInBytes(width) + 1, '\0'); + render(width, res.data(), res.data() + res.size()); return res; } } diff --git a/src/Common/UnicodeBar.h b/src/Common/UnicodeBar.h index 64705aa5022..78e925bdb3c 100644 --- a/src/Common/UnicodeBar.h +++ b/src/Common/UnicodeBar.h @@ -14,6 +14,6 @@ namespace UnicodeBar size_t getWidthInBytes(double width); /// In `dst` there must be a space for barWidthInBytes(width) characters and a trailing zero. - void render(double width, char * dst); + void render(double width, char * dst, const char * dst_end); std::string render(double width); } diff --git a/src/Common/ZooKeeper/TestKeeper.cpp b/src/Common/ZooKeeper/TestKeeper.cpp index 134374f98d0..4f53a8ac307 100644 --- a/src/Common/ZooKeeper/TestKeeper.cpp +++ b/src/Common/ZooKeeper/TestKeeper.cpp @@ -219,6 +219,7 @@ std::pair TestKeeperCreateRequest::process(TestKeeper::Contai created_node.stat.mtime = created_node.stat.ctime; created_node.stat.numChildren = 0; created_node.stat.dataLength = static_cast(data.length()); + created_node.stat.ephemeralOwner = is_ephemeral ? 1 : 0; created_node.data = data; created_node.is_ephemeral = is_ephemeral; created_node.is_sequental = is_sequential; diff --git a/src/Common/ZooKeeper/ZooKeeperCommon.cpp b/src/Common/ZooKeeper/ZooKeeperCommon.cpp index 8976f1098ac..4cc1c24ef8b 100644 --- a/src/Common/ZooKeeper/ZooKeeperCommon.cpp +++ b/src/Common/ZooKeeper/ZooKeeperCommon.cpp @@ -36,7 +36,7 @@ std::string ZooKeeperRequest::toString() const "OpNum = {}\n" "Additional info:\n{}", xid, - getOpNum(), + Coordination::toString(getOpNum()), toStringImpl()); } diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.cpp b/src/Common/ZooKeeper/ZooKeeperImpl.cpp index 7cbe7d7b0f2..251bf023f08 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.cpp +++ b/src/Common/ZooKeeper/ZooKeeperImpl.cpp @@ -342,7 +342,6 @@ ZooKeeper::ZooKeeper( default_acls.emplace_back(std::move(acl)); } - /// It makes sense (especially, for async requests) to inject a fault in two places: /// pushRequest (before request is sent) and receiveEvent (after request was executed). 
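As a rough illustration of the probability-gated fault injection described in the comment above: a fault is armed only when the configured probability lies in (0, 1], which is exactly what the send/receive checks that follow verify. The class below is a hypothetical sketch, not the client's actual injection machinery:

#include <random>
#include <stdexcept>

/// Hypothetical: decide, per call site, whether to inject a failure with a
/// fixed probability, mirroring the send/receive fault probabilities.
class FaultInjector
{
public:
    explicit FaultInjector(double probability)
    {
        if (!(probability > 0.0 && probability <= 1.0))
            throw std::invalid_argument("fault probability must be in (0, 1]");
        fault = std::bernoulli_distribution(probability);
    }

    /// Returns true when a fault should be injected at this point.
    bool shouldFail(std::mt19937_64 & rng) { return fault(rng); }

private:
    std::bernoulli_distribution fault{0.0};
};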
if (0 < args.send_fault_probability && args.send_fault_probability <= 1) @@ -676,7 +675,7 @@ void ZooKeeper::receiveThread() if (earliest_operation) { throw Exception(Error::ZOPERATIONTIMEOUT, "Operation timeout (no response) for request {} for path: {}", - earliest_operation->request->getOpNum(), earliest_operation->request->getPath()); + toString(earliest_operation->request->getOpNum()), earliest_operation->request->getPath()); } waited_us += max_wait_us; if (waited_us >= args.session_timeout_ms * 1000) @@ -870,7 +869,7 @@ void ZooKeeper::finalize(bool error_send, bool error_receive, const String & rea if (already_started) return; - LOG_INFO(log, "Finalizing session {}: finalization_started={}, queue_finished={}, reason={}", + LOG_INFO(log, "Finalizing session {}. finalization_started: {}, queue_finished: {}, reason: '{}'", session_id, already_started, requests_queue.isFinished(), reason); auto expire_session_if_not_expired = [&] diff --git a/src/Storages/MergeTree/ZooKeeperWithFaultInjection.h b/src/Common/ZooKeeper/ZooKeeperWithFaultInjection.h similarity index 100% rename from src/Storages/MergeTree/ZooKeeperWithFaultInjection.h rename to src/Common/ZooKeeper/ZooKeeperWithFaultInjection.h diff --git a/src/Common/clickhouse_malloc.cpp b/src/Common/clickhouse_malloc.cpp index afdad3c6599..3f69ebdf58d 100644 --- a/src/Common/clickhouse_malloc.cpp +++ b/src/Common/clickhouse_malloc.cpp @@ -9,11 +9,7 @@ extern "C" void * clickhouse_malloc(size_t size) { void * res = malloc(size); if (res) - { - AllocationTrace trace; - size_t actual_size = Memory::trackMemory(size, trace); - trace.onAlloc(res, actual_size); - } + Memory::trackMemory(size); return res; } @@ -21,29 +17,17 @@ extern "C" void * clickhouse_calloc(size_t number_of_members, size_t size) { void * res = calloc(number_of_members, size); if (res) - { - AllocationTrace trace; - size_t actual_size = Memory::trackMemory(number_of_members * size, trace); - trace.onAlloc(res, actual_size); - } + Memory::trackMemory(number_of_members * size); return res; } extern "C" void * clickhouse_realloc(void * ptr, size_t size) { if (ptr) - { - AllocationTrace trace; - size_t actual_size = Memory::untrackMemory(ptr, trace); - trace.onFree(ptr, actual_size); - } + Memory::untrackMemory(ptr); void * res = realloc(ptr, size); if (res) - { - AllocationTrace trace; - size_t actual_size = Memory::trackMemory(size, trace); - trace.onAlloc(res, actual_size); - } + Memory::trackMemory(size); return res; } @@ -58,9 +42,7 @@ extern "C" void * clickhouse_reallocarray(void * ptr, size_t number_of_members, extern "C" void clickhouse_free(void * ptr) { - AllocationTrace trace; - size_t actual_size = Memory::untrackMemory(ptr, trace); - trace.onFree(ptr, actual_size); + Memory::untrackMemory(ptr); free(ptr); } @@ -68,10 +50,6 @@ extern "C" int clickhouse_posix_memalign(void ** memptr, size_t alignment, size_ { int res = posix_memalign(memptr, alignment, size); if (res == 0) - { - AllocationTrace trace; - size_t actual_size = Memory::trackMemory(size, trace); - trace.onAlloc(*memptr, actual_size); - } + Memory::trackMemory(size); return res; } diff --git a/src/Common/filesystemHelpers.cpp b/src/Common/filesystemHelpers.cpp index 07a08dc7fbc..aabe7422d14 100644 --- a/src/Common/filesystemHelpers.cpp +++ b/src/Common/filesystemHelpers.cpp @@ -64,11 +64,11 @@ bool enoughSpaceInDirectory(const std::string & path, size_t data_size) return data_size <= free_space; } -std::unique_ptr createTemporaryFile(const std::string & path) +std::unique_ptr createTemporaryFile(const 
std::string & folder_path) { ProfileEvents::increment(ProfileEvents::ExternalProcessingFilesTotal); - fs::create_directories(path); - return std::make_unique(path); + fs::create_directories(folder_path); + return std::make_unique(folder_path); } #if !defined(OS_LINUX) @@ -352,7 +352,8 @@ time_t getModificationTime(const std::string & path) struct stat st; if (stat(path.c_str(), &st) == 0) return st.st_mtime; - DB::throwFromErrnoWithPath("Cannot check modification time for file: " + path, path, DB::ErrorCodes::CANNOT_STAT); + std::error_code m_ec(errno, std::generic_category()); + throw fs::filesystem_error("Cannot check modification time for file", path, m_ec); } time_t getChangeTime(const std::string & path) @@ -360,7 +361,8 @@ time_t getChangeTime(const std::string & path) struct stat st; if (stat(path.c_str(), &st) == 0) return st.st_ctime; - DB::throwFromErrnoWithPath("Cannot check change time for file: " + path, path, DB::ErrorCodes::CANNOT_STAT); + std::error_code m_ec(errno, std::generic_category()); + throw fs::filesystem_error("Cannot check change time for file", path, m_ec); } Poco::Timestamp getModificationTimestamp(const std::string & path) diff --git a/src/Common/filesystemHelpers.h b/src/Common/filesystemHelpers.h index 0e6e16941bb..14ee5f54322 100644 --- a/src/Common/filesystemHelpers.h +++ b/src/Common/filesystemHelpers.h @@ -14,10 +14,10 @@ namespace fs = std::filesystem; namespace DB { -using TemporaryFile = Poco::TemporaryFile; +using PocoTemporaryFile = Poco::TemporaryFile; bool enoughSpaceInDirectory(const std::string & path, size_t data_size); -std::unique_ptr createTemporaryFile(const std::string & path); +std::unique_ptr createTemporaryFile(const std::string & folder_path); // Determine what block device is responsible for specified path diff --git a/src/Common/memory.h b/src/Common/memory.h index 87ccdce070a..4cb1c535e56 100644 --- a/src/Common/memory.h +++ b/src/Common/memory.h @@ -112,19 +112,16 @@ inline ALWAYS_INLINE size_t getActualAllocationSize(size_t size, TAlign... align template ... TAlign> requires DB::OptionalArgument -inline ALWAYS_INLINE size_t trackMemory(std::size_t size, AllocationTrace & trace, TAlign... align) +inline ALWAYS_INLINE void trackMemory(std::size_t size, TAlign... align) { std::size_t actual_size = getActualAllocationSize(size, align...); - trace = CurrentMemoryTracker::allocNoThrow(actual_size); - return actual_size; + CurrentMemoryTracker::allocNoThrow(actual_size); } template ... TAlign> requires DB::OptionalArgument -inline ALWAYS_INLINE size_t untrackMemory(void * ptr [[maybe_unused]], AllocationTrace & trace, std::size_t size [[maybe_unused]] = 0, TAlign... align [[maybe_unused]]) noexcept +inline ALWAYS_INLINE void untrackMemory(void * ptr [[maybe_unused]], std::size_t size [[maybe_unused]] = 0, TAlign... align [[maybe_unused]]) noexcept { - std::size_t actual_size = 0; - try { #if USE_JEMALLOC @@ -133,26 +130,23 @@ inline ALWAYS_INLINE size_t untrackMemory(void * ptr [[maybe_unused]], Allocatio if (likely(ptr != nullptr)) { if constexpr (sizeof...(TAlign) == 1) - actual_size = sallocx(ptr, MALLOCX_ALIGN(alignToSizeT(align...))); + CurrentMemoryTracker::free(sallocx(ptr, MALLOCX_ALIGN(alignToSizeT(align...)))); else - actual_size = sallocx(ptr, 0); + CurrentMemoryTracker::free(sallocx(ptr, 0)); } #else if (size) - actual_size = size; + CurrentMemoryTracker::free(size); # if defined(_GNU_SOURCE) /// It's innaccurate resource free for sanitizers. malloc_usable_size() result is greater or equal to allocated size. 
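The memory.h hunk around here chooses how many bytes to report to the tracker on deallocation: jemalloc's sallocx() when it is available, otherwise an explicitly passed size, and finally malloc_usable_size() as the fallback the comment above warns may overestimate. A hypothetical helper sketching the non-jemalloc fallback path only (not the actual Memory::untrackMemory signature):

#include <cstddef>
#include <malloc.h> /// malloc_usable_size; assumes a glibc/_GNU_SOURCE build

/// Hypothetical: pick the most precise size available for a pointer being
/// freed, preferring an explicitly known size over the allocator's estimate.
inline size_t sizeToUntrack(void * ptr, size_t known_size)
{
    if (known_size != 0)
        return known_size;              /// sized delete supplied the exact size
    if (ptr != nullptr)
        return malloc_usable_size(ptr); /// greater or equal to the requested size
    return 0;
}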
else - actual_size = malloc_usable_size(ptr); + CurrentMemoryTracker::free(malloc_usable_size(ptr)); # endif #endif - trace = CurrentMemoryTracker::free(actual_size); } catch (...) { } - - return actual_size; } } diff --git a/src/Common/new_delete.cpp b/src/Common/new_delete.cpp index d0170bd820c..871ab750907 100644 --- a/src/Common/new_delete.cpp +++ b/src/Common/new_delete.cpp @@ -50,74 +50,50 @@ static struct InitializeJemallocZoneAllocatorForOSX void * operator new(std::size_t size) { - AllocationTrace trace; - std::size_t actual_size = Memory::trackMemory(size, trace); - void * ptr = Memory::newImpl(size); - trace.onAlloc(ptr, actual_size); - return ptr; + Memory::trackMemory(size); + return Memory::newImpl(size); } void * operator new(std::size_t size, std::align_val_t align) { - AllocationTrace trace; - std::size_t actual_size = Memory::trackMemory(size, trace, align); - void * ptr = Memory::newImpl(size, align); - trace.onAlloc(ptr, actual_size); - return ptr; + Memory::trackMemory(size, align); + return Memory::newImpl(size, align); } void * operator new[](std::size_t size) { - AllocationTrace trace; - std::size_t actual_size = Memory::trackMemory(size, trace); - void * ptr = Memory::newImpl(size); - trace.onAlloc(ptr, actual_size); - return ptr; + Memory::trackMemory(size); + return Memory::newImpl(size); } void * operator new[](std::size_t size, std::align_val_t align) { - AllocationTrace trace; - std::size_t actual_size = Memory::trackMemory(size, trace, align); - void * ptr = Memory::newImpl(size, align); - trace.onAlloc(ptr, actual_size); - return ptr; + Memory::trackMemory(size, align); + return Memory::newImpl(size, align); } void * operator new(std::size_t size, const std::nothrow_t &) noexcept { - AllocationTrace trace; - std::size_t actual_size = Memory::trackMemory(size, trace); - void * ptr = Memory::newNoExept(size); - trace.onAlloc(ptr, actual_size); - return ptr; + Memory::trackMemory(size); + return Memory::newNoExept(size); } void * operator new[](std::size_t size, const std::nothrow_t &) noexcept { - AllocationTrace trace; - std::size_t actual_size = Memory::trackMemory(size, trace); - void * ptr = Memory::newNoExept(size); - trace.onAlloc(ptr, actual_size); - return ptr; + Memory::trackMemory(size); + return Memory::newNoExept(size); } void * operator new(std::size_t size, std::align_val_t align, const std::nothrow_t &) noexcept { - AllocationTrace trace; - std::size_t actual_size = Memory::trackMemory(size, trace, align); - void * ptr = Memory::newNoExept(size, align); - trace.onAlloc(ptr, actual_size); - return ptr; + Memory::trackMemory(size, align); + return Memory::newNoExept(size, align); } void * operator new[](std::size_t size, std::align_val_t align, const std::nothrow_t &) noexcept { - AllocationTrace trace; - std::size_t actual_size = Memory::trackMemory(size, trace, align); - void * ptr = Memory::newNoExept(size, align); - trace.onAlloc(ptr, actual_size); - return ptr; + Memory::trackMemory(size, align); + return Memory::newNoExept(size, align); } /// delete @@ -133,64 +109,48 @@ void * operator new[](std::size_t size, std::align_val_t align, const std::nothr void operator delete(void * ptr) noexcept { - AllocationTrace trace; - std::size_t actual_size = Memory::untrackMemory(ptr, trace); - trace.onFree(ptr, actual_size); + Memory::untrackMemory(ptr); Memory::deleteImpl(ptr); } void operator delete(void * ptr, std::align_val_t align) noexcept { - AllocationTrace trace; - std::size_t actual_size = Memory::untrackMemory(ptr, trace, 0, align); - 
trace.onFree(ptr, actual_size); + Memory::untrackMemory(ptr, 0, align); Memory::deleteImpl(ptr); } void operator delete[](void * ptr) noexcept { - AllocationTrace trace; - std::size_t actual_size = Memory::untrackMemory(ptr, trace); - trace.onFree(ptr, actual_size); + Memory::untrackMemory(ptr); Memory::deleteImpl(ptr); } void operator delete[](void * ptr, std::align_val_t align) noexcept { - AllocationTrace trace; - std::size_t actual_size = Memory::untrackMemory(ptr, trace, 0, align); - trace.onFree(ptr, actual_size); + Memory::untrackMemory(ptr, 0, align); Memory::deleteImpl(ptr); } void operator delete(void * ptr, std::size_t size) noexcept { - AllocationTrace trace; - std::size_t actual_size = Memory::untrackMemory(ptr, trace, size); - trace.onFree(ptr, actual_size); + Memory::untrackMemory(ptr, size); Memory::deleteSized(ptr, size); } void operator delete(void * ptr, std::size_t size, std::align_val_t align) noexcept { - AllocationTrace trace; - std::size_t actual_size = Memory::untrackMemory(ptr, trace, size, align); - trace.onFree(ptr, actual_size); + Memory::untrackMemory(ptr, size, align); Memory::deleteSized(ptr, size, align); } void operator delete[](void * ptr, std::size_t size) noexcept { - AllocationTrace trace; - std::size_t actual_size = Memory::untrackMemory(ptr, trace, size); - trace.onFree(ptr, actual_size); + Memory::untrackMemory(ptr, size); Memory::deleteSized(ptr, size); } void operator delete[](void * ptr, std::size_t size, std::align_val_t align) noexcept { - AllocationTrace trace; - std::size_t actual_size = Memory::untrackMemory(ptr, trace, size, align); - trace.onFree(ptr, actual_size); + Memory::untrackMemory(ptr, size, align); Memory::deleteSized(ptr, size, align); } diff --git a/src/Common/remapExecutable.cpp b/src/Common/remapExecutable.cpp index b987a4aac09..206314ea295 100644 --- a/src/Common/remapExecutable.cpp +++ b/src/Common/remapExecutable.cpp @@ -1,6 +1,6 @@ #include "remapExecutable.h" -#if defined(OS_LINUX) && defined(__amd64__) && defined(__SSE2__) && !defined(SANITIZER) && defined(NDEBUG) && !defined(SPLIT_SHARED_LIBRARIES) +#if defined(OS_LINUX) && defined(__amd64__) && defined(__SSE2__) && !defined(SANITIZER) && defined(NDEBUG) #include #include diff --git a/src/Compression/tests/gtest_compressionCodec.cpp b/src/Compression/tests/gtest_compressionCodec.cpp index 4765aeb45ed..b4c29bf9ce6 100644 --- a/src/Compression/tests/gtest_compressionCodec.cpp +++ b/src/Compression/tests/gtest_compressionCodec.cpp @@ -1203,7 +1203,7 @@ auto DDperformanceTestSequence() + generateSeq(G(SameValueGenerator(42)), 0, times); // best } -// prime numbers in ascending order with some random repitions hit all the cases of Gorilla. +// prime numbers in ascending order with some random repetitions hit all the cases of Gorilla. 
auto PrimesWithMultiplierGenerator = [](int multiplier = 1) { return [multiplier](auto i) diff --git a/src/Coordination/KeeperDispatcher.cpp b/src/Coordination/KeeperDispatcher.cpp index f6973b30fb2..92aeac425d5 100644 --- a/src/Coordination/KeeperDispatcher.cpp +++ b/src/Coordination/KeeperDispatcher.cpp @@ -296,7 +296,7 @@ bool KeeperDispatcher::putRequest(const Coordination::ZooKeeperRequestPtr & requ return true; } -void KeeperDispatcher::initialize(const Poco::Util::AbstractConfiguration & config, bool standalone_keeper, bool start_async) +void KeeperDispatcher::initialize(const Poco::Util::AbstractConfiguration & config, bool standalone_keeper, bool start_async, const MultiVersion::Version & macros) { LOG_DEBUG(log, "Initializing storage dispatcher"); @@ -307,7 +307,7 @@ void KeeperDispatcher::initialize(const Poco::Util::AbstractConfiguration & conf responses_thread = ThreadFromGlobalPool([this] { responseThread(); }); snapshot_thread = ThreadFromGlobalPool([this] { snapshotThread(); }); - snapshot_s3.startup(config); + snapshot_s3.startup(config, macros); server = std::make_unique(configuration_and_settings, config, responses_queue, snapshots_queue, snapshot_s3); @@ -687,7 +687,7 @@ bool KeeperDispatcher::isServerActive() const return checkInit() && hasLeader() && !server->isRecovering(); } -void KeeperDispatcher::updateConfiguration(const Poco::Util::AbstractConfiguration & config) +void KeeperDispatcher::updateConfiguration(const Poco::Util::AbstractConfiguration & config, const MultiVersion::Version & macros) { auto diff = server->getConfigurationDiff(config); if (diff.empty()) @@ -704,7 +704,7 @@ void KeeperDispatcher::updateConfiguration(const Poco::Util::AbstractConfigurati throw Exception(ErrorCodes::SYSTEM_ERROR, "Cannot push configuration update to queue"); } - snapshot_s3.updateS3Configuration(config); + snapshot_s3.updateS3Configuration(config, macros); } void KeeperDispatcher::updateKeeperStatLatency(uint64_t process_time_ms) diff --git a/src/Coordination/KeeperDispatcher.h b/src/Coordination/KeeperDispatcher.h index 632e5e65e5f..ff902d8e036 100644 --- a/src/Coordination/KeeperDispatcher.h +++ b/src/Coordination/KeeperDispatcher.h @@ -15,6 +15,8 @@ #include #include #include +#include +#include namespace DB { @@ -109,7 +111,8 @@ public: /// Initialization from config. /// standalone_keeper -- we are standalone keeper application (not inside clickhouse server) - void initialize(const Poco::Util::AbstractConfiguration & config, bool standalone_keeper, bool start_async); + /// 'macros' are used to substitute macros in endpoint of disks + void initialize(const Poco::Util::AbstractConfiguration & config, bool standalone_keeper, bool start_async, const MultiVersion::Version & macros); void startServer(); @@ -124,7 +127,8 @@ public: /// Registered in ConfigReloader callback. Add new configuration changes to /// update_configuration_queue. Keeper Dispatcher apply them asynchronously. 
- void updateConfiguration(const Poco::Util::AbstractConfiguration & config); + /// 'macros' are used to substitute macros in endpoint of disks + void updateConfiguration(const Poco::Util::AbstractConfiguration & config, const MultiVersion::Version & macros); /// Shutdown internal keeper parts (server, state machine, log storage, etc) void shutdown(); diff --git a/src/Coordination/KeeperSnapshotManagerS3.cpp b/src/Coordination/KeeperSnapshotManagerS3.cpp index 361265e382a..b793cef4b94 100644 --- a/src/Coordination/KeeperSnapshotManagerS3.cpp +++ b/src/Coordination/KeeperSnapshotManagerS3.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -47,7 +48,7 @@ KeeperSnapshotManagerS3::KeeperSnapshotManagerS3() , uuid(UUIDHelpers::generateV4()) {} -void KeeperSnapshotManagerS3::updateS3Configuration(const Poco::Util::AbstractConfiguration & config) +void KeeperSnapshotManagerS3::updateS3Configuration(const Poco::Util::AbstractConfiguration & config, const MultiVersion::Version & macros) { try { @@ -64,7 +65,7 @@ void KeeperSnapshotManagerS3::updateS3Configuration(const Poco::Util::AbstractCo auto auth_settings = S3::AuthSettings::loadFromConfig(config_prefix, config); - auto endpoint = config.getString(config_prefix + ".endpoint"); + String endpoint = macros->expand(config.getString(config_prefix + ".endpoint")); auto new_uri = S3::URI{endpoint}; { @@ -261,9 +262,9 @@ void KeeperSnapshotManagerS3::uploadSnapshot(const std::string & path, bool asyn uploadSnapshotImpl(path); } -void KeeperSnapshotManagerS3::startup(const Poco::Util::AbstractConfiguration & config) +void KeeperSnapshotManagerS3::startup(const Poco::Util::AbstractConfiguration & config, const MultiVersion::Version & macros) { - updateS3Configuration(config); + updateS3Configuration(config, macros); snapshot_s3_thread = ThreadFromGlobalPool([this] { snapshotS3Thread(); }); } diff --git a/src/Coordination/KeeperSnapshotManagerS3.h b/src/Coordination/KeeperSnapshotManagerS3.h index 5b62d114aae..197f528b192 100644 --- a/src/Coordination/KeeperSnapshotManagerS3.h +++ b/src/Coordination/KeeperSnapshotManagerS3.h @@ -3,6 +3,8 @@ #include "config.h" #include +#include +#include #if USE_AWS_S3 #include @@ -21,10 +23,12 @@ class KeeperSnapshotManagerS3 public: KeeperSnapshotManagerS3(); - void updateS3Configuration(const Poco::Util::AbstractConfiguration & config); + /// 'macros' are used to substitute macros in endpoint of disks + void updateS3Configuration(const Poco::Util::AbstractConfiguration & config, const MultiVersion::Version & macros); void uploadSnapshot(const std::string & path, bool async_upload = true); - void startup(const Poco::Util::AbstractConfiguration & config); + /// 'macros' are used to substitute macros in endpoint of disks + void startup(const Poco::Util::AbstractConfiguration & config, const MultiVersion::Version & macros); void shutdown(); private: using SnapshotS3Queue = ConcurrentBoundedQueue; @@ -56,10 +60,10 @@ class KeeperSnapshotManagerS3 public: KeeperSnapshotManagerS3() = default; - void updateS3Configuration(const Poco::Util::AbstractConfiguration &) {} + void updateS3Configuration(const Poco::Util::AbstractConfiguration &, const MultiVersion::Version &) {} void uploadSnapshot(const std::string &, [[maybe_unused]] bool async_upload = true) {} - void startup(const Poco::Util::AbstractConfiguration &) {} + void startup(const Poco::Util::AbstractConfiguration &, const MultiVersion::Version &) {} void shutdown() {} }; diff --git a/src/Coordination/KeeperStateMachine.cpp 
b/src/Coordination/KeeperStateMachine.cpp index ee5bfa48357..dde8b30bf79 100644 --- a/src/Coordination/KeeperStateMachine.cpp +++ b/src/Coordination/KeeperStateMachine.cpp @@ -132,7 +132,7 @@ void assertDigest( "Digest for nodes is not matching after {} request of type '{}'.\nExpected digest - {}, actual digest - {} (digest " "{}). Keeper will terminate to avoid inconsistencies.\nExtra information about the request:\n{}", committing ? "committing" : "preprocessing", - request.getOpNum(), + Coordination::toString(request.getOpNum()), first.value, second.value, first.version, diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index fb472201aec..72921c4ac1d 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -1704,7 +1704,7 @@ struct KeeperStorageMultiRequestProcessor final : public KeeperStorageRequestPro break; default: throw DB::Exception( - ErrorCodes::BAD_ARGUMENTS, "Illegal command as part of multi ZooKeeper request {}", sub_zk_request->getOpNum()); + ErrorCodes::BAD_ARGUMENTS, "Illegal command as part of multi ZooKeeper request {}", Coordination::toString(sub_zk_request->getOpNum())); } } diff --git a/src/Coordination/TinyContext.cpp b/src/Coordination/TinyContext.cpp index 967e6b23d70..47b0a48dcda 100644 --- a/src/Coordination/TinyContext.cpp +++ b/src/Coordination/TinyContext.cpp @@ -36,7 +36,12 @@ void TinyContext::initializeKeeperDispatcher([[maybe_unused]] bool start_async) if (config_ref.has("keeper_server")) { keeper_dispatcher = std::make_shared(); - keeper_dispatcher->initialize(config_ref, true, start_async); + + MultiVersion::Version macros; + + if (config_ref.has("macros")) + macros = std::make_unique(config_ref, "macros", &Poco::Logger::get("TinyContext")); + keeper_dispatcher->initialize(config_ref, true, start_async, macros); } } @@ -71,7 +76,12 @@ void TinyContext::updateKeeperConfiguration([[maybe_unused]] const Poco::Util::A if (!keeper_dispatcher) return; - keeper_dispatcher->updateConfiguration(config_); + MultiVersion::Version macros; + + if (config_.has("macros")) + macros = std::make_unique(config_, "macros", &Poco::Logger::get("TinyContext")); + + keeper_dispatcher->updateConfiguration(config_, macros); } } diff --git a/src/Core/IResolvedFunction.h b/src/Core/IResolvedFunction.h new file mode 100644 index 00000000000..64c69f597c7 --- /dev/null +++ b/src/Core/IResolvedFunction.h @@ -0,0 +1,29 @@ +#pragma once + +#include +#include + +namespace DB +{ +class IDataType; + +using DataTypePtr = std::shared_ptr; +using DataTypes = std::vector; + +struct Array; + +class IResolvedFunction +{ +public: + virtual const DataTypePtr & getResultType() const = 0; + + virtual const DataTypes & getArgumentTypes() const = 0; + + virtual const Array & getParameters() const = 0; + + virtual ~IResolvedFunction() = default; +}; + +using IResolvedFunctionPtr = std::shared_ptr; + +} diff --git a/src/Core/Settings.cpp b/src/Core/Settings.cpp index 7bac3f04fc6..fa1a10d22f2 100644 --- a/src/Core/Settings.cpp +++ b/src/Core/Settings.cpp @@ -148,31 +148,30 @@ std::vector Settings::getAllRegisteredNames() const void Settings::set(std::string_view name, const Field & value) { - BaseSettings::set(name, value); - if (name == "compatibility") - applyCompatibilitySetting(); + applyCompatibilitySetting(value.get()); /// If we change setting that was changed by compatibility setting before /// we should remove it from settings_changed_by_compatibility_setting, /// otherwise the next time we will change compatibility setting 
/// this setting will be changed too (and we don't want it). else if (settings_changed_by_compatibility_setting.contains(name)) settings_changed_by_compatibility_setting.erase(name); + + BaseSettings::set(name, value); } -void Settings::applyCompatibilitySetting() +void Settings::applyCompatibilitySetting(const String & compatibility_value) { /// First, revert all changes applied by previous compatibility setting for (const auto & setting_name : settings_changed_by_compatibility_setting) resetToDefault(setting_name); settings_changed_by_compatibility_setting.clear(); - String compatibility = getString("compatibility"); /// If setting value is empty, we don't need to change settings - if (compatibility.empty()) + if (compatibility_value.empty()) return; - ClickHouseVersion version(compatibility); + ClickHouseVersion version(compatibility_value); /// Iterate through ClickHouse version in descending order and apply reversed /// changes for each version that is higher that version from compatibility setting for (auto it = settings_changes_history.rbegin(); it != settings_changes_history.rend(); ++it) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 450304b2abd..2357948a1f6 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -565,6 +565,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(UInt64, max_distributed_depth, 5, "Maximum distributed query depth", 0) \ M(Bool, database_replicated_always_detach_permanently, false, "Execute DETACH TABLE as DETACH TABLE PERMANENTLY if database engine is Replicated", 0) \ M(Bool, database_replicated_allow_only_replicated_engine, false, "Allow to create only Replicated tables in database with engine Replicated", 0) \ + M(Bool, database_replicated_allow_replicated_engine_arguments, true, "Allow to create only Replicated tables in database with engine Replicated with explicit arguments", 0) \ M(DistributedDDLOutputMode, distributed_ddl_output_mode, DistributedDDLOutputMode::THROW, "Format of distributed DDL query result", 0) \ M(UInt64, distributed_ddl_entry_format_version, 3, "Compatibility version of distributed DDL (ON CLUSTER) queries", 0) \ \ @@ -770,7 +771,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(Bool, input_format_json_try_infer_numbers_from_strings, true, "Try to infer numbers from string fields while schema inference", 0) \ M(Bool, input_format_json_validate_types_from_metadata, true, "For JSON/JSONCompact/JSONColumnsWithMetadata input formats this controls whether format parser should check if data types from input metadata match data types of the corresponding columns from the table", 0) \ M(Bool, input_format_json_read_numbers_as_strings, false, "Allow to parse numbers as strings in JSON input formats", 0) \ - M(Bool, input_format_json_read_objects_as_strings, false, "Allow to parse JSON objects as strings in JSON input formats", 0) \ + M(Bool, input_format_json_read_objects_as_strings, true, "Allow to parse JSON objects as strings in JSON input formats", 0) \ M(Bool, input_format_try_infer_integers, true, "Try to infer integers instead of floats while schema inference in text formats", 0) \ M(Bool, input_format_try_infer_dates, true, "Try to infer dates from string fields while schema inference in text formats", 0) \ M(Bool, input_format_try_infer_datetimes, true, "Try to infer datetimes from string fields while schema inference in text formats", 0) \ @@ -927,7 +928,7 @@ struct Settings : public BaseSettings, public IHints<2, Settings void setDefaultValue(const String & name) 
{ resetToDefault(name); } private: - void applyCompatibilitySetting(); + void applyCompatibilitySetting(const String & compatibility); std::unordered_set settings_changed_by_compatibility_setting; }; diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 62b3c1b9c98..d67f1b94d5d 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -28,7 +29,8 @@ public: for (const auto & split_element : split) { size_t component; - if (!tryParse(component, split_element)) + ReadBufferFromString buf(split_element); + if (!tryReadIntText(component, buf) || !buf.eof()) throw Exception{ErrorCodes::BAD_ARGUMENTS, "Cannot parse ClickHouse version here: {}", version}; components.push_back(component); } @@ -78,6 +80,7 @@ namespace SettingsChangesHistory /// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972) static std::map settings_changes_history = { + {"23.1", {{"input_format_json_read_objects_as_strings", 0, 1, "Enable reading nested json objects as strings while object type is experimental"}}}, {"22.12", {{"max_size_to_preallocate_for_aggregation", 10'000'000, 100'000'000, "This optimizes performance"}, {"query_plan_aggregation_in_order", 0, 1, "Enable some refactoring around query plan"}, {"format_binary_max_string_size", 0, 1_GiB, "Prevent allocating large amount of memory"}}}, diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index 6cd952bfa83..d4e76a0cad8 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -4,6 +4,7 @@ #include #include +#include #include #include @@ -178,12 +179,9 @@ __attribute__((__weak__)) void collectCrashLog( class SignalListener : public Poco::Runnable { public: - enum Signals : int - { - StdTerminate = -1, - StopThread = -2, - SanitizerTrap = -3, - }; + static constexpr int StdTerminate = -1; + static constexpr int StopThread = -2; + static constexpr int SanitizerTrap = -3; explicit SignalListener(BaseDaemon & daemon_) : log(&Poco::Logger::get("BaseDaemon")) @@ -208,7 +206,7 @@ public: // Don't use strsignal here, because it's not thread-safe. LOG_TRACE(log, "Received signal {}", sig); - if (sig == Signals::StopThread) + if (sig == StopThread) { LOG_INFO(log, "Stop SignalListener thread"); break; @@ -219,7 +217,7 @@ public: BaseDaemon::instance().closeLogs(BaseDaemon::instance().logger()); LOG_INFO(log, "Opened new log file after received signal."); } - else if (sig == Signals::StdTerminate) + else if (sig == StdTerminate) { UInt32 thread_num; std::string message; @@ -306,7 +304,7 @@ private: if (auto thread_group = thread_ptr->getThreadGroup()) { - query = thread_group->one_line_query; + query = DB::toOneLineQuery(thread_group->query); } if (auto logs_queue = thread_ptr->getInternalTextLogsQueue()) @@ -604,34 +602,6 @@ void BaseDaemon::closeFDs() } } -namespace -{ -/// In debug version on Linux, increase oom score so that clickhouse is killed -/// first, instead of some service. Use a carefully chosen random score of 555: -/// the maximum is 1000, and chromium uses 300 for its tab processes. Ignore -/// whatever errors that occur, because it's just a debugging aid and we don't -/// care if it breaks. 
-#if defined(OS_LINUX) && !defined(NDEBUG) -void debugIncreaseOOMScore() -{ - const std::string new_score = "555"; - try - { - DB::WriteBufferFromFile buf("/proc/self/oom_score_adj"); - buf.write(new_score.c_str(), new_score.size()); - buf.close(); - } - catch (const Poco::Exception & e) - { - LOG_WARNING(&Poco::Logger::root(), "Failed to adjust OOM score: '{}'.", e.displayText()); - return; - } - LOG_INFO(&Poco::Logger::root(), "Set OOM score adjustment to {}", new_score); -} -#else -void debugIncreaseOOMScore() {} -#endif -} void BaseDaemon::initialize(Application & self) { @@ -798,7 +768,6 @@ void BaseDaemon::initialize(Application & self) initializeTerminationAndSignalProcessing(); logRevision(); - debugIncreaseOOMScore(); for (const auto & key : DB::getMultipleKeysFromConfig(config(), "", "graphite")) { @@ -909,7 +878,7 @@ void BaseDaemon::initializeTerminationAndSignalProcessing() void BaseDaemon::logRevision() const { - Poco::Logger::root().information("Starting " + std::string{VERSION_FULL} + logger().information("Starting " + std::string{VERSION_FULL} + " (revision: " + std::to_string(ClickHouseRevision::getVersionRevision()) + ", git hash: " + (git_hash.empty() ? "" : git_hash) + ", build id: " + (build_id.empty() ? "" : build_id) + ")" @@ -958,7 +927,6 @@ void BaseDaemon::handleSignal(int signal_id) std::lock_guard lock(signal_handler_mutex); { ++terminate_signals_counter; - sigint_signals_counter += signal_id == SIGINT; signal_event.notify_all(); } @@ -973,9 +941,9 @@ void BaseDaemon::onInterruptSignals(int signal_id) is_cancelled = true; LOG_INFO(&logger(), "Received termination signal ({})", strsignal(signal_id)); // NOLINT(concurrency-mt-unsafe) // it is not thread-safe but ok in this context - if (sigint_signals_counter >= 2) + if (terminate_signals_counter >= 2) { - LOG_INFO(&logger(), "Received second signal Interrupt. Immediately terminate."); + LOG_INFO(&logger(), "This is the second termination signal. Immediately terminate."); call_default_signal_handler(signal_id); /// If the above did not help. 
_exit(128 + signal_id); diff --git a/src/Daemon/BaseDaemon.h b/src/Daemon/BaseDaemon.h index cb4aa0c2da6..d28f9403c16 100644 --- a/src/Daemon/BaseDaemon.h +++ b/src/Daemon/BaseDaemon.h @@ -162,7 +162,6 @@ protected: std::mutex signal_handler_mutex; std::condition_variable signal_event; std::atomic_size_t terminate_signals_counter{0}; - std::atomic_size_t sigint_signals_counter{0}; std::string config_path; DB::ConfigProcessor::LoadedConfig loaded_config; diff --git a/src/Daemon/CMakeLists.txt b/src/Daemon/CMakeLists.txt index e1a9f09003c..35ea2122dbb 100644 --- a/src/Daemon/CMakeLists.txt +++ b/src/Daemon/CMakeLists.txt @@ -7,11 +7,7 @@ add_library (daemon GitHash.generated.cpp ) -if (OS_DARWIN AND NOT USE_STATIC_LIBRARIES) - target_link_libraries (daemon PUBLIC -Wl,-undefined,dynamic_lookup) -endif() - -target_link_libraries (daemon PUBLIC loggers common PRIVATE clickhouse_common_io clickhouse_common_config) +target_link_libraries (daemon PUBLIC loggers common PRIVATE clickhouse_parsers clickhouse_common_io clickhouse_common_config) if (TARGET ch_contrib::sentry) target_link_libraries (daemon PRIVATE ch_contrib::sentry dbms) diff --git a/src/DataTypes/DataTypeAggregateFunction.cpp b/src/DataTypes/DataTypeAggregateFunction.cpp index 7056fcff42f..ab6d024f5d8 100644 --- a/src/DataTypes/DataTypeAggregateFunction.cpp +++ b/src/DataTypes/DataTypeAggregateFunction.cpp @@ -67,7 +67,7 @@ String DataTypeAggregateFunction::getNameImpl(bool with_version) const if (!parameters.empty()) { stream << '('; - for (size_t i = 0; i < parameters.size(); ++i) + for (size_t i = 0, size = parameters.size(); i < size; ++i) { if (i) stream << ", "; diff --git a/src/DataTypes/DataTypeAggregateFunction.h b/src/DataTypes/DataTypeAggregateFunction.h index 4a92e6c5703..2d712d9c686 100644 --- a/src/DataTypes/DataTypeAggregateFunction.h +++ b/src/DataTypes/DataTypeAggregateFunction.h @@ -30,9 +30,9 @@ private: public: static constexpr bool is_parametric = true; - DataTypeAggregateFunction(const AggregateFunctionPtr & function_, const DataTypes & argument_types_, + DataTypeAggregateFunction(AggregateFunctionPtr function_, const DataTypes & argument_types_, const Array & parameters_, std::optional version_ = std::nullopt) - : function(function_) + : function(std::move(function_)) , argument_types(argument_types_) , parameters(parameters_) , version(version_) @@ -51,7 +51,7 @@ public: bool canBeInsideNullable() const override { return false; } - DataTypePtr getReturnType() const { return function->getReturnType(); } + DataTypePtr getReturnType() const { return function->getResultType(); } DataTypePtr getReturnTypeToPredict() const { return function->getReturnTypeToPredict(); } DataTypes getArgumentsDataTypes() const { return argument_types; } diff --git a/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp b/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp index efcab212094..e963b4b62e8 100644 --- a/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp +++ b/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp @@ -30,10 +30,26 @@ namespace ErrorCodes void DataTypeCustomSimpleAggregateFunction::checkSupportedFunctions(const AggregateFunctionPtr & function) { /// TODO Make it sane. 
- static const std::vector supported_functions{"any", "anyLast", "min", - "max", "sum", "sumWithOverflow", "groupBitAnd", "groupBitOr", "groupBitXor", - "sumMap", "minMap", "maxMap", "groupArrayArray", "groupUniqArrayArray", - "sumMappedArrays", "minMappedArrays", "maxMappedArrays"}; + static const std::vector supported_functions{ + "any", + "anyLast", + "min", + "max", + "sum", + "sumWithOverflow", + "groupBitAnd", + "groupBitOr", + "groupBitXor", + "sumMap", + "minMap", + "maxMap", + "groupArrayArray", + "groupArrayLastArray", + "groupUniqArrayArray", + "sumMappedArrays", + "minMappedArrays", + "maxMappedArrays", + }; // check function if (std::find(std::begin(supported_functions), std::end(supported_functions), function->getName()) == std::end(supported_functions)) @@ -131,9 +147,9 @@ static std::pair create(const ASTPtr & argum DataTypePtr storage_type = DataTypeFactory::instance().get(argument_types[0]->getName()); - if (!function->getReturnType()->equals(*removeLowCardinality(storage_type))) + if (!function->getResultType()->equals(*removeLowCardinality(storage_type))) { - throw Exception("Incompatible data types between aggregate function '" + function->getName() + "' which returns " + function->getReturnType()->getName() + " and column storage type " + storage_type->getName(), + throw Exception("Incompatible data types between aggregate function '" + function->getName() + "' which returns " + function->getResultType()->getName() + " and column storage type " + storage_type->getName(), ErrorCodes::BAD_ARGUMENTS); } diff --git a/src/DataTypes/DataTypeFixedString.h b/src/DataTypes/DataTypeFixedString.h index 7c089866b23..437e3f9d3f5 100644 --- a/src/DataTypes/DataTypeFixedString.h +++ b/src/DataTypes/DataTypeFixedString.h @@ -5,6 +5,7 @@ #include #define MAX_FIXEDSTRING_SIZE 0xFFFFFF +#define MAX_FIXEDSTRING_SIZE_WITHOUT_SUSPICIOUS 256 namespace DB diff --git a/src/DataTypes/Serializations/SerializationBool.cpp b/src/DataTypes/Serializations/SerializationBool.cpp index 1efacaaecc5..2a63b24c837 100644 --- a/src/DataTypes/Serializations/SerializationBool.cpp +++ b/src/DataTypes/Serializations/SerializationBool.cpp @@ -259,7 +259,7 @@ void SerializationBool::deserializeTextCSV(IColumn & column, ReadBuffer & istr, if (istr.eof()) throw Exception("Expected boolean value but get EOF.", ErrorCodes::CANNOT_PARSE_BOOL); - deserializeImpl(column, istr, settings, [&](ReadBuffer & buf){ return buf.eof() || *buf.position() == settings.csv.delimiter || *buf.position() == '\n'; }); + deserializeImpl(column, istr, settings, [&](ReadBuffer & buf){ return buf.eof() || *buf.position() == settings.csv.delimiter || *buf.position() == '\n' || *buf.position() == '\r'; }); } void SerializationBool::serializeTextRaw(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const diff --git a/src/DataTypes/transformTypesRecursively.cpp b/src/DataTypes/transformTypesRecursively.cpp index da3af0beee7..fd97254c7ef 100644 --- a/src/DataTypes/transformTypesRecursively.cpp +++ b/src/DataTypes/transformTypesRecursively.cpp @@ -8,12 +8,58 @@ namespace DB { -void transformTypesRecursively(DataTypes & types, std::function transform_simple_types, std::function transform_complex_types) +TypeIndexesSet getTypesIndexes(const DataTypes & types) { TypeIndexesSet type_indexes; for (const auto & type : types) type_indexes.insert(type->getTypeId()); + return type_indexes; +} +void transformTypesRecursively(DataTypes & types, std::function transform_simple_types, std::function 
transform_complex_types) +{ + TypeIndexesSet type_indexes = getTypesIndexes(types); + + /// Nullable + if (type_indexes.contains(TypeIndex::Nullable)) + { + std::vector is_nullable; + is_nullable.reserve(types.size()); + DataTypes nested_types; + nested_types.reserve(types.size()); + for (const auto & type : types) + { + if (const DataTypeNullable * type_nullable = typeid_cast(type.get())) + { + is_nullable.push_back(1); + nested_types.push_back(type_nullable->getNestedType()); + } + else + { + is_nullable.push_back(0); + nested_types.push_back(type); + } + } + + transformTypesRecursively(nested_types, transform_simple_types, transform_complex_types); + for (size_t i = 0; i != types.size(); ++i) + { + /// Type could be changed so it cannot be inside Nullable anymore. + if (is_nullable[i] && nested_types[i]->canBeInsideNullable()) + types[i] = makeNullable(nested_types[i]); + else + types[i] = nested_types[i]; + } + + if (transform_complex_types) + { + /// Some types could be changed. + type_indexes = getTypesIndexes(types); + transform_complex_types(types, type_indexes); + } + + return; + } /// Arrays if (type_indexes.contains(TypeIndex::Array)) @@ -114,42 +160,6 @@ void transformTypesRecursively(DataTypes & types, std::function is_nullable; - is_nullable.reserve(types.size()); - DataTypes nested_types; - nested_types.reserve(types.size()); - for (const auto & type : types) - { - if (const DataTypeNullable * type_nullable = typeid_cast(type.get())) - { - is_nullable.push_back(1); - nested_types.push_back(type_nullable->getNestedType()); - } - else - { - is_nullable.push_back(0); - nested_types.push_back(type); - } - } - - transformTypesRecursively(nested_types, transform_simple_types, transform_complex_types); - for (size_t i = 0; i != types.size(); ++i) - { - if (is_nullable[i]) - types[i] = makeNullable(nested_types[i]); - else - types[i] = nested_types[i]; - } - - if (transform_complex_types) - transform_complex_types(types, type_indexes); - - return; - } - transform_simple_types(types, type_indexes); } diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index 197f1a0543b..b1e265c084c 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -539,11 +539,19 @@ String DatabaseOnDisk::getObjectMetadataPath(const String & object_name) const time_t DatabaseOnDisk::getObjectMetadataModificationTime(const String & object_name) const { String table_metadata_path = getObjectMetadataPath(object_name); - - if (fs::exists(table_metadata_path)) + try + { return FS::getModificationTime(table_metadata_path); - else - return static_cast(0); + } + catch (const fs::filesystem_error & e) + { + if (e.code() == std::errc::no_such_file_or_directory) + { + return static_cast(0); + } + else + throw; + } } void DatabaseOnDisk::iterateMetadataFiles(ContextPtr local_context, const IteratingFunction & process_metadata_file) const diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index 2a9f06e77fc..a909c2e0d41 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -1,36 +1,39 @@ #include -#include -#include -#include -#include -#include -#include -#include -#include + +#include + +#include +#include +#include +#include #include +#include #include #include #include #include +#include #include -#include -#include +#include +#include #include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include #include 
-#include -#include -#include -#include #include -#include -#include -#include -#include - -#include +#include +#include +#include namespace DB { @@ -401,7 +404,7 @@ void DatabaseReplicated::createEmptyLogEntry(const ZooKeeperPtr & current_zookee bool DatabaseReplicated::waitForReplicaToProcessAllEntries(UInt64 timeout_ms) { - if (!ddl_worker) + if (!ddl_worker || is_probably_dropped) return false; return ddl_worker->waitForReplicaToProcessAllEntries(timeout_ms); } @@ -473,9 +476,10 @@ void DatabaseReplicated::startupTables(ThreadPool & thread_pool, LoadingStrictne chassert(!TSA_SUPPRESS_WARNING_FOR_READ(tables_metadata_digest)); TSA_SUPPRESS_WARNING_FOR_WRITE(tables_metadata_digest) = digest; - ddl_worker = std::make_unique(this, getContext()); if (is_probably_dropped) return; + + ddl_worker = std::make_unique(this, getContext()); ddl_worker->startup(); } @@ -491,7 +495,7 @@ bool DatabaseReplicated::checkDigestValid(const ContextPtr & local_context, bool LOG_TEST(log, "Current in-memory metadata digest: {}", tables_metadata_digest); /// Database is probably being dropped - if (!local_context->getZooKeeperMetadataTransaction() && !ddl_worker->isCurrentlyActive()) + if (!local_context->getZooKeeperMetadataTransaction() && (!ddl_worker || !ddl_worker->isCurrentlyActive())) return true; UInt64 local_digest = 0; @@ -584,7 +588,14 @@ void DatabaseReplicated::checkQueryValid(const ASTPtr & query, ContextPtr query_ bool enable_functional_tests_helper = getContext()->getConfigRef().has("_functional_tests_helper_database_replicated_replace_args_macros"); if (!enable_functional_tests_helper) - LOG_WARNING(log, "It's not recommended to explicitly specify zookeeper_path and replica_name in ReplicatedMergeTree arguments"); + { + if (query_context->getSettingsRef().database_replicated_allow_replicated_engine_arguments) + LOG_WARNING(log, "It's not recommended to explicitly specify zookeeper_path and replica_name in ReplicatedMergeTree arguments"); + else + throw Exception(ErrorCodes::INCORRECT_QUERY, + "It's not allowed to specify explicit zookeeper_path and replica_name for ReplicatedMergeTree arguments in Replicated database. 
" + "If you really want to specify them explicitly, enable setting database_replicated_allow_replicated_engine_arguments."); + } if (maybe_shard_macros && maybe_replica_macros) return; @@ -897,31 +908,37 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep for (const auto & id : dropped_tables) DatabaseCatalog::instance().waitTableFinallyDropped(id); - /// FIXME: Use proper dependency calculation instead of just moving MV to the end - using NameToMetadata = std::pair; - std::vector table_name_to_metadata_sorted; - table_name_to_metadata_sorted.reserve(table_name_to_metadata.size()); - std::move(table_name_to_metadata.begin(), table_name_to_metadata.end(), std::back_inserter(table_name_to_metadata_sorted)); - std::sort(table_name_to_metadata_sorted.begin(), table_name_to_metadata_sorted.end(), [](const NameToMetadata & lhs, const NameToMetadata & rhs) -> bool - { - const bool is_materialized_view_lhs = lhs.second.find("MATERIALIZED VIEW") != std::string::npos; - const bool is_materialized_view_rhs = rhs.second.find("MATERIALIZED VIEW") != std::string::npos; - return is_materialized_view_lhs < is_materialized_view_rhs; - }); - for (const auto & name_and_meta : table_name_to_metadata_sorted) + /// Create all needed tables in a proper order + TablesDependencyGraph tables_dependencies("DatabaseReplicated (" + getDatabaseName() + ")"); + for (const auto & [table_name, create_table_query] : table_name_to_metadata) { - if (isTableExist(name_and_meta.first, getContext())) + /// Note that table_name could contain a dot inside (e.g. .inner.1234-1234-1234-1234) + /// And QualifiedTableName::parseFromString doesn't handle this. + auto qualified_name = QualifiedTableName{.database = getDatabaseName(), .table = table_name}; + auto query_ast = parseQueryFromMetadataInZooKeeper(table_name, create_table_query); + tables_dependencies.addDependencies(qualified_name, getDependenciesFromCreateQuery(getContext(), qualified_name, query_ast)); + } + + tables_dependencies.checkNoCyclicDependencies(); + auto tables_to_create = tables_dependencies.getTablesSortedByDependency(); + + for (const auto & table_id : tables_to_create) + { + auto table_name = table_id.getTableName(); + auto create_query_string = table_name_to_metadata[table_name]; + if (isTableExist(table_name, getContext())) { - assert(name_and_meta.second == readMetadataFile(name_and_meta.first)); + assert(create_query_string == readMetadataFile(table_name)); continue; } - auto query_ast = parseQueryFromMetadataInZooKeeper(name_and_meta.first, name_and_meta.second); + auto query_ast = parseQueryFromMetadataInZooKeeper(table_name, create_query_string); LOG_INFO(log, "Executing {}", serializeAST(*query_ast)); auto create_query_context = make_query_context(); InterpreterCreateQuery(query_ast, create_query_context).execute(); } + LOG_INFO(log, "All tables are created successfully"); if (max_log_ptr_at_creation != 0) { @@ -1012,8 +1029,51 @@ ASTPtr DatabaseReplicated::parseQueryFromMetadataInZooKeeper(const String & node return ast; } +void DatabaseReplicated::dropReplica( + DatabaseReplicated * database, const String & database_zookeeper_path, const String & full_replica_name) +{ + assert(!database || database_zookeeper_path == database->zookeeper_path); + + if (full_replica_name.find('/') != std::string::npos) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid replica name: {}", full_replica_name); + + auto zookeeper = Context::getGlobalContextInstance()->getZooKeeper(); + + String database_mark = 
zookeeper->get(database_zookeeper_path); + if (database_mark != REPLICATED_DATABASE_MARK) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Path {} does not look like a path of Replicated database", database_zookeeper_path); + + String database_replica_path = fs::path(database_zookeeper_path) / "replicas" / full_replica_name; + if (!zookeeper->exists(database_replica_path)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Replica {} does not exist (database path: {})", + full_replica_name, database_zookeeper_path); + + if (zookeeper->exists(database_replica_path + "/active")) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Replica {} is active, cannot drop it (database path: {})", + full_replica_name, database_zookeeper_path); + + zookeeper->set(database_replica_path, DROPPED_MARK, -1); + /// Notify other replicas that cluster configuration was changed (if we can) + if (database) + database->createEmptyLogEntry(zookeeper); + + zookeeper->tryRemoveRecursive(database_replica_path); + if (zookeeper->tryRemove(database_zookeeper_path + "/replicas") == Coordination::Error::ZOK) + { + /// It was the last replica, remove all metadata + zookeeper->tryRemoveRecursive(database_zookeeper_path); + } +} + void DatabaseReplicated::drop(ContextPtr context_) { + if (is_probably_dropped) + { + /// Don't need to drop anything from ZooKeeper + DatabaseAtomic::drop(context_); + return; + } + auto current_zookeeper = getZooKeeper(); current_zookeeper->set(replica_path, DROPPED_MARK, -1); createEmptyLogEntry(current_zookeeper); @@ -1031,8 +1091,6 @@ void DatabaseReplicated::drop(ContextPtr context_) void DatabaseReplicated::stopReplication() { - if (is_probably_dropped) - return; if (ddl_worker) ddl_worker->shutdown(); } @@ -1048,7 +1106,7 @@ void DatabaseReplicated::shutdown() void DatabaseReplicated::dropTable(ContextPtr local_context, const String & table_name, bool sync) { auto txn = local_context->getZooKeeperMetadataTransaction(); - assert(!ddl_worker->isCurrentlyActive() || txn || startsWith(table_name, ".inner_id.")); + assert(!ddl_worker || !ddl_worker->isCurrentlyActive() || txn || startsWith(table_name, ".inner_id.")); if (txn && txn->isInitialQuery() && !txn->isCreateOrReplaceQuery()) { String metadata_zk_path = zookeeper_path + "/metadata/" + escapeForFileName(table_name); diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h index 0c9a3b77844..6a897f7322a 100644 --- a/src/Databases/DatabaseReplicated.h +++ b/src/Databases/DatabaseReplicated.h @@ -77,6 +77,8 @@ public: bool shouldReplicateQuery(const ContextPtr & query_context, const ASTPtr & query_ptr) const override; + static void dropReplica(DatabaseReplicated * database, const String & database_zookeeper_path, const String & full_replica_name); + friend struct DatabaseReplicatedTask; friend class DatabaseReplicatedDDLWorker; private: diff --git a/src/Databases/DatabaseReplicatedWorker.cpp b/src/Databases/DatabaseReplicatedWorker.cpp index 8c2983e1939..66ae5cd250c 100644 --- a/src/Databases/DatabaseReplicatedWorker.cpp +++ b/src/Databases/DatabaseReplicatedWorker.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include namespace fs = std::filesystem; @@ -36,6 +37,13 @@ bool DatabaseReplicatedDDLWorker::initializeMainThread() auto zookeeper = getAndSetZooKeeper(); if (database->is_readonly) database->tryConnectToZooKeeperAndInitDatabase(LoadingStrictnessLevel::ATTACH); + if (database->is_probably_dropped) + { + /// The flag was set in tryConnectToZooKeeperAndInitDatabase + LOG_WARNING(log, "Exiting main thread, 
because the database was probably dropped"); + /// NOTE It will not stop cleanup thread until DDLWorker::shutdown() call (cleanup thread will just do nothing) + break; + } initializeReplication(); initialized = true; return true; @@ -62,6 +70,16 @@ void DatabaseReplicatedDDLWorker::initializeReplication() /// Invariant: replica is lost if it's log_ptr value is less then max_log_ptr - logs_to_keep. auto zookeeper = getAndSetZooKeeper(); + + /// Create "active" node (remove previous one if necessary) + String active_path = fs::path(database->replica_path) / "active"; + String active_id = toString(ServerUUID::get()); + zookeeper->handleEphemeralNodeExistence(active_path, active_id); + zookeeper->create(active_path, active_id, zkutil::CreateMode::Ephemeral); + active_node_holder.reset(); + active_node_holder_zookeeper = zookeeper; + active_node_holder = zkutil::EphemeralNodeHolder::existing(active_path, *active_node_holder_zookeeper); + String log_ptr_str = zookeeper->get(database->replica_path + "/log_ptr"); UInt32 our_log_ptr = parse(log_ptr_str); UInt32 max_log_ptr = parse(zookeeper->get(database->zookeeper_path + "/max_log_ptr")); diff --git a/src/Databases/DatabaseReplicatedWorker.h b/src/Databases/DatabaseReplicatedWorker.h index 638b177460a..41edf2221b8 100644 --- a/src/Databases/DatabaseReplicatedWorker.h +++ b/src/Databases/DatabaseReplicatedWorker.h @@ -1,5 +1,6 @@ #pragma once #include +#include namespace DB { @@ -49,6 +50,12 @@ private: String current_task; std::atomic logs_to_keep = std::numeric_limits::max(); + + + /// EphemeralNodeHolder has reference to ZooKeeper, it may become dangling + ZooKeeperPtr active_node_holder_zookeeper; + /// It will remove "active" node when database is detached + zkutil::EphemeralNodeHolderPtr active_node_holder; }; } diff --git a/src/Dictionaries/MongoDBDictionarySource.cpp b/src/Dictionaries/MongoDBDictionarySource.cpp index 9c751d5ce97..c6b6a01d241 100644 --- a/src/Dictionaries/MongoDBDictionarySource.cpp +++ b/src/Dictionaries/MongoDBDictionarySource.cpp @@ -145,13 +145,9 @@ MongoDBDictionarySource::MongoDBDictionarySource( connection->connect(host, port); if (!user.empty()) { -#if POCO_VERSION >= 0x01070800 Poco::MongoDB::Database poco_db(db); if (!poco_db.authenticate(*connection, user, password, method.empty() ? 
Poco::MongoDB::Database::AUTH_SCRAM_SHA1 : method)) throw Exception(ErrorCodes::MONGODB_CANNOT_AUTHENTICATE, "Cannot authenticate in MongoDB, incorrect user or password"); -#else - authenticate(*connection, db, user, password); -#endif } } } diff --git a/src/Dictionaries/NullDictionarySource.cpp b/src/Dictionaries/NullDictionarySource.cpp new file mode 100644 index 00000000000..45dcc77f93d --- /dev/null +++ b/src/Dictionaries/NullDictionarySource.cpp @@ -0,0 +1,48 @@ +#include "NullDictionarySource.h" +#include +#include +#include +#include "DictionarySourceFactory.h" +#include "DictionarySourceHelpers.h" +#include "DictionaryStructure.h" +#include "registerDictionaries.h" + + +namespace DB +{ +NullDictionarySource::NullDictionarySource(Block & sample_block_) : sample_block(sample_block_) +{ +} + +NullDictionarySource::NullDictionarySource(const NullDictionarySource & other) : sample_block(other.sample_block) +{ +} + +QueryPipeline NullDictionarySource::loadAll() +{ + LOG_TRACE(&Poco::Logger::get("NullDictionarySource"), "loadAll {}", toString()); + return QueryPipeline(std::make_shared(sample_block)); +} + + +std::string NullDictionarySource::toString() const +{ + return "Null"; +} + + +void registerDictionarySourceNull(DictionarySourceFactory & factory) +{ + auto create_table_source + = [=](const DictionaryStructure & /* dict_struct */, + const Poco::Util::AbstractConfiguration & /* config */, + const std::string & /* config_prefix */, + Block & sample_block, + ContextPtr /* global_context */, + const std::string & /* default_database */, + bool /* created_from_ddl*/) -> DictionarySourcePtr { return std::make_unique(sample_block); }; + + factory.registerSource("null", create_table_source); +} + +} diff --git a/src/Dictionaries/NullDictionarySource.h b/src/Dictionaries/NullDictionarySource.h new file mode 100644 index 00000000000..7eb02055e3a --- /dev/null +++ b/src/Dictionaries/NullDictionarySource.h @@ -0,0 +1,53 @@ +#pragma once + +#include +#include "IDictionarySource.h" + +namespace DB +{ +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + +/// Allows creating empty dictionary +class NullDictionarySource final : public IDictionarySource +{ +public: + NullDictionarySource(Block & sample_block_); + + NullDictionarySource(const NullDictionarySource & other); + + QueryPipeline loadAll() override; + + QueryPipeline loadUpdatedAll() override + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method loadUpdatedAll is unsupported for NullDictionarySource"); + } + + QueryPipeline loadIds(const std::vector & /*ids*/) override + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method loadIds is unsupported for NullDictionarySource"); + } + + QueryPipeline loadKeys(const Columns & /*key_columns*/, const std::vector & /*requested_rows*/) override + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method loadKeys is unsupported for NullDictionarySource"); + } + + bool isModified() const override { return false; } + + bool supportsSelectiveLoad() const override { return false; } + + ///Not supported for NullDictionarySource + bool hasUpdateField() const override { return false; } + + DictionarySourcePtr clone() const override { return std::make_shared(*this); } + + std::string toString() const override; + +private: + Block sample_block; +}; + +} diff --git a/src/Dictionaries/RangeHashedDictionary.cpp b/src/Dictionaries/RangeHashedDictionary.cpp index 4ebef1c6737..95bcd1076d5 100644 --- a/src/Dictionaries/RangeHashedDictionary.cpp +++ b/src/Dictionaries/RangeHashedDictionary.cpp @@ 
-15,7 +15,6 @@ #include #include -#include #include @@ -1017,91 +1016,7 @@ Pipe RangeHashedDictionary::read(const Names & column_names return result; } -template -static DictionaryPtr createRangeHashedDictionary(const std::string & full_name, - const DictionaryStructure & dict_struct, - const Poco::Util::AbstractConfiguration & config, - const std::string & config_prefix, - DictionarySourcePtr source_ptr) -{ - static constexpr auto layout_name = dictionary_key_type == DictionaryKeyType::Simple ? "range_hashed" : "complex_key_range_hashed"; - - if constexpr (dictionary_key_type == DictionaryKeyType::Simple) - { - if (dict_struct.key) - throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "'key' is not supported for dictionary of layout 'range_hashed'"); - } - else - { - if (dict_struct.id) - throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "'id' is not supported for dictionary of layout 'complex_key_range_hashed'"); - } - - if (!dict_struct.range_min || !dict_struct.range_max) - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "{}: dictionary of layout '{}' requires .structure.range_min and .structure.range_max", - full_name, - layout_name); - - const auto dict_id = StorageID::fromDictionaryConfig(config, config_prefix); - const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"}; - const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false); - - String dictionary_layout_prefix = config_prefix + ".layout." + layout_name; - const bool convert_null_range_bound_to_open = config.getBool(dictionary_layout_prefix + ".convert_null_range_bound_to_open", true); - String range_lookup_strategy = config.getString(dictionary_layout_prefix + ".range_lookup_strategy", "min"); - RangeHashedDictionaryLookupStrategy lookup_strategy = RangeHashedDictionaryLookupStrategy::min; - - if (range_lookup_strategy == "min") - lookup_strategy = RangeHashedDictionaryLookupStrategy::min; - else if (range_lookup_strategy == "max") - lookup_strategy = RangeHashedDictionaryLookupStrategy::max; - - RangeHashedDictionaryConfiguration configuration - { - .convert_null_range_bound_to_open = convert_null_range_bound_to_open, - .lookup_strategy = lookup_strategy, - .require_nonempty = require_nonempty - }; - - DictionaryPtr result = std::make_unique>( - dict_id, - dict_struct, - std::move(source_ptr), - dict_lifetime, - configuration); - - return result; -} - -void registerDictionaryRangeHashed(DictionaryFactory & factory) -{ - auto create_layout_simple = [=](const std::string & full_name, - const DictionaryStructure & dict_struct, - const Poco::Util::AbstractConfiguration & config, - const std::string & config_prefix, - DictionarySourcePtr source_ptr, - ContextPtr /* global_context */, - bool /*created_from_ddl*/) -> DictionaryPtr - { - return createRangeHashedDictionary(full_name, dict_struct, config, config_prefix, std::move(source_ptr)); - }; - - factory.registerLayout("range_hashed", create_layout_simple, false); - - auto create_layout_complex = [=](const std::string & full_name, - const DictionaryStructure & dict_struct, - const Poco::Util::AbstractConfiguration & config, - const std::string & config_prefix, - DictionarySourcePtr source_ptr, - ContextPtr /* context */, - bool /*created_from_ddl*/) -> DictionaryPtr - { - return createRangeHashedDictionary(full_name, dict_struct, config, config_prefix, std::move(source_ptr)); - }; - - factory.registerLayout("complex_key_range_hashed", create_layout_complex, true); -} +template class RangeHashedDictionary; +template class 
RangeHashedDictionary; } diff --git a/src/Dictionaries/RangeHashedDictionary.h b/src/Dictionaries/RangeHashedDictionary.h index 78d62e9d7de..257bfcd528c 100644 --- a/src/Dictionaries/RangeHashedDictionary.h +++ b/src/Dictionaries/RangeHashedDictionary.h @@ -248,4 +248,7 @@ private: Arena string_arena; }; +extern template class RangeHashedDictionary; +extern template class RangeHashedDictionary; + } diff --git a/src/Dictionaries/getDictionaryConfigurationFromAST.cpp b/src/Dictionaries/getDictionaryConfigurationFromAST.cpp index 8fc00fe9345..38b513bfecd 100644 --- a/src/Dictionaries/getDictionaryConfigurationFromAST.cpp +++ b/src/Dictionaries/getDictionaryConfigurationFromAST.cpp @@ -344,11 +344,14 @@ void buildPrimaryKeyConfiguration( auto identifier_name = key_names.front(); - auto it = std::find_if(children.begin(), children.end(), [&](const ASTPtr & node) - { - const ASTDictionaryAttributeDeclaration * dict_attr = node->as(); - return dict_attr->name == identifier_name; - }); + const auto * it = std::find_if( + children.begin(), + children.end(), + [&](const ASTPtr & node) + { + const ASTDictionaryAttributeDeclaration * dict_attr = node->as(); + return dict_attr->name == identifier_name; + }); if (it == children.end()) { diff --git a/src/Dictionaries/registerDictionaries.cpp b/src/Dictionaries/registerDictionaries.cpp index 4ade5d88bd4..f0526f4ce37 100644 --- a/src/Dictionaries/registerDictionaries.cpp +++ b/src/Dictionaries/registerDictionaries.cpp @@ -6,6 +6,7 @@ namespace DB class DictionarySourceFactory; +void registerDictionarySourceNull(DictionarySourceFactory & factory); void registerDictionarySourceFile(DictionarySourceFactory & source_factory); void registerDictionarySourceMysql(DictionarySourceFactory & source_factory); void registerDictionarySourceClickHouse(DictionarySourceFactory & source_factory); @@ -36,6 +37,7 @@ void registerDictionaries() { { auto & source_factory = DictionarySourceFactory::instance(); + registerDictionarySourceNull(source_factory); registerDictionarySourceFile(source_factory); registerDictionarySourceMysql(source_factory); registerDictionarySourceClickHouse(source_factory); diff --git a/src/Dictionaries/registerRangeHashedDictionary.cpp b/src/Dictionaries/registerRangeHashedDictionary.cpp new file mode 100644 index 00000000000..93784a0709e --- /dev/null +++ b/src/Dictionaries/registerRangeHashedDictionary.cpp @@ -0,0 +1,101 @@ +#include "RangeHashedDictionary.h" +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int UNSUPPORTED_METHOD; + extern const int BAD_ARGUMENTS; +} + +template +static DictionaryPtr createRangeHashedDictionary(const std::string & full_name, + const DictionaryStructure & dict_struct, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + DictionarySourcePtr source_ptr) +{ + static constexpr auto layout_name = dictionary_key_type == DictionaryKeyType::Simple ? 
"range_hashed" : "complex_key_range_hashed"; + + if constexpr (dictionary_key_type == DictionaryKeyType::Simple) + { + if (dict_struct.key) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "'key' is not supported for dictionary of layout 'range_hashed'"); + } + else + { + if (dict_struct.id) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "'id' is not supported for dictionary of layout 'complex_key_range_hashed'"); + } + + if (!dict_struct.range_min || !dict_struct.range_max) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "{}: dictionary of layout '{}' requires .structure.range_min and .structure.range_max", + full_name, + layout_name); + + const auto dict_id = StorageID::fromDictionaryConfig(config, config_prefix); + const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"}; + const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false); + + String dictionary_layout_prefix = config_prefix + ".layout." + layout_name; + const bool convert_null_range_bound_to_open = config.getBool(dictionary_layout_prefix + ".convert_null_range_bound_to_open", true); + String range_lookup_strategy = config.getString(dictionary_layout_prefix + ".range_lookup_strategy", "min"); + RangeHashedDictionaryLookupStrategy lookup_strategy = RangeHashedDictionaryLookupStrategy::min; + + if (range_lookup_strategy == "min") + lookup_strategy = RangeHashedDictionaryLookupStrategy::min; + else if (range_lookup_strategy == "max") + lookup_strategy = RangeHashedDictionaryLookupStrategy::max; + + RangeHashedDictionaryConfiguration configuration + { + .convert_null_range_bound_to_open = convert_null_range_bound_to_open, + .lookup_strategy = lookup_strategy, + .require_nonempty = require_nonempty + }; + + DictionaryPtr result = std::make_unique>( + dict_id, + dict_struct, + std::move(source_ptr), + dict_lifetime, + configuration); + + return result; +} + +void registerDictionaryRangeHashed(DictionaryFactory & factory) +{ + auto create_layout_simple = [=](const std::string & full_name, + const DictionaryStructure & dict_struct, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + DictionarySourcePtr source_ptr, + ContextPtr /* global_context */, + bool /*created_from_ddl*/) -> DictionaryPtr + { + return createRangeHashedDictionary(full_name, dict_struct, config, config_prefix, std::move(source_ptr)); + }; + + factory.registerLayout("range_hashed", create_layout_simple, false); + + auto create_layout_complex = [=](const std::string & full_name, + const DictionaryStructure & dict_struct, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + DictionarySourcePtr source_ptr, + ContextPtr /* context */, + bool /*created_from_ddl*/) -> DictionaryPtr + { + return createRangeHashedDictionary(full_name, dict_struct, config, config_prefix, std::move(source_ptr)); + }; + + factory.registerLayout("complex_key_range_hashed", create_layout_complex, true); +} + +} diff --git a/src/Disks/DiskDecorator.cpp b/src/Disks/DiskDecorator.cpp deleted file mode 100644 index f9017446dda..00000000000 --- a/src/Disks/DiskDecorator.cpp +++ /dev/null @@ -1,259 +0,0 @@ -#include "DiskDecorator.h" -#include -#include - -namespace DB -{ - -DiskDecorator::DiskDecorator(const DiskPtr & delegate_) - : IDisk(/* name_= */ "") - , delegate(delegate_) -{ -} - -DiskTransactionPtr DiskDecorator::createTransaction() -{ - return delegate->createTransaction(); -} - -const String & DiskDecorator::getName() const -{ - return delegate->getName(); -} - 
-ReservationPtr DiskDecorator::reserve(UInt64 bytes) -{ - return delegate->reserve(bytes); -} - -const String & DiskDecorator::getPath() const -{ - return delegate->getPath(); -} - -UInt64 DiskDecorator::getTotalSpace() const -{ - return delegate->getTotalSpace(); -} - -UInt64 DiskDecorator::getAvailableSpace() const -{ - return delegate->getAvailableSpace(); -} - -UInt64 DiskDecorator::getUnreservedSpace() const -{ - return delegate->getUnreservedSpace(); -} - -UInt64 DiskDecorator::getKeepingFreeSpace() const -{ - return delegate->getKeepingFreeSpace(); -} - -bool DiskDecorator::exists(const String & path) const -{ - return delegate->exists(path); -} - -bool DiskDecorator::isFile(const String & path) const -{ - return delegate->isFile(path); -} - -bool DiskDecorator::isDirectory(const String & path) const -{ - return delegate->isDirectory(path); -} - -size_t DiskDecorator::getFileSize(const String & path) const -{ - return delegate->getFileSize(path); -} - -void DiskDecorator::createDirectory(const String & path) -{ - delegate->createDirectory(path); -} - -void DiskDecorator::createDirectories(const String & path) -{ - delegate->createDirectories(path); -} - -void DiskDecorator::clearDirectory(const String & path) -{ - delegate->clearDirectory(path); -} - -void DiskDecorator::moveDirectory(const String & from_path, const String & to_path) -{ - delegate->moveDirectory(from_path, to_path); -} - -DirectoryIteratorPtr DiskDecorator::iterateDirectory(const String & path) const -{ - return delegate->iterateDirectory(path); -} - -void DiskDecorator::createFile(const String & path) -{ - delegate->createFile(path); -} - -void DiskDecorator::moveFile(const String & from_path, const String & to_path) -{ - delegate->moveFile(from_path, to_path); -} - -void DiskDecorator::replaceFile(const String & from_path, const String & to_path) -{ - delegate->replaceFile(from_path, to_path); -} - -void DiskDecorator::copy(const String & from_path, const std::shared_ptr & to_disk, const String & to_path) -{ - delegate->copy(from_path, to_disk, to_path); -} - -void DiskDecorator::copyDirectoryContent(const String & from_dir, const std::shared_ptr & to_disk, const String & to_dir) -{ - delegate->copyDirectoryContent(from_dir, to_disk, to_dir); -} - -void DiskDecorator::listFiles(const String & path, std::vector & file_names) const -{ - delegate->listFiles(path, file_names); -} - -std::unique_ptr -DiskDecorator::readFile( - const String & path, const ReadSettings & settings, std::optional read_hint, std::optional file_size) const -{ - return delegate->readFile(path, settings, read_hint, file_size); -} - -std::unique_ptr -DiskDecorator::writeFile(const String & path, size_t buf_size, WriteMode mode, const WriteSettings & settings) -{ - return delegate->writeFile(path, buf_size, mode, settings); -} - -void DiskDecorator::removeFile(const String & path) -{ - delegate->removeFile(path); -} - -void DiskDecorator::removeFileIfExists(const String & path) -{ - delegate->removeFileIfExists(path); -} - -void DiskDecorator::removeDirectory(const String & path) -{ - delegate->removeDirectory(path); -} - -void DiskDecorator::removeRecursive(const String & path) -{ - delegate->removeRecursive(path); -} - -void DiskDecorator::removeSharedFile(const String & path, bool keep_s3) -{ - delegate->removeSharedFile(path, keep_s3); -} - -void DiskDecorator::removeSharedFileIfExists(const String & path, bool keep_s3) -{ - delegate->removeSharedFileIfExists(path, keep_s3); -} - -void DiskDecorator::removeSharedFiles(const RemoveBatchRequest 
& files, bool keep_all_batch_data, const NameSet & file_names_remove_metadata_only) -{ - delegate->removeSharedFiles(files, keep_all_batch_data, file_names_remove_metadata_only); -} - -void DiskDecorator::removeSharedRecursive(const String & path, bool keep_all_batch_data, const NameSet & file_names_remove_metadata_only) -{ - delegate->removeSharedRecursive(path, keep_all_batch_data, file_names_remove_metadata_only); -} - -void DiskDecorator::setLastModified(const String & path, const Poco::Timestamp & timestamp) -{ - delegate->setLastModified(path, timestamp); -} - -Poco::Timestamp DiskDecorator::getLastModified(const String & path) const -{ - return delegate->getLastModified(path); -} - -time_t DiskDecorator::getLastChanged(const String & path) const -{ - return delegate->getLastChanged(path); -} - -void DiskDecorator::setReadOnly(const String & path) -{ - delegate->setReadOnly(path); -} - -void DiskDecorator::createHardLink(const String & src_path, const String & dst_path) -{ - delegate->createHardLink(src_path, dst_path); -} - -void DiskDecorator::truncateFile(const String & path, size_t size) -{ - delegate->truncateFile(path, size); -} - -Executor & DiskDecorator::getExecutor() -{ - return delegate->getExecutor(); -} - -SyncGuardPtr DiskDecorator::getDirectorySyncGuard(const String & path) const -{ - return delegate->getDirectorySyncGuard(path); -} - -void DiskDecorator::onFreeze(const String & path) -{ - delegate->onFreeze(path); -} - -void DiskDecorator::shutdown() -{ - delegate->shutdown(); -} - -void DiskDecorator::startupImpl(ContextPtr context) -{ - delegate->startupImpl(context); -} - -void DiskDecorator::applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context, const String & config_prefix, const DisksMap & map) -{ - delegate->applyNewSettings(config, context, config_prefix, map); -} - -DiskObjectStoragePtr DiskDecorator::createDiskObjectStorage() -{ - return delegate->createDiskObjectStorage(); -} - -ObjectStoragePtr DiskDecorator::getObjectStorage() -{ - return delegate->getObjectStorage(); -} - -DiskPtr DiskDecorator::getNestedDisk() const -{ - if (const auto * decorator = dynamic_cast(delegate.get())) - return decorator->getNestedDisk(); - return delegate; -} - -} diff --git a/src/Disks/DiskDecorator.h b/src/Disks/DiskDecorator.h deleted file mode 100644 index f7eface8c66..00000000000 --- a/src/Disks/DiskDecorator.h +++ /dev/null @@ -1,139 +0,0 @@ -#pragma once - -#include "Disks/IDisk.h" - -namespace DB -{ - -/** Forwards all methods to another disk. - * Methods can be overridden by descendants. 
- */ -class DiskDecorator : public IDisk -{ -public: - explicit DiskDecorator(const DiskPtr & delegate_); - - DiskTransactionPtr createTransaction() override; - const String & getName() const override; - ReservationPtr reserve(UInt64 bytes) override; - ~DiskDecorator() override = default; - const String & getPath() const override; - UInt64 getTotalSpace() const override; - UInt64 getAvailableSpace() const override; - UInt64 getUnreservedSpace() const override; - UInt64 getKeepingFreeSpace() const override; - bool exists(const String & path) const override; - bool isFile(const String & path) const override; - bool isDirectory(const String & path) const override; - size_t getFileSize(const String & path) const override; - void createDirectory(const String & path) override; - void createDirectories(const String & path) override; - void clearDirectory(const String & path) override; - void moveDirectory(const String & from_path, const String & to_path) override; - DirectoryIteratorPtr iterateDirectory(const String & path) const override; - void createFile(const String & path) override; - void moveFile(const String & from_path, const String & to_path) override; - void replaceFile(const String & from_path, const String & to_path) override; - void copy(const String & from_path, const std::shared_ptr & to_disk, const String & to_path) override; - void copyDirectoryContent(const String & from_dir, const std::shared_ptr & to_disk, const String & to_dir) override; - void listFiles(const String & path, std::vector & file_names) const override; - - std::unique_ptr readFile( - const String & path, - const ReadSettings & settings, - std::optional read_hint, - std::optional file_size) const override; - - std::unique_ptr writeFile( - const String & path, - size_t buf_size, - WriteMode mode, - const WriteSettings & settings) override; - - void removeFile(const String & path) override; - void removeFileIfExists(const String & path) override; - void removeSharedFileIfExists(const String & path, bool keep_s3) override; - - void removeDirectory(const String & path) override; - void removeRecursive(const String & path) override; - - void removeSharedFile(const String & path, bool keep_s3) override; - void removeSharedRecursive(const String & path, bool keep_all_batch_data, const NameSet & file_names_remove_metadata_only) override; - void removeSharedFiles(const RemoveBatchRequest & files, bool keep_all_batch_data, const NameSet & file_names_remove_metadata_only) override; - - void setLastModified(const String & path, const Poco::Timestamp & timestamp) override; - time_t getLastChanged(const String & path) const override; - Poco::Timestamp getLastModified(const String & path) const override; - void setReadOnly(const String & path) override; - void createHardLink(const String & src_path, const String & dst_path) override; - void truncateFile(const String & path, size_t size) override; - int open(const String & path, mode_t mode) const; - void close(int fd) const; - void sync(int fd) const; - String getUniqueId(const String & path) const override { return delegate->getUniqueId(path); } - bool checkUniqueId(const String & id) const override { return delegate->checkUniqueId(id); } - DataSourceDescription getDataSourceDescription() const override { return delegate->getDataSourceDescription(); } - bool isRemote() const override { return delegate->isRemote(); } - bool isReadOnly() const override { return delegate->isReadOnly(); } - bool isWriteOnce() const override { return delegate->isWriteOnce(); } - bool 
supportZeroCopyReplication() const override { return delegate->supportZeroCopyReplication(); } - bool supportParallelWrite() const override { return delegate->supportParallelWrite(); } - void onFreeze(const String & path) override; - SyncGuardPtr getDirectorySyncGuard(const String & path) const override; - void shutdown() override; - void startupImpl(ContextPtr context) override; - void applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context, const String & config_prefix, const DisksMap & map) override; - - bool supportsCache() const override { return delegate->supportsCache(); } - const String & getCacheBasePath() const override { return delegate->getCacheBasePath(); } - - StoredObjects getStorageObjects(const String & path) const override { return delegate->getStorageObjects(path); } - void getRemotePathsRecursive(const String & path, std::vector & paths_map) override { return delegate->getRemotePathsRecursive(path, paths_map); } - - DiskObjectStoragePtr createDiskObjectStorage() override; - ObjectStoragePtr getObjectStorage() override; - NameSet getCacheLayersNames() const override { return delegate->getCacheLayersNames(); } - - MetadataStoragePtr getMetadataStorage() override { return delegate->getMetadataStorage(); } - - std::unordered_map getSerializedMetadata(const std::vector & file_paths) const override { return delegate->getSerializedMetadata(file_paths); } - - UInt32 getRefCount(const String & path) const override { return delegate->getRefCount(path); } - - void syncRevision(UInt64 revision) override { delegate->syncRevision(revision); } - - UInt64 getRevision() const override { return delegate->getRevision(); } - - bool supportsStat() const override { return delegate->supportsStat(); } - struct stat stat(const String & path) const override { return delegate->stat(path); } - - bool supportsChmod() const override { return delegate->supportsChmod(); } - void chmod(const String & path, mode_t mode) override { delegate->chmod(path, mode); } - - virtual DiskPtr getNestedDisk() const; - -protected: - Executor & getExecutor() override; - - DiskPtr delegate; -}; - -/// TODO: Current reservation mechanism leaks IDisk abstraction details. -/// This hack is needed to return proper disk pointer (wrapper instead of implementation) from reservation object. 
-class ReservationDelegate : public IReservation -{ -public: - ReservationDelegate(ReservationPtr delegate_, DiskPtr wrapper_) : delegate(std::move(delegate_)), wrapper(wrapper_) { } - UInt64 getSize() const override { return delegate->getSize(); } - UInt64 getUnreservedSpace() const override { return delegate->getUnreservedSpace(); } - DiskPtr getDisk(size_t) const override { return wrapper; } - Disks getDisks() const override { return {wrapper}; } - void update(UInt64 new_size) override { delegate->update(new_size); } - -private: - ReservationPtr delegate; - DiskPtr wrapper; -}; - - -} diff --git a/src/Disks/DiskEncrypted.cpp b/src/Disks/DiskEncrypted.cpp index 79905283ddb..7c4bee6d861 100644 --- a/src/Disks/DiskEncrypted.cpp +++ b/src/Disks/DiskEncrypted.cpp @@ -209,7 +209,8 @@ DiskEncrypted::DiskEncrypted( } DiskEncrypted::DiskEncrypted(const String & name_, std::unique_ptr settings_) - : DiskDecorator(settings_->wrapped_disk) + : IDisk(name_) + , delegate(settings_->wrapped_disk) , encrypted_name(name_) , disk_path(settings_->disk_path) , disk_absolute_path(settings_->wrapped_disk->getPath() + settings_->disk_path) diff --git a/src/Disks/DiskEncrypted.h b/src/Disks/DiskEncrypted.h index 74da7cfa2c0..d38c916ee6e 100644 --- a/src/Disks/DiskEncrypted.h +++ b/src/Disks/DiskEncrypted.h @@ -4,7 +4,6 @@ #if USE_SSL #include -#include #include #include @@ -27,7 +26,7 @@ struct DiskEncryptedSettings /// Encrypted disk ciphers all written files on the fly and writes the encrypted files to an underlying (normal) disk. /// And when we read files from an encrypted disk it deciphers them automatically, /// so we can work with a encrypted disk like it's a normal disk. -class DiskEncrypted : public DiskDecorator +class DiskEncrypted : public IDisk { public: DiskEncrypted(const String & name_, const Poco::Util::AbstractConfiguration & config_, const String & config_prefix_, const DisksMap & map_); @@ -252,6 +251,32 @@ public: return std::make_shared(*this); } + UInt64 getTotalSpace() const override + { + return delegate->getTotalSpace(); + } + + UInt64 getAvailableSpace() const override + { + return delegate->getAvailableSpace(); + } + + UInt64 getUnreservedSpace() const override + { + return delegate->getUnreservedSpace(); + } + + bool supportZeroCopyReplication() const override + { + return delegate->supportZeroCopyReplication(); + } + + MetadataStoragePtr getMetadataStorage() override + { + return delegate->getMetadataStorage(); + } + + private: String wrappedPath(const String & path) const { @@ -261,6 +286,7 @@ private: return disk_path + path; } + DiskPtr delegate; const String encrypted_name; const String disk_path; const String disk_absolute_path; diff --git a/src/Disks/DiskLocal.cpp b/src/Disks/DiskLocal.cpp index 63ba1a45e34..d2cd30c1cfa 100644 --- a/src/Disks/DiskLocal.cpp +++ b/src/Disks/DiskLocal.cpp @@ -18,7 +18,6 @@ #include #include -#include #include #include #include @@ -775,7 +774,7 @@ void registerDiskLocal(DiskFactory & factory, bool global_skip_access_check) std::shared_ptr disk = std::make_shared(name, path, keep_free_space_bytes, context, config.getUInt("local_disk_check_period_ms", 0)); disk->startup(context, skip_access_check); - return std::make_shared(disk); + return disk; }; factory.registerDiskType("local", creator); } diff --git a/src/Disks/DiskRestartProxy.cpp b/src/Disks/DiskRestartProxy.cpp deleted file mode 100644 index 0b79ee51db9..00000000000 --- a/src/Disks/DiskRestartProxy.cpp +++ /dev/null @@ -1,378 +0,0 @@ -#include "DiskRestartProxy.h" - -#include -#include - 
-namespace DB -{ - -namespace ErrorCodes -{ - extern const int DEADLOCK_AVOIDED; -} - -using Millis = std::chrono::milliseconds; -using Seconds = std::chrono::seconds; - -/// Holds restart read lock till buffer destruction. -class RestartAwareReadBuffer : public ReadBufferFromFileDecorator -{ -public: - RestartAwareReadBuffer(const DiskRestartProxy & disk, std::unique_ptr impl_) - : ReadBufferFromFileDecorator(std::move(impl_)), lock(disk.mutex) { } - - void prefetch() override - { - swap(*impl); - impl->prefetch(); - swap(*impl); - } - - void setReadUntilPosition(size_t position) override - { - swap(*impl); - impl->setReadUntilPosition(position); - swap(*impl); - } - - void setReadUntilEnd() override - { - swap(*impl); - impl->setReadUntilEnd(); - swap(*impl); - } - - String getInfoForLog() override { return impl->getInfoForLog(); } - -private: - ReadLock lock; -}; - -/// Holds restart read lock till buffer finalize. -class RestartAwareWriteBuffer : public WriteBufferFromFileDecorator -{ -public: - RestartAwareWriteBuffer(const DiskRestartProxy & disk, std::unique_ptr impl_) - : WriteBufferFromFileDecorator(std::move(impl_)), lock(disk.mutex) { } - - ~RestartAwareWriteBuffer() override - { - try - { - finalize(); - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - } - } - - void finalizeImpl() override - { - WriteBufferFromFileDecorator::finalizeImpl(); - - lock.unlock(); - } - -private: - ReadLock lock; -}; - -DiskRestartProxy::DiskRestartProxy(DiskPtr & delegate_) - : DiskDecorator(delegate_) -{} - -ReservationPtr DiskRestartProxy::reserve(UInt64 bytes) -{ - ReadLock lock (mutex); - auto ptr = DiskDecorator::reserve(bytes); - if (ptr) - { - auto disk_ptr = std::static_pointer_cast(shared_from_this()); - return std::make_unique(std::move(ptr), disk_ptr); - } - return ptr; -} - -const String & DiskRestartProxy::getPath() const -{ - ReadLock lock (mutex); - return DiskDecorator::getPath(); -} - -UInt64 DiskRestartProxy::getTotalSpace() const -{ - ReadLock lock (mutex); - return DiskDecorator::getTotalSpace(); -} - -UInt64 DiskRestartProxy::getAvailableSpace() const -{ - ReadLock lock (mutex); - return DiskDecorator::getAvailableSpace(); -} - -UInt64 DiskRestartProxy::getUnreservedSpace() const -{ - ReadLock lock (mutex); - return DiskDecorator::getUnreservedSpace(); -} - -UInt64 DiskRestartProxy::getKeepingFreeSpace() const -{ - ReadLock lock (mutex); - return DiskDecorator::getKeepingFreeSpace(); -} - -bool DiskRestartProxy::exists(const String & path) const -{ - ReadLock lock (mutex); - return DiskDecorator::exists(path); -} - -bool DiskRestartProxy::isFile(const String & path) const -{ - ReadLock lock (mutex); - return DiskDecorator::isFile(path); -} - -bool DiskRestartProxy::isDirectory(const String & path) const -{ - ReadLock lock (mutex); - return DiskDecorator::isDirectory(path); -} - -size_t DiskRestartProxy::getFileSize(const String & path) const -{ - ReadLock lock (mutex); - return DiskDecorator::getFileSize(path); -} - -void DiskRestartProxy::createDirectory(const String & path) -{ - ReadLock lock (mutex); - DiskDecorator::createDirectory(path); -} - -void DiskRestartProxy::createDirectories(const String & path) -{ - ReadLock lock (mutex); - DiskDecorator::createDirectories(path); -} - -void DiskRestartProxy::clearDirectory(const String & path) -{ - ReadLock lock (mutex); - DiskDecorator::clearDirectory(path); -} - -void DiskRestartProxy::moveDirectory(const String & from_path, const String & to_path) -{ - ReadLock lock (mutex); - 
DiskDecorator::moveDirectory(from_path, to_path); -} - -DirectoryIteratorPtr DiskRestartProxy::iterateDirectory(const String & path) const -{ - ReadLock lock (mutex); - return DiskDecorator::iterateDirectory(path); -} - -void DiskRestartProxy::createFile(const String & path) -{ - ReadLock lock (mutex); - DiskDecorator::createFile(path); -} - -void DiskRestartProxy::moveFile(const String & from_path, const String & to_path) -{ - ReadLock lock (mutex); - DiskDecorator::moveFile(from_path, to_path); -} - -void DiskRestartProxy::replaceFile(const String & from_path, const String & to_path) -{ - ReadLock lock (mutex); - DiskDecorator::replaceFile(from_path, to_path); -} - -void DiskRestartProxy::copy(const String & from_path, const std::shared_ptr & to_disk, const String & to_path) -{ - ReadLock lock (mutex); - DiskDecorator::copy(from_path, to_disk, to_path); -} - -void DiskRestartProxy::copyDirectoryContent(const String & from_dir, const std::shared_ptr & to_disk, const String & to_dir) -{ - ReadLock lock (mutex); - DiskDecorator::copyDirectoryContent(from_dir, to_disk, to_dir); -} - -void DiskRestartProxy::listFiles(const String & path, std::vector & file_names) const -{ - ReadLock lock (mutex); - DiskDecorator::listFiles(path, file_names); -} - -std::unique_ptr DiskRestartProxy::readFile( - const String & path, const ReadSettings & settings, std::optional read_hint, std::optional file_size) const -{ - ReadLock lock (mutex); - auto impl = DiskDecorator::readFile(path, settings, read_hint, file_size); - return std::make_unique(*this, std::move(impl)); -} - -std::unique_ptr DiskRestartProxy::writeFile(const String & path, size_t buf_size, WriteMode mode, const WriteSettings & settings) -{ - ReadLock lock (mutex); - auto impl = DiskDecorator::writeFile(path, buf_size, mode, settings); - return std::make_unique(*this, std::move(impl)); -} - -void DiskRestartProxy::removeFile(const String & path) -{ - ReadLock lock (mutex); - DiskDecorator::removeFile(path); -} - -void DiskRestartProxy::removeFileIfExists(const String & path) -{ - ReadLock lock (mutex); - DiskDecorator::removeFileIfExists(path); -} - -void DiskRestartProxy::removeDirectory(const String & path) -{ - ReadLock lock (mutex); - DiskDecorator::removeDirectory(path); -} - -void DiskRestartProxy::removeRecursive(const String & path) -{ - ReadLock lock (mutex); - DiskDecorator::removeRecursive(path); -} - -void DiskRestartProxy::removeSharedFile(const String & path, bool keep_s3) -{ - ReadLock lock (mutex); - DiskDecorator::removeSharedFile(path, keep_s3); -} - -void DiskRestartProxy::removeSharedFiles(const RemoveBatchRequest & files, bool keep_all_batch_data, const NameSet & file_names_remove_metadata_only) -{ - ReadLock lock (mutex); - DiskDecorator::removeSharedFiles(files, keep_all_batch_data, file_names_remove_metadata_only); -} - -void DiskRestartProxy::removeSharedRecursive(const String & path, bool keep_all_batch_data, const NameSet & file_names_remove_metadata_only) -{ - ReadLock lock (mutex); - DiskDecorator::removeSharedRecursive(path, keep_all_batch_data, file_names_remove_metadata_only); -} - -void DiskRestartProxy::setLastModified(const String & path, const Poco::Timestamp & timestamp) -{ - ReadLock lock (mutex); - DiskDecorator::setLastModified(path, timestamp); -} - -Poco::Timestamp DiskRestartProxy::getLastModified(const String & path) const -{ - ReadLock lock (mutex); - return DiskDecorator::getLastModified(path); -} - -void DiskRestartProxy::setReadOnly(const String & path) -{ - ReadLock lock (mutex); - 
DiskDecorator::setReadOnly(path); -} - -void DiskRestartProxy::createHardLink(const String & src_path, const String & dst_path) -{ - ReadLock lock (mutex); - DiskDecorator::createHardLink(src_path, dst_path); -} - -void DiskRestartProxy::truncateFile(const String & path, size_t size) -{ - ReadLock lock (mutex); - DiskDecorator::truncateFile(path, size); -} - -String DiskRestartProxy::getUniqueId(const String & path) const -{ - ReadLock lock (mutex); - return DiskDecorator::getUniqueId(path); -} - -bool DiskRestartProxy::checkUniqueId(const String & id) const -{ - ReadLock lock (mutex); - return DiskDecorator::checkUniqueId(id); -} - -const String & DiskRestartProxy::getCacheBasePath() const -{ - ReadLock lock (mutex); - return DiskDecorator::getCacheBasePath(); -} - -StoredObjects DiskRestartProxy::getStorageObjects(const String & path) const -{ - ReadLock lock (mutex); - return DiskDecorator::getStorageObjects(path); -} - -void DiskRestartProxy::getRemotePathsRecursive( - const String & path, std::vector & paths_map) -{ - ReadLock lock (mutex); - return DiskDecorator::getRemotePathsRecursive(path, paths_map); -} - -DiskPtr DiskRestartProxy::getNestedDisk() const -{ - DiskPtr delegate_copy; - - { - ReadLock lock (mutex); - delegate_copy = delegate; - } - - if (const auto * decorator = dynamic_cast(delegate_copy.get())) - return decorator->getNestedDisk(); - return delegate_copy; -} - -void DiskRestartProxy::restart(ContextPtr context) -{ - /// Speed up processing unhealthy requests. - DiskDecorator::shutdown(); - - WriteLock lock (mutex, std::defer_lock); - - LOG_INFO(log, "Acquiring lock to restart disk {}", DiskDecorator::getName()); - - auto start_time = std::chrono::steady_clock::now(); - auto lock_timeout = Seconds(120); - do - { - /// Use a small timeout to not block read operations for a long time. - if (lock.try_lock_for(Millis(10))) - break; - } while (std::chrono::steady_clock::now() - start_time < lock_timeout); - - if (!lock.owns_lock()) - throw Exception("Failed to acquire restart lock within timeout. Client should retry.", ErrorCodes::DEADLOCK_AVOIDED); - - LOG_INFO(log, "Restart lock acquired. Restarting disk {}", DiskDecorator::getName()); - - /// NOTE: access checking will cause deadlock here, so skip it. - DiskDecorator::startup(context, /* skip_access_check= */ true); - - LOG_INFO(log, "Disk restarted {}", DiskDecorator::getName()); -} - -} diff --git a/src/Disks/DiskRestartProxy.h b/src/Disks/DiskRestartProxy.h deleted file mode 100644 index fb4dde3bfa3..00000000000 --- a/src/Disks/DiskRestartProxy.h +++ /dev/null @@ -1,86 +0,0 @@ -#pragma once - -#include "DiskDecorator.h" - -#include -#include - -namespace DB -{ -using ReadLock = std::shared_lock; -using WriteLock = std::unique_lock; - -class RestartAwareReadBuffer; -class RestartAwareWriteBuffer; - -/** - * Gives possibility to change underlying disk settings at runtime calling 'restart' method. - * All disk methods are protected by read-lock. Read/Write buffers produced by disk holds read-lock till buffer is finalized/destructed. - * When 'restart' method is called write-lock is acquired to make sure that no operations are running on that disk. 
- */ -class DiskRestartProxy : public DiskDecorator -{ -public: - explicit DiskRestartProxy(DiskPtr & delegate_); - - ReservationPtr reserve(UInt64 bytes) override; - const String & getPath() const override; - UInt64 getTotalSpace() const override; - UInt64 getAvailableSpace() const override; - UInt64 getUnreservedSpace() const override; - UInt64 getKeepingFreeSpace() const override; - bool exists(const String & path) const override; - bool isFile(const String & path) const override; - bool isDirectory(const String & path) const override; - size_t getFileSize(const String & path) const override; - void createDirectory(const String & path) override; - void createDirectories(const String & path) override; - void clearDirectory(const String & path) override; - void moveDirectory(const String & from_path, const String & to_path) override; - DirectoryIteratorPtr iterateDirectory(const String & path) const override; - void createFile(const String & path) override; - void moveFile(const String & from_path, const String & to_path) override; - void replaceFile(const String & from_path, const String & to_path) override; - void copy(const String & from_path, const DiskPtr & to_disk, const String & to_path) override; - void copyDirectoryContent(const String & from_dir, const std::shared_ptr & to_disk, const String & to_dir) override; - void listFiles(const String & path, std::vector & file_names) const override; - std::unique_ptr readFile( - const String & path, - const ReadSettings & settings, - std::optional read_hint, - std::optional file_size) const override; - std::unique_ptr writeFile(const String & path, size_t buf_size, WriteMode mode, const WriteSettings & settings) override; - void removeFile(const String & path) override; - void removeFileIfExists(const String & path) override; - void removeDirectory(const String & path) override; - void removeRecursive(const String & path) override; - void removeSharedFile(const String & path, bool keep_s3) override; - void removeSharedFiles(const RemoveBatchRequest & files, bool keep_all_batch_data, const NameSet & file_names_remove_metadata_only) override; - void removeSharedRecursive(const String & path, bool keep_all_batch_data, const NameSet & file_names_remove_metadata_only) override; - void setLastModified(const String & path, const Poco::Timestamp & timestamp) override; - Poco::Timestamp getLastModified(const String & path) const override; - void setReadOnly(const String & path) override; - void createHardLink(const String & src_path, const String & dst_path) override; - void truncateFile(const String & path, size_t size) override; - String getUniqueId(const String & path) const override; - bool checkUniqueId(const String & id) const override; - - const String & getCacheBasePath() const override; - StoredObjects getStorageObjects(const String & path) const override; - void getRemotePathsRecursive(const String & path, std::vector & paths_map) override; - - void restart(ContextPtr context); - - DiskPtr getNestedDisk() const override; - -private: - friend class RestartAwareReadBuffer; - friend class RestartAwareWriteBuffer; - - /// Mutex to protect RW access. 
- mutable std::shared_timed_mutex mutex; - - Poco::Logger * log = &Poco::Logger::get("DiskRestartProxy"); -}; - -} diff --git a/src/Disks/FakeDiskTransaction.h b/src/Disks/FakeDiskTransaction.h index e80b45a94ec..46be885739e 100644 --- a/src/Disks/FakeDiskTransaction.h +++ b/src/Disks/FakeDiskTransaction.h @@ -16,6 +16,7 @@ public: {} void commit() override {} + void undo() override {} void createDirectory(const std::string & path) override { diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index c567566a5b3..e85a18c8729 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -256,15 +256,15 @@ public: /// For one local path there might be multiple remote paths in case of Log family engines. struct LocalPathWithObjectStoragePaths - { - std::string local_path; - std::string common_prefix_for_objects; - StoredObjects objects; + { + std::string local_path; + std::string common_prefix_for_objects; + StoredObjects objects; - LocalPathWithObjectStoragePaths( - const std::string & local_path_, const std::string & common_prefix_for_objects_, StoredObjects && objects_) - : local_path(local_path_), common_prefix_for_objects(common_prefix_for_objects_), objects(std::move(objects_)) {} - }; + LocalPathWithObjectStoragePaths( + const std::string & local_path_, const std::string & common_prefix_for_objects_, StoredObjects && objects_) + : local_path(local_path_), common_prefix_for_objects(common_prefix_for_objects_), objects(std::move(objects_)) {} + }; virtual void getRemotePathsRecursive(const String &, std::vector &) { diff --git a/src/Disks/IDiskTransaction.h b/src/Disks/IDiskTransaction.h index 572d86dcfdb..02c8731428d 100644 --- a/src/Disks/IDiskTransaction.h +++ b/src/Disks/IDiskTransaction.h @@ -30,6 +30,8 @@ public: /// If something fails rollback and throw exception. virtual void commit() = 0; + virtual void undo() = 0; + virtual ~IDiskTransaction() = default; /// Create directory. diff --git a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp index f88bf281732..f49bff4d957 100644 --- a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp +++ b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp @@ -101,9 +101,7 @@ FileSegmentsHolderPtr CachedOnDiskReadBufferFromFile::getFileSegments(size_t off return cache->get(cache_key, offset, size); } - CreateFileSegmentSettings create_settings{ - .is_persistent = is_persistent - }; + CreateFileSegmentSettings create_settings(is_persistent ? 
FileSegmentKind::Persistent : FileSegmentKind::Regular); return cache->getOrSet(cache_key, offset, size, create_settings); } @@ -985,7 +983,7 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep() } else { - LOG_TRACE(log, "No space left in cache, will continue without cache download"); + LOG_TRACE(log, "No space left in cache to reserve {} bytes, will continue without cache download", size); } if (!success) diff --git a/src/Disks/IO/CachedOnDiskWriteBufferFromFile.cpp b/src/Disks/IO/CachedOnDiskWriteBufferFromFile.cpp index c7d3b9d551c..98d6158341f 100644 --- a/src/Disks/IO/CachedOnDiskWriteBufferFromFile.cpp +++ b/src/Disks/IO/CachedOnDiskWriteBufferFromFile.cpp @@ -51,7 +51,7 @@ FileSegmentRangeWriter::FileSegmentRangeWriter( { } -bool FileSegmentRangeWriter::write(const char * data, size_t size, size_t offset, bool is_persistent) +bool FileSegmentRangeWriter::write(const char * data, size_t size, size_t offset, FileSegmentKind segment_kind) { if (finalized) return false; @@ -68,7 +68,7 @@ bool FileSegmentRangeWriter::write(const char * data, size_t size, size_t offset if (file_segments.empty() || file_segments.back().isDownloaded()) { - file_segment = &allocateFileSegment(expected_write_offset, is_persistent); + file_segment = &allocateFileSegment(expected_write_offset, segment_kind); } else { @@ -86,7 +86,7 @@ bool FileSegmentRangeWriter::write(const char * data, size_t size, size_t offset if (available_size == 0) { completeFileSegment(*file_segment); - file_segment = &allocateFileSegment(expected_write_offset, is_persistent); + file_segment = &allocateFileSegment(expected_write_offset, segment_kind); continue; } @@ -149,17 +149,14 @@ FileSegmentRangeWriter::~FileSegmentRangeWriter() } } -FileSegment & FileSegmentRangeWriter::allocateFileSegment(size_t offset, bool is_persistent) +FileSegment & FileSegmentRangeWriter::allocateFileSegment(size_t offset, FileSegmentKind segment_kind) { /** * Allocate a new file segment starting `offset`. * File segment capacity will equal `max_file_segment_size`, but actual size is 0. */ - CreateFileSegmentSettings create_settings - { - .is_persistent = is_persistent, - }; + CreateFileSegmentSettings create_settings(segment_kind); /// We set max_file_segment_size to be downloaded, /// if we have less size to write, file segment will be resized in complete() method. @@ -269,7 +266,8 @@ void CachedOnDiskWriteBufferFromFile::cacheData(char * data, size_t size, bool t try { - if (!cache_writer->write(data, size, current_download_offset, is_persistent_cache_file)) + auto segment_kind = is_persistent_cache_file ? FileSegmentKind::Persistent : FileSegmentKind::Regular; + if (!cache_writer->write(data, size, current_download_offset, segment_kind)) { LOG_INFO(log, "Write-through cache is stopped as cache limit is reached and nothing can be evicted"); return; diff --git a/src/Disks/IO/CachedOnDiskWriteBufferFromFile.h b/src/Disks/IO/CachedOnDiskWriteBufferFromFile.h index b72da2d3a40..194afe88d88 100644 --- a/src/Disks/IO/CachedOnDiskWriteBufferFromFile.h +++ b/src/Disks/IO/CachedOnDiskWriteBufferFromFile.h @@ -32,14 +32,14 @@ public: * Write a range of file segments. Allocate file segment of `max_file_segment_size` and write to * it until it is full and then allocate next file segment. 
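The hunks around here replace a boolean is_persistent flag with an explicit FileSegmentKind passed into CreateFileSegmentSettings and FileSegmentRangeWriter::write(). A minimal sketch of that bool-to-enum refactoring follows; the enum and struct names mirror the diff, everything else is a simplified stand-in.

```cpp
// Sketch of the refactoring applied to CreateFileSegmentSettings: the boolean
// `is_persistent` becomes an explicit enum, which keeps call sites readable and
// leaves room for additional segment kinds later.
#include <iostream>

enum class FileSegmentKind
{
    Regular,
    Persistent,   /// Not removed by regular cache eviction.
};

struct CreateFileSegmentSettings
{
    FileSegmentKind kind = FileSegmentKind::Regular;

    explicit CreateFileSegmentSettings(FileSegmentKind kind_) : kind(kind_) {}
};

/// Call sites translate the old boolean once, at the boundary:
CreateFileSegmentSettings makeSettings(bool is_persistent_cache_file)
{
    return CreateFileSegmentSettings(
        is_persistent_cache_file ? FileSegmentKind::Persistent : FileSegmentKind::Regular);
}

int main()
{
    auto settings = makeSettings(/* is_persistent_cache_file = */ true);
    std::cout << (settings.kind == FileSegmentKind::Persistent ? "persistent\n" : "regular\n");
}
```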
*/ - bool write(const char * data, size_t size, size_t offset, bool is_persistent); + bool write(const char * data, size_t size, size_t offset, FileSegmentKind segment_kind); void finalize(); ~FileSegmentRangeWriter(); private: - FileSegment & allocateFileSegment(size_t offset, bool is_persistent); + FileSegment & allocateFileSegment(size_t offset, FileSegmentKind segment_kind); void appendFilesystemCacheLog(const FileSegment & file_segment); diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp index 15ddbe551da..664f7b6919e 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp @@ -1,4 +1,5 @@ #include +#include "Common/Exception.h" #if USE_AZURE_BLOB_STORAGE @@ -176,7 +177,9 @@ void AzureObjectStorage::removeObject(const StoredObject & object) auto client_ptr = client.get(); auto delete_info = client_ptr->DeleteBlob(path); if (!delete_info.Value.Deleted) - throw Exception(ErrorCodes::AZURE_BLOB_STORAGE_ERROR, "Failed to delete file in AzureBlob Storage: {}", path); + throw Exception( + ErrorCodes::AZURE_BLOB_STORAGE_ERROR, "Failed to delete file (path: {}) in AzureBlob Storage, reason: {}", + path, delete_info.RawResponse ? delete_info.RawResponse->GetReasonPhrase() : "Unknown"); } void AzureObjectStorage::removeObjects(const StoredObjects & objects) @@ -187,21 +190,49 @@ void AzureObjectStorage::removeObjects(const StoredObjects & objects) LOG_TEST(log, "Removing object: {} (total: {})", object.absolute_path, objects.size()); auto delete_info = client_ptr->DeleteBlob(object.absolute_path); if (!delete_info.Value.Deleted) - throw Exception(ErrorCodes::AZURE_BLOB_STORAGE_ERROR, "Failed to delete file in AzureBlob Storage: {}", object.absolute_path); + throw Exception( + ErrorCodes::AZURE_BLOB_STORAGE_ERROR, "Failed to delete file (path: {}) in AzureBlob Storage, reason: {}", + object.absolute_path, delete_info.RawResponse ? delete_info.RawResponse->GetReasonPhrase() : "Unknown"); } } void AzureObjectStorage::removeObjectIfExists(const StoredObject & object) { auto client_ptr = client.get(); - auto delete_info = client_ptr->DeleteBlob(object.absolute_path); + try + { + LOG_TEST(log, "Removing single object: {}", object.absolute_path); + auto delete_info = client_ptr->DeleteBlob(object.absolute_path); + } + catch (const Azure::Storage::StorageException & e) + { + /// If object doesn't exist... + if (e.StatusCode == Azure::Core::Http::HttpStatusCode::NotFound) + return; + tryLogCurrentException(__PRETTY_FUNCTION__); + throw; + } } void AzureObjectStorage::removeObjectsIfExist(const StoredObjects & objects) { auto client_ptr = client.get(); for (const auto & object : objects) - auto delete_info = client_ptr->DeleteBlob(object.absolute_path); + { + try + { + auto delete_info = client_ptr->DeleteBlob(object.absolute_path); + } + catch (const Azure::Storage::StorageException & e) + { + /// If object doesn't exist... 
+ if (e.StatusCode == Azure::Core::Http::HttpStatusCode::NotFound) + return; + tryLogCurrentException(__PRETTY_FUNCTION__); + throw; + } + } + } diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/registerDiskAzureBlobStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/registerDiskAzureBlobStorage.cpp index df377cdf710..562b2b2fec0 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/registerDiskAzureBlobStorage.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/registerDiskAzureBlobStorage.cpp @@ -4,8 +4,6 @@ #if USE_AZURE_BLOB_STORAGE -#include - #include #include @@ -51,7 +49,7 @@ void registerDiskAzureBlobStorage(DiskFactory & factory, bool global_skip_access bool skip_access_check = global_skip_access_check || config.getBool(config_prefix + ".skip_access_check", false); azure_blob_storage_disk->startup(context, skip_access_check); - return std::make_shared(azure_blob_storage_disk); + return azure_blob_storage_disk; }; factory.registerDiskType("azure_blob_storage", creator); diff --git a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h index 07310772a81..dbea90803d0 100644 --- a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h +++ b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h @@ -115,6 +115,8 @@ public: static bool canUseReadThroughCache(); + FileCachePtr getCache() const { return cache; } + private: FileCache::Key getCacheKey(const std::string & path) const; diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.cpp b/src/Disks/ObjectStorages/DiskObjectStorage.cpp index 263a9a9d0e1..57a7d25fd17 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorage.cpp @@ -26,6 +26,7 @@ namespace ErrorCodes extern const int FILE_DOESNT_EXIST; extern const int ATTEMPT_TO_READ_AFTER_EOF; extern const int CANNOT_READ_ALL_DATA; + extern const int DIRECTORY_DOESNT_EXIST; } namespace @@ -126,6 +127,9 @@ StoredObjects DiskObjectStorage::getStorageObjects(const String & local_path) co void DiskObjectStorage::getRemotePathsRecursive(const String & local_path, std::vector & paths_map) { + if (!metadata_storage->exists(local_path)) + return; + /// Protect against concurrent delition of files (for example because of a merge). 
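removeObjectIfExists / removeObjectsIfExist now swallow a not-found response so deletion stays idempotent when it races with another delete or an internal client retry. The following is a generic sketch of that behaviour, not the Azure SDK; ObjectNotFound and Storage are hypothetical stand-ins for the real client and its exception type.

```cpp
// Generic sketch of "remove if exists": a delete that observes "object not found"
// is treated as success instead of being reported as an error.
#include <iostream>
#include <set>
#include <stdexcept>
#include <string>

struct ObjectNotFound : std::runtime_error
{
    using std::runtime_error::runtime_error;
};

struct Storage
{
    std::set<std::string> objects;

    void deleteObject(const std::string & path)
    {
        if (objects.erase(path) == 0)
            throw ObjectNotFound("no such object: " + path);
    }
};

void removeObjectIfExists(Storage & storage, const std::string & path)
{
    try
    {
        storage.deleteObject(path);
    }
    catch (const ObjectNotFound &)
    {
        /// The object is already gone; for an idempotent delete this is not an error.
        return;
    }
}

int main()
{
    Storage storage{{"a", "b"}};
    removeObjectIfExists(storage, "a");   // removes it
    removeObjectIfExists(storage, "a");   // already gone, silently ignored
    std::cout << storage.objects.size() << " objects left\n";
}
```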
if (metadata_storage->isFile(local_path)) { @@ -138,6 +142,7 @@ void DiskObjectStorage::getRemotePathsRecursive(const String & local_path, std:: /// Unfortunately in rare cases it can happen when files disappear /// or can be empty in case of operation interruption (like cancelled metadata fetch) if (e.code() == ErrorCodes::FILE_DOESNT_EXIST || + e.code() == ErrorCodes::DIRECTORY_DOESNT_EXIST || e.code() == ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF || e.code() == ErrorCodes::CANNOT_READ_ALL_DATA) return; @@ -157,6 +162,7 @@ void DiskObjectStorage::getRemotePathsRecursive(const String & local_path, std:: /// Unfortunately in rare cases it can happen when files disappear /// or can be empty in case of operation interruption (like cancelled metadata fetch) if (e.code() == ErrorCodes::FILE_DOESNT_EXIST || + e.code() == ErrorCodes::DIRECTORY_DOESNT_EXIST || e.code() == ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF || e.code() == ErrorCodes::CANNOT_READ_ALL_DATA) return; @@ -519,6 +525,14 @@ void DiskObjectStorage::wrapWithCache(FileCachePtr cache, const FileCacheSetting object_storage = std::make_shared(object_storage, cache, cache_settings, layer_name); } +FileCachePtr DiskObjectStorage::getCache() const +{ + const auto * cached_object_storage = typeid_cast(object_storage.get()); + if (!cached_object_storage) + return nullptr; + return cached_object_storage->getCache(); +} + NameSet DiskObjectStorage::getCacheLayersNames() const { NameSet cache_layers; diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.h b/src/Disks/ObjectStorages/DiskObjectStorage.h index 00e3cf98142..a24acc270c0 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.h +++ b/src/Disks/ObjectStorages/DiskObjectStorage.h @@ -186,6 +186,7 @@ public: /// There can be any number of cache layers: /// DiskObjectStorage(CachedObjectStorage(...CacheObjectStorage(S3ObjectStorage)...)) void wrapWithCache(FileCachePtr cache, const FileCacheSettings & cache_settings, const String & layer_name); + FileCachePtr getCache() const; /// Get structure of object storage this disk works with. Examples: /// DiskObjectStorage(S3ObjectStorage) diff --git a/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp b/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp index dc4898559c0..c3284b635da 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp @@ -20,7 +20,7 @@ void DiskObjectStorageMetadata::deserialize(ReadBuffer & buf) UInt32 version; readIntText(version, buf); - if (version < VERSION_ABSOLUTE_PATHS || version > VERSION_READ_ONLY_FLAG) + if (version < VERSION_ABSOLUTE_PATHS || version > VERSION_INLINE_DATA) throw Exception( ErrorCodes::UNKNOWN_FORMAT, "Unknown metadata file version. Path: {}. Version: {}. 
Maximum expected version: {}", @@ -65,6 +65,12 @@ void DiskObjectStorageMetadata::deserialize(ReadBuffer & buf) readBoolText(read_only, buf); assertChar('\n', buf); } + + if (version >= VERSION_INLINE_DATA) + { + readEscapedString(inline_data, buf); + assertChar('\n', buf); + } } void DiskObjectStorageMetadata::deserializeFromString(const std::string & data) @@ -75,7 +81,11 @@ void DiskObjectStorageMetadata::deserializeFromString(const std::string & data) void DiskObjectStorageMetadata::serialize(WriteBuffer & buf, bool sync) const { - writeIntText(VERSION_READ_ONLY_FLAG, buf); + if (inline_data.empty()) + writeIntText(VERSION_READ_ONLY_FLAG, buf); + else + writeIntText(VERSION_INLINE_DATA, buf); + writeChar('\n', buf); writeIntText(storage_objects.size(), buf); @@ -97,6 +107,12 @@ void DiskObjectStorageMetadata::serialize(WriteBuffer & buf, bool sync) const writeBoolText(read_only, buf); writeChar('\n', buf); + if (!inline_data.empty()) + { + writeEscapedString(inline_data, buf); + writeChar('\n', buf); + } + buf.finalize(); if (sync) buf.sync(); diff --git a/src/Disks/ObjectStorages/DiskObjectStorageMetadata.h b/src/Disks/ObjectStorages/DiskObjectStorageMetadata.h index d3ea5795dd3..a2d0653e4aa 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageMetadata.h +++ b/src/Disks/ObjectStorages/DiskObjectStorageMetadata.h @@ -16,6 +16,7 @@ private: static constexpr uint32_t VERSION_ABSOLUTE_PATHS = 1; static constexpr uint32_t VERSION_RELATIVE_PATHS = 2; static constexpr uint32_t VERSION_READ_ONLY_FLAG = 3; + static constexpr uint32_t VERSION_INLINE_DATA = 4; const std::string & common_metadata_path; @@ -39,6 +40,9 @@ private: /// Flag indicates that file is read only. bool read_only = false; + /// This data will be stored inline + std::string inline_data; + public: DiskObjectStorageMetadata( @@ -99,6 +103,15 @@ public: read_only = true; } + void setInlineData(const std::string & data) + { + inline_data = data; + } + + const std::string & getInlineData() const + { + return inline_data; + } }; using DiskObjectStorageMetadataPtr = std::unique_ptr; diff --git a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp index b55fb2c4fa5..677debc69e6 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp @@ -133,8 +133,13 @@ struct RemoveObjectStorageOperation final : public IDiskObjectStorageOperation void finalize() override { + /// The client for an object storage may do retries internally + /// and there could be a situation when a query succeeded, but the response is lost + /// due to network error or similar. And when it will retry an operation it may receive + /// a 404 HTTP code. We don't want to threat this code as a real error for deletion process + /// (e.g. 
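The metadata format above gains an optional inline-data field guarded by a new version number: the writer only emits VERSION_INLINE_DATA when the field is non-empty, so files without inline data remain readable by older code. Below is a simplified sketch of that version-gated serialization, using plain iostream formatting instead of the escaped readers and writers the real code uses.

```cpp
// Sketch of version-gated serialization: the version number is bumped only when the
// new optional field is present, and readers only expect the extra field when the
// version says so. Metadata is a simplified stand-in for DiskObjectStorageMetadata.
#include <cstdint>
#include <iostream>
#include <sstream>
#include <string>

constexpr uint32_t VERSION_READ_ONLY_FLAG = 3;
constexpr uint32_t VERSION_INLINE_DATA = 4;

struct Metadata
{
    bool read_only = false;
    std::string inline_data;      /// Optional payload stored inline with the metadata.

    void serialize(std::ostream & out) const
    {
        out << (inline_data.empty() ? VERSION_READ_ONLY_FLAG : VERSION_INLINE_DATA) << '\n';
        out << read_only << '\n';
        if (!inline_data.empty())
            out << inline_data << '\n';
    }

    void deserialize(std::istream & in)
    {
        uint32_t version = 0;
        in >> version;
        in >> read_only;
        if (version >= VERSION_INLINE_DATA)
            in >> inline_data;
    }
};

int main()
{
    Metadata original;
    original.inline_data = "small-blob";

    std::stringstream buffer;
    original.serialize(buffer);

    Metadata restored;
    restored.deserialize(buffer);
    std::cout << "version-gated field restored: " << restored.inline_data << '\n';
}
```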
throwing some exceptions) and thus we just use method `removeObjectsIfExists` if (!delete_metadata_only && !objects_to_remove.empty()) - object_storage.removeObjects(objects_to_remove); + object_storage.removeObjectsIfExist(objects_to_remove); } }; @@ -213,8 +218,10 @@ struct RemoveManyObjectStorageOperation final : public IDiskObjectStorageOperati void finalize() override { + /// Read comment inside RemoveObjectStorageOperation class + /// TL;DR Don't pay any attention to 404 status code if (!objects_to_remove.empty()) - object_storage.removeObjects(objects_to_remove); + object_storage.removeObjectsIfExist(objects_to_remove); } }; @@ -307,7 +314,9 @@ struct RemoveRecursiveObjectStorageOperation final : public IDiskObjectStorageOp remove_from_remote.insert(remove_from_remote.end(), remote_paths.begin(), remote_paths.end()); } } - object_storage.removeObjects(remove_from_remote); + /// Read comment inside RemoveObjectStorageOperation class + /// TL;DR Don't pay any attention to 404 status code + object_storage.removeObjectsIfExist(remove_from_remote); } } }; @@ -352,8 +361,10 @@ struct ReplaceFileObjectStorageOperation final : public IDiskObjectStorageOperat void finalize() override { + /// Read comment inside RemoveObjectStorageOperation class + /// TL;DR Don't pay any attention to 404 status code if (!objects_to_remove.empty()) - object_storage.removeObjects(objects_to_remove); + object_storage.removeObjectsIfExist(objects_to_remove); } }; @@ -749,4 +760,10 @@ void DiskObjectStorageTransaction::commit() operation->finalize(); } +void DiskObjectStorageTransaction::undo() +{ + for (const auto & operation : operations_to_execute | std::views::reverse) + operation->undo(); +} + } diff --git a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.h b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.h index 9c42203b613..9e6bd5b6307 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.h +++ b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.h @@ -70,6 +70,7 @@ public: DiskObjectStorageRemoteMetadataRestoreHelper * metadata_helper_); void commit() override; + void undo() override; void createDirectory(const std::string & path) override; diff --git a/src/Disks/ObjectStorages/FakeMetadataStorageFromDisk.cpp b/src/Disks/ObjectStorages/FakeMetadataStorageFromDisk.cpp index 383bbebd880..dbfdb2f7b1a 100644 --- a/src/Disks/ObjectStorages/FakeMetadataStorageFromDisk.cpp +++ b/src/Disks/ObjectStorages/FakeMetadataStorageFromDisk.cpp @@ -23,7 +23,7 @@ FakeMetadataStorageFromDisk::FakeMetadataStorageFromDisk( { } -MetadataTransactionPtr FakeMetadataStorageFromDisk::createTransaction() const +MetadataTransactionPtr FakeMetadataStorageFromDisk::createTransaction() { return std::make_shared(*this, disk); } @@ -66,12 +66,7 @@ uint64_t FakeMetadataStorageFromDisk::getFileSize(const String & path) const std::vector FakeMetadataStorageFromDisk::listDirectory(const std::string & path) const { std::vector result; - auto it = disk->iterateDirectory(path); - while (it->isValid()) - { - result.push_back(it->path()); - it->next(); - } + disk->listFiles(path, result); return result; } @@ -85,6 +80,19 @@ std::string FakeMetadataStorageFromDisk::readFileToString(const std::string &) c throw Exception(ErrorCodes::NOT_IMPLEMENTED, "readFileToString is not implemented for FakeMetadataStorageFromDisk"); } +std::string FakeMetadataStorageFromDisk::readInlineDataToString(const std::string & path) const +{ + auto rb = disk->readFile(path); + std::string result; + std::array buf; + while (!rb->eof()) + { + 
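DiskObjectStorageTransaction::undo() above walks the recorded operations with std::views::reverse so later operations are rolled back first. A standalone sketch of that shape, with simplified stand-in operation types:

```cpp
// Sketch of transaction undo: operations are recorded in execution order and
// rolled back in reverse, mirroring the std::views::reverse loop in the diff.
#include <iostream>
#include <memory>
#include <ranges>
#include <vector>

struct IOperation
{
    virtual ~IOperation() = default;
    virtual void execute() = 0;
    virtual void undo() = 0;
};

struct PrintOperation : IOperation
{
    explicit PrintOperation(int id_) : id(id_) {}
    void execute() override { std::cout << "execute " << id << '\n'; }
    void undo() override { std::cout << "undo " << id << '\n'; }
    int id;
};

struct Transaction
{
    std::vector<std::unique_ptr<IOperation>> operations_to_execute;

    void commit()
    {
        for (const auto & operation : operations_to_execute)
            operation->execute();
    }

    void undo()
    {
        /// Roll back in reverse order so later operations are undone first.
        for (const auto & operation : operations_to_execute | std::views::reverse)
            operation->undo();
    }
};

int main()
{
    Transaction tx;
    for (int i = 1; i <= 3; ++i)
        tx.operations_to_execute.push_back(std::make_unique<PrintOperation>(i));
    tx.commit();
    tx.undo();   // prints undo 3, undo 2, undo 1
}
```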
auto sz = rb->read(buf.data(), buf.size()); + result.append(buf.data(), buf.data() + sz); + } + return result; +} + std::unordered_map FakeMetadataStorageFromDisk::getSerializedMetadata(const std::vector &) const { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "getSerializedMetadata is not implemented for FakeMetadataStorageFromDisk"); @@ -118,6 +126,13 @@ void FakeMetadataStorageFromDiskTransaction::writeStringToFile(const std::string wb->finalize(); } +void FakeMetadataStorageFromDiskTransaction::writeInlineDataToFile(const std::string & path, const std::string & data) +{ + auto wb = disk->writeFile(path); + wb->write(data.data(), data.size()); + wb->finalize(); +} + void FakeMetadataStorageFromDiskTransaction::setLastModified(const std::string & path, const Poco::Timestamp & timestamp) { disk->setLastModified(path, timestamp); diff --git a/src/Disks/ObjectStorages/FakeMetadataStorageFromDisk.h b/src/Disks/ObjectStorages/FakeMetadataStorageFromDisk.h index 65cf012ddab..849e7235c0a 100644 --- a/src/Disks/ObjectStorages/FakeMetadataStorageFromDisk.h +++ b/src/Disks/ObjectStorages/FakeMetadataStorageFromDisk.h @@ -27,7 +27,7 @@ public: ObjectStoragePtr object_storage_, const std::string & object_storage_root_path_); - MetadataTransactionPtr createTransaction() const override; + MetadataTransactionPtr createTransaction() override; const std::string & getPath() const override; @@ -55,6 +55,8 @@ public: std::string readFileToString(const std::string & path) const override; + std::string readInlineDataToString(const std::string & path) const override; + std::unordered_map getSerializedMetadata(const std::vector & file_paths) const override; uint32_t getHardlinkCount(const std::string & path) const override; @@ -88,6 +90,8 @@ public: void writeStringToFile(const std::string & path, const std::string & data) override; + void writeInlineDataToFile(const std::string & path, const std::string & data) override; + void createEmptyMetadataFile(const std::string & path) override; void createMetadataFile(const std::string & path, const std::string & blob_name, uint64_t size_in_bytes) override; diff --git a/src/Disks/ObjectStorages/HDFS/registerDiskHDFS.cpp b/src/Disks/ObjectStorages/HDFS/registerDiskHDFS.cpp index 7bec0ee5a6c..db134f3dcba 100644 --- a/src/Disks/ObjectStorages/HDFS/registerDiskHDFS.cpp +++ b/src/Disks/ObjectStorages/HDFS/registerDiskHDFS.cpp @@ -3,8 +3,8 @@ #include #include #include -#include #include +#include namespace DB { @@ -23,7 +23,8 @@ void registerDiskHDFS(DiskFactory & factory, bool global_skip_access_check) ContextPtr context, const DisksMap & /*map*/) -> DiskPtr { - String uri{config.getString(config_prefix + ".endpoint")}; + String endpoint = context->getMacros()->expand(config.getString(config_prefix + ".endpoint")); + String uri{endpoint}; checkHDFSURL(uri); if (uri.back() != '/') @@ -55,7 +56,7 @@ void registerDiskHDFS(DiskFactory & factory, bool global_skip_access_check) copy_thread_pool_size); disk->startup(context, skip_access_check); - return std::make_shared(disk); + return disk; }; factory.registerDiskType("hdfs", creator); diff --git a/src/Disks/ObjectStorages/IMetadataStorage.h b/src/Disks/ObjectStorages/IMetadataStorage.h index 597d7744c78..00150df9fa3 100644 --- a/src/Disks/ObjectStorages/IMetadataStorage.h +++ b/src/Disks/ObjectStorages/IMetadataStorage.h @@ -44,6 +44,12 @@ public: throwNotImplemented(); } + /// Writes the data inline with the metadata + virtual void writeInlineDataToFile(const std::string & /* path */, const std::string & /* data */) + { + 
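readInlineDataToString() above pulls the whole file through a small fixed-size buffer, appending each chunk until EOF. A standalone sketch of that chunked-read loop; std::istream stands in for the ReadBuffer returned by disk->readFile().

```cpp
// Sketch of a chunked read into a string: pull fixed-size chunks until EOF and
// append only the bytes actually read.
#include <array>
#include <cstddef>
#include <iostream>
#include <sstream>
#include <string>

std::string readToString(std::istream & in)
{
    std::string result;
    std::array<char, 1024> buf;
    while (in)
    {
        in.read(buf.data(), buf.size());
        auto sz = in.gcount();                 // number of bytes actually read
        result.append(buf.data(), static_cast<size_t>(sz));
    }
    return result;
}

int main()
{
    std::istringstream in("inline data stored next to the metadata");
    std::cout << readToString(in) << '\n';
}
```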
throwNotImplemented(); + } + virtual void setLastModified(const std::string & /* path */, const Poco::Timestamp & /* timestamp */) { throwNotImplemented(); @@ -143,7 +149,7 @@ using MetadataTransactionPtr = std::shared_ptr; class IMetadataStorage : private boost::noncopyable { public: - virtual MetadataTransactionPtr createTransaction() const = 0; + virtual MetadataTransactionPtr createTransaction() = 0; /// Get metadata root path. virtual const std::string & getPath() const = 0; @@ -185,6 +191,12 @@ public: throwNotImplemented(); } + /// Read inline data for file to string from path + virtual std::string readInlineDataToString(const std::string & /* path */) const + { + throwNotImplemented(); + } + virtual ~IMetadataStorage() = default; /// ==== More specific methods. Previous were almost general purpose. ==== diff --git a/src/Disks/ObjectStorages/MetadataStorageFromDisk.cpp b/src/Disks/ObjectStorages/MetadataStorageFromDisk.cpp index 0ad46a1327d..625350eeeff 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromDisk.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromDisk.cpp @@ -78,6 +78,11 @@ std::string MetadataStorageFromDisk::readFileToString(const std::string & path) return result; } +std::string MetadataStorageFromDisk::readInlineDataToString(const std::string & path) const +{ + return readMetadata(path)->getInlineData(); +} + DiskObjectStorageMetadataPtr MetadataStorageFromDisk::readMetadataUnlocked(const std::string & path, std::shared_lock &) const { auto metadata = std::make_unique(disk->getPath(), object_storage_root_path, path); @@ -122,7 +127,7 @@ void MetadataStorageFromDiskTransaction::createHardLink(const std::string & path addOperation(std::make_unique(path_from, path_to, *metadata_storage.disk, metadata_storage)); } -MetadataTransactionPtr MetadataStorageFromDisk::createTransaction() const +MetadataTransactionPtr MetadataStorageFromDisk::createTransaction() { return std::make_shared(*this); } @@ -244,6 +249,16 @@ void MetadataStorageFromDiskTransaction::writeStringToFile( addOperation(std::make_unique(path, *metadata_storage.getDisk(), data)); } +void MetadataStorageFromDiskTransaction::writeInlineDataToFile( + const std::string & path, + const std::string & data) +{ + auto metadata = std::make_unique( + metadata_storage.getDisk()->getPath(), metadata_storage.getObjectStorageRootPath(), path); + metadata->setInlineData(data); + writeStringToFile(path, metadata->serializeToString()); +} + void MetadataStorageFromDiskTransaction::setLastModified(const std::string & path, const Poco::Timestamp & timestamp) { addOperation(std::make_unique(path, timestamp, *metadata_storage.getDisk())); diff --git a/src/Disks/ObjectStorages/MetadataStorageFromDisk.h b/src/Disks/ObjectStorages/MetadataStorageFromDisk.h index b06a2a41f2b..2c80572e7b4 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromDisk.h +++ b/src/Disks/ObjectStorages/MetadataStorageFromDisk.h @@ -25,7 +25,7 @@ private: public: MetadataStorageFromDisk(DiskPtr disk_, const std::string & object_storage_root_path_); - MetadataTransactionPtr createTransaction() const override; + MetadataTransactionPtr createTransaction() override; const std::string & getPath() const override; @@ -53,6 +53,8 @@ public: std::string readFileToString(const std::string & path) const override; + std::string readInlineDataToString(const std::string & path) const override; + std::unordered_map getSerializedMetadata(const std::vector & file_paths) const override; uint32_t getHardlinkCount(const std::string & path) const override; @@ -94,6 
+96,8 @@ public: void writeStringToFile(const std::string & path, const std::string & data) override; + void writeInlineDataToFile(const std::string & path, const std::string & data) override; + void createEmptyMetadataFile(const std::string & path) override; void createMetadataFile(const std::string & path, const std::string & blob_name, uint64_t size_in_bytes) override; @@ -127,6 +131,8 @@ public: void replaceFile(const std::string & path_from, const std::string & path_to) override; void unlinkMetadata(const std::string & path) override; + + }; diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp index 34a9ae021b7..62c6d57b16f 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp @@ -23,7 +23,7 @@ MetadataStorageFromPlainObjectStorage::MetadataStorageFromPlainObjectStorage( { } -MetadataTransactionPtr MetadataStorageFromPlainObjectStorage::createTransaction() const +MetadataTransactionPtr MetadataStorageFromPlainObjectStorage::createTransaction() { return std::make_shared(*this); } diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h index 99cc960b9e4..0beed65879b 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h @@ -33,7 +33,7 @@ public: ObjectStoragePtr object_storage_, const std::string & object_storage_root_path_); - MetadataTransactionPtr createTransaction() const override; + MetadataTransactionPtr createTransaction() override; const std::string & getPath() const override; diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index d655fd37458..3c620ca819e 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -1,5 +1,7 @@ #include #include +#include + #if USE_AWS_S3 @@ -31,6 +33,7 @@ #include #include #include +#include namespace ProfileEvents @@ -634,10 +637,11 @@ std::unique_ptr S3ObjectStorage::cloneObjectStorage( { auto new_s3_settings = getSettings(config, config_prefix, context); auto new_client = getClient(config, config_prefix, context, *new_s3_settings); + String endpoint = context->getMacros()->expand(config.getString(config_prefix + ".endpoint")); return std::make_unique( std::move(new_client), std::move(new_s3_settings), version_id, s3_capabilities, new_namespace, - config.getString(config_prefix + ".endpoint")); + endpoint); } } diff --git a/src/Disks/ObjectStorages/S3/diskSettings.cpp b/src/Disks/ObjectStorages/S3/diskSettings.cpp index ba3828c312c..d5c6b03082f 100644 --- a/src/Disks/ObjectStorages/S3/diskSettings.cpp +++ b/src/Disks/ObjectStorages/S3/diskSettings.cpp @@ -20,8 +20,8 @@ #include #include #include -#include #include +#include namespace DB { @@ -122,7 +122,8 @@ std::unique_ptr getClient( settings.request_settings.get_request_throttler, settings.request_settings.put_request_throttler); - S3::URI uri(config.getString(config_prefix + ".endpoint")); + String endpoint = context->getMacros()->expand(config.getString(config_prefix + ".endpoint")); + S3::URI uri(endpoint); if (uri.key.back() != '/') throw Exception("S3 path must ends with '/', but '" + uri.key + "' doesn't.", ErrorCodes::BAD_ARGUMENTS); diff --git a/src/Disks/ObjectStorages/S3/registerDiskS3.cpp 
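Several disk registrations in these hunks start expanding {macro} placeholders in the configured endpoint via context->getMacros()->expand() before building the URI. The snippet below is only an illustrative stand-in for that substitution step, not the ClickHouse Macros implementation, and the macro names in the usage example are assumptions.

```cpp
// Illustrative placeholder expansion for a disk endpoint; the real code delegates
// to context->getMacros()->expand().
#include <iostream>
#include <map>
#include <string>

std::string expandMacros(std::string value, const std::map<std::string, std::string> & macros)
{
    for (const auto & [name, replacement] : macros)
    {
        const std::string pattern = "{" + name + "}";
        for (size_t pos = value.find(pattern); pos != std::string::npos;
             pos = value.find(pattern, pos + replacement.size()))
            value.replace(pos, pattern.size(), replacement);
    }
    return value;
}

int main()
{
    std::map<std::string, std::string> macros{{"shard", "01"}, {"replica", "r1"}};
    std::cout << expandMacros("https://bucket.s3.example.com/data/{shard}/{replica}/", macros) << '\n';
}
```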
b/src/Disks/ObjectStorages/S3/registerDiskS3.cpp index 533a925aa1b..1c192a0d89c 100644 --- a/src/Disks/ObjectStorages/S3/registerDiskS3.cpp +++ b/src/Disks/ObjectStorages/S3/registerDiskS3.cpp @@ -11,8 +11,8 @@ #include #include -#include #include +#include #include #include #include @@ -23,6 +23,7 @@ #include #include +#include namespace DB @@ -104,7 +105,8 @@ void registerDiskS3(DiskFactory & factory, bool global_skip_access_check) ContextPtr context, const DisksMap & /*map*/) -> DiskPtr { - S3::URI uri(config.getString(config_prefix + ".endpoint")); + String endpoint = context->getMacros()->expand(config.getString(config_prefix + ".endpoint")); + S3::URI uri(endpoint); if (uri.key.empty()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "No key in S3 uri: {}", uri.uri.toString()); @@ -165,9 +167,7 @@ void registerDiskS3(DiskFactory & factory, bool global_skip_access_check) s3disk->startup(context, skip_access_check); - std::shared_ptr disk_result = s3disk; - - return std::make_shared(disk_result); + return s3disk; }; factory.registerDiskType("s3", creator); factory.registerDiskType("s3_plain", creator); diff --git a/src/Disks/ObjectStorages/Web/MetadataStorageFromStaticFilesWebServer.cpp b/src/Disks/ObjectStorages/Web/MetadataStorageFromStaticFilesWebServer.cpp index ab5d86fd836..d39582a089e 100644 --- a/src/Disks/ObjectStorages/Web/MetadataStorageFromStaticFilesWebServer.cpp +++ b/src/Disks/ObjectStorages/Web/MetadataStorageFromStaticFilesWebServer.cpp @@ -21,7 +21,7 @@ MetadataStorageFromStaticFilesWebServer::MetadataStorageFromStaticFilesWebServer { } -MetadataTransactionPtr MetadataStorageFromStaticFilesWebServer::createTransaction() const +MetadataTransactionPtr MetadataStorageFromStaticFilesWebServer::createTransaction() { return std::make_shared(*this); } diff --git a/src/Disks/ObjectStorages/Web/MetadataStorageFromStaticFilesWebServer.h b/src/Disks/ObjectStorages/Web/MetadataStorageFromStaticFilesWebServer.h index 6a7c8128b4a..a04a1359d34 100644 --- a/src/Disks/ObjectStorages/Web/MetadataStorageFromStaticFilesWebServer.h +++ b/src/Disks/ObjectStorages/Web/MetadataStorageFromStaticFilesWebServer.h @@ -24,7 +24,7 @@ private: public: explicit MetadataStorageFromStaticFilesWebServer(const WebObjectStorage & object_storage_); - MetadataTransactionPtr createTransaction() const override; + MetadataTransactionPtr createTransaction() override; const std::string & getPath() const override; diff --git a/src/Disks/ObjectStorages/Web/registerDiskWebServer.cpp b/src/Disks/ObjectStorages/Web/registerDiskWebServer.cpp index 253d32ceb14..8a54de81815 100644 --- a/src/Disks/ObjectStorages/Web/registerDiskWebServer.cpp +++ b/src/Disks/ObjectStorages/Web/registerDiskWebServer.cpp @@ -5,6 +5,9 @@ #include #include #include +#include +#include + namespace DB { @@ -23,7 +26,7 @@ void registerDiskWebServer(DiskFactory & factory, bool global_skip_access_check) ContextPtr context, const DisksMap & /*map*/) -> DiskPtr { - String uri{config.getString(config_prefix + ".endpoint")}; + String uri = context->getMacros()->expand(config.getString(config_prefix + ".endpoint")); bool skip_access_check = global_skip_access_check || config.getBool(config_prefix + ".skip_access_check", false); if (!uri.ends_with('/')) diff --git a/src/Disks/TemporaryFileOnDisk.cpp b/src/Disks/TemporaryFileOnDisk.cpp index 4f348519037..b0113d6f9c9 100644 --- a/src/Disks/TemporaryFileOnDisk.cpp +++ b/src/Disks/TemporaryFileOnDisk.cpp @@ -15,7 +15,6 @@ namespace CurrentMetrics extern const Metric TotalTemporaryFiles; } - namespace DB { diff 
--git a/src/Formats/CapnProtoUtils.cpp b/src/Formats/CapnProtoUtils.cpp index a7ff065aca5..fb5e7c06542 100644 --- a/src/Formats/CapnProtoUtils.cpp +++ b/src/Formats/CapnProtoUtils.cpp @@ -32,6 +32,16 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } +std::pair splitCapnProtoFieldName(const String & name) +{ + const auto * begin = name.data(); + const auto * end = name.data() + name.size(); + const auto * it = find_first_symbols<'_', '.'>(begin, end); + String first = String(begin, it); + String second = it == end ? "" : String(it + 1, end); + return {first, second}; +} + capnp::StructSchema CapnProtoSchemaParser::getMessageSchema(const FormatSchemaInfo & schema_info) { capnp::ParsedSchema schema; @@ -201,9 +211,9 @@ static bool checkEnums(const capnp::Type & capnp_type, const DataTypePtr column_ return result; } -static bool checkCapnProtoType(const capnp::Type & capnp_type, const DataTypePtr & data_type, FormatSettings::EnumComparingMode mode, String & error_message); +static bool checkCapnProtoType(const capnp::Type & capnp_type, const DataTypePtr & data_type, FormatSettings::EnumComparingMode mode, String & error_message, const String & column_name); -static bool checkNullableType(const capnp::Type & capnp_type, const DataTypePtr & data_type, FormatSettings::EnumComparingMode mode, String & error_message) +static bool checkNullableType(const capnp::Type & capnp_type, const DataTypePtr & data_type, FormatSettings::EnumComparingMode mode, String & error_message, const String & column_name) { if (!capnp_type.isStruct()) return false; @@ -222,9 +232,9 @@ static bool checkNullableType(const capnp::Type & capnp_type, const DataTypePtr auto nested_type = assert_cast(data_type.get())->getNestedType(); if (first.getType().isVoid()) - return checkCapnProtoType(second.getType(), nested_type, mode, error_message); + return checkCapnProtoType(second.getType(), nested_type, mode, error_message, column_name); if (second.getType().isVoid()) - return checkCapnProtoType(first.getType(), nested_type, mode, error_message); + return checkCapnProtoType(first.getType(), nested_type, mode, error_message, column_name); return false; } @@ -260,7 +270,7 @@ static bool checkTupleType(const capnp::Type & capnp_type, const DataTypePtr & d { KJ_IF_MAYBE(field, struct_schema.findFieldByName(name)) { - if (!checkCapnProtoType(field->getType(), nested_types[tuple_data_type->getPositionByName(name)], mode, error_message)) + if (!checkCapnProtoType(field->getType(), nested_types[tuple_data_type->getPositionByName(name)], mode, error_message, name)) return false; } else @@ -273,16 +283,28 @@ static bool checkTupleType(const capnp::Type & capnp_type, const DataTypePtr & d return true; } -static bool checkArrayType(const capnp::Type & capnp_type, const DataTypePtr & data_type, FormatSettings::EnumComparingMode mode, String & error_message) +static bool checkArrayType(const capnp::Type & capnp_type, const DataTypePtr & data_type, FormatSettings::EnumComparingMode mode, String & error_message, const String & column_name) { if (!capnp_type.isList()) return false; auto list_schema = capnp_type.asList(); auto nested_type = assert_cast(data_type.get())->getNestedType(); - return checkCapnProtoType(list_schema.getElementType(), nested_type, mode, error_message); + + auto [field_name, nested_name] = splitCapnProtoFieldName(column_name); + if (!nested_name.empty() && list_schema.getElementType().isStruct()) + { + auto struct_schema = list_schema.getElementType().asStruct(); + KJ_IF_MAYBE(field, 
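splitCapnProtoFieldName() above cuts a column name at the first '_' or '.' into the top-level field and the nested remainder. A standalone equivalent of that rule, with find_first_symbols replaced by std::string::find_first_of:

```cpp
// Standalone version of the splitting rule: cut at the first '_' or '.'; the second
// part is empty when there is no separator.
#include <iostream>
#include <string>
#include <utility>

std::pair<std::string, std::string> splitFieldName(const std::string & name)
{
    auto pos = name.find_first_of("_.");
    if (pos == std::string::npos)
        return {name, ""};
    return {name.substr(0, pos), name.substr(pos + 1)};
}

int main()
{
    auto [first, second] = splitFieldName("nested.value");
    std::cout << first << " / " << second << '\n';   // nested / value

    auto [a, b] = splitFieldName("plain");
    std::cout << a << " / " << (b.empty() ? "<empty>" : b) << '\n';
}
```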
struct_schema.findFieldByName(nested_name)) + return checkCapnProtoType(field->getType(), nested_type, mode, error_message, nested_name); + + error_message += "Element type of List {} doesn't contain field with name " + nested_name; + return false; + } + + return checkCapnProtoType(list_schema.getElementType(), nested_type, mode, error_message, column_name); } -static bool checkCapnProtoType(const capnp::Type & capnp_type, const DataTypePtr & data_type, FormatSettings::EnumComparingMode mode, String & error_message) +static bool checkCapnProtoType(const capnp::Type & capnp_type, const DataTypePtr & data_type, FormatSettings::EnumComparingMode mode, String & error_message, const String & column_name) { switch (data_type->getTypeId()) { @@ -301,9 +323,11 @@ static bool checkCapnProtoType(const capnp::Type & capnp_type, const DataTypePtr case TypeIndex::Int16: return capnp_type.isInt16(); case TypeIndex::Date32: [[fallthrough]]; + case TypeIndex::Decimal32: [[fallthrough]]; case TypeIndex::Int32: return capnp_type.isInt32(); case TypeIndex::DateTime64: [[fallthrough]]; + case TypeIndex::Decimal64: [[fallthrough]]; case TypeIndex::Int64: return capnp_type.isInt64(); case TypeIndex::Float32: @@ -318,15 +342,15 @@ static bool checkCapnProtoType(const capnp::Type & capnp_type, const DataTypePtr return checkTupleType(capnp_type, data_type, mode, error_message); case TypeIndex::Nullable: { - auto result = checkNullableType(capnp_type, data_type, mode, error_message); + auto result = checkNullableType(capnp_type, data_type, mode, error_message, column_name); if (!result) error_message += "Nullable can be represented only as a named union of type Void and nested type"; return result; } case TypeIndex::Array: - return checkArrayType(capnp_type, data_type, mode, error_message); + return checkArrayType(capnp_type, data_type, mode, error_message, column_name); case TypeIndex::LowCardinality: - return checkCapnProtoType(capnp_type, assert_cast(data_type.get())->getDictionaryType(), mode, error_message); + return checkCapnProtoType(capnp_type, assert_cast(data_type.get())->getDictionaryType(), mode, error_message, column_name); case TypeIndex::FixedString: [[fallthrough]]; case TypeIndex::String: return capnp_type.isText() || capnp_type.isData(); @@ -335,19 +359,9 @@ static bool checkCapnProtoType(const capnp::Type & capnp_type, const DataTypePtr } } -static std::pair splitFieldName(const String & name) -{ - const auto * begin = name.data(); - const auto * end = name.data() + name.size(); - const auto * it = find_first_symbols<'_', '.'>(begin, end); - String first = String(begin, it); - String second = it == end ? "" : String(it + 1, end); - return {first, second}; -} - capnp::DynamicValue::Reader getReaderByColumnName(const capnp::DynamicStruct::Reader & struct_reader, const String & name) { - auto [field_name, nested_name] = splitFieldName(name); + auto [field_name, nested_name] = splitCapnProtoFieldName(name); KJ_IF_MAYBE(field, struct_reader.getSchema().findFieldByName(field_name)) { capnp::DynamicValue::Reader field_reader; @@ -363,6 +377,20 @@ capnp::DynamicValue::Reader getReaderByColumnName(const capnp::DynamicStruct::Re if (nested_name.empty()) return field_reader; + /// Support reading Nested as List of Structs. 
+ if (field_reader.getType() == capnp::DynamicValue::LIST) + { + auto list_schema = field->getType().asList(); + if (!list_schema.getElementType().isStruct()) + throw Exception(ErrorCodes::CAPN_PROTO_BAD_CAST, "Element type of List {} is not a struct", field_name); + + auto struct_schema = list_schema.getElementType().asStruct(); + KJ_IF_MAYBE(nested_field, struct_schema.findFieldByName(nested_name)) + return field_reader; + + throw Exception(ErrorCodes::THERE_IS_NO_COLUMN, "Element type of List {} doesn't contain field with name \"{}\"", field_name, nested_name); + } + if (field_reader.getType() != capnp::DynamicValue::STRUCT) throw Exception(ErrorCodes::CAPN_PROTO_BAD_CAST, "Field {} is not a struct", field_name); @@ -374,13 +402,28 @@ capnp::DynamicValue::Reader getReaderByColumnName(const capnp::DynamicStruct::Re std::pair getStructBuilderAndFieldByColumnName(capnp::DynamicStruct::Builder struct_builder, const String & name) { - auto [field_name, nested_name] = splitFieldName(name); + auto [field_name, nested_name] = splitCapnProtoFieldName(name); KJ_IF_MAYBE(field, struct_builder.getSchema().findFieldByName(field_name)) { if (nested_name.empty()) return {struct_builder, *field}; auto field_builder = struct_builder.get(*field); + + /// Support reading Nested as List of Structs. + if (field_builder.getType() == capnp::DynamicValue::LIST) + { + auto list_schema = field->getType().asList(); + if (!list_schema.getElementType().isStruct()) + throw Exception(ErrorCodes::CAPN_PROTO_BAD_CAST, "Element type of List {} is not a struct", field_name); + + auto struct_schema = list_schema.getElementType().asStruct(); + KJ_IF_MAYBE(nested_field, struct_schema.findFieldByName(nested_name)) + return {struct_builder, *field}; + + throw Exception(ErrorCodes::THERE_IS_NO_COLUMN, "Element type of List {} doesn't contain field with name \"{}\"", field_name, nested_name); + } + if (field_builder.getType() != capnp::DynamicValue::STRUCT) throw Exception(ErrorCodes::CAPN_PROTO_BAD_CAST, "Field {} is not a struct", field_name); @@ -390,13 +433,27 @@ std::pair getStructBu throw Exception(ErrorCodes::THERE_IS_NO_COLUMN, "Capnproto struct doesn't contain field with name {}", field_name); } -static capnp::StructSchema::Field getFieldByName(const capnp::StructSchema & schema, const String & name) +static std::pair getFieldByName(const capnp::StructSchema & schema, const String & name) { - auto [field_name, nested_name] = splitFieldName(name); + auto [field_name, nested_name] = splitCapnProtoFieldName(name); KJ_IF_MAYBE(field, schema.findFieldByName(field_name)) { if (nested_name.empty()) - return *field; + return {*field, name}; + + /// Support reading Nested as List of Structs. 
+ if (field->getType().isList()) + { + auto list_schema = field->getType().asList(); + if (!list_schema.getElementType().isStruct()) + throw Exception(ErrorCodes::CAPN_PROTO_BAD_CAST, "Element type of List {} is not a struct", field_name); + + auto struct_schema = list_schema.getElementType().asStruct(); + KJ_IF_MAYBE(nested_field, struct_schema.findFieldByName(nested_name)) + return {*field, name}; + + throw Exception(ErrorCodes::THERE_IS_NO_COLUMN, "Element type of List {} doesn't contain field with name \"{}\"", field_name, nested_name); + } if (!field->getType().isStruct()) throw Exception(ErrorCodes::CAPN_PROTO_BAD_CAST, "Field {} is not a struct", field_name); @@ -416,8 +473,8 @@ void checkCapnProtoSchemaStructure(const capnp::StructSchema & schema, const Blo String additional_error_message; for (auto & [name, type] : names_and_types) { - auto field = getFieldByName(schema, name); - if (!checkCapnProtoType(field.getType(), type, mode, additional_error_message)) + auto [field, field_name] = getFieldByName(schema, name); + if (!checkCapnProtoType(field.getType(), type, mode, additional_error_message, field_name)) { auto e = Exception( ErrorCodes::CAPN_PROTO_BAD_CAST, diff --git a/src/Formats/CapnProtoUtils.h b/src/Formats/CapnProtoUtils.h index 102c3a2e306..2d8cdb418d7 100644 --- a/src/Formats/CapnProtoUtils.h +++ b/src/Formats/CapnProtoUtils.h @@ -30,6 +30,8 @@ public: capnp::StructSchema getMessageSchema(const FormatSchemaInfo & schema_info); }; +std::pair splitCapnProtoFieldName(const String & name); + bool compareEnumNames(const String & first, const String & second, FormatSettings::EnumComparingMode mode); std::pair getStructBuilderAndFieldByColumnName(capnp::DynamicStruct::Builder struct_builder, const String & name); diff --git a/src/Formats/EscapingRuleUtils.cpp b/src/Formats/EscapingRuleUtils.cpp index df9c711b432..f59ccb996e7 100644 --- a/src/Formats/EscapingRuleUtils.cpp +++ b/src/Formats/EscapingRuleUtils.cpp @@ -318,6 +318,12 @@ DataTypePtr tryInferDataTypeByEscapingRule(const String & field, const FormatSet if (auto date_type = tryInferDateOrDateTimeFromString(field, format_settings)) return date_type; + /// Special case when we have number that starts with 0. In TSV we don't parse such numbers, + /// see readIntTextUnsafe in ReadHelpers.h. If we see data started with 0, we can determine it + /// as a String, so parsing won't fail. 
+ if (field[0] == '0' && field.size() != 1) + return std::make_shared(); + auto type = tryInferDataTypeForSingleField(field, format_settings); if (!type) return std::make_shared(); @@ -421,10 +427,12 @@ String getAdditionalFormatInfoByEscapingRule(const FormatSettings & settings, Fo break; case FormatSettings::EscapingRule::JSON: result += fmt::format( - ", try_infer_numbers_from_strings={}, read_bools_as_numbers={}, try_infer_objects={}", + ", try_infer_numbers_from_strings={}, read_bools_as_numbers={}, read_objects_as_strings={}, read_numbers_as_strings={}, try_infer_objects={}", settings.json.try_infer_numbers_from_strings, settings.json.read_bools_as_numbers, - settings.json.try_infer_objects); + settings.json.read_objects_as_strings, + settings.json.read_numbers_as_strings, + settings.json.allow_object_type); break; default: break; diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 91f2b0958f6..6b11c1ccbcf 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -103,7 +103,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.json.validate_types_from_metadata = settings.input_format_json_validate_types_from_metadata; format_settings.json.validate_utf8 = settings.output_format_json_validate_utf8; format_settings.json_object_each_row.column_for_object_name = settings.format_json_object_each_row_column_for_object_name; - format_settings.json.try_infer_objects = context->getSettingsRef().allow_experimental_object_type; + format_settings.json.allow_object_type = context->getSettingsRef().allow_experimental_object_type; format_settings.null_as_default = settings.input_format_null_as_default; format_settings.decimal_trailing_zeros = settings.output_format_decimal_trailing_zeros; format_settings.parquet.row_group_size = settings.output_format_parquet_row_group_size; @@ -317,6 +317,9 @@ static void addExistingProgressToOutputFormat(OutputFormatPtr format, ContextPtr auto current_progress = element_id->getProgressIn(); Progress read_progress{current_progress.read_rows, current_progress.read_bytes, current_progress.total_rows_to_read}; format->onProgress(read_progress); + + /// Update the start of the statistics to use the start of the query, and not the creation of the format class + format->setStartTime(element_id->getQueryCPUStartTime(), true); } } @@ -325,7 +328,6 @@ OutputFormatPtr FormatFactory::getOutputFormatParallelIfPossible( WriteBuffer & buf, const Block & sample, ContextPtr context, - WriteCallback callback, const std::optional & _format_settings) const { const auto & output_getter = getCreators(name).output_creator; @@ -339,9 +341,9 @@ OutputFormatPtr FormatFactory::getOutputFormatParallelIfPossible( if (settings.output_format_parallel_formatting && getCreators(name).supports_parallel_formatting && !settings.output_format_json_array_of_rows) { - auto formatter_creator = [output_getter, sample, callback, format_settings] (WriteBuffer & output) -> OutputFormatPtr + auto formatter_creator = [output_getter, sample, format_settings] (WriteBuffer & output) -> OutputFormatPtr { - return output_getter(output, sample, {callback}, format_settings); + return output_getter(output, sample, format_settings); }; ParallelFormattingOutputFormat::Params builder{buf, sample, formatter_creator, settings.max_threads}; @@ -354,7 +356,7 @@ OutputFormatPtr FormatFactory::getOutputFormatParallelIfPossible( return format; } - return getOutputFormat(name, buf, sample, context, callback, _format_settings); + 
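With the hunk above, a numeric-looking TSV field with a leading zero is inferred as String, because readIntTextUnsafe would refuse to parse it at read time. A small sketch of that guard; inferFieldType is a simplified stand-in for the real inference path.

```cpp
// Sketch of the leading-zero guard: "0123" is reported as String, "123" and "0"
// still infer as integers.
#include <cctype>
#include <iostream>
#include <string>

enum class InferredType { Int64, String };

InferredType inferFieldType(const std::string & field)
{
    if (field.empty())
        return InferredType::String;

    /// Leading zero (but not the single digit "0") -> keep it as a string.
    if (field[0] == '0' && field.size() != 1)
        return InferredType::String;

    for (char c : field)
        if (!std::isdigit(static_cast<unsigned char>(c)))
            return InferredType::String;

    return InferredType::Int64;
}

int main()
{
    std::cout << (inferFieldType("0123") == InferredType::String ? "String" : "Int64") << '\n';  // String
    std::cout << (inferFieldType("123") == InferredType::Int64 ? "Int64" : "String") << '\n';    // Int64
    std::cout << (inferFieldType("0") == InferredType::Int64 ? "Int64" : "String") << '\n';      // Int64
}
```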
return getOutputFormat(name, buf, sample, context, _format_settings); } @@ -363,7 +365,6 @@ OutputFormatPtr FormatFactory::getOutputFormat( WriteBuffer & buf, const Block & sample, ContextPtr context, - WriteCallback callback, const std::optional & _format_settings) const { const auto & output_getter = getCreators(name).output_creator; @@ -373,15 +374,12 @@ OutputFormatPtr FormatFactory::getOutputFormat( if (context->hasQueryContext() && context->getSettingsRef().log_queries) context->getQueryContext()->addQueryFactoriesInfo(Context::QueryLogFactories::Format, name); - RowOutputFormatParams params; - params.callback = std::move(callback); - auto format_settings = _format_settings ? *_format_settings : getFormatSettings(context); /** TODO: Materialization is needed, because formats can use the functions `IDataType`, * which only work with full columns. */ - auto format = output_getter(buf, sample, params, format_settings); + auto format = output_getter(buf, sample, format_settings); /// Enable auto-flush for streaming mode. Currently it is needed by INSERT WATCH query. if (format_settings.enable_streaming) @@ -408,9 +406,8 @@ String FormatFactory::getContentType( auto format_settings = _format_settings ? *_format_settings : getFormatSettings(context); Block empty_block; - RowOutputFormatParams empty_params; WriteBufferFromOwnString empty_buffer; - auto format = output_getter(empty_buffer, empty_block, empty_params, format_settings); + auto format = output_getter(empty_buffer, empty_block, format_settings); return format->getContentType(); } diff --git a/src/Formats/FormatFactory.h b/src/Formats/FormatFactory.h index 7af43664a50..398548e4b22 100644 --- a/src/Formats/FormatFactory.h +++ b/src/Formats/FormatFactory.h @@ -30,9 +30,9 @@ using ProcessorPtr = std::shared_ptr; class IInputFormat; class IOutputFormat; +class IRowOutputFormat; struct RowInputFormatParams; -struct RowOutputFormatParams; class ISchemaReader; class IExternalSchemaReader; @@ -41,6 +41,7 @@ using ExternalSchemaReaderPtr = std::shared_ptr; using InputFormatPtr = std::shared_ptr; using OutputFormatPtr = std::shared_ptr; +using RowOutputFormatPtr = std::shared_ptr; template struct Memory; @@ -56,10 +57,6 @@ FormatSettings getFormatSettings(ContextPtr context, const T & settings); class FormatFactory final : private boost::noncopyable { public: - /// This callback allows to perform some additional actions after reading a single row. - /// It's initial purpose was to extract payload for virtual columns from Kafka Consumer ReadBuffer. - using ReadCallback = std::function; - /** Fast reading data from buffer and save result to memory. * Reads at least `min_bytes` and some more until the end of the chunk, depends on the format. * If `max_rows` is non-zero the function also stops after reading the `max_rows` number of rows @@ -72,12 +69,6 @@ public: size_t min_bytes, size_t max_rows)>; - /// This callback allows to perform some additional actions after writing a single row. - /// It's initial purpose was to flush Kafka message for each row. 
- using WriteCallback = std::function; - private: using InputCreator = std::function; /// Some input formats can have non trivial readPrefix() and readSuffix(), @@ -153,7 +143,6 @@ public: WriteBuffer & buf, const Block & sample, ContextPtr context, - WriteCallback callback = {}, const std::optional & format_settings = std::nullopt) const; OutputFormatPtr getOutputFormat( @@ -161,7 +150,6 @@ public: WriteBuffer & buf, const Block & sample, ContextPtr context, - WriteCallback callback = {}, const std::optional & _format_settings = std::nullopt) const; String getContentType( diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 250601c3bf0..dcdd44edfeb 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -161,7 +161,7 @@ struct FormatSettings bool try_infer_numbers_from_strings = false; bool validate_types_from_metadata = true; bool validate_utf8 = false; - bool try_infer_objects = false; + bool allow_object_type = false; } json; struct diff --git a/src/Formats/NativeWriter.cpp b/src/Formats/NativeWriter.cpp index c4dea371afd..e932bb88c2d 100644 --- a/src/Formats/NativeWriter.cpp +++ b/src/Formats/NativeWriter.cpp @@ -64,8 +64,10 @@ static void writeData(const ISerialization & serialization, const ColumnPtr & co } -void NativeWriter::write(const Block & block) +size_t NativeWriter::write(const Block & block) { + size_t written_before = ostr.count(); + /// Additional information about the block. if (client_revision > 0) block.info.write(ostr); @@ -161,6 +163,10 @@ void NativeWriter::write(const Block & block) if (index) index->blocks.emplace_back(std::move(index_block)); + + size_t written_after = ostr.count(); + size_t written_size = written_after - written_before; + return written_size; } } diff --git a/src/Formats/NativeWriter.h b/src/Formats/NativeWriter.h index 010a03ec722..7bb377d2e4a 100644 --- a/src/Formats/NativeWriter.h +++ b/src/Formats/NativeWriter.h @@ -27,7 +27,9 @@ public: IndexForNativeFormat * index_ = nullptr, size_t initial_size_of_file_ = 0); Block getHeader() const { return header; } - void write(const Block & block); + + /// Returns the number of bytes written. 
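NativeWriter::write() now reports how many bytes the block contributed by sampling the output buffer's counter before and after serialization. A sketch of that counting approach; CountingStream is a stand-in for WriteBuffer::count().

```cpp
// Sketch of reporting bytes written per block: sample the running counter before
// and after writing and return the difference.
#include <cstddef>
#include <iostream>
#include <string>

class CountingStream
{
public:
    void write(const std::string & data) { bytes += data.size(); payload += data; }
    size_t count() const { return bytes; }

private:
    size_t bytes = 0;
    std::string payload;
};

size_t writeBlock(CountingStream & ostr, const std::string & serialized_block)
{
    size_t written_before = ostr.count();
    ostr.write(serialized_block);                 // header, columns, ... in the real writer
    size_t written_after = ostr.count();
    return written_after - written_before;        // bytes attributable to this block
}

int main()
{
    CountingStream out;
    std::cout << writeBlock(out, "block #1 payload") << " bytes\n";
    std::cout << writeBlock(out, "bigger block #2 payload") << " bytes\n";
}
```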
+ size_t write(const Block & block); void flush(); static String getContentType() { return "application/octet-stream"; } diff --git a/src/Formats/ProtobufReader.cpp b/src/Formats/ProtobufReader.cpp index 0e05b59badf..c92b0b1cc71 100644 --- a/src/Formats/ProtobufReader.cpp +++ b/src/Formats/ProtobufReader.cpp @@ -37,7 +37,7 @@ namespace ProtobufReader::ProtobufReader(ReadBuffer & in_) - : in(in_) + : in(&in_) { } @@ -153,7 +153,7 @@ bool ProtobufReader::readFieldNumber(int & field_number_) { if (current_message_end == END_OF_FILE) { - if (unlikely(in.eof())) + if (unlikely(in->eof())) { current_message_end = cursor; return false; @@ -282,26 +282,26 @@ void ProtobufReader::readStringAndAppend(PaddedPODArray & str) void ProtobufReader::readBinary(void* data, size_t size) { - in.readStrict(reinterpret_cast(data), size); + in->readStrict(reinterpret_cast(data), size); cursor += size; } void ProtobufReader::ignore(UInt64 num_bytes) { - in.ignore(num_bytes); + in->ignore(num_bytes); cursor += num_bytes; } void ProtobufReader::ignoreAll() { - cursor += in.tryIgnore(std::numeric_limits::max()); + cursor += in->tryIgnore(std::numeric_limits::max()); } void ProtobufReader::moveCursorBackward(UInt64 num_bytes) { - if (in.offset() < num_bytes) + if (in->offset() < num_bytes) throwUnknownFormat(); - in.position() -= num_bytes; + in->position() -= num_bytes; cursor -= num_bytes; } @@ -313,7 +313,7 @@ UInt64 ProtobufReader::continueReadingVarint(UInt64 first_byte) # define PROTOBUF_READER_READ_VARINT_BYTE(byteNo) \ do \ { \ - in.readStrict(c); \ + in->readStrict(c); \ ++cursor; \ if constexpr ((byteNo) < 10) \ { \ @@ -352,7 +352,7 @@ void ProtobufReader::ignoreVarint() # define PROTOBUF_READER_IGNORE_VARINT_BYTE(byteNo) \ do \ { \ - in.readStrict(c); \ + in->readStrict(c); \ ++cursor; \ if constexpr ((byteNo) < 10) \ { \ diff --git a/src/Formats/ProtobufReader.h b/src/Formats/ProtobufReader.h index a1a1ce7b2f1..9de45a4a37a 100644 --- a/src/Formats/ProtobufReader.h +++ b/src/Formats/ProtobufReader.h @@ -32,7 +32,9 @@ public: void readString(String & str); void readStringAndAppend(PaddedPODArray & str); - bool eof() const { return in.eof(); } + bool eof() const { return in->eof(); } + + void setReadBuffer(ReadBuffer & in_) { in = &in_; } private: void readBinary(void * data, size_t size); @@ -43,7 +45,7 @@ private: UInt64 ALWAYS_INLINE readVarint() { char c; - in.readStrict(c); + in->readStrict(c); UInt64 first_byte = static_cast(c); ++cursor; if (likely(!(c & 0x80))) @@ -56,7 +58,7 @@ private: void ignoreGroup(); [[noreturn]] void throwUnknownFormat() const; - ReadBuffer & in; + ReadBuffer * in; Int64 cursor = 0; bool root_message_has_length_delimiter = false; size_t current_message_level = 0; diff --git a/src/Formats/ProtobufSerializer.cpp b/src/Formats/ProtobufSerializer.cpp index 2f56c4242e5..97dac4b10fc 100644 --- a/src/Formats/ProtobufSerializer.cpp +++ b/src/Formats/ProtobufSerializer.cpp @@ -1736,7 +1736,7 @@ namespace } const std::shared_ptr aggregate_function_data_type; - const AggregateFunctionPtr aggregate_function; + AggregateFunctionPtr aggregate_function; String text_buffer; }; @@ -2511,6 +2511,11 @@ namespace writer->endMessage(/*with_length_delimiter = */ true); } + void reset() override + { + first_call_of_write_row = true; + } + void readRow(size_t row_num) override { if (first_call_of_read_row) diff --git a/src/Formats/ProtobufSerializer.h b/src/Formats/ProtobufSerializer.h index ebd136c1a82..d50f7e4956e 100644 --- a/src/Formats/ProtobufSerializer.h +++ b/src/Formats/ProtobufSerializer.h 
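ProtobufReader above switches from holding a ReadBuffer reference to a pointer so setReadBuffer() can re-target it between messages without reconstructing the reader. A minimal sketch of that reference-to-pointer change with simplified stand-in types:

```cpp
// Sketch of re-seatable input: storing a pointer instead of a reference lets the
// reader be pointed at another buffer via setReadBuffer().
#include <iostream>
#include <sstream>
#include <string>

class Reader
{
public:
    explicit Reader(std::istream & in_) : in(&in_) {}

    /// Point the reader at a different input without rebuilding its state.
    void setReadBuffer(std::istream & in_) { in = &in_; }

    std::string readToken()
    {
        std::string token;
        *in >> token;
        return token;
    }

private:
    std::istream * in;   // was a reference; a pointer allows re-seating
};

int main()
{
    std::istringstream first("from-first-buffer");
    std::istringstream second("from-second-buffer");

    Reader reader(first);
    std::cout << reader.readToken() << '\n';

    reader.setReadBuffer(second);
    std::cout << reader.readToken() << '\n';
}
```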
@@ -27,6 +27,7 @@ public: virtual void setColumns(const ColumnPtr * columns, size_t num_columns) = 0; virtual void writeRow(size_t row_num) = 0; virtual void finalizeWrite() {} + virtual void reset() {} virtual void setColumns(const MutableColumnPtr * columns, size_t num_columns) = 0; virtual void readRow(size_t row_num) = 0; diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp index 29b530966a0..6a5e328bf8e 100644 --- a/src/Formats/SchemaInferenceUtils.cpp +++ b/src/Formats/SchemaInferenceUtils.cpp @@ -250,7 +250,7 @@ namespace { if (isArray(type)) nested_types.push_back(assert_cast(*type).getNestedType()); - else + else if (isTuple(type)) { const auto & elements = assert_cast(*type).getElements(); for (const auto & element : elements) @@ -262,7 +262,10 @@ namespace if (checkIfTypesAreEqual(nested_types)) { for (auto & type : data_types) - type = std::make_shared(nested_types.back()); + { + if (isArray(type) || isTuple(type)) + type = std::make_shared(nested_types.back()); + } } } @@ -363,7 +366,7 @@ namespace transformJSONTuplesAndArraysToArrays(data_types, settings, type_indexes, json_info); /// Convert Maps to Objects if needed. - if (settings.json.try_infer_objects) + if (settings.json.allow_object_type) transformMapsAndObjectsToObjects(data_types, type_indexes); if (settings.json.read_objects_as_strings) @@ -713,7 +716,7 @@ namespace { if constexpr (is_json) { - if (settings.json.try_infer_objects) + if (settings.json.allow_object_type) return std::make_shared("json", true); } /// Empty Map is Map(Nothing, Nothing) @@ -732,10 +735,10 @@ namespace transformInferredTypesIfNeededImpl(value_types, settings, json_info); if (!checkIfTypesAreEqual(value_types)) { - if (settings.json.try_infer_objects) + if (settings.json.allow_object_type) return std::make_shared("json", true); if (settings.json.read_objects_as_strings) - return makeNullable(std::make_shared()); + return std::make_shared(); return nullptr; } @@ -826,14 +829,40 @@ void transformInferredJSONTypesIfNeeded( void transformJSONTupleToArrayIfPossible(DataTypePtr & data_type, const FormatSettings & settings, JSONInferenceInfo * json_info) { - if (!data_type || !isTuple(data_type)) + if (!data_type) return; - const auto * tuple_type = assert_cast(data_type.get()); - auto nested_types = tuple_type->getElements(); - transformInferredTypesIfNeededImpl(nested_types, settings, json_info); - if (checkIfTypesAreEqual(nested_types)) - data_type = std::make_shared(nested_types.back()); + if (const auto * array_type = typeid_cast(data_type.get())) + { + auto nested_type = array_type->getNestedType(); + transformJSONTupleToArrayIfPossible(nested_type, settings, json_info); + data_type = std::make_shared(nested_type); + return; + } + + if (const auto * map_type = typeid_cast(data_type.get())) + { + auto value_type = map_type->getValueType(); + transformJSONTupleToArrayIfPossible(value_type, settings, json_info); + data_type = std::make_shared(map_type->getKeyType(), value_type); + return; + } + + if (const auto * tuple_type = typeid_cast(data_type.get())) + { + auto nested_types = tuple_type->getElements(); + for (auto & nested_type : nested_types) + transformJSONTupleToArrayIfPossible(nested_type, settings, json_info); + + auto nested_types_copy = nested_types; + transformInferredTypesIfNeededImpl(nested_types_copy, settings, json_info); + if (checkIfTypesAreEqual(nested_types_copy)) + data_type = std::make_shared(nested_types_copy.back()); + else + data_type = std::make_shared(nested_types); + + return; + } 
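[Editor's illustration, not part of the patch] A toy model of what the reworked transformJSONTupleToArrayIfPossible appears to do in the hunk above: recurse into Array/Map/Tuple first, then fold a Tuple whose elements all ended up identical into an Array. The type model and names below are invented for illustration and are not ClickHouse's IDataType:

    #include <iostream>
    #include <memory>
    #include <string>
    #include <vector>

    struct ToyType;
    using ToyTypePtr = std::shared_ptr<ToyType>;

    /// Tiny structural type model: "Int64", "String" are leaves; "Array"/"Map"/"Tuple" have args.
    struct ToyType
    {
        std::string name;
        std::vector<ToyTypePtr> args;
    };

    static ToyTypePtr make(std::string name, std::vector<ToyTypePtr> args = {})
    {
        return std::make_shared<ToyType>(ToyType{std::move(name), std::move(args)});
    }

    static std::string render(const ToyTypePtr & t)
    {
        std::string s = t->name;
        if (!t->args.empty())
        {
            s += "(";
            for (size_t i = 0; i < t->args.size(); ++i)
                s += (i ? ", " : "") + render(t->args[i]);
            s += ")";
        }
        return s;
    }

    /// Recurse first, then fold Tuple(T, T, ..., T) into Array(T).
    static void tupleToArrayIfPossible(ToyTypePtr & type)
    {
        if (!type)
            return;
        if (type->name == "Array" || type->name == "Map")
        {
            tupleToArrayIfPossible(type->args.back());  /// nested type / map value type
            return;
        }
        if (type->name == "Tuple")
        {
            for (auto & arg : type->args)
                tupleToArrayIfPossible(arg);
            bool all_equal = !type->args.empty();
            for (const auto & arg : type->args)
                all_equal = all_equal && render(arg) == render(type->args.front());
            if (all_equal)
                type = make("Array", {type->args.front()});
        }
    }

    int main()
    {
        auto t = make("Map", {make("String"), make("Tuple", {make("Int64"), make("Int64")})});
        tupleToArrayIfPossible(t);
        std::cout << render(t) << '\n';  /// Map(String, Array(Int64))
    }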
} DataTypePtr tryInferNumberFromString(std::string_view field, const FormatSettings & settings) diff --git a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt index 93374f933b7..e9810e918b4 100644 --- a/src/Functions/CMakeLists.txt +++ b/src/Functions/CMakeLists.txt @@ -112,12 +112,7 @@ endif () target_link_libraries(clickhouse_functions_obj PUBLIC ${PUBLIC_LIBS} PRIVATE ${PRIVATE_LIBS}) -if (USE_STATIC_LIBRARIES OR NOT SPLIT_SHARED_LIBRARIES) - # Used to forward the linking information to the final binaries such as clickhouse / unit_tests_dbms, - # since such information are lost after we convert to OBJECT target - add_library(clickhouse_functions INTERFACE) - target_link_libraries(clickhouse_functions INTERFACE ${OBJECT_LIBS} ${PUBLIC_LIBS} ${PRIVATE_LIBS}) -else() - add_library(clickhouse_functions SHARED ${OBJECT_LIBS}) - target_link_libraries(clickhouse_functions PUBLIC ${PUBLIC_LIBS} PRIVATE ${PRIVATE_LIBS}) -endif () +# Used to forward the linking information to the final binaries such as clickhouse / unit_tests_dbms, +# since such information are lost after we convert to OBJECT target +add_library(clickhouse_functions INTERFACE) +target_link_libraries(clickhouse_functions INTERFACE ${OBJECT_LIBS} ${PUBLIC_LIBS} ${PRIVATE_LIBS}) diff --git a/src/Functions/CastOverloadResolver.h b/src/Functions/CastOverloadResolver.h index 6aa3d97ff0a..0bd0bad0d14 100644 --- a/src/Functions/CastOverloadResolver.h +++ b/src/Functions/CastOverloadResolver.h @@ -1,5 +1,6 @@ #pragma once #include +#include namespace DB { @@ -32,10 +33,11 @@ public: ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } - explicit CastOverloadResolverImpl(std::optional diagnostic_, bool keep_nullable_, bool cast_ipv4_ipv6_default_on_conversion_error_) + explicit CastOverloadResolverImpl(std::optional diagnostic_, bool keep_nullable_, bool cast_ipv4_ipv6_default_on_conversion_error_, const DataTypeValidationSettings & data_type_validation_settings_) : diagnostic(std::move(diagnostic_)) , keep_nullable(keep_nullable_) , cast_ipv4_ipv6_default_on_conversion_error(cast_ipv4_ipv6_default_on_conversion_error_) + , data_type_validation_settings(data_type_validation_settings_) { } @@ -46,13 +48,13 @@ public: if constexpr (internal) return createImpl({}, false /*keep_nullable*/, settings_ref.cast_ipv4_ipv6_default_on_conversion_error); - return createImpl({}, settings_ref.cast_keep_nullable, settings_ref.cast_ipv4_ipv6_default_on_conversion_error); + return createImpl({}, settings_ref.cast_keep_nullable, settings_ref.cast_ipv4_ipv6_default_on_conversion_error, DataTypeValidationSettings(settings_ref)); } - static FunctionOverloadResolverPtr createImpl(std::optional diagnostic = {}, bool keep_nullable = false, bool cast_ipv4_ipv6_default_on_conversion_error = false) + static FunctionOverloadResolverPtr createImpl(std::optional diagnostic = {}, bool keep_nullable = false, bool cast_ipv4_ipv6_default_on_conversion_error = false, const DataTypeValidationSettings & data_type_validation_settings = {}) { assert(!internal || !keep_nullable); - return std::make_unique(std::move(diagnostic), keep_nullable, cast_ipv4_ipv6_default_on_conversion_error); + return std::make_unique(std::move(diagnostic), keep_nullable, cast_ipv4_ipv6_default_on_conversion_error, data_type_validation_settings); } protected: @@ -83,6 +85,7 @@ protected: ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); DataTypePtr type = DataTypeFactory::instance().get(type_col->getValue()); + validateDataType(type, data_type_validation_settings); if 
constexpr (cast_type == CastType::accurateOrNull) return makeNullable(type); @@ -104,6 +107,7 @@ private: std::optional diagnostic; bool keep_nullable; bool cast_ipv4_ipv6_default_on_conversion_error; + DataTypeValidationSettings data_type_validation_settings; }; diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index e9a4e357b7e..f4163a336ef 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -1343,6 +1343,30 @@ struct ToYYYYMMDDhhmmssImpl using FactorTransform = ZeroTransform; }; +struct ToDateTimeComponentsImpl +{ + static constexpr auto name = "toDateTimeComponents"; + + static inline DateLUTImpl::DateTimeComponents execute(Int64 t, const DateLUTImpl & time_zone) + { + return time_zone.toDateTimeComponents(t); + } + static inline DateLUTImpl::DateTimeComponents execute(UInt32 t, const DateLUTImpl & time_zone) + { + return time_zone.toDateTimeComponents(static_cast(t)); + } + static inline DateLUTImpl::DateTimeComponents execute(Int32 d, const DateLUTImpl & time_zone) + { + return time_zone.toDateTimeComponents(ExtendedDayNum(d)); + } + static inline DateLUTImpl::DateTimeComponents execute(UInt16 d, const DateLUTImpl & time_zone) + { + return time_zone.toDateTimeComponents(DayNum(d)); + } + + using FactorTransform = ZeroTransform; +}; + template struct Transformer diff --git a/src/Functions/FunctionUnixTimestamp64.h b/src/Functions/FunctionUnixTimestamp64.h index 847ea74a784..85bf50f3127 100644 --- a/src/Functions/FunctionUnixTimestamp64.h +++ b/src/Functions/FunctionUnixTimestamp64.h @@ -84,6 +84,13 @@ public: return res_column; } + + bool hasInformationAboutMonotonicity() const override { return true; } + + Monotonicity getMonotonicityForRange(const IDataType & /*type*/, const Field & /*left*/, const Field & /*right*/) const override + { + return {.is_monotonic = true, .is_always_monotonic = true}; + } }; diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index c9638ab95af..8847621661d 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -1,6 +1,5 @@ #pragma once -#include "Common/Exception.h" #include #include @@ -43,6 +42,7 @@ #include #include #include +#include #include #include #include @@ -2147,14 +2147,18 @@ struct ToNumberMonotonicity if constexpr (std::is_floating_point_v) return { .is_monotonic = true, .is_always_monotonic = true }; - /// If converting from Float, for monotonicity, arguments must fit in range of result type. - bool is_type_float = false; - if (const auto * low_cardinality = typeid_cast(&type)) - is_type_float = WhichDataType(low_cardinality->getDictionaryType()).isFloat(); - else - is_type_float = WhichDataType(type).isFloat(); + const auto * low_cardinality = typeid_cast(&type); + const IDataType * low_cardinality_dictionary_type = nullptr; + if (low_cardinality) + low_cardinality_dictionary_type = low_cardinality->getDictionaryType().get(); - if (is_type_float) + WhichDataType which_type(type); + WhichDataType which_inner_type = low_cardinality + ? WhichDataType(low_cardinality_dictionary_type) + : WhichDataType(type); + + /// If converting from Float, for monotonicity, arguments must fit in range of result type. + if (which_inner_type.isFloat()) { if (left.isNull() || right.isNull()) return {}; @@ -2173,16 +2177,19 @@ struct ToNumberMonotonicity /// Integer cases. + /// Only support types represented by native integers. + /// It can be extended to big integers, decimals and DateTime64 later. 
+ /// By the way, NULLs are representing unbounded ranges. + if (!((left.isNull() || left.getType() == Field::Types::UInt64 || left.getType() == Field::Types::Int64) + && (right.isNull() || right.getType() == Field::Types::UInt64 || right.getType() == Field::Types::Int64))) + return {}; + const bool from_is_unsigned = type.isValueRepresentedByUnsignedInteger(); const bool to_is_unsigned = is_unsigned_v; const size_t size_of_from = type.getSizeOfValueInMemory(); const size_t size_of_to = sizeof(T); - /// Do not support 128 bit integers and decimals for now. - if (size_of_from > sizeof(Int64)) - return {}; - const bool left_in_first_half = left.isNull() ? from_is_unsigned : (left.get() >= 0); diff --git a/src/Functions/FunctionsHashing.cpp b/src/Functions/FunctionsHashing.cpp index 8f616b0be94..c51898b271b 100644 --- a/src/Functions/FunctionsHashing.cpp +++ b/src/Functions/FunctionsHashing.cpp @@ -8,16 +8,6 @@ namespace DB REGISTER_FUNCTION(Hashing) { -#if USE_SSL - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); -#endif factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); diff --git a/src/Functions/FunctionsHashingSSL.cpp b/src/Functions/FunctionsHashingSSL.cpp new file mode 100644 index 00000000000..01a19a3dd6e --- /dev/null +++ b/src/Functions/FunctionsHashingSSL.cpp @@ -0,0 +1,27 @@ +#include "config.h" + +#if USE_SSL + +#include "FunctionsHashing.h" +#include + +/// SSL functions are located in the separate FunctionsHashingSSL.cpp file +/// to lower the compilation time of FunctionsHashing.cpp + +namespace DB +{ + +REGISTER_FUNCTION(HashingSSL) +{ + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); +} +} + +#endif diff --git a/src/Functions/FunctionsLogical.h b/src/Functions/FunctionsLogical.h index 30d8983b8cc..833191866e5 100644 --- a/src/Functions/FunctionsLogical.h +++ b/src/Functions/FunctionsLogical.h @@ -176,6 +176,7 @@ public: ColumnPtr executeShortCircuit(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) const; bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } size_t getNumberOfArguments() const override { return 0; } + bool canBeExecutedOnLowCardinalityDictionary() const override { return false; } bool useDefaultImplementationForNulls() const override { return !Impl::specialImplementationForNulls(); } diff --git a/src/Functions/FunctionsMiscellaneous.h b/src/Functions/FunctionsMiscellaneous.h index da90889fd4d..d5c92c5d666 100644 --- a/src/Functions/FunctionsMiscellaneous.h +++ b/src/Functions/FunctionsMiscellaneous.h @@ -205,7 +205,7 @@ public: const String & expression_return_name_) : expression_actions(std::move(expression_actions_)) { - /// Check that expression does not contain unusual actions that will break columnss structure. + /// Check that expression does not contain unusual actions that will break columns structure. 
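[Editor's illustration, not part of the patch] On the ToNumberMonotonicity hunk near the start of this chunk: the new early-out keeps the analysis only when both range endpoints are NULL (an unbounded side) or native 64-bit integer Fields. A rough standalone sketch of that guard with a toy Field (a std::variant stand-in, not DB::Field):

    #include <cstdint>
    #include <iostream>
    #include <optional>
    #include <string>
    #include <variant>

    /// Toy stand-in for DB::Field: an empty optional plays the role of a NULL endpoint.
    using ToyField = std::optional<std::variant<std::uint64_t, std::int64_t, std::string>>;

    /// The guard: report "monotonicity unknown" unless both endpoints are NULL or
    /// representable as native UInt64/Int64 values.
    static bool endpointsSupported(const ToyField & left, const ToyField & right)
    {
        auto ok = [](const ToyField & f)
        {
            return !f.has_value()
                || std::holds_alternative<std::uint64_t>(*f)
                || std::holds_alternative<std::int64_t>(*f);
        };
        return ok(left) && ok(right);
    }

    int main()
    {
        std::cout << endpointsSupported(ToyField{std::int64_t{-5}}, std::nullopt) << '\n';  /// 1
        std::cout << endpointsSupported(ToyField{std::string{"1"}}, std::nullopt) << '\n';  /// 0: not a native integer
    }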
for (const auto & action : expression_actions->getActions()) if (action.node->type == ActionsDAG::ActionType::ARRAY_JOIN) throw Exception("Expression with arrayJoin or other unusual action cannot be captured", ErrorCodes::BAD_ARGUMENTS); diff --git a/src/Functions/IFunction.h b/src/Functions/IFunction.h index fc1a353a873..e82b98f0084 100644 --- a/src/Functions/IFunction.h +++ b/src/Functions/IFunction.h @@ -3,6 +3,8 @@ #include #include #include +#include +#include #include #include "config.h" @@ -122,11 +124,11 @@ using Values = std::vector; /** Function with known arguments and return type (when the specific overload was chosen). * It is also the point where all function-specific properties are known. */ -class IFunctionBase +class IFunctionBase : public IResolvedFunction { public: - virtual ~IFunctionBase() = default; + ~IFunctionBase() override = default; virtual ColumnPtr execute( /// NOLINT const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, bool dry_run = false) const @@ -137,8 +139,10 @@ public: /// Get the main function name. virtual String getName() const = 0; - virtual const DataTypes & getArgumentTypes() const = 0; - virtual const DataTypePtr & getResultType() const = 0; + const Array & getParameters() const final + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "IFunctionBase doesn't support getParameters method"); + } /// Do preparations and return executable. /// sample_columns should contain data types of arguments and values of constants, if relevant. @@ -281,7 +285,7 @@ public: }; -using FunctionBasePtr = std::shared_ptr; +using FunctionBasePtr = std::shared_ptr; /** Creates IFunctionBase from argument types list (chooses one function overload). diff --git a/src/Functions/IFunctionAdaptors.h b/src/Functions/IFunctionAdaptors.h index dbcc07af57a..eb2350d9b5e 100644 --- a/src/Functions/IFunctionAdaptors.h +++ b/src/Functions/IFunctionAdaptors.h @@ -51,6 +51,8 @@ public: const DataTypes & getArgumentTypes() const override { return arguments; } const DataTypePtr & getResultType() const override { return result_type; } + const FunctionPtr & getFunction() const { return function; } + #if USE_EMBEDDED_COMPILER bool isCompilable() const override { return function->isCompilable(getArgumentTypes()); } diff --git a/src/Functions/PerformanceAdaptors.h b/src/Functions/PerformanceAdaptors.h index 5b690d83805..9aed3893fff 100644 --- a/src/Functions/PerformanceAdaptors.h +++ b/src/Functions/PerformanceAdaptors.h @@ -210,7 +210,7 @@ public: throw Exception("There are no available implementations for function " "TODO(dakovalkov): add name", ErrorCodes::NO_SUITABLE_FUNCTION_IMPLEMENTATION); - /// Statistics shouldn't rely on small columnss. + /// Statistics shouldn't rely on small columns. bool considerable = (input_rows_count > 1000); ColumnPtr res; diff --git a/src/Functions/TransformDateTime64.h b/src/Functions/TransformDateTime64.h index cb4b3fbb71d..3dab9efeb6b 100644 --- a/src/Functions/TransformDateTime64.h +++ b/src/Functions/TransformDateTime64.h @@ -48,6 +48,10 @@ public: : scale_multiplier(DecimalUtils::scaleMultiplier(scale_)) {} + TransformDateTime64(DateTime64::NativeType scale_multiplier_ = 1) /// NOLINT(google-explicit-constructor) + : scale_multiplier(scale_multiplier_) + {} + template inline auto NO_SANITIZE_UNDEFINED execute(const DateTime64 & t, Args && ... 
args) const { @@ -127,6 +131,8 @@ public: return wrapped_transform.executeExtendedResult(t, std::forward(args)...); } + DateTime64::NativeType getScaleMultiplier() const { return scale_multiplier; } + private: DateTime64::NativeType scale_multiplier = 1; Transform wrapped_transform = {}; diff --git a/src/Functions/array/arrayReduce.cpp b/src/Functions/array/arrayReduce.cpp index c93e67d4b1c..e7ed8577049 100644 --- a/src/Functions/array/arrayReduce.cpp +++ b/src/Functions/array/arrayReduce.cpp @@ -104,7 +104,7 @@ DataTypePtr FunctionArrayReduce::getReturnTypeImpl(const ColumnsWithTypeAndName aggregate_function = AggregateFunctionFactory::instance().get(aggregate_function_name, argument_types, params_row, properties); } - return aggregate_function->getReturnType(); + return aggregate_function->getResultType(); } diff --git a/src/Functions/array/arrayReduceInRanges.cpp b/src/Functions/array/arrayReduceInRanges.cpp index 11d5e03eb3d..2cceea4ddba 100644 --- a/src/Functions/array/arrayReduceInRanges.cpp +++ b/src/Functions/array/arrayReduceInRanges.cpp @@ -122,7 +122,7 @@ DataTypePtr FunctionArrayReduceInRanges::getReturnTypeImpl(const ColumnsWithType aggregate_function = AggregateFunctionFactory::instance().get(aggregate_function_name, argument_types, params_row, properties); } - return std::make_shared(aggregate_function->getReturnType()); + return std::make_shared(aggregate_function->getResultType()); } diff --git a/src/Functions/array/range.cpp b/src/Functions/array/range.cpp index aacf3103df9..46284ce95bf 100644 --- a/src/Functions/array/range.cpp +++ b/src/Functions/array/range.cpp @@ -126,7 +126,7 @@ private: size_t total_values = 0; size_t pre_values = 0; - std::vector row_length(input_rows_count); + PODArray row_length(input_rows_count); for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) { @@ -138,6 +138,8 @@ private: row_length[row_idx] = (static_cast<__int128_t>(end_data[row_idx]) - static_cast<__int128_t>(start) - 1) / static_cast<__int128_t>(step) + 1; else if (start > end_data[row_idx] && step < 0) row_length[row_idx] = (static_cast<__int128_t>(end_data[row_idx]) - static_cast<__int128_t>(start) + 1) / static_cast<__int128_t>(step) + 1; + else + row_length[row_idx] = 0; pre_values += row_length[row_idx]; @@ -161,8 +163,11 @@ private: IColumn::Offset offset{}; for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) { - for (size_t idx = 0; idx < row_length[row_idx]; idx++) - out_data[offset++] = static_cast(start + offset * step); + for (size_t idx = 0; idx < row_length[row_idx]; ++idx) + { + out_data[offset] = static_cast(start + offset * step); + ++offset; + } out_offsets[row_idx] = offset; } @@ -183,7 +188,7 @@ private: size_t total_values = 0; size_t pre_values = 0; - std::vector row_length(input_rows_count); + PODArray row_length(input_rows_count); for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) { @@ -195,7 +200,8 @@ private: row_length[row_idx] = (static_cast<__int128_t>(end_data[row_idx]) - static_cast<__int128_t>(start_data[row_idx]) - 1) / static_cast<__int128_t>(step) + 1; else if (start_data[row_idx] > end_data[row_idx] && step < 0) row_length[row_idx] = (static_cast<__int128_t>(end_data[row_idx]) - static_cast<__int128_t>(start_data[row_idx]) + 1) / static_cast<__int128_t>(step) + 1; - + else + row_length[row_idx] = 0; pre_values += row_length[row_idx]; @@ -241,7 +247,7 @@ private: size_t total_values = 0; size_t pre_values = 0; - std::vector row_length(input_rows_count); + PODArray row_length(input_rows_count); for (size_t row_idx = 
0; row_idx < input_rows_count; ++row_idx) { @@ -253,6 +259,8 @@ private: row_length[row_idx] = (static_cast<__int128_t>(end_data[row_idx]) - static_cast<__int128_t>(start) - 1) / static_cast<__int128_t>(step_data[row_idx]) + 1; else if (start > end_data[row_idx] && step_data[row_idx] < 0) row_length[row_idx] = (static_cast<__int128_t>(end_data[row_idx]) - static_cast<__int128_t>(start) + 1) / static_cast<__int128_t>(step_data[row_idx]) + 1; + else + row_length[row_idx] = 0; pre_values += row_length[row_idx]; @@ -301,7 +309,7 @@ private: size_t total_values = 0; size_t pre_values = 0; - std::vector row_length(input_rows_count); + PODArray row_length(input_rows_count); for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) { @@ -312,6 +320,8 @@ private: row_length[row_idx] = (static_cast<__int128_t>(end_start[row_idx]) - static_cast<__int128_t>(start_data[row_idx]) - 1) / static_cast<__int128_t>(step_data[row_idx]) + 1; else if (start_data[row_idx] > end_start[row_idx] && step_data[row_idx] < 0) row_length[row_idx] = (static_cast<__int128_t>(end_start[row_idx]) - static_cast<__int128_t>(start_data[row_idx]) + 1) / static_cast<__int128_t>(step_data[row_idx]) + 1; + else + row_length[row_idx] = 0; pre_values += row_length[row_idx]; diff --git a/src/Functions/bar.cpp b/src/Functions/bar.cpp index 982e1ff3a25..e1f65a61175 100644 --- a/src/Functions/bar.cpp +++ b/src/Functions/bar.cpp @@ -118,7 +118,7 @@ public: size_t next_size = current_offset + UnicodeBar::getWidthInBytes(width) + 1; dst_chars.resize(next_size); - UnicodeBar::render(width, reinterpret_cast(&dst_chars[current_offset])); + UnicodeBar::render(width, reinterpret_cast(&dst_chars[current_offset]), reinterpret_cast(&dst_chars[next_size])); current_offset = next_size; dst_offsets[i] = current_offset; } diff --git a/src/Functions/dateDiff.cpp b/src/Functions/dateDiff.cpp index ec9c9df8e49..60668f81edf 100644 --- a/src/Functions/dateDiff.cpp +++ b/src/Functions/dateDiff.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include @@ -34,6 +35,7 @@ namespace ErrorCodes namespace { +template class DateDiffImpl { public: @@ -165,8 +167,92 @@ public: template Int64 calculate(const TransformX & transform_x, const TransformY & transform_y, T1 x, T2 y, const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y) const { - return static_cast(transform_y.execute(y, timezone_y)) + if constexpr (is_diff) + return static_cast(transform_y.execute(y, timezone_y)) - static_cast(transform_x.execute(x, timezone_x)); + else + { + auto res = static_cast(transform_y.execute(y, timezone_y)) + - static_cast(transform_x.execute(x, timezone_x)); + DateLUTImpl::DateTimeComponents a_comp; + DateLUTImpl::DateTimeComponents b_comp; + Int64 adjust_value; + auto x_seconds = TransformDateTime64>(transform_x.getScaleMultiplier()).execute(x, timezone_x); + auto y_seconds = TransformDateTime64>(transform_y.getScaleMultiplier()).execute(y, timezone_y); + if (x_seconds <= y_seconds) + { + a_comp = TransformDateTime64(transform_x.getScaleMultiplier()).execute(x, timezone_x); + b_comp = TransformDateTime64(transform_y.getScaleMultiplier()).execute(y, timezone_y); + adjust_value = -1; + } + else + { + a_comp = TransformDateTime64(transform_y.getScaleMultiplier()).execute(y, timezone_y); + b_comp = TransformDateTime64(transform_x.getScaleMultiplier()).execute(x, timezone_x); + adjust_value = 1; + } + + if constexpr (std::is_same_v>>) + { + if ((a_comp.date.month > b_comp.date.month) + || ((a_comp.date.month == b_comp.date.month) && 
((a_comp.date.day > b_comp.date.day) + || ((a_comp.date.day == b_comp.date.day) && ((a_comp.time.hour > b_comp.time.hour) + || ((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute) + || ((a_comp.time.minute == b_comp.time.minute) && (a_comp.time.second > b_comp.time.second)))) + ))))) + res += adjust_value; + } + else if constexpr (std::is_same_v>>) + { + auto x_month_in_quarter = (a_comp.date.month - 1) % 3; + auto y_month_in_quarter = (b_comp.date.month - 1) % 3; + if ((x_month_in_quarter > y_month_in_quarter) + || ((x_month_in_quarter == y_month_in_quarter) && ((a_comp.date.day > b_comp.date.day) + || ((a_comp.date.day == b_comp.date.day) && ((a_comp.time.hour > b_comp.time.hour) + || ((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute) + || ((a_comp.time.minute == b_comp.time.minute) && (a_comp.time.second > b_comp.time.second)))) + ))))) + res += adjust_value; + } + else if constexpr (std::is_same_v>>) + { + if ((a_comp.date.day > b_comp.date.day) + || ((a_comp.date.day == b_comp.date.day) && ((a_comp.time.hour > b_comp.time.hour) + || ((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute) + || ((a_comp.time.minute == b_comp.time.minute) && (a_comp.time.second > b_comp.time.second)))) + ))) + res += adjust_value; + } + else if constexpr (std::is_same_v>>) + { + auto x_day_of_week = TransformDateTime64(transform_x.getScaleMultiplier()).execute(x, timezone_x); + auto y_day_of_week = TransformDateTime64(transform_y.getScaleMultiplier()).execute(y, timezone_y); + if ((x_day_of_week > y_day_of_week) + || ((x_day_of_week == y_day_of_week) && (a_comp.time.hour > b_comp.time.hour)) + || ((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute) + || ((a_comp.time.minute == b_comp.time.minute) && (a_comp.time.second > b_comp.time.second))))) + res += adjust_value; + } + else if constexpr (std::is_same_v>>) + { + if ((a_comp.time.hour > b_comp.time.hour) + || ((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute) + || ((a_comp.time.minute == b_comp.time.minute) && (a_comp.time.second > b_comp.time.second))))) + res += adjust_value; + } + else if constexpr (std::is_same_v>>) + { + if ((a_comp.time.minute > b_comp.time.minute) + || ((a_comp.time.minute == b_comp.time.minute) && (a_comp.time.second > b_comp.time.second))) + res += adjust_value; + } + else if constexpr (std::is_same_v>>) + { + if (a_comp.time.second > b_comp.time.second) + res += adjust_value; + } + return res; + } } template @@ -193,7 +279,8 @@ private: /** dateDiff('unit', t1, t2, [timezone]) - * t1 and t2 can be Date or DateTime + * age('unit', t1, t2, [timezone]) + * t1 and t2 can be Date, Date32, DateTime or DateTime64 * * If timezone is specified, it applied to both arguments. * If not, timezones from datatypes t1 and t2 are used. @@ -201,10 +288,11 @@ private: * * Timezone matters because days can have different length. */ +template class FunctionDateDiff : public IFunction { public: - static constexpr auto name = "dateDiff"; + static constexpr auto name = is_relative ? 
"dateDiff" : "age"; static FunctionPtr create(ContextPtr) { return std::make_shared(); } String getName() const override @@ -270,21 +358,21 @@ public: const auto & timezone_y = extractTimeZoneFromFunctionArguments(arguments, 3, 2); if (unit == "year" || unit == "yy" || unit == "yyyy") - impl.dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); else if (unit == "quarter" || unit == "qq" || unit == "q") - impl.dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); else if (unit == "month" || unit == "mm" || unit == "m") - impl.dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); else if (unit == "week" || unit == "wk" || unit == "ww") - impl.dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); else if (unit == "day" || unit == "dd" || unit == "d") - impl.dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); else if (unit == "hour" || unit == "hh" || unit == "h") - impl.dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); else if (unit == "minute" || unit == "mi" || unit == "n") - impl.dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); else if (unit == "second" || unit == "ss" || unit == "s") - impl.dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); else throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function {} does not support '{}' unit", getName(), unit); @@ -292,7 +380,7 @@ public: return res; } private: - DateDiffImpl impl{name}; + DateDiffImpl impl{name}; }; @@ -352,14 +440,14 @@ public: return res; } private: - DateDiffImpl impl{name}; + DateDiffImpl impl{name}; }; } REGISTER_FUNCTION(DateDiff) { - factory.registerFunction({}, FunctionFactory::CaseInsensitive); + factory.registerFunction>({}, FunctionFactory::CaseInsensitive); } REGISTER_FUNCTION(TimeDiff) @@ -376,4 +464,9 @@ Example: Documentation::Categories{"Dates and Times"}}, FunctionFactory::CaseInsensitive); } +REGISTER_FUNCTION(Age) +{ + factory.registerFunction>({}, FunctionFactory::CaseInsensitive); +} + } diff --git a/src/Functions/formatDateTime.cpp b/src/Functions/formatDateTime.cpp index 4c24239a06c..e7c9a1b5103 100644 --- a/src/Functions/formatDateTime.cpp +++ b/src/Functions/formatDateTime.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -20,6 +21,7 @@ #include #include +#include namespace DB @@ -36,6 +38,22 @@ namespace ErrorCodes namespace { +struct FormatDateTimeTraits +{ + enum class SupportInteger + { + Yes, + No + }; + + enum class FormatSyntax + { + MySQL, + Joda + }; +}; + + template struct ActionValueTypeMap {}; template <> struct ActionValueTypeMap { using ActionValueType = UInt32; }; template <> struct ActionValueTypeMap { using ActionValueType = UInt32; }; @@ -50,22 +68,79 @@ template <> struct ActionValueTypeMap { using ActionValueTyp template <> struct ActionValueTypeMap { using ActionValueType = UInt32; }; template <> 
struct ActionValueTypeMap { using ActionValueType = Int64; }; +/// Counts the number of literal characters in Joda format string until the next closing literal +/// sequence single quote. Returns -1 if no literal single quote was found. +/// In Joda format string(https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html) +/// literal content must be quoted with single quote. and two single quote means literal with one single quote. +/// For example: +/// Format string: "'aaaa'", unescaped literal: "aaaa"; +/// Format string: "'aa''aa'", unescaped literal: "aa'aa"; +/// Format string: "'aaa''aa" is not valid because of missing of end single quote. +Int64 numLiteralChars(const char * cur, const char * end) +{ + bool found = false; + Int64 count = 0; + while (cur < end) + { + if (*cur == '\'') + { + if (cur + 1 < end && *(cur + 1) == '\'') + { + count += 2; + cur += 2; + } + else + { + found = true; + break; + } + } + else + { + ++count; + ++cur; + } + } + return found ? count : -1; +} -/** formatDateTime(time, 'pattern') - * Performs formatting of time, according to provided pattern. +/// Cast value from integer to string, making sure digits number in result string is no less than total_digits by padding leading '0'. +String padValue(UInt32 val, size_t min_digits) +{ + String str = std::to_string(val); + auto length = str.size(); + if (length >= min_digits) + return str; + + String paddings(min_digits - length, '0'); + return str.insert(0, paddings); +} + +constexpr std::string_view weekdaysFull[] = {"Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday"}; + +constexpr std::string_view weekdaysShort[] = {"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"}; + +constexpr std::string_view monthsFull[] + = {"January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"}; + +constexpr std::string_view monthsShort[] + = {"Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"}; + +/** formatDateTime(time, 'format') + * Performs formatting of time, according to provided format. * * This function is optimized with an assumption, that the resulting strings are fixed width. * (This assumption is fulfilled for currently supported formatting options). * * It is implemented in two steps. - * At first step, it creates a pattern of zeros, literal characters, whitespaces, etc. - * and quickly fills resulting character array (string column) with this pattern. + * At first step, it creates a template of zeros, literal characters, whitespaces, etc. + * and quickly fills resulting character array (string column) with this format. * At second step, it walks across the resulting character array and modifies/replaces specific characters, * by calling some functions by pointers and shifting cursor by specified amount. * * Advantages: * - memcpy is mostly unrolled; - * - low number of arithmetic ops due to pre-filled pattern; + * - low number of arithmetic ops due to pre-filled template; * - for somewhat reason, function by pointer call is faster than switch/case. * * Possible further optimization options: @@ -89,7 +164,7 @@ template <> struct ActionValueTypeMap { using ActionValueTyp * * PS. We can make this function to return FixedString. Currently it returns String. 
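[Editor's illustration, not part of the patch] To make the quoting rules described next to numLiteralChars concrete: inside a Joda format string a literal run is wrapped in single quotes, and a doubled quote inside the run stands for one literal quote. The small hypothetical helper below unescapes one quoted literal; it is not the parser added by this patch, just a worked example of the rules:

    #include <cassert>
    #include <string>
    #include <string_view>

    /// Unescape a single-quoted Joda literal: "'aaaa'" -> "aaaa", "'aa''aa'" -> "aa'aa".
    std::string unescapeJodaLiteral(std::string_view quoted)
    {
        assert(quoted.size() >= 2 && quoted.front() == '\'' && quoted.back() == '\'');
        std::string out;
        for (size_t i = 1; i + 1 < quoted.size(); ++i)
        {
            if (quoted[i] == '\'' && i + 1 < quoted.size() - 1 && quoted[i + 1] == '\'')
                ++i;  /// collapse '' into a single literal quote
            out += quoted[i];
        }
        return out;
    }

    int main()
    {
        assert(unescapeJodaLiteral("'aaaa'") == "aaaa");
        assert(unescapeJodaLiteral("'aa''aa'") == "aa'aa");
    }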
*/ -template +template class FunctionFormatDateTimeImpl : public IFunction { private: @@ -112,189 +187,424 @@ private: class Action { public: - using Func = void (*)(char *, Time, UInt64, UInt32, const DateLUTImpl &); + /// Using std::function will cause performance degradation in MySQL format by 0.45x. + /// But std::function is required for Joda format to capture extra variables. + /// This is the reason why we use raw function pointer in MySQL format and std::function + /// in Joda format. + using Func = std::conditional_t< + format_syntax == FormatDateTimeTraits::FormatSyntax::MySQL, + size_t (*)(char *, Time, UInt64, UInt32, const DateLUTImpl &), + std::function>; Func func; - size_t shift; - explicit Action(Func func_, size_t shift_ = 0) : func(func_), shift(shift_) {} + /// extra_shift is only used in MySQL format syntax. It is always 0 in Joda format syntax. + size_t extra_shift = 0; - void perform(char *& target, Time source, UInt64 fractional_second, UInt32 scale, const DateLUTImpl & timezone) + /// Action for appending date/time related number in specified format. + explicit Action(Func && func_) : func(std::move(func_)) {} + + void perform(char *& dest, Time source, UInt64 fractional_second, UInt32 scale, const DateLUTImpl & timezone) { - func(target, source, fractional_second, scale, timezone); - target += shift; + auto shift = func(dest, source, fractional_second, scale, timezone); + dest += shift + extra_shift; } private: template - static inline void writeNumber2(char * p, T v) + static size_t writeNumber2(char * p, T v) { memcpy(p, &digits100[v * 2], 2); + return 2; } template - static inline void writeNumber3(char * p, T v) + static size_t writeNumber3(char * p, T v) { writeNumber2(p, v / 10); - p[2] += v % 10; + p[2] = '0' + v % 10; + return 3; } template - static inline void writeNumber4(char * p, T v) + static size_t writeNumber4(char * p, T v) { writeNumber2(p, v / 100); writeNumber2(p + 2, v % 100); + return 4; } - public: - static void noop(char *, Time, UInt64 , UInt32 , const DateLUTImpl &) + /// Cast content from integer to string, and append result string to buffer. + /// Make sure digits number in result string is no less than total_digits by padding leading '0' + /// Notice: '-' is not counted as digit. 
+ /// For example: + /// val = -123, total_digits = 2 => dest = "-123" + /// val = -123, total_digits = 3 => dest = "-123" + /// val = -123, total_digits = 4 => dest = "-0123" + static size_t writeNumberWithPadding(char * dest, std::integral auto val, size_t min_digits) { - } + using T = decltype(val); + using WeightType = typename NumberTraits::Construct, /*is_floating*/ false, sizeof(T)>::Type; + WeightType w = 1; + WeightType n = val; + size_t digits = 0; + while (n) + { + w *= 10; + n /= 10; + ++digits; + } - static void century(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) + /// Possible sign + size_t pos = 0; + n = val; + if constexpr (is_signed_v) + if (val < 0) + { + n = (~n) + 1; + dest[pos] = '-'; + ++pos; + } + + /// Possible leading paddings + if (min_digits > digits) + { + memset(dest, '0', min_digits - digits); + pos += min_digits - digits; + } + + /// Digits + while (w >= 100) + { + w /= 100; + + writeNumber2(dest + pos, n / w); + pos += 2; + + n = n % w; + } + if (n) + { + dest[pos] = '0' + n; + ++pos; + } + + return pos; + } + public: + static size_t mysqlNoop(char *, Time, UInt64, UInt32, const DateLUTImpl &) { return 0; } + + static size_t mysqlCentury(char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone) { auto year = ToYearImpl::execute(source, timezone); auto century = year / 100; - writeNumber2(target, century); + return writeNumber2(dest, century); } - static void dayOfMonth(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) + static size_t mysqlDayOfMonth(char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone) { - writeNumber2(target, ToDayOfMonthImpl::execute(source, timezone)); + return writeNumber2(dest, ToDayOfMonthImpl::execute(source, timezone)); } - static void americanDate(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) + static size_t mysqlAmericanDate(char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone) { - writeNumber2(target, ToMonthImpl::execute(source, timezone)); - writeNumber2(target + 3, ToDayOfMonthImpl::execute(source, timezone)); - writeNumber2(target + 6, ToYearImpl::execute(source, timezone) % 100); + writeNumber2(dest, ToMonthImpl::execute(source, timezone)); + writeNumber2(dest + 3, ToDayOfMonthImpl::execute(source, timezone)); + writeNumber2(dest + 6, ToYearImpl::execute(source, timezone) % 100); + return 8; } - static void dayOfMonthSpacePadded(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) + static size_t mysqlDayOfMonthSpacePadded(char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone) { auto day = ToDayOfMonthImpl::execute(source, timezone); if (day < 10) - target[1] += day; + dest[1] = '0' + day; else - writeNumber2(target, day); + writeNumber2(dest, day); + return 2; } - static void ISO8601Date(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) // NOLINT + static size_t mysqlISO8601Date(char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone) // NOLINT { - writeNumber4(target, ToYearImpl::execute(source, timezone)); - writeNumber2(target + 5, ToMonthImpl::execute(source, timezone)); - writeNumber2(target + 8, ToDayOfMonthImpl::execute(source, timezone)); + writeNumber4(dest, ToYearImpl::execute(source, timezone)); + writeNumber2(dest + 5, ToMonthImpl::execute(source, 
timezone)); + writeNumber2(dest + 8, ToDayOfMonthImpl::execute(source, timezone)); + return 10; } - static void dayOfYear(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) + static size_t mysqlDayOfYear(char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone) { - writeNumber3(target, ToDayOfYearImpl::execute(source, timezone)); + return writeNumber3(dest, ToDayOfYearImpl::execute(source, timezone)); } - static void month(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) + static size_t mysqlMonth(char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone) { - writeNumber2(target, ToMonthImpl::execute(source, timezone)); + return writeNumber2(dest, ToMonthImpl::execute(source, timezone)); } - static void dayOfWeek(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) + static size_t mysqlDayOfWeek(char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone) { - *target += ToDayOfWeekImpl::execute(source, timezone); + *dest = '0' + ToDayOfWeekImpl::execute(source, timezone); + return 1; } - static void dayOfWeek0To6(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) + static size_t mysqlDayOfWeek0To6(char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone) { auto day = ToDayOfWeekImpl::execute(source, timezone); - *target += (day == 7 ? 0 : day); + *dest = '0' + (day == 7 ? 0 : day); + return 1; } - static void ISO8601Week(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) // NOLINT + static size_t mysqlISO8601Week(char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone) // NOLINT { - writeNumber2(target, ToISOWeekImpl::execute(source, timezone)); + return writeNumber2(dest, ToISOWeekImpl::execute(source, timezone)); } - static void ISO8601Year2(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) // NOLINT + static size_t mysqlISO8601Year2(char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone) // NOLINT { - writeNumber2(target, ToISOYearImpl::execute(source, timezone) % 100); + return writeNumber2(dest, ToISOYearImpl::execute(source, timezone) % 100); } - static void ISO8601Year4(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) // NOLINT + static size_t mysqlISO8601Year4(char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone) // NOLINT { - writeNumber4(target, ToISOYearImpl::execute(source, timezone)); + return writeNumber4(dest, ToISOYearImpl::execute(source, timezone)); } - static void year2(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) + static size_t mysqlYear2(char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone) { - writeNumber2(target, ToYearImpl::execute(source, timezone) % 100); + return writeNumber2(dest, ToYearImpl::execute(source, timezone) % 100); } - static void year4(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) + static size_t mysqlYear4(char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone) { - writeNumber4(target, ToYearImpl::execute(source, timezone)); + return writeNumber4(dest, ToYearImpl::execute(source, timezone)); } - static void 
hour24(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) + static size_t mysqlHour24(char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone) { - writeNumber2(target, ToHourImpl::execute(source, timezone)); + return writeNumber2(dest, ToHourImpl::execute(source, timezone)); } - static void hour12(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) + static size_t mysqlHour12(char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone) { auto x = ToHourImpl::execute(source, timezone); - writeNumber2(target, x == 0 ? 12 : (x > 12 ? x - 12 : x)); + return writeNumber2(dest, x == 0 ? 12 : (x > 12 ? x - 12 : x)); } - static void minute(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) + static size_t mysqlMinute(char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone) { - writeNumber2(target, ToMinuteImpl::execute(source, timezone)); + return writeNumber2(dest, ToMinuteImpl::execute(source, timezone)); } - static void AMPM(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) // NOLINT + static size_t mysqlAMPM(char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone) // NOLINT { auto hour = ToHourImpl::execute(source, timezone); - if (hour >= 12) - *target = 'P'; + dest[0] = hour >= 12 ? 'P' : 'A'; + dest[1] = 'M'; + return 2; } - static void hhmm24(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) + static size_t mysqlHHMM24(char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone) { - writeNumber2(target, ToHourImpl::execute(source, timezone)); - writeNumber2(target + 3, ToMinuteImpl::execute(source, timezone)); + writeNumber2(dest, ToHourImpl::execute(source, timezone)); + writeNumber2(dest + 3, ToMinuteImpl::execute(source, timezone)); + return 5; } - static void second(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) + static size_t mysqlSecond(char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone) { - writeNumber2(target, ToSecondImpl::execute(source, timezone)); + return writeNumber2(dest, ToSecondImpl::execute(source, timezone)); } - static void fractionalSecond(char * target, Time /*source*/, UInt64 fractional_second, UInt32 scale, const DateLUTImpl & /*timezone*/) - { - for (Int64 i = scale, value = fractional_second; i > 0; --i, value /= 10) - target[i - 1] += value % 10; - } - - static void ISO8601Time(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) // NOLINT + static size_t + mysqlFractionalSecond(char * dest, Time /*source*/, UInt64 fractional_second, UInt32 scale, const DateLUTImpl & /*timezone*/) { - writeNumber2(target, ToHourImpl::execute(source, timezone)); - writeNumber2(target + 3, ToMinuteImpl::execute(source, timezone)); - writeNumber2(target + 6, ToSecondImpl::execute(source, timezone)); + if (scale == 0) + scale = 1; + + for (Int64 i = scale, value = fractional_second; i > 0; --i) + { + dest[i - 1] += value % 10; + value /= 10; + } + return scale; } - static void timezoneOffset(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) + static size_t mysqlISO8601Time(char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone) // NOLINT + { + 
writeNumber2(dest, ToHourImpl::execute(source, timezone)); + writeNumber2(dest + 3, ToMinuteImpl::execute(source, timezone)); + writeNumber2(dest + 6, ToSecondImpl::execute(source, timezone)); + return 8; + } + + static size_t mysqlTimezoneOffset(char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone) { auto offset = TimezoneOffsetImpl::execute(source, timezone); if (offset < 0) { - *target = '-'; + *dest = '-'; offset = -offset; } - writeNumber2(target + 1, offset / 3600); - writeNumber2(target + 3, offset % 3600 / 60); + writeNumber2(dest + 1, offset / 3600); + writeNumber2(dest + 3, offset % 3600 / 60); + return 5; } - static void quarter(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) + static size_t mysqlQuarter(char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone) { - *target += ToQuarterImpl::execute(source, timezone); + *dest = '0' + ToQuarterImpl::execute(source, timezone); + return 1; + } + + template + static size_t jodaLiteral(const Literal & literal, char * dest, Time, UInt64, UInt32, const DateLUTImpl &) + { + memcpy(dest, literal.data(), literal.size()); + return literal.size(); + } + + static size_t jodaEra(size_t min_represent_digits, char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone) + { + auto year = static_cast(ToYearImpl::execute(source, timezone)); + String res; + if (min_represent_digits <= 3) + res = static_cast(year) > 0 ? "AD" : "BC"; + else + res = static_cast(year) > 0 ? "Anno Domini" : "Before Christ"; + + memcpy(dest, res.data(), res.size()); + return res.size(); + } + + static size_t jodaCentryOfEra(size_t min_represent_digits, char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone) + { + auto year = static_cast(ToYearImpl::execute(source, timezone)); + year = (year < 0 ? -year : year); + return writeNumberWithPadding(dest, year / 100, min_represent_digits); + } + + static size_t jodaYearOfEra(size_t min_represent_digits, char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone) + { + auto year = static_cast(ToYearImpl::execute(source, timezone)); + if (min_represent_digits == 2) + return writeNumberWithPadding(dest, std::abs(year) % 100, 2); + else + { + year = year <= 0 ? std::abs(year - 1) : year; + return writeNumberWithPadding(dest, year, min_represent_digits); + } + } + + static size_t jodaDayOfWeek1Based(size_t min_represent_digits, char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone) + { + auto week_day = ToDayOfWeekImpl::execute(source, timezone); + return writeNumberWithPadding(dest, week_day, min_represent_digits); + } + + static size_t jodaDayOfWeekText(size_t min_represent_digits, char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone) + { + auto week_day = ToDayOfWeekImpl::execute(source, timezone); + if (week_day == 7) + week_day = 0; + + std::string_view str_view = min_represent_digits <= 3 ? 
weekdaysShort[week_day] : weekdaysFull[week_day]; + memcpy(dest, str_view.data(), str_view.size()); + return str_view.size(); + } + + static size_t jodaYear(size_t min_represent_digits, char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone) + { + auto year = static_cast(ToYearImpl::execute(source, timezone)); + if (min_represent_digits == 2) + { + year = std::abs(year); + auto two_digit_year = year % 100; + return writeNumberWithPadding(dest, two_digit_year, 2); + } + else + return writeNumberWithPadding(dest, year, min_represent_digits); + } + + static size_t jodaDayOfYear(size_t min_represent_digits, char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone) + { + auto day_of_year = ToDayOfYearImpl::execute(source, timezone); + return writeNumberWithPadding(dest, day_of_year, min_represent_digits); + } + + static size_t jodaMonthOfYear(size_t min_represent_digits, char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone) + { + auto month_of_year = ToMonthImpl::execute(source, timezone); + return writeNumberWithPadding(dest, month_of_year, min_represent_digits); + } + + static size_t jodaMonthOfYearText(size_t min_represent_digits, char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone) + { + auto month = ToMonthImpl::execute(source, timezone); + std::string_view str_view = min_represent_digits <= 3 ? monthsShort[month - 1] : monthsFull[month - 1]; + memcpy(dest, str_view.data(), str_view.size()); + return str_view.size(); + } + + static size_t jodaDayOfMonth(size_t min_represent_digits, char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone) + { + auto day_of_month = ToDayOfMonthImpl::execute(source, timezone); + return writeNumberWithPadding(dest, day_of_month, min_represent_digits); + } + + static size_t jodaHalfDayOfDay( + size_t /*min_represent_digits*/, char * dest, Time source, UInt64 fractional_second, UInt32 scale, const DateLUTImpl & timezone) + { + return mysqlAMPM(dest, source, fractional_second, scale, timezone); + } + + static size_t jodaHourOfHalfDay(size_t min_represent_digits, char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone) + { + auto hour = ToHourImpl::execute(source, timezone) % 12; + return writeNumberWithPadding(dest, hour, min_represent_digits); + } + + static size_t jodaClockHourOfHalfDay(size_t min_represent_digits, char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone) + { + auto hour = ToHourImpl::execute(source, timezone) ; + hour = (hour + 11) % 12 + 1; + return writeNumberWithPadding(dest, hour, min_represent_digits); + } + + static size_t jodaHourOfDay(size_t min_represent_digits, char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone) + { + auto hour = ToHourImpl::execute(source, timezone) ; + return writeNumberWithPadding(dest, hour, min_represent_digits); + } + + static size_t jodaClockHourOfDay(size_t min_represent_digits, char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone) + { + auto hour = ToHourImpl::execute(source, timezone); + hour = (hour + 23) % 24 + 1; + return writeNumberWithPadding(dest, hour, min_represent_digits); + } + + static size_t jodaMinuteOfHour(size_t min_represent_digits, char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone) + { + auto minute_of_hour = ToMinuteImpl::execute(source, timezone); + return writeNumberWithPadding(dest, minute_of_hour, min_represent_digits); + } + + static size_t jodaSecondOfMinute(size_t min_represent_digits, char * dest, Time source, 
UInt64, UInt32, const DateLUTImpl & timezone) + { + auto second_of_minute = ToSecondImpl::execute(source, timezone); + return writeNumberWithPadding(dest, second_of_minute, min_represent_digits); + } + + static size_t jodaTimezone(size_t min_represent_digits, char * dest, Time /*source*/, UInt64, UInt32, const DateLUTImpl & timezone) + { + if (min_represent_digits <= 3) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Short name time zone is not yet supported"); + + auto str = timezone.getTimeZone(); + memcpy(dest, str.data(), str.size()); + return str.size(); } }; @@ -319,7 +629,7 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - if constexpr (support_integer) + if constexpr (support_integer == FormatDateTimeTraits::SupportInteger::Yes) { if (arguments.size() != 1 && arguments.size() != 2 && arguments.size() != 3) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, @@ -364,7 +674,7 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, [[maybe_unused]] size_t input_rows_count) const override { ColumnPtr res; - if constexpr (support_integer) + if constexpr (support_integer == FormatDateTimeTraits::SupportInteger::Yes) { if (arguments.size() == 1) { @@ -423,30 +733,26 @@ public: if (!times) return nullptr; - const ColumnConst * pattern_column = checkAndGetColumnConst(arguments[1].column.get()); - if (!pattern_column) + const ColumnConst * format_column = checkAndGetColumnConst(arguments[1].column.get()); + if (!format_column) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of second ('format') argument of function {}. Must be constant string.", arguments[1].column->getName(), getName()); - String pattern = pattern_column->getValue(); + String format = format_column->getValue(); UInt32 scale [[maybe_unused]] = 0; if constexpr (std::is_same_v) - { scale = times->getScale(); - } using T = typename ActionValueTypeMap::ActionValueType; std::vector> instructions; - String pattern_to_fill = parsePattern(pattern, instructions, scale); - size_t result_size = pattern_to_fill.size(); + String out_template; + auto result_size = parseFormat(format, instructions, scale, out_template); const DateLUTImpl * time_zone_tmp = nullptr; if (castType(arguments[0].type.get(), [&]([[maybe_unused]] const auto & type) { return true; })) - { time_zone_tmp = &extractTimeZoneFromFunctionArguments(arguments, 2, 0); - } else if (std::is_same_v || std::is_same_v) time_zone_tmp = &extractTimeZoneFromFunctionArguments(arguments, 2, 0); else @@ -461,30 +767,32 @@ public: dst_data.resize(vec.size() * (result_size + 1)); dst_offsets.resize(vec.size()); - /// Fill result with literals. + if constexpr (format_syntax == FormatDateTimeTraits::FormatSyntax::MySQL) { - UInt8 * begin = dst_data.data(); - UInt8 * end = begin + dst_data.size(); - UInt8 * pos = begin; - - if (pos < end) + /// Fill result with literals. { - memcpy(pos, pattern_to_fill.data(), result_size + 1); /// With zero terminator. - pos += result_size + 1; - } + UInt8 * begin = dst_data.data(); + UInt8 * end = begin + dst_data.size(); + UInt8 * pos = begin; - /// Fill by copying exponential growing ranges. - while (pos < end) - { - size_t bytes_to_copy = std::min(pos - begin, end - pos); - memcpy(pos, begin, bytes_to_copy); - pos += bytes_to_copy; + if (pos < end) + { + memcpy(pos, out_template.data(), result_size + 1); /// With zero terminator. + pos += result_size + 1; + } + + /// Fill by copying exponential growing ranges. 
+ while (pos < end) + { + size_t bytes_to_copy = std::min(pos - begin, end - pos); + memcpy(pos, begin, bytes_to_copy); + pos += bytes_to_copy; + } } } auto * begin = reinterpret_cast(dst_data.data()); auto * pos = begin; - for (size_t i = 0; i < vec.size(); ++i) { if constexpr (std::is_same_v) @@ -500,6 +808,7 @@ public: for (auto & instruction : instructions) instruction.perform(pos, static_cast(vec[i]), 0, 0, time_zone); } + *pos++ = '\0'; dst_offsets[i] = pos - begin; } @@ -509,32 +818,41 @@ public: } template - String parsePattern(const String & pattern, std::vector> & instructions, UInt32 scale) const + size_t parseFormat(const String & format, std::vector> & instructions, UInt32 scale, String & out_template) const { - String result; + if constexpr (format_syntax == FormatDateTimeTraits::FormatSyntax::MySQL) + return parseMySQLFormat(format, instructions, scale, out_template); + else if constexpr (format_syntax == FormatDateTimeTraits::FormatSyntax::Joda) + return parseJodaFormat(format, instructions, scale, out_template); + else + throw Exception( + ErrorCodes::NOT_IMPLEMENTED, + "Unknown datetime format style {} in function {}", + magic_enum::enum_name(format_syntax), + getName()); + } - const char * pos = pattern.data(); - const char * end = pos + pattern.size(); - - /// Add shift to previous action; or if there were none, add noop action with shift. - auto add_shift = [&](size_t amount) + template + size_t parseMySQLFormat(const String & format, std::vector> & instructions, UInt32 scale, String & out_template) const + { + auto add_extra_shift = [&](size_t amount) { if (instructions.empty()) - instructions.emplace_back(&Action::noop); - instructions.back().shift += amount; + instructions.emplace_back(&Action::mysqlNoop); + instructions.back().extra_shift += amount; }; - /// If the argument was DateTime, add instruction for printing. If it was date, just shift (the buffer is pre-filled with default values). 
- auto add_instruction_or_shift = [&](typename Action::Func func [[maybe_unused]], size_t shift) + auto add_instruction_or_extra_shift = [&](auto * func [[maybe_unused]], size_t amount [[maybe_unused]]) { - if constexpr (std::is_same_v) - instructions.emplace_back(func, shift); - else if constexpr (std::is_same_v) - instructions.emplace_back(func, shift); + if constexpr (std::is_same_v || std::is_same_v) + instructions.emplace_back(std::move(func)); else - add_shift(shift); + add_extra_shift(amount); }; + const char * pos = format.data(); + const char * const end = pos + format.size(); + while (true) { const char * percent_pos = find_first_symbols<'%'>(pos, end); @@ -543,205 +861,400 @@ public: { if (pos < percent_pos) { - result.append(pos, percent_pos); - add_shift(percent_pos - pos); + add_extra_shift(percent_pos - pos); + out_template += String(pos, percent_pos - pos); } pos = percent_pos + 1; - if (pos >= end) - throw Exception("Sign '%' is the last in pattern, if you need it, use '%%'", ErrorCodes::BAD_ARGUMENTS); + throw Exception("Sign '%' is the last in format, if you need it, use '%%'", ErrorCodes::BAD_ARGUMENTS); switch (*pos) { // Year, divided by 100, zero-padded case 'C': - instructions.emplace_back(&Action::century, 2); - result.append("00"); + instructions.emplace_back(&Action::mysqlCentury); + out_template += "00"; break; // Day of month, zero-padded (01-31) case 'd': - instructions.emplace_back(&Action::dayOfMonth, 2); - result.append("00"); + instructions.emplace_back(&Action::mysqlDayOfMonth); + out_template += "00"; break; // Short MM/DD/YY date, equivalent to %m/%d/%y case 'D': - instructions.emplace_back(&Action::americanDate, 8); - result.append("00/00/00"); + instructions.emplace_back(&Action::mysqlAmericanDate); + out_template += "00/00/00"; break; // Day of month, space-padded ( 1-31) 23 case 'e': - instructions.emplace_back(&Action::dayOfMonthSpacePadded, 2); - result.append(" 0"); + instructions.emplace_back(&Action::mysqlDayOfMonthSpacePadded); + out_template += " 0"; break; // Fractional seconds case 'f': { /// If the time data type has no fractional part, then we print '0' as the fractional part. 
- const auto actual_scale = std::max(1, scale); - instructions.emplace_back(&Action::fractionalSecond, actual_scale); - result.append(actual_scale, '0'); + instructions.emplace_back(&Action::mysqlFractionalSecond); + out_template += String(std::max(1, scale), '0'); break; } // Short YYYY-MM-DD date, equivalent to %Y-%m-%d 2001-08-23 case 'F': - instructions.emplace_back(&Action::ISO8601Date, 10); - result.append("0000-00-00"); + instructions.emplace_back(&Action::mysqlISO8601Date); + out_template += "0000-00-00"; break; // Last two digits of year of ISO 8601 week number (see %G) case 'g': - instructions.emplace_back(&Action::ISO8601Year2, 2); - result.append("00"); - break; + instructions.emplace_back(&Action::mysqlISO8601Year2); + out_template += "00"; + break; // Year of ISO 8601 week number (see %V) case 'G': - instructions.emplace_back(&Action::ISO8601Year4, 4); - result.append("0000"); - break; + instructions.emplace_back(&Action::mysqlISO8601Year4); + out_template += "0000"; + break; // Day of the year (001-366) 235 case 'j': - instructions.emplace_back(&Action::dayOfYear, 3); - result.append("000"); + instructions.emplace_back(&Action::mysqlDayOfYear); + out_template += "000"; break; // Month as a decimal number (01-12) case 'm': - instructions.emplace_back(&Action::month, 2); - result.append("00"); + instructions.emplace_back(&Action::mysqlMonth); + out_template += "00"; break; // ISO 8601 weekday as number with Monday as 1 (1-7) case 'u': - instructions.emplace_back(&Action::dayOfWeek, 1); - result.append("0"); + instructions.emplace_back(&Action::mysqlDayOfWeek); + out_template += "0"; break; // ISO 8601 week number (01-53) case 'V': - instructions.emplace_back(&Action::ISO8601Week, 2); - result.append("00"); + instructions.emplace_back(&Action::mysqlISO8601Week); + out_template += "00"; break; // Weekday as a decimal number with Sunday as 0 (0-6) 4 case 'w': - instructions.emplace_back(&Action::dayOfWeek0To6, 1); - result.append("0"); + instructions.emplace_back(&Action::mysqlDayOfWeek0To6); + out_template += "0"; break; // Two digits year case 'y': - instructions.emplace_back(&Action::year2, 2); - result.append("00"); + instructions.emplace_back(&Action::mysqlYear2); + out_template += "00"; break; // Four digits year case 'Y': - instructions.emplace_back(&Action::year4, 4); - result.append("0000"); + instructions.emplace_back(&Action::mysqlYear4); + out_template += "0000"; break; // Quarter (1-4) case 'Q': - instructions.template emplace_back(&Action::quarter, 1); - result.append("0"); + instructions.template emplace_back(&Action::mysqlQuarter); + out_template += "0"; break; // Offset from UTC timezone as +hhmm or -hhmm case 'z': - instructions.emplace_back(&Action::timezoneOffset, 5); - result.append("+0000"); + instructions.emplace_back(&Action::mysqlTimezoneOffset); + out_template += "+0000"; break; /// Time components. If the argument is Date, not a DateTime, then this components will have default value. 
// Minute (00-59) case 'M': - add_instruction_or_shift(&Action::minute, 2); - result.append("00"); + add_instruction_or_extra_shift(&Action::mysqlMinute, 2); + out_template += "00"; break; // AM or PM case 'p': - add_instruction_or_shift(&Action::AMPM, 2); - result.append("AM"); + add_instruction_or_extra_shift(&Action::mysqlAMPM, 2); + out_template += "AM"; break; // 24-hour HH:MM time, equivalent to %H:%M 14:55 case 'R': - add_instruction_or_shift(&Action::hhmm24, 5); - result.append("00:00"); + add_instruction_or_extra_shift(&Action::mysqlHHMM24, 5); + out_template += "00:00"; break; // Seconds case 'S': - add_instruction_or_shift(&Action::second, 2); - result.append("00"); + add_instruction_or_extra_shift(&Action::mysqlSecond, 2); + out_template += "00"; break; // ISO 8601 time format (HH:MM:SS), equivalent to %H:%M:%S 14:55:02 case 'T': - add_instruction_or_shift(&Action::ISO8601Time, 8); - result.append("00:00:00"); + add_instruction_or_extra_shift(&Action::mysqlISO8601Time, 8); + out_template += "00:00:00"; break; // Hour in 24h format (00-23) case 'H': - add_instruction_or_shift(&Action::hour24, 2); - result.append("00"); + add_instruction_or_extra_shift(&Action::mysqlHour24, 2); + out_template += "00"; break; // Hour in 12h format (01-12) case 'I': - add_instruction_or_shift(&Action::hour12, 2); - result.append("12"); + add_instruction_or_extra_shift(&Action::mysqlHour12, 2); + out_template += "12"; break; /// Escaped literal characters. case '%': - result += '%'; - add_shift(1); + add_extra_shift(1); + out_template += "%"; break; case 't': - result += '\t'; - add_shift(1); + add_extra_shift(1); + out_template += "\t"; break; case 'n': - result += '\n'; - add_shift(1); + add_extra_shift(1); + out_template += "\n"; break; // Unimplemented - case 'U': [[fallthrough]]; + case 'U': + [[fallthrough]]; case 'W': - throw Exception(ErrorCodes::NOT_IMPLEMENTED, - "Wrong pattern '{}', symbol '{}' is not implemented for function {}", - pattern, *pos, getName()); + throw Exception( + ErrorCodes::NOT_IMPLEMENTED, + "Wrong syntax '{}', symbol '{}' is not implemented for function {}", + format, + *pos, + getName()); default: - throw Exception(ErrorCodes::ILLEGAL_COLUMN, - "Wrong pattern '{}', unexpected symbol '{}' for function {}", - pattern, *pos, getName()); + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Wrong syntax '{}', unexpected symbol '{}' for function {}", + format, + *pos, + getName()); } ++pos; } else { - result.append(pos, end); - add_shift(end + 1 - pos); /// including zero terminator + add_extra_shift(end - pos); + out_template += String(pos, end - pos); break; } } - return result; + return out_template.size(); + } + + template + size_t parseJodaFormat(const String & format, std::vector> & instructions, UInt32, String &) const + { + /// If the argument was DateTime, add instruction for printing. 
If it was date, just append default literal + auto add_instruction = [&](auto && func [[maybe_unused]], const String & default_literal [[maybe_unused]]) + { + if constexpr (std::is_same_v || std::is_same_v) + instructions.emplace_back(func); + else + instructions.emplace_back(std::bind_front(&Action::template jodaLiteral, default_literal)); + }; + + size_t reserve_size = 0; + const char * pos = format.data(); + const char * end = pos + format.size(); + + while (pos < end) + { + const char * cur_token = pos; + + // Literal case + if (*cur_token == '\'') + { + // Case 1: 2 consecutive single quote + if (pos + 1 < end && *(pos + 1) == '\'') + { + std::string_view literal(cur_token, 1); + instructions.emplace_back(std::bind_front(&Action::template jodaLiteral, literal)); + ++reserve_size; + pos += 2; + } + else + { + // Case 2: find closing single quote + Int64 count = numLiteralChars(cur_token + 1, end); + if (count == -1) + throw Exception("No closing single quote for literal", ErrorCodes::BAD_ARGUMENTS); + else + { + for (Int64 i = 1; i <= count; i++) + { + std::string_view literal(cur_token + i, 1); + instructions.emplace_back( + std::bind_front(&Action::template jodaLiteral, literal)); + ++reserve_size; + if (*(cur_token + i) == '\'') + i += 1; + } + pos += count + 2; + } + } + } + else + { + int repetitions = 1; + ++pos; + while (pos < end && *cur_token == *pos) + { + ++repetitions; + ++pos; + } + switch (*cur_token) + { + case 'G': + instructions.emplace_back(std::bind_front(&Action::jodaEra, repetitions)); + reserve_size += repetitions <= 3 ? 2 : 13; + break; + case 'C': + instructions.emplace_back(std::bind_front(&Action::jodaCentryOfEra, repetitions)); + /// Year range [1900, 2299] + reserve_size += std::max(repetitions, 2); + break; + case 'Y': + instructions.emplace_back(std::bind_front(&Action::jodaYearOfEra, repetitions)); + /// Year range [1900, 2299] + reserve_size += repetitions == 2 ? 2 : std::max(repetitions, 4); + break; + case 'x': + throw Exception("format is not supported for WEEK_YEAR", ErrorCodes::NOT_IMPLEMENTED); + case 'w': + throw Exception("format is not supported for WEEK_OF_WEEK_YEAR", ErrorCodes::NOT_IMPLEMENTED); + case 'e': + instructions.emplace_back(std::bind_front(&Action::jodaDayOfWeek1Based, repetitions)); + /// Day of week range [1, 7] + reserve_size += std::max(repetitions, 1); + break; + case 'E': + instructions.emplace_back(std::bind_front(&Action::jodaDayOfWeekText, repetitions)); + /// Maximum length of short name is 3, maximum length of full name is 9. + reserve_size += repetitions <= 3 ? 3 : 9; + break; + case 'y': + instructions.emplace_back(std::bind_front(&Action::jodaYear, repetitions)); + /// Year range [1900, 2299] + reserve_size += repetitions == 2 ? 2 : std::max(repetitions, 4); + break; + case 'D': + instructions.emplace_back(std::bind_front(&Action::jodaDayOfYear, repetitions)); + /// Day of year range [1, 366] + reserve_size += std::max(repetitions, 3); + break; + case 'M': + if (repetitions <= 2) + { + instructions.emplace_back(std::bind_front(&Action::jodaMonthOfYear, repetitions)); + /// Month of year range [1, 12] + reserve_size += 2; + } + else + { + instructions.emplace_back(std::bind_front(&Action::jodaMonthOfYearText, repetitions)); + /// Maximum length of short name is 3, maximum length of full name is 9. + reserve_size += repetitions <= 3 ? 
3 : 9; + } + break; + case 'd': + instructions.emplace_back(std::bind_front(&Action::jodaDayOfMonth, repetitions)); + /// Day of month range [1, 31] + reserve_size += std::max(repetitions, 3); + break; + case 'a': + /// Default half day of day is "AM" + add_instruction(std::bind_front(&Action::jodaHalfDayOfDay, repetitions), "AM"); + reserve_size += 2; + break; + case 'K': + /// Default hour of half day is 0 + add_instruction( + std::bind_front(&Action::jodaHourOfHalfDay, repetitions), padValue(0, repetitions)); + /// Hour of half day range [0, 11] + reserve_size += std::max(repetitions, 2); + break; + case 'h': + /// Default clock hour of half day is 12 + add_instruction( + std::bind_front(&Action::jodaClockHourOfHalfDay, repetitions), + padValue(12, repetitions)); + /// Clock hour of half day range [1, 12] + reserve_size += std::max(repetitions, 2); + break; + case 'H': + /// Default hour of day is 0 + add_instruction(std::bind_front(&Action::jodaHourOfDay, repetitions), padValue(0, repetitions)); + /// Hour of day range [0, 23] + reserve_size += std::max(repetitions, 2); + break; + case 'k': + /// Default clock hour of day is 24 + add_instruction(std::bind_front(&Action::jodaClockHourOfDay, repetitions), padValue(24, repetitions)); + /// Clock hour of day range [1, 24] + reserve_size += std::max(repetitions, 2); + break; + case 'm': + /// Default minute of hour is 0 + add_instruction(std::bind_front(&Action::jodaMinuteOfHour, repetitions), padValue(0, repetitions)); + /// Minute of hour range [0, 59] + reserve_size += std::max(repetitions, 2); + break; + case 's': + /// Default second of minute is 0 + add_instruction(std::bind_front(&Action::jodaSecondOfMinute, repetitions), padValue(0, repetitions)); + /// Second of minute range [0, 59] + reserve_size += std::max(repetitions, 2); + break; + case 'S': + throw Exception("format is not supported for FRACTION_OF_SECOND", ErrorCodes::NOT_IMPLEMENTED); + case 'z': + if (repetitions <= 3) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Short name time zone is not yet supported"); + + instructions.emplace_back(std::bind_front(&Action::jodaTimezone, repetitions)); + /// Longest length of full name of time zone is 32.
+ reserve_size += 32; + break; + case 'Z': + throw Exception("format is not supported for TIMEZONE_OFFSET_ID", ErrorCodes::NOT_IMPLEMENTED); + default: + if (isalpha(*cur_token)) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "format is not supported for {}", String(cur_token, repetitions)); + + std::string_view literal(cur_token, pos - cur_token); + instructions.emplace_back(std::bind_front(&Action::template jodaLiteral, literal)); + reserve_size += pos - cur_token; + break; + } + } + } + return reserve_size; } }; @@ -755,8 +1268,21 @@ struct NameFromUnixTime static constexpr auto name = "fromUnixTimestamp"; }; -using FunctionFormatDateTime = FunctionFormatDateTimeImpl; -using FunctionFromUnixTimestamp = FunctionFormatDateTimeImpl; +struct NameFormatDateTimeInJodaSyntax +{ + static constexpr auto name = "formatDateTimeInJodaSyntax"; +}; + +struct NameFromUnixTimeInJodaSyntax +{ + static constexpr auto name = "fromUnixTimestampInJodaSyntax"; +}; + + +using FunctionFormatDateTime = FunctionFormatDateTimeImpl; +using FunctionFromUnixTimestamp = FunctionFormatDateTimeImpl; +using FunctionFormatDateTimeInJodaSyntax = FunctionFormatDateTimeImpl; +using FunctionFromUnixTimestampInJodaSyntax = FunctionFormatDateTimeImpl; } @@ -765,6 +1291,8 @@ REGISTER_FUNCTION(FormatDateTime) factory.registerFunction(); factory.registerFunction(); factory.registerAlias("FROM_UNIXTIME", "fromUnixTimestamp"); -} + factory.registerFunction(); + factory.registerFunction(); +} } diff --git a/src/Functions/formatRow.cpp b/src/Functions/formatRow.cpp index 0c2df48105e..abfe2ce53c4 100644 --- a/src/Functions/formatRow.cpp +++ b/src/Functions/formatRow.cpp @@ -29,7 +29,6 @@ namespace * several columns to generate a string per row, such as CSV, TSV, JSONEachRow, etc. * formatRowNoNewline(...) trims the newline character of each row. */ - template class FunctionFormatRow : public IFunction { @@ -60,8 +59,20 @@ public: for (auto i = 1u; i < arguments.size(); ++i) arg_columns.insert(arguments[i]); materializeBlockInplace(arg_columns); - auto out = FormatFactory::instance().getOutputFormat(format_name, buffer, arg_columns, context, [&](const Columns &, size_t row) + auto format_settings = getFormatSettings(context); + auto out = FormatFactory::instance().getOutputFormat(format_name, buffer, arg_columns, context, format_settings); + + /// This function make sense only for row output formats. + auto * row_output_format = dynamic_cast(out.get()); + if (!row_output_format) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot turn rows into a {} format strings. {} function supports only row output formats", format_name, getName()); + + auto columns = arg_columns.getColumns(); + for (size_t i = 0; i != input_rows_count; ++i) { + row_output_format->writePrefixIfNeeded(); + row_output_format->writeRow(columns, i); + row_output_format->finalize(); if constexpr (no_newline) { // replace '\n' with '\0' @@ -70,16 +81,11 @@ public: } else writeChar('\0', buffer); - offsets[row] = buffer.count(); - }); - /// This function make sense only for row output formats. - if (!dynamic_cast(out.get())) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot turn rows into a {} format strings. {} function supports only row output formats", format_name, getName()); + offsets[i] = buffer.count(); + row_output_format->resetFormatter(); + } - /// Don't write prefix if any. 
- out->doNotWritePrefix(); - out->write(arg_columns); return col_str; } diff --git a/src/Functions/if.cpp b/src/Functions/if.cpp index 0baf64c83d9..049e6d24920 100644 --- a/src/Functions/if.cpp +++ b/src/Functions/if.cpp @@ -1026,6 +1026,7 @@ public: } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } ColumnNumbers getArgumentsThatDontImplyNullableReturnType(size_t /*number_of_arguments*/) const override { return {0}; } + bool canBeExecutedOnLowCardinalityDictionary() const override { return false; } /// Get result types by argument types. If the function does not apply to these arguments, throw an exception. DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override diff --git a/src/Functions/in.cpp b/src/Functions/in.cpp index 5773e823a80..1de8371cf90 100644 --- a/src/Functions/in.cpp +++ b/src/Functions/in.cpp @@ -17,6 +17,7 @@ namespace DB namespace ErrorCodes { extern const int ILLEGAL_COLUMN; + extern const int LOGICAL_ERROR; } namespace @@ -94,6 +95,8 @@ public: { if constexpr (ignore_set) return ColumnUInt8::create(input_rows_count, 0u); + if (input_rows_count == 0) + return ColumnUInt8::create(); /// Second argument must be ColumnSet. ColumnPtr column_set_ptr = arguments[1].column; @@ -135,12 +138,16 @@ public: /// Replace single LowCardinality column to it's dictionary if possible. ColumnPtr lc_indexes = nullptr; + bool is_const = false; if (columns_of_key_columns.size() == 1) { auto & arg = columns_of_key_columns.at(0); const auto * col = arg.column.get(); if (const auto * const_col = typeid_cast(col)) + { col = &const_col->getDataColumn(); + is_const = true; + } if (const auto * lc = typeid_cast(col)) { @@ -153,7 +160,13 @@ public: auto res = set->execute(columns_of_key_columns, negative); if (lc_indexes) - return res->index(*lc_indexes, 0); + res = res->index(*lc_indexes, 0); + + if (is_const) + res = ColumnUInt8::create(input_rows_count, res->getUInt(0)); + + if (res->size() != input_rows_count) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Output size is different from input size, expect {}, get {}", input_rows_count, res->size()); return res; } diff --git a/src/Functions/initializeAggregation.cpp b/src/Functions/initializeAggregation.cpp index 08352553b9c..b782cd04f75 100644 --- a/src/Functions/initializeAggregation.cpp +++ b/src/Functions/initializeAggregation.cpp @@ -87,7 +87,7 @@ DataTypePtr FunctionInitializeAggregation::getReturnTypeImpl(const ColumnsWithTy aggregate_function = AggregateFunctionFactory::instance().get(aggregate_function_name, argument_types, params_row, properties); } - return aggregate_function->getReturnType(); + return aggregate_function->getResultType(); } diff --git a/src/Functions/multiIf.cpp b/src/Functions/multiIf.cpp index f658528a2a7..37301037c0e 100644 --- a/src/Functions/multiIf.cpp +++ b/src/Functions/multiIf.cpp @@ -51,6 +51,7 @@ public: size_t getNumberOfArguments() const override { return 0; } bool useDefaultImplementationForNulls() const override { return false; } bool useDefaultImplementationForNothing() const override { return false; } + bool canBeExecutedOnLowCardinalityDictionary() const override { return false; } ColumnNumbers getArgumentsThatDontImplyNullableReturnType(size_t number_of_arguments) const override { diff --git a/src/Functions/randDistribution.cpp b/src/Functions/randDistribution.cpp index 94dad4fdc89..a4b878f863e 100644 --- a/src/Functions/randDistribution.cpp +++ b/src/Functions/randDistribution.cpp @@ -353,7 +353,7 @@ 
Typical usage: { R"( Returns a random number from the exponential distribution. -Accepts one parameter. +Accepts one parameter - lambda value. Typical usage: [example:typical] @@ -413,7 +413,7 @@ Typical usage: { R"( Returns a random number from the Bernoulli distribution. -Accepts two parameters - probability of success. +Accepts one parameter - probability of success. Typical usage: [example:typical] @@ -458,7 +458,7 @@ Typical usage: { R"( Returns a random number from the poisson distribution. -Accepts two parameters - the mean number of occurrences. +Accepts one parameter - the mean number of occurrences. Typical usage: [example:typical] diff --git a/src/Functions/rowNumberInAllBlocks.cpp b/src/Functions/rowNumberInAllBlocks.cpp index 28d61afcdc1..750f8691cbb 100644 --- a/src/Functions/rowNumberInAllBlocks.cpp +++ b/src/Functions/rowNumberInAllBlocks.cpp @@ -10,7 +10,7 @@ namespace DB namespace { -/** Incremental number of row within all columnss passed to this function. */ +/** Incremental number of row within all columns passed to this function. */ class FunctionRowNumberInAllBlocks : public IFunction { private: diff --git a/src/Functions/runningAccumulate.cpp b/src/Functions/runningAccumulate.cpp index 336c45e49cb..877ad877f56 100644 --- a/src/Functions/runningAccumulate.cpp +++ b/src/Functions/runningAccumulate.cpp @@ -27,7 +27,7 @@ namespace * Takes state of aggregate function (example runningAccumulate(uniqState(UserID))), * and for each row of columns, return result of aggregate function on merge of states of all previous rows and current row. * - * So, result of function depends on partition of data to columnss and on order of data in columns. + * So, result of function depends on partition of data to columns and on order of data in columns. */ class FunctionRunningAccumulate : public IFunction { @@ -102,7 +102,7 @@ public: /// Will pass empty arena if agg_func does not allocate memory in arena std::unique_ptr arena = agg_func.allocatesMemoryInArena() ? std::make_unique() : nullptr; - auto result_column_ptr = agg_func.getReturnType()->createColumn(); + auto result_column_ptr = agg_func.getResultType()->createColumn(); IColumn & result_column = *result_column_ptr; result_column.reserve(column_with_states->size()); diff --git a/src/Functions/runningDifference.h b/src/Functions/runningDifference.h index cf534b30c90..5e58d0d8aaf 100644 --- a/src/Functions/runningDifference.h +++ b/src/Functions/runningDifference.h @@ -38,13 +38,13 @@ struct FunctionRunningDifferenceName }; /** Calculate difference of consecutive values in columns. - * So, result of function depends on partition of data to columnss and on order of data in columns. + * So, result of function depends on partition of data to columns and on order of data in columns. */ template class FunctionRunningDifferenceImpl : public IFunction { private: - /// It is possible to track value from previous columns, to calculate continuously across all columnss. Not implemented. + /// It is possible to track value from previous columns, to calculate continuously across all columns. Not implemented. 
template static NO_SANITIZE_UNDEFINED void process(const PaddedPODArray & src, PaddedPODArray & dst, const NullMap * null_map) diff --git a/src/IO/HashingWriteBuffer.h b/src/IO/HashingWriteBuffer.h index bf636deeb07..988dfc227fe 100644 --- a/src/IO/HashingWriteBuffer.h +++ b/src/IO/HashingWriteBuffer.h @@ -77,6 +77,11 @@ public: state = uint128(0, 0); } + void sync() override + { + out.sync(); + } + uint128 getHash() { next(); diff --git a/src/IO/PeekableReadBuffer.cpp b/src/IO/PeekableReadBuffer.cpp index d9de3a5e76a..c47bdce3924 100644 --- a/src/IO/PeekableReadBuffer.cpp +++ b/src/IO/PeekableReadBuffer.cpp @@ -10,12 +10,12 @@ namespace ErrorCodes } PeekableReadBuffer::PeekableReadBuffer(ReadBuffer & sub_buf_, size_t start_size_ /*= 0*/) - : BufferWithOwnMemory(start_size_), sub_buf(sub_buf_) + : BufferWithOwnMemory(start_size_), sub_buf(&sub_buf_) { - padded &= sub_buf.isPadded(); + padded &= sub_buf->isPadded(); /// Read from sub-buffer - Buffer & sub_working = sub_buf.buffer(); - BufferBase::set(sub_working.begin(), sub_working.size(), sub_buf.offset()); + Buffer & sub_working = sub_buf->buffer(); + BufferBase::set(sub_working.begin(), sub_working.size(), sub_buf->offset()); checkStateCorrect(); } @@ -23,17 +23,26 @@ PeekableReadBuffer::PeekableReadBuffer(ReadBuffer & sub_buf_, size_t start_size_ void PeekableReadBuffer::reset() { checkStateCorrect(); +} +void PeekableReadBuffer::setSubBuffer(ReadBuffer & sub_buf_) +{ + sub_buf = &sub_buf_; + resetImpl(); +} + +void PeekableReadBuffer::resetImpl() +{ peeked_size = 0; checkpoint = std::nullopt; checkpoint_in_own_memory = false; use_stack_memory = true; if (!currentlyReadFromOwnMemory()) - sub_buf.position() = pos; + sub_buf->position() = pos; - Buffer & sub_working = sub_buf.buffer(); - BufferBase::set(sub_working.begin(), sub_working.size(), sub_buf.offset()); + Buffer & sub_working = sub_buf->buffer(); + BufferBase::set(sub_working.begin(), sub_working.size(), sub_buf->offset()); checkStateCorrect(); } @@ -43,20 +52,20 @@ bool PeekableReadBuffer::peekNext() checkStateCorrect(); Position copy_from = pos; - size_t bytes_to_copy = sub_buf.available(); + size_t bytes_to_copy = sub_buf->available(); if (useSubbufferOnly()) { /// Don't have to copy all data from sub-buffer if there is no data in own memory (checkpoint and pos are in sub-buffer) if (checkpoint) copy_from = *checkpoint; - bytes_to_copy = sub_buf.buffer().end() - copy_from; + bytes_to_copy = sub_buf->buffer().end() - copy_from; if (!bytes_to_copy) { - sub_buf.position() = copy_from; + sub_buf->position() = copy_from; /// Both checkpoint and pos are at the end of sub-buffer. Just load next part of data. 
- bool res = sub_buf.next(); - BufferBase::set(sub_buf.buffer().begin(), sub_buf.buffer().size(), sub_buf.offset()); + bool res = sub_buf->next(); + BufferBase::set(sub_buf->buffer().begin(), sub_buf->buffer().size(), sub_buf->offset()); if (checkpoint) checkpoint.emplace(pos); @@ -70,13 +79,13 @@ bool PeekableReadBuffer::peekNext() if (useSubbufferOnly()) { - sub_buf.position() = copy_from; + sub_buf->position() = copy_from; } char * memory_data = getMemoryData(); /// Save unread data from sub-buffer to own memory - memcpy(memory_data + peeked_size, sub_buf.position(), bytes_to_copy); + memcpy(memory_data + peeked_size, sub_buf->position(), bytes_to_copy); /// If useSubbufferOnly() is false, then checkpoint is in own memory and it was updated in resizeOwnMemoryIfNecessary /// Otherwise, checkpoint now at the beginning of own memory @@ -106,10 +115,10 @@ bool PeekableReadBuffer::peekNext() } peeked_size += bytes_to_copy; - sub_buf.position() += bytes_to_copy; + sub_buf->position() += bytes_to_copy; checkStateCorrect(); - return sub_buf.next(); + return sub_buf->next(); } void PeekableReadBuffer::rollbackToCheckpoint(bool drop) @@ -152,7 +161,7 @@ bool PeekableReadBuffer::nextImpl() if (checkpoint) { if (currentlyReadFromOwnMemory()) - res = sub_buf.hasPendingData() || sub_buf.next(); + res = sub_buf->hasPendingData() || sub_buf->next(); else res = peekNext(); } @@ -161,21 +170,21 @@ bool PeekableReadBuffer::nextImpl() if (useSubbufferOnly()) { /// Load next data to sub_buf - sub_buf.position() = position(); - res = sub_buf.next(); + sub_buf->position() = position(); + res = sub_buf->next(); } else { /// All copied data have been read from own memory, continue reading from sub_buf peeked_size = 0; - res = sub_buf.hasPendingData() || sub_buf.next(); + res = sub_buf->hasPendingData() || sub_buf->next(); } } /// Switch to reading from sub_buf (or just update it if already switched) - Buffer & sub_working = sub_buf.buffer(); - BufferBase::set(sub_working.begin(), sub_working.size(), sub_buf.offset()); - nextimpl_working_buffer_offset = sub_buf.offset(); + Buffer & sub_working = sub_buf->buffer(); + BufferBase::set(sub_working.begin(), sub_working.size(), sub_buf->offset()); + nextimpl_working_buffer_offset = sub_buf->offset(); if (checkpoint_at_end) { @@ -199,8 +208,8 @@ void PeekableReadBuffer::checkStateCorrect() const throw DB::Exception("Checkpoint in empty own buffer", ErrorCodes::LOGICAL_ERROR); if (currentlyReadFromOwnMemory() && pos < *checkpoint) throw DB::Exception("Current position in own buffer before checkpoint in own buffer", ErrorCodes::LOGICAL_ERROR); - if (!currentlyReadFromOwnMemory() && pos < sub_buf.position()) - throw DB::Exception("Current position in subbuffer less than sub_buf.position()", ErrorCodes::LOGICAL_ERROR); + if (!currentlyReadFromOwnMemory() && pos < sub_buf->position()) + throw DB::Exception("Current position in subbuffer less than sub_buf->position()", ErrorCodes::LOGICAL_ERROR); } else { @@ -294,11 +303,11 @@ void PeekableReadBuffer::makeContinuousMemoryFromCheckpointToPos() if (!checkpointInOwnMemory() || currentlyReadFromOwnMemory()) return; /// it's already continuous - size_t bytes_to_append = pos - sub_buf.position(); + size_t bytes_to_append = pos - sub_buf->position(); resizeOwnMemoryIfNecessary(bytes_to_append); char * memory_data = getMemoryData(); - memcpy(memory_data + peeked_size, sub_buf.position(), bytes_to_append); - sub_buf.position() = pos; + memcpy(memory_data + peeked_size, sub_buf->position(), bytes_to_append); + sub_buf->position() = 
pos; peeked_size += bytes_to_append; BufferBase::set(memory_data, peeked_size, peeked_size); } @@ -306,7 +315,7 @@ void PeekableReadBuffer::makeContinuousMemoryFromCheckpointToPos() PeekableReadBuffer::~PeekableReadBuffer() { if (!currentlyReadFromOwnMemory()) - sub_buf.position() = pos; + sub_buf->position() = pos; } bool PeekableReadBuffer::hasUnreadData() const diff --git a/src/IO/PeekableReadBuffer.h b/src/IO/PeekableReadBuffer.h index 45763863437..f50b0d69dc5 100644 --- a/src/IO/PeekableReadBuffer.h +++ b/src/IO/PeekableReadBuffer.h @@ -24,7 +24,7 @@ public: ~PeekableReadBuffer() override; - void prefetch() override { sub_buf.prefetch(); } + void prefetch() override { sub_buf->prefetch(); } /// Sets checkpoint at current position ALWAYS_INLINE inline void setCheckpoint() @@ -71,13 +71,17 @@ public: // without recreating the buffer. void reset(); + void setSubBuffer(ReadBuffer & sub_buf_); + private: bool nextImpl() override; + void resetImpl(); + bool peekNext(); inline bool useSubbufferOnly() const { return !peeked_size; } - inline bool currentlyReadFromOwnMemory() const { return working_buffer.begin() != sub_buf.buffer().begin(); } + inline bool currentlyReadFromOwnMemory() const { return working_buffer.begin() != sub_buf->buffer().begin(); } inline bool checkpointInOwnMemory() const { return checkpoint_in_own_memory; } void checkStateCorrect() const; @@ -90,7 +94,7 @@ private: const char * getMemoryData() const { return use_stack_memory ? stack_memory : memory.data(); } - ReadBuffer & sub_buf; + ReadBuffer * sub_buf; size_t peeked_size = 0; std::optional checkpoint = std::nullopt; bool checkpoint_in_own_memory = false; diff --git a/src/IO/S3/PocoHTTPClient.cpp b/src/IO/S3/PocoHTTPClient.cpp index 33dd3250c9f..6153842520b 100644 --- a/src/IO/S3/PocoHTTPClient.cpp +++ b/src/IO/S3/PocoHTTPClient.cpp @@ -177,7 +177,7 @@ namespace bool checkRequestCanReturn2xxAndErrorInBody(Aws::Http::HttpRequest & request) { auto query_params = request.GetQueryStringParameters(); - if (request.HasHeader("z-amz-copy-source")) + if (request.HasHeader("x-amz-copy-source")) { /// CopyObject https://docs.aws.amazon.com/AmazonS3/latest/API/API_CopyObject.html if (query_params.empty()) diff --git a/src/IO/S3Common.cpp b/src/IO/S3Common.cpp index c03f7f07310..a1a4267496f 100644 --- a/src/IO/S3Common.cpp +++ b/src/IO/S3Common.cpp @@ -149,13 +149,12 @@ class AWSEC2MetadataClient : public Aws::Internal::AWSHttpResourceClient static constexpr char EC2_IMDS_TOKEN_TTL_DEFAULT_VALUE[] = "21600"; static constexpr char EC2_IMDS_TOKEN_TTL_HEADER[] = "x-aws-ec2-metadata-token-ttl-seconds"; - static constexpr char EC2_DEFAULT_METADATA_ENDPOINT[] = "http://169.254.169.254"; - public: /// See EC2MetadataClient. 
- explicit AWSEC2MetadataClient(const Aws::Client::ClientConfiguration & client_configuration) + explicit AWSEC2MetadataClient(const Aws::Client::ClientConfiguration & client_configuration, const char * endpoint_) : Aws::Internal::AWSHttpResourceClient(client_configuration) + , endpoint(endpoint_) , logger(&Poco::Logger::get("AWSEC2InstanceProfileConfigLoader")) { } @@ -180,7 +179,7 @@ public: { std::lock_guard locker(token_mutex); - LOG_TRACE(logger, "Getting default credentials for EC2 instance."); + LOG_TRACE(logger, "Getting default credentials for ec2 instance from {}", endpoint); auto result = GetResourceWithAWSWebServiceResult(endpoint.c_str(), EC2_SECURITY_CREDENTIALS_RESOURCE, nullptr); credentials_string = result.GetPayload(); if (result.GetResponseCode() == Aws::Http::HttpResponseCode::UNAUTHORIZED) @@ -286,12 +285,50 @@ public: } private: - const Aws::String endpoint = EC2_DEFAULT_METADATA_ENDPOINT; + const Aws::String endpoint; mutable std::recursive_mutex token_mutex; mutable Aws::String token; Poco::Logger * logger; }; +std::shared_ptr InitEC2MetadataClient(const Aws::Client::ClientConfiguration & client_configuration) +{ + Aws::String ec2_metadata_service_endpoint = Aws::Environment::GetEnv("AWS_EC2_METADATA_SERVICE_ENDPOINT"); + auto * logger = &Poco::Logger::get("AWSEC2InstanceProfileConfigLoader"); + if (ec2_metadata_service_endpoint.empty()) + { + Aws::String ec2_metadata_service_endpoint_mode = Aws::Environment::GetEnv("AWS_EC2_METADATA_SERVICE_ENDPOINT_MODE"); + if (ec2_metadata_service_endpoint_mode.length() == 0) + { + ec2_metadata_service_endpoint = "http://169.254.169.254"; //default to IPv4 default endpoint + } + else + { + if (ec2_metadata_service_endpoint_mode.length() == 4) + { + if (Aws::Utils::StringUtils::CaselessCompare(ec2_metadata_service_endpoint_mode.c_str(), "ipv4")) + { + ec2_metadata_service_endpoint = "http://169.254.169.254"; //default to IPv4 default endpoint + } + else if (Aws::Utils::StringUtils::CaselessCompare(ec2_metadata_service_endpoint_mode.c_str(), "ipv6")) + { + ec2_metadata_service_endpoint = "http://[fd00:ec2::254]"; + } + else + { + LOG_ERROR(logger, "AWS_EC2_METADATA_SERVICE_ENDPOINT_MODE can only be set to ipv4 or ipv6, received: {}", ec2_metadata_service_endpoint_mode); + } + } + else + { + LOG_ERROR(logger, "AWS_EC2_METADATA_SERVICE_ENDPOINT_MODE can only be set to ipv4 or ipv6, received: {}", ec2_metadata_service_endpoint_mode); + } + } + } + LOG_INFO(logger, "Using IMDS endpoint: {}", ec2_metadata_service_endpoint); + return std::make_shared(client_configuration, ec2_metadata_service_endpoint.c_str()); +} + class AWSEC2InstanceProfileConfigLoader : public Aws::Config::AWSProfileConfigLoader { public: @@ -646,7 +683,7 @@ public: aws_client_configuration.retryStrategy = std::make_shared(1, 1000); - auto ec2_metadata_client = std::make_shared(aws_client_configuration); + auto ec2_metadata_client = InitEC2MetadataClient(aws_client_configuration); auto config_loader = std::make_shared(ec2_metadata_client, !use_insecure_imds_request); AddProvider(std::make_shared(config_loader)); diff --git a/src/IO/WriteBufferFromTemporaryFile.cpp b/src/IO/WriteBufferFromTemporaryFile.cpp index f93c79ca587..4562ad512b3 100644 --- a/src/IO/WriteBufferFromTemporaryFile.cpp +++ b/src/IO/WriteBufferFromTemporaryFile.cpp @@ -13,7 +13,7 @@ namespace ErrorCodes } -WriteBufferFromTemporaryFile::WriteBufferFromTemporaryFile(std::unique_ptr && tmp_file_) +WriteBufferFromTemporaryFile::WriteBufferFromTemporaryFile(std::unique_ptr && tmp_file_) : 
WriteBufferFromFile(tmp_file_->path(), DBMS_DEFAULT_BUFFER_SIZE, O_RDWR | O_TRUNC | O_CREAT, 0600), tmp_file(std::move(tmp_file_)) {} @@ -40,11 +40,11 @@ public: return std::make_shared(fd, file_name, std::move(origin->tmp_file)); } - ReadBufferFromTemporaryWriteBuffer(int fd_, const std::string & file_name_, std::unique_ptr && tmp_file_) + ReadBufferFromTemporaryWriteBuffer(int fd_, const std::string & file_name_, std::unique_ptr && tmp_file_) : ReadBufferFromFile(fd_, file_name_), tmp_file(std::move(tmp_file_)) {} - std::unique_ptr tmp_file; + std::unique_ptr tmp_file; }; diff --git a/src/IO/WriteBufferFromTemporaryFile.h b/src/IO/WriteBufferFromTemporaryFile.h index 06e2911db26..a4e83b95ac6 100644 --- a/src/IO/WriteBufferFromTemporaryFile.h +++ b/src/IO/WriteBufferFromTemporaryFile.h @@ -20,11 +20,11 @@ public: ~WriteBufferFromTemporaryFile() override; private: - explicit WriteBufferFromTemporaryFile(std::unique_ptr && tmp_file); + explicit WriteBufferFromTemporaryFile(std::unique_ptr && tmp_file); std::shared_ptr getReadBufferImpl() override; - std::unique_ptr tmp_file; + std::unique_ptr tmp_file; friend class ReadBufferFromTemporaryWriteBuffer; }; diff --git a/src/IO/WriteBufferFromVector.h b/src/IO/WriteBufferFromVector.h index 521acb6c8d6..525e11518bd 100644 --- a/src/IO/WriteBufferFromVector.h +++ b/src/IO/WriteBufferFromVector.h @@ -26,6 +26,7 @@ template class WriteBufferFromVector : public WriteBuffer { public: + using ValueType = typename VectorType::value_type; explicit WriteBufferFromVector(VectorType & vector_) : WriteBuffer(reinterpret_cast(vector_.data()), vector_.size()), vector(vector_) { @@ -50,9 +51,11 @@ public: bool isFinished() const { return finalized; } - void restart() + void restart(std::optional max_capacity = std::nullopt) { - if (vector.empty()) + if (max_capacity && vector.capacity() > max_capacity) + VectorType(initial_size, ValueType()).swap(vector); + else if (vector.empty()) vector.resize(initial_size); set(reinterpret_cast(vector.data()), vector.size()); finalized = false; @@ -68,8 +71,8 @@ private: { vector.resize( ((position() - reinterpret_cast(vector.data())) /// NOLINT - + sizeof(typename VectorType::value_type) - 1) /// Align up. - / sizeof(typename VectorType::value_type)); + + sizeof(ValueType) - 1) /// Align up. + / sizeof(ValueType)); /// Prevent further writes. 
set(nullptr, 0); diff --git a/src/IO/WriteHelpers.h b/src/IO/WriteHelpers.h index 003e5a56958..8dbfe63be7e 100644 --- a/src/IO/WriteHelpers.h +++ b/src/IO/WriteHelpers.h @@ -1098,6 +1098,25 @@ inline String toString(const T & x) return buf.str(); } +template +inline String toStringWithFinalSeparator(const std::vector & x, const String & final_sep) +{ + WriteBufferFromOwnString buf; + for (auto it = x.begin(); it != x.end(); ++it) + { + if (it != x.begin()) + { + if (std::next(it) == x.end()) + writeString(final_sep, buf); + else + writeString(", ", buf); + } + writeQuoted(*it, buf); + } + + return buf.str(); +} + inline void writeNullTerminatedString(const String & s, WriteBuffer & buffer) { /// c_str is guaranteed to return zero-terminated string diff --git a/src/IO/tests/gtest_WriteHelpers.cpp b/src/IO/tests/gtest_WriteHelpers.cpp new file mode 100644 index 00000000000..b3c7062be58 --- /dev/null +++ b/src/IO/tests/gtest_WriteHelpers.cpp @@ -0,0 +1,32 @@ +#include + +#include +#include +#include + +using namespace DB; + + +TEST(WriteHelpersTest, ToStringWithFinalSeparatorTest) +{ + { + std::vector v; + EXPECT_EQ(toStringWithFinalSeparator(v, " or "), ""); + } + { + std::vector v = {"AAA"}; + EXPECT_EQ(toStringWithFinalSeparator(v, " or "), "'AAA'"); + } + { + std::vector v = {"AAA", "BBB"}; + EXPECT_EQ(toStringWithFinalSeparator(v, " or "), "'AAA' or 'BBB'"); + } + { + std::vector v = {"AAA", "BBB", "CCC"}; + EXPECT_EQ(toStringWithFinalSeparator(v, " or "), "'AAA', 'BBB' or 'CCC'"); + } + { + std::vector v = {"AAA", "BBB", "CCC", "DDD"}; + EXPECT_EQ(toStringWithFinalSeparator(v, " or "), "'AAA', 'BBB', 'CCC' or 'DDD'"); + } +} diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index 49cc7c3f9a7..3b4d2dd1dd4 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -48,8 +48,6 @@ void ActionsDAG::Node::toTree(JSONBuilder::JSONMap & map) const if (function_base) map.add("Function", function_base->getName()); - else if (function_builder) - map.add("Function", function_builder->getName()); if (type == ActionType::FUNCTION) map.add("Compiled", is_function_compiled); @@ -167,7 +165,6 @@ const ActionsDAG::Node & ActionsDAG::addFunction( Node node; node.type = ActionType::FUNCTION; - node.function_builder = function; node.children = std::move(children); bool all_const = true; @@ -239,6 +236,86 @@ const ActionsDAG::Node & ActionsDAG::addFunction( return addNode(std::move(node)); } +const ActionsDAG::Node & ActionsDAG::addFunction( + const FunctionBasePtr & function_base, + NodeRawConstPtrs children, + std::string result_name) +{ + size_t num_arguments = children.size(); + + Node node; + node.type = ActionType::FUNCTION; + node.children = std::move(children); + + bool all_const = true; + ColumnsWithTypeAndName arguments(num_arguments); + + for (size_t i = 0; i < num_arguments; ++i) + { + const auto & child = *node.children[i]; + + ColumnWithTypeAndName argument; + argument.column = child.column; + argument.type = child.result_type; + argument.name = child.result_name; + + if (!argument.column || !isColumnConst(*argument.column)) + all_const = false; + + arguments[i] = std::move(argument); + } + + node.function_base = function_base; + node.result_type = node.function_base->getResultType(); + node.function = node.function_base->prepare(arguments); + node.is_deterministic = node.function_base->isDeterministic(); + + /// If all arguments are constants, and function is suitable to be executed in 'prepare' stage - execute function. 
+ if (node.function_base->isSuitableForConstantFolding()) + { + ColumnPtr column; + + if (all_const) + { + size_t num_rows = arguments.empty() ? 0 : arguments.front().column->size(); + column = node.function->execute(arguments, node.result_type, num_rows, true); + } + else + { + column = node.function_base->getConstantResultForNonConstArguments(arguments, node.result_type); + } + + /// If the result is not a constant, just in case, we will consider the result as unknown. + if (column && isColumnConst(*column)) + { + /// All constant (literal) columns in block are added with size 1. + /// But if there was no columns in block before executing a function, the result has size 0. + /// Change the size to 1. + + if (column->empty()) + column = column->cloneResized(1); + + node.column = std::move(column); + } + } + + if (result_name.empty()) + { + result_name = function_base->getName() + "("; + for (size_t i = 0; i < num_arguments; ++i) + { + if (i) + result_name += ", "; + result_name += node.children[i]->result_name; + } + result_name += ")"; + } + + node.result_name = std::move(result_name); + + return addNode(std::move(node)); +} + const ActionsDAG::Node & ActionsDAG::findInOutputs(const std::string & name) const { if (const auto * node = tryFindInOutputs(name)) @@ -1954,8 +2031,7 @@ ActionsDAGPtr ActionsDAG::cloneActionsForFilterPushDown( FunctionOverloadResolverPtr func_builder_cast = CastInternalOverloadResolver::createImpl(); - predicate->function_builder = func_builder_cast; - predicate->function_base = predicate->function_builder->build(arguments); + predicate->function_base = func_builder_cast->build(arguments); predicate->function = predicate->function_base->prepare(arguments); } } @@ -1966,7 +2042,9 @@ ActionsDAGPtr ActionsDAG::cloneActionsForFilterPushDown( predicate->children.swap(new_children); auto arguments = prepareFunctionArguments(predicate->children); - predicate->function_base = predicate->function_builder->build(arguments); + FunctionOverloadResolverPtr func_builder_and = std::make_unique(std::make_shared()); + + predicate->function_base = func_builder_and->build(arguments); predicate->function = predicate->function_base->prepare(arguments); } } @@ -2171,7 +2249,7 @@ ActionsDAGPtr ActionsDAG::buildFilterActionsDAG( for (const auto & child : node->children) function_children.push_back(node_to_result_node.find(child)->second); - result_node = &result_dag->addFunction(node->function_builder, std::move(function_children), {}); + result_node = &result_dag->addFunction(node->function_base, std::move(function_children), {}); break; } } diff --git a/src/Interpreters/ActionsDAG.h b/src/Interpreters/ActionsDAG.h index a532dd0c436..f574757abac 100644 --- a/src/Interpreters/ActionsDAG.h +++ b/src/Interpreters/ActionsDAG.h @@ -17,7 +17,7 @@ class IExecutableFunction; using ExecutableFunctionPtr = std::shared_ptr; class IFunctionBase; -using FunctionBasePtr = std::shared_ptr; +using FunctionBasePtr = std::shared_ptr; class IFunctionOverloadResolver; using FunctionOverloadResolverPtr = std::shared_ptr; @@ -74,7 +74,6 @@ public: std::string result_name; DataTypePtr result_type; - FunctionOverloadResolverPtr function_builder; /// Can be used to get function signature or properties like monotonicity. FunctionBasePtr function_base; /// Prepared function which is used in function execution. 
@@ -139,6 +138,10 @@ public: const FunctionOverloadResolverPtr & function, NodeRawConstPtrs children, std::string result_name); + const Node & addFunction( + const FunctionBasePtr & function_base, + NodeRawConstPtrs children, + std::string result_name); /// Find first column by name in output nodes. This search is linear. const Node & findInOutputs(const std::string & name) const; diff --git a/src/Interpreters/AggregateDescription.cpp b/src/Interpreters/AggregateDescription.cpp index b0f51ea7c90..787e0a503f8 100644 --- a/src/Interpreters/AggregateDescription.cpp +++ b/src/Interpreters/AggregateDescription.cpp @@ -53,7 +53,7 @@ void AggregateDescription::explain(WriteBuffer & out, size_t indent) const out << type->getName(); } - out << ") → " << function->getReturnType()->getName() << "\n"; + out << ") → " << function->getResultType()->getName() << "\n"; } else out << prefix << " Function: nullptr\n"; @@ -109,7 +109,7 @@ void AggregateDescription::explain(JSONBuilder::JSONMap & map) const args_array->add(type->getName()); function_map->add("Argument Types", std::move(args_array)); - function_map->add("Result Type", function->getReturnType()->getName()); + function_map->add("Result Type", function->getResultType()->getName()); map.add("Function", std::move(function_map)); } diff --git a/src/Interpreters/AggregationUtils.cpp b/src/Interpreters/AggregationUtils.cpp index 4e870e8152b..157590e6f44 100644 --- a/src/Interpreters/AggregationUtils.cpp +++ b/src/Interpreters/AggregationUtils.cpp @@ -45,7 +45,7 @@ OutputBlockColumns prepareOutputBlockColumns( } else { - final_aggregate_columns[i] = aggregate_functions[i]->getReturnType()->createColumn(); + final_aggregate_columns[i] = aggregate_functions[i]->getResultType()->createColumn(); final_aggregate_columns[i]->reserve(rows); if (aggregate_functions[i]->isState()) diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index 14113514f1e..b42ec5c547c 100644 --- a/src/Interpreters/Aggregator.cpp +++ b/src/Interpreters/Aggregator.cpp @@ -448,7 +448,7 @@ Block Aggregator::Params::getHeader( { auto & elem = res.getByName(aggregate.column_name); - elem.type = aggregate.function->getReturnType(); + elem.type = aggregate.function->getResultType(); elem.column = elem.type->createColumn(); } } @@ -467,7 +467,7 @@ Block Aggregator::Params::getHeader( DataTypePtr type; if (final) - type = aggregate.function->getReturnType(); + type = aggregate.function->getResultType(); else type = std::make_shared(aggregate.function, argument_types, aggregate.parameters); @@ -1599,7 +1599,7 @@ void Aggregator::writeToTemporaryFile(AggregatedDataVariants & data_variants, si auto & out_stream = tmp_data->createStream(getHeader(false), max_temp_file_size); ProfileEvents::increment(ProfileEvents::ExternalAggregationWritePart); - LOG_DEBUG(log, "Writing part of aggregation data into temporary file {}", out_stream.path()); + LOG_DEBUG(log, "Writing part of aggregation data into temporary file {}", out_stream.getPath()); /// Flush only two-level data and possibly overflow data. 
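Aside (illustrative, not part of the patch): the ActionsDAG hunks above add an addFunction overload that accepts an already-built FunctionBasePtr instead of an overload resolver, and buildFilterActionsDAG now calls it with node->function_base. A minimal usage sketch, assuming ClickHouse headers, a valid ContextPtr, and an invented input column "x" of type UInt64 (the column name, its type, and the choice of the "abs" function are assumptions for the example, not taken from this diff):

    // Hypothetical sketch: build a DAG node from a prepared FunctionBasePtr.
    #include <Interpreters/ActionsDAG.h>
    #include <Functions/FunctionFactory.h>
    #include <DataTypes/DataTypesNumber.h>

    using namespace DB;

    const ActionsDAG::Node & addAbsOfInput(ActionsDAG & dag, ContextPtr context)
    {
        // Register an input column node; name and type are made up for the example.
        const auto & input = dag.addInput("x", std::make_shared<DataTypeUInt64>());

        // Resolve and build the function once, outside the DAG.
        auto resolver = FunctionFactory::instance().get("abs", context);
        ColumnsWithTypeAndName args{{nullptr, input.result_type, input.result_name}};
        FunctionBasePtr function_base = resolver->build(args);

        // New overload from the hunk above: pass the FunctionBasePtr directly;
        // an empty result name lets the DAG derive "abs(x)" itself.
        return dag.addFunction(function_base, {&input}, /*result_name=*/ "");
    }

With an empty result name the overload composes "abs(x)" from the function and argument names, as implemented in the added addFunction body above.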
diff --git a/src/Interpreters/ApplyWithGlobalVisitor.cpp b/src/Interpreters/ApplyWithGlobalVisitor.cpp index a0f256fca83..1d36b4ab203 100644 --- a/src/Interpreters/ApplyWithGlobalVisitor.cpp +++ b/src/Interpreters/ApplyWithGlobalVisitor.cpp @@ -88,7 +88,7 @@ void ApplyWithGlobalVisitor::visit(ASTPtr & ast) if (auto * ast_with_alias = dynamic_cast(child.get())) exprs[ast_with_alias->alias] = child; } - for (auto it = node_union->list_of_selects->children.begin() + 1; it != node_union->list_of_selects->children.end(); ++it) + for (auto * it = node_union->list_of_selects->children.begin() + 1; it != node_union->list_of_selects->children.end(); ++it) { if (auto * union_child = (*it)->as()) visit(*union_child, exprs, with_expression_list); diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index 8bd8efd40ba..fa3e9915e8f 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -245,7 +245,8 @@ std::future AsynchronousInsertQueue::push(ASTPtr query, ContextPtr query_c /// Here we check whether we hit the limit on maximum data size in the buffer. /// And use setting from query context. /// It works, because queries with the same set of settings are already grouped together. - if (data->size_in_bytes > key.settings.async_insert_max_data_size || data->query_number > key.settings.async_insert_max_query_number) + if (data->size_in_bytes >= key.settings.async_insert_max_data_size + || data->query_number >= key.settings.async_insert_max_query_number) { data_to_process = std::move(data); shard.iterators.erase(it); diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index 4bdf05d81a5..20af7bf2c58 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -13,6 +13,7 @@ #include #include + namespace fs = std::filesystem; namespace ProfileEvents @@ -60,13 +61,27 @@ FileCache::Key FileCache::createKeyForPath(const String & path) return Key(path); } -String FileCache::getPathInLocalCache(const Key & key, size_t offset, bool is_persistent) const +String FileCache::getPathInLocalCache(const Key & key, size_t offset, FileSegmentKind segment_kind) const { + String file_suffix; + switch (segment_kind) + { + case FileSegmentKind::Persistent: + file_suffix = "_persistent"; + break; + case FileSegmentKind::Temporary: + file_suffix = "_temporary"; + break; + case FileSegmentKind::Regular: + file_suffix = ""; + break; + } + auto key_str = key.toString(); return fs::path(cache_base_path) / key_str.substr(0, 3) / key_str - / (std::to_string(offset) + (is_persistent ? "_persistent" : "")); + / (std::to_string(offset) + file_suffix); } String FileCache::getPathInLocalCache(const Key & key) const @@ -399,6 +414,28 @@ KeyTransactionPtr FileCache::createKeyTransaction(const Key & key, KeyNotFoundPo return std::make_unique(lock_it->second, it->second); } +FileSegmentsHolder FileCache::set(const Key & key, size_t offset, size_t size, const CreateFileSegmentSettings & settings) +{ + auto key_transaction = createKeyTransaction(key, KeyNotFoundPolicy::CREATE_EMPTY); + + FileSegment::Range range(offset, offset + size - 1); + auto existing_file_segments = getImpl(key, range, *key_transaction); if (!existing_file_segments.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Having intersection with already existing cache"); + + FileSegments file_segments; + if (settings.unbounded) + { + /// If the file is unbounded, we can create a single cell for it.
+ const auto * cell_it = addCell(key, offset, size, FileSegment::State::EMPTY, settings, cache_lock); + file_segments = {cell_it->second->file_segment}; + } + else + file_segments = splitRangeIntoCells(key, offset, size, FileSegment::State::EMPTY, settings, cache_lock); + + return FileSegmentsHolder(); +} + FileSegmentsHolderPtr FileCache::getOrSet( const Key & key, size_t offset, @@ -527,7 +564,7 @@ FileSegmentPtr FileCache::createFileSegmentForDownload( size_t size, const CreateFileSegmentSettings & settings) { - if (size > max_file_segment_size) + if (!settings.unbounded && size > max_file_segment_size) throw Exception(ErrorCodes::LOGICAL_ERROR, "Requested size exceeds max file segment size"); auto key_transaction = createKeyTransaction(key, KeyNotFoundPolicy::CREATE_EMPTY); @@ -1035,14 +1072,20 @@ void FileCache::loadCacheInfoIntoMemory() auto offset_with_suffix = offset_it->path().filename().string(); auto delim_pos = offset_with_suffix.find('_'); bool parsed; - bool is_persistent = false; + FileSegmentKind segment_kind = FileSegmentKind::Regular; if (delim_pos == std::string::npos) parsed = tryParse(offset, offset_with_suffix); else { parsed = tryParse(offset, offset_with_suffix.substr(0, delim_pos)); - is_persistent = offset_with_suffix.substr(delim_pos+1) == "persistent"; + if (offset_with_suffix.substr(delim_pos+1) == "persistent") + segment_kind = FileSegmentKind::Persistent; + if (offset_with_suffix.substr(delim_pos+1) == "temporary") + { + fs::remove(offset_it->path()); + continue; + } } if (!parsed) @@ -1066,7 +1109,7 @@ void FileCache::loadCacheInfoIntoMemory() { auto cell_it = addCell( key, offset, size, FileSegment::State::DOWNLOADED, - CreateFileSegmentSettings{ .is_persistent = is_persistent }, *key_transaction); + CreateFileSegmentSettings(segment_kind), *key_transaction); queue_entries.emplace_back(cell_it->second.queue_iterator, cell_it->second.file_segment); } @@ -1125,7 +1168,6 @@ void KeyTransaction::reduceSizeToDownloaded(const Key & key, size_t offset, cons file_segment->getInfoForLogUnlocked(segment_lock)); } - CreateFileSegmentSettings create_settings{ .is_persistent = file_segment->is_persistent }; assert(file_segment->downloaded_size <= file_segment->reserved_size); assert(cell->queue_iterator->size() == file_segment->reserved_size); assert(cell->queue_iterator->size() >= file_segment->downloaded_size); @@ -1136,6 +1178,7 @@ void KeyTransaction::reduceSizeToDownloaded(const Key & key, size_t offset, cons cell->queue_iterator->incrementSize(-extra_size, getQueueLock()); } + CreateFileSegmentSettings create_settings(file_segment->getKind()); cell->file_segment = std::make_shared( offset, downloaded_size, key, getCreator(), file_segment->cache, FileSegment::State::DOWNLOADED, create_settings); @@ -1168,7 +1211,7 @@ std::vector FileCache::tryGetCachePaths(const Key & key) for (const auto & [offset, cell] : *cells_by_offset) { if (cell.file_segment->state() == FileSegment::State::DOWNLOADED) - cache_paths.push_back(getPathInLocalCache(key, offset, cell.file_segment->isPersistent())); + cache_paths.push_back(getPathInLocalCache(key, offset, cell.file_segment->getKind())); } return cache_paths; } diff --git a/src/Interpreters/Cache/FileCache.h b/src/Interpreters/Cache/FileCache.h index 7307be0908e..d4a05687e79 100644 --- a/src/Interpreters/Cache/FileCache.h +++ b/src/Interpreters/Cache/FileCache.h @@ -63,6 +63,8 @@ public: */ FileSegmentsHolderPtr getOrSet(const Key & key, size_t offset, size_t size, const CreateFileSegmentSettings & settings); + FileSegmentsHolder 
set(const Key & key, size_t offset, size_t size, const CreateFileSegmentSettings & settings); + /** * Segments in returned list are ordered in ascending order and represent a full contiguous * interval (no holes). Each segment in returned list has state: DOWNLOADED, DOWNLOADING or EMPTY. @@ -82,7 +84,7 @@ public: static Key createKeyForPath(const String & path); - String getPathInLocalCache(const Key & key, size_t offset, bool is_persistent) const; + String getPathInLocalCache(const Key & key, size_t offset, FileSegmentKind segment_kind) const; String getPathInLocalCache(const Key & key) const; diff --git a/src/Interpreters/Cache/FileCacheFactory.cpp b/src/Interpreters/Cache/FileCacheFactory.cpp index b276760c0dd..e120fe3fc27 100644 --- a/src/Interpreters/Cache/FileCacheFactory.cpp +++ b/src/Interpreters/Cache/FileCacheFactory.cpp @@ -31,14 +31,21 @@ const FileCacheSettings & FileCacheFactory::getSettings(const std::string & cach } -FileCachePtr FileCacheFactory::get(const std::string & cache_base_path) +FileCachePtr FileCacheFactory::tryGet(const std::string & cache_base_path) { std::lock_guard lock(mutex); auto it = caches_by_path.find(cache_base_path); if (it == caches_by_path.end()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "No cache found by path: {}", cache_base_path); + return nullptr; return it->second->cache; +} +FileCachePtr FileCacheFactory::get(const std::string & cache_base_path) +{ + auto file_cache_ptr = tryGet(cache_base_path); + if (!file_cache_ptr) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "No cache found by path: {}", cache_base_path); + return file_cache_ptr; } FileCachePtr FileCacheFactory::getOrCreate( diff --git a/src/Interpreters/Cache/FileCacheFactory.h b/src/Interpreters/Cache/FileCacheFactory.h index 82e0ec8f928..32ecd05f019 100644 --- a/src/Interpreters/Cache/FileCacheFactory.h +++ b/src/Interpreters/Cache/FileCacheFactory.h @@ -33,6 +33,7 @@ public: FileCachePtr getOrCreate(const std::string & cache_base_path, const FileCacheSettings & file_cache_settings, const std::string & name); + FileCachePtr tryGet(const std::string & cache_base_path); FileCachePtr get(const std::string & cache_base_path); CacheByBasePath getAll(); diff --git a/src/Interpreters/Cache/FileCacheKey.h b/src/Interpreters/Cache/FileCacheKey.h index 3881a7aec7e..35e02276b88 100644 --- a/src/Interpreters/Cache/FileCacheKey.h +++ b/src/Interpreters/Cache/FileCacheKey.h @@ -17,6 +17,8 @@ struct FileCacheKey explicit FileCacheKey(const UInt128 & path); + static FileCacheKey random() { return FileCacheKey(UUIDHelpers::generateV4().toUnderType()); } + bool operator==(const FileCacheKey & other) const { return key == other.key; } }; diff --git a/src/Interpreters/Cache/FileSegment.cpp b/src/Interpreters/Cache/FileSegment.cpp index 8a42ddde31c..56c26f3c3c1 100644 --- a/src/Interpreters/Cache/FileSegment.cpp +++ b/src/Interpreters/Cache/FileSegment.cpp @@ -9,6 +9,8 @@ #include #include +#include + namespace CurrentMetrics { extern const Metric CacheDetachedFileSegments; @@ -22,6 +24,11 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } +String toString(FileSegmentKind kind) +{ + return String(magic_enum::enum_name(kind)); +} + FileSegment::FileSegment( size_t offset_, size_t size_, @@ -41,7 +48,8 @@ FileSegment::FileSegment( #else , log(&Poco::Logger::get("FileSegment")) #endif - , is_persistent(settings.is_persistent) + , segment_kind(settings.kind) + , is_unbound(settings.unbounded) { /// On creation, file segment state can be EMPTY, DOWNLOADED, DOWNLOADING. 
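The tryGet()/get() split in FileCacheFactory follows a common lookup idiom: the non-throwing variant does the map lookup under the mutex, and the throwing variant just delegates and adds the error. A standalone sketch of the same idiom with stand-in types (not the real FileCacheFactory API):

    #include <map>
    #include <memory>
    #include <mutex>
    #include <stdexcept>
    #include <string>

    struct Cache { std::string base_path; };
    using CachePtr = std::shared_ptr<Cache>;

    class CacheRegistry
    {
    public:
        // Returns nullptr when no cache is registered for the path.
        CachePtr tryGet(const std::string & path)
        {
            std::lock_guard lock(mutex);
            auto it = caches.find(path);
            return it == caches.end() ? nullptr : it->second;
        }

        // Same lookup, but unknown paths become an exception.
        CachePtr get(const std::string & path)
        {
            if (auto cache = tryGet(path))
                return cache;
            throw std::runtime_error("No cache found by path: " + path);
        }

        void add(const std::string & path)
        {
            std::lock_guard lock(mutex);
            caches[path] = std::make_shared<Cache>(Cache{path});
        }

    private:
        std::mutex mutex;
        std::map<std::string, CachePtr> caches;
    };

Keeping the lock only inside tryGet() means get() needs no second acquisition, which matches how the diff structures the two functions.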
switch (download_state) @@ -129,6 +137,18 @@ size_t FileSegment::getDownloadedSizeUnlocked(const FileSegmentGuard::Lock &) co return downloaded_size; } +void FileSegment::setDownloadedSize(size_t delta) +{ + std::unique_lock download_lock(download_mutex); + setDownloadedSizeUnlocked(download_lock, delta); +} + +void FileSegment::setDownloadedSizeUnlocked(std::unique_lock & /* download_lock */, size_t delta) +{ + downloaded_size += delta; + assert(downloaded_size == std::filesystem::file_size(getPathInLocalCache())); +} + bool FileSegment::isDownloaded() const { auto lock = segment_guard.lock(); @@ -291,6 +311,22 @@ void FileSegment::resetRemoteFileReader() remote_file_reader.reset(); } +std::unique_ptr FileSegment::detachWriter() +{ + std::unique_lock segment_lock(mutex); + + if (!cache_writer) + { + if (detached_writer) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Writer is already detached"); + + auto download_path = getPathInLocalCache(); + cache_writer = std::make_unique(download_path); + } + detached_writer = true; + return std::move(cache_writer); +} + void FileSegment::write(const char * from, size_t size, size_t offset) { if (!size) @@ -334,6 +370,9 @@ void FileSegment::write(const char * from, size_t size, size_t offset) "Cache writer was finalized (downloaded size: {}, state: {})", current_downloaded_size, stateToString(download_state)); + if (detached_writer) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cache writer was detached"); + cache_writer = std::make_unique(file_path); } } @@ -413,6 +452,7 @@ bool FileSegment::reserve(size_t size_to_reserve) size_t expected_downloaded_size; + bool is_file_segment_size_exceeded; { auto lock = segment_guard.lock(); @@ -423,11 +463,14 @@ bool FileSegment::reserve(size_t size_to_reserve) expected_downloaded_size = getDownloadedSizeUnlocked(lock); - if (expected_downloaded_size + size_to_reserve > range().size()) + is_file_segment_size_exceeded = expected_downloaded_size + size_to_reserve > range().size(); + if (is_file_segment_size_exceeded && !is_unbound) + { throw Exception( ErrorCodes::LOGICAL_ERROR, "Attempt to reserve space too much space ({}) for file segment with range: {} (downloaded size: {})", size_to_reserve, range().toString(), downloaded_size); + } chassert(reserved_size >= expected_downloaded_size); } @@ -444,13 +487,15 @@ bool FileSegment::reserve(size_t size_to_reserve) if (!reserved) { size_to_reserve = size_to_reserve - already_reserved_size; - reserved = cache->tryReserve(key(), offset(), size_to_reserve); + if (is_unbound && is_file_segment_size_exceeded) + segment_range.right = range().left + expected_downloaded_size + size_to_reserve; + + reserved = cache->tryReserve(key(), offset(), size_to_reserve); if (reserved) { auto lock = segment_guard.lock(); reserved_size += size_to_reserve; - } } return reserved; @@ -585,6 +630,22 @@ void FileSegment::completeUnlocked(KeyTransaction & key_transaction) resetDownloaderUnlocked(segment_lock); } + if (cache_writer && (is_downloader || is_last_holder)) + { + cache_writer->finalize(); + cache_writer.reset(); + remote_file_reader.reset(); + } + + if (segment_kind == FileSegmentKind::Temporary && is_last_holder) + { + LOG_TEST(log, "Removing temporary file segment: {}", getInfoForLogUnlocked(segment_lock)); + detach(cache_lock, segment_lock); + setDownloadState(State::SKIP_CACHE); + key_transaction.remove(key(), offset(), segment_lock); + return; + } + switch (download_state) { case State::DOWNLOADED: @@ -634,13 +695,6 @@ void FileSegment::completeUnlocked(KeyTransaction & 
key_transaction) key_transaction.reduceSizeToDownloaded(key(), offset(), segment_lock); } - if (cache_writer) - { - cache_writer->finalize(); - cache_writer.reset(); - remote_file_reader.reset(); - } - detachAssumeStateFinalized(segment_lock); is_completed = true; } @@ -672,7 +726,7 @@ String FileSegment::getInfoForLogUnlocked(const FileSegmentGuard::Lock & lock) c info << "first non-downloaded offset: " << getFirstNonDownloadedOffsetUnlocked(lock) << ", "; info << "caller id: " << getCallerId() << ", "; info << "detached: " << is_detached << ", "; - info << "persistent: " << is_persistent; + info << "kind: " << toString(segment_kind); return info.str(); } @@ -770,7 +824,7 @@ FileSegmentPtr FileSegment::getSnapshot(const FileSegmentPtr & file_segment) snapshot->ref_count = file_segment.use_count(); snapshot->downloaded_size = file_segment->getDownloadedSizeUnlocked(lock); snapshot->download_state = file_segment->download_state; - snapshot->is_persistent = file_segment->isPersistent(); + snapshot->segment_kind = file_segment->getKind(); return snapshot; } @@ -828,12 +882,17 @@ FileSegments::iterator FileSegmentsHolder::completeAndPopFrontImpl() return file_segments.erase(file_segments.begin()); } -FileSegmentsHolder::~FileSegmentsHolder() +void FileSegmentsHolder::reset() { for (auto file_segment_it = file_segments.begin(); file_segment_it != file_segments.end();) file_segment_it = completeAndPopFrontImpl(); } +FileSegmentsHolder::~FileSegmentsHolder() +{ + reset(); +} + String FileSegmentsHolder::toString() { String ranges; diff --git a/src/Interpreters/Cache/FileSegment.h b/src/Interpreters/Cache/FileSegment.h index 0ceb887d1ba..f7d0b0a223c 100644 --- a/src/Interpreters/Cache/FileSegment.h +++ b/src/Interpreters/Cache/FileSegment.h @@ -35,9 +35,39 @@ using KeyTransactionPtr = std::unique_ptr; struct KeyTransactionCreator; using KeyTransactionCreatorPtr = std::unique_ptr; +/* + * FileSegmentKind is used to specify the eviction policy for file segments. + */ +enum class FileSegmentKind +{ + /* `Regular` file segment is still in cache after usage, and can be evicted + * (unless there're some holders). + */ + Regular, + + /* `Persistent` file segment can't be evicted from cache, + * it should be removed manually. + */ + Persistent, + + /* `Temporary` file segment is removed right after releasing. + * Also corresponding files are removed during cache loading (if any). 
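To tie the three FileSegmentKind values to their on-disk behaviour: Regular and Persistent segments survive a restart (the latter via a "_persistent" suffix on the offset file), while "_temporary" files are deleted during cache loading, matching the loadCacheInfoIntoMemory() change earlier in this diff. A standalone sketch of that classification, with simplified types:

    #include <iostream>
    #include <optional>
    #include <string>

    enum class FileSegmentKind { Regular, Persistent, Temporary };

    // Returns the kind to load the file with, or nullopt if the leftover file
    // should simply be removed (Temporary segments do not survive a restart).
    std::optional<FileSegmentKind> classifyOffsetFile(const std::string & file_name)
    {
        auto delim = file_name.find('_');
        if (delim == std::string::npos)
            return FileSegmentKind::Regular;

        auto suffix = file_name.substr(delim + 1);
        if (suffix == "persistent")
            return FileSegmentKind::Persistent;
        if (suffix == "temporary")
            return std::nullopt;           // remove the file and skip it
        return FileSegmentKind::Regular;
    }

    int main()
    {
        for (const auto * name : {"0", "0_persistent", "0_temporary"})
            std::cout << name << " -> " << (classifyOffsetFile(name) ? "load" : "remove") << '\n';
    }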
+ */ + Temporary, +}; + +String toString(FileSegmentKind kind); + struct CreateFileSegmentSettings { - bool is_persistent = false; + FileSegmentKind kind = FileSegmentKind::Regular; + bool unbounded = false; + + CreateFileSegmentSettings() = default; + + explicit CreateFileSegmentSettings(FileSegmentKind kind_, bool unbounded_ = false) + : kind(kind_), unbounded(unbounded_) + {} }; class FileSegment : private boost::noncopyable, public std::enable_shared_from_this @@ -132,7 +162,8 @@ public: size_t offset() const { return range().left; } - bool isPersistent() const { return is_persistent; } + FileSegmentKind getKind() const { return segment_kind; } + bool isPersistent() const { return segment_kind == FileSegmentKind::Persistent; } using UniqueId = std::pair; UniqueId getUniqueId() const { return std::pair(key(), offset()); } @@ -196,19 +227,19 @@ public: void assertCorrectness() const; - /** - * ========== Methods for _only_ file segment's `writer` ====================== - */ - - void synchronousWrite(const char * from, size_t size, size_t offset); - /** * ========== Methods for _only_ file segment's `downloader` ================== */ /// Try to reserve exactly `size` bytes. + /// Returns true if reservation was successful, false otherwise. bool reserve(size_t size_to_reserve); + /// Try to reserve at max `size_to_reserve` bytes. + /// Returns actual size reserved. It can be less than size_to_reserve in non strict mode. + /// In strict mode throws an error on attempt to reserve space too much space. + size_t tryReserve(size_t size_to_reserve, bool strict = false); + /// Write data into reserved space. void write(const char * from, size_t size, size_t offset); @@ -231,6 +262,10 @@ public: FileSegmentGuard::Lock lock() const { return segment_guard.lock(); } + void setDownloadedSize(size_t delta); + + LocalCacheWriterPtr detachWriter(); + private: size_t getFirstNonDownloadedOffsetUnlocked(const FileSegmentGuard::Lock & lock) const; size_t getCurrentWriteOffsetUnlocked(const FileSegmentGuard::Lock & lock) const; @@ -264,9 +299,9 @@ private: KeyTransactionPtr createKeyTransaction(bool assert_exists = true) const; - /// complete() without any completion state is called from destructor of - /// FileSegmentsHolder. complete() might check if the caller of the method - /// is the last alive holder of the segment. Therefore, complete() and destruction + /// completeWithoutStateUnlocked() is called from destructor of FileSegmentsHolder. + /// Function might check if the caller of the method + /// is the last alive holder of the segment. Therefore, completion and destruction /// of the file segment pointer must be done under the same cache mutex. void completeUnlocked(KeyTransaction & key_transaction); @@ -286,7 +321,9 @@ private: RemoteFileReaderPtr remote_file_reader; LocalCacheWriterPtr cache_writer; + bool detached_writer = false; + /// downloaded_size should always be less or equal to reserved_size size_t downloaded_size = 0; size_t reserved_size = 0; @@ -317,12 +354,15 @@ private: bool is_detached = false; std::atomic is_completed = false; - bool is_downloaded{false}; + bool is_downloaded = false; std::atomic hits_count = 0; /// cache hits. std::atomic ref_count = 0; /// Used for getting snapshot state - bool is_persistent; + FileSegmentKind segment_kind; + + /// Size of the segment is not known until it is downloaded and can be bigger than max_file_segment_size. 
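CreateFileSegmentSettings now carries both the kind and the unbounded flag instead of a single is_persistent bool. A standalone sketch of the struct and of the combination requested by the temporary-data code later in this diff (stand-in types only):

    #include <cassert>

    enum class FileSegmentKind { Regular, Persistent, Temporary };

    struct CreateFileSegmentSettings
    {
        FileSegmentKind kind = FileSegmentKind::Regular;
        bool unbounded = false;

        CreateFileSegmentSettings() = default;
        explicit CreateFileSegmentSettings(FileSegmentKind kind_, bool unbounded_ = false)
            : kind(kind_), unbounded(unbounded_) {}
    };

    int main()
    {
        // Default: a regular, size-bounded cache segment.
        CreateFileSegmentSettings regular;
        assert(regular.kind == FileSegmentKind::Regular && !regular.unbounded);

        // What the temporary-data path asks for: a segment that is dropped after
        // release and is allowed to grow past max_file_segment_size.
        CreateFileSegmentSettings temporary(FileSegmentKind::Temporary, /* unbounded */ true);
        assert(temporary.kind == FileSegmentKind::Temporary && temporary.unbounded);
    }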
+ bool is_unbound = false; CurrentMetrics::Increment metric_increment{CurrentMetrics::CacheFileSegments}; }; @@ -356,6 +396,8 @@ struct FileSegmentsHolder : private boost::noncopyable FileSegments::iterator begin() { return file_segments.begin(); } FileSegments::iterator end() { return file_segments.end(); } + void reset(); + private: FileSegments file_segments{}; diff --git a/src/Interpreters/Cache/WriteBufferToFileSegment.cpp b/src/Interpreters/Cache/WriteBufferToFileSegment.cpp new file mode 100644 index 00000000000..16906e9440e --- /dev/null +++ b/src/Interpreters/Cache/WriteBufferToFileSegment.cpp @@ -0,0 +1,76 @@ +#include +#include + +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NOT_ENOUGH_SPACE; + extern const int LOGICAL_ERROR; +} + +namespace +{ + class SwapHelper + { + public: + SwapHelper(WriteBuffer & b1_, WriteBuffer & b2_) : b1(b1_), b2(b2_) { b1.swap(b2); } + ~SwapHelper() { b1.swap(b2); } + + private: + WriteBuffer & b1; + WriteBuffer & b2; + }; +} + +WriteBufferToFileSegment::WriteBufferToFileSegment(FileSegment * file_segment_) + : WriteBufferFromFileDecorator(file_segment_->detachWriter()), file_segment(file_segment_) +{ + auto downloader = file_segment->getOrSetDownloader(); + if (downloader != FileSegment::getCallerId()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Failed to set a downloader. ({})", file_segment->getInfoForLog()); +} + +/// If it throws an exception, the file segment will be incomplete, so you should not use it in the future. +void WriteBufferToFileSegment::nextImpl() +{ + size_t bytes_to_write = offset(); + + /// In case of an error, we don't need to finalize the file segment + /// because it will be deleted soon and completed in the holder's destructor. + bool ok = file_segment->reserve(bytes_to_write); + if (!ok) + throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "Failed to reserve space for the file cache ({})", file_segment->getInfoForLog()); + + try + { + SwapHelper swap(*this, *impl); + /// Write data to the underlying buffer. + impl->next(); + } + catch (...) + { + LOG_WARNING(&Poco::Logger::get("WriteBufferToFileSegment"), "Failed to write to the underlying buffer ({})", file_segment->getInfoForLog()); + throw; + } + + file_segment->setDownloadedSize(bytes_to_write); +} + + +WriteBufferToFileSegment::~WriteBufferToFileSegment() +{ + try + { + finalize(); + } + catch (...) 
+ { + tryLogCurrentException(__PRETTY_FUNCTION__); + } +} + +} diff --git a/src/Interpreters/Cache/WriteBufferToFileSegment.h b/src/Interpreters/Cache/WriteBufferToFileSegment.h new file mode 100644 index 00000000000..4748891a6e0 --- /dev/null +++ b/src/Interpreters/Cache/WriteBufferToFileSegment.h @@ -0,0 +1,24 @@ +#pragma once + +#include + +namespace DB +{ + +class FileSegment; + +class WriteBufferToFileSegment : public WriteBufferFromFileDecorator +{ +public: + explicit WriteBufferToFileSegment(FileSegment * file_segment_); + + void nextImpl() override; + + ~WriteBufferToFileSegment() override; + +private: + FileSegment * file_segment; +}; + + +} diff --git a/src/Interpreters/CollectJoinOnKeysVisitor.cpp b/src/Interpreters/CollectJoinOnKeysVisitor.cpp index 15ecb822976..9b264cb52a3 100644 --- a/src/Interpreters/CollectJoinOnKeysVisitor.cpp +++ b/src/Interpreters/CollectJoinOnKeysVisitor.cpp @@ -19,6 +19,11 @@ namespace ErrorCodes namespace { +bool isDeterminedIdentifier(JoinIdentifierPos pos) +{ + return pos == JoinIdentifierPos::Left || pos == JoinIdentifierPos::Right; +} + bool isLeftIdentifier(JoinIdentifierPos pos) { /// Unknown identifiers considered as left, we will try to process it on later stages @@ -79,7 +84,7 @@ void CollectJoinOnKeysMatcher::Data::asofToJoinKeys() void CollectJoinOnKeysMatcher::visit(const ASTIdentifier & ident, const ASTPtr & ast, CollectJoinOnKeysMatcher::Data & data) { - if (auto expr_from_table = getTableForIdentifiers(ast, false, data); expr_from_table != JoinIdentifierPos::Unknown) + if (auto expr_from_table = getTableForIdentifiers(ast, false, data); isDeterminedIdentifier(expr_from_table)) data.analyzed_join.addJoinCondition(ast, isLeftIdentifier(expr_from_table)); else throw Exception("Unexpected identifier '" + ident.name() + "' in JOIN ON section", @@ -105,23 +110,26 @@ void CollectJoinOnKeysMatcher::visit(const ASTFunction & func, const ASTPtr & as ASTPtr left = func.arguments->children.at(0); ASTPtr right = func.arguments->children.at(1); auto table_numbers = getTableNumbers(left, right, data); + if (table_numbers.first == table_numbers.second) { - if (table_numbers.first == JoinIdentifierPos::Unknown) - throw Exception("Ambiguous column in expression '" + queryToString(ast) + "' in JOIN ON section", - ErrorCodes::AMBIGUOUS_COLUMN_NAME); + if (!isDeterminedIdentifier(table_numbers.first)) + throw Exception(ErrorCodes::AMBIGUOUS_COLUMN_NAME, + "Ambiguous columns in expression '{}' in JOIN ON section", queryToString(ast)); data.analyzed_join.addJoinCondition(ast, isLeftIdentifier(table_numbers.first)); return; } - if (table_numbers.first != JoinIdentifierPos::NotApplicable && table_numbers.second != JoinIdentifierPos::NotApplicable) + if ((isLeftIdentifier(table_numbers.first) && isRightIdentifier(table_numbers.second)) || + (isRightIdentifier(table_numbers.first) && isLeftIdentifier(table_numbers.second))) { data.addJoinKeys(left, right, table_numbers); return; } } - if (auto expr_from_table = getTableForIdentifiers(ast, false, data); expr_from_table != JoinIdentifierPos::Unknown) + + if (auto expr_from_table = getTableForIdentifiers(ast, false, data); isDeterminedIdentifier(expr_from_table)) { data.analyzed_join.addJoinCondition(ast, isLeftIdentifier(expr_from_table)); return; @@ -204,7 +212,7 @@ JoinIdentifierPos CollectJoinOnKeysMatcher::getTableForIdentifiers(const ASTPtr std::vector identifiers; getIdentifiers(ast, identifiers); if (identifiers.empty()) - return JoinIdentifierPos::NotApplicable; + return JoinIdentifierPos::NotColumn; 
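The write path of WriteBufferToFileSegment::nextImpl() boils down to: reserve space in the cache, forward the bytes to the underlying writer, then account the downloaded size. A standalone sketch of that sequence with a fake segment type (not the real FileSegment API):

    #include <cstddef>
    #include <stdexcept>
    #include <vector>

    struct FakeSegment
    {
        size_t quota = 0;        // how much the cache will let us reserve in total
        size_t reserved = 0;
        size_t downloaded = 0;

        // Mirrors the shape of FileSegment::reserve(): true on success, false otherwise.
        bool reserve(size_t bytes)
        {
            if (reserved + bytes > quota)
                return false;
            reserved += bytes;
            return true;
        }
        void setDownloadedSize(size_t delta) { downloaded += delta; }
    };

    void writeChunk(FakeSegment & segment, const std::vector<char> & chunk)
    {
        if (!segment.reserve(chunk.size()))
            throw std::runtime_error("Failed to reserve space for the file cache");
        // ... the real code swaps buffers here and lets the detached writer flush to disk ...
        segment.setDownloadedSize(chunk.size());
    }

    int main()
    {
        FakeSegment segment{.quota = 1024};
        writeChunk(segment, std::vector<char>(512, 'x'));   // succeeds: 512 <= quota
    }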
JoinIdentifierPos table_number = JoinIdentifierPos::Unknown; diff --git a/src/Interpreters/CollectJoinOnKeysVisitor.h b/src/Interpreters/CollectJoinOnKeysVisitor.h index e52b0c69591..4f4e886099e 100644 --- a/src/Interpreters/CollectJoinOnKeysVisitor.h +++ b/src/Interpreters/CollectJoinOnKeysVisitor.h @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -18,6 +19,11 @@ namespace ASOF enum class Inequality; } +namespace ErrorCodes +{ + extern const int INVALID_JOIN_ON_EXPRESSION; +} + enum class JoinIdentifierPos { /// Position can't be established, identifier not resolved @@ -26,8 +32,8 @@ enum class JoinIdentifierPos Left, /// Right side of JOIN Right, - /// Expression not valid, e.g. doesn't contain identifiers - NotApplicable, + /// Identifier is not a column (e.g constant) + NotColumn, }; using JoinIdentifierPosPair = std::pair; @@ -66,6 +72,9 @@ public: } else { + if (ast->children.empty()) + throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, "Illegal expression '{}' in JOIN ON section", queryToString(ast)); + /// visit children } } diff --git a/src/Interpreters/ComparisonGraph.h b/src/Interpreters/ComparisonGraph.h index 3891fbf51cf..996526b60df 100644 --- a/src/Interpreters/ComparisonGraph.h +++ b/src/Interpreters/ComparisonGraph.h @@ -17,7 +17,7 @@ class ComparisonGraph { public: /// atomic_formulas are extracted from constraints. - explicit ComparisonGraph(const std::vector & atomic_formulas); + explicit ComparisonGraph(const ASTs & atomic_formulas); enum class CompareResult { @@ -43,7 +43,7 @@ public: bool isAlwaysCompare(CompareResult expected, const ASTPtr & left, const ASTPtr & right) const; /// Returns all expressions from component to which @ast belongs if any. - std::vector getEqual(const ASTPtr & ast) const; + ASTs getEqual(const ASTPtr & ast) const; /// Returns constant expression from component to which @ast belongs if any. std::optional getEqualConst(const ASTPtr & ast) const; @@ -52,7 +52,7 @@ public: std::optional getComponentId(const ASTPtr & ast) const; /// Returns all expressions from component. - std::vector getComponent(size_t id) const; + ASTs getComponent(size_t id) const; size_t getNumOfComponents() const { return graph.vertices.size(); } @@ -72,7 +72,7 @@ private: struct EqualComponent { /// All these expressions are considered as equal. - std::vector asts; + ASTs asts; std::optional constant_index; bool hasConstant() const; diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index b0d4e9719ba..aa83c64ee2d 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include #include @@ -103,9 +103,12 @@ #include #include #include +#include #include #include +#include + #if USE_ROCKSDB #include #endif @@ -748,28 +751,69 @@ void Context::setPath(const String & path) shared->user_scripts_path = shared->path + "user_scripts/"; } -VolumePtr Context::setTemporaryStorage(const String & path, const String & policy_name, size_t max_size) +static void setupTmpPath(Poco::Logger * log, const std::string & path) +try +{ + LOG_DEBUG(log, "Setting up {} to store temporary data in it", path); + + fs::create_directories(path); + + /// Clearing old temporary files. 
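For the JoinIdentifierPos changes: only Left and Right count as determined; Unknown is still optimistically treated as belonging to the left side (per the comment in the .cpp above), and the renamed NotColumn value marks expressions without identifiers, such as constants. A standalone sketch of the two helpers with a stand-in enum:

    #include <cassert>

    enum class JoinIdentifierPos { Unknown, Left, Right, NotColumn };

    bool isDeterminedIdentifier(JoinIdentifierPos pos)
    {
        return pos == JoinIdentifierPos::Left || pos == JoinIdentifierPos::Right;
    }

    bool isLeftIdentifier(JoinIdentifierPos pos)
    {
        // Unknown identifiers are considered left; later stages will try to resolve them.
        return pos == JoinIdentifierPos::Left || pos == JoinIdentifierPos::Unknown;
    }

    int main()
    {
        assert(isDeterminedIdentifier(JoinIdentifierPos::Right));
        assert(!isDeterminedIdentifier(JoinIdentifierPos::NotColumn));   // constants never become join keys
        assert(isLeftIdentifier(JoinIdentifierPos::Unknown));            // unresolved names fall back to the left side
    }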
+ fs::directory_iterator dir_end; + for (fs::directory_iterator it(path); it != dir_end; ++it) + { + if (it->is_regular_file()) + { + if (startsWith(it->path().filename(), "tmp")) + { + LOG_DEBUG(log, "Removing old temporary file {}", it->path().string()); + fs::remove(it->path()); + } + else + LOG_DEBUG(log, "Found unknown file in temporary path {}", it->path().string()); + } + /// We skip directories (for example, 'http_buffers' - it's used for buffering of the results) and all other file types. + } +} +catch (...) +{ + DB::tryLogCurrentException(log, fmt::format( + "Caught exception while setup temporary path: {}. " + "It is ok to skip this exception as cleaning old temporary files is not necessary", path)); +} + +static VolumePtr createLocalSingleDiskVolume(const std::string & path) +{ + auto disk = std::make_shared("_tmp_default", path, 0); + VolumePtr volume = std::make_shared("_tmp_default", disk, 0); + return volume; +} + +void Context::setTemporaryStoragePath(const String & path, size_t max_size) +{ + shared->tmp_path = path; + if (!shared->tmp_path.ends_with('/')) + shared->tmp_path += '/'; + + VolumePtr volume = createLocalSingleDiskVolume(shared->tmp_path); + + for (const auto & disk : volume->getDisks()) + { + setupTmpPath(shared->log, disk->getPath()); + } + + shared->temp_data_on_disk = std::make_shared(volume, max_size); +} + +void Context::setTemporaryStoragePolicy(const String & policy_name, size_t max_size) { std::lock_guard lock(shared->storage_policies_mutex); - VolumePtr volume; - if (policy_name.empty()) - { - shared->tmp_path = path; - if (!shared->tmp_path.ends_with('/')) - shared->tmp_path += '/'; - - auto disk = std::make_shared("_tmp_default", shared->tmp_path, 0); - volume = std::make_shared("_tmp_default", disk, 0); - } - else - { - StoragePolicyPtr tmp_policy = getStoragePolicySelector(lock)->get(policy_name); - if (tmp_policy->getVolumes().size() != 1) - throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, - "Policy '{}' is used temporary files, such policy should have exactly one volume", policy_name); - volume = tmp_policy->getVolume(0); - } + StoragePolicyPtr tmp_policy = getStoragePolicySelector(lock)->get(policy_name); + if (tmp_policy->getVolumes().size() != 1) + throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, + "Policy '{}' is used temporary files, such policy should have exactly one volume", policy_name); + VolumePtr volume = tmp_policy->getVolume(0); if (volume->getDisks().empty()) throw Exception("No disks volume for temporary files", ErrorCodes::NO_ELEMENTS_IN_CONFIG); @@ -781,8 +825,6 @@ VolumePtr Context::setTemporaryStorage(const String & path, const String & polic /// Check that underlying disk is local (can be wrapped in decorator) DiskPtr disk_ptr = disk; - if (const auto * disk_decorator = dynamic_cast(disk_ptr.get())) - disk_ptr = disk_decorator->getNestedDisk(); if (dynamic_cast(disk_ptr.get()) == nullptr) { @@ -791,10 +833,33 @@ VolumePtr Context::setTemporaryStorage(const String & path, const String & polic "Disk '{}' ({}) is not local and can't be used for temporary files", disk_ptr->getName(), typeid(*disk_raw_ptr).name()); } + + setupTmpPath(shared->log, disk->getPath()); } shared->temp_data_on_disk = std::make_shared(volume, max_size); - return volume; +} + + +void Context::setTemporaryStorageInCache(const String & cache_disk_name, size_t max_size) +{ + auto disk_ptr = getDisk(cache_disk_name); + if (!disk_ptr) + throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "Disk '{}' is not found", cache_disk_name); + + const auto * 
disk_object_storage_ptr = dynamic_cast(disk_ptr.get()); + if (!disk_object_storage_ptr) + throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "Disk '{}' does not use cache", cache_disk_name); + + auto file_cache = disk_object_storage_ptr->getCache(); + if (!file_cache) + throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "Cache '{}' is not found", file_cache->getBasePath()); + + LOG_DEBUG(shared->log, "Using file cache ({}) for temporary files", file_cache->getBasePath()); + + shared->tmp_path = file_cache->getBasePath(); + VolumePtr volume = createLocalSingleDiskVolume(shared->tmp_path); + shared->temp_data_on_disk = std::make_shared(volume, file_cache.get(), max_size); } void Context::setFlagsPath(const String & path) @@ -2291,7 +2356,7 @@ void Context::initializeKeeperDispatcher([[maybe_unused]] bool start_async) cons } shared->keeper_dispatcher = std::make_shared(); - shared->keeper_dispatcher->initialize(config, is_standalone_app, start_async); + shared->keeper_dispatcher->initialize(config, is_standalone_app, start_async, getMacros()); } #endif } @@ -2333,7 +2398,7 @@ void Context::updateKeeperConfiguration([[maybe_unused]] const Poco::Util::Abstr if (!shared->keeper_dispatcher) return; - shared->keeper_dispatcher->updateConfiguration(config); + shared->keeper_dispatcher->updateConfiguration(config, getMacros()); #endif } diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 655f65aa69a..4b7d0685ba3 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -467,7 +467,9 @@ public: void addWarningMessage(const String & msg) const; - VolumePtr setTemporaryStorage(const String & path, const String & policy_name, size_t max_size); + void setTemporaryStorageInCache(const String & cache_disk_name, size_t max_size); + void setTemporaryStoragePolicy(const String & policy_name, size_t max_size); + void setTemporaryStoragePath(const String & path, size_t max_size); using ConfigurationPtr = Poco::AutoPtr; diff --git a/src/Interpreters/CrossToInnerJoinVisitor.cpp b/src/Interpreters/CrossToInnerJoinVisitor.cpp index bafa63e767f..09aebf874be 100644 --- a/src/Interpreters/CrossToInnerJoinVisitor.cpp +++ b/src/Interpreters/CrossToInnerJoinVisitor.cpp @@ -149,7 +149,7 @@ ASTPtr makeOnExpression(const std::vector & expressions) if (expressions.size() == 1) return expressions[0]->clone(); - std::vector arguments; + ASTs arguments; arguments.reserve(expressions.size()); for (const auto & ast : expressions) arguments.emplace_back(ast->clone()); diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp index 1c551dc89e0..0425b3de99b 100644 --- a/src/Interpreters/DDLTask.cpp +++ b/src/Interpreters/DDLTask.cpp @@ -29,6 +29,7 @@ namespace ErrorCodes extern const int DNS_ERROR; } + HostID HostID::fromString(const String & host_port_str) { HostID res; @@ -36,6 +37,7 @@ HostID HostID::fromString(const String & host_port_str) return res; } + bool HostID::isLocalAddress(UInt16 clickhouse_port) const { try diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp index 2e1918e1a37..d427e97828b 100644 --- a/src/Interpreters/DDLWorker.cpp +++ b/src/Interpreters/DDLWorker.cpp @@ -248,6 +248,7 @@ void DDLWorker::scheduleTasks(bool reinitialized) LOG_TRACE(log, "Don't have unfinished tasks after restarting"); else LOG_INFO(log, "Have {} unfinished tasks, will check them", current_tasks.size()); + assert(current_tasks.size() <= pool_size + (worker_pool != nullptr)); auto task_it = current_tasks.begin(); while (task_it != current_tasks.end()) @@ 
-279,7 +280,9 @@ void DDLWorker::scheduleTasks(bool reinitialized) task->completely_processed = true; } else + { processTask(*task, zookeeper); + } ++task_it; } else @@ -291,6 +294,7 @@ void DDLWorker::scheduleTasks(bool reinitialized) /// of log entry number (with leading zeros). if (!first_failed_task_name || task->entry_name < *first_failed_task_name) first_failed_task_name = task->entry_name; + task_it = current_tasks.erase(task_it); } } @@ -416,18 +420,24 @@ void DDLWorker::scheduleTasks(bool reinitialized) DDLTaskBase & DDLWorker::saveTask(DDLTaskPtr && task) { current_tasks.remove_if([](const DDLTaskPtr & t) { return t->completely_processed.load(); }); + /// Tasks are scheduled and executed in main thread <==> Parallel execution is disabled assert((worker_pool != nullptr) == (1 < pool_size)); + /// Parallel execution is disabled ==> All previous tasks are failed to start or finished, /// so current tasks list must be empty when we are ready to process new one. assert(worker_pool || current_tasks.empty()); + /// Parallel execution is enabled ==> Not more than pool_size tasks are currently executing. /// Note: If current_tasks.size() == pool_size, then all worker threads are busy, /// so we will wait on worker_pool->scheduleOrThrowOnError(...) assert(!worker_pool || current_tasks.size() <= pool_size); + current_tasks.emplace_back(std::move(task)); + if (first_failed_task_name && *first_failed_task_name == current_tasks.back()->entry_name) first_failed_task_name.reset(); + return *current_tasks.back(); } @@ -660,8 +670,8 @@ void DDLWorker::processTask(DDLTaskBase & task, const ZooKeeperPtr & zookeeper) active_node->setAlreadyRemoved(); task.createSyncedNodeIfNeed(zookeeper); - task.completely_processed = true; updateMaxDDLEntryID(task.entry_name); + task.completely_processed = true; } @@ -748,13 +758,13 @@ bool DDLWorker::tryExecuteQueryOnLeaderReplica( /// but DDL worker can continue processing other queries. while (stopwatch.elapsedSeconds() <= MAX_EXECUTION_TIMEOUT_SEC) { - StorageReplicatedMergeTree::Status status; + ReplicatedTableStatus status; // Has to get with zk fields to get active replicas field replicated_storage->getStatus(status, true); // Should return as soon as possible if the table is dropped. 
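Moving updateMaxDDLEntryID() before the completely_processed store looks like the usual publish-before-flag ordering: anyone who observes the flag should also observe the published state. That rationale is inferred, not stated in the diff, and the sketch below is a generic, standalone illustration of the pattern rather than the DDLWorker code:

    #include <atomic>
    #include <cassert>
    #include <thread>

    std::atomic<unsigned> max_entry_id{0};
    std::atomic<bool> completely_processed{false};

    void producer()
    {
        // Publish the side effect first ...
        max_entry_id.store(42, std::memory_order_release);
        // ... and only then announce completion, so any observer of the flag sees the update.
        completely_processed.store(true, std::memory_order_release);
    }

    void consumer()
    {
        while (!completely_processed.load(std::memory_order_acquire))
            std::this_thread::yield();
        assert(max_entry_id.load(std::memory_order_acquire) == 42);
    }

    int main()
    {
        std::thread t1(producer), t2(consumer);
        t1.join();
        t2.join();
    }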
- bool replica_dropped = replicated_storage->is_dropped; - bool all_replicas_likely_detached = status.active_replicas == 0 && !DatabaseCatalog::instance().isTableExist(replicated_storage->getStorageID(), context); + bool replica_dropped = storage->is_dropped; + bool all_replicas_likely_detached = status.active_replicas == 0 && !DatabaseCatalog::instance().isTableExist(storage->getStorageID(), context); if (replica_dropped || all_replicas_likely_detached) { LOG_WARNING(log, ", task {} will not be executed.", task.entry_name); diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index bc93abff534..a3db464fbbb 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -425,7 +425,7 @@ void ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions) aggregated_columns = temp_actions->getNamesAndTypesList(); for (const auto & desc : aggregate_descriptions) - aggregated_columns.emplace_back(desc.column_name, desc.function->getReturnType()); + aggregated_columns.emplace_back(desc.column_name, desc.function->getResultType()); } @@ -2074,7 +2074,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( for (const auto & f : w.window_functions) { query_analyzer.columns_after_window.push_back( - {f.column_name, f.aggregate_function->getReturnType()}); + {f.column_name, f.aggregate_function->getResultType()}); } } diff --git a/src/Interpreters/ExpressionJIT.cpp b/src/Interpreters/ExpressionJIT.cpp index 3a2c2e333a9..dfc88e97052 100644 --- a/src/Interpreters/ExpressionJIT.cpp +++ b/src/Interpreters/ExpressionJIT.cpp @@ -263,7 +263,7 @@ public: return result; } - static void applyFunction(IFunctionBase & function, Field & value) + static void applyFunction(const IFunctionBase & function, Field & value) { const auto & type = function.getArgumentTypes().at(0); ColumnsWithTypeAndName args{{type->createColumnConst(1, value), type, "x" }}; @@ -338,7 +338,7 @@ static bool isCompilableFunction(const ActionsDAG::Node & node, const std::unord if (node.type != ActionsDAG::ActionType::FUNCTION) return false; - auto & function = *node.function_base; + const auto & function = *node.function_base; IFunction::ShortCircuitSettings settings; if (function.isShortCircuit(settings, node.children.size())) diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index 9fd577318f8..dc041094381 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -178,7 +178,7 @@ namespace JoinStuff } } -static ColumnWithTypeAndName correctNullability(ColumnWithTypeAndName && column, bool nullable) +static void correctNullabilityInplace(ColumnWithTypeAndName & column, bool nullable) { if (nullable) { @@ -193,11 +193,9 @@ static ColumnWithTypeAndName correctNullability(ColumnWithTypeAndName && column, JoinCommon::removeColumnNullability(column); } - - return std::move(column); } -static ColumnWithTypeAndName correctNullability(ColumnWithTypeAndName && column, bool nullable, const ColumnUInt8 & negative_null_map) +static void correctNullabilityInplace(ColumnWithTypeAndName & column, bool nullable, const ColumnUInt8 & negative_null_map) { if (nullable) { @@ -211,8 +209,6 @@ static ColumnWithTypeAndName correctNullability(ColumnWithTypeAndName && column, } else JoinCommon::removeColumnNullability(column); - - return std::move(column); } HashJoin::HashJoin(std::shared_ptr table_join_, const Block & right_sample_block_, bool any_take_last_row_) @@ -1475,7 +1471,7 @@ void HashJoin::joinBlockImpl( 
ColumnWithTypeAndName right_col(col.column, col.type, right_col_name); if (right_col.type->lowCardinality() != right_key.type->lowCardinality()) JoinCommon::changeLowCardinalityInplace(right_col); - right_col = correctNullability(std::move(right_col), is_nullable); + correctNullabilityInplace(right_col, is_nullable); block.insert(std::move(right_col)); } } @@ -1509,7 +1505,7 @@ void HashJoin::joinBlockImpl( ColumnWithTypeAndName right_col(thin_column, col.type, right_col_name); if (right_col.type->lowCardinality() != right_key.type->lowCardinality()) JoinCommon::changeLowCardinalityInplace(right_col); - right_col = correctNullability(std::move(right_col), is_nullable, null_map_filter); + correctNullabilityInplace(right_col, is_nullable, null_map_filter); block.insert(std::move(right_col)); if constexpr (jf.need_replication) @@ -2020,7 +2016,8 @@ BlocksList HashJoin::releaseJoinedBlocks() for (size_t i = 0; i < positions.size(); ++i) { auto & column = saved_block.getByPosition(positions[i]); - restored_block.insert(correctNullability(std::move(column), is_nullable[i])); + correctNullabilityInplace(column, is_nullable[i]); + restored_block.insert(column); } restored_blocks.emplace_back(std::move(restored_block)); } @@ -2028,7 +2025,6 @@ BlocksList HashJoin::releaseJoinedBlocks() return restored_blocks; } - const ColumnWithTypeAndName & HashJoin::rightAsofKeyColumn() const { /// It should be nullable when right side is nullable diff --git a/src/Interpreters/IdentifierSemantic.cpp b/src/Interpreters/IdentifierSemantic.cpp index d3750e98b8c..0aa70057794 100644 --- a/src/Interpreters/IdentifierSemantic.cpp +++ b/src/Interpreters/IdentifierSemantic.cpp @@ -348,7 +348,7 @@ void splitConjunctionsAst(const ASTPtr & node, ASTs & result) ASTs splitConjunctionsAst(const ASTPtr & node) { - std::vector result; + ASTs result; splitConjunctionsAst(node, result); return result; } diff --git a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp index 22edac051a5..14628f34111 100644 --- a/src/Interpreters/InterpreterAlterQuery.cpp +++ b/src/Interpreters/InterpreterAlterQuery.cpp @@ -143,7 +143,7 @@ BlockIO InterpreterAlterQuery::executeToTable(const ASTAlterQuery & alter) "to execute ALTERs of different types in single query"); } - if (!mutation_commands.empty()) + if (mutation_commands.hasNonEmptyMutationCommands()) { table->checkMutationIsPossible(mutation_commands, getContext()->getSettingsRef()); MutationsInterpreter(table, metadata_snapshot, mutation_commands, getContext(), false).validate(); diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 50536b66185..ed4fd5699da 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -76,8 +76,6 @@ #include -#define MAX_FIXEDSTRING_SIZE_WITHOUT_SUSPICIOUS 256 - namespace DB { @@ -227,7 +225,7 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create) if ((create.storage->engine->name == "MaterializeMySQL" || create.storage->engine->name == "MaterializedMySQL") && !getContext()->getSettingsRef().allow_experimental_database_materialized_mysql - && !internal) + && !internal && !create.attach) { throw Exception("MaterializedMySQL is an experimental database engine. 
" "Enable allow_experimental_database_materialized_mysql to use it.", ErrorCodes::UNKNOWN_DATABASE_ENGINE); @@ -235,7 +233,7 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create) if (create.storage->engine->name == "Replicated" && !getContext()->getSettingsRef().allow_experimental_database_replicated - && !internal) + && !internal && !create.attach) { throw Exception("Replicated is an experimental database engine. " "Enable allow_experimental_database_replicated to use it.", ErrorCodes::UNKNOWN_DATABASE_ENGINE); @@ -243,7 +241,7 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create) if (create.storage->engine->name == "MaterializedPostgreSQL" && !getContext()->getSettingsRef().allow_experimental_database_materialized_postgresql - && !internal) + && !internal && !create.attach) { throw Exception("MaterializedPostgreSQL is an experimental database engine. " "Enable allow_experimental_database_materialized_postgresql to use it.", ErrorCodes::UNKNOWN_DATABASE_ENGINE); @@ -404,6 +402,8 @@ ASTPtr InterpreterCreateQuery::formatColumns(const ColumnsDescription & columns) column_declaration->children.push_back(column_declaration->default_expression); } + column_declaration->ephemeral_default = column.default_desc.ephemeral_default; + if (!column.comment.empty()) { column_declaration->comment = std::make_shared(Field(column.comment)); @@ -540,11 +540,7 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription( final_column_name)); default_expr_list->children.emplace_back( - setAlias( - col_decl.default_specifier == "EPHEMERAL" ? /// can be ASTLiteral::value NULL - std::make_shared(data_type_ptr->getDefault()) : - col_decl.default_expression->clone(), - tmp_column_name)); + setAlias(col_decl.default_expression->clone(), tmp_column_name)); } else default_expr_list->children.emplace_back(setAlias(col_decl.default_expression->clone(), col_decl.name)); @@ -561,7 +557,7 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription( ColumnsDescription res; auto name_type_it = column_names_and_types.begin(); - for (auto ast_it = columns_ast.children.begin(); ast_it != columns_ast.children.end(); ++ast_it, ++name_type_it) + for (const auto * ast_it = columns_ast.children.begin(); ast_it != columns_ast.children.end(); ++ast_it, ++name_type_it) { ColumnDescription column; @@ -590,10 +586,7 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription( visitor.visit(col_decl.default_expression); } - ASTPtr default_expr = - col_decl.default_specifier == "EPHEMERAL" && col_decl.default_expression->as()->value.isNull() ? 
- std::make_shared(DataTypeFactory::instance().get(col_decl.type)->getDefault()) : - col_decl.default_expression->clone(); + ASTPtr default_expr = col_decl.default_expression->clone(); if (col_decl.type) column.type = name_type_it->type; @@ -607,6 +600,7 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription( column.default_desc.kind = columnDefaultKindFromString(col_decl.default_specifier); column.default_desc.expression = default_expr; + column.default_desc.ephemeral_default = col_decl.ephemeral_default; } else if (col_decl.type) column.type = name_type_it->type; diff --git a/src/Interpreters/InterpreterDeleteQuery.cpp b/src/Interpreters/InterpreterDeleteQuery.cpp index b5b8ae81366..63dad10ebd6 100644 --- a/src/Interpreters/InterpreterDeleteQuery.cpp +++ b/src/Interpreters/InterpreterDeleteQuery.cpp @@ -23,6 +23,7 @@ namespace ErrorCodes { extern const int TABLE_IS_READ_ONLY; extern const int SUPPORT_IS_DISABLED; + extern const int BAD_ARGUMENTS; } @@ -58,8 +59,7 @@ BlockIO InterpreterDeleteQuery::execute() auto table_lock = table->lockForShare(getContext()->getCurrentQueryId(), getContext()->getSettingsRef().lock_acquire_timeout); auto metadata_snapshot = table->getInMemoryMetadataPtr(); - auto merge_tree = std::dynamic_pointer_cast(table); - if (!merge_tree) + if (table->supportsDelete()) { /// Convert to MutationCommand MutationCommands mutation_commands; @@ -75,39 +75,45 @@ BlockIO InterpreterDeleteQuery::execute() table->mutate(mutation_commands, getContext()); return {}; } + else if (table->supportsLightweightDelete()) + { + if (!getContext()->getSettingsRef().allow_experimental_lightweight_delete) + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Lightweight delete mutate is experimental. Set `allow_experimental_lightweight_delete` setting to enable it"); - if (!getContext()->getSettingsRef().allow_experimental_lightweight_delete) - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Lightweight delete mutate is experimental. 
Set `allow_experimental_lightweight_delete` setting to enable it"); + /// Convert to MutationCommand + MutationCommands mutation_commands; + MutationCommand mut_command; - /// Convert to MutationCommand - MutationCommands mutation_commands; - MutationCommand mut_command; + /// Build "UPDATE _row_exists = 0 WHERE predicate" query + mut_command.type = MutationCommand::Type::UPDATE; + mut_command.predicate = delete_query.predicate; - /// Build "UPDATE _row_exists = 0 WHERE predicate" query - mut_command.type = MutationCommand::Type::UPDATE; - mut_command.predicate = delete_query.predicate; + auto command = std::make_shared(); + command->type = ASTAlterCommand::UPDATE; + command->predicate = delete_query.predicate; + command->update_assignments = std::make_shared(); + auto set_row_does_not_exist = std::make_shared(); + set_row_does_not_exist->column_name = LightweightDeleteDescription::FILTER_COLUMN.name; + auto zero_value = std::make_shared(DB::Field(UInt8(0))); + set_row_does_not_exist->children.push_back(zero_value); + command->update_assignments->children.push_back(set_row_does_not_exist); + command->children.push_back(command->predicate); + command->children.push_back(command->update_assignments); + mut_command.column_to_update_expression[set_row_does_not_exist->column_name] = zero_value; + mut_command.ast = command->ptr(); - auto command = std::make_shared(); - command->type = ASTAlterCommand::UPDATE; - command->predicate = delete_query.predicate; - command->update_assignments = std::make_shared(); - auto set_row_does_not_exist = std::make_shared(); - set_row_does_not_exist->column_name = LightweightDeleteDescription::FILTER_COLUMN.name; - auto zero_value = std::make_shared(DB::Field(UInt8(0))); - set_row_does_not_exist->children.push_back(zero_value); - command->update_assignments->children.push_back(set_row_does_not_exist); - command->children.push_back(command->predicate); - command->children.push_back(command->update_assignments); - mut_command.column_to_update_expression[set_row_does_not_exist->column_name] = zero_value; - mut_command.ast = command->ptr(); + mutation_commands.emplace_back(mut_command); - mutation_commands.emplace_back(mut_command); + table->checkMutationIsPossible(mutation_commands, getContext()->getSettingsRef()); + MutationsInterpreter(table, metadata_snapshot, mutation_commands, getContext(), false).validate(); + table->mutate(mutation_commands, getContext()); - table->checkMutationIsPossible(mutation_commands, getContext()->getSettingsRef()); - MutationsInterpreter(table, metadata_snapshot, mutation_commands, getContext(), false).validate(); - table->mutate(mutation_commands, getContext()); - - return {}; + return {}; + } + else + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "DELETE query is not supported for table {}", table->getStorageID().getFullTableName()); + } } } diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 4dc53b53eb4..2cccba734d4 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -79,7 +79,8 @@ StoragePtr InterpreterInsertQuery::getTable(ASTInsertQuery & query) table_function_ptr->setStructureHint(structure_hint); } - return table_function_ptr->execute(query.table_function, getContext(), table_function_ptr->getName()); + return table_function_ptr->execute(query.table_function, getContext(), table_function_ptr->getName(), + /* cached_columns */ {}, /* use_global_context */ false, /* is_insert_query */true); } if (query.table_id) 
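The restructured InterpreterDeleteQuery now picks one of three outcomes: a regular DELETE mutation when the storage supports it, a lightweight "_row_exists = 0" update behind the experimental setting, or a BAD_ARGUMENTS error. A condensed standalone sketch of that dispatch with a stand-in table interface (not IStorage):

    #include <stdexcept>
    #include <string>

    struct TableLike
    {
        bool supports_delete = false;
        bool supports_lightweight_delete = false;
        std::string name;
    };

    enum class DeletePlan { RegularMutation, LightweightUpdate };

    DeletePlan planDelete(const TableLike & table, bool allow_experimental_lightweight_delete)
    {
        if (table.supports_delete)
            return DeletePlan::RegularMutation;          // becomes an ALTER-style DELETE mutation

        if (table.supports_lightweight_delete)
        {
            if (!allow_experimental_lightweight_delete)
                throw std::runtime_error("Lightweight delete mutate is experimental");
            return DeletePlan::LightweightUpdate;        // UPDATE _row_exists = 0 WHERE predicate
        }

        throw std::runtime_error("DELETE query is not supported for table " + table.name);
    }

    int main()
    {
        TableLike t{.supports_lightweight_delete = true, .name = "db.table"};
        return planDelete(t, /* allow_experimental_lightweight_delete */ true) == DeletePlan::LightweightUpdate ? 0 : 1;
    }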
diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index 563c7db510c..28501a794ff 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -42,7 +42,6 @@ #include #include #include -#include #include #include #include @@ -62,6 +61,7 @@ namespace DB { + namespace ErrorCodes { extern const int LOGICAL_ERROR; @@ -483,6 +483,9 @@ BlockIO InterpreterSystemQuery::execute() case Type::DROP_REPLICA: dropReplica(query); break; + case Type::DROP_DATABASE_REPLICA: + dropDatabaseReplica(query); + break; case Type::SYNC_REPLICA: syncReplica(query); break; @@ -506,7 +509,6 @@ BlockIO InterpreterSystemQuery::execute() break; case Type::RESTART_DISK: restartDisk(query.disk); - break; case Type::FLUSH_LOGS: { getContext()->checkAccess(AccessType::SYSTEM_FLUSH_LOGS); @@ -723,7 +725,7 @@ void InterpreterSystemQuery::dropReplica(ASTSystemQuery & query) { if (auto * storage_replicated = dynamic_cast(iterator->table().get())) { - StorageReplicatedMergeTree::Status status; + ReplicatedTableStatus status; storage_replicated->getStatus(status); if (status.zookeeper_path == query.replica_zk_path) throw Exception("There is a local table " + storage_replicated->getStorageID().getNameForLogs() + @@ -759,7 +761,7 @@ bool InterpreterSystemQuery::dropReplicaImpl(ASTSystemQuery & query, const Stora if (!storage_replicated) return false; - StorageReplicatedMergeTree::Status status; + ReplicatedTableStatus status; auto zookeeper = getContext()->getZooKeeper(); storage_replicated->getStatus(status); @@ -781,6 +783,75 @@ bool InterpreterSystemQuery::dropReplicaImpl(ASTSystemQuery & query, const Stora return true; } +void InterpreterSystemQuery::dropDatabaseReplica(ASTSystemQuery & query) +{ + if (query.replica.empty()) + throw Exception("Replica name is empty", ErrorCodes::BAD_ARGUMENTS); + + auto check_not_local_replica = [](const DatabaseReplicated * replicated, const ASTSystemQuery & query) + { + if (!query.replica_zk_path.empty() && fs::path(replicated->getZooKeeperPath()) != fs::path(query.replica_zk_path)) + return; + if (replicated->getFullReplicaName() != query.replica) + return; + + throw Exception(ErrorCodes::TABLE_WAS_NOT_DROPPED, "There is a local database {}, which has the same path in ZooKeeper " + "and the same replica name. Please check the path in query. 
" + "If you want to drop replica of this database, use `DROP DATABASE`", replicated->getDatabaseName()); + }; + + if (query.database) + { + getContext()->checkAccess(AccessType::SYSTEM_DROP_REPLICA, query.getDatabase()); + DatabasePtr database = DatabaseCatalog::instance().getDatabase(query.getDatabase()); + if (auto * replicated = dynamic_cast(database.get())) + { + check_not_local_replica(replicated, query); + DatabaseReplicated::dropReplica(replicated, replicated->getZooKeeperPath(), query.replica); + } + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Database {} is not Replicated, cannot drop replica", query.getDatabase()); + LOG_TRACE(log, "Dropped replica {} of Replicated database {}", query.replica, backQuoteIfNeed(database->getDatabaseName())); + } + else if (query.is_drop_whole_replica) + { + auto databases = DatabaseCatalog::instance().getDatabases(); + auto access = getContext()->getAccess(); + bool access_is_granted_globally = access->isGranted(AccessType::SYSTEM_DROP_REPLICA); + + for (auto & elem : databases) + { + DatabasePtr & database = elem.second; + auto * replicated = dynamic_cast(database.get()); + if (!replicated) + continue; + if (!access_is_granted_globally && !access->isGranted(AccessType::SYSTEM_DROP_REPLICA, elem.first)) + { + LOG_INFO(log, "Access {} denied, skipping database {}", "SYSTEM DROP REPLICA", elem.first); + continue; + } + + check_not_local_replica(replicated, query); + DatabaseReplicated::dropReplica(replicated, replicated->getZooKeeperPath(), query.replica); + LOG_TRACE(log, "Dropped replica {} of Replicated database {}", query.replica, backQuoteIfNeed(database->getDatabaseName())); + } + } + else if (!query.replica_zk_path.empty()) + { + getContext()->checkAccess(AccessType::SYSTEM_DROP_REPLICA); + + /// This check is actually redundant, but it may prevent from some user mistakes + for (auto & elem : DatabaseCatalog::instance().getDatabases()) + if (auto * replicated = dynamic_cast(elem.second.get())) + check_not_local_replica(replicated, query); + + DatabaseReplicated::dropReplica(nullptr, query.replica_zk_path, query.replica); + LOG_INFO(log, "Dropped replica {} of Replicated database with path {}", query.replica, query.replica_zk_path); + } + else + throw Exception("Invalid query", ErrorCodes::LOGICAL_ERROR); +} + void InterpreterSystemQuery::syncReplica(ASTSystemQuery &) { getContext()->checkAccess(AccessType::SYSTEM_SYNC_REPLICA, table_id); @@ -840,16 +911,10 @@ void InterpreterSystemQuery::flushDistributed(ASTSystemQuery &) throw Exception("Table " + table_id.getNameForLogs() + " is not distributed", ErrorCodes::BAD_ARGUMENTS); } -void InterpreterSystemQuery::restartDisk(String & name) +[[noreturn]] void InterpreterSystemQuery::restartDisk(String &) { getContext()->checkAccess(AccessType::SYSTEM_RESTART_DISK); - - auto disk = getContext()->getDisk(name); - - if (DiskRestartProxy * restart_proxy = dynamic_cast(disk.get())) - restart_proxy->restart(getContext()); - else - throw Exception("Disk " + name + " doesn't have possibility to restart", ErrorCodes::BAD_ARGUMENTS); + throw Exception("SYSTEM RESTART DISK is not supported", ErrorCodes::NOT_IMPLEMENTED); } @@ -981,6 +1046,7 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster() break; } case Type::DROP_REPLICA: + case Type::DROP_DATABASE_REPLICA: { required_access.emplace_back(AccessType::SYSTEM_DROP_REPLICA, query.getDatabase(), query.getTable()); break; diff --git a/src/Interpreters/InterpreterSystemQuery.h b/src/Interpreters/InterpreterSystemQuery.h index 
af8734e8237..0058d0c9def 100644 --- a/src/Interpreters/InterpreterSystemQuery.h +++ b/src/Interpreters/InterpreterSystemQuery.h @@ -66,8 +66,9 @@ private: void dropReplica(ASTSystemQuery & query); bool dropReplicaImpl(ASTSystemQuery & query, const StoragePtr & table); + void dropDatabaseReplica(ASTSystemQuery & query); void flushDistributed(ASTSystemQuery & query); - void restartDisk(String & name); + [[noreturn]] void restartDisk(String & name); AccessRightsElements getRequiredAccessForDDLOnCluster() const; void startStopAction(StorageActionBlockType action_type, bool start); diff --git a/src/Interpreters/JIT/compileFunction.cpp b/src/Interpreters/JIT/compileFunction.cpp index e12b4894eb0..8bf0eb25b60 100644 --- a/src/Interpreters/JIT/compileFunction.cpp +++ b/src/Interpreters/JIT/compileFunction.cpp @@ -403,7 +403,7 @@ static void compileInsertAggregatesIntoResultColumns(llvm::Module & module, cons std::vector columns(functions.size()); for (size_t i = 0; i < functions.size(); ++i) { - auto return_type = functions[i].function->getReturnType(); + auto return_type = functions[i].function->getResultType(); auto * data = b.CreateLoad(column_type, b.CreateConstInBoundsGEP1_64(column_type, columns_arg, i)); auto * column_data_type = toNativeType(b, removeNullable(return_type)); diff --git a/src/Interpreters/JoinToSubqueryTransformVisitor.cpp b/src/Interpreters/JoinToSubqueryTransformVisitor.cpp index 10b122364f9..15d12de527d 100644 --- a/src/Interpreters/JoinToSubqueryTransformVisitor.cpp +++ b/src/Interpreters/JoinToSubqueryTransformVisitor.cpp @@ -153,7 +153,7 @@ private: data.addTableColumns(identifier.name(), columns); // QualifiedAsterisk's transformers start to appear at child 1 - for (auto it = qualified_asterisk->children.begin() + 1; it != qualified_asterisk->children.end(); ++it) + for (const auto * it = qualified_asterisk->children.begin() + 1; it != qualified_asterisk->children.end(); ++it) { IASTColumnsTransformer::transform(*it, columns); } @@ -209,7 +209,7 @@ struct RewriteTablesVisitorData { if (done) return; - std::vector new_tables{left, right}; + ASTs new_tables{left, right}; ast->children.swap(new_tables); done = true; } diff --git a/src/Interpreters/LogicalExpressionsOptimizer.cpp b/src/Interpreters/LogicalExpressionsOptimizer.cpp index 9e30cac2e19..35989f0dfba 100644 --- a/src/Interpreters/LogicalExpressionsOptimizer.cpp +++ b/src/Interpreters/LogicalExpressionsOptimizer.cpp @@ -313,7 +313,7 @@ void LogicalExpressionsOptimizer::cleanupOrExpressions() for (const auto & entry : garbage_map) { const auto * function = entry.first; - auto first_erased = entry.second; + auto * first_erased = entry.second; auto & operands = getFunctionOperands(function); operands.erase(first_erased, operands.end()); diff --git a/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp b/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp index 6989940323c..70773e2fffb 100644 --- a/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp +++ b/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp @@ -155,7 +155,7 @@ static ColumnsDescription createColumnsDescription(const NamesAndTypesList & col /// but this produce endless recursion in gcc-11, and leads to SIGSEGV /// (see git blame for details). 
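restartDisk() above is reduced to an unconditional NOT_IMPLEMENTED throw and is therefore marked [[noreturn]] in both the definition and the header. A standalone sketch of the attribute on an always-throwing stub:

    #include <stdexcept>
    #include <string>

    // The attribute documents (and lets the compiler verify) that this function
    // never returns normally, so callers need no fall-through handling after it.
    [[noreturn]] void restartDiskStub(const std::string & /* name */)
    {
        throw std::runtime_error("SYSTEM RESTART DISK is not supported");
    }

    int disposition(bool restart_requested)
    {
        if (restart_requested)
            restartDiskStub("default");    // control never falls through this call
        return 0;
    }

    int main()
    {
        return disposition(false);
    }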
auto column_name_and_type = columns_name_and_type.begin(); - auto declare_column_ast = columns_definition->children.begin(); + const auto * declare_column_ast = columns_definition->children.begin(); for (; column_name_and_type != columns_name_and_type.end(); column_name_and_type++, declare_column_ast++) { const auto & declare_column = (*declare_column_ast)->as(); diff --git a/src/Interpreters/ProcessList.cpp b/src/Interpreters/ProcessList.cpp index cc22ca6597e..beda10a3af2 100644 --- a/src/Interpreters/ProcessList.cpp +++ b/src/Interpreters/ProcessList.cpp @@ -69,7 +69,8 @@ static bool isUnlimitedQuery(const IAST * ast) } -ProcessList::EntryPtr ProcessList::insert(const String & query_, const IAST * ast, ContextMutablePtr query_context) +ProcessList::EntryPtr +ProcessList::insert(const String & query_, const IAST * ast, ContextMutablePtr query_context, UInt64 watch_start_nanoseconds) { EntryPtr res; @@ -218,7 +219,6 @@ ProcessList::EntryPtr ProcessList::insert(const String & query_, const IAST * as user_process_list.user_temp_data_on_disk, settings.max_temporary_data_on_disk_size_for_query)); } thread_group->query = query_; - thread_group->one_line_query = toOneLineQuery(query_); thread_group->normalized_query_hash = normalizedQueryHash(query_); /// Set query-level memory trackers @@ -243,13 +243,16 @@ ProcessList::EntryPtr ProcessList::insert(const String & query_, const IAST * as /// since allocation and deallocation could happen in different threads } - auto process_it = processes.emplace(processes.end(), std::make_shared( - query_context, - query_, - client_info, - priorities.insert(static_cast(settings.priority)), - std::move(thread_group), - query_kind)); + auto process_it = processes.emplace( + processes.end(), + std::make_shared( + query_context, + query_, + client_info, + priorities.insert(static_cast(settings.priority)), + std::move(thread_group), + query_kind, + watch_start_nanoseconds)); increaseQueryKindAmount(query_kind); @@ -344,11 +347,13 @@ QueryStatus::QueryStatus( const ClientInfo & client_info_, QueryPriorities::Handle && priority_handle_, ThreadGroupStatusPtr && thread_group_, - IAST::QueryKind query_kind_) + IAST::QueryKind query_kind_, + UInt64 watch_start_nanoseconds) : WithContext(context_) , query(query_) , client_info(client_info_) , thread_group(std::move(thread_group_)) + , watch(CLOCK_MONOTONIC, watch_start_nanoseconds, true) , priority_handle(std::move(priority_handle_)) , global_overcommit_tracker(context_->getGlobalOvercommitTracker()) , query_kind(query_kind_) @@ -522,7 +527,7 @@ QueryStatusInfo QueryStatus::getInfo(bool get_thread_list, bool get_profile_even res.query = query; res.client_info = client_info; - res.elapsed_seconds = watch.elapsedSeconds(); + res.elapsed_microseconds = watch.elapsedMicroseconds(); res.is_cancelled = is_killed.load(std::memory_order_relaxed); res.is_all_data_sent = is_all_data_sent.load(std::memory_order_relaxed); res.read_rows = progress_in.read_rows; diff --git a/src/Interpreters/ProcessList.h b/src/Interpreters/ProcessList.h index 5fbdce358f9..34edfc5a2e2 100644 --- a/src/Interpreters/ProcessList.h +++ b/src/Interpreters/ProcessList.h @@ -53,7 +53,7 @@ class ProcessListEntry; struct QueryStatusInfo { String query; - double elapsed_seconds; + UInt64 elapsed_microseconds; size_t read_rows; size_t read_bytes; size_t total_rows; @@ -142,15 +142,14 @@ protected: CurrentMetrics::Increment num_queries_increment; public: - QueryStatus( ContextPtr context_, const String & query_, const ClientInfo & client_info_, 
QueryPriorities::Handle && priority_handle_, ThreadGroupStatusPtr && thread_group_, - IAST::QueryKind query_kind_ - ); + IAST::QueryKind query_kind_, + UInt64 watch_start_nanoseconds); ~QueryStatus(); @@ -221,6 +220,9 @@ public: bool checkTimeLimit(); /// Same as checkTimeLimit but it never throws [[nodiscard]] bool checkTimeLimitSoft(); + + /// Get the reference for the start of the query. Used to synchronize with other Stopwatches + UInt64 getQueryCPUStartTime() { return watch.getStart(); } }; using QueryStatusPtr = std::shared_ptr; @@ -382,7 +384,7 @@ public: * If timeout is passed - throw an exception. * Don't count KILL QUERY queries. */ - EntryPtr insert(const String & query_, const IAST * ast, ContextMutablePtr query_context); + EntryPtr insert(const String & query_, const IAST * ast, ContextMutablePtr query_context, UInt64 watch_start_nanoseconds); /// Number of currently executing queries. size_t size() const { return processes.size(); } diff --git a/src/Interpreters/ServerAsynchronousMetrics.cpp b/src/Interpreters/ServerAsynchronousMetrics.cpp index dc4a2a8e435..0777ffd6c44 100644 --- a/src/Interpreters/ServerAsynchronousMetrics.cpp +++ b/src/Interpreters/ServerAsynchronousMetrics.cpp @@ -250,7 +250,7 @@ void ServerAsynchronousMetrics::updateImpl(AsynchronousMetricValues & new_values if (StorageReplicatedMergeTree * table_replicated_merge_tree = typeid_cast(table.get())) { - StorageReplicatedMergeTree::Status status; + ReplicatedTableStatus status; table_replicated_merge_tree->getStatus(status, false); calculateMaxAndSum(max_queue_size, sum_queue_size, status.queue.queue_size); diff --git a/src/Interpreters/Set.h b/src/Interpreters/Set.h index 44f543ce222..bafb0dcea7a 100644 --- a/src/Interpreters/Set.h +++ b/src/Interpreters/Set.h @@ -18,7 +18,7 @@ struct Range; class Context; class IFunctionBase; -using FunctionBasePtr = std::shared_ptr; +using FunctionBasePtr = std::shared_ptr; class Chunk; diff --git a/src/Interpreters/TemporaryDataOnDisk.cpp b/src/Interpreters/TemporaryDataOnDisk.cpp index c5ae6f6c885..9e9389451b7 100644 --- a/src/Interpreters/TemporaryDataOnDisk.cpp +++ b/src/Interpreters/TemporaryDataOnDisk.cpp @@ -7,8 +7,11 @@ #include #include #include +#include +#include #include +#include namespace DB { @@ -20,12 +23,12 @@ namespace ErrorCodes extern const int NOT_ENOUGH_SPACE; } + void TemporaryDataOnDiskScope::deltaAllocAndCheck(ssize_t compressed_delta, ssize_t uncompressed_delta) { if (parent) parent->deltaAllocAndCheck(compressed_delta, uncompressed_delta); - /// check that we don't go negative if ((compressed_delta < 0 && stat.compressed_size < static_cast(-compressed_delta)) || (uncompressed_delta < 0 && stat.uncompressed_size < static_cast(-uncompressed_delta))) @@ -35,7 +38,8 @@ void TemporaryDataOnDiskScope::deltaAllocAndCheck(ssize_t compressed_delta, ssiz size_t new_consumprion = stat.compressed_size + compressed_delta; if (compressed_delta > 0 && limit && new_consumprion > limit) - throw Exception(ErrorCodes::TOO_MANY_ROWS_OR_BYTES, "Limit for temporary files size exceeded"); + throw Exception(ErrorCodes::TOO_MANY_ROWS_OR_BYTES, + "Limit for temporary files size exceeded (would consume {} / {} bytes)", new_consumprion, limit); stat.compressed_size += compressed_delta; stat.uncompressed_size += uncompressed_delta; @@ -43,6 +47,31 @@ void TemporaryDataOnDiskScope::deltaAllocAndCheck(ssize_t compressed_delta, ssiz TemporaryFileStream & TemporaryDataOnDisk::createStream(const Block & header, size_t max_file_size) { + if (file_cache) + return 
createStreamToCacheFile(header, max_file_size); + else if (volume) + return createStreamToRegularFile(header, max_file_size); + + throw Exception("TemporaryDataOnDiskScope has no cache and no volume", ErrorCodes::LOGICAL_ERROR); +} + +TemporaryFileStream & TemporaryDataOnDisk::createStreamToCacheFile(const Block & header, size_t max_file_size) +{ + if (!file_cache) + throw Exception("TemporaryDataOnDiskScope has no cache", ErrorCodes::LOGICAL_ERROR); + + auto holder = file_cache->set(FileSegment::Key::random(), 0, std::max(10_MiB, max_file_size), CreateFileSegmentSettings(FileSegmentKind::Temporary, /* unbounded */ true)); + + std::lock_guard lock(mutex); + TemporaryFileStreamPtr & tmp_stream = streams.emplace_back(std::make_unique(std::move(holder), header, this)); + return *tmp_stream; +} + +TemporaryFileStream & TemporaryDataOnDisk::createStreamToRegularFile(const Block & header, size_t max_file_size) +{ + if (!volume) + throw Exception("TemporaryDataOnDiskScope has no volume", ErrorCodes::LOGICAL_ERROR); + DiskPtr disk; if (max_file_size > 0) { @@ -63,7 +92,6 @@ TemporaryFileStream & TemporaryDataOnDisk::createStream(const Block & header, si return *tmp_stream; } - std::vector TemporaryDataOnDisk::getStreams() const { std::vector res; @@ -83,18 +111,40 @@ bool TemporaryDataOnDisk::empty() const struct TemporaryFileStream::OutputWriter { OutputWriter(const String & path, const Block & header_) - : out_file_buf(path) - , out_compressed_buf(out_file_buf) + : out_buf(std::make_unique(path)) + , out_compressed_buf(*out_buf) , out_writer(out_compressed_buf, DBMS_TCP_PROTOCOL_VERSION, header_) { + LOG_TEST(&Poco::Logger::get("TemporaryFileStream"), "Writing to {}", path); } - void write(const Block & block) + OutputWriter(std::unique_ptr out_buf_, const Block & header_) + : out_buf(std::move(out_buf_)) + , out_compressed_buf(*out_buf) + , out_writer(out_compressed_buf, DBMS_TCP_PROTOCOL_VERSION, header_) + { + LOG_TEST(&Poco::Logger::get("TemporaryFileStream"), + "Writing to {}", + static_cast(out_buf.get())->getFileName()); + } + + size_t write(const Block & block) { if (finalized) throw Exception("Cannot write to finalized stream", ErrorCodes::LOGICAL_ERROR); - out_writer.write(block); + size_t written_bytes = out_writer.write(block); num_rows += block.rows(); + return written_bytes; + } + + void flush() + { + if (finalized) + throw Exception("Cannot flush finalized stream", ErrorCodes::LOGICAL_ERROR); + + out_compressed_buf.next(); + out_buf->next(); + out_writer.flush(); } void finalize() @@ -108,7 +158,7 @@ struct TemporaryFileStream::OutputWriter out_writer.flush(); out_compressed_buf.finalize(); - out_file_buf.finalize(); + out_buf->finalize(); } ~OutputWriter() @@ -123,7 +173,7 @@ struct TemporaryFileStream::OutputWriter } } - WriteBufferFromFile out_file_buf; + std::unique_ptr out_buf; CompressedWriteBuffer out_compressed_buf; NativeWriter out_writer; @@ -139,6 +189,7 @@ struct TemporaryFileStream::InputReader , in_compressed_buf(in_file_buf) , in_reader(in_compressed_buf, header_, DBMS_TCP_PROTOCOL_VERSION) { + LOG_TEST(&Poco::Logger::get("TemporaryFileStream"), "Reading {} from {}", header_.dumpStructure(), path); } explicit InputReader(const String & path) @@ -146,9 +197,13 @@ struct TemporaryFileStream::InputReader , in_compressed_buf(in_file_buf) , in_reader(in_compressed_buf, DBMS_TCP_PROTOCOL_VERSION) { + LOG_TEST(&Poco::Logger::get("TemporaryFileStream"), "Reading from {}", path); } - Block read() { return in_reader.read(); } + Block read() + { + return in_reader.read(); + } 
ReadBufferFromFile in_file_buf; CompressedReadBuffer in_compressed_buf; @@ -163,13 +218,34 @@ TemporaryFileStream::TemporaryFileStream(TemporaryFileOnDiskHolder file_, const { } -void TemporaryFileStream::write(const Block & block) +TemporaryFileStream::TemporaryFileStream(FileSegmentsHolder && segments_, const Block & header_, TemporaryDataOnDisk * parent_) + : parent(parent_) + , header(header_) + , segment_holder(std::move(segments_)) +{ + if (segment_holder.file_segments.size() != 1) + throw Exception(ErrorCodes::LOGICAL_ERROR, "TemporaryFileStream can be created only from single segment"); + auto & segment = segment_holder.file_segments.front(); + auto out_buf = std::make_unique(segment.get()); + out_writer = std::make_unique(std::move(out_buf), header); +} + +size_t TemporaryFileStream::write(const Block & block) { if (!out_writer) throw Exception("Writing has been finished", ErrorCodes::LOGICAL_ERROR); updateAllocAndCheck(); - out_writer->write(block); + size_t bytes_written = out_writer->write(block); + return bytes_written; +} + +void TemporaryFileStream::flush() +{ + if (!out_writer) + throw Exception("Writing has been finished", ErrorCodes::LOGICAL_ERROR); + + out_writer->flush(); } TemporaryFileStream::Stat TemporaryFileStream::finishWriting() @@ -206,7 +282,7 @@ Block TemporaryFileStream::read() if (!in_reader) { - in_reader = std::make_unique(file->getPath(), header); + in_reader = std::make_unique(getPath(), header); } Block block = in_reader->read(); @@ -228,7 +304,7 @@ void TemporaryFileStream::updateAllocAndCheck() { throw Exception(ErrorCodes::LOGICAL_ERROR, "Temporary file {} size decreased after write: compressed: {} -> {}, uncompressed: {} -> {}", - file->getPath(), new_compressed_size, stat.compressed_size, new_uncompressed_size, stat.uncompressed_size); + getPath(), new_compressed_size, stat.compressed_size, new_uncompressed_size, stat.uncompressed_size); } parent->deltaAllocAndCheck(new_compressed_size - stat.compressed_size, new_uncompressed_size - stat.uncompressed_size); @@ -239,17 +315,11 @@ void TemporaryFileStream::updateAllocAndCheck() bool TemporaryFileStream::isEof() const { - return file == nullptr; + return file == nullptr && segment_holder.empty(); } void TemporaryFileStream::release() { - if (file) - { - file.reset(); - parent->deltaAllocAndCheck(-stat.compressed_size, -stat.uncompressed_size); - } - if (in_reader) in_reader.reset(); @@ -258,6 +328,25 @@ void TemporaryFileStream::release() out_writer->finalize(); out_writer.reset(); } + + if (file) + { + file.reset(); + parent->deltaAllocAndCheck(-stat.compressed_size, -stat.uncompressed_size); + } + + if (!segment_holder.empty()) + segment_holder.reset(); +} + +String TemporaryFileStream::getPath() const +{ + if (file) + return file->getPath(); + if (!segment_holder.file_segments.empty()) + return segment_holder.file_segments.front()->getPathInLocalCache(); + + throw Exception(ErrorCodes::LOGICAL_ERROR, "TemporaryFileStream has no file"); } TemporaryFileStream::~TemporaryFileStream() diff --git a/src/Interpreters/TemporaryDataOnDisk.h b/src/Interpreters/TemporaryDataOnDisk.h index 11edc8700d2..1b56f953d17 100644 --- a/src/Interpreters/TemporaryDataOnDisk.h +++ b/src/Interpreters/TemporaryDataOnDisk.h @@ -6,6 +6,8 @@ #include #include #include +#include +#include namespace CurrentMetrics @@ -44,8 +46,12 @@ public: : volume(std::move(volume_)), limit(limit_) {} + explicit TemporaryDataOnDiskScope(VolumePtr volume_, FileCache * file_cache_, size_t limit_) + : volume(std::move(volume_)), 
file_cache(file_cache_), limit(limit_) + {} + explicit TemporaryDataOnDiskScope(TemporaryDataOnDiskScopePtr parent_, size_t limit_) - : parent(std::move(parent_)), volume(parent->volume), limit(limit_) + : parent(std::move(parent_)), volume(parent->volume), file_cache(parent->file_cache), limit(limit_) {} /// TODO: remove @@ -56,7 +62,9 @@ protected: void deltaAllocAndCheck(ssize_t compressed_delta, ssize_t uncompressed_delta); TemporaryDataOnDiskScopePtr parent = nullptr; - VolumePtr volume; + + VolumePtr volume = nullptr; + FileCache * file_cache = nullptr; StatAtomic stat; size_t limit = 0; @@ -93,6 +101,9 @@ public: const StatAtomic & getStat() const { return stat; } private: + TemporaryFileStream & createStreamToCacheFile(const Block & header, size_t max_file_size); + TemporaryFileStream & createStreamToRegularFile(const Block & header, size_t max_file_size); + mutable std::mutex mutex; std::vector streams TSA_GUARDED_BY(mutex); @@ -117,14 +128,18 @@ public: }; TemporaryFileStream(TemporaryFileOnDiskHolder file_, const Block & header_, TemporaryDataOnDisk * parent_); + TemporaryFileStream(FileSegmentsHolder && segments_, const Block & header_, TemporaryDataOnDisk * parent_); + + size_t write(const Block & block); + void flush(); - void write(const Block & block); Stat finishWriting(); bool isWriteFinished() const; Block read(); - const String path() const { return file->getPath(); } + String getPath() const; + Block getHeader() const { return header; } /// Read finished and file released @@ -142,7 +157,9 @@ private: Block header; + /// Data can be stored in file directly or in the cache TemporaryFileOnDiskHolder file; + FileSegmentsHolder segment_holder; Stat stat; diff --git a/src/Interpreters/ThreadStatusExt.cpp b/src/Interpreters/ThreadStatusExt.cpp index ad7884ade55..e96a8a4b188 100644 --- a/src/Interpreters/ThreadStatusExt.cpp +++ b/src/Interpreters/ThreadStatusExt.cpp @@ -97,7 +97,7 @@ void CurrentThread::defaultThreadDeleter() void ThreadStatus::setupState(const ThreadGroupStatusPtr & thread_group_) { - assertState({ThreadState::DetachedFromQuery}, __PRETTY_FUNCTION__); + assertState(ThreadState::DetachedFromQuery, __PRETTY_FUNCTION__); /// Attach or init current thread to thread group and copy useful information from it thread_group = thread_group_; @@ -324,7 +324,7 @@ void ThreadStatus::detachQuery(bool exit_if_already_detached, bool thread_exits) return; } - assertState({ThreadState::AttachedToQuery}, __PRETTY_FUNCTION__); + assertState(ThreadState::AttachedToQuery, __PRETTY_FUNCTION__); finalizeQueryProfiler(); finalizePerformanceCounters(); diff --git a/src/Interpreters/TraceCollector.cpp b/src/Interpreters/TraceCollector.cpp index 367249f1289..40a5b1f228d 100644 --- a/src/Interpreters/TraceCollector.cpp +++ b/src/Interpreters/TraceCollector.cpp @@ -31,6 +31,7 @@ TraceCollector::TraceCollector(std::shared_ptr trace_log_) TraceCollector::~TraceCollector() +try { if (!thread.joinable()) LOG_ERROR(&Poco::Logger::get("TraceCollector"), "TraceCollector thread is malformed and cannot be joined"); @@ -39,6 +40,10 @@ TraceCollector::~TraceCollector() TraceSender::pipe.close(); } +catch (...) 
+{ + tryLogCurrentException("TraceCollector"); +} /** Sends TraceCollector stop message @@ -97,9 +102,6 @@ void TraceCollector::run() Int64 size; readPODBinary(size, in); - UInt64 ptr; - readPODBinary(ptr, in); - ProfileEvents::Event event; readPODBinary(event, in); @@ -115,7 +117,7 @@ void TraceCollector::run() UInt64 time = static_cast(ts.tv_sec * 1000000000LL + ts.tv_nsec); UInt64 time_in_microseconds = static_cast((ts.tv_sec * 1000000LL) + (ts.tv_nsec / 1000)); - TraceLogElement element{time_t(time / 1000000000), time_in_microseconds, time, trace_type, thread_id, query_id, trace, size, ptr, event, increment}; + TraceLogElement element{time_t(time / 1000000000), time_in_microseconds, time, trace_type, thread_id, query_id, trace, size, event, increment}; trace_log->add(element); } } diff --git a/src/Interpreters/TraceLog.cpp b/src/Interpreters/TraceLog.cpp index cd5f965a679..0408ebe504b 100644 --- a/src/Interpreters/TraceLog.cpp +++ b/src/Interpreters/TraceLog.cpp @@ -38,7 +38,6 @@ NamesAndTypesList TraceLogElement::getNamesAndTypes() {"query_id", std::make_shared()}, {"trace", std::make_shared(std::make_shared())}, {"size", std::make_shared()}, - {"ptr", std::make_shared()}, {"event", std::make_shared(std::make_shared())}, {"increment", std::make_shared()}, }; @@ -58,7 +57,6 @@ void TraceLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insertData(query_id.data(), query_id.size()); columns[i++]->insert(trace); columns[i++]->insert(size); - columns[i++]->insert(ptr); String event_name; if (event != ProfileEvents::end()) diff --git a/src/Interpreters/TraceLog.h b/src/Interpreters/TraceLog.h index 71aec0b50c4..c481f033a72 100644 --- a/src/Interpreters/TraceLog.h +++ b/src/Interpreters/TraceLog.h @@ -27,10 +27,8 @@ struct TraceLogElement UInt64 thread_id{}; String query_id{}; Array trace{}; - /// Allocation size in bytes for TraceType::Memory and TraceType::MemorySample. + /// Allocation size in bytes for TraceType::Memory. Int64 size{}; - /// Allocation ptr for TraceType::MemorySample. - UInt64 ptr{}; /// ProfileEvent for TraceType::ProfileEvent. ProfileEvents::Event event{ProfileEvents::end()}; /// Increment of profile event for TraceType::ProfileEvent. 
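The TraceCollector destructor above is now wrapped so that a failure while sending the stop message or joining the collector thread is logged instead of escaping during shutdown. A minimal, self-contained sketch of that idea, using an ordinary try/catch inside the destructor body and std::cerr as a stand-in for tryLogCurrentException (the Collector class below is hypothetical, not the ClickHouse type):

    #include <iostream>
    #include <thread>

    class Collector
    {
    public:
        Collector() : thread([] { /* drain trace messages from a pipe */ }) {}

        ~Collector()
        {
            try
            {
                stop();                     // may throw, e.g. if the pipe is already closed
                if (thread.joinable())
                    thread.join();
            }
            catch (...)
            {
                // Never let an exception escape a destructor during shutdown.
                std::cerr << "Collector shutdown failed\n";
            }
        }

    private:
        void stop() { /* send the stop message through the pipe */ }
        std::thread thread;
    };

    int main()
    {
        Collector collector;                // errors during destruction are logged, not thrown
    }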
diff --git a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index bc862ed7b38..2ca1174f704 100644 --- a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -299,7 +299,7 @@ void TranslateQualifiedNamesMatcher::visit(ASTExpressionList & node, const ASTPt } // QualifiedAsterisk's transformers start to appear at child 1 - for (auto it = qualified_asterisk->children.begin() + 1; it != qualified_asterisk->children.end(); ++it) + for (const auto * it = qualified_asterisk->children.begin() + 1; it != qualified_asterisk->children.end(); ++it) { IASTColumnsTransformer::transform(*it, columns); } diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index 6461a35dae6..6a8c9dc7dbd 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -688,59 +688,6 @@ void optimizeFunctionsToSubcolumns(ASTPtr & query, const StorageMetadataPtr & me RewriteFunctionToSubcolumnVisitor(data).visit(query); } -std::shared_ptr getQuantileFuseCandidate(const String & func_name, std::vector & functions) -{ - if (functions.size() < 2) - return nullptr; - - const auto & common_arguments = (*functions[0])->as()->arguments->children; - auto func_base = makeASTFunction(GatherFunctionQuantileData::getFusedName(func_name)); - func_base->arguments->children = common_arguments; - func_base->parameters = std::make_shared(); - - for (const auto * ast : functions) - { - assert(ast && *ast); - const auto * func = (*ast)->as(); - assert(func && func->parameters->as()); - const ASTs & parameters = func->parameters->as().children; - if (parameters.size() != 1) - return nullptr; /// query is illegal, give up - func_base->parameters->children.push_back(parameters[0]); - } - return func_base; -} - -/// Rewrites multi quantile()() functions with the same arguments to quantiles()()[] -/// eg:SELECT quantile(0.5)(x), quantile(0.9)(x), quantile(0.95)(x) FROM... -/// rewrite to : SELECT quantiles(0.5, 0.9, 0.95)(x)[1], quantiles(0.5, 0.9, 0.95)(x)[2], quantiles(0.5, 0.9, 0.95)(x)[3] FROM ... -void optimizeFuseQuantileFunctions(ASTPtr & query) -{ - GatherFunctionQuantileVisitor::Data data{}; - GatherFunctionQuantileVisitor(data).visit(query); - for (auto & candidate : data.fuse_quantile) - { - String func_name = candidate.first; - auto & args_to_functions = candidate.second; - - /// Try to fuse multiply `quantile*` Function to plural - for (auto it : args_to_functions.arg_map_function) - { - std::vector & functions = it.second; - auto func_base = getQuantileFuseCandidate(func_name, functions); - if (!func_base) - continue; - for (size_t i = 0; i < functions.size(); ++i) - { - std::shared_ptr ast_new = makeASTFunction("arrayElement", func_base, std::make_shared(i + 1)); - if (const auto & alias = (*functions[i])->tryGetAlias(); !alias.empty()) - ast_new->setAlias(alias); - *functions[i] = ast_new; - } - } - } -} - void optimizeOrLikeChain(ASTPtr & query) { ConvertFunctionOrLikeVisitor::Data data = {}; @@ -890,9 +837,6 @@ void TreeOptimizer::apply(ASTPtr & query, TreeRewriterResult & result, /// Remove duplicated columns from USING(...). 
optimizeUsing(select_query); - if (settings.optimize_syntax_fuse_functions) - optimizeFuseQuantileFunctions(query); - if (settings.optimize_or_like_chain && settings.allow_hyperscan && settings.max_hyperscan_regexp_length == 0 diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index e0da9e77b81..20c14b8d7b6 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include @@ -62,7 +63,6 @@ namespace ErrorCodes extern const int EMPTY_LIST_OF_COLUMNS_QUERIED; extern const int EMPTY_NESTED_TABLE; extern const int EXPECTED_ALL_OR_ANY; - extern const int INCOMPATIBLE_TYPE_OF_JOIN; extern const int INVALID_JOIN_ON_EXPRESSION; extern const int LOGICAL_ERROR; extern const int NOT_IMPLEMENTED; @@ -573,7 +573,7 @@ void removeUnneededColumnsFromSelectClause(ASTSelectQuery * select_query, const auto & children = select_query->interpolate()->children; if (!children.empty()) { - for (auto it = children.begin(); it != children.end();) + for (auto * it = children.begin(); it != children.end();) { if (remove_columns.contains((*it)->as()->column)) it = select_query->interpolate()->children.erase(it); @@ -715,32 +715,34 @@ std::optional tryEvaluateConstCondition(ASTPtr expr, ContextPtr context) return res > 0; } -bool tryJoinOnConst(TableJoin & analyzed_join, ASTPtr & on_expression, ContextPtr context) +bool tryJoinOnConst(TableJoin & analyzed_join, const ASTPtr & on_expression, ContextPtr context) { - bool join_on_value; - if (auto eval_const_res = tryEvaluateConstCondition(on_expression, context)) - join_on_value = *eval_const_res; - else + if (!analyzed_join.isEnabledAlgorithm(JoinAlgorithm::HASH)) return false; - if (!analyzed_join.isEnabledAlgorithm(JoinAlgorithm::HASH)) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, - "JOIN ON constant ({}) supported only with join algorithm 'hash'", - queryToString(on_expression)); + if (analyzed_join.strictness() == JoinStrictness::Asof) + return false; - on_expression = nullptr; - if (join_on_value) - { - LOG_DEBUG(&Poco::Logger::get("TreeRewriter"), "Join on constant executed as cross join"); - analyzed_join.resetToCross(); - } - else - { - LOG_DEBUG(&Poco::Logger::get("TreeRewriter"), "Join on constant executed as empty join"); - analyzed_join.resetKeys(); - } + if (analyzed_join.isSpecialStorage()) + return false; - return true; + if (auto eval_const_res = tryEvaluateConstCondition(on_expression, context)) + { + if (eval_const_res.value()) + { + /// JOIN ON 1 == 1 + LOG_DEBUG(&Poco::Logger::get("TreeRewriter"), "Join on constant executed as cross join"); + analyzed_join.resetToCross(); + } + else + { + /// JOIN ON 1 != 1 + LOG_DEBUG(&Poco::Logger::get("TreeRewriter"), "Join on constant executed as empty join"); + analyzed_join.resetKeys(); + } + return true; + } + return false; } /// Find the columns that are obtained by JOIN. 
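The rewritten tryJoinOnConst above declines quietly (returns false) whenever the optimization does not apply, instead of throwing, and the caller then falls back to ordinary key collection. A simplified, self-contained sketch of that decision flow; the types and member names here are illustrative stand-ins, not the real TableJoin API:

    #include <iostream>
    #include <optional>

    enum class JoinAlgorithm { Hash, Merge };
    enum class Strictness { All, Asof };

    struct AnalyzedJoin
    {
        JoinAlgorithm algorithm = JoinAlgorithm::Hash;
        Strictness strictness = Strictness::All;
        bool is_special_storage = false;

        void resetToCross() { std::cout << "JOIN ON constant executed as cross join\n"; }
        void resetKeys()    { std::cout << "JOIN ON constant executed as empty join\n"; }
    };

    /// Mirrors the new control flow: bail out early for unsupported cases,
    /// otherwise fold "JOIN ON <constant>" into a cross join or an empty join.
    bool tryJoinOnConst(AnalyzedJoin & join, std::optional<bool> evaluated_condition)
    {
        if (join.algorithm != JoinAlgorithm::Hash)
            return false;
        if (join.strictness == Strictness::Asof)
            return false;
        if (join.is_special_storage)
            return false;

        if (!evaluated_condition)           // the ON expression is not a constant
            return false;

        if (*evaluated_condition)           // e.g. JOIN ON 1 = 1
            join.resetToCross();
        else                                // e.g. JOIN ON 1 = 2
            join.resetKeys();
        return true;
    }

    int main()
    {
        AnalyzedJoin join;
        tryJoinOnConst(join, true);         // cross join
        tryJoinOnConst(join, false);        // empty join
        tryJoinOnConst(join, std::nullopt); // not handled, caller collects keys as usual
    }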
@@ -759,6 +761,13 @@ void collectJoinedColumns(TableJoin & analyzed_join, ASTTableJoin & table_join, } else if (table_join.on_expression) { + bool join_on_const_ok = tryJoinOnConst(analyzed_join, table_join.on_expression, context); + if (join_on_const_ok) + { + table_join.on_expression = nullptr; + return; + } + bool is_asof = (table_join.strictness == JoinStrictness::Asof); CollectJoinOnKeysVisitor::Data data{analyzed_join, tables[0], tables[1], aliases, is_asof}; @@ -779,44 +788,22 @@ void collectJoinedColumns(TableJoin & analyzed_join, ASTTableJoin & table_join, } auto check_keys_empty = [] (auto e) { return e.key_names_left.empty(); }; + bool any_keys_empty = std::any_of(analyzed_join.getClauses().begin(), analyzed_join.getClauses().end(), check_keys_empty); - /// All clauses should to have keys or be empty simultaneously - bool all_keys_empty = std::all_of(analyzed_join.getClauses().begin(), analyzed_join.getClauses().end(), check_keys_empty); - if (all_keys_empty) + if (any_keys_empty) + throw DB::Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, + "Cannot get JOIN keys from JOIN ON section: '{}', found keys: {}", + queryToString(table_join.on_expression), TableJoin::formatClauses(analyzed_join.getClauses())); + + if (is_asof) { - /// Try join on constant (cross or empty join) or fail - if (is_asof) - throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, - "Cannot get JOIN keys from JOIN ON section: {}", queryToString(table_join.on_expression)); - - if (const auto storage_join = analyzed_join.getStorageJoin()) - throw Exception(ErrorCodes::INCOMPATIBLE_TYPE_OF_JOIN, - "StorageJoin keys should match JOIN keys, expected JOIN ON [{}]", fmt::join(storage_join->getKeyNames(), ", ")); - - bool join_on_const_ok = tryJoinOnConst(analyzed_join, table_join.on_expression, context); - if (!join_on_const_ok) - throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, - "Cannot get JOIN keys from JOIN ON section: {}", queryToString(table_join.on_expression)); + if (!analyzed_join.oneDisjunct()) + throw DB::Exception(ErrorCodes::NOT_IMPLEMENTED, "ASOF join doesn't support multiple ORs for keys in JOIN ON section"); + data.asofToJoinKeys(); } - else - { - bool any_keys_empty = std::any_of(analyzed_join.getClauses().begin(), analyzed_join.getClauses().end(), check_keys_empty); - if (any_keys_empty) - throw DB::Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, - "Cannot get JOIN keys from JOIN ON section: '{}'", - queryToString(table_join.on_expression)); - - if (is_asof) - { - if (!analyzed_join.oneDisjunct()) - throw DB::Exception(ErrorCodes::NOT_IMPLEMENTED, "ASOF join doesn't support multiple ORs for keys in JOIN ON section"); - data.asofToJoinKeys(); - } - - if (!analyzed_join.oneDisjunct() && !analyzed_join.isEnabledAlgorithm(JoinAlgorithm::HASH)) - throw DB::Exception(ErrorCodes::NOT_IMPLEMENTED, "Only `hash` join supports multiple ORs for keys in JOIN ON section"); - } + if (!analyzed_join.oneDisjunct() && !analyzed_join.isEnabledAlgorithm(JoinAlgorithm::HASH)) + throw DB::Exception(ErrorCodes::NOT_IMPLEMENTED, "Only `hash` join supports multiple ORs for keys in JOIN ON section"); } } @@ -1243,16 +1230,24 @@ void TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select if (storage) { std::vector hint_name{}; + std::set helper_hint_name{}; for (const auto & name : columns_context.requiredColumns()) { auto hints = storage->getHints(name); - hint_name.insert(hint_name.end(), hints.begin(), hints.end()); + for (const auto & hint : hints) + { + // We want to preserve the ordering 
of the hints + // (as they are ordered by Levenshtein distance) + auto [_, inserted] = helper_hint_name.insert(hint); + if (inserted) + hint_name.push_back(hint); + } } if (!hint_name.empty()) { ss << ", maybe you meant: "; - ss << toString(hint_name); + ss << toStringWithFinalSeparator(hint_name, " or "); } } else diff --git a/src/Interpreters/applyTableOverride.cpp b/src/Interpreters/applyTableOverride.cpp index e614e58b06b..8e88047c13c 100644 --- a/src/Interpreters/applyTableOverride.cpp +++ b/src/Interpreters/applyTableOverride.cpp @@ -26,10 +26,10 @@ void applyTableOverrideToCreateQuery(const ASTTableOverride & override, ASTCreat if (!create_query->columns_list->columns) create_query->columns_list->set(create_query->columns_list->columns, std::make_shared()); auto & dest_children = create_query->columns_list->columns->children; - auto exists = std::find_if(dest_children.begin(), dest_children.end(), [&](ASTPtr node) -> bool - { - return node->as()->name == override_column->name; - }); + auto * exists = std::find_if( + dest_children.begin(), + dest_children.end(), + [&](ASTPtr node) -> bool { return node->as()->name == override_column->name; }); /// For columns, only allow adding ALIAS (non-physical) for now. /// TODO: This logic should instead be handled by validation that is /// executed from InterpreterCreateQuery / InterpreterAlterQuery. @@ -52,10 +52,10 @@ void applyTableOverrideToCreateQuery(const ASTTableOverride & override, ASTCreat if (!create_query->columns_list->indices) create_query->columns_list->set(create_query->columns_list->indices, std::make_shared()); auto & dest_children = create_query->columns_list->indices->children; - auto exists = std::find_if(dest_children.begin(), dest_children.end(), [&](ASTPtr node) -> bool - { - return node->as()->name == override_index->name; - }); + auto * exists = std::find_if( + dest_children.begin(), + dest_children.end(), + [&](ASTPtr node) -> bool { return node->as()->name == override_index->name; }); if (exists == dest_children.end()) dest_children.emplace_back(override_index_ast); else @@ -72,10 +72,10 @@ void applyTableOverrideToCreateQuery(const ASTTableOverride & override, ASTCreat if (!create_query->columns_list->constraints) create_query->columns_list->set(create_query->columns_list->constraints, std::make_shared()); auto & dest_children = create_query->columns_list->constraints->children; - auto exists = std::find_if(dest_children.begin(), dest_children.end(), [&](ASTPtr node) -> bool - { - return node->as()->name == override_constraint->name; - }); + auto * exists = std::find_if( + dest_children.begin(), + dest_children.end(), + [&](ASTPtr node) -> bool { return node->as()->name == override_constraint->name; }); if (exists == dest_children.end()) dest_children.emplace_back(override_constraint_ast); else @@ -92,10 +92,10 @@ void applyTableOverrideToCreateQuery(const ASTTableOverride & override, ASTCreat if (!create_query->columns_list->projections) create_query->columns_list->set(create_query->columns_list->projections, std::make_shared()); auto & dest_children = create_query->columns_list->projections->children; - auto exists = std::find_if(dest_children.begin(), dest_children.end(), [&](ASTPtr node) -> bool - { - return node->as()->name == override_projection->name; - }); + auto * exists = std::find_if( + dest_children.begin(), + dest_children.end(), + [&](ASTPtr node) -> bool { return node->as()->name == override_projection->name; }); if (exists == dest_children.end()) dest_children.emplace_back(override_projection_ast); 
else diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 2bd204a0d42..e8e30d78323 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -201,26 +201,32 @@ static void logException(ContextPtr context, QueryLogElement & elem) elem.stack_trace); } -static void onExceptionBeforeStart(const String & query_for_logging, ContextPtr context, UInt64 current_time_us, ASTPtr ast, const std::shared_ptr & query_span) +static void onExceptionBeforeStart( + const String & query_for_logging, + ContextPtr context, + ASTPtr ast, + const std::shared_ptr & query_span, + UInt64 elapsed_millliseconds) { + auto query_end_time = std::chrono::system_clock::now(); + /// Exception before the query execution. if (auto quota = context->getQuota()) quota->used(QuotaType::ERRORS, 1, /* check_exceeded = */ false); const Settings & settings = context->getSettingsRef(); + const auto & client_info = context->getClientInfo(); + /// Log the start of query execution into the table if necessary. QueryLogElement elem; elem.type = QueryLogElementType::EXCEPTION_BEFORE_START; - - // all callers to onExceptionBeforeStart method construct the timespec for event_time and - // event_time_microseconds from the same time point. So, it can be assumed that both of these - // times are equal up to the precision of a second. - elem.event_time = current_time_us / 1000000; - elem.event_time_microseconds = current_time_us; - elem.query_start_time = current_time_us / 1000000; - elem.query_start_time_microseconds = current_time_us; + elem.event_time = timeInSeconds(query_end_time); + elem.event_time_microseconds = timeInMicroseconds(query_end_time); + elem.query_start_time = client_info.initial_query_start_time; + elem.query_start_time_microseconds = client_info.initial_query_start_time_microseconds; + elem.query_duration_ms = elapsed_millliseconds; elem.current_database = context->getCurrentDatabase(); elem.query = query_for_logging; @@ -324,19 +330,32 @@ static std::tuple executeQueryImpl( /// we still have enough span logs for the execution of external queries. std::shared_ptr query_span = internal ? nullptr : std::make_shared("query"); - const auto current_time = std::chrono::system_clock::now(); + auto query_start_time = std::chrono::system_clock::now(); + + /// Used to set the watch in QueryStatus and the output formats. It is not based on query_start_time as that might be based on + /// the value passed by the client + Stopwatch start_watch{CLOCK_MONOTONIC}; auto & client_info = context->getClientInfo(); - // If it's not an internal query and we don't see an initial_query_start_time yet, initialize it - // to current time. Internal queries are those executed without an independent client context, - // thus should not set initial_query_start_time, because it might introduce data race. It's also - // possible to have unset initial_query_start_time for non-internal and non-initial queries. For - // example, the query is from an initiator that is running an old version of clickhouse. - if (!internal && client_info.initial_query_start_time == 0) + if (!internal) { - client_info.initial_query_start_time = timeInSeconds(current_time); - client_info.initial_query_start_time_microseconds = timeInMicroseconds(current_time); + // If it's not an internal query and we don't see an initial_query_start_time yet, initialize it + // to current time. 
Internal queries are those executed without an independent client context, + // thus should not set initial_query_start_time, because it might introduce data race. It's also + // possible to have unset initial_query_start_time for non-internal and non-initial queries. For + // example, the query is from an initiator that is running an old version of clickhouse. + // On the other hand, if it's initialized then take it as the start of the query + if (client_info.initial_query_start_time == 0) + { + client_info.initial_query_start_time = timeInSeconds(query_start_time); + client_info.initial_query_start_time_microseconds = timeInMicroseconds(query_start_time); + } + else + { + query_start_time = std::chrono::time_point( + std::chrono::microseconds{client_info.initial_query_start_time_microseconds}); + } } assert(internal || CurrentThread::get().getQueryContext()); @@ -413,7 +432,7 @@ static std::tuple executeQueryImpl( logQuery(query_for_logging, context, internal, stage); if (!internal) - onExceptionBeforeStart(query_for_logging, context, timeInMicroseconds(current_time), ast, query_span); + onExceptionBeforeStart(query_for_logging, context, ast, query_span, start_watch.elapsedMilliseconds()); throw; } @@ -514,7 +533,7 @@ static std::tuple executeQueryImpl( if (!internal && !ast->as()) { /// processlist also has query masked now, to avoid secrets leaks though SHOW PROCESSLIST by other users. - process_list_entry = context->getProcessList().insert(query_for_logging, ast.get(), context); + process_list_entry = context->getProcessList().insert(query_for_logging, ast.get(), context, start_watch.getStart()); context->setProcessListElement(process_list_entry->getQueryStatus()); } @@ -720,10 +739,10 @@ static std::tuple executeQueryImpl( elem.type = QueryLogElementType::QUERY_START; //-V1048 - elem.event_time = timeInSeconds(current_time); - elem.event_time_microseconds = timeInMicroseconds(current_time); - elem.query_start_time = timeInSeconds(current_time); - elem.query_start_time_microseconds = timeInMicroseconds(current_time); + elem.event_time = timeInSeconds(query_start_time); + elem.event_time_microseconds = timeInMicroseconds(query_start_time); + elem.query_start_time = timeInSeconds(query_start_time); + elem.query_start_time_microseconds = timeInMicroseconds(query_start_time); elem.current_database = context->getCurrentDatabase(); elem.query = query_for_logging; @@ -772,25 +791,29 @@ static std::tuple executeQueryImpl( } /// Common code for finish and exception callbacks - auto status_info_to_query_log = [](QueryLogElement & element, const QueryStatusInfo & info, const ASTPtr query_ast, const ContextPtr context_ptr) mutable + auto status_info_to_query_log + = [](QueryLogElement & element, const QueryStatusInfo & info, const ASTPtr query_ast, const ContextPtr context_ptr) mutable { - UInt64 query_time = static_cast(info.elapsed_seconds * 1000000); - ProfileEvents::increment(ProfileEvents::QueryTimeMicroseconds, query_time); + const auto time_now = std::chrono::system_clock::now(); + UInt64 elapsed_microseconds = info.elapsed_microseconds; + element.event_time = timeInSeconds(time_now); + element.event_time_microseconds = timeInMicroseconds(time_now); + element.query_duration_ms = elapsed_microseconds / 1000; + + ProfileEvents::increment(ProfileEvents::QueryTimeMicroseconds, elapsed_microseconds); if (query_ast->as() || query_ast->as()) { - ProfileEvents::increment(ProfileEvents::SelectQueryTimeMicroseconds, query_time); + ProfileEvents::increment(ProfileEvents::SelectQueryTimeMicroseconds, 
elapsed_microseconds); } else if (query_ast->as()) { - ProfileEvents::increment(ProfileEvents::InsertQueryTimeMicroseconds, query_time); + ProfileEvents::increment(ProfileEvents::InsertQueryTimeMicroseconds, elapsed_microseconds); } else { - ProfileEvents::increment(ProfileEvents::OtherQueryTimeMicroseconds, query_time); + ProfileEvents::increment(ProfileEvents::OtherQueryTimeMicroseconds, elapsed_microseconds); } - element.query_duration_ms = static_cast(info.elapsed_seconds * 1000); - element.read_rows = info.read_rows; element.read_bytes = info.read_bytes; @@ -844,16 +867,8 @@ static std::tuple executeQueryImpl( CurrentThread::finalizePerformanceCounters(); QueryStatusInfo info = process_list_elem->getInfo(true, context->getSettingsRef().log_profile_events); - - double elapsed_seconds = info.elapsed_seconds; - elem.type = QueryLogElementType::QUERY_FINISH; - // construct event_time and event_time_microseconds using the same time point - // so that the two times will always be equal up to a precision of a second. - const auto finish_time = std::chrono::system_clock::now(); - elem.event_time = timeInSeconds(finish_time); - elem.event_time_microseconds = timeInMicroseconds(finish_time); status_info_to_query_log(elem, info, ast, context); if (pulling_pipeline) @@ -877,9 +892,15 @@ static std::tuple executeQueryImpl( if (elem.read_rows != 0) { - LOG_INFO(&Poco::Logger::get("executeQuery"), "Read {} rows, {} in {} sec., {} rows/sec., {}/sec.", - elem.read_rows, ReadableSize(elem.read_bytes), elapsed_seconds, - static_cast(elem.read_rows / elapsed_seconds), + double elapsed_seconds = static_cast(info.elapsed_microseconds) / 1000000.0; + double rows_per_second = static_cast(elem.read_rows) / elapsed_seconds; + LOG_INFO( + &Poco::Logger::get("executeQuery"), + "Read {} rows, {} in {} sec., {} rows/sec., {}/sec.", + elem.read_rows, + ReadableSize(elem.read_bytes), + elapsed_seconds, + rows_per_second, ReadableSize(elem.read_bytes / elapsed_seconds)); } @@ -893,8 +914,8 @@ static std::tuple executeQueryImpl( if (auto processors_profile_log = context->getProcessorsProfileLog()) { ProcessorProfileLogElement processor_elem; - processor_elem.event_time = timeInSeconds(finish_time); - processor_elem.event_time_microseconds = timeInMicroseconds(finish_time); + processor_elem.event_time = elem.event_time; + processor_elem.event_time_microseconds = elem.event_time_microseconds; processor_elem.query_id = elem.client_info.current_query_id; auto get_proc_id = [](const IProcessor & proc) -> UInt64 @@ -969,7 +990,8 @@ static std::tuple executeQueryImpl( } }; - auto exception_callback = [elem, + auto exception_callback = [start_watch, + elem, context, ast, log_queries, @@ -992,14 +1014,6 @@ static std::tuple executeQueryImpl( quota->used(QuotaType::ERRORS, 1, /* check_exceeded = */ false); elem.type = QueryLogElementType::EXCEPTION_WHILE_PROCESSING; - - // event_time and event_time_microseconds are being constructed from the same time point - // to ensure that both the times will be equal up to the precision of a second. 
- const auto time_now = std::chrono::system_clock::now(); - - elem.event_time = timeInSeconds(time_now); - elem.event_time_microseconds = timeInMicroseconds(time_now); - elem.query_duration_ms = 1000 * (elem.event_time - elem.query_start_time); elem.exception_code = getCurrentExceptionCode(); elem.exception = getCurrentExceptionMessage(false); @@ -1008,12 +1022,19 @@ static std::tuple executeQueryImpl( /// Update performance counters before logging to query_log CurrentThread::finalizePerformanceCounters(); + const auto time_now = std::chrono::system_clock::now(); + elem.event_time = timeInSeconds(time_now); + elem.event_time_microseconds = timeInMicroseconds(time_now); if (process_list_elem) { QueryStatusInfo info = process_list_elem->getInfo(true, current_settings.log_profile_events, false); status_info_to_query_log(elem, info, ast, context); } + else + { + elem.query_duration_ms = start_watch.elapsedMilliseconds(); + } if (current_settings.calculate_text_stack_trace) setExceptionStackTrace(elem); @@ -1063,7 +1084,7 @@ static std::tuple executeQueryImpl( } if (!internal) - onExceptionBeforeStart(query_for_logging, context, timeInMicroseconds(current_time), ast, query_span); + onExceptionBeforeStart(query_for_logging, context, ast, query_span, start_watch.elapsedMilliseconds()); throw; } @@ -1195,7 +1216,6 @@ void executeQuery( compressed_buffer ? *compressed_buffer : *out_buf, materializeBlock(pipeline.getHeader()), context, - {}, output_format_settings); out->setAutoFlush(); diff --git a/src/Interpreters/parseColumnsListForTableFunction.cpp b/src/Interpreters/parseColumnsListForTableFunction.cpp index ed270c2118b..c44259a3ccc 100644 --- a/src/Interpreters/parseColumnsListForTableFunction.cpp +++ b/src/Interpreters/parseColumnsListForTableFunction.cpp @@ -4,6 +4,9 @@ #include #include #include +#include +#include +#include namespace DB @@ -12,9 +15,66 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; + extern const int SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY; + extern const int ILLEGAL_COLUMN; + } -ColumnsDescription parseColumnsListFromString(const std::string & structure, ContextPtr context) +void validateDataType(const DataTypePtr & type, const DataTypeValidationSettings & settings) +{ + if (!settings.allow_suspicious_low_cardinality_types) + { + if (const auto * lc_type = typeid_cast(type.get())) + { + if (!isStringOrFixedString(*removeNullable(lc_type->getDictionaryType()))) + throw Exception( + ErrorCodes::SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY, + "Creating columns of type {} is prohibited by default due to expected negative impact on performance. " + "It can be enabled with the \"allow_suspicious_low_cardinality_types\" setting.", + lc_type->getName()); + } + } + + if (!settings.allow_experimental_geo_types) + { + const auto & type_name = type->getName(); + if (type_name == "MultiPolygon" || type_name == "Polygon" || type_name == "Ring" || type_name == "Point") + { + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Cannot create column with type '{}' because experimental geo types are not allowed. Set setting " + "allow_experimental_geo_types = 1 in order to allow it", type_name); + } + } + + if (!settings.allow_experimental_object_type) + { + if (type->hasDynamicSubcolumns()) + { + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Cannot create column with type '{}' because experimental Object type is not allowed. 
" + "Set setting allow_experimental_object_type = 1 in order to allow it", type->getName()); + } + } + + if (!settings.allow_suspicious_fixed_string_types) + { + auto basic_type = removeLowCardinality(removeNullable(type)); + if (const auto * fixed_string = typeid_cast(basic_type.get())) + { + if (fixed_string->getN() > MAX_FIXEDSTRING_SIZE_WITHOUT_SUSPICIOUS) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Cannot create column with type '{}' because fixed string with size > {} is suspicious. " + "Set setting allow_suspicious_fixed_string_types = 1 in order to allow it", + type->getName(), + MAX_FIXEDSTRING_SIZE_WITHOUT_SUSPICIOUS); + } + } +} + +ColumnsDescription parseColumnsListFromString(const std::string & structure, const ContextPtr & context) { ParserColumnDeclarationList parser(true, true); const Settings & settings = context->getSettingsRef(); @@ -25,10 +85,14 @@ ColumnsDescription parseColumnsListFromString(const std::string & structure, Con if (!columns_list) throw Exception("Could not cast AST to ASTExpressionList", ErrorCodes::LOGICAL_ERROR); - return InterpreterCreateQuery::getColumnsDescription(*columns_list, context, false); + auto columns = InterpreterCreateQuery::getColumnsDescription(*columns_list, context, false); + auto validation_settings = DataTypeValidationSettings(context->getSettingsRef()); + for (const auto & [name, type] : columns.getAll()) + validateDataType(type, validation_settings); + return columns; } -bool tryParseColumnsListFromString(const std::string & structure, ColumnsDescription & columns, ContextPtr context) +bool tryParseColumnsListFromString(const std::string & structure, ColumnsDescription & columns, const ContextPtr & context) { ParserColumnDeclarationList parser(true, true); const Settings & settings = context->getSettingsRef(); @@ -47,6 +111,9 @@ bool tryParseColumnsListFromString(const std::string & structure, ColumnsDescrip try { columns = InterpreterCreateQuery::getColumnsDescription(*columns_list, context, false); + auto validation_settings = DataTypeValidationSettings(context->getSettingsRef()); + for (const auto & [name, type] : columns.getAll()) + validateDataType(type, validation_settings); return true; } catch (...) 
diff --git a/src/Interpreters/parseColumnsListForTableFunction.h b/src/Interpreters/parseColumnsListForTableFunction.h index e82a32f3d23..97923bcad77 100644 --- a/src/Interpreters/parseColumnsListForTableFunction.h +++ b/src/Interpreters/parseColumnsListForTableFunction.h @@ -2,6 +2,7 @@ #include #include +#include namespace DB @@ -9,9 +10,29 @@ namespace DB class Context; -/// Parses a common argument for table functions such as table structure given in string -ColumnsDescription parseColumnsListFromString(const std::string & structure, ContextPtr context); +struct DataTypeValidationSettings +{ + DataTypeValidationSettings() = default; -bool tryParseColumnsListFromString(const std::string & structure, ColumnsDescription & columns, ContextPtr context); + explicit DataTypeValidationSettings(const Settings & settings) + : allow_suspicious_low_cardinality_types(settings.allow_suspicious_low_cardinality_types) + , allow_experimental_geo_types(settings.allow_experimental_geo_types) + , allow_experimental_object_type(settings.allow_experimental_object_type) + , allow_suspicious_fixed_string_types(settings.allow_suspicious_fixed_string_types) + { + } + + bool allow_suspicious_low_cardinality_types = true; + bool allow_experimental_geo_types = true; + bool allow_experimental_object_type = true; + bool allow_suspicious_fixed_string_types = true; +}; + +void validateDataType(const DataTypePtr & type, const DataTypeValidationSettings & settings); + +/// Parses a common argument for table functions such as table structure given in string +ColumnsDescription parseColumnsListFromString(const std::string & structure, const ContextPtr & context); + +bool tryParseColumnsListFromString(const std::string & structure, ColumnsDescription & columns, const ContextPtr & context); } diff --git a/src/Interpreters/tests/gtest_lru_file_cache.cpp b/src/Interpreters/tests/gtest_lru_file_cache.cpp index a81e716c347..d095130f4dd 100644 --- a/src/Interpreters/tests/gtest_lru_file_cache.cpp +++ b/src/Interpreters/tests/gtest_lru_file_cache.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -14,11 +15,16 @@ #include #include #include +#include + +#include +#include +#include namespace fs = std::filesystem; +using namespace DB; -fs::path caches_dir = fs::current_path() / "lru_cache_test"; -String cache_base_path = caches_dir / "cache1" / ""; +static constexpr auto TEST_LOG_LEVEL = "debug"; void assertRange( [[maybe_unused]] size_t assert_n, DB::FileSegmentPtr file_segment, @@ -67,7 +73,7 @@ String getFileSegmentPath(const String & base_path, const DB::FileCache::Key & k return fs::path(base_path) / key_str.substr(0, 3) / key_str / DB::toString(offset); } -void download(DB::FileSegmentPtr file_segment) +void download(const std::string & cache_base_path, DB::FileSegmentPtr file_segment) { const auto & key = file_segment->key(); size_t size = file_segment->range().size(); @@ -81,30 +87,58 @@ void download(DB::FileSegmentPtr file_segment) file_segment->write(data.data(), size, file_segment->getCurrentWriteOffset()); } -void prepareAndDownload(DB::FileSegmentPtr file_segment) +void prepareAndDownload(const std::string & cache_base_path, DB::FileSegmentPtr file_segment) { - // std::cerr << "Reserving: " << file_segment->range().size() << " for: " << file_segment->range().toString() << "\n"; ASSERT_TRUE(file_segment->reserve(file_segment->range().size())); - download(file_segment); + download(cache_base_path, file_segment); } -void complete(DB::FileSegmentsHolderPtr holder) +void complete(const 
std::string & cache_base_path, const DB::FileSegmentsHolderPtr & holder) { for (auto it = holder->begin(); it != holder->end(); ++it) { ASSERT_TRUE((*it)->getOrSetDownloader() == DB::FileSegment::getCallerId()); - prepareAndDownload(*it); + prepareAndDownload(cache_base_path, *it); (*it)->complete(); } } - -TEST(FileCache, get) +class FileCacheTest : public ::testing::Test { - if (fs::exists(cache_base_path)) - fs::remove_all(cache_base_path); - fs::create_directories(cache_base_path); +public: + static void setupLogs(const std::string & level) + { + Poco::AutoPtr channel(new Poco::ConsoleChannel(std::cerr)); + Poco::Logger::root().setChannel(channel); + Poco::Logger::root().setLevel(level); + } + + void SetUp() override + { + if (const char * test_log_level = std::getenv("TEST_LOG_LEVEL")) // NOLINT(concurrency-mt-unsafe) + setupLogs(test_log_level); + else + setupLogs(TEST_LOG_LEVEL); + + if (fs::exists(cache_base_path)) + fs::remove_all(cache_base_path); + fs::create_directories(cache_base_path); + } + + void TearDown() override + { + if (fs::exists(cache_base_path)) + fs::remove_all(cache_base_path); + } + + fs::path caches_dir = fs::current_path() / "lru_cache_test"; + std::string cache_base_path = caches_dir / "cache1" / ""; +}; + +TEST_F(FileCacheTest, get) +{ DB::ThreadStatus thread_status; /// To work with cache need query_id and query context. @@ -140,7 +174,7 @@ TEST(FileCache, get) ASSERT_TRUE(segments[0]->reserve(segments[0]->range().size())); assertRange(2, segments[0], DB::FileSegment::Range(0, 9), DB::FileSegment::State::DOWNLOADING); - download(segments[0]); + download(cache_base_path, segments[0]); segments[0]->complete(); assertRange(3, segments[0], DB::FileSegment::Range(0, 9), DB::FileSegment::State::DOWNLOADED); } @@ -161,7 +195,7 @@ TEST(FileCache, get) assertRange(5, segments[1], DB::FileSegment::Range(10, 14), DB::FileSegment::State::EMPTY); ASSERT_TRUE(segments[1]->getOrSetDownloader() == DB::FileSegment::getCallerId()); - prepareAndDownload(segments[1]); + prepareAndDownload(cache_base_path, segments[1]); segments[1]->complete(); assertRange(6, segments[1], DB::FileSegment::Range(10, 14), DB::FileSegment::State::DOWNLOADED); } @@ -194,8 +228,8 @@ TEST(FileCache, get) assertRange(10, segments[0], DB::FileSegment::Range(10, 14), DB::FileSegment::State::DOWNLOADED); } - complete(cache.getOrSet(key, 17, 4, {})); /// Get [17, 20] - complete(cache.getOrSet(key, 24, 3, {})); /// Get [24, 26] + complete(cache_base_path, cache.getOrSet(key, 17, 4, {})); /// Get [17, 20] + complete(cache_base_path, cache.getOrSet(key, 24, 3, {})); /// Get [24, 26] /// completeWithState(cache.getOrSet(key, 27, 1, false)); /// Get [27, 27] /// Current cache: [__________][_____] [____] [___][] @@ -217,7 +251,7 @@ TEST(FileCache, get) assertRange(13, segments[2], DB::FileSegment::Range(15, 16), DB::FileSegment::State::EMPTY); ASSERT_TRUE(segments[2]->getOrSetDownloader() == DB::FileSegment::getCallerId()); - prepareAndDownload(segments[2]); + prepareAndDownload(cache_base_path, segments[2]); segments[2]->complete(); @@ -258,7 +292,7 @@ TEST(FileCache, get) assertRange(21, segments[3], DB::FileSegment::Range(21, 21), DB::FileSegment::State::EMPTY); ASSERT_TRUE(segments[3]->getOrSetDownloader() == DB::FileSegment::getCallerId()); - prepareAndDownload(segments[3]); + prepareAndDownload(cache_base_path, segments[3]); segments[3]->complete(); ASSERT_TRUE(segments[3]->state() == DB::FileSegment::State::DOWNLOADED); @@ -281,8 +315,8 @@
ASSERT_TRUE(segments[0]->getOrSetDownloader() == DB::FileSegment::getCallerId()); ASSERT_TRUE(segments[2]->getOrSetDownloader() == DB::FileSegment::getCallerId()); - prepareAndDownload(segments[0]); - prepareAndDownload(segments[2]); + prepareAndDownload(cache_base_path, segments[0]); + prepareAndDownload(cache_base_path, segments[2]); segments[0]->complete(); segments[2]->complete(); } @@ -310,8 +344,8 @@ TEST(FileCache, get) ASSERT_TRUE(s5[0]->getOrSetDownloader() == DB::FileSegment::getCallerId()); ASSERT_TRUE(s1[0]->getOrSetDownloader() == DB::FileSegment::getCallerId()); - prepareAndDownload(s5[0]); - prepareAndDownload(s1[0]); + prepareAndDownload(cache_base_path, s5[0]); + prepareAndDownload(cache_base_path, s1[0]); s5[0]->complete(); s1[0]->complete(); @@ -416,7 +450,7 @@ TEST(FileCache, get) cv.wait(lock, [&]{ return lets_start_download; }); } - prepareAndDownload(segments[2]); + prepareAndDownload(cache_base_path, segments[2]); segments[2]->complete(); ASSERT_TRUE(segments[2]->state() == DB::FileSegment::State::DOWNLOADED); @@ -481,7 +515,7 @@ TEST(FileCache, get) ASSERT_TRUE(segments_2[1]->state() == DB::FileSegment::State::PARTIALLY_DOWNLOADED); ASSERT_TRUE(segments_2[1]->getOrSetDownloader() == DB::FileSegment::getCallerId()); - prepareAndDownload(segments_2[1]); + prepareAndDownload(cache_base_path, segments_2[1]); segments_2[1]->complete(); }); @@ -539,3 +573,171 @@ TEST(FileCache, get) } } + +TEST_F(FileCacheTest, writeBuffer) +{ + DB::FileCacheSettings settings; + settings.max_size = 100; + settings.max_elements = 5; + settings.max_file_segment_size = 5; + + DB::FileCache cache(cache_base_path, settings); + cache.initialize(); + + auto write_to_cache = [&cache](const String & key, const Strings & data) + { + CreateFileSegmentSettings segment_settings; + segment_settings.kind = FileSegmentKind::Temporary; + segment_settings.unbounded = true; + + auto holder = cache.set(cache.hash(key), 0, 3, segment_settings); + EXPECT_EQ(holder.file_segments.size(), 1); + auto & segment = holder.file_segments.front(); + WriteBufferToFileSegment out(segment.get()); + for (const auto & s : data) + out.write(s.data(), s.size()); + return holder; + }; + + std::vector file_segment_paths; + { + auto holder = write_to_cache("key1", {"abc", "defg"}); + file_segment_paths.emplace_back(holder.file_segments.front()->getPathInLocalCache()); + + ASSERT_EQ(fs::file_size(file_segment_paths.back()), 7); + ASSERT_TRUE(holder.file_segments.front()->range() == FileSegment::Range(0, 7)); + ASSERT_EQ(cache.getUsedCacheSize(), 7); + + { + auto holder2 = write_to_cache("key2", {"1", "22", "333", "4444", "55555"}); + file_segment_paths.emplace_back(holder2.file_segments.front()->getPathInLocalCache()); + + ASSERT_EQ(fs::file_size(file_segment_paths.back()), 15); + ASSERT_TRUE(holder2.file_segments.front()->range() == FileSegment::Range(0, 15)); + ASSERT_EQ(cache.getUsedCacheSize(), 22); + } + ASSERT_FALSE(fs::exists(file_segment_paths.back())); + ASSERT_EQ(cache.getUsedCacheSize(), 7); + } + + for (const auto & file_segment_path : file_segment_paths) + { + ASSERT_FALSE(fs::exists(file_segment_path)); + } + ASSERT_EQ(cache.getUsedCacheSize(), 0); +} + + +static Block generateBlock(size_t size = 0) +{ + Block block; + ColumnWithTypeAndName column; + column.name = "x"; + column.type = std::make_shared(); + + { + MutableColumnPtr mut_col = column.type->createColumn(); + for (size_t i = 0; i < size; ++i) + mut_col->insert(i); + column.column = std::move(mut_col); + } + + block.insert(column); + return block; +} + 
+static size_t readAllTemporaryData(TemporaryFileStream & stream) +{ + Block block; + size_t read_rows = 0; + do + { + block = stream.read(); + read_rows += block.rows(); + } while (block); + return read_rows; +} + +TEST_F(FileCacheTest, temporaryData) +{ + DB::FileCacheSettings settings; + settings.max_size = 10_KiB; + settings.max_file_segment_size = 1_KiB; + + DB::FileCache file_cache(cache_base_path, settings); + file_cache.initialize(); + + auto tmp_data_scope = std::make_shared(nullptr, &file_cache, 0); + + auto some_data_holder = file_cache.getOrSet(file_cache.hash("some_data"), 0, 5_KiB, CreateFileSegmentSettings{}); + + { + auto segments = fromHolder(some_data_holder); + ASSERT_EQ(segments.size(), 5); + for (auto & segment : segments) + { + ASSERT_TRUE(segment->getOrSetDownloader() == DB::FileSegment::getCallerId()); + ASSERT_TRUE(segment->reserve(segment->range().size())); + download(cache_base_path, segment); + segment->completeWithoutState(); + } + } + + size_t size_used_before_temporary_data = file_cache.getUsedCacheSize(); + size_t segments_used_before_temporary_data = file_cache.getFileSegmentsNum(); + ASSERT_GT(size_used_before_temporary_data, 0); + ASSERT_GT(segments_used_before_temporary_data, 0); + + size_t size_used_with_temporary_data; + size_t segments_used_with_temporary_data; + { + auto tmp_data = std::make_unique(tmp_data_scope); + + auto & stream = tmp_data->createStream(generateBlock()); + + ASSERT_GT(stream.write(generateBlock(100)), 0); + + ASSERT_GT(file_cache.getUsedCacheSize(), 0); + ASSERT_GT(file_cache.getFileSegmentsNum(), 0); + + size_t used_size_before_attempt = file_cache.getUsedCacheSize(); + /// data can't be evicted because it is still held by `some_data_holder` + ASSERT_THROW({ + stream.write(generateBlock(2000)); + stream.flush(); + }, DB::Exception); + + ASSERT_EQ(file_cache.getUsedCacheSize(), used_size_before_attempt); + } + { + auto tmp_data = std::make_unique(tmp_data_scope); + auto & stream = tmp_data->createStream(generateBlock()); + + ASSERT_GT(stream.write(generateBlock(100)), 0); + + some_data_holder.reset(); + + stream.write(generateBlock(2000)); + + auto stat = stream.finishWriting(); + + ASSERT_TRUE(fs::exists(stream.getPath())); + ASSERT_GT(fs::file_size(stream.getPath()), 100); + + ASSERT_EQ(stat.num_rows, 2100); + ASSERT_EQ(readAllTemporaryData(stream), 2100); + + size_used_with_temporary_data = file_cache.getUsedCacheSize(); + segments_used_with_temporary_data = file_cache.getFileSegmentsNum(); + ASSERT_GT(size_used_with_temporary_data, 0); + ASSERT_GT(segments_used_with_temporary_data, 0); + } + + /// All temp data should be evicted after removing temporary files + ASSERT_LE(file_cache.getUsedCacheSize(), size_used_with_temporary_data); + ASSERT_LE(file_cache.getFileSegmentsNum(), segments_used_with_temporary_data); + + /// Some segments reserved by `some_data_holder` was eviced by temporary data + ASSERT_LE(file_cache.getUsedCacheSize(), size_used_before_temporary_data); + ASSERT_LE(file_cache.getFileSegmentsNum(), segments_used_before_temporary_data); +} diff --git a/src/Parsers/ASTColumnDeclaration.cpp b/src/Parsers/ASTColumnDeclaration.cpp index dc5651d9f14..c2396708a73 100644 --- a/src/Parsers/ASTColumnDeclaration.cpp +++ b/src/Parsers/ASTColumnDeclaration.cpp @@ -2,6 +2,7 @@ #include #include #include +#include namespace DB @@ -78,7 +79,7 @@ void ASTColumnDeclaration::formatImpl(const FormatSettings & settings, FormatSta if (default_expression) { settings.ostr << ' ' << (settings.hilite ? 
hilite_keyword : "") << default_specifier << (settings.hilite ? hilite_none : ""); - if (default_specifier != "EPHEMERAL" || !default_expression->as()->value.isNull()) + if (!ephemeral_default) { settings.ostr << ' '; default_expression->formatImpl(settings, state, frame); diff --git a/src/Parsers/ASTColumnDeclaration.h b/src/Parsers/ASTColumnDeclaration.h index 5ecfb859abc..2008e4f99d1 100644 --- a/src/Parsers/ASTColumnDeclaration.h +++ b/src/Parsers/ASTColumnDeclaration.h @@ -16,6 +16,7 @@ public: std::optional null_modifier; String default_specifier; ASTPtr default_expression; + bool ephemeral_default; ASTPtr comment; ASTPtr codec; ASTPtr ttl; diff --git a/src/Parsers/ASTColumnsMatcher.cpp b/src/Parsers/ASTColumnsMatcher.cpp index 0fc6847de68..124206043cf 100644 --- a/src/Parsers/ASTColumnsMatcher.cpp +++ b/src/Parsers/ASTColumnsMatcher.cpp @@ -87,7 +87,7 @@ void ASTColumnsListMatcher::updateTreeHashImpl(SipHash & hash_state) const void ASTColumnsListMatcher::appendColumnName(WriteBuffer & ostr) const { writeCString("COLUMNS(", ostr); - for (auto it = column_list->children.begin(); it != column_list->children.end(); ++it) + for (auto * it = column_list->children.begin(); it != column_list->children.end(); ++it) { if (it != column_list->children.begin()) writeCString(", ", ostr); @@ -198,7 +198,7 @@ void ASTQualifiedColumnsListMatcher::appendColumnName(WriteBuffer & ostr) const qualifier->appendColumnName(ostr); writeCString(".COLUMNS(", ostr); - for (auto it = column_list->children.begin(); it != column_list->children.end(); ++it) + for (auto * it = column_list->children.begin(); it != column_list->children.end(); ++it) { if (it != column_list->children.begin()) writeCString(", ", ostr); diff --git a/src/Parsers/ASTColumnsTransformers.cpp b/src/Parsers/ASTColumnsTransformers.cpp index 118c22b463f..16752fa115e 100644 --- a/src/Parsers/ASTColumnsTransformers.cpp +++ b/src/Parsers/ASTColumnsTransformers.cpp @@ -217,7 +217,7 @@ void ASTColumnsExceptTransformer::transform(ASTs & nodes) const for (const auto & child : children) expected_columns.insert(child->as().name()); - for (auto it = nodes.begin(); it != nodes.end();) + for (auto * it = nodes.begin(); it != nodes.end();) { if (const auto * id = it->get()->as()) { @@ -234,7 +234,7 @@ void ASTColumnsExceptTransformer::transform(ASTs & nodes) const } else { - for (auto it = nodes.begin(); it != nodes.end();) + for (auto * it = nodes.begin(); it != nodes.end();) { if (const auto * id = it->get()->as()) { diff --git a/src/Parsers/ASTFunction.cpp b/src/Parsers/ASTFunction.cpp index 9668848f0b6..4ac4bb6144e 100644 --- a/src/Parsers/ASTFunction.cpp +++ b/src/Parsers/ASTFunction.cpp @@ -383,7 +383,7 @@ void ASTFunction::appendColumnNameImpl(WriteBuffer & ostr) const if (parameters) { writeChar('(', ostr); - for (auto it = parameters->children.begin(); it != parameters->children.end(); ++it) + for (auto * it = parameters->children.begin(); it != parameters->children.end(); ++it) { if (it != parameters->children.begin()) writeCString(", ", ostr); @@ -396,7 +396,7 @@ void ASTFunction::appendColumnNameImpl(WriteBuffer & ostr) const writeChar('(', ostr); if (arguments) { - for (auto it = arguments->children.begin(); it != arguments->children.end(); ++it) + for (auto * it = arguments->children.begin(); it != arguments->children.end(); ++it) { if (it != arguments->children.begin()) writeCString(", ", ostr); diff --git a/src/Parsers/ASTIdentifier.cpp b/src/Parsers/ASTIdentifier.cpp index 341ac44b56e..8651a52f2c1 100644 --- a/src/Parsers/ASTIdentifier.cpp 
+++ b/src/Parsers/ASTIdentifier.cpp @@ -24,7 +24,7 @@ ASTIdentifier::ASTIdentifier(const String & short_name, ASTPtr && name_param) children.push_back(std::move(name_param)); } -ASTIdentifier::ASTIdentifier(std::vector && name_parts_, bool special, std::vector && name_params) +ASTIdentifier::ASTIdentifier(std::vector && name_parts_, bool special, ASTs && name_params) : name_parts(name_parts_), semantic(std::make_shared()) { assert(!name_parts.empty()); @@ -164,12 +164,12 @@ void ASTIdentifier::resetFullName() full_name += '.' + name_parts[i]; } -ASTTableIdentifier::ASTTableIdentifier(const String & table_name, std::vector && name_params) +ASTTableIdentifier::ASTTableIdentifier(const String & table_name, ASTs && name_params) : ASTIdentifier({table_name}, true, std::move(name_params)) { } -ASTTableIdentifier::ASTTableIdentifier(const StorageID & table_id, std::vector && name_params) +ASTTableIdentifier::ASTTableIdentifier(const StorageID & table_id, ASTs && name_params) : ASTIdentifier( table_id.database_name.empty() ? std::vector{table_id.table_name} : std::vector{table_id.database_name, table_id.table_name}, @@ -178,7 +178,7 @@ ASTTableIdentifier::ASTTableIdentifier(const StorageID & table_id, std::vector && name_params) +ASTTableIdentifier::ASTTableIdentifier(const String & database_name, const String & table_name, ASTs && name_params) : ASTIdentifier({database_name, table_name}, true, std::move(name_params)) { } diff --git a/src/Parsers/ASTIdentifier.h b/src/Parsers/ASTIdentifier.h index c9712d578e0..0e030c797ce 100644 --- a/src/Parsers/ASTIdentifier.h +++ b/src/Parsers/ASTIdentifier.h @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB @@ -23,7 +24,7 @@ class ASTIdentifier : public ASTWithAlias { public: explicit ASTIdentifier(const String & short_name, ASTPtr && name_param = {}); - explicit ASTIdentifier(std::vector && name_parts, bool special = false, std::vector && name_params = {}); + explicit ASTIdentifier(std::vector && name_parts, bool special = false, ASTs && name_params = {}); /** Get the text that identifies this element. 
*/ String getID(char delim) const override { return "Identifier" + (delim + name()); } @@ -72,9 +73,9 @@ private: class ASTTableIdentifier : public ASTIdentifier { public: - explicit ASTTableIdentifier(const String & table_name, std::vector && name_params = {}); - explicit ASTTableIdentifier(const StorageID & table_id, std::vector && name_params = {}); - ASTTableIdentifier(const String & database_name, const String & table_name, std::vector && name_params = {}); + explicit ASTTableIdentifier(const String & table_name, ASTs && name_params = {}); + explicit ASTTableIdentifier(const StorageID & table_id, ASTs && name_params = {}); + ASTTableIdentifier(const String & database_name, const String & table_name, ASTs && name_params = {}); String getID(char delim) const override { return "TableIdentifier" + (delim + name()); } ASTPtr clone() const override; diff --git a/src/Parsers/ASTSystemQuery.cpp b/src/Parsers/ASTSystemQuery.cpp index ab5137d0960..5ed77f48ceb 100644 --- a/src/Parsers/ASTSystemQuery.cpp +++ b/src/Parsers/ASTSystemQuery.cpp @@ -185,7 +185,7 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState &, { print_identifier(database->as()->name()); } - else if (type == Type::DROP_REPLICA) + else if (type == Type::DROP_REPLICA || type == Type::DROP_DATABASE_REPLICA) { print_drop_replica(); } diff --git a/src/Parsers/ASTSystemQuery.h b/src/Parsers/ASTSystemQuery.h index 2498dfdc12b..76788fd31fe 100644 --- a/src/Parsers/ASTSystemQuery.h +++ b/src/Parsers/ASTSystemQuery.h @@ -36,6 +36,7 @@ public: RESTART_REPLICA, RESTORE_REPLICA, DROP_REPLICA, + DROP_DATABASE_REPLICA, SYNC_REPLICA, SYNC_DATABASE_REPLICA, SYNC_TRANSACTION_LOG, diff --git a/src/Parsers/ASTTTLElement.cpp b/src/Parsers/ASTTTLElement.cpp index 86dd85e0eb8..bb353194e8c 100644 --- a/src/Parsers/ASTTTLElement.cpp +++ b/src/Parsers/ASTTTLElement.cpp @@ -52,7 +52,7 @@ void ASTTTLElement::formatImpl(const FormatSettings & settings, FormatState & st else if (mode == TTLMode::GROUP_BY) { settings.ostr << " GROUP BY "; - for (auto it = group_by_key.begin(); it != group_by_key.end(); ++it) + for (const auto * it = group_by_key.begin(); it != group_by_key.end(); ++it) { if (it != group_by_key.begin()) settings.ostr << ", "; @@ -62,7 +62,7 @@ void ASTTTLElement::formatImpl(const FormatSettings & settings, FormatState & st if (!group_by_assignments.empty()) { settings.ostr << " SET "; - for (auto it = group_by_assignments.begin(); it != group_by_assignments.end(); ++it) + for (const auto * it = group_by_assignments.begin(); it != group_by_assignments.end(); ++it) { if (it != group_by_assignments.begin()) settings.ostr << ", "; diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 23c27e455c7..c6b51fd4dfe 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -224,7 +225,7 @@ bool ParserCompoundIdentifier::parseImpl(Pos & pos, ASTPtr & node, Expected & ex return false; std::vector parts; - std::vector params; + ASTs params; const auto & list = id_list->as(); for (const auto & child : list.children) { @@ -1169,36 +1170,42 @@ class ICollection { public: virtual ~ICollection() = default; - virtual bool parse(IParser::Pos & pos, Collections & collections, ASTPtr & node, Expected & expected) = 0; + virtual bool parse(IParser::Pos & pos, Collections & collections, ASTPtr & node, Expected & expected, bool allow_map) = 0; }; template class CommonCollection : 
public ICollection { public: - bool parse(IParser::Pos & pos, Collections & collections, ASTPtr & node, Expected & expected) override; + explicit CommonCollection(const IParser::Pos & pos) : begin(pos) {} + + bool parse(IParser::Pos & pos, Collections & collections, ASTPtr & node, Expected & expected, bool allow_map) override; private: Container container; + IParser::Pos begin; }; class MapCollection : public ICollection { public: - bool parse(IParser::Pos & pos, Collections & collections, ASTPtr & node, Expected & expected) override; + explicit MapCollection(const IParser::Pos & pos) : begin(pos) {} + + bool parse(IParser::Pos & pos, Collections & collections, ASTPtr & node, Expected & expected, bool allow_map) override; private: Map container; + IParser::Pos begin; }; -bool parseAllCollectionsStart(IParser::Pos & pos, Collections & collections, Expected & /*expected*/) +bool parseAllCollectionsStart(IParser::Pos & pos, Collections & collections, Expected & /*expected*/, bool allow_map) { - if (pos->type == TokenType::OpeningCurlyBrace) - collections.push_back(std::make_unique()); + if (allow_map && pos->type == TokenType::OpeningCurlyBrace) + collections.push_back(std::make_unique(pos)); else if (pos->type == TokenType::OpeningRoundBracket) - collections.push_back(std::make_unique>()); + collections.push_back(std::make_unique>(pos)); else if (pos->type == TokenType::OpeningSquareBracket) - collections.push_back(std::make_unique>()); + collections.push_back(std::make_unique>(pos)); else return false; @@ -1207,7 +1214,7 @@ bool parseAllCollectionsStart(IParser::Pos & pos, Collections & collections, Exp } template -bool CommonCollection::parse(IParser::Pos & pos, Collections & collections, ASTPtr & node, Expected & expected) +bool CommonCollection::parse(IParser::Pos & pos, Collections & collections, ASTPtr & node, Expected & expected, bool allow_map) { if (node) { @@ -1224,23 +1231,27 @@ bool CommonCollection::parse(IParser::Pos & pos, Collectio { if (end_p.ignore(pos, expected)) { - node = std::make_shared(std::move(container)); + auto result = std::make_shared(std::move(container)); + result->begin = begin; + result->end = pos; + + node = std::move(result); break; } if (!container.empty() && !comma_p.ignore(pos, expected)) - return false; + return false; if (literal_p.parse(pos, literal, expected)) container.push_back(std::move(literal->as().value)); else - return parseAllCollectionsStart(pos, collections, expected); + return parseAllCollectionsStart(pos, collections, expected, allow_map); } return true; } -bool MapCollection::parse(IParser::Pos & pos, Collections & collections, ASTPtr & node, Expected & expected) +bool MapCollection::parse(IParser::Pos & pos, Collections & collections, ASTPtr & node, Expected & expected, bool allow_map) { if (node) { @@ -1258,7 +1269,11 @@ bool MapCollection::parse(IParser::Pos & pos, Collections & collections, ASTPtr { if (end_p.ignore(pos, expected)) { - node = std::make_shared(std::move(container)); + auto result = std::make_shared(std::move(container)); + result->begin = begin; + result->end = pos; + + node = std::move(result); break; } @@ -1276,7 +1291,7 @@ bool MapCollection::parse(IParser::Pos & pos, Collections & collections, ASTPtr if (literal_p.parse(pos, literal, expected)) container.push_back(std::move(literal->as().value)); else - return parseAllCollectionsStart(pos, collections, expected); + return parseAllCollectionsStart(pos, collections, expected, allow_map); } return true; @@ -1289,12 +1304,12 @@ bool 
ParserAllCollectionsOfLiterals::parseImpl(Pos & pos, ASTPtr & node, Expecte { Collections collections; - if (!parseAllCollectionsStart(pos, collections, expected)) + if (!parseAllCollectionsStart(pos, collections, expected, allow_map)) return false; while (!collections.empty()) { - if (!collections.back()->parse(pos, collections, node, expected)) + if (!collections.back()->parse(pos, collections, node, expected, allow_map)) return false; if (node) diff --git a/src/Parsers/ExpressionElementParsers.h b/src/Parsers/ExpressionElementParsers.h index 8e328db976b..cc88faf2653 100644 --- a/src/Parsers/ExpressionElementParsers.h +++ b/src/Parsers/ExpressionElementParsers.h @@ -307,9 +307,14 @@ protected: class ParserAllCollectionsOfLiterals : public IParserBase { public: + explicit ParserAllCollectionsOfLiterals(bool allow_map_ = true) : allow_map(allow_map_) {} + protected: const char * getName() const override { return "combination of maps, arrays, tuples"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + +private: + bool allow_map; }; diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index cbbee4a04e3..01955c2c05a 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -46,16 +46,15 @@ bool ParserList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!elem_parser->parse(pos, element, expected)) return false; - elements.push_back(element); + elements.push_back(std::move(element)); return true; }; if (!parseUtil(pos, expected, parse_element, *separator_parser, allow_empty)) return false; - auto list = std::make_shared(result_separator); - list->children = std::move(elements); - node = list; + node = std::make_shared(result_separator); + node->children = std::move(elements); return true; } @@ -76,7 +75,7 @@ bool ParserUnionList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!elem_parser.parse(pos, element, expected)) return false; - elements.push_back(element); + elements.push_back(std::move(element)); return true; }; @@ -120,9 +119,8 @@ bool ParserUnionList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!parseUtil(pos, parse_element, parse_separator)) return false; - auto list = std::make_shared(); - list->children = std::move(elements); - node = list; + node = std::make_shared(); + node->children = std::move(elements); return true; } @@ -242,7 +240,7 @@ bool ParserLeftAssociativeBinaryOperatorList::parseImpl(Pos & pos, ASTPtr & node if (!elem_parser->parse(pos, elem, expected)) return false; - node = elem; + node = std::move(elem); first = false; } else @@ -607,7 +605,7 @@ public: asts.reserve(asts.size() + n); - auto start = operands.begin() + operands.size() - n; + auto * start = operands.begin() + operands.size() - n; asts.insert(asts.end(), std::make_move_iterator(start), std::make_move_iterator(operands.end())); operands.erase(start, operands.end()); @@ -701,7 +699,7 @@ public: /// 2. If there is already tuple do nothing if (tryGetFunctionName(elements.back()) == "tuple") { - pushOperand(elements.back()); + pushOperand(std::move(elements.back())); elements.pop_back(); } /// 3. 
Put all elements in a single tuple @@ -711,6 +709,19 @@ public: elements.clear(); pushOperand(function); } + + /// We must check that tuple arguments are identifiers + auto * func_ptr = operands.back()->as(); + auto * args_ptr = func_ptr->arguments->as(); + + for (const auto & child : args_ptr->children) + { + if (typeid_cast(child.get())) + continue; + + return false; + } + return true; } @@ -1064,9 +1075,7 @@ public: is_tuple = true; // Special case for f(x, (y) -> z) = f(x, tuple(y) -> z) - auto test_pos = pos; - auto test_expected = expected; - if (parseOperator(test_pos, "->", test_expected)) + if (pos->type == TokenType::Arrow) is_tuple = true; } @@ -1448,7 +1457,7 @@ public: return false; auto subquery = std::make_shared(); - subquery->children.push_back(node); + subquery->children.push_back(std::move(node)); elements = {makeASTFunction("exists", subquery)}; finished = true; @@ -1734,6 +1743,29 @@ private: bool parsed_interval_kind = false; }; +class TupleLayer : public LayerWithSeparator +{ +public: + bool parse(IParser::Pos & pos, Expected & expected, Action & action) override + { + bool result = LayerWithSeparator::parse(pos, expected, action); + + /// Check that after the tuple() function there is no lambda operator + if (finished && pos->type == TokenType::Arrow) + return false; + + return result; + } + +protected: + bool getResultImpl(ASTPtr & node) override + { + node = makeASTFunction("tuple", std::move(elements)); + return true; + } +}; + + class IntervalLayer : public Layer { public: @@ -2037,6 +2069,9 @@ std::unique_ptr getFunctionLayer(ASTPtr identifier, bool is_table_functio return std::make_unique(true); } + if (function_name == "tuple") + return std::make_unique(); + if (function_name_lowercase == "cast") return std::make_unique(); else if (function_name_lowercase == "extract") @@ -2361,6 +2396,7 @@ Action ParserExpressionImpl::tryParseOperand(Layers & layers, IParser::Pos & pos if (layers.back()->previousType() == OperatorType::Comparison) { + auto old_pos = pos; SubqueryFunctionType subquery_function_type = SubqueryFunctionType::NONE; if (any_parser.ignore(pos, expected) && subquery_parser.parse(pos, tmp, expected)) @@ -2386,6 +2422,10 @@ Action ParserExpressionImpl::tryParseOperand(Layers & layers, IParser::Pos & pos layers.back()->pushOperand(std::move(function)); return Action::OPERATOR; } + else + { + pos = old_pos; + } } /// Try to find any unary operators diff --git a/src/Parsers/IAST_fwd.h b/src/Parsers/IAST_fwd.h index 18ba79d6618..53d41d42d65 100644 --- a/src/Parsers/IAST_fwd.h +++ b/src/Parsers/IAST_fwd.h @@ -1,13 +1,36 @@ #pragma once +#include #include -#include +#include namespace DB { class IAST; using ASTPtr = std::shared_ptr; -using ASTs = std::vector; +/// sizeof(absl::InlinedVector) == 8 + N * 16.
+/// 7 elements take 120 Bytes which is ~128 +using ASTs = absl::InlinedVector; + +} + +namespace std +{ + +inline typename DB::ASTs::size_type erase(DB::ASTs & asts, const DB::ASTPtr & element) +{ + auto old_size = asts.size(); + asts.erase(std::remove(asts.begin(), asts.end(), element), asts.end()); + return old_size - asts.size(); +} + +template +inline typename DB::ASTs::size_type erase_if(DB::ASTs & asts, Predicate pred) +{ + auto old_size = asts.size(); + asts.erase(std::remove_if(asts.begin(), asts.end(), pred), asts.end()); + return old_size - asts.size(); +} } diff --git a/src/Parsers/IParser.h b/src/Parsers/IParser.h index 4e6dbca15a6..466cdf7a4b1 100644 --- a/src/Parsers/IParser.h +++ b/src/Parsers/IParser.h @@ -1,7 +1,7 @@ #pragma once +#include #include -#include #include #include @@ -24,8 +24,8 @@ namespace ErrorCodes */ struct Expected { + absl::InlinedVector variants; const char * max_parsed_pos = nullptr; - std::vector variants; /// 'description' should be statically allocated string. ALWAYS_INLINE void add(const char * current_pos, const char * description) @@ -38,7 +38,7 @@ struct Expected return; } - if ((current_pos == max_parsed_pos) && (find(variants.begin(), variants.end(), description) == variants.end())) + if ((current_pos == max_parsed_pos) && (std::find(variants.begin(), variants.end(), description) == variants.end())) variants.push_back(description); } @@ -64,6 +64,8 @@ public: { } + Pos(TokenIterator token_iterator_, uint32_t max_depth_) : TokenIterator(token_iterator_), max_depth(max_depth_) { } + ALWAYS_INLINE void increaseDepth() { ++depth; diff --git a/src/Parsers/ParserCreateQuery.h b/src/Parsers/ParserCreateQuery.h index e97033c51f0..ef87988aab2 100644 --- a/src/Parsers/ParserCreateQuery.h +++ b/src/Parsers/ParserCreateQuery.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -170,6 +171,7 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E ASTPtr type; String default_specifier; std::optional null_modifier; + bool ephemeral_default = false; ASTPtr default_expression; ASTPtr comment_expression; ASTPtr codec_expression; @@ -235,8 +237,16 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E else if (s_ephemeral.ignore(pos, expected)) { default_specifier = s_ephemeral.getName(); - if (!literal_parser.parse(pos, default_expression, expected) && type) - default_expression = std::make_shared(Field()); + if (!expr_parser.parse(pos, default_expression, expected) && type) + { + ephemeral_default = true; + + auto default_function = std::make_shared(); + default_function->name = "defaultValueOfTypeName"; + default_function->arguments = std::make_shared(); + default_function->arguments->children.emplace_back(std::make_shared(type->as()->formatWithSecretsHidden())); + default_expression = default_function; + } if (!default_expression && !type) return false; @@ -302,6 +312,7 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E column_declaration->default_specifier = default_specifier; if (default_expression) { + column_declaration->ephemeral_default = ephemeral_default; column_declaration->default_expression = default_expression; column_declaration->children.push_back(std::move(default_expression)); } diff --git a/src/Parsers/ParserDataType.cpp b/src/Parsers/ParserDataType.cpp index a1a24c40ac2..3e2a6facac6 100644 --- a/src/Parsers/ParserDataType.cpp +++ b/src/Parsers/ParserDataType.cpp @@ -27,7 +27,7 @@ private: { ParserNestedTable nested_parser; ParserDataType data_type_parser; - 
ParserLiteral literal_parser; + ParserAllCollectionsOfLiterals literal_parser(false); const char * operators[] = {"=", "equals", nullptr}; ParserLeftAssociativeBinaryOperatorList enum_parser(operators, std::make_unique()); @@ -145,4 +145,3 @@ bool ParserDataType::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } } - diff --git a/src/Parsers/ParserSystemQuery.cpp b/src/Parsers/ParserSystemQuery.cpp index ffe8ecd365e..a44516fc4a3 100644 --- a/src/Parsers/ParserSystemQuery.cpp +++ b/src/Parsers/ParserSystemQuery.cpp @@ -68,7 +68,7 @@ enum class SystemQueryTargetType { Model, Function, - Disk + Disk, }; [[nodiscard]] static bool parseQueryWithOnClusterAndTarget(std::shared_ptr & res, IParser::Pos & pos, Expected & expected, SystemQueryTargetType target_type) @@ -150,6 +150,49 @@ enum class SystemQueryTargetType return true; } +[[nodiscard]] static bool parseDropReplica(std::shared_ptr & res, IParser::Pos & pos, Expected & expected, bool database) +{ + if (!parseQueryWithOnCluster(res, pos, expected)) + return false; + + ASTPtr ast; + if (!ParserStringLiteral{}.parse(pos, ast, expected)) + return false; + res->replica = ast->as().value.safeGet(); + if (ParserKeyword{"FROM"}.ignore(pos, expected)) + { + // way 1. parse replica database + // way 2. parse replica table + // way 3. parse replica zkpath + if (ParserKeyword{"DATABASE"}.ignore(pos, expected)) + { + ParserIdentifier database_parser; + if (!database_parser.parse(pos, res->database, expected)) + return false; + } + else if (!database && ParserKeyword{"TABLE"}.ignore(pos, expected)) + { + parseDatabaseAndTableAsAST(pos, expected, res->database, res->table); + } + else if (ParserKeyword{"ZKPATH"}.ignore(pos, expected)) + { + ASTPtr path_ast; + if (!ParserStringLiteral{}.parse(pos, path_ast, expected)) + return false; + String zk_path = path_ast->as().value.safeGet(); + if (!zk_path.empty() && zk_path[zk_path.size() - 1] == '/') + zk_path.pop_back(); + res->replica_zk_path = zk_path; + } + else + return false; + } + else + res->is_drop_whole_replica = true; + + return true; +} + bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & expected) { if (!ParserKeyword{"SYSTEM"}.ignore(pos, expected)) @@ -194,46 +237,17 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & return false; break; } + case Type::DROP_REPLICA: { - if (!parseQueryWithOnCluster(res, pos, expected)) + if (!parseDropReplica(res, pos, expected, /* database */ false)) return false; - - ASTPtr ast; - if (!ParserStringLiteral{}.parse(pos, ast, expected)) + break; + } + case Type::DROP_DATABASE_REPLICA: + { + if (!parseDropReplica(res, pos, expected, /* database */ true)) return false; - res->replica = ast->as().value.safeGet(); - if (ParserKeyword{"FROM"}.ignore(pos, expected)) - { - // way 1. parse replica database - // way 2. parse replica tables - // way 3. 
parse replica zkpath - if (ParserKeyword{"DATABASE"}.ignore(pos, expected)) - { - ParserIdentifier database_parser; - if (!database_parser.parse(pos, res->database, expected)) - return false; - } - else if (ParserKeyword{"TABLE"}.ignore(pos, expected)) - { - parseDatabaseAndTableAsAST(pos, expected, res->database, res->table); - } - else if (ParserKeyword{"ZKPATH"}.ignore(pos, expected)) - { - ASTPtr path_ast; - if (!ParserStringLiteral{}.parse(pos, path_ast, expected)) - return false; - String zk_path = path_ast->as().value.safeGet(); - if (!zk_path.empty() && zk_path[zk_path.size() - 1] == '/') - zk_path.pop_back(); - res->replica_zk_path = zk_path; - } - else - return false; - } - else - res->is_drop_whole_replica = true; - break; } @@ -255,14 +269,12 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & return false; break; } - case Type::RESTART_DISK: { if (!parseQueryWithOnClusterAndTarget(res, pos, expected, SystemQueryTargetType::Disk)) return false; break; } - /// FLUSH DISTRIBUTED requires table /// START/STOP DISTRIBUTED SENDS does not require table case Type::STOP_DISTRIBUTED_SENDS: diff --git a/src/Parsers/TokenIterator.cpp b/src/Parsers/TokenIterator.cpp index 08877e0b2fe..6633ddb9563 100644 --- a/src/Parsers/TokenIterator.cpp +++ b/src/Parsers/TokenIterator.cpp @@ -4,6 +4,20 @@ namespace DB { +Tokens::Tokens(const char * begin, const char * end, size_t max_query_size) +{ + Lexer lexer(begin, end, max_query_size); + + bool stop = false; + do + { + Token token = lexer.nextToken(); + stop = token.isEnd() || token.type == TokenType::ErrorMaxQuerySizeExceeded; + if (token.isSignificant()) + data.emplace_back(std::move(token)); + } while (!stop); +} + UnmatchedParentheses checkUnmatchedParentheses(TokenIterator begin) { /// We have just two kind of parentheses: () and []. 
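For context before the TokenIterator.h hunk below: the new Tokens constructor above lexes the whole query eagerly and stores only significant tokens, so operator[] can become a plain bounds-checked lookup and max() only has to remember the furthest index the parser has touched (with eager lexing, data.back() is always the final token of the query, so it can no longer stand in for "furthest token lexed so far"). A minimal standalone sketch of that pattern, using a toy whitespace tokenizer in place of the real Lexer (ToyTokens and lexAll are made-up names):

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <sstream>
#include <string>
#include <vector>

/// Toy stand-in for the real lexer: split the input on whitespace.
static std::vector<std::string> lexAll(const std::string & input)
{
    std::vector<std::string> tokens;
    std::istringstream in(input);
    std::string token;
    while (in >> token)
        tokens.push_back(token); // keep only "significant" tokens
    return tokens;
}

class ToyTokens
{
public:
    explicit ToyTokens(const std::string & input) : data(lexAll(input)) {}

    const std::string & operator[](std::size_t index)
    {
        assert(index < data.size()); // everything was lexed up front
        last_accessed_index = std::max(last_accessed_index, index);
        return data[index];
    }

    /// Furthest token the caller has looked at so far.
    const std::string & max()
    {
        assert(!data.empty());
        return data[last_accessed_index];
    }

private:
    std::vector<std::string> data;
    std::size_t last_accessed_index = 0;
};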
diff --git a/src/Parsers/TokenIterator.h b/src/Parsers/TokenIterator.h index b84bec57817..c9ac61dfef9 100644 --- a/src/Parsers/TokenIterator.h +++ b/src/Parsers/TokenIterator.h @@ -3,6 +3,7 @@ #include #include +#include #include @@ -20,34 +21,19 @@ class Tokens { private: std::vector data; - Lexer lexer; + std::size_t last_accessed_index = 0; public: - Tokens(const char * begin, const char * end, size_t max_query_size = 0) : lexer(begin, end, max_query_size) {} + Tokens(const char * begin, const char * end, size_t max_query_size = 0); - const Token & operator[] (size_t index) + ALWAYS_INLINE inline const Token & operator[](size_t index) { - while (true) - { - if (index < data.size()) - return data[index]; - - if (!data.empty() && data.back().isEnd()) - return data.back(); - - Token token = lexer.nextToken(); - - if (token.isSignificant()) - data.emplace_back(token); - } + assert(index < data.size()); + last_accessed_index = std::max(last_accessed_index, index); + return data[index]; } - const Token & max() - { - if (data.empty()) - return (*this)[0]; - return data.back(); - } + ALWAYS_INLINE inline const Token & max() { return data[last_accessed_index]; } }; diff --git a/src/Parsers/parseQuery.cpp b/src/Parsers/parseQuery.cpp index 4a0c60da48d..da8450ac301 100644 --- a/src/Parsers/parseQuery.cpp +++ b/src/Parsers/parseQuery.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -263,7 +264,19 @@ ASTPtr tryParseQuery( ASTInsertQuery * insert = nullptr; if (parse_res) - insert = res->as(); + { + if (auto * explain = res->as()) + { + if (auto explained_query = explain->getExplainedQuery()) + { + insert = explained_query->as(); + } + } + else + { + insert = res->as(); + } + } // If parsed query ends at data for insertion. 
Data for insertion could be // in any format and not necessary be lexical correct, so we can't perform diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index d88766f3656..6b2de30722c 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -349,8 +349,8 @@ void Planner::buildQueryPlanIfNeeded() { auto function_node = std::make_shared("and"); auto and_function = FunctionFactory::instance().get("and", query_context); - function_node->resolveAsFunction(std::move(and_function), std::make_shared()); function_node->getArguments().getNodes() = {query_node.getPrewhere(), query_node.getWhere()}; + function_node->resolveAsFunction(and_function->build(function_node->getArgumentTypes())); query_node.getWhere() = std::move(function_node); query_node.getPrewhere() = {}; } diff --git a/src/Planner/PlannerActionsVisitor.cpp b/src/Planner/PlannerActionsVisitor.cpp index aa1b61e5559..95edd93dd9f 100644 --- a/src/Planner/PlannerActionsVisitor.cpp +++ b/src/Planner/PlannerActionsVisitor.cpp @@ -121,7 +121,8 @@ public: return node; } - const ActionsDAG::Node * addFunctionIfNecessary(const std::string & node_name, ActionsDAG::NodeRawConstPtrs children, FunctionOverloadResolverPtr function) + template + const ActionsDAG::Node * addFunctionIfNecessary(const std::string & node_name, ActionsDAG::NodeRawConstPtrs children, FunctionOrOverloadResolver function) { auto it = node_name_to_node.find(node_name); if (it != node_name_to_node.end()) @@ -325,6 +326,7 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi lambda_actions, captured_column_names, lambda_arguments_names_and_types, result_type, lambda_expression_node_name); actions_stack.pop_back(); + // TODO: Pass IFunctionBase here not FunctionCaptureOverloadResolver. 
actions_stack[level].addFunctionIfNecessary(lambda_node_name, std::move(lambda_children), std::move(function_capture)); size_t actions_stack_size = actions_stack.size(); diff --git a/src/Planner/PlannerAggregation.cpp b/src/Planner/PlannerAggregation.cpp index a1a8b54426a..05e7b5418e3 100644 --- a/src/Planner/PlannerAggregation.cpp +++ b/src/Planner/PlannerAggregation.cpp @@ -101,14 +101,14 @@ public: { auto grouping_ordinary_function = std::make_shared(arguments_indexes, force_grouping_standard_compatibility); auto grouping_ordinary_function_adaptor = std::make_shared(std::move(grouping_ordinary_function)); - function_node->resolveAsFunction(std::move(grouping_ordinary_function_adaptor), std::make_shared()); + function_node->resolveAsFunction(grouping_ordinary_function_adaptor->build({})); break; } case GroupByKind::ROLLUP: { auto grouping_rollup_function = std::make_shared(arguments_indexes, aggregation_keys_size, force_grouping_standard_compatibility); auto grouping_rollup_function_adaptor = std::make_shared(std::move(grouping_rollup_function)); - function_node->resolveAsFunction(std::move(grouping_rollup_function_adaptor), std::make_shared()); + function_node->resolveAsFunction(grouping_rollup_function_adaptor->build({})); function_node->getArguments().getNodes().push_back(std::move(grouping_set_argument_column)); break; } @@ -116,7 +116,7 @@ public: { auto grouping_cube_function = std::make_shared(arguments_indexes, aggregation_keys_size, force_grouping_standard_compatibility); auto grouping_cube_function_adaptor = std::make_shared(std::move(grouping_cube_function)); - function_node->resolveAsFunction(std::move(grouping_cube_function_adaptor), std::make_shared()); + function_node->resolveAsFunction(grouping_cube_function_adaptor->build({})); function_node->getArguments().getNodes().push_back(std::move(grouping_set_argument_column)); break; } @@ -124,7 +124,7 @@ public: { auto grouping_grouping_sets_function = std::make_shared(arguments_indexes, grouping_sets_keys_indices, force_grouping_standard_compatibility); auto grouping_grouping_sets_function_adaptor = std::make_shared(std::move(grouping_grouping_sets_function)); - function_node->resolveAsFunction(std::move(grouping_grouping_sets_function_adaptor), std::make_shared()); + function_node->resolveAsFunction(grouping_grouping_sets_function_adaptor->build({})); function_node->getArguments().getNodes().push_back(std::move(grouping_set_argument_column)); break; } diff --git a/src/Planner/PlannerExpressionAnalysis.cpp b/src/Planner/PlannerExpressionAnalysis.cpp index 9db268512be..91a04b090fc 100644 --- a/src/Planner/PlannerExpressionAnalysis.cpp +++ b/src/Planner/PlannerExpressionAnalysis.cpp @@ -65,7 +65,7 @@ std::optional analyzeAggregation(QueryTreeNodePtr & q ColumnsWithTypeAndName aggregates_columns; aggregates_columns.reserve(aggregates_descriptions.size()); for (auto & aggregate_description : aggregates_descriptions) - aggregates_columns.emplace_back(nullptr, aggregate_description.function->getReturnType(), aggregate_description.column_name); + aggregates_columns.emplace_back(nullptr, aggregate_description.function->getResultType(), aggregate_description.column_name); Names aggregation_keys; @@ -284,7 +284,7 @@ std::optional analyzeWindow(QueryTreeNodePtr & query_tree, for (auto & window_description : window_descriptions) for (auto & window_function : window_description.window_functions) - window_functions_additional_columns.emplace_back(nullptr, window_function.aggregate_function->getReturnType(), window_function.column_name); + 
window_functions_additional_columns.emplace_back(nullptr, window_function.aggregate_function->getResultType(), window_function.column_name); auto before_window_step = std::make_unique(before_window_actions, ActionsChainStep::AvailableOutputColumnsStrategy::ALL_NODES, diff --git a/src/Processors/Executors/StreamingFormatExecutor.cpp b/src/Processors/Executors/StreamingFormatExecutor.cpp index db81b6707f2..91db2c09a20 100644 --- a/src/Processors/Executors/StreamingFormatExecutor.cpp +++ b/src/Processors/Executors/StreamingFormatExecutor.cpp @@ -34,8 +34,15 @@ MutableColumns StreamingFormatExecutor::getResultColumns() size_t StreamingFormatExecutor::execute(ReadBuffer & buffer) { + auto & initial_buf = format->getReadBuffer(); format->setReadBuffer(buffer); - return execute(); + size_t rows = execute(); + /// Format destructor can touch read buffer (for example when we use PeekableReadBuffer), + /// but we cannot control lifetime of provided read buffer. To avoid heap use after free + /// we can set initial read buffer back, because initial read buffer was created before + /// format, so it will be destructed after it. + format->setReadBuffer(initial_buf); + return rows; } size_t StreamingFormatExecutor::execute() diff --git a/src/Processors/Executors/StreamingFormatExecutor.h b/src/Processors/Executors/StreamingFormatExecutor.h index b84364dcca0..f948d833095 100644 --- a/src/Processors/Executors/StreamingFormatExecutor.h +++ b/src/Processors/Executors/StreamingFormatExecutor.h @@ -2,6 +2,7 @@ #include #include +#include namespace DB { diff --git a/src/Processors/Formats/IInputFormat.h b/src/Processors/Formats/IInputFormat.h index 091447e96ee..4e84eb65aaf 100644 --- a/src/Processors/Formats/IInputFormat.h +++ b/src/Processors/Formats/IInputFormat.h @@ -38,7 +38,7 @@ public: virtual void resetParser(); virtual void setReadBuffer(ReadBuffer & in_); - const ReadBuffer & getReadBuffer() const { return *in; } + ReadBuffer & getReadBuffer() const { return *in; } virtual const BlockMissingValues & getMissingValues() const { diff --git a/src/Processors/Formats/IOutputFormat.cpp b/src/Processors/Formats/IOutputFormat.cpp index 47ebaa9c5f5..88a6fb1e92f 100644 --- a/src/Processors/Formats/IOutputFormat.cpp +++ b/src/Processors/Formats/IOutputFormat.cpp @@ -65,7 +65,7 @@ static Chunk prepareTotals(Chunk chunk) void IOutputFormat::work() { - writePrefixIfNot(); + writePrefixIfNeeded(); if (finished && !finalized) { @@ -73,6 +73,8 @@ void IOutputFormat::work() setRowsBeforeLimit(rows_before_limit_counter->get()); finalize(); + if (auto_flush) + flush(); return; } @@ -84,7 +86,7 @@ void IOutputFormat::work() consume(std::move(current_chunk)); break; case Totals: - writeSuffixIfNot(); + writeSuffixIfNeeded(); if (auto totals = prepareTotals(std::move(current_chunk))) { consumeTotals(std::move(totals)); @@ -92,7 +94,7 @@ void IOutputFormat::work() } break; case Extremes: - writeSuffixIfNot(); + writeSuffixIfNeeded(); consumeExtremes(std::move(current_chunk)); break; } @@ -110,7 +112,7 @@ void IOutputFormat::flush() void IOutputFormat::write(const Block & block) { - writePrefixIfNot(); + writePrefixIfNeeded(); consume(Chunk(block.getColumns(), block.rows())); if (auto_flush) @@ -121,9 +123,10 @@ void IOutputFormat::finalize() { if (finalized) return; - writePrefixIfNot(); - writeSuffixIfNot(); + writePrefixIfNeeded(); + writeSuffixIfNeeded(); finalizeImpl(); + finalizeBuffers(); finalized = true; } diff --git a/src/Processors/Formats/IOutputFormat.h b/src/Processors/Formats/IOutputFormat.h index 
1f21537c6c6..02e91d5b28b 100644 --- a/src/Processors/Formats/IOutputFormat.h +++ b/src/Processors/Formats/IOutputFormat.h @@ -61,13 +61,13 @@ public: void setTotals(const Block & totals) { - writeSuffixIfNot(); + writeSuffixIfNeeded(); consumeTotals(Chunk(totals.getColumns(), totals.rows())); are_totals_written = true; } void setExtremes(const Block & extremes) { - writeSuffixIfNot(); + writeSuffixIfNeeded(); consumeExtremes(Chunk(extremes.getColumns(), extremes.rows())); } @@ -76,17 +76,24 @@ public: void doNotWritePrefix() { need_write_prefix = false; } -protected: - friend class ParallelFormattingOutputFormat; + void resetFormatter() + { + need_write_prefix = true; + need_write_suffix = true; + finalized = false; + resetFormatterImpl(); + } - virtual void consume(Chunk) = 0; - virtual void consumeTotals(Chunk) {} - virtual void consumeExtremes(Chunk) {} - virtual void finalizeImpl() {} - virtual void writePrefix() {} - virtual void writeSuffix() {} + /// Reset the statistics watch to a specific point in time + /// If set to not running it will stop on the call (elapsed = now() - given start) + void setStartTime(UInt64 start, bool is_running) + { + statistics.watch = Stopwatch(CLOCK_MONOTONIC, start, true); + if (!is_running) + statistics.watch.stop(); + } - void writePrefixIfNot() + void writePrefixIfNeeded() { if (need_write_prefix) { @@ -95,7 +102,11 @@ protected: } } - void writeSuffixIfNot() +protected: + friend class ParallelFormattingOutputFormat; + + + void writeSuffixIfNeeded() { if (need_write_suffix) { @@ -104,6 +115,15 @@ protected: } } + virtual void consume(Chunk) = 0; + virtual void consumeTotals(Chunk) {} + virtual void consumeExtremes(Chunk) {} + virtual void finalizeImpl() {} + virtual void finalizeBuffers() {} + virtual void writePrefix() {} + virtual void writeSuffix() {} + virtual void resetFormatterImpl() {} + /// Methods-helpers for parallel formatting. /// Set the number of rows that was already read in @@ -132,9 +152,6 @@ protected: Chunk extremes; }; - void setOutsideStatistics(Statistics statistics_) { statistics = std::make_shared(std::move(statistics_)); } - std::shared_ptr getOutsideStatistics() const { return statistics; } - /// In some formats the way we print extremes depends on /// were totals printed or not. In this case in parallel formatting /// we should notify underling format if totals were printed. @@ -160,10 +177,10 @@ protected: bool need_write_suffix = true; RowsBeforeLimitCounterPtr rows_before_limit_counter; + Statistics statistics; private: size_t rows_read_before = 0; - std::shared_ptr statistics = nullptr; bool are_totals_written = false; /// Counters for consumed chunks. Are used for QueryLog. 
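The IOutputFormat changes above rename the one-shot helpers to writePrefixIfNeeded() / writeSuffixIfNeeded() and add resetFormatter(), which re-arms both flags and clears finalized so the same format object can produce another self-contained output (the real method also calls a virtual resetFormatterImpl() so subclasses can drop their internal writers). A simplified sketch of that lifecycle with a made-up ToyOutputFormat, not the real interface:

#include <iostream>
#include <string>

class ToyOutputFormat
{
public:
    void write(const std::string & row)
    {
        writePrefixIfNeeded();
        std::cout << row << '\n';
    }

    void finalize()
    {
        if (finalized)
            return;
        writePrefixIfNeeded();
        writeSuffixIfNeeded();
        finalized = true;
    }

    /// Re-arm the format so the next write() starts a fresh output:
    /// prefix and suffix will be emitted again.
    void resetFormatter()
    {
        need_write_prefix = true;
        need_write_suffix = true;
        finalized = false;
    }

private:
    void writePrefixIfNeeded()
    {
        if (need_write_prefix)
        {
            std::cout << "<prefix>\n";
            need_write_prefix = false;
        }
    }

    void writeSuffixIfNeeded()
    {
        if (need_write_suffix)
        {
            std::cout << "<suffix>\n";
            need_write_suffix = false;
        }
    }

    bool need_write_prefix = true;
    bool need_write_suffix = true;
    bool finalized = false;
};

int main()
{
    ToyOutputFormat format;
    format.write("row 1");  // emits <prefix>, then the row
    format.finalize();      // emits <suffix>

    format.resetFormatter();
    format.write("row 2");  // emits a fresh <prefix>, then the row
    format.finalize();      // emits <suffix> again
}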
diff --git a/src/Processors/Formats/IRowOutputFormat.cpp b/src/Processors/Formats/IRowOutputFormat.cpp index 52ce9c1b227..ac44dbc0157 100644 --- a/src/Processors/Formats/IRowOutputFormat.cpp +++ b/src/Processors/Formats/IRowOutputFormat.cpp @@ -10,12 +10,11 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -IRowOutputFormat::IRowOutputFormat(const Block & header, WriteBuffer & out_, const Params & params_) +IRowOutputFormat::IRowOutputFormat(const Block & header, WriteBuffer & out_) : IOutputFormat(header, out_) , num_columns(header.columns()) , types(header.getDataTypes()) , serializations(header.getSerializations()) - , params(params_) { } @@ -26,14 +25,10 @@ void IRowOutputFormat::consume(DB::Chunk chunk) for (size_t row = 0; row < num_rows; ++row) { - if (!first_row || getRowsReadBefore() != 0) + if (haveWrittenData()) writeRowBetweenDelimiter(); write(columns, row); - - if (params.callback) - params.callback(columns, row); - first_row = false; } } diff --git a/src/Processors/Formats/IRowOutputFormat.h b/src/Processors/Formats/IRowOutputFormat.h index da2eb192e90..3a648f00eba 100644 --- a/src/Processors/Formats/IRowOutputFormat.h +++ b/src/Processors/Formats/IRowOutputFormat.h @@ -9,14 +9,6 @@ namespace DB { -struct RowOutputFormatParams -{ - using WriteCallback = std::function; - - // Callback used to indicate that another row is written. - WriteCallback callback; -}; - class WriteBuffer; /** Output format that writes data row by row. @@ -24,10 +16,17 @@ class WriteBuffer; class IRowOutputFormat : public IOutputFormat { public: - using Params = RowOutputFormatParams; + /// Used to work with IRowOutputFormat explicitly. + void writeRow(const Columns & columns, size_t row_num) + { + first_row = false; + write(columns, row_num); + } + + virtual void writeRowBetweenDelimiter() {} /// delimiter between rows protected: - IRowOutputFormat(const Block & header, WriteBuffer & out_, const Params & params_); + IRowOutputFormat(const Block & header, WriteBuffer & out_); void consume(Chunk chunk) override; void consumeTotals(Chunk chunk) override; void consumeExtremes(Chunk chunk) override; @@ -51,7 +50,6 @@ protected: virtual void writeFieldDelimiter() {} /// delimiter between values virtual void writeRowStartDelimiter() {} /// delimiter before each row virtual void writeRowEndDelimiter() {} /// delimiter after each row - virtual void writeRowBetweenDelimiter() {} /// delimiter between rows virtual void writePrefix() override {} /// delimiter before resultset virtual void writeSuffix() override {} /// delimiter after resultset virtual void writeBeforeTotals() {} @@ -60,10 +58,11 @@ protected: virtual void writeAfterExtremes() {} virtual void finalizeImpl() override {} /// Write something after resultset, totals end extremes. 
+ bool haveWrittenData() { return !first_row || getRowsReadBefore() != 0; } + size_t num_columns; DataTypes types; Serializations serializations; - Params params; bool first_row = true; }; diff --git a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp index 4eb7ab98f31..ed963d8a500 100644 --- a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp @@ -71,7 +71,7 @@ Chunk ArrowBlockInputFormat::generate() ++record_batch_current; - arrow_column_to_ch_column->arrowTableToCHChunk(res, *table_result); + arrow_column_to_ch_column->arrowTableToCHChunk(res, *table_result, (*table_result)->num_rows()); /// If defaults_for_omitted_fields is true, calculate the default values from default expression for omitted fields. /// Otherwise fill the missing columns with zero values of its type. @@ -173,8 +173,9 @@ NamesAndTypesList ArrowSchemaReader::readSchema() auto header = ArrowColumnToCHColumn::arrowSchemaToCHHeader( *schema, stream ? "ArrowStream" : "Arrow", format_settings.arrow.skip_columns_with_unsupported_types_in_schema_inference); - return getNamesAndRecursivelyNullableTypes(header); -} + if (format_settings.schema_inference_make_columns_nullable) + return getNamesAndRecursivelyNullableTypes(header); + return header.getNamesAndTypesList();} void registerInputFormatArrow(FormatFactory & factory) { @@ -208,12 +209,24 @@ void registerArrowSchemaReader(FormatFactory & factory) { return std::make_shared(buf, false, settings); }); + + factory.registerAdditionalInfoForSchemaCacheGetter("Arrow", [](const FormatSettings & settings) + { + return fmt::format("schema_inference_make_columns_nullable={}", settings.schema_inference_make_columns_nullable); + }); factory.registerSchemaReader( "ArrowStream", [](ReadBuffer & buf, const FormatSettings & settings) { return std::make_shared(buf, true, settings); - });} + }); + + factory.registerAdditionalInfoForSchemaCacheGetter("ArrowStream", [](const FormatSettings & settings) + { + return fmt::format("schema_inference_make_columns_nullable={}", settings.schema_inference_make_columns_nullable); + }); +} + } #else diff --git a/src/Processors/Formats/Impl/ArrowBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ArrowBlockOutputFormat.cpp index 83eaefa8cf7..874709cc8d7 100644 --- a/src/Processors/Formats/Impl/ArrowBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ArrowBlockOutputFormat.cpp @@ -21,7 +21,6 @@ ArrowBlockOutputFormat::ArrowBlockOutputFormat(WriteBuffer & out_, const Block & : IOutputFormat(header_, out_) , stream{stream_} , format_settings{format_settings_} - , arrow_ostream{std::make_shared(out_)} { } @@ -65,8 +64,15 @@ void ArrowBlockOutputFormat::finalizeImpl() "Error while closing a table: {}", status.ToString()); } +void ArrowBlockOutputFormat::resetFormatterImpl() +{ + writer.reset(); + arrow_ostream.reset(); +} + void ArrowBlockOutputFormat::prepareWriter(const std::shared_ptr & schema) { + arrow_ostream = std::make_shared(out); arrow::Result> writer_status; // TODO: should we use arrow::ipc::IpcOptions::alignment? 
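The registerAdditionalInfoForSchemaCacheGetter calls above fold schema_inference_make_columns_nullable into the cached-schema key, because readSchema() now returns different column types depending on that setting; without it, a schema inferred under one value could be served for the other. A rough standalone sketch of the idea (makeSchemaCacheKey and the std::map cache are hypothetical, not ClickHouse's schema cache):

#include <iostream>
#include <map>
#include <string>

/// The key must capture every setting that can change the inferred schema,
/// otherwise the cache would hand back a stale result for a different setting.
static std::string makeSchemaCacheKey(const std::string & source, const std::string & format, bool make_columns_nullable)
{
    return source + ":" + format + ":schema_inference_make_columns_nullable=" + (make_columns_nullable ? "1" : "0");
}

int main()
{
    std::map<std::string, std::string> schema_cache;

    schema_cache[makeSchemaCacheKey("data.arrow", "Arrow", true)] = "x Nullable(Int64)";
    schema_cache[makeSchemaCacheKey("data.arrow", "Arrow", false)] = "x Int64";

    std::cout << schema_cache.size() << '\n'; // two distinct entries, one per setting value
}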
@@ -88,7 +94,6 @@ void registerOutputFormatArrow(FormatFactory & factory) "Arrow", [](WriteBuffer & buf, const Block & sample, - const RowOutputFormatParams &, const FormatSettings & format_settings) { return std::make_shared(buf, sample, false, format_settings); @@ -99,7 +104,6 @@ void registerOutputFormatArrow(FormatFactory & factory) "ArrowStream", [](WriteBuffer & buf, const Block & sample, - const RowOutputFormatParams &, const FormatSettings & format_settings) { return std::make_shared(buf, sample, true, format_settings); diff --git a/src/Processors/Formats/Impl/ArrowBlockOutputFormat.h b/src/Processors/Formats/Impl/ArrowBlockOutputFormat.h index ce0bdab9bcb..3c977842625 100644 --- a/src/Processors/Formats/Impl/ArrowBlockOutputFormat.h +++ b/src/Processors/Formats/Impl/ArrowBlockOutputFormat.h @@ -27,6 +27,7 @@ public: private: void consume(Chunk) override; void finalizeImpl() override; + void resetFormatterImpl() override; void prepareWriter(const std::shared_ptr & schema); diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index 8b546f48116..cbc87f921ef 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -69,7 +69,6 @@ namespace ErrorCodes extern const int DUPLICATE_COLUMN; extern const int THERE_IS_NO_COLUMN; extern const int UNKNOWN_EXCEPTION; - extern const int INCORRECT_NUMBER_OF_COLUMNS; extern const int INCORRECT_DATA; } @@ -810,7 +809,7 @@ ArrowColumnToCHColumn::ArrowColumnToCHColumn( { } -void ArrowColumnToCHColumn::arrowTableToCHChunk(Chunk & res, std::shared_ptr & table) +void ArrowColumnToCHColumn::arrowTableToCHChunk(Chunk & res, std::shared_ptr & table, size_t num_rows) { NameToColumnPtr name_to_column_ptr; for (auto column_name : table->ColumnNames()) @@ -824,16 +823,12 @@ void ArrowColumnToCHColumn::arrowTableToCHChunk(Chunk & res, std::shared_ptrsecond->length(); columns_list.reserve(header.columns()); std::unordered_map>> nested_tables; bool skipped = false; diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h index 3540778940e..dd9f44eb94e 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h @@ -28,9 +28,9 @@ public: bool allow_missing_columns_, bool case_insensitive_matching_ = false); - void arrowTableToCHChunk(Chunk & res, std::shared_ptr & table); + void arrowTableToCHChunk(Chunk & res, std::shared_ptr & table, size_t num_rows); - void arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & name_to_column_ptr); + void arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & name_to_column_ptr, size_t num_rows); /// Get missing columns that exists in header but not in arrow::Schema std::vector getMissingColumns(const arrow::Schema & schema) const; diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp index 80183838277..da7f18260a9 100644 --- a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -46,7 +47,6 @@ #include #include -#include #include #include #include @@ -906,11 +906,15 @@ AvroConfluentRowInputFormat::AvroConfluentRowInputFormat( const Block & header_, ReadBuffer & in_, Params params_, const FormatSettings & format_settings_) : IRowInputFormat(header_, in_, params_) , 
schema_registry(getConfluentSchemaRegistry(format_settings_)) - , input_stream(std::make_unique(*in)) - , decoder(avro::binaryDecoder()) , format_settings(format_settings_) { +} + +void AvroConfluentRowInputFormat::readPrefix() +{ + input_stream = std::make_unique(*in); + decoder = avro::binaryDecoder(); decoder->init(*input_stream); } @@ -989,7 +993,7 @@ DataTypePtr AvroSchemaReader::avroNodeToDataType(avro::NodePtr node) case avro::Type::AVRO_LONG: return std::make_shared(); case avro::Type::AVRO_BOOL: - return std::make_shared(); + return DataTypeFactory::instance().get("Bool"); case avro::Type::AVRO_FLOAT: return std::make_shared(); case avro::Type::AVRO_DOUBLE: diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.h b/src/Processors/Formats/Impl/AvroRowInputFormat.h index 3a029232420..4525d7d33b0 100644 --- a/src/Processors/Formats/Impl/AvroRowInputFormat.h +++ b/src/Processors/Formats/Impl/AvroRowInputFormat.h @@ -163,6 +163,7 @@ public: private: virtual bool readRow(MutableColumns & columns, RowReadExtension & ext) override; + void readPrefix() override; bool allowSyncAfterError() const override { return true; } void syncAfterError() override; diff --git a/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp b/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp index e3d570d1876..7efe2a999b4 100644 --- a/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp @@ -97,6 +97,12 @@ AvroSerializer::SchemaWithSerializeFn AvroSerializer::createSchemaWithSerializeF switch (data_type->getTypeId()) { case TypeIndex::UInt8: + if (isBool(data_type)) + return {avro::BoolSchema(), [](const IColumn & column, size_t row_num, avro::Encoder & encoder) + { + encoder.encodeBool(assert_cast(column).getElement(row_num)); + }}; + return {avro::IntSchema(), [](const IColumn & column, size_t row_num, avro::Encoder & encoder) { encoder.encodeInt(assert_cast(column).getElement(row_num)); @@ -436,8 +442,8 @@ static avro::Codec getCodec(const std::string & codec_name) } AvroRowOutputFormat::AvroRowOutputFormat( - WriteBuffer & out_, const Block & header_, const RowOutputFormatParams & params_, const FormatSettings & settings_) - : IRowOutputFormat(header_, out_, params_) + WriteBuffer & out_, const Block & header_, const FormatSettings & settings_) + : IRowOutputFormat(header_, out_) , settings(settings_) , serializer(header_.getColumnsWithTypeAndName(), std::make_unique(settings)) { @@ -471,67 +477,24 @@ void AvroRowOutputFormat::write(const Columns & columns, size_t row_num) file_writer_ptr->incr(); } -void AvroRowOutputFormat::writeSuffix() +void AvroRowOutputFormat::finalizeImpl() +{ + file_writer_ptr->close(); +} + +void AvroRowOutputFormat::resetFormatterImpl() { file_writer_ptr.reset(); } -void AvroRowOutputFormat::consume(DB::Chunk chunk) -{ - if (params.callback) - consumeImplWithCallback(std::move(chunk)); - else - consumeImpl(std::move(chunk)); -} - -void AvroRowOutputFormat::consumeImpl(DB::Chunk chunk) -{ - auto num_rows = chunk.getNumRows(); - const auto & columns = chunk.getColumns(); - - for (size_t row = 0; row < num_rows; ++row) - { - write(columns, row); - } - -} - -void AvroRowOutputFormat::consumeImplWithCallback(DB::Chunk chunk) -{ - auto num_rows = chunk.getNumRows(); - const auto & columns = chunk.getColumns(); - - for (size_t row = 0; row < num_rows;) - { - size_t current_row = row; - /// used by WriteBufferToKafkaProducer to obtain auxiliary data - /// from the starting row of a file - - writePrefixIfNot(); - for (size_t 
row_in_file = 0; - row_in_file < settings.avro.output_rows_in_file && row < num_rows; - ++row, ++row_in_file) - { - write(columns, row); - } - - file_writer_ptr->flush(); - writeSuffix(); - need_write_prefix = true; - - params.callback(columns, current_row); - } -} - void registerOutputFormatAvro(FormatFactory & factory) { factory.registerOutputFormat("Avro", []( WriteBuffer & buf, const Block & sample, - const RowOutputFormatParams & params, const FormatSettings & settings) { - return std::make_shared(buf, sample, params, settings); + return std::make_shared(buf, sample, settings); }); factory.markFormatHasNoAppendSupport("Avro"); } diff --git a/src/Processors/Formats/Impl/AvroRowOutputFormat.h b/src/Processors/Formats/Impl/AvroRowOutputFormat.h index 4834c8948b2..d7b15a95d26 100644 --- a/src/Processors/Formats/Impl/AvroRowOutputFormat.h +++ b/src/Processors/Formats/Impl/AvroRowOutputFormat.h @@ -46,27 +46,23 @@ private: class AvroRowOutputFormat final : public IRowOutputFormat { public: - AvroRowOutputFormat(WriteBuffer & out_, const Block & header_, const RowOutputFormatParams & params_, const FormatSettings & settings_); + AvroRowOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & settings_); virtual ~AvroRowOutputFormat() override; - void consume(Chunk) override; String getName() const override { return "AvroRowOutputFormat"; } private: void write(const Columns & columns, size_t row_num) override; void writeField(const IColumn &, const ISerialization &, size_t) override {} virtual void writePrefix() override; - virtual void writeSuffix() override; + virtual void finalizeImpl() override; + virtual void resetFormatterImpl() override; void createFileWriter(); FormatSettings settings; AvroSerializer serializer; std::unique_ptr file_writer_ptr; - - void consumeImpl(Chunk); - void consumeImplWithCallback(Chunk); - }; } diff --git a/src/Processors/Formats/Impl/BSONEachRowRowOutputFormat.cpp b/src/Processors/Formats/Impl/BSONEachRowRowOutputFormat.cpp index c296114a6e7..c9530d4ba81 100644 --- a/src/Processors/Formats/Impl/BSONEachRowRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/BSONEachRowRowOutputFormat.cpp @@ -43,8 +43,8 @@ static String toValidUTF8String(const String & name) } BSONEachRowRowOutputFormat::BSONEachRowRowOutputFormat( - WriteBuffer & out_, const Block & header_, const RowOutputFormatParams & params_, const FormatSettings & settings_) - : IRowOutputFormat(header_, out_, params_), settings(settings_) + WriteBuffer & out_, const Block & header_, const FormatSettings & settings_) + : IRowOutputFormat(header_, out_), settings(settings_) { const auto & sample = getPort(PortKind::Main).getHeader(); fields.reserve(sample.columns()); @@ -519,8 +519,8 @@ void registerOutputFormatBSONEachRow(FormatFactory & factory) { factory.registerOutputFormat( "BSONEachRow", - [](WriteBuffer & buf, const Block & sample, const RowOutputFormatParams & params, const FormatSettings & _format_settings) - { return std::make_shared(buf, sample, params, _format_settings); }); + [](WriteBuffer & buf, const Block & sample, const FormatSettings & _format_settings) + { return std::make_shared(buf, sample, _format_settings); }); factory.markOutputFormatSupportsParallelFormatting("BSONEachRow"); } diff --git a/src/Processors/Formats/Impl/BSONEachRowRowOutputFormat.h b/src/Processors/Formats/Impl/BSONEachRowRowOutputFormat.h index f2252cabebe..d6fcd38c841 100644 --- a/src/Processors/Formats/Impl/BSONEachRowRowOutputFormat.h +++ 
b/src/Processors/Formats/Impl/BSONEachRowRowOutputFormat.h @@ -47,7 +47,7 @@ class BSONEachRowRowOutputFormat final : public IRowOutputFormat { public: BSONEachRowRowOutputFormat( - WriteBuffer & out_, const Block & header_, const RowOutputFormatParams & params_, const FormatSettings & settings_); + WriteBuffer & out_, const Block & header_, const FormatSettings & settings_); String getName() const override { return "BSONEachRowRowOutputFormat"; } diff --git a/src/Processors/Formats/Impl/BinaryRowOutputFormat.cpp b/src/Processors/Formats/Impl/BinaryRowOutputFormat.cpp index c9ed8e03449..ff904f61d22 100644 --- a/src/Processors/Formats/Impl/BinaryRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/BinaryRowOutputFormat.cpp @@ -10,8 +10,8 @@ namespace DB { -BinaryRowOutputFormat::BinaryRowOutputFormat(WriteBuffer & out_, const Block & header, bool with_names_, bool with_types_, const RowOutputFormatParams & params_, const FormatSettings & format_settings_) - : IRowOutputFormat(header, out_, params_), with_names(with_names_), with_types(with_types_), format_settings(format_settings_) +BinaryRowOutputFormat::BinaryRowOutputFormat(WriteBuffer & out_, const Block & header, bool with_names_, bool with_types_, const FormatSettings & format_settings_) + : IRowOutputFormat(header, out_), with_names(with_names_), with_types(with_types_), format_settings(format_settings_) { } @@ -55,10 +55,9 @@ void registerOutputFormatRowBinary(FormatFactory & factory) factory.registerOutputFormat(format_name, [with_names, with_types]( WriteBuffer & buf, const Block & sample, - const RowOutputFormatParams & params, const FormatSettings & format_settings) { - return std::make_shared(buf, sample, with_names, with_types, params, format_settings); + return std::make_shared(buf, sample, with_names, with_types, format_settings); }); factory.markOutputFormatSupportsParallelFormatting(format_name); }; diff --git a/src/Processors/Formats/Impl/BinaryRowOutputFormat.h b/src/Processors/Formats/Impl/BinaryRowOutputFormat.h index e8198cb6ee0..3f58ccccf76 100644 --- a/src/Processors/Formats/Impl/BinaryRowOutputFormat.h +++ b/src/Processors/Formats/Impl/BinaryRowOutputFormat.h @@ -17,7 +17,7 @@ class WriteBuffer; class BinaryRowOutputFormat final: public IRowOutputFormat { public: - BinaryRowOutputFormat(WriteBuffer & out_, const Block & header, bool with_names_, bool with_types_, const RowOutputFormatParams & params_, const FormatSettings & format_settings_); + BinaryRowOutputFormat(WriteBuffer & out_, const Block & header, bool with_names_, bool with_types_, const FormatSettings & format_settings_); String getName() const override { return "BinaryRowOutputFormat"; } diff --git a/src/Processors/Formats/Impl/CSVRowOutputFormat.cpp b/src/Processors/Formats/Impl/CSVRowOutputFormat.cpp index 790994cb240..304e877aae9 100644 --- a/src/Processors/Formats/Impl/CSVRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/CSVRowOutputFormat.cpp @@ -9,8 +9,8 @@ namespace DB { -CSVRowOutputFormat::CSVRowOutputFormat(WriteBuffer & out_, const Block & header_, bool with_names_, bool with_types_, const RowOutputFormatParams & params_, const FormatSettings & format_settings_) - : IRowOutputFormat(header_, out_, params_), with_names(with_names_), with_types(with_types_), format_settings(format_settings_) +CSVRowOutputFormat::CSVRowOutputFormat(WriteBuffer & out_, const Block & header_, bool with_names_, bool with_types_, const FormatSettings & format_settings_) + : IRowOutputFormat(header_, out_), with_names(with_names_), with_types(with_types_), 
format_settings(format_settings_) { const auto & sample = getPort(PortKind::Main).getHeader(); size_t columns = sample.columns(); @@ -24,11 +24,10 @@ void CSVRowOutputFormat::writeLine(const std::vector & values) for (size_t i = 0; i < values.size(); ++i) { writeCSVString(values[i], out); - if (i + 1 == values.size()) - writeRowEndDelimiter(); - else + if (i + 1 != values.size()) writeFieldDelimiter(); } + writeRowEndDelimiter(); } void CSVRowOutputFormat::writePrefix() @@ -80,10 +79,9 @@ void registerOutputFormatCSV(FormatFactory & factory) factory.registerOutputFormat(format_name, [with_names, with_types]( WriteBuffer & buf, const Block & sample, - const RowOutputFormatParams & params, const FormatSettings & format_settings) { - return std::make_shared(buf, sample, with_names, with_types, params, format_settings); + return std::make_shared(buf, sample, with_names, with_types, format_settings); }); factory.markOutputFormatSupportsParallelFormatting(format_name); }; diff --git a/src/Processors/Formats/Impl/CSVRowOutputFormat.h b/src/Processors/Formats/Impl/CSVRowOutputFormat.h index 0efc00378e5..131439e6bbc 100644 --- a/src/Processors/Formats/Impl/CSVRowOutputFormat.h +++ b/src/Processors/Formats/Impl/CSVRowOutputFormat.h @@ -20,7 +20,7 @@ public: /** with_names - output in the first line a header with column names * with_types - output in the next line header with the names of the types */ - CSVRowOutputFormat(WriteBuffer & out_, const Block & header_, bool with_names_, bool with_types, const RowOutputFormatParams & params_, const FormatSettings & format_settings_); + CSVRowOutputFormat(WriteBuffer & out_, const Block & header_, bool with_names_, bool with_types, const FormatSettings & format_settings_); String getName() const override { return "CSVRowOutputFormat"; } diff --git a/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp b/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp index 08d2cac743a..58ace9cfca5 100644 --- a/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp @@ -99,6 +99,12 @@ static void insertSignedInteger(IColumn & column, const DataTypePtr & column_typ case TypeIndex::DateTime64: assert_cast &>(column).insertValue(value); break; + case TypeIndex::Decimal32: + assert_cast &>(column).insertValue(static_cast(value)); + break; + case TypeIndex::Decimal64: + assert_cast &>(column).insertValue(value); + break; default: throw Exception(ErrorCodes::LOGICAL_ERROR, "Column type is not a signed integer."); } @@ -178,14 +184,14 @@ static void insertEnum(IColumn & column, const DataTypePtr & column_type, const } } -static void insertValue(IColumn & column, const DataTypePtr & column_type, const capnp::DynamicValue::Reader & value, FormatSettings::EnumComparingMode enum_comparing_mode) +static void insertValue(IColumn & column, const DataTypePtr & column_type, const String & column_name, const capnp::DynamicValue::Reader & value, FormatSettings::EnumComparingMode enum_comparing_mode) { if (column_type->lowCardinality()) { auto & lc_column = assert_cast(column); auto tmp_column = lc_column.getDictionary().getNestedColumn()->cloneEmpty(); auto dict_type = assert_cast(column_type.get())->getDictionaryType(); - insertValue(*tmp_column, dict_type, value, enum_comparing_mode); + insertValue(*tmp_column, dict_type, column_name, value, enum_comparing_mode); lc_column.insertFromFullColumn(*tmp_column, 0); return; } @@ -226,7 +232,7 @@ static void insertValue(IColumn & column, const DataTypePtr & column_type, const 
auto & nested_column = column_array.getData(); auto nested_type = assert_cast(column_type.get())->getNestedType(); for (const auto & nested_value : list_value) - insertValue(nested_column, nested_type, nested_value, enum_comparing_mode); + insertValue(nested_column, nested_type, column_name, nested_value, enum_comparing_mode); break; } case capnp::DynamicValue::Type::STRUCT: @@ -243,11 +249,11 @@ static void insertValue(IColumn & column, const DataTypePtr & column_type, const auto & nested_column = nullable_column.getNestedColumn(); auto nested_type = assert_cast(column_type.get())->getNestedType(); auto nested_value = struct_value.get(field); - insertValue(nested_column, nested_type, nested_value, enum_comparing_mode); + insertValue(nested_column, nested_type, column_name, nested_value, enum_comparing_mode); nullable_column.getNullMapData().push_back(0); } } - else + else if (isTuple(column_type)) { auto & tuple_column = assert_cast(column); const auto * tuple_type = assert_cast(column_type.get()); @@ -255,9 +261,16 @@ static void insertValue(IColumn & column, const DataTypePtr & column_type, const insertValue( tuple_column.getColumn(i), tuple_type->getElements()[i], + tuple_type->getElementNames()[i], struct_value.get(tuple_type->getElementNames()[i]), enum_comparing_mode); } + else + { + /// It can be nested column from Nested type. + auto [field_name, nested_name] = splitCapnProtoFieldName(column_name); + insertValue(column, column_type, nested_name, struct_value.get(nested_name), enum_comparing_mode); + } break; } default: @@ -278,7 +291,7 @@ bool CapnProtoRowInputFormat::readRow(MutableColumns & columns, RowReadExtension for (size_t i = 0; i != columns.size(); ++i) { auto value = getReaderByColumnName(root_reader, column_names[i]); - insertValue(*columns[i], column_types[i], value, format_settings.capn_proto.enum_comparing_mode); + insertValue(*columns[i], column_types[i], column_names[i], value, format_settings.capn_proto.enum_comparing_mode); } } catch (const kj::Exception & e) diff --git a/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp index 654917b6357..dcbd5db5f9b 100644 --- a/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp @@ -42,10 +42,9 @@ void CapnProtoOutputStream::write(const void * buffer, size_t size) CapnProtoRowOutputFormat::CapnProtoRowOutputFormat( WriteBuffer & out_, const Block & header_, - const RowOutputFormatParams & params_, const FormatSchemaInfo & info, const FormatSettings & format_settings_) - : IRowOutputFormat(header_, out_, params_), column_names(header_.getNames()), column_types(header_.getDataTypes()), output_stream(std::make_unique(out_)), format_settings(format_settings_) + : IRowOutputFormat(header_, out_), column_names(header_.getNames()), column_types(header_.getDataTypes()), output_stream(std::make_unique(out_)), format_settings(format_settings_) { schema = schema_parser.getMessageSchema(info); checkCapnProtoSchemaStructure(schema, getPort(PortKind::Main).getHeader(), format_settings.capn_proto.enum_comparing_mode); @@ -92,6 +91,7 @@ static std::optional convertToDynamicValue( const ColumnPtr & column, const DataTypePtr & data_type, size_t row_num, + const String & column_name, capnp::DynamicValue::Builder builder, FormatSettings::EnumComparingMode enum_comparing_mode, std::vector> & temporary_text_data_storage) @@ -103,15 +103,12 @@ static std::optional convertToDynamicValue( const auto * lc_column = 
assert_cast(column.get()); const auto & dict_type = assert_cast(data_type.get())->getDictionaryType(); size_t index = lc_column->getIndexAt(row_num); - return convertToDynamicValue(lc_column->getDictionary().getNestedColumn(), dict_type, index, builder, enum_comparing_mode, temporary_text_data_storage); + return convertToDynamicValue(lc_column->getDictionary().getNestedColumn(), dict_type, index, column_name, builder, enum_comparing_mode, temporary_text_data_storage); } switch (builder.getType()) { case capnp::DynamicValue::Type::INT: - /// We allow output DateTime64 as Int64. - if (WhichDataType(data_type).isDateTime64()) - return capnp::DynamicValue::Reader(assert_cast *>(column.get())->getElement(row_num)); return capnp::DynamicValue::Reader(column->getInt(row_num)); case capnp::DynamicValue::Type::UINT: return capnp::DynamicValue::Reader(column->getUInt(row_num)); @@ -150,7 +147,7 @@ static std::optional convertToDynamicValue( { auto struct_builder = builder.as(); auto nested_struct_schema = struct_builder.getSchema(); - /// Struct can be represent Tuple or Naullable (named union with two fields) + /// Struct can represent Tuple, Nullable (named union with two fields) or single column when it contains one nested column. if (data_type->isNullable()) { const auto * nullable_type = assert_cast(data_type.get()); @@ -167,12 +164,12 @@ static std::optional convertToDynamicValue( struct_builder.clear(value_field); const auto & nested_column = nullable_column->getNestedColumnPtr(); auto value_builder = initStructFieldBuilder(nested_column, row_num, struct_builder, value_field); - auto value = convertToDynamicValue(nested_column, nullable_type->getNestedType(), row_num, value_builder, enum_comparing_mode, temporary_text_data_storage); + auto value = convertToDynamicValue(nested_column, nullable_type->getNestedType(), row_num, column_name, value_builder, enum_comparing_mode, temporary_text_data_storage); if (value) struct_builder.set(value_field, *value); } } - else + else if (isTuple(data_type)) { const auto * tuple_data_type = assert_cast(data_type.get()); auto nested_types = tuple_data_type->getElements(); @@ -182,11 +179,21 @@ static std::optional convertToDynamicValue( auto pos = tuple_data_type->getPositionByName(name); auto field_builder = initStructFieldBuilder(nested_columns[pos], row_num, struct_builder, nested_struct_schema.getFieldByName(name)); - auto value = convertToDynamicValue(nested_columns[pos], nested_types[pos], row_num, field_builder, enum_comparing_mode, temporary_text_data_storage); + auto value = convertToDynamicValue(nested_columns[pos], nested_types[pos], row_num, column_name, field_builder, enum_comparing_mode, temporary_text_data_storage); if (value) struct_builder.set(name, *value); } } + else + { + /// It can be nested column from Nested type. 
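The new else branches above handle columns that come from a flattened Nested type, falling back to splitCapnProtoFieldName on names such as "n.x". The helper's exact contract is not part of this hunk, so the following standalone sketch only illustrates the kind of split the code relies on; splitFieldNameSketch is a hypothetical stand-in, not the real function.

#include <cassert>
#include <string>
#include <utility>

/// Hypothetical splitter for a flattened Nested column name: "n.x" -> ("n", "x").
/// The real splitCapnProtoFieldName may differ in details (e.g. names with several dots).
static std::pair<std::string, std::string> splitFieldNameSketch(const std::string & name)
{
    auto pos = name.find('.');
    if (pos == std::string::npos)
        return {name, {}};                                   /// plain column, no nested part
    return {name.substr(0, pos), name.substr(pos + 1)};      /// outer field, nested field
}

int main()
{
    auto [field, nested] = splitFieldNameSketch("n.x");
    assert(field == "n" && nested == "x");
    assert(splitFieldNameSketch("plain").second.empty());
    return 0;
}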
+ auto [field_name, nested_name] = splitCapnProtoFieldName(column_name); + auto nested_field = nested_struct_schema.getFieldByName(nested_name); + auto field_builder = initStructFieldBuilder(column, row_num, struct_builder, nested_field); + auto value = convertToDynamicValue(column, data_type, row_num, nested_name, field_builder, enum_comparing_mode, temporary_text_data_storage); + if (value) + struct_builder.set(nested_field, *value); + } return std::nullopt; } case capnp::DynamicValue::Type::LIST: @@ -213,7 +220,7 @@ static std::optional convertToDynamicValue( else value_builder = list_builder[i]; - auto value = convertToDynamicValue(nested_column, nested_type, offset + i, value_builder, enum_comparing_mode, temporary_text_data_storage); + auto value = convertToDynamicValue(nested_column, nested_type, offset + i, column_name, value_builder, enum_comparing_mode, temporary_text_data_storage); if (value) list_builder.set(i, *value); } @@ -231,11 +238,19 @@ void CapnProtoRowOutputFormat::write(const Columns & columns, size_t row_num) /// See comment in convertToDynamicValue() for more details. std::vector> temporary_text_data_storage; capnp::DynamicStruct::Builder root = message.initRoot(schema); + + /// Some columns can share same field builder. For example when we have + /// column with Nested type that was flattened into several columns. + std::unordered_map field_builders; for (size_t i = 0; i != columns.size(); ++i) { auto [struct_builder, field] = getStructBuilderAndFieldByColumnName(root, column_names[i]); - auto field_builder = initStructFieldBuilder(columns[i], row_num, struct_builder, field); - auto value = convertToDynamicValue(columns[i], column_types[i], row_num, field_builder, format_settings.capn_proto.enum_comparing_mode, temporary_text_data_storage); + if (!field_builders.contains(field.getIndex())) + { + auto field_builder = initStructFieldBuilder(columns[i], row_num, struct_builder, field); + field_builders[field.getIndex()] = field_builder; + } + auto value = convertToDynamicValue(columns[i], column_types[i], row_num, column_names[i], field_builders[field.getIndex()], format_settings.capn_proto.enum_comparing_mode, temporary_text_data_storage); if (value) struct_builder.set(field, *value); } @@ -248,10 +263,9 @@ void registerOutputFormatCapnProto(FormatFactory & factory) factory.registerOutputFormat("CapnProto", []( WriteBuffer & buf, const Block & sample, - const RowOutputFormatParams & params, const FormatSettings & format_settings) { - return std::make_shared(buf, sample, params, FormatSchemaInfo(format_settings, "CapnProto", true), format_settings); + return std::make_shared(buf, sample, FormatSchemaInfo(format_settings, "CapnProto", true), format_settings); }); } diff --git a/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.h b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.h index d1f64838145..5cc7099d4c7 100644 --- a/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.h +++ b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.h @@ -29,7 +29,6 @@ public: CapnProtoRowOutputFormat( WriteBuffer & out_, const Block & header_, - const RowOutputFormatParams & params_, const FormatSchemaInfo & info, const FormatSettings & format_settings_); diff --git a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp index bc363e5aa98..994af449947 100644 --- a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp +++ b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp @@ -418,12 +418,13 @@ 
ConstantExpressionTemplate::Cache::getFromCacheOrConstruct(const DataTypePtr & r return res; } -bool ConstantExpressionTemplate::parseExpression(ReadBuffer & istr, const FormatSettings & format_settings, const Settings & settings) +bool ConstantExpressionTemplate::parseExpression( + ReadBuffer & istr, const TokenIterator & token_iterator, const FormatSettings & format_settings, const Settings & settings) { size_t cur_column = 0; try { - if (tryParseExpression(istr, format_settings, cur_column, settings)) + if (tryParseExpression(istr, token_iterator, format_settings, cur_column, settings)) { ++rows_count; return true; @@ -445,7 +446,12 @@ bool ConstantExpressionTemplate::parseExpression(ReadBuffer & istr, const Format return false; } -bool ConstantExpressionTemplate::tryParseExpression(ReadBuffer & istr, const FormatSettings & format_settings, size_t & cur_column, const Settings & settings) +bool ConstantExpressionTemplate::tryParseExpression( + ReadBuffer & istr, + const TokenIterator & token_iterator, + const FormatSettings & format_settings, + size_t & cur_column, + const Settings & settings) { size_t cur_token = 0; size_t num_columns = structure->literals.columns(); @@ -464,7 +470,7 @@ bool ConstantExpressionTemplate::tryParseExpression(ReadBuffer & istr, const For const DataTypePtr & type = structure->literals.getByPosition(cur_column).type; if (format_settings.values.accurate_types_of_literals && !structure->special_parser[cur_column].useDefaultParser()) { - if (!parseLiteralAndAssertType(istr, type.get(), cur_column, settings)) + if (!parseLiteralAndAssertType(istr, token_iterator, type.get(), cur_column, settings)) return false; } else @@ -482,7 +488,8 @@ bool ConstantExpressionTemplate::tryParseExpression(ReadBuffer & istr, const For return true; } -bool ConstantExpressionTemplate::parseLiteralAndAssertType(ReadBuffer & istr, const IDataType * complex_type, size_t column_idx, const Settings & settings) +bool ConstantExpressionTemplate::parseLiteralAndAssertType( + ReadBuffer & istr, const TokenIterator & token_iterator, const IDataType * complex_type, size_t column_idx, const Settings & settings) { using Type = Field::Types::Which; @@ -497,12 +504,12 @@ bool ConstantExpressionTemplate::parseLiteralAndAssertType(ReadBuffer & istr, co if (type_info.is_array || type_info.is_tuple || type_info.is_map) { - /// TODO faster way to check types without using Parsers ParserArrayOfLiterals parser_array; ParserTupleOfLiterals parser_tuple; - Tokens tokens_number(istr.position(), istr.buffer().end()); - IParser::Pos iterator(tokens_number, static_cast(settings.max_parser_depth)); + IParser::Pos iterator(token_iterator, static_cast(settings.max_parser_depth)); + while (iterator->begin < istr.position()) + ++iterator; Expected expected; ASTPtr ast; if (!parser_array.parse(iterator, ast, expected) && !parser_tuple.parse(iterator, ast, expected)) diff --git a/src/Processors/Formats/Impl/ConstantExpressionTemplate.h b/src/Processors/Formats/Impl/ConstantExpressionTemplate.h index c5d4f033258..fbb3cbcd22a 100644 --- a/src/Processors/Formats/Impl/ConstantExpressionTemplate.h +++ b/src/Processors/Formats/Impl/ConstantExpressionTemplate.h @@ -71,7 +71,8 @@ public: /// Read expression from istr, assert it has the same structure and the same types of literals (template matches) /// and parse literals into temporary columns - bool parseExpression(ReadBuffer & istr, const FormatSettings & format_settings, const Settings & settings); + bool parseExpression( + ReadBuffer & istr, const TokenIterator & 
token_iterator, const FormatSettings & format_settings, const Settings & settings); /// Evaluate batch of expressions were parsed using template. /// If template was deduced with null_as_default == true, set bits in nulls for NULL values in column_idx, starting from offset. @@ -80,8 +81,14 @@ public: size_t rowsCount() const { return rows_count; } private: - bool tryParseExpression(ReadBuffer & istr, const FormatSettings & format_settings, size_t & cur_column, const Settings & settings); - bool parseLiteralAndAssertType(ReadBuffer & istr, const IDataType * type, size_t column_idx, const Settings & settings); + bool tryParseExpression( + ReadBuffer & istr, + const TokenIterator & token_iterator, + const FormatSettings & format_settings, + size_t & cur_column, + const Settings & settings); + bool parseLiteralAndAssertType( + ReadBuffer & istr, const TokenIterator & token_iterator, const IDataType * type, size_t column_idx, const Settings & settings); private: TemplateStructurePtr structure; diff --git a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp index f8e328ed0fb..a2f2e59ef16 100644 --- a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp @@ -88,8 +88,7 @@ void CustomSeparatedRowInputFormat::syncAfterError() void CustomSeparatedRowInputFormat::setReadBuffer(ReadBuffer & in_) { - buf = std::make_unique(in_); - RowInputFormatWithNamesAndTypes::setReadBuffer(*buf); + buf->setSubBuffer(in_); } CustomSeparatedFormatReader::CustomSeparatedFormatReader( diff --git a/src/Processors/Formats/Impl/CustomSeparatedRowOutputFormat.cpp b/src/Processors/Formats/Impl/CustomSeparatedRowOutputFormat.cpp index 4c8cf19b923..7fa3d90ce81 100644 --- a/src/Processors/Formats/Impl/CustomSeparatedRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/CustomSeparatedRowOutputFormat.cpp @@ -8,8 +8,8 @@ namespace DB { CustomSeparatedRowOutputFormat::CustomSeparatedRowOutputFormat( - const Block & header_, WriteBuffer & out_, const RowOutputFormatParams & params_, const FormatSettings & format_settings_, bool with_names_, bool with_types_) - : IRowOutputFormat(header_, out_, params_) + const Block & header_, WriteBuffer & out_, const FormatSettings & format_settings_, bool with_names_, bool with_types_) + : IRowOutputFormat(header_, out_) , with_names(with_names_) , with_types(with_types_) , format_settings(format_settings_) @@ -84,10 +84,9 @@ void registerOutputFormatCustomSeparated(FormatFactory & factory) factory.registerOutputFormat(format_name, [with_names, with_types]( WriteBuffer & buf, const Block & sample, - const RowOutputFormatParams & params, const FormatSettings & settings) { - return std::make_shared(sample, buf, params, settings, with_names, with_types); + return std::make_shared(sample, buf, settings, with_names, with_types); }); factory.markOutputFormatSupportsParallelFormatting(format_name); diff --git a/src/Processors/Formats/Impl/CustomSeparatedRowOutputFormat.h b/src/Processors/Formats/Impl/CustomSeparatedRowOutputFormat.h index 0e04764b993..34fe1ce965c 100644 --- a/src/Processors/Formats/Impl/CustomSeparatedRowOutputFormat.h +++ b/src/Processors/Formats/Impl/CustomSeparatedRowOutputFormat.h @@ -11,7 +11,7 @@ class WriteBuffer; class CustomSeparatedRowOutputFormat final : public IRowOutputFormat { public: - CustomSeparatedRowOutputFormat(const Block & header_, WriteBuffer & out_, const RowOutputFormatParams & params_, const FormatSettings & 
format_settings_, bool with_names_, bool with_types_); + CustomSeparatedRowOutputFormat(const Block & header_, WriteBuffer & out_, const FormatSettings & format_settings_, bool with_names_, bool with_types_); String getName() const override { return "CustomSeparatedRowOutputFormat"; } diff --git a/src/Processors/Formats/Impl/HiveTextRowInputFormat.cpp b/src/Processors/Formats/Impl/HiveTextRowInputFormat.cpp index ff41352ff96..ec5612ae30b 100644 --- a/src/Processors/Formats/Impl/HiveTextRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/HiveTextRowInputFormat.cpp @@ -1,4 +1,5 @@ #include +#include #if USE_HIVE @@ -31,12 +32,17 @@ HiveTextRowInputFormat::HiveTextRowInputFormat( HiveTextRowInputFormat::HiveTextRowInputFormat( const Block & header_, std::unique_ptr buf_, const Params & params_, const FormatSettings & format_settings_) : CSVRowInputFormat( - header_, *buf_, params_, true, false, format_settings_, std::make_unique(std::move(buf_), format_settings_)) + header_, *buf_, params_, true, false, format_settings_, std::make_unique(*buf_, format_settings_)), buf(std::move(buf_)) { } -HiveTextFormatReader::HiveTextFormatReader(std::unique_ptr buf_, const FormatSettings & format_settings_) - : CSVFormatReader(*buf_, format_settings_), buf(std::move(buf_)), input_field_names(format_settings_.hive_text.input_field_names) +void HiveTextRowInputFormat::setReadBuffer(ReadBuffer & in_) +{ + buf->setSubBuffer(in_); +} + +HiveTextFormatReader::HiveTextFormatReader(PeekableReadBuffer & buf_, const FormatSettings & format_settings_) + : CSVFormatReader(buf_, format_settings_), buf(&buf_), input_field_names(format_settings_.hive_text.input_field_names) { } @@ -53,6 +59,12 @@ std::vector HiveTextFormatReader::readTypes() throw Exception(ErrorCodes::NOT_IMPLEMENTED, "HiveTextRowInputFormat::readTypes is not implemented"); } +void HiveTextFormatReader::setReadBuffer(ReadBuffer & buf_) +{ + buf = assert_cast(&buf_); + CSVFormatReader::setReadBuffer(buf_); +} + void registerInputFormatHiveText(FormatFactory & factory) { factory.registerInputFormat( diff --git a/src/Processors/Formats/Impl/HiveTextRowInputFormat.h b/src/Processors/Formats/Impl/HiveTextRowInputFormat.h index 61f5bf77b07..251486b247c 100644 --- a/src/Processors/Formats/Impl/HiveTextRowInputFormat.h +++ b/src/Processors/Formats/Impl/HiveTextRowInputFormat.h @@ -18,21 +18,27 @@ public: String getName() const override { return "HiveTextRowInputFormat"; } + void setReadBuffer(ReadBuffer & in_) override; + private: HiveTextRowInputFormat( const Block & header_, std::unique_ptr buf_, const Params & params_, const FormatSettings & format_settings_); + + std::unique_ptr buf; }; class HiveTextFormatReader final : public CSVFormatReader { public: - HiveTextFormatReader(std::unique_ptr buf_, const FormatSettings & format_settings_); + HiveTextFormatReader(PeekableReadBuffer & buf_, const FormatSettings & format_settings_); std::vector readNames() override; std::vector readTypes() override; + void setReadBuffer(ReadBuffer & buf_) override; + private: - std::unique_ptr buf; + PeekableReadBuffer * buf; std::vector input_field_names; }; diff --git a/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp index d369eedceea..8bf0ecc5d7e 100644 --- a/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp @@ -12,6 +12,7 @@ namespace ErrorCodes { extern const int BAD_ARGUMENTS; extern const int INCORRECT_DATA; + extern const int 
ILLEGAL_COLUMN; } JSONAsRowInputFormat::JSONAsRowInputFormat(const Block & header_, ReadBuffer & in_, Params params_) @@ -98,8 +99,7 @@ bool JSONAsRowInputFormat::readRow(MutableColumns & columns, RowReadExtension &) void JSONAsRowInputFormat::setReadBuffer(ReadBuffer & in_) { - buf = std::make_unique(in_); - IInputFormat::setReadBuffer(*buf); + buf->setSubBuffer(in_); } @@ -207,6 +207,15 @@ void JSONAsObjectRowInputFormat::readJSONObject(IColumn & column) serializations[0]->deserializeTextJSON(column, *buf, format_settings); } +JSONAsObjectExternalSchemaReader::JSONAsObjectExternalSchemaReader(const FormatSettings & settings) +{ + if (!settings.json.allow_object_type) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Cannot infer the data structure in JSONAsObject format because experimental Object type is not allowed. Set setting " + "allow_experimental_object_type = 1 in order to allow it"); +} + void registerInputFormatJSONAsString(FormatFactory & factory) { factory.registerInputFormat("JSONAsString", []( @@ -261,9 +270,9 @@ void registerFileSegmentationEngineJSONAsObject(FormatFactory & factory) void registerJSONAsObjectSchemaReader(FormatFactory & factory) { - factory.registerExternalSchemaReader("JSONAsObject", [](const FormatSettings &) + factory.registerExternalSchemaReader("JSONAsObject", [](const FormatSettings & settings) { - return std::make_shared(); + return std::make_shared(settings); }); } diff --git a/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.h b/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.h index 438107e73e6..3f2d1998139 100644 --- a/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.h +++ b/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.h @@ -77,6 +77,8 @@ public: class JSONAsObjectExternalSchemaReader : public IExternalSchemaReader { public: + JSONAsObjectExternalSchemaReader(const FormatSettings & settings); + NamesAndTypesList readSchema() override { return {{"json", std::make_shared("json", false)}}; diff --git a/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormat.cpp b/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormat.cpp index 9ced073aede..4a618d3a164 100644 --- a/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormat.cpp @@ -34,7 +34,6 @@ void registerOutputFormatJSONColumns(FormatFactory & factory) factory.registerOutputFormat("JSONColumns", []( WriteBuffer & buf, const Block & sample, - const RowOutputFormatParams &, const FormatSettings & format_settings) { return std::make_shared(buf, sample, format_settings, format_settings.json.validate_utf8); diff --git a/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormatBase.cpp b/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormatBase.cpp index 66362d9eed4..87a87548a91 100644 --- a/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormatBase.cpp +++ b/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormatBase.cpp @@ -28,7 +28,6 @@ void JSONColumnsBlockOutputFormatBase::consume(Chunk chunk) void JSONColumnsBlockOutputFormatBase::writeSuffix() { - writeChunk(mono_chunk); mono_chunk.clear(); } diff --git a/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.cpp b/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.cpp index f8b864ca65f..d23c16c1437 100644 --- a/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.cpp @@ -77,10 +77,6 @@ void 
JSONColumnsWithMetadataBlockOutputFormat::consumeTotals(Chunk chunk) void JSONColumnsWithMetadataBlockOutputFormat::finalizeImpl() { - auto outside_statistics = getOutsideStatistics(); - if (outside_statistics) - statistics = std::move(*outside_statistics); - JSONUtils::writeAdditionalInfo( rows, statistics.rows_before_limit, @@ -95,12 +91,18 @@ void JSONColumnsWithMetadataBlockOutputFormat::finalizeImpl() ostr->next(); } +void JSONColumnsWithMetadataBlockOutputFormat::resetFormatterImpl() +{ + JSONColumnsBlockOutputFormat::resetFormatterImpl(); + rows = 0; + statistics = Statistics(); +} + void registerOutputFormatJSONColumnsWithMetadata(FormatFactory & factory) { factory.registerOutputFormat("JSONColumnsWithMetadata", []( WriteBuffer & buf, const Block & sample, - const RowOutputFormatParams &, const FormatSettings & format_settings) { return std::make_shared(buf, sample, format_settings); diff --git a/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.h b/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.h index 0e481ada804..c72b4d87234 100644 --- a/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.h +++ b/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.h @@ -53,6 +53,7 @@ protected: void writePrefix() override; void writeSuffix() override; void finalizeImpl() override; + void resetFormatterImpl() override; void writeChunkStart() override; void writeChunkEnd() override; @@ -60,7 +61,6 @@ protected: void writeExtremesElement(const char * title, const Columns & columns, size_t row_num); DataTypes types; - Statistics statistics; size_t rows; }; diff --git a/src/Processors/Formats/Impl/JSONCompactColumnsBlockOutputFormat.cpp b/src/Processors/Formats/Impl/JSONCompactColumnsBlockOutputFormat.cpp index 2ec9549fa96..bb746798f1e 100644 --- a/src/Processors/Formats/Impl/JSONCompactColumnsBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONCompactColumnsBlockOutputFormat.cpp @@ -32,7 +32,6 @@ void registerOutputFormatJSONCompactColumns(FormatFactory & factory) factory.registerOutputFormat("JSONCompactColumns", []( WriteBuffer & buf, const Block & sample, - const RowOutputFormatParams &, const FormatSettings & format_settings) { return std::make_shared(buf, sample, format_settings); diff --git a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp index 49564bde429..e9c28099c5f 100644 --- a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp @@ -1,7 +1,6 @@ #include #include -#include #include #include #include diff --git a/src/Processors/Formats/Impl/JSONCompactEachRowRowOutputFormat.cpp b/src/Processors/Formats/Impl/JSONCompactEachRowRowOutputFormat.cpp index 6427ec00aef..0cafc053467 100644 --- a/src/Processors/Formats/Impl/JSONCompactEachRowRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONCompactEachRowRowOutputFormat.cpp @@ -11,12 +11,11 @@ namespace DB JSONCompactEachRowRowOutputFormat::JSONCompactEachRowRowOutputFormat(WriteBuffer & out_, const Block & header_, - const RowOutputFormatParams & params_, const FormatSettings & settings_, bool with_names_, bool with_types_, bool yield_strings_) - : RowOutputFormatWithUTF8ValidationAdaptor(settings_.json.validate_utf8, header_, out_, params_) + : RowOutputFormatWithUTF8ValidationAdaptor(settings_.json.validate_utf8, header_, out_) , settings(settings_) , with_names(with_names_) , 
with_types(with_types_) @@ -80,7 +79,7 @@ void JSONCompactEachRowRowOutputFormat::writeLine(const std::vector & va writeChar('\"', *ostr); writeString(values[i], *ostr); writeChar('\"', *ostr); - if (i != values.size() - 1) + if (i + 1 != values.size()) writeFieldDelimiter(); } writeRowEndDelimiter(); @@ -112,10 +111,9 @@ void registerOutputFormatJSONCompactEachRow(FormatFactory & factory) factory.registerOutputFormat(format_name, [yield_strings, with_names, with_types]( WriteBuffer & buf, const Block & sample, - const RowOutputFormatParams & params, const FormatSettings & format_settings) { - return std::make_shared(buf, sample, params, format_settings, with_names, with_types, yield_strings); + return std::make_shared(buf, sample, format_settings, with_names, with_types, yield_strings); }); factory.markOutputFormatSupportsParallelFormatting(format_name); diff --git a/src/Processors/Formats/Impl/JSONCompactEachRowRowOutputFormat.h b/src/Processors/Formats/Impl/JSONCompactEachRowRowOutputFormat.h index 6dc9d59b3a2..2be39669dd2 100644 --- a/src/Processors/Formats/Impl/JSONCompactEachRowRowOutputFormat.h +++ b/src/Processors/Formats/Impl/JSONCompactEachRowRowOutputFormat.h @@ -17,7 +17,6 @@ public: JSONCompactEachRowRowOutputFormat( WriteBuffer & out_, const Block & header_, - const RowOutputFormatParams & params_, const FormatSettings & settings_, bool with_names_, bool with_types_, diff --git a/src/Processors/Formats/Impl/JSONCompactRowOutputFormat.cpp b/src/Processors/Formats/Impl/JSONCompactRowOutputFormat.cpp index 47b79b71ae2..53ab69f797a 100644 --- a/src/Processors/Formats/Impl/JSONCompactRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONCompactRowOutputFormat.cpp @@ -11,10 +11,9 @@ namespace DB JSONCompactRowOutputFormat::JSONCompactRowOutputFormat( WriteBuffer & out_, const Block & header, - const RowOutputFormatParams & params_, const FormatSettings & settings_, bool yield_strings_) - : JSONRowOutputFormat(out_, header, params_, settings_, yield_strings_) + : JSONRowOutputFormat(out_, header, settings_, yield_strings_) { } @@ -72,10 +71,9 @@ void registerOutputFormatJSONCompact(FormatFactory & factory) factory.registerOutputFormat("JSONCompact", []( WriteBuffer & buf, const Block & sample, - const RowOutputFormatParams & params, const FormatSettings & format_settings) { - return std::make_shared(buf, sample, params, format_settings, false); + return std::make_shared(buf, sample, format_settings, false); }); factory.markOutputFormatSupportsParallelFormatting("JSONCompact"); @@ -83,10 +81,9 @@ void registerOutputFormatJSONCompact(FormatFactory & factory) factory.registerOutputFormat("JSONCompactStrings", []( WriteBuffer & buf, const Block & sample, - const RowOutputFormatParams & params, const FormatSettings & format_settings) { - return std::make_shared(buf, sample, params, format_settings, true); + return std::make_shared(buf, sample, format_settings, true); }); factory.markOutputFormatSupportsParallelFormatting("JSONCompactStrings"); diff --git a/src/Processors/Formats/Impl/JSONCompactRowOutputFormat.h b/src/Processors/Formats/Impl/JSONCompactRowOutputFormat.h index 38123833f10..d10075f8834 100644 --- a/src/Processors/Formats/Impl/JSONCompactRowOutputFormat.h +++ b/src/Processors/Formats/Impl/JSONCompactRowOutputFormat.h @@ -19,7 +19,6 @@ public: JSONCompactRowOutputFormat( WriteBuffer & out_, const Block & header, - const RowOutputFormatParams & params_, const FormatSettings & settings_, bool yield_strings_); diff --git 
a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp index c9502659267..44cbf8ca215 100644 --- a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp @@ -278,6 +278,7 @@ void JSONEachRowRowInputFormat::resetParser() read_columns.clear(); seen_columns.clear(); prev_positions.clear(); + allow_new_rows = true; } void JSONEachRowRowInputFormat::readPrefix() diff --git a/src/Processors/Formats/Impl/JSONEachRowRowOutputFormat.cpp b/src/Processors/Formats/Impl/JSONEachRowRowOutputFormat.cpp index 0fb4f929ca3..2e2209bb6e3 100644 --- a/src/Processors/Formats/Impl/JSONEachRowRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONEachRowRowOutputFormat.cpp @@ -12,9 +12,8 @@ namespace DB JSONEachRowRowOutputFormat::JSONEachRowRowOutputFormat( WriteBuffer & out_, const Block & header_, - const RowOutputFormatParams & params_, const FormatSettings & settings_) - : RowOutputFormatWithUTF8ValidationAdaptor(settings_.json.validate_utf8, header_, out_, params_), + : RowOutputFormatWithUTF8ValidationAdaptor(settings_.json.validate_utf8, header_, out_), settings(settings_) { fields = JSONUtils::makeNamesValidJSONStrings(getPort(PortKind::Main).getHeader().getNames(), settings, settings.json.validate_utf8); @@ -42,49 +41,18 @@ void JSONEachRowRowOutputFormat::writeRowStartDelimiter() void JSONEachRowRowOutputFormat::writeRowEndDelimiter() { - // Why do we need this weird `if`? - // - // The reason is the formatRow function that is broken with respect to - // row-between delimiters. It should not write them, but it does, and then - // hacks around it by having a special formatRowNoNewline version, which, as - // you guessed, removes the newline from the end of row. But the row-between - // delimiter goes into a second row, so it turns out to be in the beginning - // of the line, and the removal doesn't work. There is also a second bug -- - // the row-between delimiter in this format is written incorrectly. In fact, - // it is not written at all, and the newline is written in a row-end - // delimiter ("}\n" instead of the correct "}"). With these two bugs - // combined, the test 01420_format_row works perfectly. - // - // A proper implementation of formatRow would use IRowOutputFormat directly, - // and not write row-between delimiters, instead of using IOutputFormat - // processor and its crutch row callback. This would require exposing - // IRowOutputFormat, which we don't do now, but which can be generally useful - // for other cases such as parallel formatting, that also require a control - // flow different from the usual IOutputFormat. - // - // I just don't have time or energy to redo all of this, but I need to - // support JSON array output here, which requires proper ",\n" row-between - // delimiters. For compatibility, I preserve the bug in case of non-array - // output. if (settings.json.array_of_rows) - { writeChar('}', *ostr); - } else - { writeCString("}\n", *ostr); - } field_number = 0; } void JSONEachRowRowOutputFormat::writeRowBetweenDelimiter() { - // We preserve an existing bug here for compatibility. See the comment above. 
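With the long compatibility comment removed, the delimiter rules above reduce to: in array_of_rows mode a row ends with '}' and rows are separated by ",\n", while the enclosing "[" and "\n]\n" come from the prefix and suffix; otherwise a row ends with "}\n" and no between-row delimiter is written. A minimal toy sketch of that behaviour follows (assumed simplification, not the real JSONEachRowRowOutputFormat; the opening bracket is attributed to the prefix, which this hunk does not show).

#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

/// Toy renderer mirroring the delimiter scheme described above.
static void printRows(const std::vector<std::string> & rows, bool array_of_rows)
{
    if (array_of_rows)
        std::cout << "[\n";                      /// written by the prefix (assumed)
    for (std::size_t i = 0; i < rows.size(); ++i)
    {
        if (array_of_rows && i != 0)
            std::cout << ",\n";                  /// row-between delimiter
        std::cout << "{" << rows[i] << "}";      /// row body with start/end braces
        if (!array_of_rows)
            std::cout << "\n";                   /// newline is part of the row end here
    }
    if (array_of_rows)
        std::cout << "\n]\n";                    /// written by the suffix
}

int main()
{
    printRows({"\"x\":1", "\"x\":2"}, false);    /// {"x":1}\n{"x":2}\n
    printRows({"\"x\":1", "\"x\":2"}, true);     /// [\n{"x":1},\n{"x":2}\n]\n
    return 0;
}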
if (settings.json.array_of_rows) - { writeCString(",\n", *ostr); - } } @@ -100,9 +68,7 @@ void JSONEachRowRowOutputFormat::writePrefix() void JSONEachRowRowOutputFormat::writeSuffix() { if (settings.json.array_of_rows) - { writeCString("\n]\n", *ostr); - } } @@ -113,13 +79,11 @@ void registerOutputFormatJSONEachRow(FormatFactory & factory) factory.registerOutputFormat(format, [serialize_as_strings]( WriteBuffer & buf, const Block & sample, - const RowOutputFormatParams & params, const FormatSettings & _format_settings) { FormatSettings settings = _format_settings; settings.json.serialize_as_strings = serialize_as_strings; - return std::make_shared(buf, sample, params, - settings); + return std::make_shared(buf, sample, settings); }); factory.markOutputFormatSupportsParallelFormatting(format); }; diff --git a/src/Processors/Formats/Impl/JSONEachRowRowOutputFormat.h b/src/Processors/Formats/Impl/JSONEachRowRowOutputFormat.h index 2a216275c24..62b8188cb4d 100644 --- a/src/Processors/Formats/Impl/JSONEachRowRowOutputFormat.h +++ b/src/Processors/Formats/Impl/JSONEachRowRowOutputFormat.h @@ -17,7 +17,6 @@ public: JSONEachRowRowOutputFormat( WriteBuffer & out_, const Block & header_, - const RowOutputFormatParams & params_, const FormatSettings & settings_); String getName() const override { return "JSONEachRowRowOutputFormat"; } diff --git a/src/Processors/Formats/Impl/JSONEachRowWithProgressRowOutputFormat.cpp b/src/Processors/Formats/Impl/JSONEachRowWithProgressRowOutputFormat.cpp index 4e28a1d5fbc..449ccb62562 100644 --- a/src/Processors/Formats/Impl/JSONEachRowWithProgressRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONEachRowWithProgressRowOutputFormat.cpp @@ -62,25 +62,21 @@ void registerOutputFormatJSONEachRowWithProgress(FormatFactory & factory) factory.registerOutputFormat("JSONEachRowWithProgress", []( WriteBuffer & buf, const Block & sample, - const RowOutputFormatParams & params, const FormatSettings & _format_settings) { FormatSettings settings = _format_settings; settings.json.serialize_as_strings = false; - return std::make_shared(buf, - sample, params, settings); + return std::make_shared(buf, sample, settings); }); factory.registerOutputFormat("JSONStringsEachRowWithProgress", []( WriteBuffer & buf, const Block & sample, - const RowOutputFormatParams & params, const FormatSettings & _format_settings) { FormatSettings settings = _format_settings; settings.json.serialize_as_strings = true; - return std::make_shared(buf, - sample, params, settings); + return std::make_shared(buf, sample, settings); }); } diff --git a/src/Processors/Formats/Impl/JSONObjectEachRowRowOutputFormat.cpp b/src/Processors/Formats/Impl/JSONObjectEachRowRowOutputFormat.cpp index 6155efd4b63..a02199d6075 100644 --- a/src/Processors/Formats/Impl/JSONObjectEachRowRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONObjectEachRowRowOutputFormat.cpp @@ -6,8 +6,8 @@ namespace DB { -JSONObjectEachRowRowOutputFormat::JSONObjectEachRowRowOutputFormat(WriteBuffer & out_, const Block & header_, const RowOutputFormatParams & params_, const FormatSettings & settings_) - : JSONEachRowRowOutputFormat(out_, header_, params_, settings_), field_index_for_object_name(getColumnIndexForJSONObjectEachRowObjectName(header_, settings_)) +JSONObjectEachRowRowOutputFormat::JSONObjectEachRowRowOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & settings_) + : JSONEachRowRowOutputFormat(out_, header_, settings_), field_index_for_object_name(getColumnIndexForJSONObjectEachRowObjectName(header_, 
settings_)) { } @@ -71,12 +71,11 @@ void registerOutputFormatJSONObjectEachRow(FormatFactory & factory) factory.registerOutputFormat("JSONObjectEachRow", []( WriteBuffer & buf, const Block & sample, - const RowOutputFormatParams & params, const FormatSettings & _format_settings) { FormatSettings settings = _format_settings; settings.json.serialize_as_strings = false; - return std::make_shared(buf, sample, params, settings); + return std::make_shared(buf, sample, settings); }); factory.markOutputFormatSupportsParallelFormatting("JSONObjectEachRow"); factory.markFormatHasNoAppendSupport("JSONObjectEachRow"); diff --git a/src/Processors/Formats/Impl/JSONObjectEachRowRowOutputFormat.h b/src/Processors/Formats/Impl/JSONObjectEachRowRowOutputFormat.h index 19d9fe1aa53..1981931e91b 100644 --- a/src/Processors/Formats/Impl/JSONObjectEachRowRowOutputFormat.h +++ b/src/Processors/Formats/Impl/JSONObjectEachRowRowOutputFormat.h @@ -23,7 +23,6 @@ public: JSONObjectEachRowRowOutputFormat( WriteBuffer & out_, const Block & header_, - const RowOutputFormatParams & params_, const FormatSettings & settings_); String getName() const override { return "JSONObjectEachRowRowOutputFormat"; } diff --git a/src/Processors/Formats/Impl/JSONRowOutputFormat.cpp b/src/Processors/Formats/Impl/JSONRowOutputFormat.cpp index 98120abe8d8..0193ec7e3d3 100644 --- a/src/Processors/Formats/Impl/JSONRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONRowOutputFormat.cpp @@ -11,10 +11,9 @@ namespace DB JSONRowOutputFormat::JSONRowOutputFormat( WriteBuffer & out_, const Block & header, - const RowOutputFormatParams & params_, const FormatSettings & settings_, bool yield_strings_) - : RowOutputFormatWithUTF8ValidationAdaptor(true, header, out_, params_), settings(settings_), yield_strings(yield_strings_) + : RowOutputFormatWithUTF8ValidationAdaptor(true, header, out_), settings(settings_), yield_strings(yield_strings_) { names = JSONUtils::makeNamesValidJSONStrings(header.getNames(), settings, true); } @@ -112,10 +111,6 @@ void JSONRowOutputFormat::writeAfterExtremes() void JSONRowOutputFormat::finalizeImpl() { - auto outside_statistics = getOutsideStatistics(); - if (outside_statistics) - statistics = std::move(*outside_statistics); - JSONUtils::writeAdditionalInfo( row_count, statistics.rows_before_limit, @@ -130,6 +125,13 @@ void JSONRowOutputFormat::finalizeImpl() ostr->next(); } +void JSONRowOutputFormat::resetFormatterImpl() +{ + RowOutputFormatWithUTF8ValidationAdaptor::resetFormatterImpl(); + row_count = 0; + statistics = Statistics(); +} + void JSONRowOutputFormat::onProgress(const Progress & value) { @@ -142,10 +144,9 @@ void registerOutputFormatJSON(FormatFactory & factory) factory.registerOutputFormat("JSON", []( WriteBuffer & buf, const Block & sample, - const RowOutputFormatParams & params, const FormatSettings & format_settings) { - return std::make_shared(buf, sample, params, format_settings, false); + return std::make_shared(buf, sample, format_settings, false); }); factory.markOutputFormatSupportsParallelFormatting("JSON"); @@ -154,10 +155,9 @@ void registerOutputFormatJSON(FormatFactory & factory) factory.registerOutputFormat("JSONStrings", []( WriteBuffer & buf, const Block & sample, - const RowOutputFormatParams & params, const FormatSettings & format_settings) { - return std::make_shared(buf, sample, params, format_settings, true); + return std::make_shared(buf, sample, format_settings, true); }); factory.markOutputFormatSupportsParallelFormatting("JSONStrings"); diff --git 
a/src/Processors/Formats/Impl/JSONRowOutputFormat.h b/src/Processors/Formats/Impl/JSONRowOutputFormat.h index 9147aaa5387..dc3f0541af0 100644 --- a/src/Processors/Formats/Impl/JSONRowOutputFormat.h +++ b/src/Processors/Formats/Impl/JSONRowOutputFormat.h @@ -19,7 +19,6 @@ public: JSONRowOutputFormat( WriteBuffer & out_, const Block & header, - const RowOutputFormatParams & params_, const FormatSettings & settings_, bool yield_strings_); @@ -57,6 +56,7 @@ protected: void writeAfterExtremes() override; void finalizeImpl() override; + void resetFormatterImpl() override; virtual void writeExtremesElement(const char * title, const Columns & columns, size_t row_num); @@ -66,7 +66,6 @@ protected: size_t row_count = 0; Names names; /// The column names are pre-escaped to be put into JSON string literal. - Statistics statistics; FormatSettings settings; bool yield_strings; diff --git a/src/Processors/Formats/Impl/MarkdownRowOutputFormat.cpp b/src/Processors/Formats/Impl/MarkdownRowOutputFormat.cpp index cf732d8c0f3..ea414171ed6 100644 --- a/src/Processors/Formats/Impl/MarkdownRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/MarkdownRowOutputFormat.cpp @@ -5,8 +5,8 @@ namespace DB { -MarkdownRowOutputFormat::MarkdownRowOutputFormat(WriteBuffer & out_, const Block & header_, const RowOutputFormatParams & params_, const FormatSettings & format_settings_) - : IRowOutputFormat(header_, out_, params_), format_settings(format_settings_) {} +MarkdownRowOutputFormat::MarkdownRowOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_) + : IRowOutputFormat(header_, out_), format_settings(format_settings_) {} void MarkdownRowOutputFormat::writePrefix() { @@ -60,10 +60,9 @@ void registerOutputFormatMarkdown(FormatFactory & factory) factory.registerOutputFormat("Markdown", []( WriteBuffer & buf, const Block & sample, - const RowOutputFormatParams & params, const FormatSettings & settings) { - return std::make_shared(buf, sample, params, settings); + return std::make_shared(buf, sample, settings); }); factory.markOutputFormatSupportsParallelFormatting("Markdown"); diff --git a/src/Processors/Formats/Impl/MarkdownRowOutputFormat.h b/src/Processors/Formats/Impl/MarkdownRowOutputFormat.h index c6e15282780..8a4ae1f3b96 100644 --- a/src/Processors/Formats/Impl/MarkdownRowOutputFormat.h +++ b/src/Processors/Formats/Impl/MarkdownRowOutputFormat.h @@ -12,7 +12,7 @@ class ReadBuffer; class MarkdownRowOutputFormat final : public IRowOutputFormat { public: - MarkdownRowOutputFormat(WriteBuffer & out_, const Block & header_, const RowOutputFormatParams & params_, const FormatSettings & format_settings_); + MarkdownRowOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_); String getName() const override { return "MarkdownRowOutputFormat"; } @@ -28,8 +28,8 @@ private: /// Write '|' between values void writeFieldDelimiter() override; - /// Write '|\n' after each row - void writeRowEndDelimiter() override ; + /// Write '|\n' at the end of each row + void writeRowEndDelimiter() override; void writeField(const IColumn & column, const ISerialization & serialization, size_t row_num) override; diff --git a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp index 3a76a5a3fc6..0e8421566ab 100644 --- a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp @@ -429,8 +429,7 @@ bool MsgPackRowInputFormat::readRow(MutableColumns & 
columns, RowReadExtension & void MsgPackRowInputFormat::setReadBuffer(ReadBuffer & in_) { - buf = std::make_unique(in_); - IInputFormat::setReadBuffer(in_); + buf->setSubBuffer(in_); } MsgPackSchemaReader::MsgPackSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_) diff --git a/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp b/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp index da683913d4d..c9b41ee10bb 100644 --- a/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp @@ -32,8 +32,8 @@ namespace ErrorCodes extern const int ILLEGAL_COLUMN; } -MsgPackRowOutputFormat::MsgPackRowOutputFormat(WriteBuffer & out_, const Block & header_, const RowOutputFormatParams & params_, const FormatSettings & format_settings_) - : IRowOutputFormat(header_, out_, params_), packer(out_), format_settings(format_settings_) {} +MsgPackRowOutputFormat::MsgPackRowOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_) + : IRowOutputFormat(header_, out_), packer(out_), format_settings(format_settings_) {} void MsgPackRowOutputFormat::serializeField(const IColumn & column, DataTypePtr data_type, size_t row_num) { @@ -226,10 +226,9 @@ void registerOutputFormatMsgPack(FormatFactory & factory) factory.registerOutputFormat("MsgPack", []( WriteBuffer & buf, const Block & sample, - const RowOutputFormatParams & params, const FormatSettings & settings) { - return std::make_shared(buf, sample, params, settings); + return std::make_shared(buf, sample, settings); }); factory.markOutputFormatSupportsParallelFormatting("MsgPack"); } diff --git a/src/Processors/Formats/Impl/MsgPackRowOutputFormat.h b/src/Processors/Formats/Impl/MsgPackRowOutputFormat.h index 81943b5f73c..61225cd2864 100644 --- a/src/Processors/Formats/Impl/MsgPackRowOutputFormat.h +++ b/src/Processors/Formats/Impl/MsgPackRowOutputFormat.h @@ -17,7 +17,7 @@ namespace DB class MsgPackRowOutputFormat final : public IRowOutputFormat { public: - MsgPackRowOutputFormat(WriteBuffer & out_, const Block & header_, const RowOutputFormatParams & params_, const FormatSettings & format_settings_); + MsgPackRowOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_); String getName() const override { return "MsgPackRowOutputFormat"; } diff --git a/src/Processors/Formats/Impl/MySQLOutputFormat.cpp b/src/Processors/Formats/Impl/MySQLOutputFormat.cpp index 75a03cb6d0e..f2157f63c25 100644 --- a/src/Processors/Formats/Impl/MySQLOutputFormat.cpp +++ b/src/Processors/Formats/Impl/MySQLOutputFormat.cpp @@ -79,11 +79,14 @@ void MySQLOutputFormat::finalizeImpl() CurrentThread::finalizePerformanceCounters(); QueryStatusInfo info = process_list_elem->getInfo(); affected_rows = info.written_rows; + double elapsed_seconds = static_cast(info.elapsed_microseconds) / 1000000.0; human_readable_info = fmt::format( "Read {} rows, {} in {} sec., {} rows/sec., {}/sec.", - info.read_rows, ReadableSize(info.read_bytes), info.elapsed_seconds, - static_cast(info.read_rows / info.elapsed_seconds), - ReadableSize(info.read_bytes / info.elapsed_seconds)); + info.read_rows, + ReadableSize(info.read_bytes), + elapsed_seconds, + static_cast(info.read_rows / elapsed_seconds), + ReadableSize(info.read_bytes / elapsed_seconds)); } const auto & header = getPort(PortKind::Main).getHeader(); @@ -106,7 +109,6 @@ void registerOutputFormatMySQLWire(FormatFactory & factory) "MySQLWire", [](WriteBuffer & buf, const Block & sample, - const 
RowOutputFormatParams &, const FormatSettings & settings) { return std::make_shared(buf, sample, settings); }); } diff --git a/src/Processors/Formats/Impl/NativeFormat.cpp b/src/Processors/Formats/Impl/NativeFormat.cpp index a8e2ddf95e4..959b86ec051 100644 --- a/src/Processors/Formats/Impl/NativeFormat.cpp +++ b/src/Processors/Formats/Impl/NativeFormat.cpp @@ -115,7 +115,6 @@ void registerOutputFormatNative(FormatFactory & factory) factory.registerOutputFormat("Native", []( WriteBuffer & buf, const Block & sample, - const RowOutputFormatParams &, const FormatSettings &) { return std::make_shared(buf, sample); diff --git a/src/Processors/Formats/Impl/NullFormat.cpp b/src/Processors/Formats/Impl/NullFormat.cpp index 6c457c71d14..59514be9abc 100644 --- a/src/Processors/Formats/Impl/NullFormat.cpp +++ b/src/Processors/Formats/Impl/NullFormat.cpp @@ -13,7 +13,6 @@ void registerOutputFormatNull(FormatFactory & factory) factory.registerOutputFormat("Null", []( WriteBuffer &, const Block & sample, - const RowOutputFormatParams &, const FormatSettings &) { return std::make_shared(sample); diff --git a/src/Processors/Formats/Impl/ODBCDriver2BlockOutputFormat.cpp b/src/Processors/Formats/Impl/ODBCDriver2BlockOutputFormat.cpp index 7cf133e5739..43294355f2f 100644 --- a/src/Processors/Formats/Impl/ODBCDriver2BlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ODBCDriver2BlockOutputFormat.cpp @@ -100,7 +100,7 @@ void ODBCDriver2BlockOutputFormat::writePrefix() void registerOutputFormatODBCDriver2(FormatFactory & factory) { factory.registerOutputFormat( - "ODBCDriver2", [](WriteBuffer & buf, const Block & sample, const RowOutputFormatParams &, const FormatSettings & format_settings) + "ODBCDriver2", [](WriteBuffer & buf, const Block & sample, const FormatSettings & format_settings) { return std::make_shared(buf, sample, format_settings); }); diff --git a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp index b0ce87f45da..19a0b2eb23c 100644 --- a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp @@ -54,14 +54,19 @@ Chunk ORCBlockInputFormat::generate() throw ParsingException( ErrorCodes::CANNOT_READ_ALL_DATA, "Error while reading batch of ORC data: {}", table_result.status().ToString()); + /// We should extract the number of rows directly from the stripe, because in case when + /// record batch contains 0 columns (for example if we requested only columns that + /// are not presented in data) the number of rows in record batch will be 0. + size_t num_rows = file_reader->GetRawORCReader()->getStripe(stripe_current)->getNumberOfRows(); + auto table = table_result.ValueOrDie(); - if (!table || !table->num_rows()) + if (!table || !num_rows) return {}; ++stripe_current; Chunk res; - arrow_column_to_ch_column->arrowTableToCHChunk(res, table); + arrow_column_to_ch_column->arrowTableToCHChunk(res, table, num_rows); /// If defaults_for_omitted_fields is true, calculate the default values from default expression for omitted fields. /// Otherwise fill the missing columns with zero values of its type. 
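The ORC hunk above extracts the row count from the stripe because a record batch that contains zero columns (for example, when only absent columns were requested) also reports zero rows, so arrowTableToCHChunk now receives num_rows explicitly. A toy illustration of why the count cannot be recovered from the columns alone; ToyChunk is hypothetical and not ClickHouse's Chunk.

#include <cassert>
#include <cstddef>
#include <vector>

/// Toy column-major chunk that carries an explicit row count.
struct ToyChunk
{
    std::vector<std::vector<int>> columns;
    std::size_t num_rows = 0;

    std::size_t rowsFromColumns() const { return columns.empty() ? 0 : columns.front().size(); }
};

int main()
{
    ToyChunk with_columns{{{1, 2, 3}}, 3};
    assert(with_columns.rowsFromColumns() == 3);   /// derivable from the data itself

    ToyChunk no_columns{{}, 3};                    /// e.g. only missing columns requested
    assert(no_columns.rowsFromColumns() == 0);     /// would wrongly look empty
    assert(no_columns.num_rows == 3);              /// explicit count (from the stripe) is correct
    return 0;
}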
if (format_settings.defaults_for_omitted_fields) @@ -184,8 +189,9 @@ NamesAndTypesList ORCSchemaReader::readSchema() getFileReaderAndSchema(in, file_reader, schema, format_settings, is_stopped); auto header = ArrowColumnToCHColumn::arrowSchemaToCHHeader( *schema, "ORC", format_settings.orc.skip_columns_with_unsupported_types_in_schema_inference); - return getNamesAndRecursivelyNullableTypes(header); -} + if (format_settings.schema_inference_make_columns_nullable) + return getNamesAndRecursivelyNullableTypes(header); + return header.getNamesAndTypesList();} void registerInputFormatORC(FormatFactory & factory) { @@ -211,6 +217,11 @@ void registerORCSchemaReader(FormatFactory & factory) return std::make_shared(buf, settings); } ); + + factory.registerAdditionalInfoForSchemaCacheGetter("ORC", [](const FormatSettings & settings) + { + return fmt::format("schema_inference_make_columns_nullable={}", settings.schema_inference_make_columns_nullable); + }); } } diff --git a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp index 4279a998a61..e1af4436789 100644 --- a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp @@ -515,6 +515,11 @@ void ORCBlockOutputFormat::finalizeImpl() writer->close(); } +void ORCBlockOutputFormat::resetFormatterImpl() +{ + writer.reset(); +} + void ORCBlockOutputFormat::prepareWriter() { const Block & header = getPort(PortKind::Main).getHeader(); @@ -531,7 +536,6 @@ void registerOutputFormatORC(FormatFactory & factory) factory.registerOutputFormat("ORC", []( WriteBuffer & buf, const Block & sample, - const RowOutputFormatParams &, const FormatSettings & format_settings) { return std::make_shared(buf, sample, format_settings); diff --git a/src/Processors/Formats/Impl/ORCBlockOutputFormat.h b/src/Processors/Formats/Impl/ORCBlockOutputFormat.h index 322778299ae..28837193d1a 100644 --- a/src/Processors/Formats/Impl/ORCBlockOutputFormat.h +++ b/src/Processors/Formats/Impl/ORCBlockOutputFormat.h @@ -44,6 +44,7 @@ public: private: void consume(Chunk chunk) override; void finalizeImpl() override; + void resetFormatterImpl() override; std::unique_ptr getORCType(const DataTypePtr & type); diff --git a/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.cpp b/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.cpp index 40ab6554115..db4bb422cb1 100644 --- a/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.cpp @@ -216,7 +216,7 @@ namespace DB } case ProcessingUnitType::FINALIZE: { - formatter->setOutsideStatistics(std::move(unit.statistics)); + formatter->statistics = std::move(unit.statistics); formatter->finalizeImpl(); break; } diff --git a/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.h b/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.h index fb58f5765c1..5faa6bbb483 100644 --- a/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.h +++ b/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.h @@ -20,6 +20,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + /** * ORDER-PRESERVING parallel formatting of data formats. * The idea is similar to ParallelParsingInputFormat. 
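Several formats in this patch gain the same pair of hooks: finalizeImpl() emits the footer and statistics, while the new resetFormatterImpl() clears per-run state so the formatter can be reused (ParallelFormattingOutputFormat opts out below by throwing NOT_IMPLEMENTED). A hedged sketch of that lifecycle with simplified names; ToyFormat is illustrative only, not the real IOutputFormat.

#include <cstddef>
#include <iostream>
#include <string>

/// Toy formatter with the finalize/reset lifecycle rolled out by this patch.
class ToyFormat
{
public:
    void write(const std::string & row) { body += row + "\n"; ++rows; }

    /// Counterpart of finalizeImpl(): flush the body and the trailing statistics.
    void finalize() { std::cout << body << "-- " << rows << " rows --\n"; }

    /// Counterpart of resetFormatterImpl(): drop per-run state so the object is reusable.
    void reset()
    {
        body.clear();
        rows = 0;
    }

private:
    std::string body;
    std::size_t rows = 0;
};

int main()
{
    ToyFormat format;
    format.write("a");
    format.finalize();
    format.reset();        /// without this, the first run's rows would leak into the second
    format.write("b");
    format.finalize();
    return 0;
}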
@@ -167,6 +172,12 @@ private: void finalizeImpl() override; + void resetFormatterImpl() override + { + /// Resetting parallel formatting is not obvious and it's not used anywhere + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method resetFormatterImpl is not implemented for parallel formatting"); + } + InternalFormatterCreator internal_formatter_creator; /// Status to synchronize multiple threads. @@ -227,7 +238,6 @@ private: size_t rows_consumed = 0; std::atomic_bool are_totals_written = false; - Statistics statistics; /// We change statistics in onProgress() which can be called from different threads. std::mutex statistics_mutex; bool save_totals_and_extremes_in_statistics; diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index 85eed00a95f..ad1d1ba85b9 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -70,7 +70,7 @@ Chunk ParquetBlockInputFormat::generate() ++row_group_current; - arrow_column_to_ch_column->arrowTableToCHChunk(res, table); + arrow_column_to_ch_column->arrowTableToCHChunk(res, table, table->num_rows()); /// If defaults_for_omitted_fields is true, calculate the default values from default expression for omitted fields. /// Otherwise fill the missing columns with zero values of its type. @@ -187,7 +187,9 @@ NamesAndTypesList ParquetSchemaReader::readSchema() getFileReaderAndSchema(in, file_reader, schema, format_settings, is_stopped); auto header = ArrowColumnToCHColumn::arrowSchemaToCHHeader( *schema, "Parquet", format_settings.parquet.skip_columns_with_unsupported_types_in_schema_inference); - return getNamesAndRecursivelyNullableTypes(header); + if (format_settings.schema_inference_make_columns_nullable) + return getNamesAndRecursivelyNullableTypes(header); + return header.getNamesAndTypesList(); } void registerInputFormatParquet(FormatFactory & factory) @@ -214,6 +216,11 @@ void registerParquetSchemaReader(FormatFactory & factory) return std::make_shared(buf, settings); } ); + + factory.registerAdditionalInfoForSchemaCacheGetter("Parquet", [](const FormatSettings & settings) + { + return fmt::format("schema_inference_make_columns_nullable={}", settings.schema_inference_make_columns_nullable); + }); } } diff --git a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp index c8e94311af5..e99b308b87b 100644 --- a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp @@ -74,13 +74,17 @@ void ParquetBlockOutputFormat::finalizeImpl() throw Exception{"Error while closing a table: " + status.ToString(), ErrorCodes::UNKNOWN_EXCEPTION}; } +void ParquetBlockOutputFormat::resetFormatterImpl() +{ + file_writer.reset(); +} + void registerOutputFormatParquet(FormatFactory & factory) { factory.registerOutputFormat( "Parquet", [](WriteBuffer & buf, const Block & sample, - const RowOutputFormatParams &, const FormatSettings & format_settings) { return std::make_shared(buf, sample, format_settings); diff --git a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.h b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.h index c0421a4d99f..0518d9df77c 100644 --- a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.h +++ b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.h @@ -36,6 +36,7 @@ public: private: void consume(Chunk) override; void finalizeImpl() override; + void resetFormatterImpl() 
override; const FormatSettings format_settings; diff --git a/src/Processors/Formats/Impl/PostgreSQLOutputFormat.cpp b/src/Processors/Formats/Impl/PostgreSQLOutputFormat.cpp index 0450051daf8..3fa126494d2 100644 --- a/src/Processors/Formats/Impl/PostgreSQLOutputFormat.cpp +++ b/src/Processors/Formats/Impl/PostgreSQLOutputFormat.cpp @@ -66,7 +66,6 @@ void registerOutputFormatPostgreSQLWire(FormatFactory & factory) "PostgreSQLWire", [](WriteBuffer & buf, const Block & sample, - const RowOutputFormatParams &, const FormatSettings & settings) { return std::make_shared(buf, sample, settings); }); } } diff --git a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp index faf4681340f..cefe3ee4a98 100644 --- a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp @@ -14,11 +14,6 @@ namespace DB { -namespace ErrorCodes -{ -} - - PrettyBlockOutputFormat::PrettyBlockOutputFormat( WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_, bool mono_block_) : IOutputFormat(header_, out_), format_settings(format_settings_), serializations(header_.getSerializations()), mono_block(mono_block_) @@ -37,8 +32,8 @@ void PrettyBlockOutputFormat::calculateWidths( { size_t num_rows = std::min(chunk.getNumRows(), format_settings.pretty.max_rows); - /// len(num_rows) + len(". ") - row_number_width = static_cast(std::floor(std::log10(num_rows))) + 3; + /// len(num_rows + total_rows) + len(". ") + row_number_width = static_cast(std::floor(std::log10(num_rows + total_rows))) + 3; size_t num_columns = chunk.getNumColumns(); const auto & columns = chunk.getColumns(); @@ -295,7 +290,7 @@ void PrettyBlockOutputFormat::writeChunk(const Chunk & chunk, PortKind port_kind if (format_settings.pretty.output_format_pretty_row_numbers) { // Write row number; - auto row_num_string = std::to_string(i + 1) + ". "; + auto row_num_string = std::to_string(i + 1 + total_rows) + ". "; for (size_t j = 0; j < row_number_width - row_num_string.size(); ++j) { writeCString(" ", out); diff --git a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.h b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.h index c8ab22b123f..95c72d15fa9 100644 --- a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.h +++ b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.h @@ -54,6 +54,11 @@ protected: const IColumn & column, const ISerialization & serialization, size_t row_num, size_t value_width, size_t pad_to_width, bool align_right); + void resetFormatterImpl() override + { + total_rows = 0; + } + private: bool mono_block; /// For mono_block == true only @@ -68,7 +73,6 @@ void registerPrettyFormatWithNoEscapesAndMonoBlock(FormatFactory & factory, cons fact.registerOutputFormat(name, [no_escapes, mono_block]( WriteBuffer & buf, const Block & sample, - const RowOutputFormatParams &, const FormatSettings & format_settings) { if (no_escapes) diff --git a/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp b/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp index cc26a064b54..2ba9ec725e2 100644 --- a/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp @@ -144,7 +144,7 @@ void PrettyCompactBlockOutputFormat::writeRow( if (format_settings.pretty.output_format_pretty_row_numbers) { // Write row number; - auto row_num_string = std::to_string(row_num + 1) + ". 
"; + auto row_num_string = std::to_string(row_num + 1 + total_rows) + ". "; for (size_t i = 0; i < row_number_width - row_num_string.size(); ++i) { writeCString(" ", out); diff --git a/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.cpp b/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.cpp index 4afb380dec9..46d1872412c 100644 --- a/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.cpp @@ -73,7 +73,7 @@ void PrettySpaceBlockOutputFormat::writeChunk(const Chunk & chunk, PortKind port if (format_settings.pretty.output_format_pretty_row_numbers) { // Write row number; - auto row_num_string = std::to_string(row + 1) + ". "; + auto row_num_string = std::to_string(row + 1 + total_rows) + ". "; for (size_t i = 0; i < row_number_width - row_num_string.size(); ++i) { writeCString(" ", out); diff --git a/src/Processors/Formats/Impl/PrometheusTextOutputFormat.cpp b/src/Processors/Formats/Impl/PrometheusTextOutputFormat.cpp index b7c5ef92328..d290280bdbf 100644 --- a/src/Processors/Formats/Impl/PrometheusTextOutputFormat.cpp +++ b/src/Processors/Formats/Impl/PrometheusTextOutputFormat.cpp @@ -82,9 +82,8 @@ static Float64 tryParseFloat(const String & s) PrometheusTextOutputFormat::PrometheusTextOutputFormat( WriteBuffer & out_, const Block & header_, - const RowOutputFormatParams & params_, const FormatSettings & format_settings_) - : IRowOutputFormat(header_, out_, params_) + : IRowOutputFormat(header_, out_) , string_serialization(DataTypeString().getDefaultSerialization()) , format_settings(format_settings_) { @@ -339,10 +338,9 @@ void registerOutputFormatPrometheus(FormatFactory & factory) factory.registerOutputFormat(FORMAT_NAME, []( WriteBuffer & buf, const Block & sample, - const RowOutputFormatParams & params, const FormatSettings & settings) { - return std::make_shared(buf, sample, params, settings); + return std::make_shared(buf, sample, settings); }); } diff --git a/src/Processors/Formats/Impl/PrometheusTextOutputFormat.h b/src/Processors/Formats/Impl/PrometheusTextOutputFormat.h index 69b8d10e56b..de87237c663 100644 --- a/src/Processors/Formats/Impl/PrometheusTextOutputFormat.h +++ b/src/Processors/Formats/Impl/PrometheusTextOutputFormat.h @@ -20,7 +20,6 @@ public: PrometheusTextOutputFormat( WriteBuffer & out_, const Block & header_, - const RowOutputFormatParams & params_, const FormatSettings & format_settings_); String getName() const override { return "PrometheusTextOutputFormat"; } diff --git a/src/Processors/Formats/Impl/ProtobufListInputFormat.cpp b/src/Processors/Formats/Impl/ProtobufListInputFormat.cpp index 73e8e7992d3..9777f2361a2 100644 --- a/src/Processors/Formats/Impl/ProtobufListInputFormat.cpp +++ b/src/Processors/Formats/Impl/ProtobufListInputFormat.cpp @@ -30,6 +30,12 @@ ProtobufListInputFormat::ProtobufListInputFormat( { } +void ProtobufListInputFormat::setReadBuffer(ReadBuffer & in_) +{ + reader->setReadBuffer(in_); + IRowInputFormat::setReadBuffer(in_); +} + bool ProtobufListInputFormat::readRow(MutableColumns & columns, RowReadExtension & row_read_extension) { if (reader->eof()) diff --git a/src/Processors/Formats/Impl/ProtobufListInputFormat.h b/src/Processors/Formats/Impl/ProtobufListInputFormat.h index 7c8bfb9b443..ba2e8014878 100644 --- a/src/Processors/Formats/Impl/ProtobufListInputFormat.h +++ b/src/Processors/Formats/Impl/ProtobufListInputFormat.h @@ -33,6 +33,8 @@ public: String getName() const override { return "ProtobufListInputFormat"; } + void 
setReadBuffer(ReadBuffer & in_) override; + private: bool readRow(MutableColumns & columns, RowReadExtension & row_read_extension) override; diff --git a/src/Processors/Formats/Impl/ProtobufListOutputFormat.cpp b/src/Processors/Formats/Impl/ProtobufListOutputFormat.cpp index e5320b37ae7..815b9ebb61d 100644 --- a/src/Processors/Formats/Impl/ProtobufListOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ProtobufListOutputFormat.cpp @@ -13,10 +13,9 @@ namespace DB ProtobufListOutputFormat::ProtobufListOutputFormat( WriteBuffer & out_, const Block & header_, - const RowOutputFormatParams & params_, const FormatSchemaInfo & schema_info_, bool defaults_for_nullable_google_wrappers_) - : IRowOutputFormat(header_, out_, params_) + : IRowOutputFormat(header_, out_) , writer(std::make_unique(out)) , serializer(ProtobufSerializer::create( header_.getNames(), @@ -42,18 +41,21 @@ void ProtobufListOutputFormat::finalizeImpl() serializer->finalizeWrite(); } +void ProtobufListOutputFormat::resetFormatterImpl() +{ + serializer->reset(); +} + void registerOutputFormatProtobufList(FormatFactory & factory) { factory.registerOutputFormat( "ProtobufList", [](WriteBuffer & buf, const Block & header, - const RowOutputFormatParams & params, const FormatSettings & settings) { return std::make_shared( - buf, header, params, - FormatSchemaInfo(settings, "Protobuf", true), + buf, header, FormatSchemaInfo(settings, "Protobuf", true), settings.protobuf.output_nullables_with_google_wrappers); }); } diff --git a/src/Processors/Formats/Impl/ProtobufListOutputFormat.h b/src/Processors/Formats/Impl/ProtobufListOutputFormat.h index 7b3513bb7ed..d85018c0351 100644 --- a/src/Processors/Formats/Impl/ProtobufListOutputFormat.h +++ b/src/Processors/Formats/Impl/ProtobufListOutputFormat.h @@ -26,7 +26,6 @@ public: ProtobufListOutputFormat( WriteBuffer & out_, const Block & header_, - const RowOutputFormatParams & params_, const FormatSchemaInfo & schema_info_, bool defaults_for_nullable_google_wrappers_); @@ -39,6 +38,7 @@ private: void writeField(const IColumn &, const ISerialization &, size_t) override {} void finalizeImpl() override; + void resetFormatterImpl() override; std::unique_ptr writer; std::unique_ptr serializer; diff --git a/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp b/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp index 3046b005fa8..40f6a2a54a7 100644 --- a/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp @@ -44,6 +44,12 @@ bool ProtobufRowInputFormat::readRow(MutableColumns & columns, RowReadExtension return true; } +void ProtobufRowInputFormat::setReadBuffer(ReadBuffer & in_) +{ + reader->setReadBuffer(in_); + IRowInputFormat::setReadBuffer(in_); +} + bool ProtobufRowInputFormat::allowSyncAfterError() const { return true; diff --git a/src/Processors/Formats/Impl/ProtobufRowInputFormat.h b/src/Processors/Formats/Impl/ProtobufRowInputFormat.h index 1747d090976..2e0ed49b768 100644 --- a/src/Processors/Formats/Impl/ProtobufRowInputFormat.h +++ b/src/Processors/Formats/Impl/ProtobufRowInputFormat.h @@ -38,6 +38,8 @@ public: String getName() const override { return "ProtobufRowInputFormat"; } + void setReadBuffer(ReadBuffer & in_) override; + private: bool readRow(MutableColumns & columns, RowReadExtension & row_read_extension) override; bool allowSyncAfterError() const override; diff --git a/src/Processors/Formats/Impl/ProtobufRowOutputFormat.cpp b/src/Processors/Formats/Impl/ProtobufRowOutputFormat.cpp index 7d8a67bdfb6..3faeefbaabd 
100644 --- a/src/Processors/Formats/Impl/ProtobufRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ProtobufRowOutputFormat.cpp @@ -20,11 +20,10 @@ namespace ErrorCodes ProtobufRowOutputFormat::ProtobufRowOutputFormat( WriteBuffer & out_, const Block & header_, - const RowOutputFormatParams & params_, const FormatSchemaInfo & schema_info_, const FormatSettings & settings_, bool with_length_delimiter_) - : IRowOutputFormat(header_, out_, params_) + : IRowOutputFormat(header_, out_) , writer(std::make_unique(out)) , serializer(ProtobufSerializer::create( header_.getNames(), @@ -59,14 +58,11 @@ void registerOutputFormatProtobuf(FormatFactory & factory) with_length_delimiter ? "Protobuf" : "ProtobufSingle", [with_length_delimiter](WriteBuffer & buf, const Block & header, - const RowOutputFormatParams & params, const FormatSettings & settings) { return std::make_shared( - buf, header, params, - FormatSchemaInfo(settings, "Protobuf", true), - settings, - with_length_delimiter); + buf, header, FormatSchemaInfo(settings, "Protobuf", true), + settings, with_length_delimiter); }); } } diff --git a/src/Processors/Formats/Impl/ProtobufRowOutputFormat.h b/src/Processors/Formats/Impl/ProtobufRowOutputFormat.h index 01eaac288f5..f6ff5bae999 100644 --- a/src/Processors/Formats/Impl/ProtobufRowOutputFormat.h +++ b/src/Processors/Formats/Impl/ProtobufRowOutputFormat.h @@ -30,7 +30,6 @@ public: ProtobufRowOutputFormat( WriteBuffer & out_, const Block & header_, - const RowOutputFormatParams & params_, const FormatSchemaInfo & schema_info_, const FormatSettings & settings_, bool with_length_delimiter_); diff --git a/src/Processors/Formats/Impl/RawBLOBRowOutputFormat.cpp b/src/Processors/Formats/Impl/RawBLOBRowOutputFormat.cpp index 1627bb0cad0..947bf2858a6 100644 --- a/src/Processors/Formats/Impl/RawBLOBRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/RawBLOBRowOutputFormat.cpp @@ -8,9 +8,8 @@ namespace DB RawBLOBRowOutputFormat::RawBLOBRowOutputFormat( WriteBuffer & out_, - const Block & header_, - const RowOutputFormatParams & params_) - : IRowOutputFormat(header_, out_, params_) + const Block & header_) + : IRowOutputFormat(header_, out_) { } @@ -30,10 +29,9 @@ void registerOutputFormatRawBLOB(FormatFactory & factory) factory.registerOutputFormat("RawBLOB", []( WriteBuffer & buf, const Block & sample, - const RowOutputFormatParams & params, const FormatSettings &) { - return std::make_shared(buf, sample, params); + return std::make_shared(buf, sample); }); } diff --git a/src/Processors/Formats/Impl/RawBLOBRowOutputFormat.h b/src/Processors/Formats/Impl/RawBLOBRowOutputFormat.h index f6c4f0a58ca..261103a0f6d 100644 --- a/src/Processors/Formats/Impl/RawBLOBRowOutputFormat.h +++ b/src/Processors/Formats/Impl/RawBLOBRowOutputFormat.h @@ -29,8 +29,7 @@ class RawBLOBRowOutputFormat final : public IRowOutputFormat public: RawBLOBRowOutputFormat( WriteBuffer & out_, - const Block & header_, - const RowOutputFormatParams & params_); + const Block & header_); String getName() const override { return "RawBLOBRowOutputFormat"; } diff --git a/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp b/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp index 5c0192c1e4a..ef577395d8f 100644 --- a/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp @@ -130,8 +130,7 @@ bool RegexpRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & void RegexpRowInputFormat::setReadBuffer(ReadBuffer & in_) { - buf = std::make_unique(in_); - 
IInputFormat::setReadBuffer(*buf); + buf->setSubBuffer(in_); } RegexpSchemaReader::RegexpSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_) diff --git a/src/Processors/Formats/Impl/SQLInsertRowOutputFormat.cpp b/src/Processors/Formats/Impl/SQLInsertRowOutputFormat.cpp index 749b4b40984..6f490c8b085 100644 --- a/src/Processors/Formats/Impl/SQLInsertRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/SQLInsertRowOutputFormat.cpp @@ -5,8 +5,8 @@ namespace DB { -SQLInsertRowOutputFormat::SQLInsertRowOutputFormat(WriteBuffer & out_, const Block & header_, const RowOutputFormatParams & params_, const FormatSettings & format_settings_) - : IRowOutputFormat(header_, out_, params_), column_names(header_.getNames()), format_settings(format_settings_) +SQLInsertRowOutputFormat::SQLInsertRowOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_) + : IRowOutputFormat(header_, out_), column_names(header_.getNames()), format_settings(format_settings_) { } @@ -85,16 +85,19 @@ void SQLInsertRowOutputFormat::writeSuffix() writeCString(";\n", out); } +void SQLInsertRowOutputFormat::resetFormatterImpl() +{ + rows_in_line = 0; +} void registerOutputFormatSQLInsert(FormatFactory & factory) { factory.registerOutputFormat("SQLInsert", []( WriteBuffer & buf, const Block & sample, - const RowOutputFormatParams & params, const FormatSettings & settings) { - return std::make_shared(buf, sample, params, settings); + return std::make_shared(buf, sample, settings); }); } diff --git a/src/Processors/Formats/Impl/SQLInsertRowOutputFormat.h b/src/Processors/Formats/Impl/SQLInsertRowOutputFormat.h index aaaf39a9e4d..8b87744ed7c 100644 --- a/src/Processors/Formats/Impl/SQLInsertRowOutputFormat.h +++ b/src/Processors/Formats/Impl/SQLInsertRowOutputFormat.h @@ -16,7 +16,6 @@ public: SQLInsertRowOutputFormat( WriteBuffer & out_, const Block & header_, - const RowOutputFormatParams & params_, const FormatSettings & format_settings_); String getName() const override { return "SQLInsertRowOutputFormat"; } @@ -26,11 +25,12 @@ public: protected: void writeField(const IColumn & column, const ISerialization & serialization, size_t row_num) override; - virtual void writeFieldDelimiter() override; - virtual void writeRowStartDelimiter() override; - virtual void writeRowEndDelimiter() override; - virtual void writeRowBetweenDelimiter() override; - virtual void writeSuffix() override; + void writeFieldDelimiter() override; + void writeRowStartDelimiter() override; + void writeRowEndDelimiter() override; + void writeRowBetweenDelimiter() override; + void writeSuffix() override; + void resetFormatterImpl() override; void printLineStart(); void printColumnNames(); diff --git a/src/Processors/Formats/Impl/TSKVRowOutputFormat.cpp b/src/Processors/Formats/Impl/TSKVRowOutputFormat.cpp index 0e29d74b419..808d77f5841 100644 --- a/src/Processors/Formats/Impl/TSKVRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/TSKVRowOutputFormat.cpp @@ -7,8 +7,8 @@ namespace DB { -TSKVRowOutputFormat::TSKVRowOutputFormat(WriteBuffer & out_, const Block & header, const RowOutputFormatParams & params_, const FormatSettings & format_settings_) - : TabSeparatedRowOutputFormat(out_, header, false, false, false, params_, format_settings_), fields(header.getNamesAndTypes()) +TSKVRowOutputFormat::TSKVRowOutputFormat(WriteBuffer & out_, const Block & header, const FormatSettings & format_settings_) + : TabSeparatedRowOutputFormat(out_, header, false, false, false, format_settings_), 
fields(header.getNamesAndTypes()) { for (auto & field : fields) { @@ -40,10 +40,9 @@ void registerOutputFormatTSKV(FormatFactory & factory) factory.registerOutputFormat("TSKV", []( WriteBuffer & buf, const Block & sample, - const RowOutputFormatParams & params, const FormatSettings & settings) { - return std::make_shared(buf, sample, params, settings); + return std::make_shared(buf, sample, settings); }); factory.markOutputFormatSupportsParallelFormatting("TSKV"); } diff --git a/src/Processors/Formats/Impl/TSKVRowOutputFormat.h b/src/Processors/Formats/Impl/TSKVRowOutputFormat.h index 9bc44bbb4d7..25613dd22d9 100644 --- a/src/Processors/Formats/Impl/TSKVRowOutputFormat.h +++ b/src/Processors/Formats/Impl/TSKVRowOutputFormat.h @@ -14,7 +14,7 @@ namespace DB class TSKVRowOutputFormat final : public TabSeparatedRowOutputFormat { public: - TSKVRowOutputFormat(WriteBuffer & out_, const Block & header, const RowOutputFormatParams & params_, const FormatSettings & format_settings); + TSKVRowOutputFormat(WriteBuffer & out_, const Block & header, const FormatSettings & format_settings); String getName() const override { return "TSKVRowOutputFormat"; } diff --git a/src/Processors/Formats/Impl/TabSeparatedRowOutputFormat.cpp b/src/Processors/Formats/Impl/TabSeparatedRowOutputFormat.cpp index 3bd0fd7e3d6..a4a5aea26cb 100644 --- a/src/Processors/Formats/Impl/TabSeparatedRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/TabSeparatedRowOutputFormat.cpp @@ -12,9 +12,8 @@ TabSeparatedRowOutputFormat::TabSeparatedRowOutputFormat( bool with_names_, bool with_types_, bool is_raw_, - const RowOutputFormatParams & params_, const FormatSettings & format_settings_) - : IRowOutputFormat(header_, out_, params_), with_names(with_names_), with_types(with_types_), is_raw(is_raw_), format_settings(format_settings_) + : IRowOutputFormat(header_, out_), with_names(with_names_), with_types(with_types_), is_raw(is_raw_), format_settings(format_settings_) { } @@ -26,11 +25,10 @@ void TabSeparatedRowOutputFormat::writeLine(const std::vector & values) writeString(values[i], out); else writeEscapedString(values[i], out); - if (i + 1 == values.size()) - writeRowEndDelimiter(); - else + if (i + 1 != values.size()) writeFieldDelimiter(); } + writeRowEndDelimiter(); } void TabSeparatedRowOutputFormat::writePrefix() @@ -86,10 +84,9 @@ void registerOutputFormatTabSeparated(FormatFactory & factory) factory.registerOutputFormat(format_name, [is_raw, with_names, with_types]( WriteBuffer & buf, const Block & sample, - const RowOutputFormatParams & params, const FormatSettings & settings) { - return std::make_shared(buf, sample, with_names, with_types, is_raw, params, settings); + return std::make_shared(buf, sample, with_names, with_types, is_raw, settings); }); factory.markOutputFormatSupportsParallelFormatting(format_name); diff --git a/src/Processors/Formats/Impl/TabSeparatedRowOutputFormat.h b/src/Processors/Formats/Impl/TabSeparatedRowOutputFormat.h index 533e30a3ee1..9facba2052a 100644 --- a/src/Processors/Formats/Impl/TabSeparatedRowOutputFormat.h +++ b/src/Processors/Formats/Impl/TabSeparatedRowOutputFormat.h @@ -25,7 +25,6 @@ public: bool with_names_, bool with_types_, bool is_raw_, - const RowOutputFormatParams & params_, const FormatSettings & format_settings_); String getName() const override { return "TabSeparatedRowOutputFormat"; } diff --git a/src/Processors/Formats/Impl/TemplateBlockOutputFormat.cpp b/src/Processors/Formats/Impl/TemplateBlockOutputFormat.cpp index 0e7bdb259ac..1b2af4e631c 100644 --- 
a/src/Processors/Formats/Impl/TemplateBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/TemplateBlockOutputFormat.cpp @@ -133,14 +133,7 @@ void TemplateBlockOutputFormat::writePrefix() void TemplateBlockOutputFormat::finalizeImpl() { - if (finalized) - return; - size_t parts = format.format_idx_to_column_idx.size(); - auto outside_statistics = getOutsideStatistics(); - if (outside_statistics) - statistics = std::move(*outside_statistics); - for (size_t i = 0; i < parts; ++i) { auto type = std::make_shared(); @@ -184,17 +177,19 @@ void TemplateBlockOutputFormat::finalizeImpl() } writeString(format.delimiters[i + 1], out); } - - finalized = true; } +void TemplateBlockOutputFormat::resetFormatterImpl() +{ + row_count = 0; + statistics = Statistics(); +} void registerOutputFormatTemplate(FormatFactory & factory) { factory.registerOutputFormat("Template", []( WriteBuffer & buf, const Block & sample, - const RowOutputFormatParams &, const FormatSettings & settings) { ParsedTemplateFormatString resultset_format; diff --git a/src/Processors/Formats/Impl/TemplateBlockOutputFormat.h b/src/Processors/Formats/Impl/TemplateBlockOutputFormat.h index 419fcac37c1..53d98849482 100644 --- a/src/Processors/Formats/Impl/TemplateBlockOutputFormat.h +++ b/src/Processors/Formats/Impl/TemplateBlockOutputFormat.h @@ -44,6 +44,7 @@ private: void consumeTotals(Chunk chunk) override { statistics.totals = std::move(chunk); } void consumeExtremes(Chunk chunk) override { statistics.extremes = std::move(chunk); } void finalizeImpl() override; + void resetFormatterImpl() override; void writeRow(const Chunk & chunk, size_t row_num); template void writeValue(U value, EscapingRule escaping_rule); @@ -57,8 +58,6 @@ private: ParsedTemplateFormatString format; ParsedTemplateFormatString row_format; - Statistics statistics; - size_t row_count = 0; std::string row_between_delimiter; diff --git a/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp b/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp index ba6650c2887..05fa17c7a17 100644 --- a/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp @@ -293,8 +293,7 @@ void TemplateRowInputFormat::resetParser() void TemplateRowInputFormat::setReadBuffer(ReadBuffer & in_) { - buf = std::make_unique(in_); - IInputFormat::setReadBuffer(*buf); + buf->setSubBuffer(in_); } TemplateFormatReader::TemplateFormatReader( diff --git a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp index 85182682f1b..7b9cb23ddf0 100644 --- a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp @@ -108,15 +108,97 @@ Chunk ValuesBlockInputFormat::generate() return Chunk{std::move(columns), rows_in_block}; } +/// Can be used in fileSegmentationEngine for parallel parsing of Values +static bool skipToNextRow(PeekableReadBuffer * buf, size_t min_chunk_bytes, int balance) +{ + skipWhitespaceIfAny(*buf); + if (buf->eof() || *buf->position() == ';') + return false; + bool quoted = false; + + size_t chunk_begin_buf_count = buf->count(); + while (!buf->eof() && (balance || buf->count() - chunk_begin_buf_count < min_chunk_bytes)) + { + buf->position() = find_first_symbols<'\\', '\'', ')', '('>(buf->position(), buf->buffer().end()); + if (buf->position() == buf->buffer().end()) + continue; + if (*buf->position() == '\\') + { + ++buf->position(); + if (!buf->eof()) + ++buf->position(); + } + else if (*buf->position() == '\'') + 
{ + quoted ^= true; + ++buf->position(); + } + else if (*buf->position() == ')') + { + ++buf->position(); + if (!quoted) + --balance; + } + else if (*buf->position() == '(') + { + ++buf->position(); + if (!quoted) + ++balance; + } + } + + if (!buf->eof() && *buf->position() == ',') + ++buf->position(); + return true; +} + +/// We need continuous memory containing the expression to use Lexer +/// Note that this is both reading and tokenizing until the end of the row +/// This is doing unnecessary work if the rest of the columns can be read with tryReadValue (which doesn't require tokens) +/// and it's more efficient if they don't (as everything is already tokenized) +void ValuesBlockInputFormat::readUntilTheEndOfRowAndReTokenize(size_t current_column_idx) +{ + if (tokens && token_iterator && + /// Make sure the underlying memory hasn't changed because of next() calls in the buffer + ((*token_iterator)->begin >= buf->buffer().begin() && (*token_iterator)->begin <= buf->buffer().end())) + { + while ((*token_iterator)->begin < buf->position() && !(*token_iterator)->isError() && !(*token_iterator)->isEnd()) + ++(*token_iterator); + + if (!(*token_iterator)->isError() && !(*token_iterator)->isEnd()) + return; + } + + skipToNextRow(buf.get(), 0, 1); + buf->makeContinuousMemoryFromCheckpointToPos(); + auto * row_end = buf->position(); + buf->rollbackToCheckpoint(); + tokens.emplace(buf->position(), row_end); + token_iterator.emplace(*tokens, static_cast(context->getSettingsRef().max_parser_depth)); + auto const & first = (*token_iterator).get(); + if (first.isError() || first.isEnd()) + { + const Block & header = getPort().getHeader(); + const IDataType & type = *header.getByPosition(current_column_idx).type; + throw Exception( + ErrorCodes::SYNTAX_ERROR, + "Cannot parse expression of type {} here: {}", + type.getName(), + std::string_view(buf->position(), std::min(SHOW_CHARS_ON_SYNTAX_ERROR, buf->buffer().end() - buf->position()))); + } +} + void ValuesBlockInputFormat::readRow(MutableColumns & columns, size_t row_num) { + tokens.reset(); + token_iterator.reset(); assertChar('(', *buf); for (size_t column_idx = 0; column_idx < num_columns; ++column_idx) { skipWhitespaceIfAny(*buf); PeekableReadBufferCheckpoint checkpoint{*buf}; - bool read; + bool read = false; /// Parse value using fast streaming parser for literals and slow SQL parser for expressions. 
/// If there is SQL expression in some row, template of this expression will be deduced, @@ -126,7 +208,7 @@ void ValuesBlockInputFormat::readRow(MutableColumns & columns, size_t row_num) read = tryReadValue(*columns[column_idx], column_idx); else if (parser_type_for_column[column_idx] == ParserType::BatchTemplate) read = tryParseExpressionUsingTemplate(columns[column_idx], column_idx); - else /// if (parser_type_for_column[column_idx] == ParserType::SingleExpressionEvaluation) + else read = parseExpression(*columns[column_idx], column_idx); if (!read) @@ -143,9 +225,12 @@ void ValuesBlockInputFormat::readRow(MutableColumns & columns, size_t row_num) bool ValuesBlockInputFormat::tryParseExpressionUsingTemplate(MutableColumnPtr & column, size_t column_idx) { + readUntilTheEndOfRowAndReTokenize(column_idx); + IParser::Pos start = *token_iterator; + /// Try to parse expression using template if one was successfully deduced while parsing the first row - auto settings = context->getSettingsRef(); - if (templates[column_idx]->parseExpression(*buf, format_settings, settings)) + const auto & settings = context->getSettingsRef(); + if (templates[column_idx]->parseExpression(*buf, *token_iterator, format_settings, settings)) { ++rows_parsed_using_template[column_idx]; return true; @@ -166,6 +251,7 @@ bool ValuesBlockInputFormat::tryParseExpressionUsingTemplate(MutableColumnPtr & /// Do not use this template anymore templates[column_idx].reset(); buf->rollbackToCheckpoint(); + *token_iterator = start; /// It will deduce new template or fallback to slow SQL parser return parseExpression(*column, column_idx); @@ -295,79 +381,41 @@ namespace } } -/// Can be used in fileSegmentationEngine for parallel parsing of Values -static bool skipToNextRow(PeekableReadBuffer * buf, size_t min_chunk_bytes, int balance) -{ - skipWhitespaceIfAny(*buf); - if (buf->eof() || *buf->position() == ';') - return false; - bool quoted = false; - - size_t chunk_begin_buf_count = buf->count(); - while (!buf->eof() && (balance || buf->count() - chunk_begin_buf_count < min_chunk_bytes)) - { - buf->position() = find_first_symbols<'\\', '\'', ')', '('>(buf->position(), buf->buffer().end()); - if (buf->position() == buf->buffer().end()) - continue; - if (*buf->position() == '\\') - { - ++buf->position(); - if (!buf->eof()) - ++buf->position(); - } - else if (*buf->position() == '\'') - { - quoted ^= true; - ++buf->position(); - } - else if (*buf->position() == ')') - { - ++buf->position(); - if (!quoted) - --balance; - } - else if (*buf->position() == '(') - { - ++buf->position(); - if (!quoted) - ++balance; - } - } - - if (!buf->eof() && *buf->position() == ',') - ++buf->position(); - return true; -} - bool ValuesBlockInputFormat::parseExpression(IColumn & column, size_t column_idx) { const Block & header = getPort().getHeader(); const IDataType & type = *header.getByPosition(column_idx).type; auto settings = context->getSettingsRef(); - /// We need continuous memory containing the expression to use Lexer - skipToNextRow(buf.get(), 0, 1); - buf->makeContinuousMemoryFromCheckpointToPos(); - buf->rollbackToCheckpoint(); + /// Advance the token iterator until the start of the column expression + readUntilTheEndOfRowAndReTokenize(column_idx); - Expected expected; - Tokens tokens(buf->position(), buf->buffer().end()); - IParser::Pos token_iterator(tokens, static_cast(settings.max_parser_depth)); + bool parsed = false; ASTPtr ast; + std::optional ti_start; - bool parsed = parser.parse(token_iterator, ast, expected); + if 
(!(*token_iterator)->isError() && !(*token_iterator)->isEnd()) + { + Expected expected; + /// Keep a copy to the start of the column tokens to use if later if necessary + ti_start = IParser::Pos(*token_iterator, static_cast(settings.max_parser_depth)); - /// Consider delimiter after value (',' or ')') as part of expression - if (column_idx + 1 != num_columns) - parsed &= token_iterator->type == TokenType::Comma; - else - parsed &= token_iterator->type == TokenType::ClosingRoundBracket; + parsed = parser.parse(*token_iterator, ast, expected); + + /// Consider delimiter after value (',' or ')') as part of expression + if (column_idx + 1 != num_columns) + parsed &= (*token_iterator)->type == TokenType::Comma; + else + parsed &= (*token_iterator)->type == TokenType::ClosingRoundBracket; + } if (!parsed) - throw Exception("Cannot parse expression of type " + type.getName() + " here: " - + String(buf->position(), std::min(SHOW_CHARS_ON_SYNTAX_ERROR, buf->buffer().end() - buf->position())), - ErrorCodes::SYNTAX_ERROR); - ++token_iterator; + throw Exception( + ErrorCodes::SYNTAX_ERROR, + "Cannot parse expression of type {} here: {}", + type.getName(), + std::string_view(buf->position(), std::min(SHOW_CHARS_ON_SYNTAX_ERROR, buf->buffer().end() - buf->position()))); + ++(*token_iterator); if (parser_type_for_column[column_idx] != ParserType::Streaming && dynamic_cast(ast.get())) { @@ -417,8 +465,8 @@ bool ValuesBlockInputFormat::parseExpression(IColumn & column, size_t column_idx auto structure = templates_cache.getFromCacheOrConstruct( result_type, !result_type->isNullable() && format_settings.null_as_default, - TokenIterator(tokens), - token_iterator, + *ti_start, + *token_iterator, ast, context, &found_in_cache, @@ -430,7 +478,7 @@ bool ValuesBlockInputFormat::parseExpression(IColumn & column, size_t column_idx ++attempts_to_deduce_template[column_idx]; buf->rollbackToCheckpoint(); - if (templates[column_idx]->parseExpression(*buf, format_settings, settings)) + if (templates[column_idx]->parseExpression(*buf, *ti_start, format_settings, settings)) { ++rows_parsed_using_template[column_idx]; parser_type_for_column[column_idx] = ParserType::BatchTemplate; @@ -448,7 +496,7 @@ bool ValuesBlockInputFormat::parseExpression(IColumn & column, size_t column_idx else { buf->rollbackToCheckpoint(); - size_t len = const_cast(token_iterator->begin) - buf->position(); + size_t len = const_cast((*token_iterator)->begin) - buf->position(); throw Exception("Cannot deduce template of expression: " + std::string(buf->position(), len), ErrorCodes::SYNTAX_ERROR); } } @@ -460,7 +508,7 @@ bool ValuesBlockInputFormat::parseExpression(IColumn & column, size_t column_idx throw Exception("Interpreting expressions is disabled", ErrorCodes::SUPPORT_IS_DISABLED); /// Try to evaluate single expression if other parsers don't work - buf->position() = const_cast(token_iterator->begin); + buf->position() = const_cast((*token_iterator)->begin); std::pair value_raw = evaluateConstantExpression(ast, context); @@ -564,8 +612,7 @@ void ValuesBlockInputFormat::resetParser() void ValuesBlockInputFormat::setReadBuffer(ReadBuffer & in_) { - buf = std::make_unique(in_); - IInputFormat::setReadBuffer(*buf); + buf->setSubBuffer(in_); } ValuesSchemaReader::ValuesSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_) diff --git a/src/Processors/Formats/Impl/ValuesBlockInputFormat.h b/src/Processors/Formats/Impl/ValuesBlockInputFormat.h index bf243c54bd7..9abade72af1 100644 --- 
a/src/Processors/Formats/Impl/ValuesBlockInputFormat.h +++ b/src/Processors/Formats/Impl/ValuesBlockInputFormat.h @@ -56,6 +56,7 @@ private: Chunk generate() override; void readRow(MutableColumns & columns, size_t row_num); + void readUntilTheEndOfRowAndReTokenize(size_t current_column_idx); bool tryParseExpressionUsingTemplate(MutableColumnPtr & column, size_t column_idx); ALWAYS_INLINE inline bool tryReadValue(IColumn & column, size_t column_idx); @@ -70,6 +71,8 @@ private: void readSuffix(); std::unique_ptr buf; + std::optional token_iterator{}; + std::optional tokens{}; const RowInputFormatParams params; diff --git a/src/Processors/Formats/Impl/ValuesRowOutputFormat.cpp b/src/Processors/Formats/Impl/ValuesRowOutputFormat.cpp index 275bbad9e66..0612bde0fa3 100644 --- a/src/Processors/Formats/Impl/ValuesRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ValuesRowOutputFormat.cpp @@ -10,8 +10,8 @@ namespace DB { -ValuesRowOutputFormat::ValuesRowOutputFormat(WriteBuffer & out_, const Block & header_, const RowOutputFormatParams & params_, const FormatSettings & format_settings_) - : IRowOutputFormat(header_, out_, params_), format_settings(format_settings_) +ValuesRowOutputFormat::ValuesRowOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_) + : IRowOutputFormat(header_, out_), format_settings(format_settings_) { } @@ -46,10 +46,9 @@ void registerOutputFormatValues(FormatFactory & factory) factory.registerOutputFormat("Values", []( WriteBuffer & buf, const Block & sample, - const RowOutputFormatParams & params, const FormatSettings & settings) { - return std::make_shared(buf, sample, params, settings); + return std::make_shared(buf, sample, settings); }); factory.markOutputFormatSupportsParallelFormatting("Values"); diff --git a/src/Processors/Formats/Impl/ValuesRowOutputFormat.h b/src/Processors/Formats/Impl/ValuesRowOutputFormat.h index 76c0a1e7873..35840d7f0c9 100644 --- a/src/Processors/Formats/Impl/ValuesRowOutputFormat.h +++ b/src/Processors/Formats/Impl/ValuesRowOutputFormat.h @@ -15,7 +15,7 @@ class WriteBuffer; class ValuesRowOutputFormat final : public IRowOutputFormat { public: - ValuesRowOutputFormat(WriteBuffer & out_, const Block & header_, const RowOutputFormatParams & params_, const FormatSettings & format_settings_); + ValuesRowOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_); String getName() const override { return "ValuesRowOutputFormat"; } diff --git a/src/Processors/Formats/Impl/VerticalRowOutputFormat.cpp b/src/Processors/Formats/Impl/VerticalRowOutputFormat.cpp index 33ff06edde0..cfaefbe663b 100644 --- a/src/Processors/Formats/Impl/VerticalRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/VerticalRowOutputFormat.cpp @@ -11,8 +11,8 @@ namespace DB { VerticalRowOutputFormat::VerticalRowOutputFormat( - WriteBuffer & out_, const Block & header_, const RowOutputFormatParams & params_, const FormatSettings & format_settings_) - : IRowOutputFormat(header_, out_, params_), format_settings(format_settings_) + WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_) + : IRowOutputFormat(header_, out_), format_settings(format_settings_) { const auto & sample = getPort(PortKind::Main).getHeader(); size_t columns = sample.columns(); @@ -71,6 +71,7 @@ void VerticalRowOutputFormat::writeValue(const IColumn & column, const ISerializ void VerticalRowOutputFormat::writeRowStartDelimiter() { + field_number = 0; ++row_number; if (row_number > 
format_settings.pretty.max_rows) @@ -92,8 +93,7 @@ void VerticalRowOutputFormat::writeRowBetweenDelimiter() if (row_number > format_settings.pretty.max_rows) return; - writeCString("\n", out); - field_number = 0; + writeChar('\n', out); } @@ -160,10 +160,9 @@ void registerOutputFormatVertical(FormatFactory & factory) factory.registerOutputFormat("Vertical", []( WriteBuffer & buf, const Block & sample, - const RowOutputFormatParams & params, const FormatSettings & settings) { - return std::make_shared(buf, sample, params, settings); + return std::make_shared(buf, sample, settings); }); factory.markOutputFormatSupportsParallelFormatting("Vertical"); diff --git a/src/Processors/Formats/Impl/VerticalRowOutputFormat.h b/src/Processors/Formats/Impl/VerticalRowOutputFormat.h index c3dbafd8b9b..5870c3503fc 100644 --- a/src/Processors/Formats/Impl/VerticalRowOutputFormat.h +++ b/src/Processors/Formats/Impl/VerticalRowOutputFormat.h @@ -18,7 +18,7 @@ class Context; class VerticalRowOutputFormat final : public IRowOutputFormat { public: - VerticalRowOutputFormat(WriteBuffer & out_, const Block & header_, const RowOutputFormatParams & params_, const FormatSettings & format_settings_); + VerticalRowOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_); String getName() const override { return "VerticalRowOutputFormat"; } @@ -45,6 +45,11 @@ private: /// For totals and extremes. void writeSpecialRow(const Columns & columns, size_t row_num, const char * title); + void resetFormatterImpl() override + { + row_number = 0; + } + const FormatSettings format_settings; size_t field_number = 0; size_t row_number = 0; diff --git a/src/Processors/Formats/Impl/XMLRowOutputFormat.cpp b/src/Processors/Formats/Impl/XMLRowOutputFormat.cpp index b0de4d0a976..1d6fb62275c 100644 --- a/src/Processors/Formats/Impl/XMLRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/XMLRowOutputFormat.cpp @@ -7,8 +7,8 @@ namespace DB { -XMLRowOutputFormat::XMLRowOutputFormat(WriteBuffer & out_, const Block & header_, const RowOutputFormatParams & params_, const FormatSettings & format_settings_) - : RowOutputFormatWithUTF8ValidationAdaptor(true, header_, out_, params_), fields(header_.getNamesAndTypes()), format_settings(format_settings_) +XMLRowOutputFormat::XMLRowOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_) + : RowOutputFormatWithUTF8ValidationAdaptor(true, header_, out_), fields(header_.getNamesAndTypes()), format_settings(format_settings_) { const auto & sample = getPort(PortKind::Main).getHeader(); field_tag_names.resize(sample.columns()); @@ -94,11 +94,9 @@ void XMLRowOutputFormat::writeRowEndDelimiter() ++row_count; } - void XMLRowOutputFormat::writeSuffix() { writeCString("\t\n", *ostr); - } @@ -190,9 +188,6 @@ void XMLRowOutputFormat::finalizeImpl() writeIntText(row_count, *ostr); writeCString("\n", *ostr); - auto outside_statistics = getOutsideStatistics(); - if (outside_statistics) - statistics = std::move(*outside_statistics); writeRowsBeforeLimitAtLeast(); @@ -203,6 +198,13 @@ void XMLRowOutputFormat::finalizeImpl() ostr->next(); } +void XMLRowOutputFormat::resetFormatterImpl() +{ + RowOutputFormatWithUTF8ValidationAdaptor::resetFormatterImpl(); + row_count = 0; + statistics = Statistics(); +} + void XMLRowOutputFormat::writeRowsBeforeLimitAtLeast() { if (statistics.applied_limit) @@ -234,10 +236,9 @@ void registerOutputFormatXML(FormatFactory & factory) factory.registerOutputFormat("XML", []( WriteBuffer & buf, const Block & 
sample, - const RowOutputFormatParams & params, const FormatSettings & settings) { - return std::make_shared(buf, sample, params, settings); + return std::make_shared(buf, sample, settings); }); factory.markOutputFormatSupportsParallelFormatting("XML"); diff --git a/src/Processors/Formats/Impl/XMLRowOutputFormat.h b/src/Processors/Formats/Impl/XMLRowOutputFormat.h index 25544bab35e..e25e7129109 100644 --- a/src/Processors/Formats/Impl/XMLRowOutputFormat.h +++ b/src/Processors/Formats/Impl/XMLRowOutputFormat.h @@ -16,7 +16,7 @@ namespace DB class XMLRowOutputFormat final : public RowOutputFormatWithUTF8ValidationAdaptor { public: - XMLRowOutputFormat(WriteBuffer & out_, const Block & header_, const RowOutputFormatParams & params_, const FormatSettings & format_settings_); + XMLRowOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_); String getName() const override { return "XMLRowOutputFormat"; } @@ -27,6 +27,7 @@ private: void writePrefix() override; void writeSuffix() override; void finalizeImpl() override; + void resetFormatterImpl() override; void writeMinExtreme(const Columns & columns, size_t row_num) override; void writeMaxExtreme(const Columns & columns, size_t row_num) override; @@ -61,7 +62,6 @@ private: NamesAndTypes fields; Names field_tag_names; - Statistics statistics; const FormatSettings format_settings; }; diff --git a/src/Processors/Formats/OutputFormatWithUTF8ValidationAdaptor.h b/src/Processors/Formats/OutputFormatWithUTF8ValidationAdaptor.h index b3f1641bd00..8d8fb9ef0c6 100644 --- a/src/Processors/Formats/OutputFormatWithUTF8ValidationAdaptor.h +++ b/src/Processors/Formats/OutputFormatWithUTF8ValidationAdaptor.h @@ -40,6 +40,18 @@ public: this->out.next(); } + void finalizeBuffers() override + { + if (validating_ostr) + validating_ostr->finalize(); + } + + void resetFormatterImpl() override + { + validating_ostr = std::make_unique(this->out); + ostr = validating_ostr.get(); + } + protected: /// Point to validating_ostr or out from IOutputFormat, should be used in derived classes instead of out. 
WriteBuffer * ostr; @@ -50,7 +62,7 @@ private: }; using OutputFormatWithUTF8ValidationAdaptor = OutputFormatWithUTF8ValidationAdaptorBase; -using RowOutputFormatWithUTF8ValidationAdaptor = OutputFormatWithUTF8ValidationAdaptorBase; +using RowOutputFormatWithUTF8ValidationAdaptor = OutputFormatWithUTF8ValidationAdaptorBase; } diff --git a/src/Processors/Merges/Algorithms/Graphite.cpp b/src/Processors/Merges/Algorithms/Graphite.cpp index c5c611366ff..0616c4bd6e6 100644 --- a/src/Processors/Merges/Algorithms/Graphite.cpp +++ b/src/Processors/Merges/Algorithms/Graphite.cpp @@ -332,8 +332,7 @@ std::string buildTaggedRegex(std::string regexp_str) * * */ -static const Pattern & -appendGraphitePattern( +static const Pattern & appendGraphitePattern( const Poco::Util::AbstractConfiguration & config, const String & config_element, Patterns & patterns, bool default_rule, diff --git a/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp index 467ded19f4d..c5937fe0bc5 100644 --- a/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp @@ -9,6 +9,12 @@ namespace DB { +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + + static GraphiteRollupSortedAlgorithm::ColumnsDefinition defineColumns( const Block & header, const Graphite::Params & params) { @@ -26,6 +32,9 @@ static GraphiteRollupSortedAlgorithm::ColumnsDefinition defineColumns( if (i != def.time_column_num && i != def.value_column_num && i != def.version_column_num) def.unmodified_column_numbers.push_back(i); + if (!WhichDataType(header.getByPosition(def.value_column_num).type).isFloat64()) + throw Exception("Only `Float64` data type is allowed for the value column of GraphiteMergeTree", ErrorCodes::BAD_ARGUMENTS); + return def; } diff --git a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp index c79c667a988..ee3177e132f 100644 --- a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp @@ -382,7 +382,7 @@ static MutableColumns getMergedDataColumns( for (const auto & desc : def.columns_to_aggregate) { // Wrap aggregated columns in a tuple to match function signature - if (!desc.is_agg_func_type && !desc.is_simple_agg_func_type && isTuple(desc.function->getReturnType())) + if (!desc.is_agg_func_type && !desc.is_simple_agg_func_type && isTuple(desc.function->getResultType())) { size_t tuple_size = desc.column_numbers.size(); MutableColumns tuple_columns(tuple_size); @@ -439,7 +439,7 @@ static void postprocessChunk( auto column = std::move(columns[next_column]); ++next_column; - if (!desc.is_agg_func_type && !desc.is_simple_agg_func_type && isTuple(desc.function->getReturnType())) + if (!desc.is_agg_func_type && !desc.is_simple_agg_func_type && isTuple(desc.function->getResultType())) { /// Unpack tuple into block. 
size_t tuple_size = desc.column_numbers.size(); diff --git a/src/Processors/QueryPlan/CreatingSetsStep.h b/src/Processors/QueryPlan/CreatingSetsStep.h index 9995af7bca7..b4777578a30 100644 --- a/src/Processors/QueryPlan/CreatingSetsStep.h +++ b/src/Processors/QueryPlan/CreatingSetsStep.h @@ -44,9 +44,6 @@ public: QueryPipelineBuilderPtr updatePipeline(QueryPipelineBuilders pipelines, const BuildQueryPipelineSettings &) override; void describePipeline(FormatSettings & settings) const override; - -private: - Processors processors; }; void addCreatingSetsStep(QueryPlan & query_plan, PreparedSets::SubqueriesForSets subqueries_for_sets, ContextPtr context); diff --git a/src/Processors/QueryPlan/IQueryPlanStep.cpp b/src/Processors/QueryPlan/IQueryPlanStep.cpp index b36d1f0e12f..a0035089c29 100644 --- a/src/Processors/QueryPlan/IQueryPlanStep.cpp +++ b/src/Processors/QueryPlan/IQueryPlanStep.cpp @@ -113,4 +113,9 @@ void IQueryPlanStep::describePipeline(const Processors & processors, FormatSetti doDescribeProcessor(*prev, count, settings); } +void IQueryPlanStep::appendExtraProcessors(const Processors & extra_processors) +{ + processors.insert(processors.end(), extra_processors.begin(), extra_processors.end()); +} + } diff --git a/src/Processors/QueryPlan/IQueryPlanStep.h b/src/Processors/QueryPlan/IQueryPlanStep.h index 1e00d76b66f..316ecff9c2e 100644 --- a/src/Processors/QueryPlan/IQueryPlanStep.h +++ b/src/Processors/QueryPlan/IQueryPlanStep.h @@ -110,6 +110,9 @@ public: /// Get description of processors added in current step. Should be called after updatePipeline(). virtual void describePipeline(FormatSettings & /*settings*/) const {} + /// Append extra processors for this step. + void appendExtraProcessors(const Processors & extra_processors); + protected: DataStreams input_streams; std::optional output_stream; @@ -117,6 +120,10 @@ protected: /// Text description about what current step does. std::string step_description; + /// This field is used to store added processors from this step. + /// It is used only for introspection (EXPLAIN PIPELINE). + Processors processors; + static void describePipeline(const Processors & processors, FormatSettings & settings); }; diff --git a/src/Processors/QueryPlan/ISourceStep.h b/src/Processors/QueryPlan/ISourceStep.h index 08c939b626d..744b6f9b5c4 100644 --- a/src/Processors/QueryPlan/ISourceStep.h +++ b/src/Processors/QueryPlan/ISourceStep.h @@ -15,10 +15,6 @@ public: virtual void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings & settings) = 0; void describePipeline(FormatSettings & settings) const override; - -protected: - /// We collect processors got after pipeline transformation. 
- Processors processors; }; } diff --git a/src/Processors/QueryPlan/ITransformingStep.cpp b/src/Processors/QueryPlan/ITransformingStep.cpp index 64ad2ec5626..195fa9ad68c 100644 --- a/src/Processors/QueryPlan/ITransformingStep.cpp +++ b/src/Processors/QueryPlan/ITransformingStep.cpp @@ -70,9 +70,4 @@ void ITransformingStep::describePipeline(FormatSettings & settings) const IQueryPlanStep::describePipeline(processors, settings); } -void ITransformingStep::appendExtraProcessors(const Processors & extra_processors) -{ - processors.insert(processors.end(), extra_processors.begin(), extra_processors.end()); -} - } diff --git a/src/Processors/QueryPlan/ITransformingStep.h b/src/Processors/QueryPlan/ITransformingStep.h index 8b16e982af5..1513b4307f8 100644 --- a/src/Processors/QueryPlan/ITransformingStep.h +++ b/src/Processors/QueryPlan/ITransformingStep.h @@ -75,9 +75,6 @@ public: void describePipeline(FormatSettings & settings) const override; - /// Append extra processors for this step. - void appendExtraProcessors(const Processors & extra_processors); - /// Enforcement is supposed to be done through the special settings that will be taken into account by remote nodes during query planning (e.g. force_aggregation_in_order). /// Should be called only if data_stream_traits.can_enforce_sorting_properties_in_distributed_query == true. virtual void adjustSettingsToEnforceSortingPropertiesInDistributedQuery(ContextMutablePtr) const @@ -100,8 +97,7 @@ protected: private: virtual void updateOutputStream() = 0; - /// We collect processors got after pipeline transformation. - Processors processors; + /// If we should collect processors got after pipeline transformation. bool collect_processors; const DataStreamTraits data_stream_traits; diff --git a/src/Processors/QueryPlan/IntersectOrExceptStep.cpp b/src/Processors/QueryPlan/IntersectOrExceptStep.cpp index c031303cc7f..afdff44020f 100644 --- a/src/Processors/QueryPlan/IntersectOrExceptStep.cpp +++ b/src/Processors/QueryPlan/IntersectOrExceptStep.cpp @@ -42,10 +42,10 @@ IntersectOrExceptStep::IntersectOrExceptStep( QueryPipelineBuilderPtr IntersectOrExceptStep::updatePipeline(QueryPipelineBuilders pipelines, const BuildQueryPipelineSettings &) { auto pipeline = std::make_unique(); - QueryPipelineProcessorsCollector collector(*pipeline, this); if (pipelines.empty()) { + QueryPipelineProcessorsCollector collector(*pipeline, this); pipeline->init(Pipe(std::make_shared(output_stream->header))); processors = collector.detachProcessors(); return pipeline; @@ -56,6 +56,7 @@ QueryPipelineBuilderPtr IntersectOrExceptStep::updatePipeline(QueryPipelineBuild /// Just in case. if (!isCompatibleHeader(cur_pipeline->getHeader(), getOutputStream().header)) { + QueryPipelineProcessorsCollector collector(*cur_pipeline, this); auto converting_dag = ActionsDAG::makeConvertingActions( cur_pipeline->getHeader().getColumnsWithTypeAndName(), getOutputStream().header.getColumnsWithTypeAndName(), @@ -66,16 +67,20 @@ QueryPipelineBuilderPtr IntersectOrExceptStep::updatePipeline(QueryPipelineBuild { return std::make_shared(cur_header, converting_actions); }); + + auto added_processors = collector.detachProcessors(); + processors.insert(processors.end(), added_processors.begin(), added_processors.end()); } /// For the case of union. 
cur_pipeline->addTransform(std::make_shared(header, cur_pipeline->getNumStreams(), 1)); } - *pipeline = QueryPipelineBuilder::unitePipelines(std::move(pipelines), max_threads); - pipeline->addTransform(std::make_shared(header, current_operator)); + *pipeline = QueryPipelineBuilder::unitePipelines(std::move(pipelines), max_threads, &processors); + auto transform = std::make_shared(header, current_operator); + processors.push_back(transform); + pipeline->addTransform(std::move(transform)); - processors = collector.detachProcessors(); return pipeline; } diff --git a/src/Processors/QueryPlan/IntersectOrExceptStep.h b/src/Processors/QueryPlan/IntersectOrExceptStep.h index d7eab574431..d15d2802456 100644 --- a/src/Processors/QueryPlan/IntersectOrExceptStep.h +++ b/src/Processors/QueryPlan/IntersectOrExceptStep.h @@ -24,7 +24,6 @@ private: Block header; Operator current_operator; size_t max_threads; - Processors processors; }; } diff --git a/src/Processors/QueryPlan/JoinStep.h b/src/Processors/QueryPlan/JoinStep.h index fc7f74d4fe8..a814d541574 100644 --- a/src/Processors/QueryPlan/JoinStep.h +++ b/src/Processors/QueryPlan/JoinStep.h @@ -37,7 +37,6 @@ private: size_t max_block_size; size_t max_streams; bool keep_left_read_in_order; - Processors processors; }; /// Special step for the case when Join is already filled. diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 556997b6e7a..0d8fe84f9d3 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -66,7 +66,7 @@ static MergeTreeReaderSettings getMergeTreeReaderSettings( .read_in_order = query_info.input_order_info != nullptr, .apply_deleted_mask = context->applyDeletedMask(), .use_asynchronous_read_from_pool = settings.allow_asynchronous_read_from_io_pool_for_merge_tree - && (settings.max_streams_to_max_threads_ratio > 1 || settings.allow_asynchronous_read_from_io_pool_for_merge_tree), + && (settings.max_streams_to_max_threads_ratio > 1 || settings.max_streams_for_merge_tree_reading > 1), }; } diff --git a/src/Processors/QueryPlan/UnionStep.cpp b/src/Processors/QueryPlan/UnionStep.cpp index 6290c7417db..e111890a833 100644 --- a/src/Processors/QueryPlan/UnionStep.cpp +++ b/src/Processors/QueryPlan/UnionStep.cpp @@ -62,10 +62,10 @@ void UnionStep::updateOutputSortDescription() QueryPipelineBuilderPtr UnionStep::updatePipeline(QueryPipelineBuilders pipelines, const BuildQueryPipelineSettings &) { auto pipeline = std::make_unique(); - QueryPipelineProcessorsCollector collector(*pipeline, this); if (pipelines.empty()) { + QueryPipelineProcessorsCollector collector(*pipeline, this); pipeline->init(Pipe(std::make_shared(output_stream->header))); processors = collector.detachProcessors(); return pipeline; @@ -80,6 +80,7 @@ QueryPipelineBuilderPtr UnionStep::updatePipeline(QueryPipelineBuilders pipeline /// But, just in case, convert it to the same header if not. 
if (!isCompatibleHeader(cur_pipeline->getHeader(), getOutputStream().header)) { + QueryPipelineProcessorsCollector collector(*cur_pipeline, this); auto converting_dag = ActionsDAG::makeConvertingActions( cur_pipeline->getHeader().getColumnsWithTypeAndName(), getOutputStream().header.getColumnsWithTypeAndName(), @@ -90,12 +91,13 @@ QueryPipelineBuilderPtr UnionStep::updatePipeline(QueryPipelineBuilders pipeline { return std::make_shared(cur_header, converting_actions); }); + + auto added_processors = collector.detachProcessors(); + processors.insert(processors.end(), added_processors.begin(), added_processors.end()); } } - *pipeline = QueryPipelineBuilder::unitePipelines(std::move(pipelines), max_threads); - - processors = collector.detachProcessors(); + *pipeline = QueryPipelineBuilder::unitePipelines(std::move(pipelines), max_threads, &processors); return pipeline; } diff --git a/src/Processors/QueryPlan/UnionStep.h b/src/Processors/QueryPlan/UnionStep.h index a5a2f6b356e..6278de07673 100644 --- a/src/Processors/QueryPlan/UnionStep.h +++ b/src/Processors/QueryPlan/UnionStep.h @@ -24,7 +24,6 @@ public: private: Block header; size_t max_threads; - Processors processors; }; } diff --git a/src/Processors/QueryPlan/WindowStep.cpp b/src/Processors/QueryPlan/WindowStep.cpp index b67b394b57b..92e9948c4c7 100644 --- a/src/Processors/QueryPlan/WindowStep.cpp +++ b/src/Processors/QueryPlan/WindowStep.cpp @@ -35,7 +35,7 @@ static Block addWindowFunctionResultColumns(const Block & block, { ColumnWithTypeAndName column_with_type; column_with_type.name = f.column_name; - column_with_type.type = f.aggregate_function->getReturnType(); + column_with_type.type = f.aggregate_function->getResultType(); column_with_type.column = column_with_type.type->createColumn(); result.insert(column_with_type); diff --git a/src/Processors/Sources/ShellCommandSource.cpp b/src/Processors/Sources/ShellCommandSource.cpp index 3f70abaea6d..4983fa047dc 100644 --- a/src/Processors/Sources/ShellCommandSource.cpp +++ b/src/Processors/Sources/ShellCommandSource.cpp @@ -71,28 +71,22 @@ static bool pollFd(int fd, size_t timeout_milliseconds, int events) pfd.events = events; pfd.revents = 0; - Stopwatch watch; - int res; while (true) { + Stopwatch watch; res = poll(&pfd, 1, static_cast(timeout_milliseconds)); if (res < 0) { - if (errno == EINTR) - { - watch.stop(); - timeout_milliseconds -= watch.elapsedMilliseconds(); - watch.start(); - - continue; - } - else - { + if (errno != EINTR) throwFromErrno("Cannot poll", ErrorCodes::CANNOT_POLL); - } + + const auto elapsed = watch.elapsedMilliseconds(); + if (timeout_milliseconds <= elapsed) + break; + timeout_milliseconds -= elapsed; } else { @@ -474,7 +468,7 @@ Pipe ShellCommandSourceCoordinator::createPipe( std::unique_ptr process; std::unique_ptr process_holder; - auto destructor_strategy = ShellCommand::DestructorStrategy{true /*terminate_in_destructor*/, configuration.command_termination_timeout_seconds}; + auto destructor_strategy = ShellCommand::DestructorStrategy{true /*terminate_in_destructor*/, SIGTERM, configuration.command_termination_timeout_seconds}; command_config.terminate_in_destructor_strategy = destructor_strategy; bool is_executable_pool = (process_pool != nullptr); diff --git a/src/Processors/Transforms/MergeSortingTransform.cpp b/src/Processors/Transforms/MergeSortingTransform.cpp index b039109c3f5..efd9249066c 100644 --- a/src/Processors/Transforms/MergeSortingTransform.cpp +++ b/src/Processors/Transforms/MergeSortingTransform.cpp @@ -34,7 +34,7 @@ public: , 
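The pollFd() rework in ShellCommandSource.cpp above replaces the single long-lived Stopwatch with a per-iteration measurement: when poll() is interrupted by a signal (EINTR), the elapsed time is subtracted from the remaining timeout and the call is retried; once the budget is exhausted, the loop exits as a timeout. A self-contained sketch of that retry pattern using plain POSIX poll() and std::chrono (the helper name and return convention are illustrative, not ClickHouse's):

```cpp
#include <cerrno>
#include <chrono>
#include <cstdio>
#include <poll.h>
#include <unistd.h>

/// Wait for `events` on `fd`, retrying when poll() is interrupted by a signal
/// and charging the time already spent against the remaining timeout.
/// Returns true if the descriptor became ready, false on timeout or error.
static bool pollWithRetries(int fd, size_t timeout_milliseconds, short events)
{
    pollfd pfd{};
    pfd.fd = fd;
    pfd.events = events;

    while (true)
    {
        const auto start = std::chrono::steady_clock::now();
        const int res = poll(&pfd, 1, static_cast<int>(timeout_milliseconds));

        if (res < 0)
        {
            if (errno != EINTR)
                return false;   /// a real error (the original code throws here)

            const auto elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(
                std::chrono::steady_clock::now() - start).count();
            if (timeout_milliseconds <= static_cast<size_t>(elapsed))
                return false;                    /// timeout budget exhausted
            timeout_milliseconds -= elapsed;     /// retry with whatever is left
        }
        else
        {
            return res > 0;   /// 0 means timeout, >0 means the descriptor is ready
        }
    }
}

int main()
{
    /// Example: wait up to 100 ms for stdin to become readable.
    std::printf("stdin ready: %d\n", pollWithRetries(STDIN_FILENO, 100, POLLIN));
    return 0;
}
```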
tmp_stream(tmp_stream_) , log(log_) { - LOG_INFO(log, "Sorting and writing part of data into temporary file {}", tmp_stream.path()); + LOG_INFO(log, "Sorting and writing part of data into temporary file {}", tmp_stream.getPath()); ProfileEvents::increment(ProfileEvents::ExternalSortWritePart); } @@ -58,7 +58,7 @@ public: ProfileEvents::increment(ProfileEvents::ExternalSortUncompressedBytes, stat.uncompressed_size); LOG_INFO(log, "Done writing part of data into temporary file {}, compressed {}, uncompressed {} ", - tmp_stream.path(), ReadableSize(static_cast(stat.compressed_size)), ReadableSize(static_cast(stat.uncompressed_size))); + tmp_stream.getPath(), ReadableSize(static_cast(stat.compressed_size)), ReadableSize(static_cast(stat.uncompressed_size))); } Block block = tmp_stream.read(); diff --git a/src/Processors/Transforms/MongoDBSource.cpp b/src/Processors/Transforms/MongoDBSource.cpp index b8f40789e83..88eddde0b3d 100644 --- a/src/Processors/Transforms/MongoDBSource.cpp +++ b/src/Processors/Transforms/MongoDBSource.cpp @@ -29,111 +29,11 @@ namespace DB namespace ErrorCodes { extern const int TYPE_MISMATCH; - extern const int MONGODB_CANNOT_AUTHENTICATE; extern const int UNKNOWN_TYPE; extern const int MONGODB_ERROR; } -#if POCO_VERSION < 0x01070800 -/// See https://pocoproject.org/forum/viewtopic.php?f=10&t=6326&p=11426&hilit=mongodb+auth#p11485 -void authenticate(Poco::MongoDB::Connection & connection, const std::string & database, const std::string & user, const std::string & password) -{ - Poco::MongoDB::Database db(database); - - /// Challenge-response authentication. - std::string nonce; - - /// First step: request nonce. - { - auto command = db.createCommand(); - command->setNumberToReturn(1); - command->selector().add("getnonce", 1); - - Poco::MongoDB::ResponseMessage response; - connection.sendRequest(*command, response); - - if (response.documents().empty()) - throw Exception( - "Cannot authenticate in MongoDB: server returned empty response for 'getnonce' command", - ErrorCodes::MONGODB_CANNOT_AUTHENTICATE); - - auto doc = response.documents()[0]; - try - { - double ok = doc->get("ok", 0); - if (ok != 1) - throw Exception( - "Cannot authenticate in MongoDB: server returned response for 'getnonce' command that" - " has field 'ok' missing or having wrong value", - ErrorCodes::MONGODB_CANNOT_AUTHENTICATE); - - nonce = doc->get("nonce", ""); - if (nonce.empty()) - throw Exception( - "Cannot authenticate in MongoDB: server returned response for 'getnonce' command that" - " has field 'nonce' missing or empty", - ErrorCodes::MONGODB_CANNOT_AUTHENTICATE); - } - catch (Poco::NotFoundException & e) - { - throw Exception( - "Cannot authenticate in MongoDB: server returned response for 'getnonce' command that has missing required field: " - + e.displayText(), - ErrorCodes::MONGODB_CANNOT_AUTHENTICATE); - } - } - - /// Second step: use nonce to calculate digest and send it back to the server. 
- /// Digest is hex_md5(n.nonce + username + hex_md5(username + ":mongo:" + password)) - { - std::string first = user + ":mongo:" + password; - - Poco::MD5Engine md5; - md5.update(first); - std::string digest_first(Poco::DigestEngine::digestToHex(md5.digest())); - std::string second = nonce + user + digest_first; - md5.reset(); - md5.update(second); - std::string digest_second(Poco::DigestEngine::digestToHex(md5.digest())); - - auto command = db.createCommand(); - command->setNumberToReturn(1); - command->selector() - .add("authenticate", 1) - .add("user", user) - .add("nonce", nonce) - .add("key", digest_second); - - Poco::MongoDB::ResponseMessage response; - connection.sendRequest(*command, response); - - if (response.empty()) - throw Exception( - "Cannot authenticate in MongoDB: server returned empty response for 'authenticate' command", - ErrorCodes::MONGODB_CANNOT_AUTHENTICATE); - - auto doc = response.documents()[0]; - try - { - double ok = doc->get("ok", 0); - if (ok != 1) - throw Exception( - "Cannot authenticate in MongoDB: server returned response for 'authenticate' command that" - " has field 'ok' missing or having wrong value", - ErrorCodes::MONGODB_CANNOT_AUTHENTICATE); - } - catch (Poco::NotFoundException & e) - { - throw Exception( - "Cannot authenticate in MongoDB: server returned response for 'authenticate' command that has missing required field: " - + e.displayText(), - ErrorCodes::MONGODB_CANNOT_AUTHENTICATE); - } - } -} -#endif - std::unique_ptr createCursor(const std::string & database, const std::string & collection, const Block & sample_block_to_select) { auto cursor = std::make_unique(database, collection); diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index 4d3eb1f0bbd..9bfaf1f375f 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -1067,7 +1067,7 @@ void WindowTransform::appendChunk(Chunk & chunk) // Initialize output columns. 
for (auto & ws : workspaces) { - block.output_columns.push_back(ws.aggregate_function->getReturnType() + block.output_columns.push_back(ws.aggregate_function->getResultType() ->createColumn()); block.output_columns.back()->reserve(block.rows); } @@ -1441,8 +1441,8 @@ struct WindowFunction { std::string name; - WindowFunction(const std::string & name_, const DataTypes & argument_types_, const Array & parameters_) - : IAggregateFunctionHelper(argument_types_, parameters_) + WindowFunction(const std::string & name_, const DataTypes & argument_types_, const Array & parameters_, const DataTypePtr & result_type_) + : IAggregateFunctionHelper(argument_types_, parameters_, result_type_) , name(name_) {} @@ -1472,12 +1472,9 @@ struct WindowFunctionRank final : public WindowFunction { WindowFunctionRank(const std::string & name_, const DataTypes & argument_types_, const Array & parameters_) - : WindowFunction(name_, argument_types_, parameters_) + : WindowFunction(name_, argument_types_, parameters_, std::make_shared()) {} - DataTypePtr getReturnType() const override - { return std::make_shared(); } - bool allocatesMemoryInArena() const override { return false; } void windowInsertResultInto(const WindowTransform * transform, @@ -1494,12 +1491,9 @@ struct WindowFunctionDenseRank final : public WindowFunction { WindowFunctionDenseRank(const std::string & name_, const DataTypes & argument_types_, const Array & parameters_) - : WindowFunction(name_, argument_types_, parameters_) + : WindowFunction(name_, argument_types_, parameters_, std::make_shared()) {} - DataTypePtr getReturnType() const override - { return std::make_shared(); } - bool allocatesMemoryInArena() const override { return false; } void windowInsertResultInto(const WindowTransform * transform, @@ -1560,8 +1554,8 @@ template struct StatefulWindowFunction : public WindowFunction { StatefulWindowFunction(const std::string & name_, - const DataTypes & argument_types_, const Array & parameters_) - : WindowFunction(name_, argument_types_, parameters_) + const DataTypes & argument_types_, const Array & parameters_, const DataTypePtr & result_type_) + : WindowFunction(name_, argument_types_, parameters_, result_type_) { } @@ -1607,7 +1601,7 @@ struct WindowFunctionExponentialTimeDecayedSum final : public StatefulWindowFunc WindowFunctionExponentialTimeDecayedSum(const std::string & name_, const DataTypes & argument_types_, const Array & parameters_) - : StatefulWindowFunction(name_, argument_types_, parameters_) + : StatefulWindowFunction(name_, argument_types_, parameters_, std::make_shared()) { if (parameters_.size() != 1) { @@ -1639,11 +1633,6 @@ struct WindowFunctionExponentialTimeDecayedSum final : public StatefulWindowFunc } } - DataTypePtr getReturnType() const override - { - return std::make_shared(); - } - bool allocatesMemoryInArena() const override { return false; } void windowInsertResultInto(const WindowTransform * transform, @@ -1705,7 +1694,7 @@ struct WindowFunctionExponentialTimeDecayedMax final : public WindowFunction WindowFunctionExponentialTimeDecayedMax(const std::string & name_, const DataTypes & argument_types_, const Array & parameters_) - : WindowFunction(name_, argument_types_, parameters_) + : WindowFunction(name_, argument_types_, parameters_, std::make_shared()) { if (parameters_.size() != 1) { @@ -1737,11 +1726,6 @@ struct WindowFunctionExponentialTimeDecayedMax final : public WindowFunction } } - DataTypePtr getReturnType() const override - { - return std::make_shared(); - } - bool allocatesMemoryInArena() const 
override { return false; } void windowInsertResultInto(const WindowTransform * transform, @@ -1781,7 +1765,7 @@ struct WindowFunctionExponentialTimeDecayedCount final : public StatefulWindowFu WindowFunctionExponentialTimeDecayedCount(const std::string & name_, const DataTypes & argument_types_, const Array & parameters_) - : StatefulWindowFunction(name_, argument_types_, parameters_) + : StatefulWindowFunction(name_, argument_types_, parameters_, std::make_shared()) { if (parameters_.size() != 1) { @@ -1805,11 +1789,6 @@ struct WindowFunctionExponentialTimeDecayedCount final : public StatefulWindowFu } } - DataTypePtr getReturnType() const override - { - return std::make_shared(); - } - bool allocatesMemoryInArena() const override { return false; } void windowInsertResultInto(const WindowTransform * transform, @@ -1868,7 +1847,7 @@ struct WindowFunctionExponentialTimeDecayedAvg final : public StatefulWindowFunc WindowFunctionExponentialTimeDecayedAvg(const std::string & name_, const DataTypes & argument_types_, const Array & parameters_) - : StatefulWindowFunction(name_, argument_types_, parameters_) + : StatefulWindowFunction(name_, argument_types_, parameters_, std::make_shared()) { if (parameters_.size() != 1) { @@ -1900,11 +1879,6 @@ struct WindowFunctionExponentialTimeDecayedAvg final : public StatefulWindowFunc } } - DataTypePtr getReturnType() const override - { - return std::make_shared(); - } - bool allocatesMemoryInArena() const override { return false; } void windowInsertResultInto(const WindowTransform * transform, @@ -1980,12 +1954,9 @@ struct WindowFunctionRowNumber final : public WindowFunction { WindowFunctionRowNumber(const std::string & name_, const DataTypes & argument_types_, const Array & parameters_) - : WindowFunction(name_, argument_types_, parameters_) + : WindowFunction(name_, argument_types_, parameters_, std::make_shared()) {} - DataTypePtr getReturnType() const override - { return std::make_shared(); } - bool allocatesMemoryInArena() const override { return false; } void windowInsertResultInto(const WindowTransform * transform, @@ -2004,7 +1975,7 @@ struct WindowFunctionLagLeadInFrame final : public WindowFunction { WindowFunctionLagLeadInFrame(const std::string & name_, const DataTypes & argument_types_, const Array & parameters_) - : WindowFunction(name_, argument_types_, parameters_) + : WindowFunction(name_, argument_types_, parameters_, createResultType(argument_types_, name_)) { if (!parameters.empty()) { @@ -2012,12 +1983,6 @@ struct WindowFunctionLagLeadInFrame final : public WindowFunction "Function {} cannot be parameterized", name_); } - if (argument_types.empty()) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Function {} takes at least one argument", name_); - } - if (argument_types.size() == 1) { return; @@ -2060,7 +2025,16 @@ struct WindowFunctionLagLeadInFrame final : public WindowFunction } } - DataTypePtr getReturnType() const override { return argument_types[0]; } + static DataTypePtr createResultType(const DataTypes & argument_types_, const std::string & name_) + { + if (argument_types_.empty()) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Function {} takes at least one argument", name_); + } + + return argument_types_[0]; + } bool allocatesMemoryInArena() const override { return false; } @@ -2125,7 +2099,7 @@ struct WindowFunctionNthValue final : public WindowFunction { WindowFunctionNthValue(const std::string & name_, const DataTypes & argument_types_, const Array & parameters_) - : WindowFunction(name_, argument_types_, 
parameters_) + : WindowFunction(name_, argument_types_, parameters_, createResultType(name_, argument_types_)) { if (!parameters.empty()) { @@ -2133,12 +2107,6 @@ struct WindowFunctionNthValue final : public WindowFunction "Function {} cannot be parameterized", name_); } - if (argument_types.size() != 2) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Function {} takes exactly two arguments", name_); - } - if (!isInt64OrUInt64FieldType(argument_types[1]->getDefault().getType())) { throw Exception(ErrorCodes::BAD_ARGUMENTS, @@ -2147,7 +2115,16 @@ struct WindowFunctionNthValue final : public WindowFunction } } - DataTypePtr getReturnType() const override { return argument_types[0]; } + static DataTypePtr createResultType(const std::string & name_, const DataTypes & argument_types_) + { + if (argument_types_.size() != 2) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Function {} takes exactly two arguments", name_); + } + + return argument_types_[0]; + } bool allocatesMemoryInArena() const override { return false; } @@ -2204,7 +2181,7 @@ struct WindowFunctionNonNegativeDerivative final : public StatefulWindowFunction WindowFunctionNonNegativeDerivative(const std::string & name_, const DataTypes & argument_types_, const Array & parameters_) - : StatefulWindowFunction(name_, argument_types_, parameters_) + : StatefulWindowFunction(name_, argument_types_, parameters_, std::make_shared()) { if (!parameters.empty()) { @@ -2263,9 +2240,6 @@ struct WindowFunctionNonNegativeDerivative final : public StatefulWindowFunction } } - - DataTypePtr getReturnType() const override { return std::make_shared(); } - bool allocatesMemoryInArena() const override { return false; } void windowInsertResultInto(const WindowTransform * transform, @@ -2339,7 +2313,8 @@ void registerWindowFunctions(AggregateFunctionFactory & factory) .returns_default_when_only_null = true, // This probably doesn't make any difference for window functions because // it is an Aggregator-specific setting. - .is_order_dependent = true }; + .is_order_dependent = true, + .is_window_function = true}; factory.registerFunction("rank", {[](const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *) diff --git a/src/Processors/examples/comma_separated_streams.cpp b/src/Processors/examples/comma_separated_streams.cpp index a8c925354d9..2ec5564f346 100644 --- a/src/Processors/examples/comma_separated_streams.cpp +++ b/src/Processors/examples/comma_separated_streams.cpp @@ -102,8 +102,7 @@ try auto pipeline = QueryPipeline(std::move(input_format)); auto reader = std::make_unique(pipeline); - RowOutputFormatParams out_params; - OutputFormatPtr output_format = std::make_shared(out_buf, sample, true, true, out_params, format_settings); + OutputFormatPtr output_format = std::make_shared(out_buf, sample, true, true, format_settings); Block res; while (reader->pull(res)) { diff --git a/src/QueryPipeline/QueryPipelineBuilder.cpp b/src/QueryPipeline/QueryPipelineBuilder.cpp index 626296834a2..a3b3438306e 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.cpp +++ b/src/QueryPipeline/QueryPipelineBuilder.cpp @@ -309,6 +309,7 @@ QueryPipelineBuilder QueryPipelineBuilder::unitePipelines( pipeline.limitMaxThreads(max_threads_limit); } + pipeline.setCollectedProcessors(nullptr); return pipeline; } @@ -384,11 +385,7 @@ std::unique_ptr QueryPipelineBuilder::joinPipelinesRightLe /// Collect the NEW processors for the right pipeline. 
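The WindowTransform.cpp hunks above all make the same change: the virtual getReturnType() override disappears, the result type is handed to the WindowFunction base constructor, and any argument validation moves into a static createResultType() helper so it can run before the base class is constructed. A minimal standalone sketch of that constructor-injection pattern, with toy types standing in for DataTypePtr/DataTypes (not ClickHouse's real classes):

```cpp
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>

/// Toy stand-ins for DataTypePtr / DataTypes.
using DataTypePtr = std::shared_ptr<std::string>;
using DataTypes = std::vector<DataTypePtr>;

/// The base class now takes the result type in its constructor,
/// so derived functions no longer override a virtual getter.
struct WindowFunctionBase
{
    WindowFunctionBase(std::string name_, DataTypes argument_types_, DataTypePtr result_type_)
        : name(std::move(name_)), argument_types(std::move(argument_types_)), result_type(std::move(result_type_)) {}

    DataTypePtr getResultType() const { return result_type; }

    std::string name;
    DataTypes argument_types;
    DataTypePtr result_type;
};

/// Argument validation runs in a static helper *before* the base constructor,
/// mirroring WindowFunctionLagLeadInFrame::createResultType above.
struct LagLeadLike : WindowFunctionBase
{
    LagLeadLike(const std::string & name_, const DataTypes & argument_types_)
        : WindowFunctionBase(name_, argument_types_, createResultType(argument_types_, name_)) {}

    static DataTypePtr createResultType(const DataTypes & argument_types_, const std::string & name_)
    {
        if (argument_types_.empty())
            throw std::invalid_argument("Function " + name_ + " takes at least one argument");
        return argument_types_[0];   /// the result type follows the first argument
    }
};

int main()
{
    DataTypes args{std::make_shared<std::string>("UInt64")};
    LagLeadLike lag("lagInFrame", args);
    std::cout << "result type: " << *lag.getResultType() << '\n';   /// prints UInt64
}
```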
QueryPipelineProcessorsCollector collector(*right); /// Remember the last step of the right pipeline. - ExpressionStep * step = typeid_cast(right->pipe.processors->back()->getQueryPlanStep()); - if (!step) - { - throw Exception(ErrorCodes::LOGICAL_ERROR, "The top step of the right pipeline should be ExpressionStep"); - } + IQueryPlanStep * step = right->pipe.processors->back()->getQueryPlanStep(); /// In case joined subquery has totals, and we don't, add default chunk to totals. bool default_totals = false; @@ -560,7 +557,8 @@ std::unique_ptr QueryPipelineBuilder::joinPipelinesRightLe /// Move the collected processors to the last step in the right pipeline. Processors processors = collector.detachProcessors(); - step->appendExtraProcessors(processors); + if (step) + step->appendExtraProcessors(processors); left->pipe.processors->insert(left->pipe.processors->end(), right->pipe.processors->begin(), right->pipe.processors->end()); left->resources = std::move(right->resources); diff --git a/src/QueryPipeline/RemoteQueryExecutor.cpp b/src/QueryPipeline/RemoteQueryExecutor.cpp index e42b0141a27..49bc6a6326d 100644 --- a/src/QueryPipeline/RemoteQueryExecutor.cpp +++ b/src/QueryPipeline/RemoteQueryExecutor.cpp @@ -375,7 +375,9 @@ std::optional RemoteQueryExecutor::processPacket(Packet packet) got_duplicated_part_uuids = true; break; case Protocol::Server::Data: - /// If the block is not empty and is not a header block + /// Note: `packet.block.rows() > 0` means it's a header block. + /// We can actually return it, and the first call to RemoteQueryExecutor::read + /// will return earlier. We should consider doing it. if (packet.block && (packet.block.rows() > 0)) return adaptBlockStructure(packet.block, header); break; /// If the block is empty - we will receive other packets before EndOfStream. 
@@ -412,10 +414,14 @@ std::optional RemoteQueryExecutor::processPacket(Packet packet) case Protocol::Server::Totals: totals = packet.block; + if (totals) + totals = adaptBlockStructure(totals, header); break; case Protocol::Server::Extremes: extremes = packet.block; + if (extremes) + extremes = adaptBlockStructure(packet.block, header); break; case Protocol::Server::Log: diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index c2c12da6077..f39f830bcc0 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -572,20 +572,17 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context) ErrorCodes::ILLEGAL_COLUMN); } - auto insert_it = constraints.end(); + auto * insert_it = constraints.end(); constraints.emplace(insert_it, constraint_decl); metadata.constraints = ConstraintsDescription(constraints); } else if (type == DROP_CONSTRAINT) { auto constraints = metadata.constraints.getConstraints(); - auto erase_it = std::find_if( - constraints.begin(), - constraints.end(), - [this](const ASTPtr & constraint_ast) - { - return constraint_ast->as().name == constraint_name; - }); + auto * erase_it = std::find_if( + constraints.begin(), + constraints.end(), + [this](const ASTPtr & constraint_ast) { return constraint_ast->as().name == constraint_name; }); if (erase_it == constraints.end()) { @@ -1355,12 +1352,20 @@ static MutationCommand createMaterializeTTLCommand() return command; } -MutationCommands AlterCommands::getMutationCommands(StorageInMemoryMetadata metadata, bool materialize_ttl, ContextPtr context) const +MutationCommands AlterCommands::getMutationCommands(StorageInMemoryMetadata metadata, bool materialize_ttl, ContextPtr context, bool with_alters) const { MutationCommands result; for (const auto & alter_cmd : *this) + { if (auto mutation_cmd = alter_cmd.tryConvertToMutationCommand(metadata, context); mutation_cmd) + { result.push_back(*mutation_cmd); + } + else if (with_alters) + { + result.push_back(MutationCommand{.ast = alter_cmd.ast->clone(), .type = MutationCommand::Type::ALTER_WITHOUT_MUTATION}); + } + } if (materialize_ttl) { diff --git a/src/Storages/AlterCommands.h b/src/Storages/AlterCommands.h index 71c622cb9be..c91c82e9c7a 100644 --- a/src/Storages/AlterCommands.h +++ b/src/Storages/AlterCommands.h @@ -209,7 +209,7 @@ public: /// alter. If alter can be performed as pure metadata update, than result is /// empty. If some TTL changes happened than, depending on materialize_ttl /// additional mutation command (MATERIALIZE_TTL) will be returned. 
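The AlterCommands::getMutationCommands() change above adds a with_alters flag: commands that have a mutation equivalent are converted as before, and the remaining ones are wrapped into entries of type ALTER_WITHOUT_MUTATION so the caller receives one homogeneous list. A small self-contained sketch of that "convert what you can, wrap the rest" dispatch, with toy types standing in for AlterCommand/MutationCommand (not ClickHouse's real classes):

```cpp
#include <iostream>
#include <optional>
#include <string>
#include <vector>

/// Toy stand-ins for AlterCommand / MutationCommand.
struct AlterCmd { std::string name; bool convertible; };
struct MutationCmd { std::string description; bool is_plain_alter = false; };

/// Mirrors tryConvertToMutationCommand(): some commands have a mutation
/// equivalent, others do not.
static std::optional<MutationCmd> tryConvert(const AlterCmd & cmd)
{
    if (cmd.convertible)
        return MutationCmd{"mutation for " + cmd.name};
    return std::nullopt;
}

/// Convert what we can; when with_alters is set, keep the rest as
/// "alter without mutation" entries instead of dropping them.
static std::vector<MutationCmd> getMutationCommands(const std::vector<AlterCmd> & cmds, bool with_alters)
{
    std::vector<MutationCmd> result;
    for (const auto & cmd : cmds)
    {
        if (auto mutation = tryConvert(cmd))
            result.push_back(*mutation);
        else if (with_alters)
            result.push_back(MutationCmd{"alter " + cmd.name, /*is_plain_alter=*/true});
    }
    return result;
}

int main()
{
    std::vector<AlterCmd> cmds{{"UPDATE x", true}, {"RENAME COLUMN", false}};
    for (const auto & m : getMutationCommands(cmds, /*with_alters=*/true))
        std::cout << m.description << (m.is_plain_alter ? " (no mutation)" : "") << '\n';
}
```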
- MutationCommands getMutationCommands(StorageInMemoryMetadata metadata, bool materialize_ttl, ContextPtr context) const; + MutationCommands getMutationCommands(StorageInMemoryMetadata metadata, bool materialize_ttl, ContextPtr context, bool with_alters=false) const; }; } diff --git a/src/Storages/ColumnDefault.h b/src/Storages/ColumnDefault.h index 096a1f177ab..af1be6f3bec 100644 --- a/src/Storages/ColumnDefault.h +++ b/src/Storages/ColumnDefault.h @@ -26,6 +26,7 @@ struct ColumnDefault { ColumnDefaultKind kind = ColumnDefaultKind::Default; ASTPtr expression; + bool ephemeral_default = false; }; diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp index 5fa267a964b..0fdb21e064f 100644 --- a/src/Storages/ColumnsDescription.cpp +++ b/src/Storages/ColumnsDescription.cpp @@ -123,6 +123,7 @@ void ColumnDescription::readText(ReadBuffer & buf) { default_desc.kind = columnDefaultKindFromString(col_ast->default_specifier); default_desc.expression = std::move(col_ast->default_expression); + default_desc.ephemeral_default = col_ast->ephemeral_default; } if (col_ast->comment) diff --git a/src/Storages/ConstraintsDescription.cpp b/src/Storages/ConstraintsDescription.cpp index f73a148ad07..96037b46e52 100644 --- a/src/Storages/ConstraintsDescription.cpp +++ b/src/Storages/ConstraintsDescription.cpp @@ -107,7 +107,7 @@ std::unique_ptr ConstraintsDescription::buildGraph() const { static const NameSet relations = { "equals", "less", "lessOrEquals", "greaterOrEquals", "greater" }; - std::vector constraints_for_graph; + ASTs constraints_for_graph; auto atomic_formulas = getAtomicConstraintData(); for (const auto & atomic_formula : atomic_formulas) { @@ -153,7 +153,7 @@ const std::vector> & ConstraintsDescription return cnf_constraints; } -const std::vector & ConstraintsDescription::getConstraints() const +const ASTs & ConstraintsDescription::getConstraints() const { return constraints; } @@ -218,7 +218,7 @@ void ConstraintsDescription::update() { cnf_constraints.clear(); ast_to_atom_ids.clear(); - graph = std::make_unique(std::vector()); + graph = std::make_unique(ASTs()); return; } diff --git a/src/Storages/FileLog/StorageFileLog.cpp b/src/Storages/FileLog/StorageFileLog.cpp index 64b82eb4000..94d5f7441ec 100644 --- a/src/Storages/FileLog/StorageFileLog.cpp +++ b/src/Storages/FileLog/StorageFileLog.cpp @@ -72,6 +72,21 @@ StorageFileLog::StorageFileLog( storage_metadata.setComment(comment); setInMemoryMetadata(storage_metadata); + if (!fileOrSymlinkPathStartsWith(path, getContext()->getUserFilesPath())) + { + if (attach) + { + LOG_ERROR(log, "The absolute data path should be inside `user_files_path`({})", getContext()->getUserFilesPath()); + return; + } + else + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "The absolute data path should be inside `user_files_path`({})", + getContext()->getUserFilesPath()); + } + + bool created_metadata_directory = false; try { if (!attach) @@ -84,6 +99,7 @@ StorageFileLog::StorageFileLog( metadata_base_path); } disk->createDirectories(metadata_base_path); + created_metadata_directory = true; } loadMetaFiles(attach); @@ -101,7 +117,12 @@ StorageFileLog::StorageFileLog( catch (...) 
{ if (!attach) + { + if (created_metadata_directory) + disk->removeRecursive(metadata_base_path); throw; + } + tryLogCurrentException(__PRETTY_FUNCTION__); } } @@ -124,12 +145,6 @@ void StorageFileLog::loadMetaFiles(bool attach) void StorageFileLog::loadFiles() { - if (!fileOrSymlinkPathStartsWith(path, getContext()->getUserFilesPath())) - { - throw Exception( - ErrorCodes::BAD_ARGUMENTS, "The absolute data path should be inside `user_files_path`({})", getContext()->getUserFilesPath()); - } - auto absolute_path = std::filesystem::absolute(path); absolute_path = absolute_path.lexically_normal(); /// Normalize path. @@ -372,43 +387,26 @@ void StorageFileLog::drop() void StorageFileLog::startup() { - try - { - if (task) - { - task->holder->activateAndSchedule(); - } - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - } + if (task) + task->holder->activateAndSchedule(); } void StorageFileLog::shutdown() { - try + if (task) { - if (task) - { - task->stream_cancelled = true; + task->stream_cancelled = true; - /// Reader thread may wait for wake up - wakeUp(); + /// Reader thread may wait for wake up + wakeUp(); - LOG_TRACE(log, "Waiting for cleanup"); - task->holder->deactivate(); - } + LOG_TRACE(log, "Waiting for cleanup"); + task->holder->deactivate(); /// If no reading call and threadFunc, the log files will never /// be opened, also just leave the work of close files and /// store meta to streams. because if we close files in here, /// may result in data race with unfinishing reading pipeline } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - task->holder->deactivate(); - } } void StorageFileLog::assertStreamGood(const std::ifstream & reader) diff --git a/src/Storages/IMessageProducer.cpp b/src/Storages/IMessageProducer.cpp new file mode 100644 index 00000000000..8a3b41de125 --- /dev/null +++ b/src/Storages/IMessageProducer.cpp @@ -0,0 +1,30 @@ +#include +#include + +namespace DB +{ + +void AsynchronousMessageProducer::start(const ContextPtr & context) +{ + initialize(); + producing_task = context->getSchedulePool().createTask(getProducingTaskName(), [this] + { + startProducingTaskLoop(); + }); + producing_task->activateAndSchedule(); +} + +void AsynchronousMessageProducer::finish() +{ + /// We should execute finish logic only once. + if (finished.exchange(true)) + return; + + stopProducingTask(); + /// Deactivate producing task and wait until it's finished. + producing_task->deactivate(); + finishImpl(); +} + + +} diff --git a/src/Storages/IMessageProducer.h b/src/Storages/IMessageProducer.h new file mode 100644 index 00000000000..4a7a99edb7d --- /dev/null +++ b/src/Storages/IMessageProducer.h @@ -0,0 +1,64 @@ +#pragma once + +#pragma once + +#include +#include +#include + +namespace DB +{ + +/// Interface for producing messages in streaming storages. +/// It's used in MessageQueueSink. +class IMessageProducer +{ +public: + /// Do some preparations. + virtual void start(const ContextPtr & context) = 0; + + /// Produce single message. + virtual void produce(const String & message, size_t rows_in_message, const Columns & columns, size_t last_row) = 0; + + /// Finalize producer. + virtual void finish() = 0; + + virtual ~IMessageProducer() = default; +}; + +/// Implements interface for concurrent message producing. +class AsynchronousMessageProducer : public IMessageProducer +{ +public: + /// Create and schedule task in BackgroundSchedulePool that will produce messages. 
+ void start(const ContextPtr & context) override; + + /// Stop producing task, wait for ot to finish and finalize. + void finish() override; + + /// In this method producer should not do any hard work and send message + /// to producing task, for example, by using ConcurrentBoundedQueue. + void produce(const String & message, size_t rows_in_message, const Columns & columns, size_t last_row) override = 0; + +protected: + /// Do some initialization before scheduling producing task. + virtual void initialize() {} + /// Tell producer to finish all work and stop producing task + virtual void stopProducingTask() = 0; + /// Do some finalization after producing task is stopped. + virtual void finishImpl() {} + + virtual String getProducingTaskName() const = 0; + /// Method that is called inside producing task, all producing work should be done here. + virtual void startProducingTaskLoop() = 0; + +private: + /// Flag, indicated that finish() method was called. + /// It's used to prevent doing finish logic more than once. + std::atomic finished = false; + + BackgroundSchedulePool::TaskHolder producing_task; +}; + + +} diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index a0585e9c9a1..2c4ea91869c 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -241,6 +241,10 @@ public: /// Return true if storage can execute lightweight delete mutations. virtual bool supportsLightweightDelete() const { return false; } + /// Return true if storage can execute 'DELETE FROM' mutations. This is different from lightweight delete + /// because those are internally translated into 'ALTER UDPATE' mutations. + virtual bool supportsDelete() const { return false; } + private: StorageID storage_id; diff --git a/src/Storages/Kafka/Buffer_fwd.h b/src/Storages/Kafka/Buffer_fwd.h deleted file mode 100644 index 89e72454aa5..00000000000 --- a/src/Storages/Kafka/Buffer_fwd.h +++ /dev/null @@ -1,14 +0,0 @@ -#pragma once - -#include - -namespace DB -{ - -class ReadBufferFromKafkaConsumer; -class WriteBufferToKafkaProducer; - -using ConsumerBufferPtr = std::shared_ptr; -using ProducerBufferPtr = std::shared_ptr; - -} diff --git a/src/Storages/Kafka/KafkaBlockSink.cpp b/src/Storages/Kafka/KafkaBlockSink.cpp deleted file mode 100644 index 2c7cfffb9a1..00000000000 --- a/src/Storages/Kafka/KafkaBlockSink.cpp +++ /dev/null @@ -1,52 +0,0 @@ -#include - -#include -#include -#include - -namespace DB -{ - -KafkaSink::KafkaSink( - StorageKafka & storage_, - const StorageMetadataPtr & metadata_snapshot_, - const ContextPtr & context_) - : SinkToStorage(metadata_snapshot_->getSampleBlockNonMaterialized()) - , storage(storage_) - , metadata_snapshot(metadata_snapshot_) - , context(context_) -{ -} - -void KafkaSink::onStart() -{ - buffer = storage.createWriteBuffer(getHeader()); - - auto format_settings = getFormatSettings(context); - format_settings.protobuf.allow_multiple_rows_without_delimiter = true; - - format = FormatFactory::instance().getOutputFormat(storage.getFormatName(), *buffer, - getHeader(), context, - [this](const Columns & columns, size_t row) - { - buffer->countRow(columns, row); - }, - format_settings); -} - -void KafkaSink::consume(Chunk chunk) -{ - format->write(getHeader().cloneWithColumns(chunk.detachColumns())); -} - -void KafkaSink::onFinish() -{ - if (format) - format->finalize(); - //flush(); - - if (buffer) - buffer->flush(); -} - -} diff --git a/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp b/src/Storages/Kafka/KafkaConsumer.cpp similarity index 89% rename from 
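The AsynchronousMessageProducer introduced above schedules its producing loop on a BackgroundSchedulePool task and guards finish() with finished.exchange(true), so the stop/deactivate/finalize sequence runs exactly once even if finish() is called repeatedly or from multiple places. A standalone sketch of that lifecycle with std::thread standing in for the schedule pool (class and member names here are illustrative, not ClickHouse's):

```cpp
#include <atomic>
#include <chrono>
#include <iostream>
#include <thread>

/// Minimal analogue of AsynchronousMessageProducer: start() launches the
/// producing loop; the atomic exchange makes finish() idempotent.
class AsyncProducerSketch
{
public:
    void start()
    {
        producing_task = std::thread([this] { producingLoop(); });
    }

    void finish()
    {
        /// The first caller flips the flag and does the work; later callers return.
        if (finished.exchange(true))
            return;

        stop_requested = true;          /// analogue of stopProducingTask()
        if (producing_task.joinable())
            producing_task.join();      /// analogue of producing_task->deactivate()
        std::cout << "finalized\n";     /// analogue of finishImpl()
    }

    ~AsyncProducerSketch() { finish(); }

private:
    void producingLoop()
    {
        while (!stop_requested)
            std::this_thread::sleep_for(std::chrono::milliseconds(10));   /// produce messages here
    }

    std::atomic<bool> finished{false};
    std::atomic<bool> stop_requested{false};
    std::thread producing_task;
};

int main()
{
    AsyncProducerSketch producer;
    producer.start();
    producer.finish();
    producer.finish();   /// safe: the second call is a no-op
}
```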
src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp rename to src/Storages/Kafka/KafkaConsumer.cpp index 8c45ce4be38..1cfbd145fb1 100644 --- a/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp +++ b/src/Storages/Kafka/KafkaConsumer.cpp @@ -2,7 +2,8 @@ // should be defined before any instantiation #include -#include +#include +#include #include @@ -44,7 +45,7 @@ const std::size_t POLL_TIMEOUT_WO_ASSIGNMENT_MS = 50; const auto DRAIN_TIMEOUT_MS = 5000ms; -ReadBufferFromKafkaConsumer::ReadBufferFromKafkaConsumer( +KafkaConsumer::KafkaConsumer( ConsumerPtr consumer_, Poco::Logger * log_, size_t max_batch_size, @@ -52,8 +53,7 @@ ReadBufferFromKafkaConsumer::ReadBufferFromKafkaConsumer( bool intermediate_commit_, const std::atomic & stopped_, const Names & _topics) - : ReadBuffer(nullptr, 0) - , consumer(consumer_) + : consumer(consumer_) , log(log_) , batch_size(max_batch_size) , poll_timeout(poll_timeout_) @@ -127,7 +127,7 @@ ReadBufferFromKafkaConsumer::ReadBufferFromKafkaConsumer( }); } -ReadBufferFromKafkaConsumer::~ReadBufferFromKafkaConsumer() +KafkaConsumer::~KafkaConsumer() { try { @@ -155,7 +155,7 @@ ReadBufferFromKafkaConsumer::~ReadBufferFromKafkaConsumer() // after unsubscribe, otherwise consumer will hang on destruction // see https://github.com/edenhill/librdkafka/issues/2077 // https://github.com/confluentinc/confluent-kafka-go/issues/189 etc. -void ReadBufferFromKafkaConsumer::drain() +void KafkaConsumer::drain() { auto start_time = std::chrono::steady_clock::now(); cppkafka::Error last_error(RD_KAFKA_RESP_ERR_NO_ERROR); @@ -194,7 +194,7 @@ void ReadBufferFromKafkaConsumer::drain() } -void ReadBufferFromKafkaConsumer::commit() +void KafkaConsumer::commit() { auto print_offsets = [this] (const char * prefix, const cppkafka::TopicPartitionList & offsets) { @@ -279,7 +279,7 @@ void ReadBufferFromKafkaConsumer::commit() offsets_stored = 0; } -void ReadBufferFromKafkaConsumer::subscribe() +void KafkaConsumer::subscribe() { LOG_TRACE(log, "Already subscribed to topics: [{}]", boost::algorithm::join(consumer->get_subscription(), ", ")); @@ -313,22 +313,20 @@ void ReadBufferFromKafkaConsumer::subscribe() } cleanUnprocessed(); - allowed = false; // we can reset any flags (except of CONSUMER_STOPPED) before attempt of reading new block of data if (stalled_status != CONSUMER_STOPPED) stalled_status = NO_MESSAGES_RETURNED; } -void ReadBufferFromKafkaConsumer::cleanUnprocessed() +void KafkaConsumer::cleanUnprocessed() { messages.clear(); current = messages.begin(); - BufferBase::set(nullptr, 0, 0); offsets_stored = 0; } -void ReadBufferFromKafkaConsumer::unsubscribe() +void KafkaConsumer::unsubscribe() { LOG_TRACE(log, "Re-joining claimed consumer after failure"); cleanUnprocessed(); @@ -346,13 +344,13 @@ void ReadBufferFromKafkaConsumer::unsubscribe() } catch (const cppkafka::HandleException & e) { - LOG_ERROR(log, "Exception from ReadBufferFromKafkaConsumer::unsubscribe: {}", e.what()); + LOG_ERROR(log, "Exception from KafkaConsumer::unsubscribe: {}", e.what()); } } -void ReadBufferFromKafkaConsumer::resetToLastCommitted(const char * msg) +void KafkaConsumer::resetToLastCommitted(const char * msg) { if (!assignment.has_value() || assignment->empty()) { @@ -365,18 +363,15 @@ void ReadBufferFromKafkaConsumer::resetToLastCommitted(const char * msg) } // it do the poll when needed -bool ReadBufferFromKafkaConsumer::poll() +ReadBufferPtr KafkaConsumer::consume() { resetIfStopped(); if (polledDataUnusable()) - return false; + return nullptr; if (hasMorePolledMessages()) - { - allowed = true; - 
return true; - } + return getNextMessage(); if (intermediate_commit) commit(); @@ -401,7 +396,7 @@ bool ReadBufferFromKafkaConsumer::poll() resetIfStopped(); if (stalled_status == CONSUMER_STOPPED) { - return false; + return nullptr; } else if (stalled_status == REBALANCE_HAPPENED) { @@ -412,7 +407,7 @@ bool ReadBufferFromKafkaConsumer::poll() // otherwise we will continue polling from that position resetToLastCommitted("Rewind last poll after rebalance."); } - return false; + return nullptr; } if (new_messages.empty()) @@ -430,18 +425,18 @@ bool ReadBufferFromKafkaConsumer::poll() { LOG_WARNING(log, "Can't get assignment. Will keep trying."); stalled_status = NO_ASSIGNMENT; - return false; + return nullptr; } } else if (assignment->empty()) { LOG_TRACE(log, "Empty assignment."); - return false; + return nullptr; } else { LOG_TRACE(log, "Stalled"); - return false; + return nullptr; } } else @@ -459,17 +454,31 @@ bool ReadBufferFromKafkaConsumer::poll() { LOG_ERROR(log, "Only errors left"); stalled_status = ERRORS_RETURNED; - return false; + return nullptr; } ProfileEvents::increment(ProfileEvents::KafkaMessagesPolled, messages.size()); stalled_status = NOT_STALLED; - allowed = true; - return true; + return getNextMessage(); } -size_t ReadBufferFromKafkaConsumer::filterMessageErrors() +ReadBufferPtr KafkaConsumer::getNextMessage() +{ + if (current == messages.end()) + return nullptr; + + const auto * data = current->get_payload().get_data(); + size_t size = current->get_payload().get_size(); + ++current; + + if (data) + return std::make_shared(data, size); + + return getNextMessage(); +} + +size_t KafkaConsumer::filterMessageErrors() { assert(current == messages.begin()); @@ -494,7 +503,7 @@ size_t ReadBufferFromKafkaConsumer::filterMessageErrors() return skipped; } -void ReadBufferFromKafkaConsumer::resetIfStopped() +void KafkaConsumer::resetIfStopped() { // we can react on stop only during fetching data // after block is formed (i.e. during copying data to MV / committing) we ignore stop attempts @@ -505,29 +514,8 @@ void ReadBufferFromKafkaConsumer::resetIfStopped() } } -/// Do commit messages implicitly after we processed the previous batch. -bool ReadBufferFromKafkaConsumer::nextImpl() -{ - if (!allowed || !hasMorePolledMessages()) - return false; - const auto * message_data = current->get_payload().get_data(); - size_t message_size = current->get_payload().get_size(); - - allowed = false; - ++current; - - /// If message is empty, return end of stream. - if (message_data == nullptr) - return false; - - /// const_cast is needed, because ReadBuffer works with non-const char *. 
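With the rename above, KafkaConsumer stops being a ReadBuffer itself: poll()/nextImpl() are replaced by consume(), which returns a read buffer wrapping exactly one polled message (or nullptr when nothing usable was polled), and getNextMessage() skips messages without a payload. A small standalone sketch of that "one buffer per message" contract, with a shared string standing in for ClickHouse's per-message read buffer (toy types, not the real classes):

```cpp
#include <iostream>
#include <memory>
#include <optional>
#include <string>
#include <vector>

/// Stand-in for a polled Kafka message: the payload may be absent (tombstone).
struct Message { std::optional<std::string> payload; };

/// Stand-in for the per-message read buffer handed to the input format.
using MessageBufferPtr = std::shared_ptr<std::string>;

class ConsumerSketch
{
public:
    explicit ConsumerSketch(std::vector<Message> polled)
        : messages(std::move(polled)), current(messages.begin()) {}

    /// Mirrors consume()/getNextMessage(): hand out one buffer per message,
    /// skipping empty payloads; nullptr means "nothing left to process".
    MessageBufferPtr consume()
    {
        while (current != messages.end())
        {
            auto message = *current++;
            if (message.payload)
                return std::make_shared<std::string>(*message.payload);
        }
        return nullptr;
    }

private:
    std::vector<Message> messages;
    std::vector<Message>::iterator current;
};

int main()
{
    ConsumerSketch consumer({{std::string{"{\"a\":1}"}}, {std::nullopt}, {std::string{"{\"a\":2}"}}});
    while (auto buf = consumer.consume())
        std::cout << "message: " << *buf << '\n';   /// the caller feeds each buffer to the input format
}
```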
- auto * new_position = reinterpret_cast(const_cast(message_data)); - BufferBase::set(new_position, message_size, 0); - return true; -} - -void ReadBufferFromKafkaConsumer::storeLastReadMessageOffset() +void KafkaConsumer::storeLastReadMessageOffset() { if (!isStalled()) { diff --git a/src/Storages/Kafka/ReadBufferFromKafkaConsumer.h b/src/Storages/Kafka/KafkaConsumer.h similarity index 86% rename from src/Storages/Kafka/ReadBufferFromKafkaConsumer.h rename to src/Storages/Kafka/KafkaConsumer.h index f390d1c1330..feda51a682e 100644 --- a/src/Storages/Kafka/ReadBufferFromKafkaConsumer.h +++ b/src/Storages/Kafka/KafkaConsumer.h @@ -22,10 +22,10 @@ namespace DB using ConsumerPtr = std::shared_ptr; -class ReadBufferFromKafkaConsumer : public ReadBuffer +class KafkaConsumer { public: - ReadBufferFromKafkaConsumer( + KafkaConsumer( ConsumerPtr consumer_, Poco::Logger * log_, size_t max_batch_size, @@ -34,7 +34,8 @@ public: const std::atomic & stopped_, const Names & _topics ); - ~ReadBufferFromKafkaConsumer() override; + + ~KafkaConsumer(); void commit(); // Commit all processed messages. void subscribe(); // Subscribe internal consumer to topics. void unsubscribe(); // Unsubscribe internal consumer in case of failure. @@ -56,11 +57,9 @@ public: void storeLastReadMessageOffset(); void resetToLastCommitted(const char * msg); - // Polls batch of messages from Kafka or allows to read consecutive message by nextImpl - // returns true if there are some messages to process - // return false and sets stalled to false if there are no messages to process. - // additionally sets - bool poll(); + /// Polls batch of messages from Kafka and returns read buffer containing the next message or + /// nullptr when there are no messages to process. + ReadBufferPtr consume(); // Return values for the message that's being read. String currentTopic() const { return current[-1].get_topic(); } @@ -94,7 +93,6 @@ private: StalledStatus stalled_status = NO_MESSAGES_RETURNED; bool intermediate_commit = true; - bool allowed = true; size_t waited_for_assignment = 0; const std::atomic & stopped; @@ -112,8 +110,7 @@ private: void resetIfStopped(); /// Return number of messages with an error. size_t filterMessageErrors(); - - bool nextImpl() override; + ReadBufferPtr getNextMessage(); }; } diff --git a/src/Storages/Kafka/KafkaProducer.cpp b/src/Storages/Kafka/KafkaProducer.cpp new file mode 100644 index 00000000000..1d9154bdc58 --- /dev/null +++ b/src/Storages/Kafka/KafkaProducer.cpp @@ -0,0 +1,106 @@ +#include "KafkaProducer.h" +#include "Core/Block.h" +#include "Columns/ColumnString.h" +#include "Columns/ColumnsNumber.h" + +#include + +namespace ProfileEvents +{ + extern const Event KafkaRowsWritten; + extern const Event KafkaProducerFlushes; + extern const Event KafkaMessagesProduced; + extern const Event KafkaProducerErrors; +} + +namespace DB +{ + +KafkaProducer::KafkaProducer( + ProducerPtr producer_, const std::string & topic_, std::chrono::milliseconds poll_timeout, std::atomic & shutdown_called_, const Block & header) + : producer(producer_), topic(topic_), timeout(poll_timeout), shutdown_called(shutdown_called_) +{ + if (header.has("_key")) + { + auto column_index = header.getPositionByName("_key"); + auto column_info = header.getByPosition(column_index); + if (isString(column_info.type)) + key_column_index = column_index; + // else ? 
(not sure it's a good place to report smth to user) + } + + if (header.has("_timestamp")) + { + auto column_index = header.getPositionByName("_timestamp"); + auto column_info = header.getByPosition(column_index); + if (isDateTime(column_info.type)) + timestamp_column_index = column_index; + } +} + +void KafkaProducer::produce(const String & message, size_t rows_in_message, const Columns & columns, size_t last_row) +{ + ProfileEvents::increment(ProfileEvents::KafkaRowsWritten, rows_in_message); + cppkafka::MessageBuilder builder(topic); + builder.payload(message); + + // Note: if it will be few rows per message - it will take the value from last row of block + if (key_column_index) + { + const auto & key_column = assert_cast(*columns[key_column_index.value()]); + const auto key_data = key_column.getDataAt(last_row).toString(); + builder.key(cppkafka::Buffer(key_data.data(), key_data.size())); + } + + if (timestamp_column_index) + { + const auto & timestamp_column = assert_cast(*columns[timestamp_column_index.value()]); + const auto timestamp = std::chrono::seconds{timestamp_column.getElement(last_row)}; + builder.timestamp(timestamp); + } + + while (!shutdown_called) + { + try + { + producer->produce(builder); + } + catch (cppkafka::HandleException & e) + { + if (e.get_error() == RD_KAFKA_RESP_ERR__QUEUE_FULL) + { + producer->poll(timeout); + continue; + } + ProfileEvents::increment(ProfileEvents::KafkaProducerErrors); + throw; + } + ProfileEvents::increment(ProfileEvents::KafkaMessagesProduced); + break; + } +} + +void KafkaProducer::finish() +{ + // For unknown reason we may hit some internal timeout when inserting for the first time. + while (true) + { + try + { + producer->flush(timeout); + } + catch (cppkafka::HandleException & e) + { + if (e.get_error() == RD_KAFKA_RESP_ERR__TIMED_OUT) + continue; + + ProfileEvents::increment(ProfileEvents::KafkaProducerErrors); + throw; + } + + ProfileEvents::increment(ProfileEvents::KafkaProducerFlushes); + break; + } +} + +} diff --git a/src/Storages/Kafka/WriteBufferToKafkaProducer.h b/src/Storages/Kafka/KafkaProducer.h similarity index 56% rename from src/Storages/Kafka/WriteBufferToKafkaProducer.h rename to src/Storages/Kafka/KafkaProducer.h index 64b06571f0a..2e4bbaf7658 100644 --- a/src/Storages/Kafka/WriteBufferToKafkaProducer.h +++ b/src/Storages/Kafka/KafkaProducer.h @@ -1,8 +1,7 @@ #pragma once -#include #include - +#include #include #include @@ -14,43 +13,35 @@ namespace CurrentMetrics extern const Metric KafkaProducers; } - namespace DB { class Block; using ProducerPtr = std::shared_ptr; -class WriteBufferToKafkaProducer : public WriteBuffer +class KafkaProducer : public IMessageProducer { public: - WriteBufferToKafkaProducer( + KafkaProducer( ProducerPtr producer_, const std::string & topic_, - std::optional delimiter, - size_t rows_per_message, - size_t chunk_size_, std::chrono::milliseconds poll_timeout, + std::atomic & shutdown_called_, const Block & header); - ~WriteBufferToKafkaProducer() override; - void countRow(const Columns & columns, size_t row); - void flush(); + void produce(const String & message, size_t rows_in_message, const Columns & columns, size_t last_row) override; + + void start(const ContextPtr &) override {} + void finish() override; private: - void nextImpl() override; - void addChunk(); - void reinitializeChunks(); CurrentMetrics::Increment metric_increment{CurrentMetrics::KafkaProducers}; ProducerPtr producer; const std::string topic; - const std::optional delim; - const size_t max_rows; - const size_t 
chunk_size; const std::chrono::milliseconds timeout; - size_t rows = 0; - std::list chunks; + std::atomic & shutdown_called; + std::optional key_column_index; std::optional timestamp_column_index; }; diff --git a/src/Storages/Kafka/KafkaSettings.h b/src/Storages/Kafka/KafkaSettings.h index e0da8031dd8..ae4b92a08e1 100644 --- a/src/Storages/Kafka/KafkaSettings.h +++ b/src/Storages/Kafka/KafkaSettings.h @@ -13,25 +13,25 @@ class ASTStorage; M(String, kafka_broker_list, "", "A comma-separated list of brokers for Kafka engine.", 0) \ M(String, kafka_topic_list, "", "A list of Kafka topics.", 0) \ M(String, kafka_group_name, "", "Client group id string. All Kafka consumers sharing the same group.id belong to the same group.", 0) \ - M(String, kafka_client_id, "", "Client identifier.", 0) \ - M(UInt64, kafka_num_consumers, 1, "The number of consumers per table for Kafka engine.", 0) \ - M(Bool, kafka_commit_every_batch, false, "Commit every consumed and handled batch instead of a single commit after writing a whole block", 0) \ - /* default is stream_poll_timeout_ms */ \ - M(Milliseconds, kafka_poll_timeout_ms, 0, "Timeout for single poll from Kafka.", 0) \ - /* default is min(max_block_size, kafka_max_block_size)*/ \ - M(UInt64, kafka_poll_max_batch_size, 0, "Maximum amount of messages to be polled in a single Kafka poll.", 0) \ - /* default is = max_insert_block_size / kafka_num_consumers */ \ - M(UInt64, kafka_max_block_size, 0, "Number of row collected by poll(s) for flushing data from Kafka.", 0) \ - /* default is stream_flush_interval_ms */ \ - M(Milliseconds, kafka_flush_interval_ms, 0, "Timeout for flushing data from Kafka.", 0) \ /* those are mapped to format factory settings */ \ M(String, kafka_format, "", "The message format for Kafka engine.", 0) \ M(Char, kafka_row_delimiter, '\0', "The character to be considered as a delimiter in Kafka message.", 0) \ M(String, kafka_schema, "", "Schema identifier (used by schema-based formats) for Kafka engine", 0) \ + M(UInt64, kafka_num_consumers, 1, "The number of consumers per table for Kafka engine.", 0) \ + /* default is = max_insert_block_size / kafka_num_consumers */ \ + M(UInt64, kafka_max_block_size, 0, "Number of row collected by poll(s) for flushing data from Kafka.", 0) \ M(UInt64, kafka_skip_broken_messages, 0, "Skip at least this number of broken messages from Kafka topic per block", 0) \ + M(Bool, kafka_commit_every_batch, false, "Commit every consumed and handled batch instead of a single commit after writing a whole block", 0) \ + M(String, kafka_client_id, "", "Client identifier.", 0) \ + /* default is stream_poll_timeout_ms */ \ + M(Milliseconds, kafka_poll_timeout_ms, 0, "Timeout for single poll from Kafka.", 0) \ + M(UInt64, kafka_poll_max_batch_size, 0, "Maximum amount of messages to be polled in a single Kafka poll.", 0) \ + /* default is stream_flush_interval_ms */ \ + M(Milliseconds, kafka_flush_interval_ms, 0, "Timeout for flushing data from Kafka.", 0) \ M(Bool, kafka_thread_per_consumer, false, "Provide independent thread for each consumer", 0) \ M(HandleKafkaErrorMode, kafka_handle_error_mode, HandleKafkaErrorMode::DEFAULT, "How to handle errors for Kafka engine. 
Possible values: default, stream.", 0) \ M(Bool, kafka_commit_on_select, false, "Commit messages when select query is made", 0) \ + M(UInt64, kafka_max_rows_per_message, 1, "The maximum number of rows produced in one kafka message for row-based formats.", 0) \ /** TODO: */ /* https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md */ diff --git a/src/Storages/Kafka/KafkaSink.h b/src/Storages/Kafka/KafkaSink.h deleted file mode 100644 index 8e0801c7b5e..00000000000 --- a/src/Storages/Kafka/KafkaSink.h +++ /dev/null @@ -1,35 +0,0 @@ -#pragma once - -#include -#include - -namespace DB -{ - -class IOutputFormat; -using IOutputFormatPtr = std::shared_ptr; - -class KafkaSink : public SinkToStorage -{ -public: - explicit KafkaSink( - StorageKafka & storage_, - const StorageMetadataPtr & metadata_snapshot_, - const ContextPtr & context_); - - void consume(Chunk chunk) override; - void onStart() override; - void onFinish() override; - String getName() const override { return "KafkaSink"; } - - ///void flush() override; - -private: - StorageKafka & storage; - StorageMetadataPtr metadata_snapshot; - const ContextPtr context; - ProducerBufferPtr buffer; - IOutputFormatPtr format; -}; - -} diff --git a/src/Storages/Kafka/KafkaSource.cpp b/src/Storages/Kafka/KafkaSource.cpp index 38b404fe79e..db528adec79 100644 --- a/src/Storages/Kafka/KafkaSource.cpp +++ b/src/Storages/Kafka/KafkaSource.cpp @@ -1,7 +1,8 @@ #include #include -#include +#include +#include #include #include #include @@ -51,13 +52,13 @@ KafkaSource::KafkaSource( KafkaSource::~KafkaSource() { - if (!buffer) + if (!consumer) return; if (broken) - buffer->unsubscribe(); + consumer->unsubscribe(); - storage.pushReadBuffer(buffer); + storage.pushConsumer(consumer); } bool KafkaSource::checkTimeLimit() const @@ -75,15 +76,15 @@ bool KafkaSource::checkTimeLimit() const Chunk KafkaSource::generateImpl() { - if (!buffer) + if (!consumer) { auto timeout = std::chrono::milliseconds(context->getSettingsRef().kafka_max_wait_ms.totalMilliseconds()); - buffer = storage.popReadBuffer(timeout); + consumer = storage.popConsumer(timeout); - if (!buffer) + if (!consumer) return {}; - buffer->subscribe(); + consumer->subscribe(); broken = true; } @@ -99,8 +100,9 @@ Chunk KafkaSource::generateImpl() auto put_error_to_stream = handle_error_mode == HandleKafkaErrorMode::STREAM; + EmptyReadBuffer empty_buf; auto input_format = FormatFactory::instance().getInputFormat( - storage.getFormatName(), *buffer, non_virtual_header, context, max_block_size); + storage.getFormatName(), empty_buf, non_virtual_header, context, max_block_size); std::optional exception_message; size_t total_rows = 0; @@ -130,7 +132,7 @@ Chunk KafkaSource::generateImpl() else { e.addMessage("while parsing Kafka message (topic: {}, partition: {}, offset: {})'", - buffer->currentTopic(), buffer->currentPartition(), buffer->currentOffset()); + consumer->currentTopic(), consumer->currentPartition(), consumer->currentOffset()); throw; } }; @@ -141,32 +143,31 @@ Chunk KafkaSource::generateImpl() { size_t new_rows = 0; exception_message.reset(); - if (buffer->poll()) + if (auto buf = consumer->consume()) { - // poll provide one message at a time to the input_format ProfileEvents::increment(ProfileEvents::KafkaMessagesRead); - new_rows = executor.execute(); + new_rows = executor.execute(*buf); } if (new_rows) { - // In read_kafka_message(), ReadBufferFromKafkaConsumer::nextImpl() + // In read_kafka_message(), KafkaConsumer::nextImpl() // will be called, that may make something unusable, i.e. 
clean - // ReadBufferFromKafkaConsumer::messages, which is accessed from - // ReadBufferFromKafkaConsumer::currentTopic() (and other helpers). - if (buffer->isStalled()) + // KafkaConsumer::messages, which is accessed from + // KafkaConsumer::currentTopic() (and other helpers). + if (consumer->isStalled()) throw Exception("Polled messages became unusable", ErrorCodes::LOGICAL_ERROR); ProfileEvents::increment(ProfileEvents::KafkaRowsRead, new_rows); - buffer->storeLastReadMessageOffset(); + consumer->storeLastReadMessageOffset(); - auto topic = buffer->currentTopic(); - auto key = buffer->currentKey(); - auto offset = buffer->currentOffset(); - auto partition = buffer->currentPartition(); - auto timestamp_raw = buffer->currentTimestamp(); - auto header_list = buffer->currentHeaderList(); + auto topic = consumer->currentTopic(); + auto key = consumer->currentKey(); + auto offset = consumer->currentOffset(); + auto partition = consumer->currentPartition(); + auto timestamp_raw = consumer->currentTimestamp(); + auto header_list = consumer->currentHeaderList(); Array headers_names; Array headers_values; @@ -205,7 +206,7 @@ Chunk KafkaSource::generateImpl() { if (exception_message) { - auto payload = buffer->currentPayload(); + auto payload = consumer->currentPayload(); virtual_columns[8]->insert(payload); virtual_columns[9]->insert(*exception_message); } @@ -219,11 +220,11 @@ Chunk KafkaSource::generateImpl() total_rows = total_rows + new_rows; } - else if (buffer->polledDataUnusable()) + else if (consumer->polledDataUnusable()) { break; } - else if (buffer->isStalled()) + else if (consumer->isStalled()) { ++failed_poll_attempts; } @@ -232,11 +233,11 @@ Chunk KafkaSource::generateImpl() // We came here in case of tombstone (or sometimes zero-length) messages, and it is not something abnormal // TODO: it seems like in case of put_error_to_stream=true we may need to process those differently // currently we just skip them with note in logs. 
- buffer->storeLastReadMessageOffset(); - LOG_DEBUG(log, "Parsing of message (topic: {}, partition: {}, offset: {}) return no rows.", buffer->currentTopic(), buffer->currentPartition(), buffer->currentOffset()); + consumer->storeLastReadMessageOffset(); + LOG_DEBUG(log, "Parsing of message (topic: {}, partition: {}, offset: {}) return no rows.", consumer->currentTopic(), consumer->currentPartition(), consumer->currentOffset()); } - if (!buffer->hasMorePolledMessages() + if (!consumer->hasMorePolledMessages() && (total_rows >= max_block_size || !checkTimeLimit() || failed_poll_attempts >= MAX_FAILED_POLL_ATTEMPTS)) { break; @@ -247,7 +248,7 @@ Chunk KafkaSource::generateImpl() { return {}; } - else if (buffer->polledDataUnusable()) + else if (consumer->polledDataUnusable()) { // the rows were counted already before by KafkaRowsRead, // so let's count the rows we ignore separately @@ -291,10 +292,10 @@ Chunk KafkaSource::generate() void KafkaSource::commit() { - if (!buffer) + if (!consumer) return; - buffer->commit(); + consumer->commit(); broken = false; } diff --git a/src/Storages/Kafka/KafkaSource.h b/src/Storages/Kafka/KafkaSource.h index 94e4224d8df..3d2edd4ebd1 100644 --- a/src/Storages/Kafka/KafkaSource.h +++ b/src/Storages/Kafka/KafkaSource.h @@ -3,7 +3,7 @@ #include #include -#include +#include #include @@ -32,7 +32,7 @@ public: Chunk generate() override; void commit(); - bool isStalled() const { return !buffer || buffer->isStalled(); } + bool isStalled() const { return !consumer || consumer->isStalled(); } void setTimeLimit(Poco::Timespan max_execution_time_) { max_execution_time = max_execution_time_; } @@ -44,7 +44,7 @@ private: Poco::Logger * log; UInt64 max_block_size; - ConsumerBufferPtr buffer; + KafkaConsumerPtr consumer; bool broken = true; bool is_finished = false; bool commit_in_suffix; diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index 77afa7ba623..635d0e7864a 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -20,10 +20,10 @@ #include #include #include -#include +#include +#include #include #include -#include #include #include #include @@ -39,6 +39,7 @@ #include #include #include +#include #include "config_version.h" @@ -208,7 +209,7 @@ StorageKafka::StorageKafka( kafka_settings->kafka_client_id.value.empty() ? 
getDefaultClientId(table_id_) : getContext()->getMacros()->expand(kafka_settings->kafka_client_id.value)) , format_name(getContext()->getMacros()->expand(kafka_settings->kafka_format.value)) - , row_delimiter(kafka_settings->kafka_row_delimiter.value) + , max_rows_per_message(kafka_settings->kafka_max_rows_per_message.value) , schema_name(getContext()->getMacros()->expand(kafka_settings->kafka_schema.value)) , num_consumers(kafka_settings->kafka_num_consumers.value) , log(&Poco::Logger::get("StorageKafka (" + table_id_.table_name + ")")) @@ -334,7 +335,28 @@ SinkToStoragePtr StorageKafka::write(const ASTPtr &, const StorageMetadataPtr & if (topics.size() > 1) throw Exception("Can't write to Kafka table with multiple topics!", ErrorCodes::NOT_IMPLEMENTED); - return std::make_shared(*this, metadata_snapshot, modified_context); + + cppkafka::Configuration conf; + conf.set("metadata.broker.list", brokers); + conf.set("client.id", client_id); + conf.set("client.software.name", VERSION_NAME); + conf.set("client.software.version", VERSION_DESCRIBE); + // TODO: fill required settings + updateConfiguration(conf); + + const Settings & settings = getContext()->getSettingsRef(); + size_t poll_timeout = settings.stream_poll_timeout_ms.totalMilliseconds(); + const auto & header = metadata_snapshot->getSampleBlockNonMaterialized(); + + auto producer = std::make_unique( + std::make_shared(conf), topics[0], std::chrono::milliseconds(poll_timeout), shutdown_called, header); + + size_t max_rows = max_rows_per_message; + /// Need for backward compatibility. + if (format_name == "Avro" && local_context->getSettingsRef().output_format_avro_rows_in_file.changed) + max_rows = local_context->getSettingsRef().output_format_avro_rows_in_file.value; + return std::make_shared( + header, getFormatName(), max_rows, std::move(producer), getName(), modified_context); } @@ -344,7 +366,7 @@ void StorageKafka::startup() { try { - pushReadBuffer(createReadBuffer(i)); + pushConsumer(createConsumer(i)); ++num_created_consumers; } catch (const cppkafka::Exception &) @@ -374,29 +396,29 @@ void StorageKafka::shutdown() LOG_TRACE(log, "Closing consumers"); for (size_t i = 0; i < num_created_consumers; ++i) - auto buffer = popReadBuffer(); + auto consumer = popConsumer(); LOG_TRACE(log, "Consumers closed"); rd_kafka_wait_destroyed(CLEANUP_TIMEOUT_MS); } -void StorageKafka::pushReadBuffer(ConsumerBufferPtr buffer) +void StorageKafka::pushConsumer(KafkaConsumerPtr consumer) { std::lock_guard lock(mutex); - buffers.push_back(buffer); + consumers.push_back(consumer); semaphore.set(); CurrentMetrics::sub(CurrentMetrics::KafkaConsumersInUse, 1); } -ConsumerBufferPtr StorageKafka::popReadBuffer() +KafkaConsumerPtr StorageKafka::popConsumer() { - return popReadBuffer(std::chrono::milliseconds::zero()); + return popConsumer(std::chrono::milliseconds::zero()); } -ConsumerBufferPtr StorageKafka::popReadBuffer(std::chrono::milliseconds timeout) +KafkaConsumerPtr StorageKafka::popConsumer(std::chrono::milliseconds timeout) { // Wait for the first free buffer if (timeout == std::chrono::milliseconds::zero()) @@ -409,32 +431,14 @@ ConsumerBufferPtr StorageKafka::popReadBuffer(std::chrono::milliseconds timeout) // Take the first available buffer from the list std::lock_guard lock(mutex); - auto buffer = buffers.back(); - buffers.pop_back(); + auto consumer = consumers.back(); + consumers.pop_back(); CurrentMetrics::add(CurrentMetrics::KafkaConsumersInUse, 1); - return buffer; -} - -ProducerBufferPtr StorageKafka::createWriteBuffer(const Block & 
header) -{ - cppkafka::Configuration conf; - conf.set("metadata.broker.list", brokers); - conf.set("client.id", client_id); - conf.set("client.software.name", VERSION_NAME); - conf.set("client.software.version", VERSION_DESCRIBE); - // TODO: fill required settings - updateConfiguration(conf); - - auto producer = std::make_shared(conf); - const Settings & settings = getContext()->getSettingsRef(); - size_t poll_timeout = settings.stream_poll_timeout_ms.totalMilliseconds(); - - return std::make_shared( - producer, topics[0], row_delimiter ? std::optional{row_delimiter} : std::nullopt, 1, 1024, std::chrono::milliseconds(poll_timeout), header); + return consumer; } -ConsumerBufferPtr StorageKafka::createReadBuffer(size_t consumer_number) +KafkaConsumerPtr StorageKafka::createConsumer(size_t consumer_number) { cppkafka::Configuration conf; @@ -466,16 +470,16 @@ ConsumerBufferPtr StorageKafka::createReadBuffer(size_t consumer_number) conf.set("enable.partition.eof", "false"); // Ignore EOF messages // Create a consumer and subscribe to topics - auto consumer = std::make_shared(conf); - consumer->set_destroy_flags(RD_KAFKA_DESTROY_F_NO_CONSUMER_CLOSE); + auto consumer_impl = std::make_shared(conf); + consumer_impl->set_destroy_flags(RD_KAFKA_DESTROY_F_NO_CONSUMER_CLOSE); /// NOTE: we pass |stream_cancelled| by reference here, so the buffers should not outlive the storage. if (thread_per_consumer) { auto& stream_cancelled = tasks[consumer_number]->stream_cancelled; - return std::make_shared(consumer, log, getPollMaxBatchSize(), getPollTimeoutMillisecond(), intermediate_commit, stream_cancelled, topics); + return std::make_shared(consumer_impl, log, getPollMaxBatchSize(), getPollTimeoutMillisecond(), intermediate_commit, stream_cancelled, topics); } - return std::make_shared(consumer, log, getPollMaxBatchSize(), getPollTimeoutMillisecond(), intermediate_commit, tasks.back()->stream_cancelled, topics); + return std::make_shared(consumer_impl, log, getPollMaxBatchSize(), getPollTimeoutMillisecond(), intermediate_commit, tasks.back()->stream_cancelled, topics); } size_t StorageKafka::getMaxBlockSize() const @@ -837,6 +841,13 @@ void registerStorageKafka(StorageFactory & factory) CHECK_KAFKA_STORAGE_ARGUMENT(8, kafka_max_block_size, 0) CHECK_KAFKA_STORAGE_ARGUMENT(9, kafka_skip_broken_messages, 0) CHECK_KAFKA_STORAGE_ARGUMENT(10, kafka_commit_every_batch, 0) + CHECK_KAFKA_STORAGE_ARGUMENT(11, kafka_client_id, 2) + CHECK_KAFKA_STORAGE_ARGUMENT(12, kafka_poll_timeout_ms, 0) + CHECK_KAFKA_STORAGE_ARGUMENT(13, kafka_flush_interval_ms, 0) + CHECK_KAFKA_STORAGE_ARGUMENT(14, kafka_thread_per_consumer, 0) + CHECK_KAFKA_STORAGE_ARGUMENT(15, kafka_handle_error_mode, 0) + CHECK_KAFKA_STORAGE_ARGUMENT(16, kafka_commit_on_select, 0) + CHECK_KAFKA_STORAGE_ARGUMENT(17, kafka_max_rows_per_message, 0) } #undef CHECK_KAFKA_STORAGE_ARGUMENT diff --git a/src/Storages/Kafka/StorageKafka.h b/src/Storages/Kafka/StorageKafka.h index c1c67b19c51..890eb5a82e6 100644 --- a/src/Storages/Kafka/StorageKafka.h +++ b/src/Storages/Kafka/StorageKafka.h @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include @@ -24,6 +24,8 @@ namespace DB struct StorageKafkaInterceptors; +using KafkaConsumerPtr = std::shared_ptr; + /** Implements a Kafka queue table engine that can be used as a persistent queue / buffer, * or as a basic building block for creating pipelines with a continuous insertion / ETL. 
*/ @@ -60,11 +62,12 @@ public: const StorageMetadataPtr & /*metadata_snapshot*/, ContextPtr context) override; - void pushReadBuffer(ConsumerBufferPtr buf); - ConsumerBufferPtr popReadBuffer(); - ConsumerBufferPtr popReadBuffer(std::chrono::milliseconds timeout); + /// We want to control the number of rows in a chunk inserted into Kafka + bool prefersLargeBlocks() const override { return false; } - ProducerBufferPtr createWriteBuffer(const Block & header); + void pushConsumer(KafkaConsumerPtr consumer); + KafkaConsumerPtr popConsumer(); + KafkaConsumerPtr popConsumer(std::chrono::milliseconds timeout); const auto & getFormatName() const { return format_name; } @@ -80,7 +83,7 @@ private: const String group; const String client_id; const String format_name; - const char row_delimiter; /// optional row delimiter for generating char delimited stream in order to make various input stream parsers happy. + const size_t max_rows_per_message; const String schema_name; const size_t num_consumers; /// total number of consumers Poco::Logger * log; @@ -94,7 +97,7 @@ private: /// In this case we still need to be able to shutdown() properly. size_t num_created_consumers = 0; /// number of actually created consumers. - std::vector buffers; /// available buffers for Kafka consumers + std::vector consumers; /// available consumers std::mutex mutex; @@ -115,11 +118,13 @@ private: std::list> thread_statuses; SettingsChanges createSettingsAdjustments(); - ConsumerBufferPtr createReadBuffer(size_t consumer_number); + KafkaConsumerPtr createConsumer(size_t consumer_number); /// If named_collection is specified. String collection_name; + std::atomic shutdown_called = false; + // Update Kafka configuration with values from CH user configuration. void updateConfiguration(cppkafka::Configuration & conf); String getConfigPrefix() const; diff --git a/src/Storages/Kafka/WriteBufferToKafkaProducer.cpp b/src/Storages/Kafka/WriteBufferToKafkaProducer.cpp deleted file mode 100644 index 28877864e16..00000000000 --- a/src/Storages/Kafka/WriteBufferToKafkaProducer.cpp +++ /dev/null @@ -1,175 +0,0 @@ -#include "WriteBufferToKafkaProducer.h" -#include "Core/Block.h" -#include "Columns/ColumnString.h" -#include "Columns/ColumnsNumber.h" - -#include - -namespace ProfileEvents -{ - extern const Event KafkaRowsWritten; - extern const Event KafkaProducerFlushes; - extern const Event KafkaMessagesProduced; - extern const Event KafkaProducerErrors; -} - -namespace DB -{ -WriteBufferToKafkaProducer::WriteBufferToKafkaProducer( - ProducerPtr producer_, - const std::string & topic_, - std::optional delimiter, - size_t rows_per_message, - size_t chunk_size_, - std::chrono::milliseconds poll_timeout, - const Block & header - ) - : WriteBuffer(nullptr, 0) - , producer(producer_) - , topic(topic_) - , delim(delimiter) - , max_rows(rows_per_message) - , chunk_size(chunk_size_) - , timeout(poll_timeout) -{ - if (header.has("_key")) - { - auto column_index = header.getPositionByName("_key"); - auto column_info = header.getByPosition(column_index); - if (isString(column_info.type)) - { - key_column_index = column_index; - } - // else ? 
(not sure it's a good place to report smth to user) - } - - if (header.has("_timestamp")) - { - auto column_index = header.getPositionByName("_timestamp"); - auto column_info = header.getByPosition(column_index); - if (isDateTime(column_info.type)) - { - timestamp_column_index = column_index; - } - } - - reinitializeChunks(); -} - -WriteBufferToKafkaProducer::~WriteBufferToKafkaProducer() -{ - assert(rows == 0); -} - -void WriteBufferToKafkaProducer::countRow(const Columns & columns, size_t current_row) -{ - ProfileEvents::increment(ProfileEvents::KafkaRowsWritten); - - if (++rows % max_rows == 0) - { - const std::string & last_chunk = chunks.back(); - size_t last_chunk_size = offset(); - - // if last character of last chunk is delimiter - we don't need it - if (last_chunk_size && delim && last_chunk[last_chunk_size - 1] == delim) - --last_chunk_size; - - std::string payload; - payload.reserve((chunks.size() - 1) * chunk_size + last_chunk_size); - - // concat all chunks except the last one - for (auto i = chunks.begin(), e = --chunks.end(); i != e; ++i) - payload.append(*i); - - // add last one - payload.append(last_chunk, 0, last_chunk_size); - - cppkafka::MessageBuilder builder(topic); - builder.payload(payload); - - // Note: if it will be few rows per message - it will take the value from last row of block - if (key_column_index) - { - const auto & key_column = assert_cast(*columns[key_column_index.value()]); - const auto key_data = key_column.getDataAt(current_row); - builder.key(cppkafka::Buffer(key_data.data, key_data.size)); - } - - if (timestamp_column_index) - { - const auto & timestamp_column = assert_cast(*columns[timestamp_column_index.value()]); - const auto timestamp = std::chrono::seconds{timestamp_column.getElement(current_row)}; - builder.timestamp(timestamp); - } - - while (true) - { - try - { - producer->produce(builder); - } - catch (cppkafka::HandleException & e) - { - if (e.get_error() == RD_KAFKA_RESP_ERR__QUEUE_FULL) - { - producer->poll(timeout); - continue; - } - ProfileEvents::increment(ProfileEvents::KafkaProducerErrors); - throw; - } - ProfileEvents::increment(ProfileEvents::KafkaMessagesProduced); - - break; - } - - reinitializeChunks(); - } -} - -void WriteBufferToKafkaProducer::flush() -{ - // For unknown reason we may hit some internal timeout when inserting for the first time. - while (true) - { - try - { - producer->flush(timeout); - } - catch (cppkafka::HandleException & e) - { - if (e.get_error() == RD_KAFKA_RESP_ERR__TIMED_OUT) - continue; - - ProfileEvents::increment(ProfileEvents::KafkaProducerErrors); - throw; - } - - ProfileEvents::increment(ProfileEvents::KafkaProducerFlushes); - break; - } -} - -void WriteBufferToKafkaProducer::nextImpl() -{ - addChunk(); -} - -void WriteBufferToKafkaProducer::addChunk() -{ - chunks.push_back(std::string()); - chunks.back().resize(chunk_size); - set(chunks.back().data(), chunk_size); -} - -void WriteBufferToKafkaProducer::reinitializeChunks() -{ - rows = 0; - chunks.clear(); - /// We cannot leave the buffer in the undefined state (i.e. 
without any - /// underlying buffer), since in this case the WriteBuffeR::next() will - /// not call our nextImpl() (due to available() == 0) - addChunk(); -} - -} diff --git a/src/Storages/MeiliSearch/SinkMeiliSearch.cpp b/src/Storages/MeiliSearch/SinkMeiliSearch.cpp index 2c843639669..e04275dfe50 100644 --- a/src/Storages/MeiliSearch/SinkMeiliSearch.cpp +++ b/src/Storages/MeiliSearch/SinkMeiliSearch.cpp @@ -42,7 +42,7 @@ void SinkMeiliSearch::writeBlockData(const Block & block) const FormatSettings settings = getFormatSettings(local_context); settings.json.quote_64bit_integers = false; WriteBufferFromOwnString buf; - auto writer = FormatFactory::instance().getOutputFormat("JSON", buf, sample_block, local_context, {}, settings); + auto writer = FormatFactory::instance().getOutputFormat("JSON", buf, sample_block, local_context, settings); writer->write(block); writer->flush(); writer->finalize(); diff --git a/src/Storages/MergeTree/ActiveDataPartSet.cpp b/src/Storages/MergeTree/ActiveDataPartSet.cpp index a482dd21099..67199ca02ac 100644 --- a/src/Storages/MergeTree/ActiveDataPartSet.cpp +++ b/src/Storages/MergeTree/ActiveDataPartSet.cpp @@ -23,8 +23,13 @@ ActiveDataPartSet::ActiveDataPartSet(MergeTreeDataFormatVersion format_version_, bool ActiveDataPartSet::add(const String & name, Strings * out_replaced_parts) { - /// TODO make it exception safe (out_replaced_parts->push_back(...) may throw) auto part_info = MergeTreePartInfo::fromPartName(name, format_version); + return add(part_info, name, out_replaced_parts); +} + +bool ActiveDataPartSet::add(const MergeTreePartInfo & part_info, const String & name, Strings * out_replaced_parts) +{ + /// TODO make it exception safe (out_replaced_parts->push_back(...) may throw) if (getContainingPartImpl(part_info) != part_info_to_name.end()) return false; @@ -42,7 +47,7 @@ bool ActiveDataPartSet::add(const String & name, Strings * out_replaced_parts) if (!part_info.contains(it->first)) { if (!part_info.isDisjoint(it->first)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Part {} intersects previous part {}. It is a bug or a result of manual intervention in the ZooKeeper data.", name, it->first.getPartName()); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Part {} intersects previous part {}. It is a bug or a result of manual intervention in the ZooKeeper data.", part_info.getPartName(), it->first.getPartName()); ++it; break; } @@ -69,6 +74,12 @@ bool ActiveDataPartSet::add(const String & name, Strings * out_replaced_parts) part_info_to_name.emplace(part_info, name); return true; + +} + +bool ActiveDataPartSet::add(const MergeTreePartInfo & part_info, Strings * out_replaced_parts) +{ + return add(part_info, part_info.getPartName(), out_replaced_parts); } @@ -160,6 +171,16 @@ Strings ActiveDataPartSet::getParts() const return res; } +std::vector ActiveDataPartSet::getPartInfos() const +{ + std::vector res; + res.reserve(part_info_to_name.size()); + for (const auto & kv : part_info_to_name) + res.push_back(kv.first); + + return res; +} + size_t ActiveDataPartSet::size() const { return part_info_to_name.size(); diff --git a/src/Storages/MergeTree/ActiveDataPartSet.h b/src/Storages/MergeTree/ActiveDataPartSet.h index 8ab03625d5c..f3cd6b0019d 100644 --- a/src/Storages/MergeTree/ActiveDataPartSet.h +++ b/src/Storages/MergeTree/ActiveDataPartSet.h @@ -40,6 +40,8 @@ public: /// Returns true if the part was actually added. If out_replaced_parts != nullptr, it will contain /// parts that were replaced from the set by the newly added part. 
bool add(const String & name, Strings * out_replaced_parts = nullptr); + bool add(const MergeTreePartInfo & part_info, const String & name, Strings * out_replaced_parts = nullptr); + bool add(const MergeTreePartInfo & part_info, Strings * out_replaced_parts = nullptr); bool remove(const MergeTreePartInfo & part_info) { @@ -83,6 +85,7 @@ public: /// Returns parts in ascending order of the partition_id and block number. Strings getParts() const; + std::vector getPartInfos() const; size_t size() const; diff --git a/src/Storages/MergeTree/DataPartStorageOnDisk.cpp b/src/Storages/MergeTree/DataPartStorageOnDisk.cpp index 347ea16950e..215d6034a53 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDisk.cpp +++ b/src/Storages/MergeTree/DataPartStorageOnDisk.cpp @@ -59,9 +59,9 @@ std::string DataPartStorageOnDisk::getFullRootPath() const return fs::path(volume->getDisk()->getPath()) / root_path / ""; } -MutableDataPartStoragePtr DataPartStorageOnDisk::getProjection(const std::string & name) +MutableDataPartStoragePtr DataPartStorageOnDisk::getProjection(const std::string & name, bool use_parent_transaction) // NOLINT { - return std::shared_ptr(new DataPartStorageOnDisk(volume, std::string(fs::path(root_path) / part_dir), name, transaction)); + return std::shared_ptr(new DataPartStorageOnDisk(volume, std::string(fs::path(root_path) / part_dir), name, use_parent_transaction ? transaction : nullptr)); } DataPartStoragePtr DataPartStorageOnDisk::getProjection(const std::string & name) const @@ -638,12 +638,17 @@ MutableDataPartStoragePtr DataPartStorageOnDisk::clonePart( } void DataPartStorageOnDisk::rename( - const std::string & new_root_path, - const std::string & new_part_dir, + std::string new_root_path, + std::string new_part_dir, Poco::Logger * log, bool remove_new_dir_if_exists, bool fsync_part_dir) { + if (new_root_path.ends_with('/')) + new_root_path.pop_back(); + if (new_part_dir.ends_with('/')) + new_part_dir.pop_back(); + String to = fs::path(new_root_path) / new_part_dir / ""; if (volume->getDisk()->exists(to)) @@ -668,7 +673,6 @@ void DataPartStorageOnDisk::rename( fullPath(volume->getDisk(), to)); } } - String from = getRelativePath(); /// Why? 
diff --git a/src/Storages/MergeTree/DataPartStorageOnDisk.h b/src/Storages/MergeTree/DataPartStorageOnDisk.h index bea1596e1f7..fd408af9cf1 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDisk.h +++ b/src/Storages/MergeTree/DataPartStorageOnDisk.h @@ -21,7 +21,7 @@ public: std::string getPartDirectory() const override { return part_dir; } std::string getFullRootPath() const override; - MutableDataPartStoragePtr getProjection(const std::string & name) override; + MutableDataPartStoragePtr getProjection(const std::string & name, bool use_parent_transaction = true) override; // NOLINT DataPartStoragePtr getProjection(const std::string & name) const override; bool exists() const override; @@ -123,8 +123,8 @@ public: void createHardLinkFrom(const IDataPartStorage & source, const std::string & from, const std::string & to) override; void rename( - const std::string & new_root_path, - const std::string & new_part_dir, + std::string new_root_path, + std::string new_part_dir, Poco::Logger * log, bool remove_new_dir_if_exists, bool fsync_part_dir) override; @@ -156,5 +156,4 @@ private: Poco::Logger * log, bool is_projection) const; }; - } diff --git a/src/Storages/MergeTree/EphemeralLockInZooKeeper.cpp b/src/Storages/MergeTree/EphemeralLockInZooKeeper.cpp index bc28a555f77..ca81578c5c6 100644 --- a/src/Storages/MergeTree/EphemeralLockInZooKeeper.cpp +++ b/src/Storages/MergeTree/EphemeralLockInZooKeeper.cpp @@ -2,7 +2,7 @@ #include #include #include -#include +#include namespace DB diff --git a/src/Storages/MergeTree/IDataPartStorage.h b/src/Storages/MergeTree/IDataPartStorage.h index 53ee2738fc6..d7c0c9c76e3 100644 --- a/src/Storages/MergeTree/IDataPartStorage.h +++ b/src/Storages/MergeTree/IDataPartStorage.h @@ -8,6 +8,8 @@ #include #include #include +#include +#include namespace DB { @@ -85,7 +87,7 @@ public: /// virtual std::string getRelativeRootPath() const = 0; /// Get a storage for projection. - virtual std::shared_ptr getProjection(const std::string & name) = 0; + virtual std::shared_ptr getProjection(const std::string & name, bool use_parent_transaction = true) = 0; // NOLINT virtual std::shared_ptr getProjection(const std::string & name) const = 0; /// Part directory exists. @@ -237,12 +239,13 @@ public: /// Examples are: 'all_1_2_1' -> 'detached/all_1_2_1' /// 'moving/tmp_all_1_2_1' -> 'all_1_2_1' virtual void rename( - const std::string & new_root_path, - const std::string & new_part_dir, + std::string new_root_path, + std::string new_part_dir, Poco::Logger * log, bool remove_new_dir_if_exists, bool fsync_part_dir) = 0; + /// Starts a transaction of mutable operations. virtual void beginTransaction() = 0; /// Commits a transaction of mutable operations. 
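rename() now takes its two path components by value and strips a trailing '/' before composing the destination, so downstream code can treat the directory name as a bare name. A small self-contained sketch of that normalization; normalizeDirName and the paths are illustrative, not a real ClickHouse helper.

#include <cassert>
#include <filesystem>
#include <iostream>
#include <string>

namespace fs = std::filesystem;

// Taking the string by value lets the function normalize it in place,
// mirroring the signature change from `const std::string &` to `std::string`.
static std::string normalizeDirName(std::string name)
{
    if (!name.empty() && name.back() == '/')
        name.pop_back();
    return name;
}

int main()
{
    // As raw strings, the two spellings of the same directory do not compare equal...
    std::string a = "all_1_2_1/";
    std::string b = "all_1_2_1";
    assert(a != b);

    // ...but after trimming the trailing separator they do, and the composed
    // destination path stays well-formed either way (the trailing "" keeps the
    // conventional trailing separator).
    assert(normalizeDirName(a) == normalizeDirName(b));
    std::cout << (fs::path("store/data") / normalizeDirName(a) / "").string() << '\n';
}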
diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 02a7a2ae641..afebb8992e0 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -576,6 +576,9 @@ void IMergeTreeDataPart::assertState(const std::initializer_list #include +#include + #include #include #include @@ -599,9 +601,9 @@ static const ActionsDAG::Node & cloneASTWithInversionPushDown( if (name == "indexHint") { ActionsDAG::NodeRawConstPtrs children; - if (const auto * adaptor = typeid_cast(node.function_builder.get())) + if (const auto * adaptor = typeid_cast(node.function_base.get())) { - if (const auto * index_hint = typeid_cast(adaptor->getFunction())) + if (const auto * index_hint = typeid_cast(adaptor->getFunction().get())) { const auto & index_hint_dag = index_hint->getActions(); children = index_hint_dag->getOutputs(); @@ -611,7 +613,7 @@ static const ActionsDAG::Node & cloneASTWithInversionPushDown( } } - const auto & func = inverted_dag.addFunction(node.function_builder, children, ""); + const auto & func = inverted_dag.addFunction(FunctionFactory::instance().get(node.function_base->getName(), context), children, ""); to_inverted[&node] = &func; return func; } @@ -654,7 +656,8 @@ static const ActionsDAG::Node & cloneASTWithInversionPushDown( return func; } - res = &inverted_dag.addFunction(node.function_builder, children, ""); + res = &inverted_dag.addFunction(node.function_base, children, ""); + chassert(res->result_type == node.result_type); } } @@ -939,12 +942,13 @@ static FieldRef applyFunction(const FunctionBasePtr & func, const DataTypePtr & * which while not strictly monotonic, are monotonic everywhere on the input range. */ bool KeyCondition::transformConstantWithValidFunctions( + ContextPtr context, const String & expr_name, size_t & out_key_column_num, DataTypePtr & out_key_column_type, Field & out_value, DataTypePtr & out_type, - std::function always_monotonic) const + std::function always_monotonic) const { const auto & sample_block = key_expr->getSampleBlock(); @@ -1024,14 +1028,16 @@ bool KeyCondition::transformConstantWithValidFunctions( auto left_arg_type = left->result_type; auto left_arg_value = (*left->column)[0]; std::tie(const_value, const_type) = applyBinaryFunctionForFieldOfUnknownType( - func->function_builder, left_arg_type, left_arg_value, const_type, const_value); + FunctionFactory::instance().get(func->function_base->getName(), context), + left_arg_type, left_arg_value, const_type, const_value); } else { auto right_arg_type = right->result_type; auto right_arg_value = (*right->column)[0]; std::tie(const_value, const_type) = applyBinaryFunctionForFieldOfUnknownType( - func->function_builder, const_type, const_value, right_arg_type, right_arg_value); + FunctionFactory::instance().get(func->function_base->getName(), context), + const_type, const_value, right_arg_type, right_arg_value); } } } @@ -1067,7 +1073,13 @@ bool KeyCondition::canConstantBeWrappedByMonotonicFunctions( return false; return transformConstantWithValidFunctions( - expr_name, out_key_column_num, out_key_column_type, out_value, out_type, [](IFunctionBase & func, const IDataType & type) + node.getTreeContext().getQueryContext(), + expr_name, + out_key_column_num, + out_key_column_type, + out_value, + out_type, + [](const IFunctionBase & func, const IDataType & type) { if (!func.hasInformationAboutMonotonicity()) return false; @@ -1116,7 +1128,13 @@ bool KeyCondition::canConstantBeWrappedByFunctions( return false; 
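The KeyCondition hunks stop reusing the cached function_builder and instead keep function_base, re-resolving an executable function by name from FunctionFactory with the query context whenever a fresh instance is needed. The sketch below shows the general "store the name, re-resolve from a registry" pattern with a toy registry; it is only an illustration, not ClickHouse's FunctionFactory API.

#include <functional>
#include <iostream>
#include <map>
#include <string>

// A deliberately tiny stand-in for a function registry: maps a name to a factory.
struct Registry
{
    std::map<std::string, std::function<std::function<int(int)>()>> factories;

    std::function<int(int)> get(const std::string & name) const
    {
        return factories.at(name)();   // throws std::out_of_range for unknown names
    }
};

int main()
{
    Registry registry;
    registry.factories["negate"] = [] { return [](int x) { return -x; }; };
    registry.factories["abs"]    = [] { return [](int x) { return x < 0 ? -x : x; }; };

    // Instead of carrying a cached builder object around, the caller keeps only the
    // function's name and asks the registry again when a fresh instance is needed.
    const std::string name = "negate";
    auto fresh = registry.get(name);
    std::cout << fresh(42) << '\n';    // prints -42
}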
return transformConstantWithValidFunctions( - expr_name, out_key_column_num, out_key_column_type, out_value, out_type, [](IFunctionBase & func, const IDataType &) + node.getTreeContext().getQueryContext(), + expr_name, + out_key_column_num, + out_key_column_type, + out_value, + out_type, + [](const IFunctionBase & func, const IDataType &) { return func.isDeterministic(); }); diff --git a/src/Storages/MergeTree/KeyCondition.h b/src/Storages/MergeTree/KeyCondition.h index 258f88ac6b9..0a4ac93b082 100644 --- a/src/Storages/MergeTree/KeyCondition.h +++ b/src/Storages/MergeTree/KeyCondition.h @@ -19,7 +19,7 @@ namespace DB class ASTFunction; class Context; class IFunction; -using FunctionBasePtr = std::shared_ptr; +using FunctionBasePtr = std::shared_ptr; class ExpressionActions; using ExpressionActionsPtr = std::shared_ptr; struct ActionDAGNodes; @@ -421,12 +421,13 @@ private: std::vector & out_functions_chain); bool transformConstantWithValidFunctions( + ContextPtr context, const String & expr_name, size_t & out_key_column_num, DataTypePtr & out_key_column_type, Field & out_value, DataTypePtr & out_type, - std::function always_monotonic) const; + std::function always_monotonic) const; bool canConstantBeWrappedByMonotonicFunctions( const RPNBuilderTreeNode & node, diff --git a/src/Storages/MergeTree/MergeList.cpp b/src/Storages/MergeTree/MergeList.cpp index a833da7064f..76d69cc6b7d 100644 --- a/src/Storages/MergeTree/MergeList.cpp +++ b/src/Storages/MergeTree/MergeList.cpp @@ -88,10 +88,6 @@ MergeListElement::MergeListElement( /// thread_group::memory_tracker, but MemoryTrackerThreadSwitcher will reset parent). memory_tracker.setProfilerStep(settings.memory_profiler_step); memory_tracker.setSampleProbability(settings.memory_profiler_sample_probability); - /// Specify sample probability also for current thread to track more deallocations. - if (auto * thread_memory_tracker = DB::CurrentThread::getMemoryTracker()) - thread_memory_tracker->setSampleProbability(settings.memory_profiler_sample_probability); - memory_tracker.setSoftLimit(settings.memory_overcommit_ratio_denominator); if (settings.memory_tracker_fault_probability > 0.0) memory_tracker.setFaultProbability(settings.memory_tracker_fault_probability); diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index 0b6fe23e961..ea6ed4b403a 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -96,9 +96,13 @@ static void extractMergingAndGatheringColumns( bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() { - // projection parts have different prefix and suffix compared to normal parts. - // E.g. `proj_a.proj` for a normal projection merge and `proj_a.tmp_proj` for a projection materialization merge. - const String local_tmp_prefix = global_ctx->parent_part ? "" : "tmp_merge_"; + String local_tmp_prefix; + if (global_ctx->need_prefix) + { + // projection parts have different prefix and suffix compared to normal parts. + // E.g. `proj_a.proj` for a normal projection merge and `proj_a.tmp_proj` for a projection materialization merge. + local_tmp_prefix = global_ctx->parent_part ? "" : "tmp_merge_"; + } const String local_tmp_suffix = global_ctx->parent_part ? 
ctx->suffix : ""; if (global_ctx->merges_blocker->isCancelled() || global_ctx->merge_list_element_ptr->is_cancelled.load(std::memory_order_relaxed)) @@ -653,6 +657,7 @@ bool MergeTask::MergeProjectionsStage::mergeMinMaxIndexAndPrepareProjections() c global_ctx->deduplicate, global_ctx->deduplicate_by_columns, projection_merging_params, + global_ctx->need_prefix, global_ctx->new_data_part.get(), ".proj", NO_TRANSACTION_PTR, diff --git a/src/Storages/MergeTree/MergeTask.h b/src/Storages/MergeTree/MergeTask.h index 6a29cdbb5ca..46af2e1563a 100644 --- a/src/Storages/MergeTree/MergeTask.h +++ b/src/Storages/MergeTree/MergeTask.h @@ -59,6 +59,7 @@ public: bool deduplicate_, Names deduplicate_by_columns_, MergeTreeData::MergingParams merging_params_, + bool need_prefix, IMergeTreeDataPart * parent_part_, String suffix_, MergeTreeTransactionPtr txn, @@ -86,6 +87,7 @@ public: global_ctx->merges_blocker = std::move(merges_blocker_); global_ctx->ttl_merges_blocker = std::move(ttl_merges_blocker_); global_ctx->txn = std::move(txn); + global_ctx->need_prefix = need_prefix; auto prepare_stage_ctx = std::make_shared(); @@ -171,6 +173,7 @@ private: IMergedBlockOutputStream::WrittenOffsetColumns written_offset_columns{}; MergeTreeTransactionPtr txn; + bool need_prefix; scope_guard temporary_directory_lock; }; @@ -184,6 +187,7 @@ private: { /// Dependencies String suffix; + bool need_prefix; MergeTreeData::MergingParams merging_params{}; DiskPtr tmp_disk{nullptr}; @@ -192,7 +196,7 @@ private: bool force_ttl{false}; CompressionCodecPtr compression_codec{nullptr}; size_t sum_input_rows_upper_bound{0}; - std::unique_ptr rows_sources_file{nullptr}; + std::unique_ptr rows_sources_file{nullptr}; std::unique_ptr rows_sources_uncompressed_write_buf{nullptr}; std::unique_ptr rows_sources_write_buf{nullptr}; std::optional column_sizes{}; @@ -257,7 +261,7 @@ private: /// Begin dependencies from previous stage std::unique_ptr rows_sources_write_buf{nullptr}; std::unique_ptr rows_sources_uncompressed_write_buf{nullptr}; - std::unique_ptr rows_sources_file; + std::unique_ptr rows_sources_file; std::optional column_sizes; CompressionCodecPtr compression_codec; DiskPtr tmp_disk{nullptr}; diff --git a/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp b/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp index 234487763d7..f1c1a96d24f 100644 --- a/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp @@ -41,7 +41,7 @@ void MergeTreeBackgroundExecutor::increaseThreadsAndMaxTasksCount(size_t return; } - if (new_max_tasks_count < max_tasks_count) + if (new_max_tasks_count < max_tasks_count.load(std::memory_order_relaxed)) { LOG_WARNING(log, "Loaded new max tasks count for {}Executor from top level config, but new value ({}) is not greater than current {}", name, new_max_tasks_count, max_tasks_count); return; @@ -59,15 +59,14 @@ void MergeTreeBackgroundExecutor::increaseThreadsAndMaxTasksCount(size_t for (size_t number = threads_count; number < new_threads_count; ++number) pool.scheduleOrThrowOnError([this] { threadFunction(); }); - max_tasks_count = new_max_tasks_count; + max_tasks_count.store(new_max_tasks_count, std::memory_order_relaxed); threads_count = new_threads_count; } template size_t MergeTreeBackgroundExecutor::getMaxTasksCount() const { - std::lock_guard lock(mutex); - return max_tasks_count; + return max_tasks_count.load(std::memory_order_relaxed); } template diff --git a/src/Storages/MergeTree/MergeTreeBackgroundExecutor.h 
b/src/Storages/MergeTree/MergeTreeBackgroundExecutor.h index 0fc888dd6ad..ad50cd44189 100644 --- a/src/Storages/MergeTree/MergeTreeBackgroundExecutor.h +++ b/src/Storages/MergeTree/MergeTreeBackgroundExecutor.h @@ -194,6 +194,10 @@ public: /// Supports only increasing the number of threads and tasks, because /// implementing tasks eviction will definitely be too error-prone and buggy. void increaseThreadsAndMaxTasksCount(size_t new_threads_count, size_t new_max_tasks_count); + + /// This method can return stale value of max_tasks_count (no mutex locking). + /// It's okay because amount of tasks can be only increased and getting stale value + /// can lead only to some postponing, not logical error. size_t getMaxTasksCount() const; bool trySchedule(ExecutableTaskPtr task); @@ -203,7 +207,7 @@ public: private: String name; size_t threads_count TSA_GUARDED_BY(mutex) = 0; - size_t max_tasks_count TSA_GUARDED_BY(mutex) = 0; + std::atomic max_tasks_count = 0; CurrentMetrics::Metric metric; void routine(TaskRuntimeDataPtr item); diff --git a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp index bd7e3a64749..5b6b0f09bc3 100644 --- a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp @@ -68,12 +68,13 @@ IMergeTreeSelectAlgorithm::IMergeTreeSelectAlgorithm( size_t non_const_columns_offset = header_without_const_virtual_columns.columns(); injectNonConstVirtualColumns(0, header_without_const_virtual_columns, virt_column_names); - /// Reverse order is to minimize reallocations when removing columns from the block for (size_t col_num = non_const_columns_offset; col_num < header_without_const_virtual_columns.columns(); ++col_num) non_const_virtual_column_names.emplace_back(header_without_const_virtual_columns.getByPosition(col_num).name); result_header = header_without_const_virtual_columns; injectPartConstVirtualColumns(0, result_header, nullptr, partition_value_type, virt_column_names); + + LOG_TEST(log, "PREWHERE actions: {}", (prewhere_actions ? 
prewhere_actions->dump() : std::string(""))); } diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index e6b792e8be1..358d527ae28 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1,34 +1,23 @@ #include #include -#include #include #include #include #include #include -#include -#include -#include #include #include -#include #include #include #include -#include #include -#include #include #include #include -#include #include -#include #include -#include #include -#include #include #include #include @@ -56,7 +45,6 @@ #include #include #include -#include #include #include #include @@ -70,22 +58,19 @@ #include #include #include -#include #include #include #include -#include #include #include -#include #include -#include #include #include #include +#include #include #include #include @@ -209,9 +194,86 @@ static void checkSampleExpression(const StorageInMemoryMetadata & metadata, bool ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER); } + +void MergeTreeData::initializeDirectoriesAndFormatVersion(const std::string & relative_data_path_, bool attach, const std::string & date_column_name, bool need_create_directories) +{ + relative_data_path = relative_data_path_; + + MergeTreeDataFormatVersion min_format_version(0); + if (date_column_name.empty()) + min_format_version = MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING; + + if (relative_data_path.empty()) + throw Exception("MergeTree storages require data path", ErrorCodes::INCORRECT_FILE_NAME); + + const auto format_version_path = fs::path(relative_data_path) / MergeTreeData::FORMAT_VERSION_FILE_NAME; + std::optional read_format_version; + + for (const auto & disk : getDisks()) + { + if (disk->isBroken()) + continue; + + if (need_create_directories) + { + disk->createDirectories(relative_data_path); + disk->createDirectories(fs::path(relative_data_path) / MergeTreeData::DETACHED_DIR_NAME); + } + + if (disk->exists(format_version_path)) + { + auto buf = disk->readFile(format_version_path); + UInt32 current_format_version{0}; + readIntText(current_format_version, *buf); + if (!buf->eof()) + throw Exception(ErrorCodes::CORRUPTED_DATA, "Bad version file: {}", fullPath(disk, format_version_path)); + + if (!read_format_version.has_value()) + read_format_version = current_format_version; + else if (*read_format_version != current_format_version) + throw Exception(ErrorCodes::CORRUPTED_DATA, "Version file on {} contains version {} expected version is {}.", fullPath(disk, format_version_path), current_format_version, *read_format_version); + } + } + + + // When data path or file not exists, ignore the format_version check + if (!attach || !read_format_version) + { + format_version = min_format_version; + + // try to write to first non-readonly disk + for (const auto & disk : getStoragePolicy()->getDisks()) + { + if (disk->isBroken()) + continue; + + if (!disk->isReadOnly()) + { + auto buf = disk->writeFile(format_version_path, DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite, getContext()->getWriteSettings()); + writeIntText(format_version.toUnderType(), *buf); + if (getContext()->getSettingsRef().fsync_metadata) + buf->sync(); + } + + break; + } + } + else + { + format_version = *read_format_version; + } + + if (format_version < min_format_version) + { + if (min_format_version == MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING.toUnderType()) + throw Exception( + "MergeTree data format version on disk doesn't support custom partitioning", + 
ErrorCodes::METADATA_MISMATCH); + } +} + MergeTreeData::MergeTreeData( const StorageID & table_id_, - const String & relative_data_path_, const StorageInMemoryMetadata & metadata_, ContextMutablePtr context_, const String & date_column_name, @@ -222,9 +284,9 @@ MergeTreeData::MergeTreeData( BrokenPartCallback broken_part_callback_) : IStorage(table_id_) , WithMutableContext(context_->getGlobalContext()) + , format_version(date_column_name.empty() ? MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING : MERGE_TREE_DATA_OLD_FORMAT_VERSION) , merging_params(merging_params_) , require_part_metadata(require_part_metadata_) - , relative_data_path(relative_data_path_) , broken_part_callback(broken_part_callback_) , log_name(std::make_shared(table_id_.getNameForLogs())) , log(&Poco::Logger::get(*log_name)) @@ -242,14 +304,10 @@ MergeTreeData::MergeTreeData( const auto settings = getSettings(); allow_nullable_key = attach || settings->allow_nullable_key; - if (relative_data_path.empty()) - throw Exception("MergeTree storages require data path", ErrorCodes::INCORRECT_FILE_NAME); - /// Check sanity of MergeTreeSettings. Only when table is created. if (!attach) settings->sanityCheck(getContext()->getMergeMutateExecutor()->getMaxTasksCount()); - MergeTreeDataFormatVersion min_format_version(0); if (!date_column_name.empty()) { try @@ -270,7 +328,6 @@ MergeTreeData::MergeTreeData( { is_custom_partitioned = true; checkPartitionKeyAndInitMinMax(metadata_.partition_key); - min_format_version = MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING; } setProperties(metadata_, metadata_, attach); @@ -286,64 +343,6 @@ MergeTreeData::MergeTreeData( checkTTLExpressions(metadata_, metadata_); - const auto format_version_path = fs::path(relative_data_path) / MergeTreeData::FORMAT_VERSION_FILE_NAME; - std::optional read_format_version; - /// Creating directories, if not exist. 
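initializeDirectoriesAndFormatVersion() centralizes the constructor logic being deleted in the hunk below: read format_version.txt where it exists, fall back to a default and persist it for new tables, and refuse to attach if the on-disk format is too old. A simplified single-directory sketch with the standard library; the paths and the minimum version value are assumptions of the example, and the multi-disk consistency check is omitted.

#include <filesystem>
#include <fstream>
#include <iostream>
#include <optional>
#include <stdexcept>
#include <string>

namespace fs = std::filesystem;

// Read an integer version from `file` if it exists; std::nullopt otherwise.
static std::optional<unsigned> readFormatVersion(const fs::path & file)
{
    if (!fs::exists(file))
        return std::nullopt;
    std::ifstream in(file);
    unsigned version = 0;
    if (!(in >> version))
        throw std::runtime_error("Bad version file: " + file.string());
    return version;
}

int main()
{
    const fs::path data_dir = "example_table";        // assumed layout for the sketch
    const fs::path version_file = data_dir / "format_version.txt";
    const unsigned min_format_version = 1;             // custom-partitioning format

    fs::create_directories(data_dir / "detached");

    unsigned format_version = min_format_version;
    if (auto read = readFormatVersion(version_file))
        format_version = *read;                         // existing table: trust what is on disk
    else
        std::ofstream(version_file) << format_version;  // new table: persist the default

    if (format_version < min_format_version)
        throw std::runtime_error("on-disk format does not support custom partitioning");

    std::cout << "format_version = " << format_version << '\n';
}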
- for (const auto & disk : getDisks()) - { - if (disk->isBroken()) - continue; - - disk->createDirectories(relative_data_path); - disk->createDirectories(fs::path(relative_data_path) / MergeTreeData::DETACHED_DIR_NAME); - - if (disk->exists(format_version_path)) - { - auto buf = disk->readFile(format_version_path); - UInt32 current_format_version{0}; - readIntText(current_format_version, *buf); - if (!buf->eof()) - throw Exception(ErrorCodes::CORRUPTED_DATA, "Bad version file: {}", fullPath(disk, format_version_path)); - - if (!read_format_version.has_value()) - read_format_version = current_format_version; - else if (*read_format_version != current_format_version) - throw Exception(ErrorCodes::CORRUPTED_DATA, "Version file on {} contains version {} expected version is {}.", fullPath(disk, format_version_path), current_format_version, *read_format_version); - } - } - - // When data path or file not exists, ignore the format_version check - if (!attach || !read_format_version) - { - format_version = min_format_version; - - // try to write to first non-readonly disk - for (const auto & disk : getStoragePolicy()->getDisks()) - { - if (!disk->isReadOnly()) - { - auto buf = disk->writeFile(format_version_path, DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite, context_->getWriteSettings()); - writeIntText(format_version.toUnderType(), *buf); - if (getContext()->getSettingsRef().fsync_metadata) - buf->sync(); - - break; - } - } - } - else - { - format_version = *read_format_version; - } - - if (format_version < min_format_version) - { - if (min_format_version == MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING.toUnderType()) - throw Exception( - "MergeTree data format version on disk doesn't support custom partitioning", - ErrorCodes::METADATA_MISMATCH); - } - String reason; if (!canUsePolymorphicParts(*settings, &reason) && !reason.empty()) LOG_WARNING(log, "{} Settings 'min_rows_for_wide_part', 'min_bytes_for_wide_part', " @@ -1016,7 +1015,7 @@ void MergeTreeData::loadDataPartsFromDisk( size_t suspicious_broken_parts_bytes = 0; std::atomic has_adaptive_parts = false; std::atomic has_non_adaptive_parts = false; - std::atomic has_lightweight_in_parts = false; + std::atomic has_lightweight_deletes_in_parts = false; std::mutex mutex; auto load_part = [&](const String & part_name, const DiskPtr & part_disk_ptr) @@ -1108,7 +1107,7 @@ void MergeTreeData::loadDataPartsFromDisk( /// Check if there is lightweight delete in part if (part->hasLightweightDelete()) - has_lightweight_in_parts.store(true, std::memory_order_relaxed); + has_lightweight_deletes_in_parts.store(true, std::memory_order_relaxed); part->modification_time = part_disk_ptr->getLastModified(fs::path(relative_data_path) / part_name).epochTime(); /// Assume that all parts are Active, covered parts will be detected and marked as Outdated later @@ -1192,7 +1191,7 @@ void MergeTreeData::loadDataPartsFromDisk( has_non_adaptive_index_granularity_parts = has_non_adaptive_parts; - if (has_lightweight_in_parts) + if (has_lightweight_deletes_in_parts) has_lightweight_delete_parts.store(true); if (suspicious_broken_parts > settings->max_suspicious_broken_parts && !skip_sanity_checks) @@ -1596,7 +1595,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) /// (Only files on the first level of nesting are considered). 
static bool isOldPartDirectory(const DiskPtr & disk, const String & directory_path, time_t threshold) { - if (disk->getLastModified(directory_path).epochTime() >= threshold) + if (!disk->isDirectory(directory_path) || disk->getLastModified(directory_path).epochTime() >= threshold) return false; for (auto it = disk->iterateDirectory(directory_path); it->isValid(); it->next()) @@ -1646,7 +1645,7 @@ size_t MergeTreeData::clearOldTemporaryDirectories(size_t custom_directories_lif try { - if (disk->isDirectory(it->path()) && isOldPartDirectory(disk, it->path(), deadline)) + if (isOldPartDirectory(disk, it->path(), deadline)) { if (temporary_parts.contains(basename)) { @@ -1674,16 +1673,6 @@ size_t MergeTreeData::clearOldTemporaryDirectories(size_t custom_directories_lif } } } - /// see getModificationTime() - catch (const ErrnoException & e) - { - if (e.getErrno() == ENOENT) - { - /// If the file is already deleted, do nothing. - } - else - throw; - } catch (const fs::filesystem_error & e) { if (e.code() == std::errc::no_such_file_or_directory) @@ -1764,11 +1753,20 @@ MergeTreeData::DataPartsVector MergeTreeData::grabOldParts(bool force) auto time_now = time(nullptr); { + auto removal_limit = getSettings()->simultaneous_parts_removal_limit; + size_t current_removal_limit = removal_limit == 0 ? std::numeric_limits::max() : static_cast(removal_limit); + auto parts_lock = lockParts(); auto outdated_parts_range = getDataPartsStateRange(DataPartState::Outdated); for (auto it = outdated_parts_range.begin(); it != outdated_parts_range.end(); ++it) { + if (parts_to_delete.size() == current_removal_limit) + { + LOG_TRACE(log, "Found {} parts to remove and reached the limit for one removal iteration", current_removal_limit); + break; + } + const DataPartPtr & part = *it; part->last_removal_attemp_time.store(time_now, std::memory_order_relaxed); @@ -1819,7 +1817,7 @@ MergeTreeData::DataPartsVector MergeTreeData::grabOldParts(bool force) } if (!res.empty()) - LOG_TRACE(log, "Found {} old parts to remove. Parts {}", + LOG_TRACE(log, "Found {} old parts to remove. 
Parts: [{}]", res.size(), fmt::join(getPartsNames(res), ", ")); return res; @@ -1839,6 +1837,9 @@ void MergeTreeData::rollbackDeletingParts(const MergeTreeData::DataPartsVector & void MergeTreeData::removePartsFinally(const MergeTreeData::DataPartsVector & parts) { + if (parts.empty()) + return; + { auto lock = lockParts(); @@ -1851,16 +1852,16 @@ void MergeTreeData::removePartsFinally(const MergeTreeData::DataPartsVector & pa auto it = data_parts_by_info.find(part->info); if (it == data_parts_by_info.end()) - throw Exception("Deleting data part " + part->name + " doesn't exist", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Deleting data part {} doesn't exist", part->name); (*it)->assertState({DataPartState::Deleting}); - LOG_DEBUG(log, "Finally removing part from memory {}", part->name); - data_parts_indexes.erase(it); } } + LOG_DEBUG(log, "Removing {} parts from memory: Parts: [{}]", parts.size(), fmt::join(parts, ", ")); + /// Data parts is still alive (since DataPartsVector holds shared_ptrs) and contain useful metainformation for logging /// NOTE: There is no need to log parts deletion somewhere else, all deleting parts pass through this function and pass away @@ -1913,13 +1914,14 @@ void MergeTreeData::flushAllInMemoryPartsIfNeeded() size_t MergeTreeData::clearOldPartsFromFilesystem(bool force) { DataPartsVector parts_to_remove = grabOldParts(force); + if (parts_to_remove.empty()) + return 0; + clearPartsFromFilesystem(parts_to_remove); removePartsFinally(parts_to_remove); - /// This is needed to close files to avoid they reside on disk after being deleted. /// NOTE: we can drop files from cache more selectively but this is good enough. - if (!parts_to_remove.empty()) - getContext()->dropMMappedFileCache(); + getContext()->dropMMappedFileCache(); return parts_to_remove.size(); } @@ -1964,7 +1966,7 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t { const auto settings = getSettings(); bool has_zero_copy_parts = false; - if (supportsReplication() && settings->allow_remote_fs_zero_copy_replication) + if (settings->allow_remote_fs_zero_copy_replication && dynamic_cast(this) != nullptr) { has_zero_copy_parts = std::any_of( parts_to_remove.begin(), parts_to_remove.end(), @@ -1983,7 +1985,8 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t ThreadPool pool(num_threads); /// NOTE: Under heavy system load you may get "Cannot schedule a task" from ThreadPool. 
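grabOldParts() now honours simultaneous_parts_removal_limit: 0 keeps the old unbounded behaviour, while any other value caps how many outdated parts a single cleanup iteration may collect. A standalone sketch of that capping logic; only the setting's semantics come from the diff, the rest is illustrative.

#include <cstddef>
#include <iostream>
#include <limits>
#include <string>
#include <vector>

// Collect at most `removal_limit` items per iteration; 0 disables the cap,
// mirroring how the setting above is translated into an effective limit.
static std::vector<std::string> grabBatch(const std::vector<std::string> & outdated, std::size_t removal_limit)
{
    const std::size_t current_limit
        = removal_limit == 0 ? std::numeric_limits<std::size_t>::max() : removal_limit;

    std::vector<std::string> batch;
    for (const auto & name : outdated)
    {
        if (batch.size() == current_limit)
        {
            std::cout << "reached the limit of " << current_limit << " for one removal iteration\n";
            break;
        }
        batch.push_back(name);
    }
    return batch;
}

int main()
{
    std::vector<std::string> outdated = {"all_1_1_0", "all_2_2_0", "all_3_3_0", "all_4_4_0"};
    auto batch = grabBatch(outdated, /*removal_limit=*/ 2);
    std::cout << "removing " << batch.size() << " of " << outdated.size() << " parts this iteration\n";
}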
- LOG_DEBUG(log, "Removing {} parts from filesystem: {} (concurrently)", parts_to_remove.size(), fmt::join(parts_to_remove, ", ")); + LOG_DEBUG( + log, "Removing {} parts from filesystem (concurrently): Parts: [{}]", parts_to_remove.size(), fmt::join(parts_to_remove, ", ")); for (const DataPartPtr & part : parts_to_remove) { pool.scheduleOrThrowOnError([&, thread_group = CurrentThread::getGroup()] @@ -2008,7 +2011,8 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t } else if (!parts_to_remove.empty()) { - LOG_DEBUG(log, "Removing {} parts from filesystem: {}", parts_to_remove.size(), fmt::join(parts_to_remove, ", ")); + LOG_DEBUG( + log, "Removing {} parts from filesystem (serially): Parts: [{}]", parts_to_remove.size(), fmt::join(parts_to_remove, ", ")); for (const DataPartPtr & part : parts_to_remove) { preparePartForRemoval(part)->remove(); @@ -2298,7 +2302,9 @@ void MergeTreeData::dropAllData() try { - if (!disk->isDirectoryEmpty(relative_data_path) && supportsReplication() && disk->supportZeroCopyReplication() && settings_ptr->allow_remote_fs_zero_copy_replication) + if (!disk->isDirectoryEmpty(relative_data_path) && + supportsReplication() && disk->supportZeroCopyReplication() + && settings_ptr->allow_remote_fs_zero_copy_replication) { std::vector files_left; disk->listFiles(relative_data_path, files_left); @@ -3091,7 +3097,7 @@ void MergeTreeData::checkPartDuplicate(MutableDataPartPtr & part, Transaction & } } -void MergeTreeData::preparePartForCommit(MutableDataPartPtr & part, Transaction & out_transaction) +void MergeTreeData::preparePartForCommit(MutableDataPartPtr & part, Transaction & out_transaction, bool need_rename) { part->is_temp = false; part->setState(DataPartState::PreActive); @@ -3103,12 +3109,55 @@ void MergeTreeData::preparePartForCommit(MutableDataPartPtr & part, Transaction return !may_be_cleaned_up || temporary_parts.contains(dir_name); }()); - part->renameTo(part->name, true); + if (need_rename) + part->renameTo(part->name, true); data_parts_indexes.insert(part); out_transaction.addPart(part); } +bool MergeTreeData::addTempPart( + MutableDataPartPtr & part, + Transaction & out_transaction, + DataPartsLock & lock, + DataPartsVector * out_covered_parts) +{ + LOG_TRACE(log, "Adding temporary part from directory {} with name {}.", part->getDataPartStorage().getPartDirectory(), part->name); + if (&out_transaction.data != this) + throw Exception("MergeTreeData::Transaction for one table cannot be used with another. It is a bug.", + ErrorCodes::LOGICAL_ERROR); + + if (part->hasLightweightDelete()) + has_lightweight_delete_parts.store(true); + + checkPartPartition(part, lock); + checkPartDuplicate(part, out_transaction, lock); + + DataPartPtr covering_part; + DataPartsVector covered_parts = getActivePartsToReplace(part->info, part->name, covering_part, lock); + + if (covering_part) + { + LOG_WARNING(log, "Tried to add obsolete part {} covered by {}", part->name, covering_part->getNameWithState()); + return false; + } + + /// All checks are passed. Now we can rename the part on disk. 
+ /// So, we maintain invariant: if a non-temporary part in filesystem then it is in data_parts + preparePartForCommit(part, out_transaction, /* need_rename = */false); + + if (out_covered_parts) + { + out_covered_parts->reserve(covered_parts.size()); + + for (DataPartPtr & covered_part : covered_parts) + out_covered_parts->emplace_back(std::move(covered_part)); + } + + return true; +} + + bool MergeTreeData::renameTempPartAndReplaceImpl( MutableDataPartPtr & part, Transaction & out_transaction, @@ -3150,7 +3199,7 @@ bool MergeTreeData::renameTempPartAndReplaceImpl( /// All checks are passed. Now we can rename the part on disk. /// So, we maintain invariant: if a non-temporary part in filesystem then it is in data_parts - preparePartForCommit(part, out_transaction); + preparePartForCommit(part, out_transaction, /* need_rename */ true); if (out_covered_parts) { @@ -3271,8 +3320,8 @@ void MergeTreeData::removePartsInRangeFromWorkingSet(MergeTreeTransaction * txn, removePartsInRangeFromWorkingSetAndGetPartsToRemoveFromZooKeeper(txn, drop_range, lock); } -MergeTreeData::PartsToRemoveFromZooKeeper MergeTreeData::removePartsInRangeFromWorkingSetAndGetPartsToRemoveFromZooKeeper( - MergeTreeTransaction * txn, const MergeTreePartInfo & drop_range, DataPartsLock & lock) +DataPartsVector MergeTreeData::grabActivePartsToRemoveForDropRange( + MergeTreeTransaction * txn, const MergeTreePartInfo & drop_range, DataPartsLock & lock) { DataPartsVector parts_to_remove; @@ -3339,6 +3388,14 @@ MergeTreeData::PartsToRemoveFromZooKeeper MergeTreeData::removePartsInRangeFromW parts_to_remove.emplace_back(part); } + return parts_to_remove; +} + +MergeTreeData::PartsToRemoveFromZooKeeper MergeTreeData::removePartsInRangeFromWorkingSetAndGetPartsToRemoveFromZooKeeper( + MergeTreeTransaction * txn, const MergeTreePartInfo & drop_range, DataPartsLock & lock) +{ + + auto parts_to_remove = grabActivePartsToRemoveForDropRange(txn, drop_range, lock); bool clear_without_timeout = true; /// We a going to remove active parts covered by drop_range without timeout. @@ -3915,10 +3972,25 @@ MergeTreeData::DataPartsVector MergeTreeData::getVisibleDataPartsVectorInPartiti return res; } -MergeTreeData::DataPartPtr MergeTreeData::getPartIfExists(const MergeTreePartInfo & part_info, const MergeTreeData::DataPartStates & valid_states, DataPartsLock * acquired_lock) +MergeTreeData::DataPartPtr MergeTreeData::getPartIfExists(const MergeTreePartInfo & part_info, const MergeTreeData::DataPartStates & valid_states) { - auto lock = (acquired_lock) ? 
DataPartsLock() : lockParts(); + auto lock = lockParts(); + return getPartIfExistsUnlocked(part_info, valid_states, lock); +} +MergeTreeData::DataPartPtr MergeTreeData::getPartIfExists(const String & part_name, const MergeTreeData::DataPartStates & valid_states) +{ + auto lock = lockParts(); + return getPartIfExistsUnlocked(part_name, valid_states, lock); +} + +MergeTreeData::DataPartPtr MergeTreeData::getPartIfExistsUnlocked(const String & part_name, const DataPartStates & valid_states, DataPartsLock & acquired_lock) +{ + return getPartIfExistsUnlocked(MergeTreePartInfo::fromPartName(part_name, format_version), valid_states, acquired_lock); +} + +MergeTreeData::DataPartPtr MergeTreeData::getPartIfExistsUnlocked(const MergeTreePartInfo & part_info, const DataPartStates & valid_states, DataPartsLock & /* acquired_lock */) +{ auto it = data_parts_by_info.find(part_info); if (it == data_parts_by_info.end()) return nullptr; @@ -3930,12 +4002,6 @@ MergeTreeData::DataPartPtr MergeTreeData::getPartIfExists(const MergeTreePartInf return nullptr; } -MergeTreeData::DataPartPtr MergeTreeData::getPartIfExists(const String & part_name, const MergeTreeData::DataPartStates & valid_states, DataPartsLock * acquired_lock) -{ - return getPartIfExists(MergeTreePartInfo::fromPartName(part_name, format_version), valid_states, acquired_lock); -} - - static void loadPartAndFixMetadataImpl(MergeTreeData::MutableDataPartPtr part) { part->loadColumnsChecksumsIndexes(false, true); @@ -4987,6 +5053,8 @@ MergeTreeData::MutableDataPartsVector MergeTreeData::tryLoadPartsToAttach(const { const String containing_part = active_parts.getContainingPart(part_info.dir_name); + LOG_DEBUG(log, "Found containing part {} for part {}", containing_part, part_info.dir_name); + if (!containing_part.empty() && containing_part != part_info.dir_name) part_info.disk->moveDirectory(fs::path(relative_data_path) / source_dir / part_info.dir_name, fs::path(relative_data_path) / source_dir / ("inactive_" + part_info.dir_name)); @@ -5000,7 +5068,7 @@ MergeTreeData::MutableDataPartsVector MergeTreeData::tryLoadPartsToAttach(const renamed_parts.tryRenameAll(); /// Synchronously check that added parts exist and are not broken. We will write checksums.txt if it does not exist. 
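getPartIfExists() is split above into a locking public overload and getPartIfExistsUnlocked(), which takes a reference to the already-held DataPartsLock so it can be called from code that owns a wider lock. A minimal standalone sketch of that idiom with std::mutex; the class and member names are illustrative.

#include <iostream>
#include <map>
#include <mutex>
#include <optional>
#include <string>

class PartSet
{
public:
    // Public overload: takes the lock itself, then defers to the unlocked variant.
    std::optional<int> getPartIfExists(const std::string & name) const
    {
        std::unique_lock<std::mutex> lock(mutex);
        return getPartIfExistsUnlocked(name, lock);
    }

    // Unlocked variant: the lock reference is unused except as evidence that the
    // caller already holds the mutex, so it is safe to call under a wider lock.
    std::optional<int> getPartIfExistsUnlocked(const std::string & name, std::unique_lock<std::mutex> & /*held*/) const
    {
        auto it = parts.find(name);
        if (it == parts.end())
            return std::nullopt;
        return it->second;
    }

    void add(const std::string & name, int value)
    {
        std::unique_lock<std::mutex> lock(mutex);
        parts.emplace(name, value);
    }

private:
    mutable std::mutex mutex;
    std::map<std::string, int> parts;
};

int main()
{
    PartSet set;
    set.add("all_1_1_0", 42);
    if (auto part = set.getPartIfExists("all_1_1_0"))
        std::cout << "found part with value " << *part << '\n';
}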
- LOG_DEBUG(log, "Checking parts"); + LOG_DEBUG(log, "Checking {} parts", renamed_parts.old_and_new_names.size()); MutableDataPartsVector loaded_parts; loaded_parts.reserve(renamed_parts.old_and_new_names.size()); @@ -5363,6 +5431,7 @@ MergeTreeData::DataPartsVector MergeTreeData::Transaction::commit(MergeTreeData: part->getDataPartStorage().commitTransaction(); if (txn) + { for (const auto & part : precommitted_parts) { DataPartPtr covering_part; @@ -5384,6 +5453,7 @@ MergeTreeData::DataPartsVector MergeTreeData::Transaction::commit(MergeTreeData: MergeTreeTransaction::addNewPartAndRemoveCovered(data.shared_from_this(), part, covered_parts, txn); } + } MergeTreeData::WriteAheadLogPtr wal; auto get_inited_wal = [&] () @@ -5671,7 +5741,7 @@ Block MergeTreeData::getMinMaxCountProjectionBlock( agg_count->set(place, value.get()); else { - auto value_column = func->getReturnType()->createColumnConst(1, value)->convertToFullColumnIfConst(); + auto value_column = func->getResultType()->createColumnConst(1, value)->convertToFullColumnIfConst(); const auto * value_column_ptr = value_column.get(); func->add(place, &value_column_ptr, 0, &arena); } @@ -5921,20 +5991,25 @@ std::optional MergeTreeData::getQueryProcessingStageWithAgg if (select_query->interpolate() && !select_query->interpolate()->children.empty()) return std::nullopt; - // Currently projections don't support GROUPING SET yet. - if (select_query->group_by_with_grouping_sets) + // Projections don't support grouping sets yet. + if (select_query->group_by_with_grouping_sets + || select_query->group_by_with_totals + || select_query->group_by_with_rollup + || select_query->group_by_with_cube) return std::nullopt; auto query_options = SelectQueryOptions( QueryProcessingStage::WithMergeableState, /* depth */ 1, /* is_subquery_= */ true - ).ignoreProjections().ignoreAlias(); + ).ignoreProjections().ignoreAlias(); + InterpreterSelectQuery select( query_ptr, query_context, query_options, query_info.prepared_sets); + const auto & analysis_result = select.getAnalysisResult(); query_info.prepared_sets = select.getQueryAnalyzer()->getPreparedSets(); @@ -6052,6 +6127,7 @@ std::optional MergeTreeData::getQueryProcessingStageWithAgg { ProjectionCandidate candidate{}; candidate.desc = &projection; + candidate.context = select.getContext(); auto sample_block = projection.sample_block; auto sample_block_for_keys = projection.sample_block_for_keys; @@ -6419,7 +6495,6 @@ std::pair MergeTreeData::cloneAn quoteString(src_part->getDataPartStorage().getFullPath())); String dst_part_name = src_part->getNewName(dst_part_info); - assert(!tmp_part_prefix.empty()); String tmp_dst_part_name = tmp_part_prefix + dst_part_name; auto temporary_directory_lock = getTemporaryPartDirectoryHolder(tmp_dst_part_name); @@ -6517,7 +6592,7 @@ DiskPtr MergeTreeData::tryGetDiskForDetachedPart(const String & part_name) const const auto disks = getStoragePolicy()->getDisks(); for (const DiskPtr & disk : disks) - if (disk->exists(relative_data_path + additional_path + part_name)) + if (disk->exists(fs::path(relative_data_path) / additional_path / part_name)) return disk; return nullptr; diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 16e6e7aa809..670c755cf72 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -273,6 +273,7 @@ public: tryLogCurrentException("~MergeTreeData::Transaction"); } } + void clear(); TransactionID getTID() const; @@ -284,7 +285,6 @@ public: MutableDataParts 
precommitted_parts; MutableDataParts locked_parts; - void clear(); }; using TransactionUniquePtr = std::unique_ptr; @@ -376,7 +376,6 @@ public: /// require_part_metadata - should checksums.txt and columns.txt exist in the part directory. /// attach - whether the existing table is attached or the new table is created. MergeTreeData(const StorageID & table_id_, - const String & relative_data_path_, const StorageInMemoryMetadata & metadata_, ContextMutablePtr context_, const String & date_column_name, @@ -517,8 +516,10 @@ public: DataPartsVector getDataPartsVectorInPartitionForInternalUsage(const DataPartStates & affordable_states, const String & partition_id, DataPartsLock * acquired_lock = nullptr) const; /// Returns the part with the given name and state or nullptr if no such part. - DataPartPtr getPartIfExists(const String & part_name, const DataPartStates & valid_states, DataPartsLock * acquired_lock = nullptr); - DataPartPtr getPartIfExists(const MergeTreePartInfo & part_info, const DataPartStates & valid_states, DataPartsLock * acquired_lock = nullptr); + DataPartPtr getPartIfExistsUnlocked(const String & part_name, const DataPartStates & valid_states, DataPartsLock & acquired_lock); + DataPartPtr getPartIfExistsUnlocked(const MergeTreePartInfo & part_info, const DataPartStates & valid_states, DataPartsLock & acquired_lock); + DataPartPtr getPartIfExists(const String & part_name, const DataPartStates & valid_states); + DataPartPtr getPartIfExists(const MergeTreePartInfo & part_info, const DataPartStates & valid_states); /// Total size of active parts in bytes. size_t getTotalActiveSizeInBytes() const; @@ -592,6 +593,8 @@ public: /// Used in REPLACE PARTITION command. void removePartsInRangeFromWorkingSet(MergeTreeTransaction * txn, const MergeTreePartInfo & drop_range, DataPartsLock & lock); + DataPartsVector grabActivePartsToRemoveForDropRange( + MergeTreeTransaction * txn, const MergeTreePartInfo & drop_range, DataPartsLock & lock); /// This wrapper is required to restrict access to parts in Deleting state class PartToRemoveFromZooKeeper { @@ -975,6 +978,14 @@ public: /// If one_part is true, fill in at most one part. Block getBlockWithVirtualPartColumns(const MergeTreeData::DataPartsVector & parts, bool one_part, bool ignore_empty = false) const; + /// In merge tree we do inserts with several steps. One of them: + /// X. write part to temporary directory with some temp name + /// Y. rename temporary directory to final name with correct block number value + /// As temp name MergeTree use just ordinary in memory counter, but in some cases + /// it can be useful to add additional part in temp name to avoid collisions on FS. + /// FIXME: Currently unused. + virtual std::string getPostfixForTempInsertName() const { return ""; } + /// For generating names of temporary parts during insertion. 
SimpleIncrement insert_increment; @@ -1088,6 +1099,8 @@ protected: struct TagByInfo{}; struct TagByStateAndInfo{}; + void initializeDirectoriesAndFormatVersion(const std::string & relative_data_path_, bool attach, const std::string & date_column_name, bool need_create_directories=true); + static const MergeTreePartInfo & dataPartPtrToInfo(const DataPartPtr & part) { return part->info; @@ -1318,6 +1331,12 @@ protected: static void incrementInsertedPartsProfileEvent(MergeTreeDataPartType type); static void incrementMergedPartsProfileEvent(MergeTreeDataPartType type); + bool addTempPart( + MutableDataPartPtr & part, + Transaction & out_transaction, + DataPartsLock & lock, + DataPartsVector * out_covered_parts); + private: /// Checking that candidate part doesn't break invariants: correct partition void checkPartPartition(MutableDataPartPtr & part, DataPartsLock & lock) const; @@ -1325,7 +1344,7 @@ private: /// Preparing itself to be committed in memory: fill some fields inside part, add it to data_parts_indexes /// in precommitted state and to transaction - void preparePartForCommit(MutableDataPartPtr & part, Transaction & out_transaction); + void preparePartForCommit(MutableDataPartPtr & part, Transaction & out_transaction, bool need_rename); /// Low-level method for preparing parts for commit (in-memory). /// FIXME Merge MergeTreeTransaction and Transaction diff --git a/src/Storages/MergeTree/MergeTreeDataFormatVersion.h b/src/Storages/MergeTree/MergeTreeDataFormatVersion.h index 1e21070dd6b..0a84f08ea71 100644 --- a/src/Storages/MergeTree/MergeTreeDataFormatVersion.h +++ b/src/Storages/MergeTree/MergeTreeDataFormatVersion.h @@ -8,6 +8,7 @@ namespace DB STRONG_TYPEDEF(UInt32, MergeTreeDataFormatVersion) +const MergeTreeDataFormatVersion MERGE_TREE_DATA_OLD_FORMAT_VERSION {0}; const MergeTreeDataFormatVersion MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING {1}; } diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 79670c0ab27..6c8b4a7ef57 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -65,8 +65,8 @@ static const double DISK_USAGE_COEFFICIENT_TO_SELECT = 2; /// because between selecting parts to merge and doing merge, amount of free space could have decreased. static const double DISK_USAGE_COEFFICIENT_TO_RESERVE = 1.1; -MergeTreeDataMergerMutator::MergeTreeDataMergerMutator(MergeTreeData & data_, size_t max_tasks_count_) - : data(data_), max_tasks_count(max_tasks_count_), log(&Poco::Logger::get(data.getLogName() + " (MergerMutator)")) +MergeTreeDataMergerMutator::MergeTreeDataMergerMutator(MergeTreeData & data_) + : data(data_), log(&Poco::Logger::get(data.getLogName() + " (MergerMutator)")) { } @@ -75,6 +75,7 @@ UInt64 MergeTreeDataMergerMutator::getMaxSourcePartsSizeForMerge() const { size_t scheduled_tasks_count = CurrentMetrics::values[CurrentMetrics::BackgroundMergesAndMutationsPoolTask].load(std::memory_order_relaxed); + auto max_tasks_count = data.getContext()->getMergeMutateExecutor()->getMaxTasksCount(); return getMaxSourcePartsSizeForMerge(max_tasks_count, scheduled_tasks_count); } @@ -114,7 +115,7 @@ UInt64 MergeTreeDataMergerMutator::getMaxSourcePartSizeForMutation() const /// DataPart can be store only at one disk. Get maximum reservable free space at all disks. 
UInt64 disk_space = data.getStoragePolicy()->getMaxUnreservedFreeSpace(); - + auto max_tasks_count = data.getContext()->getMergeMutateExecutor()->getMaxTasksCount(); /// Allow mutations only if there are enough threads, leave free threads for merges else if (occupied <= 1 || max_tasks_count - occupied >= data_settings->number_of_free_entries_in_pool_to_execute_mutation) @@ -523,6 +524,7 @@ MergeTaskPtr MergeTreeDataMergerMutator::mergePartsToTemporaryPart( const Names & deduplicate_by_columns, const MergeTreeData::MergingParams & merging_params, const MergeTreeTransactionPtr & txn, + bool need_prefix, IMergeTreeDataPart * parent_part, const String & suffix) { @@ -537,6 +539,7 @@ MergeTaskPtr MergeTreeDataMergerMutator::mergePartsToTemporaryPart( deduplicate, deduplicate_by_columns, merging_params, + need_prefix, parent_part, suffix, txn, @@ -556,7 +559,8 @@ MutateTaskPtr MergeTreeDataMergerMutator::mutatePartToTemporaryPart( ContextPtr context, const MergeTreeTransactionPtr & txn, ReservationSharedPtr space_reservation, - TableLockHolder & holder) + TableLockHolder & holder, + bool need_prefix) { return std::make_shared( future_part, @@ -570,7 +574,8 @@ MutateTaskPtr MergeTreeDataMergerMutator::mutatePartToTemporaryPart( txn, data, *this, - merges_blocker + merges_blocker, + need_prefix ); } @@ -627,7 +632,7 @@ MergeTreeData::DataPartPtr MergeTreeDataMergerMutator::renameMergedTemporaryPart + " instead of " + parts[i]->name, ErrorCodes::LOGICAL_ERROR); } - LOG_TRACE(log, "Merged {} parts: from {} to {}", parts.size(), parts.front()->name, parts.back()->name); + LOG_TRACE(log, "Merged {} parts: [{}, {}] -> {}", parts.size(), parts.front()->name, parts.back()->name, new_data_part->name); return new_data_part; } diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h index 5d98f526325..b5143834650 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h @@ -45,7 +45,7 @@ public: const MergeTreeTransaction *, String *)>; - MergeTreeDataMergerMutator(MergeTreeData & data_, size_t max_tasks_count_); + explicit MergeTreeDataMergerMutator(MergeTreeData & data_); /** Get maximum total size of parts to do merge, at current moment of time. * It depends on number of free threads in background_pool and amount of free space in disk.
@@ -113,6 +113,7 @@ public: const Names & deduplicate_by_columns, const MergeTreeData::MergingParams & merging_params, const MergeTreeTransactionPtr & txn, + bool need_prefix = true, IMergeTreeDataPart * parent_part = nullptr, const String & suffix = ""); @@ -126,7 +127,8 @@ public: ContextPtr context, const MergeTreeTransactionPtr & txn, ReservationSharedPtr space_reservation, - TableLockHolder & table_lock_holder); + TableLockHolder & table_lock_holder, + bool need_prefix = true); MergeTreeData::DataPartPtr renameMergedTemporaryPart( MergeTreeData::MutableDataPartPtr & new_data_part, @@ -155,7 +157,6 @@ public : private: MergeTreeData & data; - const size_t max_tasks_count; Poco::Logger * log; diff --git a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp index 48b1b6bab60..ac56868894f 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp @@ -149,10 +149,10 @@ MutableDataPartStoragePtr MergeTreeDataPartInMemory::flushToDisk(const String & return new_data_part_storage; } -void MergeTreeDataPartInMemory::makeCloneInDetached(const String & prefix, const StorageMetadataPtr & metadata_snapshot) const +DataPartStoragePtr MergeTreeDataPartInMemory::makeCloneInDetached(const String & prefix, const StorageMetadataPtr & metadata_snapshot) const { String detached_path = *getRelativePathForDetachedPart(prefix, /* broken */ false); - flushToDisk(detached_path, metadata_snapshot); + return flushToDisk(detached_path, metadata_snapshot); } void MergeTreeDataPartInMemory::renameTo(const String & new_relative_path, bool /* remove_new_dir_if_exists */) diff --git a/src/Storages/MergeTree/MergeTreeDataPartInMemory.h b/src/Storages/MergeTree/MergeTreeDataPartInMemory.h index e58701b04a1..acb1cd8c844 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartInMemory.h +++ b/src/Storages/MergeTree/MergeTreeDataPartInMemory.h @@ -47,7 +47,7 @@ public: bool hasColumnFiles(const NameAndTypePair & column) const override { return !!getColumnPosition(column.getNameInStorage()); } String getFileNameForColumn(const NameAndTypePair & /* column */) const override { return ""; } void renameTo(const String & new_relative_path, bool remove_new_dir_if_exists) override; - void makeCloneInDetached(const String & prefix, const StorageMetadataPtr & metadata_snapshot) const override; + DataPartStoragePtr makeCloneInDetached(const String & prefix, const StorageMetadataPtr & metadata_snapshot) const override; MutableDataPartStoragePtr flushToDisk(const String & new_relative_path, const StorageMetadataPtr & metadata_snapshot) const; diff --git a/src/Storages/MergeTree/MergeTreeDataPartState.h b/src/Storages/MergeTree/MergeTreeDataPartState.h index a52f7559375..5c4779f016e 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartState.h +++ b/src/Storages/MergeTree/MergeTreeDataPartState.h @@ -3,24 +3,23 @@ namespace DB { -/** - * Part state is a stage of its lifetime. States are ordered and state of a part could be increased only. - * Part state should be modified under data_parts mutex. 
- * - * Possible state transitions: - * Temporary -> PreActive: we are trying to add a fetched, inserted or merged part to active set - * PreActive -> Outdated: we could not add a part to active set and are doing a rollback (for example it is duplicated part) - * PreActive -> Active: we successfully added a part to active dataset - * PreActive -> Outdated: a part was replaced by a covering part or DROP PARTITION - * Outdated -> Deleting: a cleaner selected this part for deletion - * Deleting -> Outdated: if an ZooKeeper error occurred during the deletion, we will retry deletion - * Active -> DeleteOnDestroy: if part was moved to another disk - */ +/** Part state is a stage of its lifetime. States are ordered and state of a part could be increased only. + * Part state should be modified under data_parts mutex. + * + * Possible state transitions: + * Temporary -> PreActive: we are trying to add a fetched, inserted or merged part to active set + * PreActive -> Outdated: we could not add a part to active set and are doing a rollback (for example it is duplicated part) + * PreActive -> Active: we successfully added a part to active dataset + * PreActive -> Outdated: a part was replaced by a covering part or DROP PARTITION + * Outdated -> Deleting: a cleaner selected this part for deletion + * Deleting -> Outdated: if an ZooKeeper error occurred during the deletion, we will retry deletion + * Active -> DeleteOnDestroy: if part was moved to another disk + */ enum class MergeTreeDataPartState { Temporary, /// the part is generating now, it is not in data_parts list - PreActive, /// the part is in data_parts, but not used for SELECTs - Active, /// active data part, used by current and upcoming SELECTs + PreActive, /// the part is in data_parts, but not used for SELECTs + Active, /// active data part, used by current and upcoming SELECTs Outdated, /// not active data part, but could be used by only current SELECTs, could be deleted after SELECTs finishes Deleting, /// not active data part with identity refcounter, it is deleting right now by a cleaner DeleteOnDestroy, /// part was moved to another disk and should be deleted in own destructor diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp index 020121e59d7..4c1d117ac73 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp @@ -110,7 +110,7 @@ Granules getGranulesToWrite(const MergeTreeIndexGranularity & index_granularity, .is_complete = (rows_left_in_block >= expected_rows_in_mark) }); current_row += result.back().rows_to_write; - current_mark++; + ++current_mark; } return result; @@ -146,6 +146,7 @@ void MergeTreeDataPartWriterCompact::write(const Block & block, const IColumn::P if (compute_granularity) { size_t index_granularity_for_block = computeIndexGranularity(block); + assert(index_granularity_for_block >= 1); fillIndexGranularity(index_granularity_for_block, block.rows()); } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp index a887b0ee322..fbcf8cb241c 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp @@ -135,7 +135,9 @@ static size_t computeIndexGranularityImpl( size_t rows_in_block = block.rows(); size_t index_granularity_for_block; if (!can_use_adaptive_index_granularity) + { index_granularity_for_block = 
fixed_index_granularity_rows; + } else { size_t block_size_in_memory = block.bytes(); @@ -152,11 +154,13 @@ static size_t computeIndexGranularityImpl( index_granularity_for_block = index_granularity_bytes / size_of_row_in_bytes; } } - if (index_granularity_for_block == 0) /// very rare case when index granularity bytes less then single row - index_granularity_for_block = 1; - /// We should be less or equal than fixed index granularity index_granularity_for_block = std::min(fixed_index_granularity_rows, index_granularity_for_block); + + /// very rare case when index granularity bytes less then single row + if (index_granularity_for_block == 0) + index_granularity_for_block = 1; + return index_granularity_for_block; } diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 65f54495b3c..1ca1779e4b0 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -466,6 +466,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::read( fmt::format("MergeTree(with {} projection {})", query_info.projection->desc->type, query_info.projection->desc->name), query_info.storage_limits); plan->addStep(std::move(step)); + plan->addInterpreterContext(query_info.projection->context); return plan; } diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index c50c01ea356..e314c3f2e58 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -322,8 +322,19 @@ Block MergeTreeDataWriter::mergeBlock( return block.cloneWithColumns(status.chunk.getColumns()); } -MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPart( - BlockWithPartition & block_with_partition, const StorageMetadataPtr & metadata_snapshot, ContextPtr context) + +MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPart(BlockWithPartition & block, const StorageMetadataPtr & metadata_snapshot, ContextPtr context) +{ + return writeTempPartImpl(block, metadata_snapshot, context, data.insert_increment.get(), /*need_tmp_prefix = */true); +} + +MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPartWithoutPrefix(BlockWithPartition & block, const StorageMetadataPtr & metadata_snapshot, int64_t block_number, ContextPtr context) +{ + return writeTempPartImpl(block, metadata_snapshot, context, block_number, /*need_tmp_prefix = */false); +} + +MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPartImpl( + BlockWithPartition & block_with_partition, const StorageMetadataPtr & metadata_snapshot, ContextPtr context, int64_t block_number, bool need_tmp_prefix) { TemporaryPart temp_part; Block & block = block_with_partition.block; @@ -334,17 +345,12 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPart( if (column.type->hasDynamicSubcolumns()) column.type = block.getByName(column.name).type; - static const String TMP_PREFIX = "tmp_insert_"; - - /// This will generate unique name in scope of current server process. 
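A standalone sketch of the adaptive branch of the index-granularity computation changed above, keeping the ordering the patch establishes: the value is first capped by the fixed granularity and only then raised to at least one row, so a block whose single row is larger than index_granularity_bytes still gets a valid granularity. The helper and its parameter names are simplified for illustration and are not part of the patch:

#include <algorithm>
#include <cstddef>

size_t computeAdaptiveGranularityForBlock(
    size_t rows_in_block,
    size_t block_bytes,
    size_t index_granularity_bytes,        /// adaptive target in bytes
    size_t fixed_index_granularity_rows)   /// upper bound in rows
{
    size_t size_of_row_in_bytes = block_bytes / std::max<size_t>(1, rows_in_block);
    size_t granularity = index_granularity_bytes / std::max<size_t>(1, size_of_row_in_bytes);

    /// We should be less or equal than the fixed index granularity.
    granularity = std::min(fixed_index_granularity_rows, granularity);

    /// Very rare case: a single row occupies more than index_granularity_bytes.
    if (granularity == 0)
        granularity = 1;

    return granularity;
}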
- Int64 temp_index = data.insert_increment.get(); - auto minmax_idx = std::make_shared(); minmax_idx->update(block, data.getMinMaxColumnsNames(metadata_snapshot->getPartitionKey())); - MergeTreePartition partition(std::move(block_with_partition.partition)); + MergeTreePartition partition(block_with_partition.partition); - MergeTreePartInfo new_part_info(partition.getID(metadata_snapshot->getPartitionKey().sample_block), temp_index, temp_index, 0); + MergeTreePartInfo new_part_info(partition.getID(metadata_snapshot->getPartitionKey().sample_block), block_number, block_number, 0); String part_name; if (data.format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) { @@ -364,7 +370,19 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPart( else part_name = new_part_info.getPartName(); - String part_dir = TMP_PREFIX + part_name; + std::string part_dir; + if (need_tmp_prefix) + { + std::string temp_prefix = "tmp_insert_"; + const auto & temp_postfix = data.getPostfixForTempInsertName(); + if (!temp_postfix.empty()) + temp_prefix += temp_postfix + "_"; + part_dir = temp_prefix + part_name; + } + else + { + part_dir = part_name; + } temp_part.temporary_directory_lock = data.getTemporaryPartDirectoryHolder(part_dir); /// If we need to calculate some columns to sort. @@ -419,7 +437,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPart( auto data_part_storage = std::make_shared( data_part_volume, data.relative_data_path, - TMP_PREFIX + part_name); + part_dir); data_part_storage->beginTransaction(); @@ -549,7 +567,10 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeProjectionPartImpl( } auto relative_path = part_name + (is_temp ? ".tmp_proj" : ".proj"); - auto projection_part_storage = parent_part->getDataPartStorage().getProjection(relative_path); + auto projection_part_storage = parent_part->getDataPartStorage().getProjection(relative_path, !is_temp); + if (is_temp) + projection_part_storage->beginTransaction(); + auto new_data_part = data.createPart( part_name, part_type, diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.h b/src/Storages/MergeTree/MergeTreeDataWriter.h index 2d7e19cf9d5..cbf8094f7fd 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.h +++ b/src/Storages/MergeTree/MergeTreeDataWriter.h @@ -79,6 +79,8 @@ public: */ TemporaryPart writeTempPart(BlockWithPartition & block, const StorageMetadataPtr & metadata_snapshot, ContextPtr context); + TemporaryPart writeTempPartWithoutPrefix(BlockWithPartition & block, const StorageMetadataPtr & metadata_snapshot, int64_t block_number, ContextPtr context); + /// For insertion. 
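The directory naming used by writeTempPartImpl() above can be summarized by a small helper (illustrative only; this function does not exist in the patch): a temporary insert gets the tmp_insert_ prefix, optionally extended with an engine-specific postfix from getPostfixForTempInsertName() to avoid filesystem collisions, while a prefix-less write uses the final part name directly.

#include <string>

std::string makePartDirName(const std::string & part_name, bool need_tmp_prefix, const std::string & temp_postfix)
{
    if (!need_tmp_prefix)
        return part_name;

    std::string temp_prefix = "tmp_insert_";
    if (!temp_postfix.empty())
        temp_prefix += temp_postfix + "_";   /// engine-specific disambiguator, if any
    return temp_prefix + part_name;          /// e.g. "tmp_insert_all_1_1_0" (hypothetical part name)
}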
static TemporaryPart writeProjectionPart( const MergeTreeData & data, @@ -104,6 +106,14 @@ public: const MergeTreeData::MergingParams & merging_params); private: + + TemporaryPart writeTempPartImpl( + BlockWithPartition & block, + const StorageMetadataPtr & metadata_snapshot, + ContextPtr context, + int64_t block_number, + bool need_tmp_prefix); + static TemporaryPart writeProjectionPartImpl( const String & part_name, bool is_temp, diff --git a/src/Storages/MergeTree/MergeTreeIndexHypothesisMergedCondition.cpp b/src/Storages/MergeTree/MergeTreeIndexHypothesisMergedCondition.cpp index 3a145c0f505..c62b5e86c75 100644 --- a/src/Storages/MergeTree/MergeTreeIndexHypothesisMergedCondition.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexHypothesisMergedCondition.cpp @@ -88,7 +88,7 @@ void MergeTreeIndexhypothesisMergedCondition::addConstraints(const ConstraintsDe /// Replaces < -> <=, > -> >= and assumes that all hypotheses are true then checks if path exists bool MergeTreeIndexhypothesisMergedCondition::alwaysUnknownOrTrue() const { - std::vector active_atomic_formulas(atomic_constraints); + ASTs active_atomic_formulas(atomic_constraints); for (const auto & hypothesis : index_to_compare_atomic_hypotheses) { active_atomic_formulas.insert( @@ -190,7 +190,7 @@ bool MergeTreeIndexhypothesisMergedCondition::mayBeTrueOnGranule(const MergeTree std::unique_ptr MergeTreeIndexhypothesisMergedCondition::buildGraph(const std::vector & values) const { - std::vector active_atomic_formulas(atomic_constraints); + ASTs active_atomic_formulas(atomic_constraints); for (size_t i = 0; i < values.size(); ++i) { if (values[i]) diff --git a/src/Storages/MergeTree/MergeTreeIndexHypothesisMergedCondition.h b/src/Storages/MergeTree/MergeTreeIndexHypothesisMergedCondition.h index 9ebcbe9d7dc..6153c214898 100644 --- a/src/Storages/MergeTree/MergeTreeIndexHypothesisMergedCondition.h +++ b/src/Storages/MergeTree/MergeTreeIndexHypothesisMergedCondition.h @@ -34,7 +34,7 @@ private: std::vector> index_to_compare_atomic_hypotheses; std::vector> index_to_atomic_hypotheses; - std::vector atomic_constraints; + ASTs atomic_constraints; }; } diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.cpp b/src/Storages/MergeTree/MergeTreeIndexSet.cpp index ef3c8da9f70..db99a2f37be 100644 --- a/src/Storages/MergeTree/MergeTreeIndexSet.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexSet.cpp @@ -429,7 +429,7 @@ const ActionsDAG::Node * MergeTreeIndexConditionSet::atomFromDAG(const ActionsDA return nullptr; } - return &result_dag->addFunction(node.function_builder, children, {}); + return &result_dag->addFunction(node.function_base, children, {}); } const ActionsDAG::Node * MergeTreeIndexConditionSet::operatorFromDAG(const ActionsDAG::Node & node, @@ -523,7 +523,7 @@ bool MergeTreeIndexConditionSet::checkDAGUseless(const ActionsDAG::Node & node, if (key_columns.contains(column_name)) return false; - auto function_name = node.function_builder->getName(); + auto function_name = node.function_base->getName(); const auto & arguments = node.children; if (function_name == "and" || function_name == "indexHint") diff --git a/src/Storages/MergeTree/MergeTreeMutationEntry.cpp b/src/Storages/MergeTree/MergeTreeMutationEntry.cpp index a222f2a8ad8..9906ea3d02a 100644 --- a/src/Storages/MergeTree/MergeTreeMutationEntry.cpp +++ b/src/Storages/MergeTree/MergeTreeMutationEntry.cpp @@ -61,7 +61,7 @@ MergeTreeMutationEntry::MergeTreeMutationEntry(MutationCommands commands_, DiskP *out << "format version: 1\n" << "create time: " << LocalDateTime(create_time) << 
"\n"; *out << "commands: "; - commands.writeText(*out); + commands.writeText(*out, /* with_pure_metadata_commands = */ false); *out << "\n"; if (tid.isPrehistoric()) { @@ -174,7 +174,7 @@ std::shared_ptr MergeTreeMutationEntry::backup() const out << "block number: " << block_number << "\n"; out << "commands: "; - commands.writeText(out); + commands.writeText(out, /* with_pure_metadata_commands = */ false); out << "\n"; return std::make_shared(out.str()); diff --git a/src/Storages/MergeTree/MergeTreeMutationStatus.h b/src/Storages/MergeTree/MergeTreeMutationStatus.h index acda43b9254..5f29b777293 100644 --- a/src/Storages/MergeTree/MergeTreeMutationStatus.h +++ b/src/Storages/MergeTree/MergeTreeMutationStatus.h @@ -27,6 +27,9 @@ struct MergeTreeMutationStatus String latest_failed_part; time_t latest_fail_time = 0; String latest_fail_reason; + + /// FIXME: currently unused, but would be much better to report killed mutations with this flag. + bool is_killed = false; }; /// Check mutation status and throw exception in case of error during mutation diff --git a/src/Storages/MergeTree/MergeTreePartInfo.cpp b/src/Storages/MergeTree/MergeTreePartInfo.cpp index 8c518e4d17f..f537e7cb285 100644 --- a/src/Storages/MergeTree/MergeTreePartInfo.cpp +++ b/src/Storages/MergeTree/MergeTreePartInfo.cpp @@ -18,7 +18,7 @@ MergeTreePartInfo MergeTreePartInfo::fromPartName(const String & part_name, Merg if (auto part_opt = tryParsePartName(part_name, format_version)) return *part_opt; else - throw Exception(ErrorCodes::BAD_DATA_PART_NAME, "Unexpected part name: {}", part_name); + throw Exception(ErrorCodes::BAD_DATA_PART_NAME, "Unexpected part name: {} for format version: {}", part_name, format_version); } void MergeTreePartInfo::validatePartitionID(const String & partition_id, MergeTreeDataFormatVersion format_version) diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.cpp b/src/Storages/MergeTree/MergeTreeRangeReader.cpp index 1a5a4d91806..ac5c3b1db2d 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.cpp +++ b/src/Storages/MergeTree/MergeTreeRangeReader.cpp @@ -30,13 +30,17 @@ namespace ErrorCodes } -static void filterColumns(Columns & columns, const IColumn::Filter & filter) +static void filterColumns(Columns & columns, const IColumn::Filter & filter, size_t filter_bytes) { for (auto & column : columns) { if (column) { - column = column->filter(filter, -1); + if (column->size() != filter.size()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of column {} doesn't match size of filter {}", + column->size(), filter.size()); + + column = column->filter(filter, filter_bytes); if (column->empty()) { @@ -47,13 +51,12 @@ static void filterColumns(Columns & columns, const IColumn::Filter & filter) } } -static void filterColumns(Columns & columns, const ColumnPtr & filter) +static void filterColumns(Columns & columns, const FilterWithCachedCount & filter) { - ConstantFilterDescription const_descr(*filter); - if (const_descr.always_true) + if (filter.alwaysTrue()) return; - if (const_descr.always_false) + if (filter.alwaysFalse()) { for (auto & col : columns) if (col) @@ -62,8 +65,7 @@ static void filterColumns(Columns & columns, const ColumnPtr & filter) return; } - FilterDescription descr(*filter); - filterColumns(columns, *descr.data); + filterColumns(columns, filter.getData(), filter.countBytesInFilter()); } @@ -320,11 +322,13 @@ void MergeTreeRangeReader::ReadResult::clear() num_rows_to_skip_in_last_granule += rows_per_granule.back(); rows_per_granule.assign(rows_per_granule.size(), 0); 
total_rows_per_granule = 0; - filter_holder = nullptr; - filter = nullptr; + final_filter = FilterWithCachedCount(); + num_rows = 0; + columns.clear(); + additional_columns.clear(); } -void MergeTreeRangeReader::ReadResult::shrink(Columns & old_columns) +void MergeTreeRangeReader::ReadResult::shrink(Columns & old_columns, const NumRows & rows_per_granule_previous) const { for (auto & column : old_columns) { @@ -337,9 +341,12 @@ void MergeTreeRangeReader::ReadResult::shrink(Columns & old_columns) continue; } + LOG_TEST(log, "ReadResult::shrink() column size: {} total_rows_per_granule: {}", + column->size(), total_rows_per_granule); + auto new_column = column->cloneEmpty(); new_column->reserve(total_rows_per_granule); - for (size_t j = 0, pos = 0; j < rows_per_granule_original.size(); pos += rows_per_granule_original[j++]) + for (size_t j = 0, pos = 0; j < rows_per_granule_previous.size(); pos += rows_per_granule_previous[j++]) { if (rows_per_granule[j]) new_column->insertRangeFrom(*column, pos, rows_per_granule[j]); @@ -348,74 +355,265 @@ void MergeTreeRangeReader::ReadResult::shrink(Columns & old_columns) } } +/// The main invariant of the data in the read result is that he number of rows is +/// either equal to total_rows_per_granule (if filter has not been applied) or to the number of +/// 1s in the filter (if filter has been applied). +void MergeTreeRangeReader::ReadResult::checkInternalConsistency() const +{ + /// Check that filter size matches number of rows that will be read. + if (final_filter.present() && final_filter.size() != total_rows_per_granule) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Final filter size {} doesn't match total_rows_per_granule {}", + final_filter.size(), total_rows_per_granule); + + /// Check that num_rows is consistent with final_filter and rows_per_granule. + if (final_filter.present() && final_filter.countBytesInFilter() != num_rows && total_rows_per_granule != num_rows) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Number of rows {} doesn't match neither filter 1s count {} nor total_rows_per_granule {}", + num_rows, final_filter.countBytesInFilter(), total_rows_per_granule); + + /// Check that additional columns have the same number of rows as the main columns. 
+ if (additional_columns && additional_columns.rows() != num_rows) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Number of rows in additional columns {} is not equal to number of rows in result columns {}", + additional_columns.rows(), num_rows); + + for (const auto & column : columns) + { + if (column) + chassert(column->size() == num_rows); + } +} + +std::string MergeTreeRangeReader::ReadResult::dumpInfo() const +{ + WriteBufferFromOwnString out; + out << "num_rows: " << num_rows + << ", columns: " << columns.size() + << ", total_rows_per_granule: " << total_rows_per_granule; + if (final_filter.present()) + { + out << ", filter size: " << final_filter.size() + << ", filter 1s: " << final_filter.countBytesInFilter(); + } + else + { + out << ", no filter"; + } + for (size_t ci = 0; ci < columns.size(); ++ci) + { + out << ", column[" << ci << "]: "; + if (!columns[ci]) + out << " nullptr"; + else + { + out << " " << columns[ci]->dumpStructure(); + } + } + if (additional_columns) + { + out << ", additional_columns: " << additional_columns.dumpStructure(); + } + return out.str(); +} + +static std::string dumpNames(const NamesAndTypesList & columns) +{ + WriteBufferFromOwnString out; + for (auto it = columns.begin(); it != columns.end(); ++it) + { + if (it != columns.begin()) + out << ", "; + out << it->name; + } + return out.str(); +} + void MergeTreeRangeReader::ReadResult::setFilterConstTrue() { - clearFilter(); - filter_holder = DataTypeUInt8().createColumnConst(num_rows, 1u); + /// Remove the filter, so newly read columns will not be filtered. + final_filter = FilterWithCachedCount(); } -void MergeTreeRangeReader::ReadResult::setFilterConstFalse() +static ColumnPtr andFilters(ColumnPtr c1, ColumnPtr c2) { - clearFilter(); - columns.clear(); - num_rows = 0; + if (c1->size() != c2->size()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of filters don't match: {} and {}", + c1->size(), c2->size()); + + // TODO: use proper vectorized implementation of AND? + auto res = ColumnUInt8::create(c1->size()); + auto & res_data = res->getData(); + const auto & c1_data = typeid_cast(*c1).getData(); + const auto & c2_data = typeid_cast(*c2).getData(); + const size_t size = c1->size(); + const size_t step = 16; + size_t i = 0; + /// NOTE: '&&' must be used instead of '&' for 'AND' operation because UInt8 columns might contain any non-zero + /// value for true and we cannot bitwise AND them to get the correct result. 
+ for (; i + step < size; i += step) + for (size_t j = 0; j < step; ++j) + res_data[i+j] = (c1_data[i+j] && c2_data[i+j]); + for (; i < size; ++i) + res_data[i] = (c1_data[i] && c2_data[i]); + return res; } -void MergeTreeRangeReader::ReadResult::optimize(bool can_read_incomplete_granules, bool allow_filter_columns) +static ColumnPtr combineFilters(ColumnPtr first, ColumnPtr second); + +void MergeTreeRangeReader::ReadResult::applyFilter(const FilterWithCachedCount & filter) { - if (total_rows_per_granule == 0 || filter == nullptr) + if (filter.size() != num_rows) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Filter size {} doesn't match number of rows {}", + filter.size(), num_rows); + + LOG_TEST(log, "ReadResult::applyFilter() num_rows before: {}", num_rows); + + filterColumns(columns, filter); + + { + auto tmp_columns = additional_columns.getColumns(); + filterColumns(tmp_columns, filter); + if (!tmp_columns.empty()) + additional_columns.setColumns(tmp_columns); + else + additional_columns.clear(); + } + + num_rows = filter.countBytesInFilter(); + + LOG_TEST(log, "ReadResult::applyFilter() num_rows after: {}", num_rows); +} + +void MergeTreeRangeReader::ReadResult::optimize(const FilterWithCachedCount & current_filter, bool can_read_incomplete_granules) +{ + checkInternalConsistency(); + + /// Combine new filter with the previous one if it is present. + /// This filter has the size of total_rows_per granule. It is applied after reading contiguous chunks from + /// the start of each granule. + FilterWithCachedCount filter = current_filter; + if (final_filter.present()) + { + /// If current filter has the same size as the final filter, it means that the final filter has not been applied. + /// In this case we AND current filter with the existing final filter. + /// In other case, when the final filter has been applied, the size of current step filter will be equal to number of ones + /// in the final filter. In this case we combine current filter with the final filter. + ColumnPtr combined_filter; + if (current_filter.size() == final_filter.size()) + combined_filter = andFilters(final_filter.getColumn(), current_filter.getColumn()); + else + combined_filter = combineFilters(final_filter.getColumn(), current_filter.getColumn()); + + filter = FilterWithCachedCount(combined_filter); + } + + if (total_rows_per_granule == 0 || !filter.present()) return; NumRows zero_tails; - auto total_zero_rows_in_tails = countZeroTails(filter->getData(), zero_tails, can_read_incomplete_granules); + auto total_zero_rows_in_tails = countZeroTails(filter.getData(), zero_tails, can_read_incomplete_granules); - if (total_zero_rows_in_tails == filter->size()) + LOG_TEST(log, "ReadResult::optimize() before: {}", dumpInfo()); + + SCOPE_EXIT(checkInternalConsistency()); + + SCOPE_EXIT({ + LOG_TEST(log, "ReadResult::optimize() after: {}", dumpInfo()); + }); + + if (total_zero_rows_in_tails == filter.size()) { + LOG_TEST(log, "ReadResult::optimize() combined filter is const False"); clear(); return; } - else if (total_zero_rows_in_tails == 0 && countBytesInResultFilter(filter->getData()) == filter->size()) + else if (total_zero_rows_in_tails == 0 && filter.countBytesInFilter() == filter.size()) { + LOG_TEST(log, "ReadResult::optimize() combined filter is const True"); setFilterConstTrue(); return; } /// Just a guess. If only a few rows may be skipped, it's better not to skip at all. 
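The NOTE above about using '&&' instead of '&' can be demonstrated with a tiny standalone example: a UInt8 filter treats any non-zero byte as true, so two truthy bytes with disjoint bit patterns would be lost by a bitwise AND. The values below are made up purely for illustration:

#include <cassert>
#include <cstdint>

int main()
{
    uint8_t a = 2;   /// non-zero, i.e. "true" in a UInt8 filter
    uint8_t b = 4;   /// also "true", but shares no set bits with a

    assert((a & b) == 0);       /// bitwise AND would incorrectly drop the row
    assert((a && b) == true);   /// logical AND keeps it, which is what the patch relies on
    return 0;
}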
- else if (2 * total_zero_rows_in_tails > filter->size()) + else if (2 * total_zero_rows_in_tails > filter.size()) { + const NumRows rows_per_granule_previous = rows_per_granule; + const size_t total_rows_per_granule_previous = total_rows_per_granule; + for (auto i : collections::range(0, rows_per_granule.size())) { - rows_per_granule_original.push_back(rows_per_granule[i]); rows_per_granule[i] -= zero_tails[i]; } - num_rows_to_skip_in_last_granule += rows_per_granule_original.back() - rows_per_granule.back(); + num_rows_to_skip_in_last_granule += rows_per_granule_previous.back() - rows_per_granule.back(); + total_rows_per_granule = total_rows_per_granule_previous - total_zero_rows_in_tails; - filter_original = filter; - filter_holder_original = std::move(filter_holder); - - /// Check if const 1 after shrink - if (allow_filter_columns && countBytesInResultFilter(filter->getData()) + total_zero_rows_in_tails == total_rows_per_granule) + /// Check if const 1 after shrink. + /// We can apply shrink only if after the previous step the number of rows in the result + /// matches the rows_per_granule info. Otherwise we will not be able to match newly added zeros in granule tails. + if (num_rows == total_rows_per_granule_previous && + filter.countBytesInFilter() + total_zero_rows_in_tails == total_rows_per_granule_previous) /// All zeros are in tails? { - total_rows_per_granule = total_rows_per_granule - total_zero_rows_in_tails; - num_rows = total_rows_per_granule; setFilterConstTrue(); - shrink(columns); /// shrink acts as filtering in such case + + /// If all zeros are in granule tails, we can use shrink to filter out rows. + shrink(columns, rows_per_granule_previous); /// shrink acts as filtering in such case + auto c = additional_columns.getColumns(); + shrink(c, rows_per_granule_previous); + additional_columns.setColumns(c); + + num_rows = total_rows_per_granule; + + LOG_TEST(log, "ReadResult::optimize() after shrink {}", dumpInfo()); } else { - auto new_filter = ColumnUInt8::create(filter->size() - total_zero_rows_in_tails); + auto new_filter = ColumnUInt8::create(filter.size() - total_zero_rows_in_tails); IColumn::Filter & new_data = new_filter->getData(); - collapseZeroTails(filter->getData(), new_data); - total_rows_per_granule = new_filter->size(); - num_rows = total_rows_per_granule; - filter = new_filter.get(); - filter_holder = std::move(new_filter); + /// Shorten the filter by removing zeros from granule tails + collapseZeroTails(filter.getData(), rows_per_granule_previous, new_data); + if (total_rows_per_granule != new_filter->size()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "New filter size {} doesn't match number of rows to be read {}", + new_filter->size(), total_rows_per_granule); + + /// Need to apply combined filter here before replacing it with shortened one because otherwise + /// the filter size will not match the number of rows in the result columns. + if (num_rows == total_rows_per_granule_previous) + { + /// Filter from the previous steps has not been applied yet, do it now. + applyFilter(filter); + } + else + { + /// Filter was applied before, so apply only new filter from the current step. 
+ applyFilter(current_filter); + } + + final_filter = FilterWithCachedCount(new_filter->getPtr()); + if (num_rows != final_filter.countBytesInFilter()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Count of 1s in final filter {} doesn't match number of rows {}", + final_filter.countBytesInFilter(), num_rows); + + LOG_TEST(log, "ReadResult::optimize() after colapseZeroTails {}", dumpInfo()); } - need_filter = true; } - /// Another guess, if it's worth filtering at PREWHERE - else if (countBytesInResultFilter(filter->getData()) < 0.6 * filter->size()) - need_filter = true; + else + { + /// Check if we have rows already filtered at the previous step. In such case we must apply the filter because + /// otherwise num_rows doesn't match total_rows_per_granule and the next read step will not know how to filter + /// newly read columns to match the num_rows. + if (num_rows != total_rows_per_granule) + { + applyFilter(current_filter); + } + /// Another guess, if it's worth filtering at PREWHERE + else if (filter.countBytesInFilter() < 0.6 * filter.size()) + { + applyFilter(filter); + } + + final_filter = std::move(filter); + } } size_t MergeTreeRangeReader::ReadResult::countZeroTails(const IColumn::Filter & filter_vec, NumRows & zero_tails, bool can_read_incomplete_granules) const @@ -441,7 +639,7 @@ size_t MergeTreeRangeReader::ReadResult::countZeroTails(const IColumn::Filter & return total_zero_rows_in_tails; } -void MergeTreeRangeReader::ReadResult::collapseZeroTails(const IColumn::Filter & filter_vec, IColumn::Filter & new_filter_vec) +void MergeTreeRangeReader::ReadResult::collapseZeroTails(const IColumn::Filter & filter_vec, const NumRows & rows_per_granule_previous, IColumn::Filter & new_filter_vec) const { const auto * filter_data = filter_vec.data(); auto * new_filter_data = new_filter_vec.data(); @@ -449,7 +647,7 @@ void MergeTreeRangeReader::ReadResult::collapseZeroTails(const IColumn::Filter & for (auto i : collections::range(0, rows_per_granule.size())) { memcpySmallAllowReadWriteOverflow15(new_filter_data, filter_data, rows_per_granule[i]); - filter_data += rows_per_granule_original[i]; + filter_data += rows_per_granule_previous[i]; new_filter_data += rows_per_granule[i]; } @@ -597,54 +795,6 @@ size_t MergeTreeRangeReader::ReadResult::numZerosInTail(const UInt8 * begin, con return count; } -/// Filter size must match total_rows_per_granule -void MergeTreeRangeReader::ReadResult::setFilter(const ColumnPtr & new_filter) -{ - if (!new_filter && filter) - throw Exception("Can't replace existing filter with empty.", ErrorCodes::LOGICAL_ERROR); - - if (filter) - { - size_t new_size = new_filter->size(); - - if (new_size != total_rows_per_granule) - throw Exception("Can't set filter because it's size is " + toString(new_size) + " but " - + toString(total_rows_per_granule) + " rows was read.", ErrorCodes::LOGICAL_ERROR); - } - - ConstantFilterDescription const_description(*new_filter); - if (const_description.always_true) - { - setFilterConstTrue(); - } - else if (const_description.always_false) - { - clear(); - } - else - { - FilterDescription filter_description(*new_filter); - filter_holder = filter_description.data_holder ? 
filter_description.data_holder : new_filter; - filter = typeid_cast(filter_holder.get()); - if (!filter) - throw Exception("setFilter function expected ColumnUInt8.", ErrorCodes::LOGICAL_ERROR); - } -} - - -size_t MergeTreeRangeReader::ReadResult::countBytesInResultFilter(const IColumn::Filter & filter_) -{ - auto it = filter_bytes_map.find(&filter_); - if (it == filter_bytes_map.end()) - { - auto bytes = countBytesInFilter(filter_); - filter_bytes_map[&filter_] = bytes; - return bytes; - } - else - return it->second; -} - MergeTreeRangeReader::MergeTreeRangeReader( IMergeTreeReader * merge_tree_reader_, MergeTreeRangeReader * prev_reader_, @@ -659,30 +809,37 @@ MergeTreeRangeReader::MergeTreeRangeReader( , is_initialized(true) { if (prev_reader) - sample_block = prev_reader->getSampleBlock(); + result_sample_block = prev_reader->getSampleBlock(); for (const auto & name_and_type : merge_tree_reader->getColumns()) - sample_block.insert({name_and_type.type->createColumn(), name_and_type.type, name_and_type.name}); + { + read_sample_block.insert({name_and_type.type->createColumn(), name_and_type.type, name_and_type.name}); + result_sample_block.insert({name_and_type.type->createColumn(), name_and_type.type, name_and_type.name}); + } for (const auto & column_name : non_const_virtual_column_names_) { - if (sample_block.has(column_name)) + if (result_sample_block.has(column_name)) continue; non_const_virtual_column_names.push_back(column_name); - if (column_name == "_part_offset") - sample_block.insert(ColumnWithTypeAndName(ColumnUInt64::create(), std::make_shared(), column_name)); + if (column_name == "_part_offset" && !prev_reader) + { + /// _part_offset column is filled by the first reader. + read_sample_block.insert(ColumnWithTypeAndName(ColumnUInt64::create(), std::make_shared(), column_name)); + result_sample_block.insert(ColumnWithTypeAndName(ColumnUInt64::create(), std::make_shared(), column_name)); + } } if (prewhere_info) { const auto & step = *prewhere_info; if (step.actions) - step.actions->execute(sample_block, true); + step.actions->execute(result_sample_block, true); if (step.remove_column) - sample_block.erase(step.column_name); + result_sample_block.erase(step.column_name); } } @@ -765,7 +922,12 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::read(size_t max_rows, Mar if (max_rows == 0) throw Exception("Expected at least 1 row to read, got 0.", ErrorCodes::LOGICAL_ERROR); - ReadResult read_result; + ReadResult read_result(log); + + SCOPE_EXIT({ + LOG_TEST(log, "read() returned {}, sample block {}", + read_result.dumpInfo(), this->result_sample_block.dumpNames()); + }); if (prev_reader) { @@ -778,69 +940,52 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::read(size_t max_rows, Mar if (read_result.num_rows == 0) return read_result; - bool has_columns = false; + /// Calculate and update read bytes size_t total_bytes = 0; for (auto & column : columns) { if (column) { total_bytes += column->byteSize(); - has_columns = true; } } - read_result.addNumBytesRead(total_bytes); - bool should_evaluate_missing_defaults = false; - - if (has_columns) - { - /// num_read_rows >= read_result.num_rows - /// We must filter block before adding columns to read_result.block - - /// Fill missing columns before filtering because some arrays from Nested may have empty data. 
- merge_tree_reader->fillMissingColumns(columns, should_evaluate_missing_defaults, num_read_rows); - - if (read_result.getFilter()) - filterColumns(columns, read_result.getFilter()->getData()); - } - else - { - size_t num_rows = read_result.num_rows; - - /// If block is empty, we still may need to add missing columns. - /// In that case use number of rows in result block and don't filter block. - if (num_rows) - merge_tree_reader->fillMissingColumns(columns, should_evaluate_missing_defaults, num_rows); - } - if (!columns.empty()) { + /// If all requested columns are absent in part num_read_rows will be 0. + /// In this case we need to use number of rows in the result to fill the default values and don't filter block. + if (num_read_rows == 0) + num_read_rows = read_result.num_rows; + + /// fillMissingColumns() must be called after reading but befoe any filterings because + /// some columns (e.g. arrays) might be only partially filled and thus not be valid and + /// fillMissingColumns() fixes this. + bool should_evaluate_missing_defaults; + merge_tree_reader->fillMissingColumns(columns, should_evaluate_missing_defaults, + num_read_rows); + + if (read_result.total_rows_per_granule == num_read_rows && read_result.num_rows != num_read_rows) + { + /// We have filter applied from the previous step + /// So we need to apply it to the newly read rows + if (!read_result.final_filter.present() || read_result.final_filter.countBytesInFilter() != read_result.num_rows) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Final filter is missing or has mistaching size, read_result: {}", + read_result.dumpInfo()); + + filterColumns(columns, read_result.final_filter); + } + /// If some columns absent in part, then evaluate default values if (should_evaluate_missing_defaults) { - auto block = prev_reader->sample_block.cloneWithColumns(read_result.columns); - auto block_before_prewhere = read_result.block_before_prewhere; - for (const auto & column : block) - { - if (block_before_prewhere.has(column.name)) - block_before_prewhere.erase(column.name); - } + Block additional_columns = prev_reader->getSampleBlock().cloneWithColumns(read_result.columns); + for (const auto & col : read_result.additional_columns) + additional_columns.insert(col); - if (block_before_prewhere) - { - if (read_result.need_filter) - { - auto old_columns = block_before_prewhere.getColumns(); - filterColumns(old_columns, read_result.getFilterOriginal()->getData()); - block_before_prewhere.setColumns(old_columns); - } - - for (auto & column : block_before_prewhere) - block.insert(std::move(column)); - } - merge_tree_reader->evaluateMissingDefaults(block, columns); + merge_tree_reader->evaluateMissingDefaults(additional_columns, columns); } + /// If columns not empty, then apply on-fly alter conversions if any required merge_tree_reader->performRequiredConversions(columns); } @@ -854,11 +999,15 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::read(size_t max_rows, Mar read_result = startReadingChain(max_rows, ranges); read_result.num_rows = read_result.numReadRows(); - if (read_result.num_rows) + LOG_TEST(log, "First reader returned: {}, requested columns: {}", + read_result.dumpInfo(), dumpNames(merge_tree_reader->getColumns())); + + if (read_result.num_rows == 0) + return read_result; + { /// Physical columns go first and then some virtual columns follow - /// TODO: is there a better way to account for virtual columns that were filled by previous readers? 
- size_t physical_columns_count = read_result.columns.size() - read_result.extra_columns_filled.size(); + size_t physical_columns_count = merge_tree_reader->getColumns().size(); Columns physical_columns(read_result.columns.begin(), read_result.columns.begin() + physical_columns_count); bool should_evaluate_missing_defaults; @@ -875,8 +1024,6 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::read(size_t max_rows, Mar for (size_t i = 0; i < physical_columns.size(); ++i) read_result.columns[i] = std::move(physical_columns[i]); } - else - read_result.columns.clear(); size_t total_bytes = 0; for (auto & column : read_result.columns) @@ -885,18 +1032,35 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::read(size_t max_rows, Mar read_result.addNumBytesRead(total_bytes); } - if (read_result.num_rows == 0) - return read_result; - executePrewhereActionsAndFilterColumns(read_result); + read_result.checkInternalConsistency(); + + if (!read_result.can_return_prewhere_column_without_filtering) + { + if (!read_result.filterWasApplied()) + { + /// TODO: another solution might be to set all 0s from final filter into the prewhere column and not filter all the columns here + /// but rely on filtering in WHERE. + read_result.applyFilter(read_result.final_filter); + read_result.checkInternalConsistency(); + } + + read_result.can_return_prewhere_column_without_filtering = true; + } + + if (read_result.num_rows != 0 && read_result.columns.size() != getSampleBlock().columns()) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Number of columns in result doesn't match number of columns in sample block, read_result: {}, sample block: {}", + read_result.dumpInfo(), getSampleBlock().dumpStructure()); + return read_result; } - MergeTreeRangeReader::ReadResult MergeTreeRangeReader::startReadingChain(size_t max_rows, MarkRanges & ranges) { - ReadResult result; + ReadResult result(log); result.columns.resize(merge_tree_reader->getColumns().size()); size_t current_task_last_mark = getLastMark(ranges); @@ -946,14 +1110,11 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::startReadingChain(size_t result.addRows(stream.finalize(result.columns)); /// Last granule may be incomplete. - if (!result.rowsPerGranule().empty()) + if (!result.rows_per_granule.empty()) result.adjustLastGranule(); - for (const auto & column_name : non_const_virtual_column_names) - { - if (column_name == "_part_offset") - fillPartOffsetColumn(result, leading_begin_part_offset, leading_end_part_offset); - } + if (read_sample_block.has("_part_offset")) + fillPartOffsetColumn(result, leading_begin_part_offset, leading_end_part_offset); return result; } @@ -968,11 +1129,13 @@ void MergeTreeRangeReader::fillPartOffsetColumn(ReadResult & result, UInt64 lead UInt64 * pos = vec.data(); UInt64 * end = &vec[num_rows]; + /// Fill the reamining part of the previous range (it was started in the previous read request). while (pos < end && leading_begin_part_offset < leading_end_part_offset) *pos++ = leading_begin_part_offset++; - const auto start_ranges = result.startedRanges(); + const auto & start_ranges = result.started_ranges; + /// Fill the ranges which were started in the current read request. 
for (const auto & start_range : start_ranges) { UInt64 start_part_offset = index_granularity->getMarkStartingRow(start_range.range.begin); @@ -983,7 +1146,6 @@ void MergeTreeRangeReader::fillPartOffsetColumn(ReadResult & result, UInt64 lead } result.columns.emplace_back(std::move(column)); - result.extra_columns_filled.push_back("_part_offset"); } Columns MergeTreeRangeReader::continueReadingChain(const ReadResult & result, size_t & num_rows) @@ -995,7 +1157,7 @@ Columns MergeTreeRangeReader::continueReadingChain(const ReadResult & result, si if (merge_tree_reader->getColumns().empty()) return columns; - if (result.rowsPerGranule().empty()) + if (result.rows_per_granule.empty()) { /// If zero rows were read on prev step, than there is no more rows to read. /// Last granule may have less rows than index_granularity, so finish reading manually. @@ -1005,8 +1167,8 @@ Columns MergeTreeRangeReader::continueReadingChain(const ReadResult & result, si columns.resize(merge_tree_reader->numColumnsInResult()); - const auto & rows_per_granule = result.rowsPerGranule(); - const auto & started_ranges = result.startedRanges(); + const auto & rows_per_granule = result.rows_per_granule; + const auto & started_ranges = result.started_ranges; size_t current_task_last_mark = ReadResult::getLastMark(started_ranges); size_t next_range_to_start = 0; @@ -1027,13 +1189,13 @@ Columns MergeTreeRangeReader::continueReadingChain(const ReadResult & result, si num_rows += stream.read(columns, rows_per_granule[i], !last); } - stream.skip(result.numRowsToSkipInLastGranule()); + stream.skip(result.num_rows_to_skip_in_last_granule); num_rows += stream.finalize(columns); /// added_rows may be zero if all columns were read in prewhere and it's ok. - if (num_rows && num_rows != result.totalRowsPerGranule()) + if (num_rows && num_rows != result.total_rows_per_granule) throw Exception("RangeReader read " + toString(num_rows) + " rows, but " - + toString(result.totalRowsPerGranule()) + " expected.", ErrorCodes::LOGICAL_ERROR); + + toString(result.total_rows_per_granule) + " expected.", ErrorCodes::LOGICAL_ERROR); return columns; } @@ -1047,7 +1209,7 @@ static void checkCombinedFiltersSize(size_t bytes_in_first_filter, size_t second } /// Second filter size must be equal to number of 1s in the first filter. -/// The result size is equal to first filter size. +/// The result has size equal to first filter size and contains 1s only where both filters contain 1s. static ColumnPtr combineFilters(ColumnPtr first, ColumnPtr second) { ConstantFilterDescription first_const_descr(*first); @@ -1100,23 +1262,22 @@ static ColumnPtr combineFilters(ColumnPtr first, ColumnPtr second) return mut_first; } -void MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(ReadResult & result) +void MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(ReadResult & result) const { + result.checkInternalConsistency(); + if (!prewhere_info) return; - const auto & header = merge_tree_reader->getColumns(); - size_t num_columns = header.size(); + const auto & header = read_sample_block; + size_t num_columns = header.columns(); /// Check that we have columns from previous steps and newly read required columns - if (result.columns.size() < num_columns + result.extra_columns_filled.size()) + if (result.columns.size() < num_columns) throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid number of columns passed to MergeTreeRangeReader. Expected {}, got {}", num_columns, result.columns.size()); - /// This filter has the size of total_rows_per granule. 
It is applied after reading contiguous chunks from - /// the start of each granule. - ColumnPtr combined_filter; /// Filter computed at the current step. Its size is equal to num_rows which is <= total_rows_per_granule ColumnPtr current_step_filter; size_t prewhere_column_pos; @@ -1138,35 +1299,28 @@ void MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(ReadResult & r for (auto name_and_type = header.begin(); name_and_type != header.end() && pos < result.columns.size(); ++pos, ++name_and_type) block.insert({result.columns[pos], name_and_type->type, name_and_type->name}); - for (const auto & column_name : non_const_virtual_column_names) { - if (block.has(column_name)) - continue; + /// Columns might be projected out. We need to store them here so that default columns can be evaluated later. + Block additional_columns = block; - if (column_name == "_part_offset") - { - if (pos >= result.columns.size()) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Invalid number of columns passed to MergeTreeRangeReader. Expected {}, got {}", - num_columns, result.columns.size()); + if (prewhere_info->actions) + prewhere_info->actions->execute(block); - block.insert({result.columns[pos], std::make_shared(), column_name}); - } - else if (column_name == LightweightDeleteDescription::FILTER_COLUMN.name) + result.additional_columns.clear(); + /// Additional columns might only be needed if there are more steps in the chain. + if (!last_reader_in_chain) { - /// Do nothing, it will be added later + for (auto & col : additional_columns) + { + /// Exclude columns that are present in the result block to avoid storing them and filtering twice. + /// TODO: also need to exclude the columns that are not needed for the next steps. + if (block.has(col.name)) + continue; + result.additional_columns.insert(col); + } } - else - throw Exception("Unexpected non-const virtual column: " + column_name, ErrorCodes::LOGICAL_ERROR); - ++pos; } - /// Columns might be projected out. We need to store them here so that default columns can be evaluated later. 
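A scalar reference sketch of the combineFilters() contract restated in the comment earlier in this hunk: the second filter must have exactly as many entries as there are 1s in the first, and the result keeps the first filter's size with 1s only where both agree. The real function operates on IColumn filters and has vectorized and constant-filter paths; this stand-in uses plain byte vectors and is not part of the patch:

#include <cstddef>
#include <cstdint>
#include <stdexcept>
#include <vector>

std::vector<uint8_t> combineFiltersSketch(const std::vector<uint8_t> & first, const std::vector<uint8_t> & second)
{
    std::vector<uint8_t> result(first.size(), 0);
    size_t second_pos = 0;
    for (size_t i = 0; i < first.size(); ++i)
    {
        if (first[i])
        {
            if (second_pos >= second.size())
                throw std::logic_error("second filter is shorter than the number of 1s in the first filter");
            result[i] = second[second_pos++];   /// consume second filter only at positions kept by the first
        }
    }
    if (second_pos != second.size())
        throw std::logic_error("second filter size doesn't match the number of 1s in the first filter");
    return result;
}

/// Example: first = {1,0,1,1}, second = {1,0,1}  ->  result = {1,0,0,1}.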
- result.block_before_prewhere = block; - - if (prewhere_info->actions) - prewhere_info->actions->execute(block); - prewhere_column_pos = block.getPositionByName(prewhere_info->column_name); result.columns.clear(); @@ -1174,90 +1328,38 @@ void MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(ReadResult & r for (auto & col : block) result.columns.emplace_back(std::move(col.column)); - current_step_filter.swap(result.columns[prewhere_column_pos]); - combined_filter = current_step_filter; + current_step_filter = result.columns[prewhere_column_pos]; } - if (result.getFilter()) - { - ColumnPtr prev_filter = result.getFilterHolder(); - combined_filter = combineFilters(prev_filter, std::move(combined_filter)); - } - - result.setFilter(combined_filter); - - /// If there is a WHERE, we filter in there, and only optimize IO and shrink columns here - if (!last_reader_in_chain) - result.optimize(merge_tree_reader->canReadIncompleteGranules(), true); - - /// If we read nothing or filter gets optimized to nothing - if (result.totalRowsPerGranule() == 0) - result.setFilterConstFalse(); - /// If we need to filter in PREWHERE - else if (prewhere_info->need_filter || result.need_filter) - { - /// If there is a filter and without optimized - if (result.getFilter() && last_reader_in_chain) - { - const auto * result_filter = result.getFilter(); - /// optimize is not called, need to check const 1 and const 0 - size_t bytes_in_filter = result.countBytesInResultFilter(result_filter->getData()); - if (bytes_in_filter == 0) - result.setFilterConstFalse(); - else if (bytes_in_filter == result.num_rows) - result.setFilterConstTrue(); - } - - /// If there is still a filter, do the filtering now - if (result.getFilter()) - { - /// filter might be shrunk while columns not - const auto * result_filter = result.getFilterOriginal(); - - filterColumns(result.columns, current_step_filter); - - result.need_filter = true; - - bool has_column = false; - for (auto & column : result.columns) - { - if (column) - { - has_column = true; - result.num_rows = column->size(); - break; - } - } - - /// There is only one filter column. Record the actual number - if (!has_column) - result.num_rows = result.countBytesInResultFilter(result_filter->getData()); - } - - /// Check if the PREWHERE column is needed - if (!result.columns.empty()) - { - if (prewhere_info->remove_column) - result.columns.erase(result.columns.begin() + prewhere_column_pos); - else - result.columns[prewhere_column_pos] = - getSampleBlock().getByName(prewhere_info->column_name).type-> - createColumnConst(result.num_rows, 1u)->convertToFullColumnIfConst(); - } - } - /// Filter in WHERE instead + if (prewhere_info->remove_column) + result.columns.erase(result.columns.begin() + prewhere_column_pos); else { - if (prewhere_info->remove_column) - result.columns.erase(result.columns.begin() + prewhere_column_pos); - else - { - auto type = getSampleBlock().getByName(prewhere_info->column_name).type; - ColumnWithTypeAndName col(result.getFilterHolder()->convertToFullColumnIfConst(), std::make_shared(), ""); - result.columns[prewhere_column_pos] = castColumn(col, type); - result.clearFilter(); // Acting as a flag to not filter in PREWHERE - } + /// In case when we are not removing prewhere column the caller expects it to serve as a final filter: + /// it must contain 0s not only from the current step but also from all the previous steps. 
+ /// One way to achieve this is to apply the final_filter if we know that the final _filter was not applied at + /// several previous steps but was accumulated instead. + result.can_return_prewhere_column_without_filtering = + (!result.final_filter.present() || result.final_filter.countBytesInFilter() == result.num_rows); } + + FilterWithCachedCount current_filter(current_step_filter); + + result.optimize(current_filter, merge_tree_reader->canReadIncompleteGranules()); + + if (prewhere_info->need_filter && !result.filterWasApplied()) + { + /// Depending on whether the final filter was applied at the previous step or not we need to apply either + /// just the current step filter or the accumulated filter. + FilterWithCachedCount filter_to_apply = + current_filter.size() == result.total_rows_per_granule ? + result.final_filter : + current_filter; + + result.applyFilter(filter_to_apply); + } + + LOG_TEST(log, "After execute prewhere {}", result.dumpInfo()); } std::string PrewhereExprInfo::dump() const diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.h b/src/Storages/MergeTree/MergeTreeRangeReader.h index 06f3f5760fb..039a499e9c1 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.h +++ b/src/Storages/MergeTree/MergeTreeRangeReader.h @@ -1,6 +1,9 @@ #pragma once #include #include +#include +#include +#include #include namespace DB @@ -34,6 +37,45 @@ struct PrewhereExprInfo std::string dump() const; }; +class FilterWithCachedCount +{ + ConstantFilterDescription const_description; /// TODO: ConstantFilterDescription only checks always true/false for const columns + /// think how to handle when the column in not const but has all 0s or all 1s + ColumnPtr column = nullptr; + const IColumn::Filter * data = nullptr; + mutable size_t cached_count_bytes = -1; + +public: + explicit FilterWithCachedCount() = default; + + explicit FilterWithCachedCount(const ColumnPtr & column_) + : const_description(*column_) + { + ColumnPtr col = column_->convertToFullIfNeeded(); + FilterDescription desc(*col); + column = desc.data_holder ? desc.data_holder : col; + data = desc.data; + } + + bool present() const { return !!column; } + + bool alwaysTrue() const { return const_description.always_true; } + bool alwaysFalse() const { return const_description.always_false; } + + ColumnPtr getColumn() const { return column; } + + const IColumn::Filter & getData() const { return *data; } + + size_t size() const { return column->size(); } + + size_t countBytesInFilter() const + { + if (cached_count_bytes == size_t(-1)) + cached_count_bytes = DB::countBytesInFilter(*data); + return cached_count_bytes; + } +}; + /// MergeTreeReader iterator which allows sequential reading for arbitrary number of rows between pairs of marks in the same part. /// Stores reading state, which can be inside granule. Can skip rows in current granule and start reading from next mark. /// Used generally for reading number of rows less than index granularity to decrease cache misses for fat blocks. @@ -174,53 +216,46 @@ public: using RangesInfo = std::vector; - const RangesInfo & startedRanges() const { return started_ranges; } - const NumRows & rowsPerGranule() const { return rows_per_granule; } + explicit ReadResult(Poco::Logger * log_) : log(log_) {} static size_t getLastMark(const MergeTreeRangeReader::ReadResult::RangesInfo & ranges); - /// The number of rows were read at LAST iteration in chain. <= num_added_rows + num_filtered_rows. 
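
The FilterWithCachedCount helper introduced in this header exists so that checks such as filterWasApplied() do not rescan the filter on every call. Below is a simplified sketch of the same lazy-caching idea, assuming a plain byte vector instead of ClickHouse column types (CachedFilterSketch is an invented name, not the real class):

    #include <cstddef>
    #include <cstdint>
    #include <numeric>
    #include <vector>

    class CachedFilterSketch
    {
    public:
        explicit CachedFilterSketch(std::vector<uint8_t> data_) : data(std::move(data_)) {}

        size_t size() const { return data.size(); }

        /// Counted lazily on first use and then reused on subsequent calls.
        size_t countBytesInFilter() const
        {
            if (cached_count == kNotComputed)
                cached_count = std::accumulate(data.begin(), data.end(), size_t{0},
                                               [](size_t sum, uint8_t b) { return sum + (b != 0); });
            return cached_count;
        }

    private:
        static constexpr size_t kNotComputed = static_cast<size_t>(-1);
        std::vector<uint8_t> data;
        mutable size_t cached_count = kNotComputed;
    };
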
- size_t totalRowsPerGranule() const { return total_rows_per_granule; } - size_t numRowsToSkipInLastGranule() const { return num_rows_to_skip_in_last_granule; } - /// Filter you need to apply to newly-read columns in order to add them to block. - const ColumnUInt8 * getFilterOriginal() const { return filter_original ? filter_original : filter; } - const ColumnUInt8 * getFilter() const { return filter; } - ColumnPtr & getFilterHolder() { return filter_holder; } - void addGranule(size_t num_rows_); void adjustLastGranule(); void addRows(size_t rows) { num_read_rows += rows; } void addRange(const MarkRange & range) { started_ranges.push_back({rows_per_granule.size(), range}); } - /// Set filter or replace old one. Filter must have more zeroes than previous. - void setFilter(const ColumnPtr & new_filter); - /// For each granule calculate the number of filtered rows at the end. Remove them and update filter. - void optimize(bool can_read_incomplete_granules, bool allow_filter_columns); + /// Add current step filter to the result and then for each granule calculate the number of filtered rows at the end. + /// Remove them and update filter. + /// Apply the filter to the columns and update num_rows if required + void optimize(const FilterWithCachedCount & current_filter, bool can_read_incomplete_granules); /// Remove all rows from granules. void clear(); - void clearFilter() { filter = nullptr; } void setFilterConstTrue(); - void setFilterConstFalse(); void addNumBytesRead(size_t count) { num_bytes_read += count; } - void shrink(Columns & old_columns); + /// Shrinks columns according to the diff between current and previous rows_per_granule. + void shrink(Columns & old_columns, const NumRows & rows_per_granule_previous) const; - size_t countBytesInResultFilter(const IColumn::Filter & filter); + /// Applies the filter to the columns and updates num_rows. + void applyFilter(const FilterWithCachedCount & filter); - /// If this flag is false than filtering form PREWHERE can be delayed and done in WHERE - /// to reduce memory copies and applying heavy filters multiple times - bool need_filter = false; + /// Verifies that columns and filter sizes match. + /// The checks might be non-trivial so it make sense to have the only in debug builds. + void checkInternalConsistency() const; - Block block_before_prewhere; + std::string dumpInfo() const; + + /// Contains columns that are not included into result but might be needed for default values calculation. + Block additional_columns; RangesInfo started_ranges; /// The number of rows read from each granule. /// Granule here is not number of rows between two marks /// It's amount of rows per single reading act NumRows rows_per_granule; - NumRows rows_per_granule_original; /// Sum(rows_per_granule) size_t total_rows_per_granule = 0; /// The number of rows was read at first step. May be zero if no read columns present in part. @@ -229,29 +264,36 @@ public: size_t num_rows_to_skip_in_last_granule = 0; /// Without any filtration. size_t num_bytes_read = 0; - /// nullptr if prev reader hasn't prewhere_actions. Otherwise filter.size() >= total_rows_per_granule. - ColumnPtr filter_holder; - ColumnPtr filter_holder_original; - const ColumnUInt8 * filter = nullptr; - const ColumnUInt8 * filter_original = nullptr; - void collapseZeroTails(const IColumn::Filter & filter, IColumn::Filter & new_filter); + /// This filter has the size of total_rows_per_granule. This means that it can be applied to newly read columns. 
+ /// The result of applying this filter is that only rows that pass all previous filtering steps will remain. + FilterWithCachedCount final_filter; + + /// This flag is true when prewhere column can be returned without filtering. + /// It's true when it contains 0s from all filtering steps (not just the step when it was calculated). + /// NOTE: If we accumulated the final_filter for several steps without applying it then prewhere column calculated at the last step + /// will not contain 0s from all previous steps. + bool can_return_prewhere_column_without_filtering = true; + + /// Checks if result columns have current final_filter applied. + bool filterWasApplied() const { return !final_filter.present() || final_filter.countBytesInFilter() == num_rows; } + + /// Builds updated filter by cutting zeros in granules tails + void collapseZeroTails(const IColumn::Filter & filter, const NumRows & rows_per_granule_previous, IColumn::Filter & new_filter) const; size_t countZeroTails(const IColumn::Filter & filter, NumRows & zero_tails, bool can_read_incomplete_granules) const; static size_t numZerosInTail(const UInt8 * begin, const UInt8 * end); - std::map filter_bytes_map; - - Names extra_columns_filled; + Poco::Logger * log; }; ReadResult read(size_t max_rows, MarkRanges & ranges); - const Block & getSampleBlock() const { return sample_block; } + const Block & getSampleBlock() const { return result_sample_block; } private: ReadResult startReadingChain(size_t max_rows, MarkRanges & ranges); Columns continueReadingChain(const ReadResult & result, size_t & num_rows); - void executePrewhereActionsAndFilterColumns(ReadResult & result); + void executePrewhereActionsAndFilterColumns(ReadResult & result) const; void fillPartOffsetColumn(ReadResult & result, UInt64 leading_begin_part_offset, UInt64 leading_end_part_offset); IMergeTreeReader * merge_tree_reader = nullptr; @@ -261,11 +303,14 @@ private: Stream stream; - Block sample_block; + Block read_sample_block; /// Block with columns that are actually read from disk + non-const virtual columns that are filled at this step. + Block result_sample_block; /// Block with columns that are returned by this step. bool last_reader_in_chain = false; bool is_initialized = false; Names non_const_virtual_column_names; + + Poco::Logger * log = &Poco::Logger::get("MergeTreeRangeReader"); }; } diff --git a/src/Storages/MergeTree/MergeTreeSettings.cpp b/src/Storages/MergeTree/MergeTreeSettings.cpp index 5b78a59687b..2bf717c883a 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.cpp +++ b/src/Storages/MergeTree/MergeTreeSettings.cpp @@ -99,6 +99,15 @@ void MergeTreeSettings::sanityCheck(size_t background_pool_tasks) const background_pool_tasks); } + // Zero index_granularity is nonsensical. + if (index_granularity < 1) + { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "index_granularity: value {} makes no sense", + index_granularity); + } + // The min_index_granularity_bytes value is 1024 b and index_granularity_bytes is 10 mb by default. // If index_granularity_bytes is not disabled i.e > 0 b, then always ensure that it's greater than // min_index_granularity_bytes. 
This is mainly a safeguard against accidents whereby a really low diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index 82a0a04257b..37e9bf5779c 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -76,6 +76,9 @@ struct Settings; M(UInt64, max_delay_to_insert, 1, "Max delay of inserting data into MergeTree table in seconds, if there are a lot of unmerged parts in single partition.", 0) \ M(UInt64, max_parts_in_total, 100000, "If more than this number active parts in all partitions in total, throw 'Too many parts ...' exception.", 0) \ \ + /* Part removal settings. */ \ + M(UInt64, simultaneous_parts_removal_limit, 0, "Maximum number of parts to remove during one CleanupThread iteration (0 means unlimited).", 0) \ + \ /** Replication settings. */ \ M(UInt64, replicated_deduplication_window, 100, "How many last blocks of hashes should be kept in ZooKeeper (old blocks will be deleted).", 0) \ M(UInt64, replicated_deduplication_window_seconds, 7 * 24 * 60 * 60 /* one week */, "Similar to \"replicated_deduplication_window\", but determines old blocks by their lifetime. Hash of an inserted block will be deleted (and the block will not be deduplicated after) if it outside of one \"window\". You can set very big replicated_deduplication_window to avoid duplicating INSERTs during that period of time.", 0) \ diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index 991a8d359a8..3a7484a4141 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -93,15 +93,29 @@ void MergedBlockOutputStream::Finalizer::Impl::finish() { writer.finish(sync); - for (const auto & file_name : files_to_remove_after_finish) - part->getDataPartStorage().removeFile(file_name); - for (auto & file : written_files) { file->finalize(); if (sync) file->sync(); } + + /// TODO: this code looks really stupid. It's because DiskTransaction is + /// unable to see own write operations. When we merge part with column TTL + /// and column completely outdated we first write empty column and after + /// remove it. In case of single DiskTransaction it's impossible because + /// remove operation will not see just written files. That is why we finish + /// one transaction and start new... + /// + /// FIXME: DiskTransaction should see own writes. Column TTL implementation shouldn't be so stupid... + if (!files_to_remove_after_finish.empty()) + { + part->getDataPartStorage().commitTransaction(); + part->getDataPartStorage().beginTransaction(); + } + + for (const auto & file_name : files_to_remove_after_finish) + part->getDataPartStorage().removeFile(file_name); } MergedBlockOutputStream::Finalizer::~Finalizer() @@ -186,7 +200,9 @@ MergedBlockOutputStream::WrittenFiles MergedBlockOutputStream::finalizePartOnDis const MergeTreeMutableDataPartPtr & new_part, MergeTreeData::DataPart::Checksums & checksums) { + /// NOTE: You do not need to call fsync here, since it will be called later for the all written_files. 
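
The Finalizer change above works around a DiskTransaction that cannot see its own writes by committing the current transaction and opening a new one before removing the just-written files. A generic sketch of that shape, with an invented StorageTxnSketch interface standing in for IDataPartStorage (none of these names are the real API):

    #include <string>
    #include <vector>

    struct StorageTxnSketch
    {
        void commitTransaction() { /* make pending writes visible */ }
        void beginTransaction()  { /* start collecting a new batch of operations */ }
        void removeFile(const std::string & /*name*/) { /* schedule a removal in the current transaction */ }
    };

    /// Commit first so the removals below operate on files the transaction can actually see.
    void finishWithRemovalsSketch(StorageTxnSketch & storage, const std::vector<std::string> & files_to_remove)
    {
        if (!files_to_remove.empty())
        {
            storage.commitTransaction();
            storage.beginTransaction();
        }
        for (const auto & name : files_to_remove)
            storage.removeFile(name);
    }
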
WrittenFiles written_files; + if (new_part->isProjectionPart()) { if (storage.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING || isCompactPart(new_part)) diff --git a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp index 9e3cbb0640b..b432841d5b0 100644 --- a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp @@ -160,7 +160,6 @@ ReplicatedMergeMutateTaskBase::PrepareResult MutateFromLogEntryTask::prepare() } } - const Settings & settings = storage.getContext()->getSettingsRef(); merge_mutate_entry = storage.getContext()->getMergeList().insert( storage.getStorageID(), diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 0c1cc6e4b84..de68cb6f0ba 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -625,7 +625,8 @@ void finalizeMutatedPart( MergeTreeData::MutableDataPartPtr new_data_part, ExecuteTTLType execute_ttl_type, const CompressionCodecPtr & codec, - ContextPtr context) + ContextPtr context, + bool sync) { if (new_data_part->uuid != UUIDHelpers::Nil) { @@ -634,6 +635,8 @@ void finalizeMutatedPart( writeUUIDText(new_data_part->uuid, out_hashing); new_data_part->checksums.files[IMergeTreeDataPart::UUID_FILE_NAME].file_size = out_hashing.count(); new_data_part->checksums.files[IMergeTreeDataPart::UUID_FILE_NAME].file_hash = out_hashing.getHash(); + if (sync) + out_hashing.sync(); } if (execute_ttl_type != ExecuteTTLType::NONE) @@ -644,6 +647,8 @@ void finalizeMutatedPart( new_data_part->ttl_infos.write(out_hashing); new_data_part->checksums.files["ttl.txt"].file_size = out_hashing.count(); new_data_part->checksums.files["ttl.txt"].file_hash = out_hashing.getHash(); + if (sync) + out_hashing.sync(); } if (!new_data_part->getSerializationInfos().empty()) @@ -653,23 +658,31 @@ void finalizeMutatedPart( new_data_part->getSerializationInfos().writeJSON(out_hashing); new_data_part->checksums.files[IMergeTreeDataPart::SERIALIZATION_FILE_NAME].file_size = out_hashing.count(); new_data_part->checksums.files[IMergeTreeDataPart::SERIALIZATION_FILE_NAME].file_hash = out_hashing.getHash(); + if (sync) + out_hashing.sync(); } { /// Write file with checksums. auto out_checksums = new_data_part->getDataPartStorage().writeFile("checksums.txt", 4096, context->getWriteSettings()); new_data_part->checksums.write(*out_checksums); + if (sync) + out_checksums->sync(); } /// close fd { auto out = new_data_part->getDataPartStorage().writeFile(IMergeTreeDataPart::DEFAULT_COMPRESSION_CODEC_FILE_NAME, 4096, context->getWriteSettings()); DB::writeText(queryToString(codec->getFullCodecDesc()), *out); + if (sync) + out->sync(); } /// close fd { /// Write a file with a description of columns. auto out_columns = new_data_part->getDataPartStorage().writeFile("columns.txt", 4096, context->getWriteSettings()); new_data_part->getColumns().writeText(*out_columns); + if (sync) + out_columns->sync(); } /// close fd new_data_part->rows_count = source_part->rows_count; @@ -678,7 +691,6 @@ void finalizeMutatedPart( new_data_part->minmax_idx = source_part->minmax_idx; new_data_part->modification_time = time(nullptr); new_data_part->loadProjections(false, false); - /// All information about sizes is stored in checksums. /// It doesn't make sense to touch filesystem for sizes. 
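
finalizeMutatedPart() above repeats one pattern several times: write a small metadata file, record its size and hash into the part's checksums, and sync it only when requested. A simplified stand-in, assuming std::ofstream and a toy hash rather than the WriteBuffer/HashingWriteBuffer classes (all names here are invented):

    #include <cstddef>
    #include <cstdint>
    #include <fstream>
    #include <map>
    #include <string>

    struct FileChecksumSketch { size_t file_size = 0; uint64_t file_hash = 0; };

    void writeSmallMetadataFileSketch(const std::string & path, const std::string & contents, bool sync,
                                      std::map<std::string, FileChecksumSketch> & checksums)
    {
        uint64_t hash = 0;
        for (unsigned char c : contents)
            hash = hash * 131 + c;   /// toy hash, purely illustrative

        std::ofstream out(path, std::ios::binary);
        out.write(contents.data(), static_cast<std::streamsize>(contents.size()));
        out.flush();
        if (sync)
        {
            /// std::ofstream has no portable fsync; a real implementation would sync the descriptor here.
        }

        checksums[path] = {contents.size(), hash};
    }
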
new_data_part->setBytesOnDisk(new_data_part->checksums.getTotalSizeOnDisk()); @@ -756,6 +768,8 @@ struct MutationContext MergeTreeData::HardlinkedFiles hardlinked_files; + bool need_prefix = true; + scope_guard temporary_directory_lock; }; @@ -862,6 +876,7 @@ public: {}, projection_merging_params, NO_TRANSACTION_PTR, + /* need_prefix */ true, ctx->new_data_part.get(), ".tmp_proj"); @@ -1024,6 +1039,7 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() auto tmp_part = MergeTreeDataWriter::writeTempProjectionPart( *ctx->data, ctx->log, projection_block, projection, ctx->new_data_part.get(), ++block_num); tmp_part.finalize(); + tmp_part.part->getDataPartStorage().commitTransaction(); projection_parts[projection.name].emplace_back(std::move(tmp_part.part)); } } @@ -1046,6 +1062,7 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() auto temp_part = MergeTreeDataWriter::writeTempProjectionPart( *ctx->data, ctx->log, projection_block, projection, ctx->new_data_part.get(), ++block_num); temp_part.finalize(); + temp_part.part->getDataPartStorage().commitTransaction(); projection_parts[projection.name].emplace_back(std::move(temp_part.part)); } } @@ -1144,7 +1161,8 @@ private: void prepare() { - ctx->new_data_part->getDataPartStorage().createDirectories(); + if (ctx->new_data_part->isStoredOnDisk()) + ctx->new_data_part->getDataPartStorage().createDirectories(); /// Note: this is done before creating input streams, because otherwise data.data_parts_mutex /// (which is locked in data.getTotalActiveSizeInBytes()) @@ -1409,7 +1427,7 @@ private: } } - MutationHelpers::finalizeMutatedPart(ctx->source_part, ctx->new_data_part, ctx->execute_ttl_type, ctx->compression_codec, ctx->context); + MutationHelpers::finalizeMutatedPart(ctx->source_part, ctx->new_data_part, ctx->execute_ttl_type, ctx->compression_codec, ctx->context, ctx->need_sync); } @@ -1442,7 +1460,8 @@ MutateTask::MutateTask( const MergeTreeTransactionPtr & txn, MergeTreeData & data_, MergeTreeDataMergerMutator & mutator_, - ActionBlocker & merges_blocker_) + ActionBlocker & merges_blocker_, + bool need_prefix_) : ctx(std::make_shared()) { ctx->data = &data_; @@ -1460,6 +1479,7 @@ MutateTask::MutateTask( ctx->txn = txn; ctx->source_part = ctx->future_part->parts[0]; ctx->storage_from_source_part = std::make_shared(ctx->source_part); + ctx->need_prefix = need_prefix_; auto storage_snapshot = ctx->storage_from_source_part->getStorageSnapshot(ctx->metadata_snapshot, context_); extendObjectColumns(ctx->storage_columns, storage_snapshot->object_columns, /*with_subcolumns=*/ false); @@ -1553,7 +1573,14 @@ bool MutateTask::prepare() files_to_copy_instead_of_hardlinks.insert(IMergeTreeDataPart::FILE_FOR_REFERENCES_CHECK); LOG_TRACE(ctx->log, "Part {} doesn't change up to mutation version {}", ctx->source_part->name, ctx->future_part->part_info.mutation); - auto [part, lock] = ctx->data->cloneAndLoadDataPartOnSameDisk(ctx->source_part, "tmp_clone_", ctx->future_part->part_info, ctx->metadata_snapshot, ctx->txn, &ctx->hardlinked_files, false, files_to_copy_instead_of_hardlinks); + std::string prefix; + if (ctx->need_prefix) + prefix = "tmp_clone_"; + + auto [part, lock] = ctx->data->cloneAndLoadDataPartOnSameDisk(ctx->source_part, prefix, ctx->future_part->part_info, ctx->metadata_snapshot, ctx->txn, &ctx->hardlinked_files, false, files_to_copy_instead_of_hardlinks); + + part->getDataPartStorage().beginTransaction(); + ctx->temporary_directory_lock = std::move(lock); promise.set_value(std::move(part)); return false; @@ 
-1586,7 +1613,10 @@ bool MutateTask::prepare() /// FIXME new_data_part is not used in the case when we clone part with cloneAndLoadDataPartOnSameDisk and return false /// Is it possible to handle this case earlier? - String tmp_part_dir_name = "tmp_mut_" + ctx->future_part->name; + std::string prefix; + if (ctx->need_prefix) + prefix = "tmp_mut_"; + String tmp_part_dir_name = prefix + ctx->future_part->name; ctx->temporary_directory_lock = ctx->data->getTemporaryPartDirectoryHolder(tmp_part_dir_name); auto data_part_storage = std::make_shared( @@ -1680,7 +1710,9 @@ bool MutateTask::prepare() if (copy_checksumns) files_to_copy_instead_of_hardlinks.insert(IMergeTreeDataPart::FILE_FOR_REFERENCES_CHECK); - auto [part, lock] = ctx->data->cloneAndLoadDataPartOnSameDisk(ctx->source_part, "tmp_mut_", ctx->future_part->part_info, ctx->metadata_snapshot, ctx->txn, &ctx->hardlinked_files, false, files_to_copy_instead_of_hardlinks); + auto [part, lock] = ctx->data->cloneAndLoadDataPartOnSameDisk(ctx->source_part, prefix, ctx->future_part->part_info, ctx->metadata_snapshot, ctx->txn, &ctx->hardlinked_files, false, files_to_copy_instead_of_hardlinks); + + part->getDataPartStorage().beginTransaction(); ctx->temporary_directory_lock = std::move(lock); promise.set_value(std::move(part)); return false; diff --git a/src/Storages/MergeTree/MutateTask.h b/src/Storages/MergeTree/MutateTask.h index 3df30670d7f..54ad996ad4c 100644 --- a/src/Storages/MergeTree/MutateTask.h +++ b/src/Storages/MergeTree/MutateTask.h @@ -35,7 +35,8 @@ public: const MergeTreeTransactionPtr & txn, MergeTreeData & data_, MergeTreeDataMergerMutator & mutator_, - ActionBlocker & merges_blocker_); + ActionBlocker & merges_blocker_, + bool need_prefix_); bool execute(); @@ -46,8 +47,6 @@ public: const MergeTreeData::HardlinkedFiles & getHardlinkedFiles() const; - MutableDataPartStoragePtr getBuilder() const; - private: bool prepare(); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp index 049d2c2adf5..557123ddae2 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp @@ -191,7 +191,7 @@ void ReplicatedMergeTreeAttachThread::runImpl() void ReplicatedMergeTreeAttachThread::finalizeInitialization() TSA_NO_THREAD_SAFETY_ANALYSIS { - storage.startupImpl(); + storage.startupImpl(/* from_attach_thread */ true); storage.initialization_done = true; LOG_INFO(log, "Table is initialized"); } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp index 315f471fd5c..1c667b1c867 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp @@ -75,8 +75,8 @@ void ReplicatedMergeTreeCleanupThread::iterate() { clearOldLogs(); auto storage_settings = storage.getSettings(); - clearOldBlocks("blocks", storage_settings->replicated_deduplication_window_seconds, storage_settings->replicated_deduplication_window); - clearOldBlocks("async_blocks", storage_settings->replicated_deduplication_window_seconds_for_async_inserts, storage_settings->replicated_deduplication_window_for_async_inserts); + clearOldBlocks("blocks", storage_settings->replicated_deduplication_window_seconds, storage_settings->replicated_deduplication_window, cached_block_stats_for_sync_inserts); + clearOldBlocks("async_blocks", 
storage_settings->replicated_deduplication_window_seconds_for_async_inserts, storage_settings->replicated_deduplication_window_for_async_inserts, cached_block_stats_for_async_inserts); clearOldMutations(); storage.clearEmptyParts(); } @@ -323,12 +323,12 @@ struct ReplicatedMergeTreeCleanupThread::NodeWithStat } }; -void ReplicatedMergeTreeCleanupThread::clearOldBlocks(const String & blocks_dir_name, UInt64 window_seconds, UInt64 window_size) +void ReplicatedMergeTreeCleanupThread::clearOldBlocks(const String & blocks_dir_name, UInt64 window_seconds, UInt64 window_size, NodeCTimeAndVersionCache & cached_block_stats) { auto zookeeper = storage.getZooKeeper(); std::vector timed_blocks; - getBlocksSortedByTime(*zookeeper, timed_blocks); + getBlocksSortedByTime(blocks_dir_name, *zookeeper, timed_blocks, cached_block_stats); if (timed_blocks.empty()) return; @@ -391,14 +391,14 @@ void ReplicatedMergeTreeCleanupThread::clearOldBlocks(const String & blocks_dir_ } -void ReplicatedMergeTreeCleanupThread::getBlocksSortedByTime(zkutil::ZooKeeper & zookeeper, std::vector & timed_blocks) +void ReplicatedMergeTreeCleanupThread::getBlocksSortedByTime(const String & blocks_dir_name, zkutil::ZooKeeper & zookeeper, std::vector & timed_blocks, NodeCTimeAndVersionCache & cached_block_stats) { timed_blocks.clear(); Strings blocks; Coordination::Stat stat; - if (Coordination::Error::ZOK != zookeeper.tryGetChildren(storage.zookeeper_path + "/blocks", blocks, &stat)) - throw Exception(storage.zookeeper_path + "/blocks doesn't exist", ErrorCodes::NOT_FOUND_NODE); + if (Coordination::Error::ZOK != zookeeper.tryGetChildren(storage.zookeeper_path + "/" + blocks_dir_name, blocks, &stat)) + throw Exception(ErrorCodes::NOT_FOUND_NODE, "{}/{} doesn't exist", storage.zookeeper_path, blocks_dir_name); /// Seems like this code is obsolete, because we delete blocks from cache /// when they are deleted from zookeeper. But we don't know about all (maybe future) places in code @@ -417,7 +417,7 @@ void ReplicatedMergeTreeCleanupThread::getBlocksSortedByTime(zkutil::ZooKeeper & auto not_cached_blocks = stat.numChildren - cached_block_stats.size(); if (not_cached_blocks) { - LOG_TRACE(log, "Checking {} blocks ({} are not cached){}", stat.numChildren, not_cached_blocks, " to clear old ones from ZooKeeper."); + LOG_TRACE(log, "Checking {} {} ({} are not cached){}, path is {}", stat.numChildren, blocks_dir_name, not_cached_blocks, " to clear old ones from ZooKeeper.", storage.zookeeper_path + "/" + blocks_dir_name); } std::vector exists_paths; @@ -427,7 +427,7 @@ void ReplicatedMergeTreeCleanupThread::getBlocksSortedByTime(zkutil::ZooKeeper & if (it == cached_block_stats.end()) { /// New block. Fetch its stat asynchronously. - exists_paths.emplace_back(storage.zookeeper_path + "/blocks/" + block); + exists_paths.emplace_back(storage.zookeeper_path + "/" + blocks_dir_name + "/" + block); } else { diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h index f8731ca0f43..35838625bbe 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h @@ -52,18 +52,19 @@ private: const std::unordered_map & log_pointers_candidate_lost_replicas, size_t replicas_count, const zkutil::ZooKeeperPtr & zookeeper); + using NodeCTimeAndVersionCache = std::map>; /// Remove old block hashes from ZooKeeper. This is done by the leader replica. 
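
The cleanup-thread change above splits the single block-stat cache into one cache per blocks directory, so entries for "blocks" and "async_blocks" can never shadow each other. A minimal sketch of that separation, assuming a std::map keyed by node name with a ctime/version pair as the value (the element types of the real NodeCTimeAndVersionCache are not visible in this excerpt, and the real code passes the chosen cache into clearOldBlocks()/getBlocksSortedByTime() explicitly rather than looking it up by name):

    #include <cstdint>
    #include <ctime>
    #include <map>
    #include <string>
    #include <utility>

    using NodeCTimeAndVersionCacheSketch = std::map<std::string, std::pair<std::time_t, int32_t>>;

    struct CleanupCachesSketch
    {
        NodeCTimeAndVersionCacheSketch cached_block_stats_for_sync_inserts;
        NodeCTimeAndVersionCacheSketch cached_block_stats_for_async_inserts;

        /// Each blocks directory gets its own cache, mirroring the two members added above.
        NodeCTimeAndVersionCacheSketch & cacheFor(const std::string & blocks_dir_name)
        {
            return blocks_dir_name == "async_blocks" ? cached_block_stats_for_async_inserts
                                                     : cached_block_stats_for_sync_inserts;
        }
    };
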
- void clearOldBlocks(const String & blocks_dir_name, UInt64 window_seconds, UInt64 window_size); + void clearOldBlocks(const String & blocks_dir_name, UInt64 window_seconds, UInt64 window_size, NodeCTimeAndVersionCache & cached_block_stats); /// Remove old mutations that are done from ZooKeeper. This is done by the leader replica. void clearOldMutations(); - using NodeCTimeAndVersionCache = std::map>; - NodeCTimeAndVersionCache cached_block_stats; + NodeCTimeAndVersionCache cached_block_stats_for_sync_inserts; + NodeCTimeAndVersionCache cached_block_stats_for_async_inserts; struct NodeWithStat; /// Returns list of blocks (with their stat) sorted by ctime in descending order. - void getBlocksSortedByTime(zkutil::ZooKeeper & zookeeper, std::vector & timed_blocks); + void getBlocksSortedByTime(const String & blocks_dir_name, zkutil::ZooKeeper & zookeeper, std::vector & timed_blocks, NodeCTimeAndVersionCache & cached_block_stats); /// TODO Removing old quorum/failed_parts }; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeMergeStrategyPicker.h b/src/Storages/MergeTree/ReplicatedMergeTreeMergeStrategyPicker.h index 91f5824f8fc..05b3d656579 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeMergeStrategyPicker.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeMergeStrategyPicker.h @@ -17,7 +17,7 @@ struct ReplicatedMergeTreeLogEntryData; /// (so instead of doing exactly the same merge cluster-wise you can do merge once and fetch ready part) /// Fetches may be desirable for other operational reasons (backup replica without lot of CPU resources). /// -/// That class allow to take a decisions about preferred strategy for a concreate merge. +/// That class allow to take a decisions about preferred strategy for a concrete merge. /// /// Since that code is used in shouldExecuteLogEntry we need to be able to: /// 1) make decision fast diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeMutationEntry.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeMutationEntry.cpp index 080066c1dff..1efb3f6826b 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeMutationEntry.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeMutationEntry.cpp @@ -24,7 +24,7 @@ void ReplicatedMergeTreeMutationEntry::writeText(WriteBuffer & out) const } out << "commands: "; - commands.writeText(out); + commands.writeText(out, /* with_pure_metadata_commands = */ false); out << "\n"; out << "alter version: "; @@ -93,7 +93,7 @@ std::shared_ptr ReplicatedMergeTreeMutationEntry::backup() c } out << "commands: "; - commands.writeText(out); + commands.writeText(out, /* with_pure_metadata_commands = */ false); out << "\n"; return std::make_shared(out.str()); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp index 93724e4946d..29528e9ff80 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp @@ -10,11 +10,6 @@ #include -namespace ProfileEvents -{ - extern const Event ReplicaPartialShutdown; -} - namespace CurrentMetrics { extern const Metric ReadonlyReplica; @@ -335,34 +330,11 @@ void ReplicatedMergeTreeRestartingThread::activateReplica() void ReplicatedMergeTreeRestartingThread::partialShutdown(bool part_of_full_shutdown) { setReadonly(part_of_full_shutdown); - ProfileEvents::increment(ProfileEvents::ReplicaPartialShutdown); - - storage.partial_shutdown_called = true; - storage.partial_shutdown_event.set(); - storage.replica_is_active_node = 
nullptr; - - LOG_TRACE(log, "Waiting for threads to finish"); - storage.merge_selecting_task->deactivate(); - storage.queue_updating_task->deactivate(); - storage.mutations_updating_task->deactivate(); - storage.mutations_finalizing_task->deactivate(); - - storage.cleanup_thread.stop(); - storage.part_check_thread.stop(); - - /// Stop queue processing - { - auto fetch_lock = storage.fetcher.blocker.cancel(); - auto merge_lock = storage.merger_mutator.merges_blocker.cancel(); - auto move_lock = storage.parts_mover.moves_blocker.cancel(); - storage.background_operations_assignee.finish(); - } - - LOG_TRACE(log, "Threads finished"); + storage.partialShutdown(); } -void ReplicatedMergeTreeRestartingThread::shutdown() +void ReplicatedMergeTreeRestartingThread::shutdown(bool part_of_full_shutdown) { /// Stop restarting_thread before stopping other tasks - so that it won't restart them again. need_stop = true; @@ -370,7 +342,7 @@ void ReplicatedMergeTreeRestartingThread::shutdown() LOG_TRACE(log, "Restarting thread finished"); /// Stop other tasks. - partialShutdown(/* part_of_full_shutdown */ true); + partialShutdown(part_of_full_shutdown); } void ReplicatedMergeTreeRestartingThread::setReadonly(bool on_shutdown) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h index bb4b0c0fdd2..b5314de9dcc 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h @@ -28,7 +28,7 @@ public: void wakeup() { task->schedule(); } - void shutdown(); + void shutdown(bool part_of_full_shutdown); private: StorageReplicatedMergeTree & storage; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 3da71c61482..7bd5df2b1dc 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -539,7 +539,7 @@ std::vector ReplicatedMergeTreeSinkImpl::commitPart( /// /// metadata_snapshot->check(part->getColumns()); - String temporary_part_relative_path = part->getDataPartStorage().getPartDirectory(); + const String temporary_part_relative_path = part->getDataPartStorage().getPartDirectory(); /// There is one case when we need to retry transaction in a loop. /// But don't do it too many times - just as defensive measure. @@ -820,6 +820,14 @@ std::vector ReplicatedMergeTreeSinkImpl::commitPart( part->name); } + auto rename_part_to_temporary = [&temporary_part_relative_path, &transaction, &part]() + { + transaction.rollbackPartsToTemporaryState(); + + part->is_temp = true; + part->renameTo(temporary_part_relative_path, false); + }; + try { ThreadFuzzer::maybeInjectSleep(); @@ -828,11 +836,7 @@ std::vector ReplicatedMergeTreeSinkImpl::commitPart( } catch (const Exception &) { - transaction.rollbackPartsToTemporaryState(); - - part->is_temp = true; - part->renameTo(temporary_part_relative_path, false); - + rename_part_to_temporary(); throw; } @@ -906,10 +910,7 @@ std::vector ReplicatedMergeTreeSinkImpl::commitPart( /// We will try to add this part again on the new iteration as it's just a new part. /// So remove it from storage parts set immediately and transfer state to temporary. 
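
commitPart() above now defines rename_part_to_temporary once and reuses it in every failure path, including the quorum-conflict path where a secondary cleanup step is allowed to fail without masking the rollback. A generic sketch of that error-handling shape (the function and parameter names are invented, not the ReplicatedMergeTreeSink API):

    #include <exception>
    #include <functional>
    #include <iostream>

    void commitWithRollbackSketch(const std::function<void()> & try_commit,
                                  const std::function<void()> & best_effort_cleanup,
                                  const std::function<void()> & rename_part_to_temporary)
    {
        try
        {
            try_commit();
        }
        catch (...)
        {
            try
            {
                best_effort_cleanup();   /// e.g. unlocking shared data; failure here must not prevent the rollback
            }
            catch (const std::exception & e)
            {
                std::cerr << "Cleanup failed during error handling: " << e.what() << '\n';
            }
            rename_part_to_temporary(); /// the one rollback action, shared by all error paths
            throw;
        }
    }
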
- transaction.rollbackPartsToTemporaryState(); - - part->is_temp = true; - part->renameTo(temporary_part_relative_path, false); + rename_part_to_temporary(); if constexpr (async_insert) { @@ -931,8 +932,20 @@ std::vector ReplicatedMergeTreeSinkImpl::commitPart( } else if (multi_code == Coordination::Error::ZNODEEXISTS && failed_op_path == quorum_info.status_path) { - storage.unlockSharedData(*part, zookeeper); - transaction.rollback(); + try + { + storage.unlockSharedData(*part, zookeeper); + } + catch (const zkutil::KeeperException & e) + { + /// suppress this exception since need to rename part to temporary next + LOG_DEBUG(log, "Unlocking shared data failed during error handling: code={} message={}", e.code, e.message()); + } + + /// Part was not committed to keeper + /// So make it temporary to avoid its resurrection on restart + rename_part_to_temporary(); + throw Exception("Another quorum insert has been already started", ErrorCodes::UNSATISFIED_QUORUM_FOR_PREVIOUS_WRITE); } else diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h index 57fd6035471..1199df95b67 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h @@ -4,7 +4,7 @@ #include #include #include -#include +#include namespace Poco { class Logger; } diff --git a/src/Storages/MergeTree/ReplicatedTableStatus.h b/src/Storages/MergeTree/ReplicatedTableStatus.h new file mode 100644 index 00000000000..b9f84091e9b --- /dev/null +++ b/src/Storages/MergeTree/ReplicatedTableStatus.h @@ -0,0 +1,35 @@ +#pragma once + +#include +#include + +namespace DB +{ + +/** For the system table replicas. */ +struct ReplicatedTableStatus +{ + bool is_leader; + bool can_become_leader; + bool is_readonly; + bool is_session_expired; + + ReplicatedMergeTreeQueue::Status queue; + UInt32 parts_to_check; + String zookeeper_path; + String replica_name; + String replica_path; + Int32 columns_version; + UInt64 log_max_index; + UInt64 log_pointer; + UInt64 absolute_delay; + UInt8 total_replicas; + UInt8 active_replicas; + String last_queue_update_exception; + /// If the error has happened fetching the info from ZooKeeper, this field will be set. + String zookeeper_exception; + + std::unordered_map replica_is_active; +}; + +} diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index ae2abaf8ea5..620591abbf3 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -314,76 +314,17 @@ static StoragePtr create(const StorageFactory::Arguments & args) /// For Replicated. 
String zookeeper_path; String replica_name; - StorageReplicatedMergeTree::RenamingRestrictions renaming_restrictions = StorageReplicatedMergeTree::RenamingRestrictions::ALLOW_ANY; + RenamingRestrictions renaming_restrictions = RenamingRestrictions::ALLOW_ANY; bool is_on_cluster = args.getLocalContext()->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY; bool is_replicated_database = args.getLocalContext()->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY && DatabaseCatalog::instance().getDatabase(args.table_id.database_name)->getEngineName() == "Replicated"; - if (replicated) + /// Allow implicit {uuid} macros only for zookeeper_path in ON CLUSTER queries + bool allow_uuid_macro = is_on_cluster || is_replicated_database || args.query.attach; + + auto expand_macro = [&] (ASTLiteral * ast_zk_path, ASTLiteral * ast_replica_name) { - bool has_arguments = arg_num + 2 <= arg_cnt; - bool has_valid_arguments = has_arguments && engine_args[arg_num]->as() && engine_args[arg_num + 1]->as(); - - ASTLiteral * ast_zk_path; - ASTLiteral * ast_replica_name; - - if (has_valid_arguments) - { - /// Get path and name from engine arguments - ast_zk_path = engine_args[arg_num]->as(); - if (ast_zk_path && ast_zk_path->value.getType() == Field::Types::String) - zookeeper_path = ast_zk_path->value.safeGet(); - else - throw Exception( - "Path in ZooKeeper must be a string literal" + getMergeTreeVerboseHelp(is_extended_storage_def), - ErrorCodes::BAD_ARGUMENTS); - ++arg_num; - - ast_replica_name = engine_args[arg_num]->as(); - if (ast_replica_name && ast_replica_name->value.getType() == Field::Types::String) - replica_name = ast_replica_name->value.safeGet(); - else - throw Exception( - "Replica name must be a string literal" + getMergeTreeVerboseHelp(is_extended_storage_def), ErrorCodes::BAD_ARGUMENTS); - - if (replica_name.empty()) - throw Exception( - "No replica name in config" + getMergeTreeVerboseHelp(is_extended_storage_def), ErrorCodes::NO_REPLICA_NAME_GIVEN); - ++arg_num; - } - else if (is_extended_storage_def - && (arg_cnt == 0 - || !engine_args[arg_num]->as() - || (arg_cnt == 1 && merging_params.mode == MergeTreeData::MergingParams::Graphite))) - { - /// Try use default values if arguments are not specified. - /// Note: {uuid} macro works for ON CLUSTER queries when database engine is Atomic. - const auto & config = args.getContext()->getConfigRef(); - zookeeper_path = StorageReplicatedMergeTree::getDefaultZooKeeperPath(config); - /// TODO maybe use hostname if {replica} is not defined? - replica_name = StorageReplicatedMergeTree::getDefaultReplicaName(config); - - /// Modify query, so default values will be written to metadata - assert(arg_num == 0); - ASTs old_args; - std::swap(engine_args, old_args); - auto path_arg = std::make_shared(zookeeper_path); - auto name_arg = std::make_shared(replica_name); - ast_zk_path = path_arg.get(); - ast_replica_name = name_arg.get(); - engine_args.emplace_back(std::move(path_arg)); - engine_args.emplace_back(std::move(name_arg)); - std::move(std::begin(old_args), std::end(old_args), std::back_inserter(engine_args)); - arg_num = 2; - arg_cnt += 2; - } - else - throw Exception("Expected two string literal arguments: zookeeper_path and replica_name", ErrorCodes::BAD_ARGUMENTS); - - /// Allow implicit {uuid} macros only for zookeeper_path in ON CLUSTER queries - bool allow_uuid_macro = is_on_cluster || is_replicated_database || args.query.attach; - /// Unfold {database} and {table} macro on table creation, so table can be renamed. 
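
To illustrate what unfolding the {database} and {table} macros means for the stored ZooKeeper path, here is a toy substitution sketch (plain string replacement, not the Macros class): the placeholders are expanded once at CREATE time so the stored path no longer depends on the table name and the table can be renamed safely; where the macros cannot be unfolded, the renaming restrictions above are applied instead.

    #include <cstddef>
    #include <string>

    std::string unfoldMacrosSketch(std::string path, const std::string & database, const std::string & table)
    {
        auto replace_all = [&path](const std::string & from, const std::string & to)
        {
            for (size_t pos = path.find(from); pos != std::string::npos; pos = path.find(from, pos + to.size()))
                path.replace(pos, from.size(), to);
        };
        replace_all("{database}", database);
        replace_all("{table}", table);
        return path;
    }

    /// unfoldMacrosSketch("/clickhouse/tables/{database}/{table}", "db", "events")
    ///   -> "/clickhouse/tables/db/events"
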
if (!args.attach) { @@ -427,9 +368,76 @@ static StoragePtr create(const StorageFactory::Arguments & args) /// or if one of these macros is recursively expanded from some other macro. /// Also do not allow to move table from Atomic to Ordinary database if there's {uuid} macro if (info.expanded_database || info.expanded_table) - renaming_restrictions = StorageReplicatedMergeTree::RenamingRestrictions::DO_NOT_ALLOW; + renaming_restrictions = RenamingRestrictions::DO_NOT_ALLOW; else if (info.expanded_uuid) - renaming_restrictions = StorageReplicatedMergeTree::RenamingRestrictions::ALLOW_PRESERVING_UUID; + renaming_restrictions = RenamingRestrictions::ALLOW_PRESERVING_UUID; + }; + + if (replicated) + { + bool has_arguments = arg_num + 2 <= arg_cnt; + bool has_valid_arguments = has_arguments && engine_args[arg_num]->as() && engine_args[arg_num + 1]->as(); + + ASTLiteral * ast_zk_path; + ASTLiteral * ast_replica_name; + + if (has_valid_arguments) + { + /// Get path and name from engine arguments + ast_zk_path = engine_args[arg_num]->as(); + if (ast_zk_path && ast_zk_path->value.getType() == Field::Types::String) + zookeeper_path = ast_zk_path->value.safeGet(); + else + throw Exception( + "Path in ZooKeeper must be a string literal" + getMergeTreeVerboseHelp(is_extended_storage_def), + ErrorCodes::BAD_ARGUMENTS); + ++arg_num; + + ast_replica_name = engine_args[arg_num]->as(); + if (ast_replica_name && ast_replica_name->value.getType() == Field::Types::String) + replica_name = ast_replica_name->value.safeGet(); + else + throw Exception( + "Replica name must be a string literal" + getMergeTreeVerboseHelp(is_extended_storage_def), ErrorCodes::BAD_ARGUMENTS); + + if (replica_name.empty()) + throw Exception( + "No replica name in config" + getMergeTreeVerboseHelp(is_extended_storage_def), ErrorCodes::NO_REPLICA_NAME_GIVEN); + ++arg_num; + + expand_macro(ast_zk_path, ast_replica_name); + } + else if (is_extended_storage_def + && (arg_cnt == 0 + || !engine_args[arg_num]->as() + || (arg_cnt == 1 && merging_params.mode == MergeTreeData::MergingParams::Graphite))) + { + /// Try use default values if arguments are not specified. + /// Note: {uuid} macro works for ON CLUSTER queries when database engine is Atomic. + const auto & config = args.getContext()->getConfigRef(); + zookeeper_path = StorageReplicatedMergeTree::getDefaultZooKeeperPath(config); + /// TODO maybe use hostname if {replica} is not defined? 
+ replica_name = StorageReplicatedMergeTree::getDefaultReplicaName(config); + + /// Modify query, so default values will be written to metadata + assert(arg_num == 0); + ASTs old_args; + std::swap(engine_args, old_args); + auto path_arg = std::make_shared(zookeeper_path); + auto name_arg = std::make_shared(replica_name); + ast_zk_path = path_arg.get(); + ast_replica_name = name_arg.get(); + + expand_macro(ast_zk_path, ast_replica_name); + + engine_args.emplace_back(std::move(path_arg)); + engine_args.emplace_back(std::move(name_arg)); + std::move(std::begin(old_args), std::end(old_args), std::back_inserter(engine_args)); + arg_num = 2; + arg_cnt += 2; + } + else + throw Exception("Expected two string literal arguments: zookeeper_path and replica_name", ErrorCodes::BAD_ARGUMENTS); } /// This merging param maybe used as part of sorting key @@ -468,7 +476,7 @@ static StoragePtr create(const StorageFactory::Arguments & args) { String graphite_config_name; String error_msg - = "Last parameter of GraphiteMergeTree must be name (in single quotes) of element in configuration file with Graphite options"; + = "Last parameter of GraphiteMergeTree must be the name (in single quotes) of the element in configuration file with the Graphite options"; error_msg += getMergeTreeVerboseHelp(is_extended_storage_def); if (const auto * ast = engine_args[arg_cnt - 1]->as()) diff --git a/src/Storages/MessageQueueSink.cpp b/src/Storages/MessageQueueSink.cpp new file mode 100644 index 00000000000..437228a0730 --- /dev/null +++ b/src/Storages/MessageQueueSink.cpp @@ -0,0 +1,77 @@ +#include +#include +#include + +namespace DB +{ + +MessageQueueSink::MessageQueueSink( + const Block & header, + const String & format_name_, + size_t max_rows_per_message_, + std::unique_ptr producer_, + const String & storage_name_, + const ContextPtr & context_) + : SinkToStorage(header), format_name(format_name_), max_rows_per_message(max_rows_per_message_), producer(std::move(producer_)), storage_name(storage_name_), context(context_) +{ +} + +void MessageQueueSink::onStart() +{ + initialize(); + producer->start(context); + + buffer = std::make_unique(); + + auto format_settings = getFormatSettings(context); + format_settings.protobuf.allow_multiple_rows_without_delimiter = true; + + format = FormatFactory::instance().getOutputFormat(format_name, *buffer, getHeader(), context, format_settings); + row_format = dynamic_cast(format.get()); +} + +void MessageQueueSink::onFinish() +{ + producer->finish(); +} + +void MessageQueueSink::consume(Chunk chunk) +{ + const auto & columns = chunk.getColumns(); + if (columns.empty()) + return; + + if (row_format) + { + size_t row = 0; + while (row < chunk.getNumRows()) + { + row_format->writePrefixIfNeeded(); + size_t i = 0; + for (; i < max_rows_per_message && row < chunk.getNumRows(); ++i, ++row) + { + if (i != 0) + row_format->writeRowBetweenDelimiter(); + row_format->writeRow(columns, row); + } + row_format->finalize(); + row_format->resetFormatter(); + producer->produce(buffer->str(), i, columns, row - 1); + /// Reallocate buffer if it's capacity is large then DBMS_DEFAULT_BUFFER_SIZE, + /// because most likely in this case we serialized abnormally large row + /// and won't need this large allocated buffer anymore. 
+ buffer->restart(DBMS_DEFAULT_BUFFER_SIZE); + } + } + else + { + format->write(getHeader().cloneWithColumns(chunk.detachColumns())); + format->finalize(); + producer->produce(buffer->str(), chunk.getNumRows(), columns, chunk.getNumRows() - 1); + format->resetFormatter(); + buffer->restart(); + } +} + + +} diff --git a/src/Storages/MessageQueueSink.h b/src/Storages/MessageQueueSink.h new file mode 100644 index 00000000000..590bee7ee4f --- /dev/null +++ b/src/Storages/MessageQueueSink.h @@ -0,0 +1,62 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ + +class IOutputFormat; +class IRowOutputFormat; +using IOutputFormatPtr = std::shared_ptr; + + +/// Storage sink for streaming engines like Kafka/RabbitMQ/NATS. +/// It implements formatting input data into messages. +/// For row-based formats like TSV, CSV, JSONEachRow, etc, each message +/// contains from 1 to max_rows_per_message rows. +/// For block-based formats like Native, Arrow, Parquet, the whole block is formatted into one message. +/// Each message is created independently, so it contains all format +/// prefixes/suffixes and can fully parsed back by corresponding input format. +/// After formatting, created message is propagated to IMessageProducer::produce() method. +/// To use MessageQueueSink for specific streaming engine, you should implement +/// IMessageProducer for it. +class MessageQueueSink : public SinkToStorage +{ +public: + MessageQueueSink( + const Block & header, + const String & format_name_, + size_t max_rows_per_message_, + std::unique_ptr producer_, + const String & storage_name_, + const ContextPtr & context_); + + String getName() const override { return storage_name + "Sink"; } + + void consume(Chunk chunk) override; + + void onStart() override; + void onFinish() override; + void onCancel() override { onFinish(); } + void onException() override { onFinish(); } + +protected: + /// Do some specific initialization before consuming data. 
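
For row-based formats, MessageQueueSink::consume() above packs rows into messages of at most max_rows_per_message rows, finalizing each message independently so it can be parsed back on its own. A simplified sketch of that batching loop, assuming preformatted row strings instead of IRowOutputFormat (packRowsSketch and the newline delimiter are illustrative):

    #include <cstddef>
    #include <string>
    #include <vector>

    std::vector<std::string> packRowsSketch(const std::vector<std::string> & rows, size_t max_rows_per_message)
    {
        std::vector<std::string> messages;
        size_t row = 0;
        while (row < rows.size())
        {
            std::string message;
            for (size_t i = 0; i < max_rows_per_message && row < rows.size(); ++i, ++row)
            {
                if (i != 0)
                    message += '\n';   /// stands in for writeRowBetweenDelimiter()
                message += rows[row];
            }
            messages.push_back(std::move(message));   /// each message is produced independently
        }
        return messages;
    }

    /// packRowsSketch({"r1", "r2", "r3"}, 2) -> {"r1\nr2", "r3"}
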
+ virtual void initialize() {} + +private: + const String format_name; + size_t max_rows_per_message; + + std::unique_ptr buffer; + IOutputFormatPtr format; + IRowOutputFormat * row_format; + std::unique_ptr producer; + + const String storage_name; + const ContextPtr context; +}; + +} diff --git a/src/Storages/MutationCommands.cpp b/src/Storages/MutationCommands.cpp index 28dfe488869..ffc2cfc3086 100644 --- a/src/Storages/MutationCommands.cpp +++ b/src/Storages/MutationCommands.cpp @@ -144,23 +144,32 @@ std::optional MutationCommand::parse(ASTAlterCommand * command, res.partition = command->partition; return res; } - return {}; + else + { + MutationCommand res; + res.ast = command->ptr(); + res.type = ALTER_WITHOUT_MUTATION; + return res; + } } -std::shared_ptr MutationCommands::ast() const +std::shared_ptr MutationCommands::ast(bool with_pure_metadata_commands) const { auto res = std::make_shared(); for (const MutationCommand & command : *this) - res->children.push_back(command.ast->clone()); + { + if (command.type != MutationCommand::ALTER_WITHOUT_MUTATION || with_pure_metadata_commands) + res->children.push_back(command.ast->clone()); + } return res; } -void MutationCommands::writeText(WriteBuffer & out) const +void MutationCommands::writeText(WriteBuffer & out, bool with_pure_metadata_commands) const { WriteBufferFromOwnString commands_buf; - formatAST(*ast(), commands_buf, /* hilite = */ false, /* one_line = */ true); + formatAST(*ast(with_pure_metadata_commands), commands_buf, /* hilite = */ false, /* one_line = */ true); writeEscapedString(commands_buf.str(), out); } @@ -169,9 +178,11 @@ void MutationCommands::readText(ReadBuffer & in) String commands_str; readEscapedString(commands_str, in); + ParserAlterCommandList p_alter_commands; auto commands_ast = parseQuery( p_alter_commands, commands_str.data(), commands_str.data() + commands_str.length(), "mutation commands list", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + for (const auto & child : commands_ast->children) { auto * command_ast = child->as(); @@ -182,4 +193,22 @@ void MutationCommands::readText(ReadBuffer & in) } } +std::string MutationCommands::toString() const +{ + WriteBufferFromOwnString commands_buf; + formatAST(*ast(), commands_buf, /* hilite = */ false, /* one_line = */ true); + return commands_buf.str(); +} + + +bool MutationCommands::hasNonEmptyMutationCommands() const +{ + for (const auto & command : *this) + { + if (command.type != MutationCommand::Type::EMPTY && command.type != MutationCommand::Type::ALTER_WITHOUT_MUTATION) + return true; + } + return false; +} + } diff --git a/src/Storages/MutationCommands.h b/src/Storages/MutationCommands.h index 3f8af2b4de5..aca91c16e85 100644 --- a/src/Storages/MutationCommands.h +++ b/src/Storages/MutationCommands.h @@ -37,6 +37,7 @@ struct MutationCommand MATERIALIZE_TTL, RENAME_COLUMN, MATERIALIZE_COLUMN, + ALTER_WITHOUT_MUTATION, /// pure metadata command, currently unusned }; Type type = EMPTY; @@ -72,10 +73,12 @@ struct MutationCommand class MutationCommands : public std::vector { public: - std::shared_ptr ast() const; + std::shared_ptr ast(bool with_pure_metadata_commands = false) const; - void writeText(WriteBuffer & out) const; + void writeText(WriteBuffer & out, bool with_pure_metadata_commands) const; void readText(ReadBuffer & in); + std::string toString() const; + bool hasNonEmptyMutationCommands() const; }; using MutationCommandsConstPtr = std::shared_ptr; diff --git a/src/Storages/NATS/Buffer_fwd.h b/src/Storages/NATS/Buffer_fwd.h deleted file mode 100644 index 
3eb52314a79..00000000000 --- a/src/Storages/NATS/Buffer_fwd.h +++ /dev/null @@ -1,14 +0,0 @@ -#pragma once - -#include - -namespace DB -{ - -class ReadBufferFromNATSConsumer; -using ConsumerBufferPtr = std::shared_ptr; - -class WriteBufferToNATSProducer; -using ProducerBufferPtr = std::shared_ptr; - -} diff --git a/src/Storages/NATS/NATSConnection.h b/src/Storages/NATS/NATSConnection.h index c699f859446..b49070473b2 100644 --- a/src/Storages/NATS/NATSConnection.h +++ b/src/Storages/NATS/NATSConnection.h @@ -1,6 +1,5 @@ #pragma once -#include #include #include diff --git a/src/Storages/NATS/ReadBufferFromNATSConsumer.cpp b/src/Storages/NATS/NATSConsumer.cpp similarity index 63% rename from src/Storages/NATS/ReadBufferFromNATSConsumer.cpp rename to src/Storages/NATS/NATSConsumer.cpp index fa6e60ac213..c7b40973b72 100644 --- a/src/Storages/NATS/ReadBufferFromNATSConsumer.cpp +++ b/src/Storages/NATS/NATSConsumer.cpp @@ -2,9 +2,8 @@ #include #include #include -#include -#include -#include +#include +#include #include "Poco/Timer.h" #include @@ -17,28 +16,25 @@ namespace ErrorCodes extern const int CANNOT_CONNECT_NATS; } -ReadBufferFromNATSConsumer::ReadBufferFromNATSConsumer( +NATSConsumer::NATSConsumer( std::shared_ptr connection_, StorageNATS & storage_, std::vector & subjects_, const String & subscribe_queue_name, Poco::Logger * log_, - char row_delimiter_, uint32_t queue_size_, const std::atomic & stopped_) - : ReadBuffer(nullptr, 0) - , connection(connection_) + : connection(connection_) , storage(storage_) , subjects(subjects_) , log(log_) - , row_delimiter(row_delimiter_) , stopped(stopped_) , queue_name(subscribe_queue_name) , received(queue_size_) { } -void ReadBufferFromNATSConsumer::subscribe() +void NATSConsumer::subscribe() { if (subscribed) return; @@ -62,49 +58,38 @@ void ReadBufferFromNATSConsumer::subscribe() subscribed = true; } -void ReadBufferFromNATSConsumer::unsubscribe() +void NATSConsumer::unsubscribe() { for (const auto & subscription : subscriptions) natsSubscription_Unsubscribe(subscription.get()); } -bool ReadBufferFromNATSConsumer::nextImpl() +ReadBufferPtr NATSConsumer::consume() { - if (stopped || !allowed) - return false; + if (stopped || !received.tryPop(current)) + return nullptr; - if (received.tryPop(current)) - { - auto * new_position = const_cast(current.message.data()); - BufferBase::set(new_position, current.message.size(), 0); - allowed = false; - - return true; - } - - return false; + return std::make_shared(current.message.data(), current.message.size()); } -void ReadBufferFromNATSConsumer::onMsg(natsConnection *, natsSubscription *, natsMsg * msg, void * consumer) +void NATSConsumer::onMsg(natsConnection *, natsSubscription *, natsMsg * msg, void * consumer) { - auto * buffer = static_cast(consumer); + auto * nats_consumer = static_cast(consumer); const int msg_length = natsMsg_GetDataLength(msg); if (msg_length) { String message_received = std::string(natsMsg_GetData(msg), msg_length); String subject = natsMsg_GetSubject(msg); - if (buffer->row_delimiter != '\0') - message_received += buffer->row_delimiter; MessageData data = { .message = message_received, .subject = subject, }; - if (!buffer->received.push(std::move(data))) + if (!nats_consumer->received.push(std::move(data))) throw Exception(ErrorCodes::LOGICAL_ERROR, "Could not push to received queue"); - buffer->storage.startStreaming(); + nats_consumer->storage.startStreaming(); } natsMsg_Destroy(msg); diff --git a/src/Storages/NATS/ReadBufferFromNATSConsumer.h b/src/Storages/NATS/NATSConsumer.h 
similarity index 84% rename from src/Storages/NATS/ReadBufferFromNATSConsumer.h rename to src/Storages/NATS/NATSConsumer.h index 306c0aff3bf..a6f950329aa 100644 --- a/src/Storages/NATS/ReadBufferFromNATSConsumer.h +++ b/src/Storages/NATS/NATSConsumer.h @@ -16,16 +16,15 @@ class Logger; namespace DB { -class ReadBufferFromNATSConsumer : public ReadBuffer +class NATSConsumer { public: - ReadBufferFromNATSConsumer( + NATSConsumer( std::shared_ptr connection_, StorageNATS & storage_, std::vector & subjects_, const String & subscribe_queue_name, Poco::Logger * log_, - char row_delimiter_, uint32_t queue_size_, const std::atomic & stopped_); @@ -44,13 +43,14 @@ public: bool queueEmpty() { return received.empty(); } size_t queueSize() { return received.size(); } - void allowNext() { allowed = true; } // Allow to read next message. auto getSubject() const { return current.subject; } -private: - bool nextImpl() override; + /// Return read buffer containing next available message + /// or nullptr if there are no messages to process. + ReadBufferPtr consume(); +private: static void onMsg(natsConnection * nc, natsSubscription * sub, natsMsg * msg, void * consumer); std::shared_ptr connection; @@ -58,8 +58,6 @@ private: std::vector subscriptions; std::vector subjects; Poco::Logger * log; - char row_delimiter; - bool allowed = true; const std::atomic & stopped; bool subscribed = false; diff --git a/src/Storages/NATS/NATSHandler.cpp b/src/Storages/NATS/NATSHandler.cpp index b5812bc3349..7006e5633a9 100644 --- a/src/Storages/NATS/NATSHandler.cpp +++ b/src/Storages/NATS/NATSHandler.cpp @@ -26,7 +26,7 @@ NATSHandler::NATSHandler(uv_loop_t * loop_, Poco::Logger * log_) : natsLibuv_Read, natsLibuv_Write, natsLibuv_Detach); - natsOptions_SetIOBufSize(opts, INT_MAX); + natsOptions_SetIOBufSize(opts, DBMS_DEFAULT_BUFFER_SIZE); natsOptions_SetSendAsap(opts, true); } diff --git a/src/Storages/NATS/NATSProducer.cpp b/src/Storages/NATS/NATSProducer.cpp new file mode 100644 index 00000000000..b92fae1ac85 --- /dev/null +++ b/src/Storages/NATS/NATSProducer.cpp @@ -0,0 +1,126 @@ +#include + +#include +#include +#include +#include +#include + + +namespace DB +{ + +static const auto BATCH = 1000; +static const auto MAX_BUFFERED = 131072; + +namespace ErrorCodes +{ + extern const int CANNOT_CONNECT_NATS; + extern const int LOGICAL_ERROR; +} + +NATSProducer::NATSProducer( + const NATSConfiguration & configuration_, + const String & subject_, + std::atomic & shutdown_called_, + Poco::Logger * log_) + : connection(configuration_, log_) + , subject(subject_) + , shutdown_called(shutdown_called_) + , payloads(BATCH) + , log(log_) +{ +} + +void NATSProducer::initialize() +{ + if (!connection.connect()) + throw Exception(ErrorCodes::CANNOT_CONNECT_NATS, "Cannot connect to NATS {}", connection.connectionInfoForLog()); +} + +void NATSProducer::finishImpl() +{ + connection.disconnect(); +} + + +void NATSProducer::produce(const String & message, size_t, const Columns &, size_t) +{ + if (!payloads.push(message)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Could not push to payloads queue"); +} + +void NATSProducer::publish() +{ + uv_thread_t flush_thread; + + uv_thread_create(&flush_thread, publishThreadFunc, static_cast(this)); + + connection.getHandler().startLoop(); + uv_thread_join(&flush_thread); +} + +void NATSProducer::publishThreadFunc(void * arg) +{ + NATSProducer * producer = static_cast(arg); + String payload; + + natsStatus status; + while (!producer->payloads.empty()) + { + if 
(natsConnection_Buffered(producer->connection.getConnection()) > MAX_BUFFERED) + break; + bool pop_result = producer->payloads.pop(payload); + + if (!pop_result) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Could not pop payload"); + + status = natsConnection_Publish(producer->connection.getConnection(), producer->subject.c_str(), payload.c_str(), static_cast(payload.size())); + + if (status != NATS_OK) + { + LOG_DEBUG(producer->log, "Something went wrong during publishing to NATS subject. Nats status text: {}. Last error message: {}", + natsStatus_GetText(status), nats_GetLastError(nullptr)); + if (!producer->payloads.push(std::move(payload))) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Could not push to payloads queue"); + break; + } + } + + nats_ReleaseThreadMemory(); +} + +void NATSProducer::stopProducingTask() +{ + payloads.finish(); +} + +void NATSProducer::startProducingTaskLoop() +{ + try + { + while ((!payloads.isFinishedAndEmpty() || natsConnection_Buffered(connection.getConnection()) != 0) && !shutdown_called.load()) + { + publish(); + + if (!connection.isConnected()) + connection.reconnect(); + + iterateEventLoop(); + } + } + catch (...) + { + tryLogCurrentException(log); + } + + LOG_DEBUG(log, "Producer on subject {} completed", subject); +} + + +void NATSProducer::iterateEventLoop() +{ + connection.getHandler().iterateLoop(); +} + +} diff --git a/src/Storages/NATS/NATSProducer.h b/src/Storages/NATS/NATSProducer.h new file mode 100644 index 00000000000..5bdbfc56222 --- /dev/null +++ b/src/Storages/NATS/NATSProducer.h @@ -0,0 +1,57 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +class NATSProducer : public AsynchronousMessageProducer +{ +public: + NATSProducer( + const NATSConfiguration & configuration_, + const String & subject_, + std::atomic & shutdown_called_, + Poco::Logger * log_); + + void produce(const String & message, size_t rows_in_message, const Columns & columns, size_t last_row) override; + +private: + String getProducingTaskName() const override { return "NatsProducingTask"; } + + void initialize() override; + void stopProducingTask() override; + void finishImpl() override; + + void startProducingTaskLoop() override; + + void iterateEventLoop(); + void publish(); + + static void publishThreadFunc(void * arg); + + NATSConnectionManager connection; + const String subject; + + /* false: when shutdown is called + * true: in all other cases + */ + std::atomic & shutdown_called; + + /* payloads.queue: + * - payloads are pushed to queue in countRow and popped by another thread in writingFunc, each payload gets into queue only once + */ + ConcurrentBoundedQueue payloads; + + Poco::Logger * log; +}; + +} diff --git a/src/Storages/NATS/NATSSettings.h b/src/Storages/NATS/NATSSettings.h index 9bf9b969387..b4003eef46d 100644 --- a/src/Storages/NATS/NATSSettings.h +++ b/src/Storages/NATS/NATSSettings.h @@ -26,6 +26,7 @@ class ASTStorage; M(String, nats_password, "", "NATS password", 0) \ M(String, nats_token, "", "NATS token", 0) \ M(UInt64, nats_startup_connect_tries, 5, "Number of connect tries at startup", 0) \ + M(UInt64, nats_max_rows_per_message, 1, "The maximum number of rows produced in one message for row-based formats.", 0) \ #define LIST_OF_NATS_SETTINGS(M) \ NATS_RELATED_SETTINGS(M) \ diff --git a/src/Storages/NATS/NATSSink.cpp b/src/Storages/NATS/NATSSink.cpp deleted file mode 100644 index 44cf51072e6..00000000000 --- a/src/Storages/NATS/NATSSink.cpp +++ /dev/null @@ -1,56 +0,0 
@@ -#include -#include -#include -#include -#include -#include - - -namespace DB -{ - -NATSSink::NATSSink( - StorageNATS & storage_, - const StorageMetadataPtr & metadata_snapshot_, - ContextPtr context_, - ProducerBufferPtr buffer_) - : SinkToStorage(metadata_snapshot_->getSampleBlockNonMaterialized()) - , storage(storage_) - , metadata_snapshot(metadata_snapshot_) - , context(context_) - , buffer(buffer_) -{ -} - - -void NATSSink::onStart() -{ - buffer->activateWriting(); - - auto format_settings = getFormatSettings(context); - format_settings.protobuf.allow_multiple_rows_without_delimiter = true; - - format = FormatFactory::instance().getOutputFormat(storage.getFormatName(), *buffer, getHeader(), context, - [this](const Columns & /* columns */, size_t /* rows */) - { - buffer->countRow(); - }, - format_settings); -} - - -void NATSSink::consume(Chunk chunk) -{ - format->write(getHeader().cloneWithColumns(chunk.detachColumns())); -} - - -void NATSSink::onFinish() -{ - format->finalize(); - - if (buffer) - buffer->updateMaxWait(); -} - -} diff --git a/src/Storages/NATS/NATSSink.h b/src/Storages/NATS/NATSSink.h deleted file mode 100644 index d94575de0e7..00000000000 --- a/src/Storages/NATS/NATSSink.h +++ /dev/null @@ -1,31 +0,0 @@ -#pragma once - -#include -#include - - -namespace DB -{ - -class IOutputFormat; -using IOutputFormatPtr = std::shared_ptr; - -class NATSSink : public SinkToStorage -{ -public: - explicit NATSSink(StorageNATS & storage_, const StorageMetadataPtr & metadata_snapshot_, ContextPtr context_, ProducerBufferPtr buffer_); - - void onStart() override; - void consume(Chunk chunk) override; - void onFinish() override; - - String getName() const override { return "NATSSink"; } - -private: - StorageNATS & storage; - StorageMetadataPtr metadata_snapshot; - ContextPtr context; - ProducerBufferPtr buffer; - IOutputFormatPtr format; -}; -} diff --git a/src/Storages/NATS/NATSSource.cpp b/src/Storages/NATS/NATSSource.cpp index f5e5e4f8b91..793b69e7e46 100644 --- a/src/Storages/NATS/NATSSource.cpp +++ b/src/Storages/NATS/NATSSource.cpp @@ -3,7 +3,8 @@ #include #include #include -#include +#include +#include namespace DB { @@ -59,11 +60,10 @@ NATSSource::~NATSSource() { storage.decrementReader(); - if (!buffer) + if (!consumer) return; - buffer->allowNext(); - storage.pushReadBuffer(buffer); + storage.pushConsumer(consumer); } bool NATSSource::checkTimeLimit() const @@ -81,21 +81,22 @@ bool NATSSource::checkTimeLimit() const Chunk NATSSource::generate() { - if (!buffer) + if (!consumer) { auto timeout = std::chrono::milliseconds(context->getSettingsRef().rabbitmq_max_wait_ms.totalMilliseconds()); - buffer = storage.popReadBuffer(timeout); - buffer->subscribe(); + consumer = storage.popConsumer(timeout); + consumer->subscribe(); } - if (!buffer || is_finished) + if (!consumer || is_finished) return {}; is_finished = true; MutableColumns virtual_columns = virtual_header.cloneEmptyColumns(); + EmptyReadBuffer empty_buf; auto input_format - = FormatFactory::instance().getInputFormat(storage.getFormatName(), *buffer, non_virtual_header, context, max_block_size); + = FormatFactory::instance().getInputFormat(storage.getFormatName(), empty_buf, non_virtual_header, context, max_block_size); StreamingFormatExecutor executor(non_virtual_header, input_format); @@ -103,22 +104,22 @@ Chunk NATSSource::generate() while (true) { - if (buffer->eof()) + if (consumer->queueEmpty()) break; - auto new_rows = executor.execute(); + size_t new_rows = 0; + if (auto buf = consumer->consume()) + new_rows = 
executor.execute(*buf); if (new_rows) { - auto subject = buffer->getSubject(); + auto subject = consumer->getSubject(); virtual_columns[0]->insertMany(subject, new_rows); total_rows = total_rows + new_rows; } - buffer->allowNext(); - - if (total_rows >= max_block_size || buffer->queueEmpty() || buffer->isConsumerStopped() || !checkTimeLimit()) + if (total_rows >= max_block_size || consumer->queueEmpty() || consumer->isConsumerStopped() || !checkTimeLimit()) break; } diff --git a/src/Storages/NATS/NATSSource.h b/src/Storages/NATS/NATSSource.h index e4e94d2347a..604a8a5366f 100644 --- a/src/Storages/NATS/NATSSource.h +++ b/src/Storages/NATS/NATSSource.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include #include @@ -21,11 +21,11 @@ public: ~NATSSource() override; String getName() const override { return storage.getName(); } - ConsumerBufferPtr getBuffer() { return buffer; } + NATSConsumerPtr getConsumer() { return consumer; } Chunk generate() override; - bool queueEmpty() const { return !buffer || buffer->queueEmpty(); } + bool queueEmpty() const { return !consumer || consumer->queueEmpty(); } void setTimeLimit(Poco::Timespan max_execution_time_) { max_execution_time = max_execution_time_; } @@ -42,7 +42,7 @@ private: const Block non_virtual_header; const Block virtual_header; - ConsumerBufferPtr buffer; + NATSConsumerPtr consumer; Poco::Timespan max_execution_time = 0; Stopwatch total_stopwatch {CLOCK_MONOTONIC_COARSE}; diff --git a/src/Storages/NATS/StorageNATS.cpp b/src/Storages/NATS/StorageNATS.cpp index 5a8e250a972..cb5dff7d082 100644 --- a/src/Storages/NATS/StorageNATS.cpp +++ b/src/Storages/NATS/StorageNATS.cpp @@ -11,10 +11,10 @@ #include #include #include -#include #include #include -#include +#include +#include #include #include #include @@ -23,7 +23,6 @@ #include #include #include -#include #include #include @@ -56,9 +55,9 @@ StorageNATS::StorageNATS( , nats_settings(std::move(nats_settings_)) , subjects(parseList(getContext()->getMacros()->expand(nats_settings->nats_subjects), ',')) , format_name(getContext()->getMacros()->expand(nats_settings->nats_format)) - , row_delimiter(nats_settings->nats_row_delimiter.value) , schema_name(getContext()->getMacros()->expand(nats_settings->nats_schema)) , num_consumers(nats_settings->nats_num_consumers.value) + , max_rows_per_message(nats_settings->nats_max_rows_per_message) , log(&Poco::Logger::get("StorageNATS (" + table_id_.table_name + ")")) , semaphore(0, static_cast(num_consumers)) , queue_size(std::max(QUEUE_SIZE, static_cast(getMaxBlockSize()))) @@ -238,11 +237,11 @@ void StorageNATS::connectionFunc() bool StorageNATS::initBuffers() { size_t num_initialized = 0; - for (auto & buffer : buffers) + for (auto & consumer : consumers) { try { - buffer->subscribe(); + consumer->subscribe(); ++num_initialized; } catch (...) 
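(Editor's note, hedged illustration.) The refactor above turns the ReadBuffer-based consumers into plain consumer classes whose consume() hands back a read buffer over the next queued message, and the source feeds that buffer to the format executor one message at a time instead of treating the consumer as a continuous byte stream. The sketch below is a simplified, standard-library-only analogue of that pattern under assumed names (ToyConsumer, onMessage are illustrative, not the ClickHouse API):

// Minimal sketch of a "consume() returns a buffer per message" consumer.
#include <iostream>
#include <memory>
#include <mutex>
#include <queue>
#include <sstream>
#include <string>
#include <utility>

class ToyConsumer
{
public:
    /// Called from the receiving thread for every incoming message.
    void onMessage(std::string message)
    {
        std::lock_guard lock(mutex);
        received.push(std::move(message));
    }

    /// Return a read buffer over the next available message,
    /// or nullptr if there is nothing to process (mirrors the new consume() contract).
    std::unique_ptr<std::istringstream> consume()
    {
        std::lock_guard lock(mutex);
        if (received.empty())
            return nullptr;
        auto buf = std::make_unique<std::istringstream>(std::move(received.front()));
        received.pop();
        return buf;
    }

private:
    std::mutex mutex;
    std::queue<std::string> received;
};

int main()
{
    ToyConsumer consumer;
    consumer.onMessage("{\"key\": 1}");
    consumer.onMessage("{\"key\": 2}");

    /// The reading side drains one message per call, as NATSSource/RabbitMQSource now do.
    while (auto buf = consumer.consume())
        std::cout << buf->str() << '\n';
}

Handing the parser an explicit per-message buffer preserves message boundaries by construction, which is why the row_delimiter plumbing and the allowNext()/nextImpl() bookkeeping could be removed in this change.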
@@ -253,10 +252,10 @@ bool StorageNATS::initBuffers() } startLoop(); - const bool are_buffers_initialized = num_initialized == num_created_consumers; - if (are_buffers_initialized) + const bool are_consumers_initialized = num_initialized == num_created_consumers; + if (are_consumers_initialized) consumers_ready.store(true); - return are_buffers_initialized; + return are_consumers_initialized; } @@ -381,18 +380,24 @@ SinkToStoragePtr StorageNATS::write(const ASTPtr &, const StorageMetadataPtr & m if (!isSubjectInSubscriptions(subject)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Selected subject is not among engine subjects"); - return std::make_shared(*this, metadata_snapshot, local_context, createWriteBuffer(subject)); -} + auto producer = std::make_unique(configuration, subject, shutdown_called, log); + size_t max_rows = max_rows_per_message; + /// Need for backward compatibility. + if (format_name == "Avro" && local_context->getSettingsRef().output_format_avro_rows_in_file.changed) + max_rows = local_context->getSettingsRef().output_format_avro_rows_in_file.value; + return std::make_shared( + metadata_snapshot->getSampleBlockNonMaterialized(), getFormatName(), max_rows, std::move(producer), getName(), modified_context);} void StorageNATS::startup() { + (void) is_attach; for (size_t i = 0; i < num_consumers; ++i) { try { - auto buffer = createReadBuffer(); - pushReadBuffer(std::move(buffer)); + auto consumer = createConsumer(); + pushConsumer(std::move(consumer)); ++num_created_consumers; } catch (...) @@ -425,14 +430,14 @@ void StorageNATS::shutdown() { if (drop_table) { - for (auto & buffer : buffers) - buffer->unsubscribe(); + for (auto & consumer : consumers) + consumer->unsubscribe(); } connection->disconnect(); for (size_t i = 0; i < num_created_consumers; ++i) - popReadBuffer(); + popConsumer(); } catch (...) { @@ -440,23 +445,23 @@ void StorageNATS::shutdown() } } -void StorageNATS::pushReadBuffer(ConsumerBufferPtr buffer) +void StorageNATS::pushConsumer(NATSConsumerPtr consumer) { - std::lock_guard lock(buffers_mutex); - buffers.push_back(buffer); + std::lock_guard lock(consumers_mutex); + consumers.push_back(consumer); semaphore.set(); } -ConsumerBufferPtr StorageNATS::popReadBuffer() +NATSConsumerPtr StorageNATS::popConsumer() { - return popReadBuffer(std::chrono::milliseconds::zero()); + return popConsumer(std::chrono::milliseconds::zero()); } -ConsumerBufferPtr StorageNATS::popReadBuffer(std::chrono::milliseconds timeout) +NATSConsumerPtr StorageNATS::popConsumer(std::chrono::milliseconds timeout) { - // Wait for the first free buffer + // Wait for the first free consumer if (timeout == std::chrono::milliseconds::zero()) semaphore.wait(); else @@ -465,29 +470,21 @@ ConsumerBufferPtr StorageNATS::popReadBuffer(std::chrono::milliseconds timeout) return nullptr; } - // Take the first available buffer from the list - std::lock_guard lock(buffers_mutex); - auto buffer = buffers.back(); - buffers.pop_back(); + // Take the first available consumer from the list + std::lock_guard lock(consumers_mutex); + auto consumer = consumers.back(); + consumers.pop_back(); - return buffer; + return consumer; } -ConsumerBufferPtr StorageNATS::createReadBuffer() +NATSConsumerPtr StorageNATS::createConsumer() { - return std::make_shared( + return std::make_shared( connection, *this, subjects, nats_settings->nats_queue_group.changed ? 
nats_settings->nats_queue_group.value : getStorageID().getFullTableName(), - log, row_delimiter, queue_size, shutdown_called); -} - - -ProducerBufferPtr StorageNATS::createWriteBuffer(const std::string & subject) -{ - return std::make_shared( - configuration, getContext(), subject, shutdown_called, log, - row_delimiter ? std::optional{row_delimiter} : std::nullopt, 1, 1024); + log, queue_size, shutdown_called); } bool StorageNATS::isSubjectInSubscriptions(const std::string & subject) @@ -643,7 +640,7 @@ bool StorageNATS::streamToViews() for (size_t i = 0; i < num_created_consumers; ++i) { - LOG_DEBUG(log, "Current queue size: {}", buffers[0]->queueSize()); + LOG_DEBUG(log, "Current queue size: {}", consumers[0]->queueSize()); auto source = std::make_shared(*this, storage_snapshot, nats_context, column_names, block_size); sources.emplace_back(source); pipes.emplace_back(source); diff --git a/src/Storages/NATS/StorageNATS.h b/src/Storages/NATS/StorageNATS.h index a5a050d566f..518d81fb145 100644 --- a/src/Storages/NATS/StorageNATS.h +++ b/src/Storages/NATS/StorageNATS.h @@ -5,7 +5,6 @@ #include #include #include -#include #include #include #include @@ -14,6 +13,9 @@ namespace DB { +class NATSConsumer; +using NATSConsumerPtr = std::shared_ptr; + class StorageNATS final : public IStorage, WithContext { public: @@ -51,9 +53,12 @@ public: SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr context) override; - void pushReadBuffer(ConsumerBufferPtr buf); - ConsumerBufferPtr popReadBuffer(); - ConsumerBufferPtr popReadBuffer(std::chrono::milliseconds timeout); + /// We want to control the number of rows in a chunk inserted into NATS + bool prefersLargeBlocks() const override { return false; } + + void pushConsumer(NATSConsumerPtr consumer); + NATSConsumerPtr popConsumer(); + NATSConsumerPtr popConsumer(std::chrono::milliseconds timeout); const String & getFormatName() const { return format_name; } NamesAndTypesList getVirtuals() const override; @@ -69,9 +74,9 @@ private: std::vector subjects; const String format_name; - char row_delimiter; const String schema_name; size_t num_consumers; + size_t max_rows_per_message; Poco::Logger * log; @@ -80,11 +85,11 @@ private: size_t num_created_consumers = 0; Poco::Semaphore semaphore; - std::mutex buffers_mutex; - std::vector buffers; /// available buffers for NATS consumers + std::mutex consumers_mutex; + std::vector consumers; /// available NATS consumers /// maximum number of messages in NATS queue (x-max-length). 
Also used - /// to setup size of inner buffer for received messages + /// to setup size of inner consumer for received messages uint32_t queue_size; std::once_flag flag; /// remove exchange only once @@ -114,8 +119,7 @@ private: mutable bool drop_table = false; bool is_attach; - ConsumerBufferPtr createReadBuffer(); - ProducerBufferPtr createWriteBuffer(const std::string & subject); + NATSConsumerPtr createConsumer(); bool isSubjectInSubscriptions(const std::string & subject); diff --git a/src/Storages/NATS/WriteBufferToNATSProducer.cpp b/src/Storages/NATS/WriteBufferToNATSProducer.cpp deleted file mode 100644 index af76247d903..00000000000 --- a/src/Storages/NATS/WriteBufferToNATSProducer.cpp +++ /dev/null @@ -1,183 +0,0 @@ -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -namespace DB -{ - -static const auto BATCH = 1000; -static const auto MAX_BUFFERED = 131072; - -namespace ErrorCodes -{ - extern const int CANNOT_CONNECT_NATS; - extern const int LOGICAL_ERROR; -} - -WriteBufferToNATSProducer::WriteBufferToNATSProducer( - const NATSConfiguration & configuration_, - ContextPtr global_context, - const String & subject_, - std::atomic & shutdown_called_, - Poco::Logger * log_, - std::optional delimiter, - size_t rows_per_message, - size_t chunk_size_) - : WriteBuffer(nullptr, 0) - , connection(configuration_, log_) - , subject(subject_) - , shutdown_called(shutdown_called_) - , payloads(BATCH) - , log(log_) - , delim(delimiter) - , max_rows(rows_per_message) - , chunk_size(chunk_size_) -{ - if (!connection.connect()) - throw Exception(ErrorCodes::CANNOT_CONNECT_NATS, "Cannot connect to NATS {}", connection.connectionInfoForLog()); - - writing_task = global_context->getSchedulePool().createTask("NATSWritingTask", [this] { writingFunc(); }); - writing_task->deactivate(); - - reinitializeChunks(); -} - - -WriteBufferToNATSProducer::~WriteBufferToNATSProducer() -{ - writing_task->deactivate(); - assert(rows == 0); -} - - -void WriteBufferToNATSProducer::countRow() -{ - if (++rows % max_rows == 0) - { - const std::string & last_chunk = chunks.back(); - size_t last_chunk_size = offset(); - - if (last_chunk_size && delim && last_chunk[last_chunk_size - 1] == delim) - --last_chunk_size; - - std::string payload; - payload.reserve((chunks.size() - 1) * chunk_size + last_chunk_size); - - for (auto i = chunks.begin(), end = --chunks.end(); i != end; ++i) - payload.append(*i); - - payload.append(last_chunk, 0, last_chunk_size); - - reinitializeChunks(); - - if (!payloads.push(payload)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Could not push to payloads queue"); - } -} - -void WriteBufferToNATSProducer::publish() -{ - uv_thread_t flush_thread; - - uv_thread_create(&flush_thread, publishThreadFunc, static_cast(this)); - - connection.getHandler().startLoop(); - uv_thread_join(&flush_thread); -} - -void WriteBufferToNATSProducer::publishThreadFunc(void * arg) -{ - WriteBufferToNATSProducer * buffer = static_cast(arg); - String payload; - - natsStatus status; - while (!buffer->payloads.empty()) - { - if (natsConnection_Buffered(buffer->connection.getConnection()) > MAX_BUFFERED) - break; - bool pop_result = buffer->payloads.pop(payload); - - if (!pop_result) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Could not pop payload"); - status = natsConnection_PublishString(buffer->connection.getConnection(), buffer->subject.c_str(), payload.c_str()); - - if (status != NATS_OK) - { - LOG_DEBUG(buffer->log, "Something went wrong during publishing to NATS 
subject. Nats status text: {}. Last error message: {}", - natsStatus_GetText(status), nats_GetLastError(nullptr)); - if (!buffer->payloads.push(std::move(payload))) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Could not push to payloads queue"); - break; - } - } - - nats_ReleaseThreadMemory(); -} - - -void WriteBufferToNATSProducer::writingFunc() -{ - try - { - while ((!payloads.empty() || wait_all) && !shutdown_called.load()) - { - publish(); - - LOG_DEBUG( - log, "Writing func {} {} {}", wait_payloads.load(), payloads.empty(), natsConnection_Buffered(connection.getConnection())); - if (wait_payloads.load() && payloads.empty() && natsConnection_Buffered(connection.getConnection()) == 0) - wait_all = false; - - if (!connection.isConnected() && wait_all) - connection.reconnect(); - - iterateEventLoop(); - } - } - catch (...) - { - tryLogCurrentException(log); - } - - LOG_DEBUG(log, "Producer on subject {} completed", subject); -} - - -void WriteBufferToNATSProducer::nextImpl() -{ - addChunk(); -} - -void WriteBufferToNATSProducer::addChunk() -{ - chunks.push_back(std::string()); - chunks.back().resize(chunk_size); - set(chunks.back().data(), chunk_size); -} - -void WriteBufferToNATSProducer::reinitializeChunks() -{ - rows = 0; - chunks.clear(); - /// We cannot leave the buffer in the undefined state (i.e. without any - /// underlying buffer), since in this case the WriteBuffeR::next() will - /// not call our nextImpl() (due to available() == 0) - addChunk(); -} - - -void WriteBufferToNATSProducer::iterateEventLoop() -{ - connection.getHandler().iterateLoop(); -} - -} diff --git a/src/Storages/NATS/WriteBufferToNATSProducer.h b/src/Storages/NATS/WriteBufferToNATSProducer.h deleted file mode 100644 index 484d80598db..00000000000 --- a/src/Storages/NATS/WriteBufferToNATSProducer.h +++ /dev/null @@ -1,81 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace DB -{ - -class WriteBufferToNATSProducer : public WriteBuffer -{ -public: - WriteBufferToNATSProducer( - const NATSConfiguration & configuration_, - ContextPtr global_context, - const String & subject_, - std::atomic & shutdown_called_, - Poco::Logger * log_, - std::optional delimiter, - size_t rows_per_message, - size_t chunk_size_); - - ~WriteBufferToNATSProducer() override; - - void countRow(); - void activateWriting() { writing_task->activateAndSchedule(); } - void updateMaxWait() { wait_payloads.store(true); } - -private: - void nextImpl() override; - void addChunk(); - void reinitializeChunks(); - - void iterateEventLoop(); - void writingFunc(); - void publish(); - - static void publishThreadFunc(void * arg); - - NATSConnectionManager connection; - const String subject; - - /* false: when shutdown is called - * true: in all other cases - */ - std::atomic & shutdown_called; - - BackgroundSchedulePool::TaskHolder writing_task; - - /* payloads.queue: - * - payloads are pushed to queue in countRow and popped by another thread in writingFunc, each payload gets into queue only once - */ - ConcurrentBoundedQueue payloads; - - /* false: message delivery successfully ended: publisher received confirm from server that all published - * 1) persistent messages were written to disk - * 2) non-persistent messages reached the queue - * true: continue to process deliveries and returned messages - */ - bool wait_all = true; - - /* false: until writeSuffix is called - * true: means payloads.queue will not grow anymore - */ - std::atomic wait_payloads = false; - - Poco::Logger * 
log; - const std::optional delim; - const size_t max_rows; - const size_t chunk_size; - size_t rows = 0; - std::list chunks; -}; - -} diff --git a/src/Storages/NamedCollections/NamedCollectionsHelpers.cpp b/src/Storages/NamedCollections/NamedCollectionsHelpers.cpp index cceabdfd7bf..9a7e5fef7d6 100644 --- a/src/Storages/NamedCollections/NamedCollectionsHelpers.cpp +++ b/src/Storages/NamedCollections/NamedCollectionsHelpers.cpp @@ -25,7 +25,7 @@ namespace return nullptr; const auto & collection_name = identifier->name(); - return NamedCollectionFactory::instance().tryGet(collection_name); + return NamedCollectionFactory::instance().get(collection_name); } std::optional> getKeyValueFromAST(ASTPtr ast) diff --git a/src/Storages/PartitionedSink.cpp b/src/Storages/PartitionedSink.cpp index 027e4f1f306..363b4557290 100644 --- a/src/Storages/PartitionedSink.cpp +++ b/src/Storages/PartitionedSink.cpp @@ -33,7 +33,7 @@ PartitionedSink::PartitionedSink( , context(context_) , sample_block(sample_block_) { - std::vector arguments(1, partition_by); + ASTs arguments(1, partition_by); ASTPtr partition_by_string = makeASTFunction(FunctionToString::name, std::move(arguments)); auto syntax_result = TreeRewriter(context).analyze(partition_by_string, sample_block.getNamesAndTypesList()); diff --git a/src/Storages/RabbitMQ/Buffer_fwd.h b/src/Storages/RabbitMQ/Buffer_fwd.h deleted file mode 100644 index 5be2c6fdf6a..00000000000 --- a/src/Storages/RabbitMQ/Buffer_fwd.h +++ /dev/null @@ -1,14 +0,0 @@ -#pragma once - -#include - -namespace DB -{ - -class ReadBufferFromRabbitMQConsumer; -using ConsumerBufferPtr = std::shared_ptr; - -class WriteBufferToRabbitMQProducer; -using ProducerBufferPtr = std::shared_ptr; - -} diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp b/src/Storages/RabbitMQ/RabbitMQConsumer.cpp similarity index 78% rename from src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp rename to src/Storages/RabbitMQ/RabbitMQConsumer.cpp index 3543085f5a0..9b66b9b1d7c 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp +++ b/src/Storages/RabbitMQ/RabbitMQConsumer.cpp @@ -1,12 +1,11 @@ #include #include #include -#include #include #include -#include +#include #include -#include +#include #include #include "Poco/Timer.h" #include @@ -19,41 +18,32 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer( +RabbitMQConsumer::RabbitMQConsumer( RabbitMQHandler & event_handler_, std::vector & queues_, size_t channel_id_base_, const String & channel_base_, Poco::Logger * log_, - char row_delimiter_, uint32_t queue_size_, const std::atomic & stopped_) - : ReadBuffer(nullptr, 0) - , event_handler(event_handler_) + : event_handler(event_handler_) , queues(queues_) , channel_base(channel_base_) , channel_id_base(channel_id_base_) , log(log_) - , row_delimiter(row_delimiter_) , stopped(stopped_) , received(queue_size_) { } -ReadBufferFromRabbitMQConsumer::~ReadBufferFromRabbitMQConsumer() -{ - BufferBase::set(nullptr, 0, 0); -} - - -void ReadBufferFromRabbitMQConsumer::closeChannel() +void RabbitMQConsumer::closeChannel() { if (consumer_channel) consumer_channel->close(); } -void ReadBufferFromRabbitMQConsumer::subscribe() +void RabbitMQConsumer::subscribe() { for (const auto & queue_name : queues) { @@ -70,8 +60,6 @@ void ReadBufferFromRabbitMQConsumer::subscribe() if (message.bodySize()) { String message_received = std::string(message.body(), message.body() + message.bodySize()); - if (row_delimiter != '\0') - 
message_received += row_delimiter; if (!received.push({message_received, message.hasMessageID() ? message.messageID() : "", message.hasTimestamp() ? message.timestamp() : 0, @@ -91,7 +79,7 @@ void ReadBufferFromRabbitMQConsumer::subscribe() } -bool ReadBufferFromRabbitMQConsumer::ackMessages() +bool RabbitMQConsumer::ackMessages() { AckTracker record_info = last_inserted_record_info; @@ -116,7 +104,7 @@ bool ReadBufferFromRabbitMQConsumer::ackMessages() } -void ReadBufferFromRabbitMQConsumer::updateAckTracker(AckTracker record_info) +void RabbitMQConsumer::updateAckTracker(AckTracker record_info) { if (record_info.delivery_tag && channel_error.load()) return; @@ -128,7 +116,7 @@ void ReadBufferFromRabbitMQConsumer::updateAckTracker(AckTracker record_info) } -void ReadBufferFromRabbitMQConsumer::setupChannel() +void RabbitMQConsumer::setupChannel() { if (!consumer_channel) return; @@ -159,7 +147,7 @@ void ReadBufferFromRabbitMQConsumer::setupChannel() } -bool ReadBufferFromRabbitMQConsumer::needChannelUpdate() +bool RabbitMQConsumer::needChannelUpdate() { if (wait_subscription) return false; @@ -168,27 +156,17 @@ bool ReadBufferFromRabbitMQConsumer::needChannelUpdate() } -void ReadBufferFromRabbitMQConsumer::iterateEventLoop() +void RabbitMQConsumer::iterateEventLoop() { event_handler.iterateLoop(); } - -bool ReadBufferFromRabbitMQConsumer::nextImpl() +ReadBufferPtr RabbitMQConsumer::consume() { - if (stopped || !allowed) - return false; + if (stopped || !received.tryPop(current)) + return nullptr; - if (received.tryPop(current)) - { - auto * new_position = const_cast(current.message.data()); - BufferBase::set(new_position, current.message.size(), 0); - allowed = false; - - return true; - } - - return false; + return std::make_shared(current.message.data(), current.message.size()); } } diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h b/src/Storages/RabbitMQ/RabbitMQConsumer.h similarity index 88% rename from src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h rename to src/Storages/RabbitMQ/RabbitMQConsumer.h index bd55d169744..ca3daa5e090 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h +++ b/src/Storages/RabbitMQ/RabbitMQConsumer.h @@ -21,22 +21,19 @@ namespace DB class RabbitMQHandler; using ChannelPtr = std::unique_ptr; -class ReadBufferFromRabbitMQConsumer : public ReadBuffer +class RabbitMQConsumer { public: - ReadBufferFromRabbitMQConsumer( + RabbitMQConsumer( RabbitMQHandler & event_handler_, std::vector & queues_, size_t channel_id_base_, const String & channel_base_, Poco::Logger * log_, - char row_delimiter_, uint32_t queue_size_, const std::atomic & stopped_); - ~ReadBufferFromRabbitMQConsumer() override; - struct AckTracker { UInt64 delivery_tag; @@ -55,6 +52,10 @@ public: AckTracker track{}; }; + /// Return read buffer containing next available message + /// or nullptr if there are no messages to process. + ReadBufferPtr consume(); + ChannelPtr & getChannel() { return consumer_channel; } void setupChannel(); bool needChannelUpdate(); @@ -68,7 +69,6 @@ public: void updateAckTracker(AckTracker record = AckTracker()); bool queueEmpty() { return received.empty(); } - void allowNext() { allowed = true; } // Allow to read next message. 
auto getChannelID() const { return current.track.channel_id; } auto getDeliveryTag() const { return current.track.delivery_tag; } @@ -77,8 +77,6 @@ public: auto getTimestamp() const { return current.timestamp; } private: - bool nextImpl() override; - void subscribe(); void iterateEventLoop(); @@ -88,8 +86,6 @@ private: const String channel_base; const size_t channel_id_base; Poco::Logger * log; - char row_delimiter; - bool allowed = true; const std::atomic & stopped; String channel_id; diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp b/src/Storages/RabbitMQ/RabbitMQProducer.cpp similarity index 67% rename from src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp rename to src/Storages/RabbitMQ/RabbitMQProducer.cpp index 367f8579a17..b1852876ea9 100644 --- a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp +++ b/src/Storages/RabbitMQ/RabbitMQProducer.cpp @@ -1,12 +1,9 @@ -#include +#include #include #include -#include -#include #include #include -#include #include #include #include @@ -25,42 +22,35 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -WriteBufferToRabbitMQProducer::WriteBufferToRabbitMQProducer( - const RabbitMQConfiguration & configuration_, - ContextPtr global_context, - const Names & routing_keys_, - const String & exchange_name_, - const AMQP::ExchangeType exchange_type_, - const size_t channel_id_base_, - const bool persistent_, - std::atomic & shutdown_called_, - Poco::Logger * log_, - std::optional delimiter, - size_t rows_per_message, - size_t chunk_size_) - : WriteBuffer(nullptr, 0) - , connection(configuration_, log_) - , routing_keys(routing_keys_) - , exchange_name(exchange_name_) - , exchange_type(exchange_type_) - , channel_id_base(std::to_string(channel_id_base_)) - , persistent(persistent_) - , shutdown_called(shutdown_called_) - , payloads(BATCH) - , returned(RETURNED_LIMIT) - , log(log_) - , delim(delimiter) - , max_rows(rows_per_message) - , chunk_size(chunk_size_) +RabbitMQProducer::RabbitMQProducer( + const RabbitMQConfiguration & configuration_, + const Names & routing_keys_, + const String & exchange_name_, + const AMQP::ExchangeType exchange_type_, + const size_t channel_id_base_, + const bool persistent_, + std::atomic & shutdown_called_, + Poco::Logger * log_) + : connection(configuration_, log_) + , routing_keys(routing_keys_) + , exchange_name(exchange_name_) + , exchange_type(exchange_type_) + , channel_id_base(std::to_string(channel_id_base_)) + , persistent(persistent_) + , shutdown_called(shutdown_called_) + , payloads(BATCH) + , returned(RETURNED_LIMIT) + , log(log_) +{ +} + +void RabbitMQProducer::initialize() { if (connection.connect()) setupChannel(); else throw Exception(ErrorCodes::CANNOT_CONNECT_RABBITMQ, "Cannot connect to RabbitMQ {}", connection.connectionInfoForLog()); - writing_task = global_context->getSchedulePool().createTask("RabbitMQWritingTask", [this]{ writingFunc(); }); - writing_task->deactivate(); - if (exchange_type == AMQP::ExchangeType::headers) { for (const auto & header : routing_keys) @@ -70,47 +60,30 @@ WriteBufferToRabbitMQProducer::WriteBufferToRabbitMQProducer( key_arguments[matching[0]] = matching[1]; } } - - reinitializeChunks(); } - -WriteBufferToRabbitMQProducer::~WriteBufferToRabbitMQProducer() +void RabbitMQProducer::stopProducingTask() +{ + payloads.finish(); +} + +void RabbitMQProducer::finishImpl() { - writing_task->deactivate(); connection.disconnect(); - assert(rows == 0); } - -void WriteBufferToRabbitMQProducer::countRow() +void RabbitMQProducer::produce(const 
String & message, size_t, const Columns &, size_t) { - if (++rows % max_rows == 0) - { - const std::string & last_chunk = chunks.back(); - size_t last_chunk_size = offset(); + LOG_DEBUG(&Poco::Logger::get("RabbitMQProducer"), "push {}", message); - if (last_chunk_size && delim && last_chunk[last_chunk_size - 1] == delim) - --last_chunk_size; - - std::string payload; - payload.reserve((chunks.size() - 1) * chunk_size + last_chunk_size); - - for (auto i = chunks.begin(), end = --chunks.end(); i != end; ++i) - payload.append(*i); - - payload.append(last_chunk, 0, last_chunk_size); - - reinitializeChunks(); - - ++payload_counter; - if (!payloads.push(std::make_pair(payload_counter, payload))) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Could not push to payloads queue"); - } + Payload payload; + payload.message = message; + payload.id = ++payload_counter; + if (!payloads.push(std::move(payload))) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Could not push to payloads queue"); } - -void WriteBufferToRabbitMQProducer::setupChannel() +void RabbitMQProducer::setupChannel() { producer_channel = connection.createChannel(); @@ -161,8 +134,7 @@ void WriteBufferToRabbitMQProducer::setupChannel() }); } - -void WriteBufferToRabbitMQProducer::removeRecord(UInt64 received_delivery_tag, bool multiple, bool republish) +void RabbitMQProducer::removeRecord(UInt64 received_delivery_tag, bool multiple, bool republish) { auto record_iter = delivery_record.find(received_delivery_tag); assert(record_iter != delivery_record.end()); @@ -190,10 +162,9 @@ void WriteBufferToRabbitMQProducer::removeRecord(UInt64 received_delivery_tag, b } } - -void WriteBufferToRabbitMQProducer::publish(ConcurrentBoundedQueue> & messages, bool republishing) +void RabbitMQProducer::publish(Payloads & messages, bool republishing) { - std::pair payload; + Payload payload; /* It is important to make sure that delivery_record.size() is never bigger than returned.size(), i.e. number if unacknowledged * messages cannot exceed returned.size(), because they all might end up there @@ -203,9 +174,9 @@ void WriteBufferToRabbitMQProducer::publish(ConcurrentBoundedQueueusable() will anyway return true, /// but must publish only after onReady callback. @@ -273,9 +243,7 @@ void WriteBufferToRabbitMQProducer::writingFunc() iterateEventLoop(); - if (wait_num.load() && delivery_record.empty() && payloads.empty() && returned.empty()) - wait_all = false; - else if (!producer_channel->usable()) + if (!producer_channel->usable()) { if (connection.reconnect()) setupChannel(); @@ -286,30 +254,7 @@ void WriteBufferToRabbitMQProducer::writingFunc() } -void WriteBufferToRabbitMQProducer::nextImpl() -{ - addChunk(); -} - -void WriteBufferToRabbitMQProducer::addChunk() -{ - chunks.push_back(std::string()); - chunks.back().resize(chunk_size); - set(chunks.back().data(), chunk_size); -} - -void WriteBufferToRabbitMQProducer::reinitializeChunks() -{ - rows = 0; - chunks.clear(); - /// We cannot leave the buffer in the undefined state (i.e. 
without any - /// underlying buffer), since in this case the WriteBuffeR::next() will - /// not call our nextImpl() (due to available() == 0) - addChunk(); -} - - -void WriteBufferToRabbitMQProducer::iterateEventLoop() +void RabbitMQProducer::iterateEventLoop() { connection.getHandler().iterateLoop(); } diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h b/src/Storages/RabbitMQ/RabbitMQProducer.h similarity index 64% rename from src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h rename to src/Storages/RabbitMQ/RabbitMQProducer.h index ee6720ece13..fc56eedf1f9 100644 --- a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h +++ b/src/Storages/RabbitMQ/RabbitMQProducer.h @@ -1,53 +1,53 @@ #pragma once -#include #include #include #include #include #include #include +#include #include -#include #include namespace DB { -class WriteBufferToRabbitMQProducer : public WriteBuffer +class RabbitMQProducer : public AsynchronousMessageProducer { public: - WriteBufferToRabbitMQProducer( - const RabbitMQConfiguration & configuration_, - ContextPtr global_context, - const Names & routing_keys_, - const String & exchange_name_, - const AMQP::ExchangeType exchange_type_, - const size_t channel_id_base_, - const bool persistent_, - std::atomic & shutdown_called_, - Poco::Logger * log_, - std::optional delimiter, - size_t rows_per_message, - size_t chunk_size_ - ); + RabbitMQProducer( + const RabbitMQConfiguration & configuration_, + const Names & routing_keys_, + const String & exchange_name_, + const AMQP::ExchangeType exchange_type_, + const size_t channel_id_base_, + const bool persistent_, + std::atomic & shutdown_called_, + Poco::Logger * log_); - ~WriteBufferToRabbitMQProducer() override; - - void countRow(); - void activateWriting() { writing_task->activateAndSchedule(); } - void updateMaxWait() { wait_num.store(payload_counter); } + void produce(const String & message, size_t rows_in_message, const Columns & columns, size_t last_row) override; private: - void nextImpl() override; - void addChunk(); - void reinitializeChunks(); + String getProducingTaskName() const override { return "RabbitMQProducingTask"; } + + struct Payload + { + String message; + UInt64 id; + }; + + using Payloads = ConcurrentBoundedQueue; + + void initialize() override; + void stopProducingTask() override; + void finishImpl() override; void iterateEventLoop(); - void writingFunc(); + void startProducingTaskLoop() override; void setupChannel(); void removeRecord(UInt64 received_delivery_tag, bool multiple, bool republish); - void publish(ConcurrentBoundedQueue> & message, bool republishing); + void publish(Payloads & messages, bool republishing); RabbitMQConnection connection; @@ -63,7 +63,6 @@ private: std::atomic & shutdown_called; AMQP::Table key_arguments; - BackgroundSchedulePool::TaskHolder writing_task; std::unique_ptr producer_channel; bool producer_ready = false; @@ -84,7 +83,7 @@ private: * to disk or it was unable to reach the queue. * - payloads are popped from the queue once republished */ - ConcurrentBoundedQueue> payloads, returned; + Payloads payloads, returned; /* Counter of current delivery on a current channel. Delivery tags are scoped per channel. The server attaches a delivery tag for each * published message - a serial number of delivery on current channel. 
Delivery tag is a way of server to notify publisher if it was @@ -97,25 +96,15 @@ private: * 2) non-persistent messages reached the queue * true: continue to process deliveries and returned messages */ - bool wait_all = true; - - /* false: until writeSuffix is called - * true: means payloads.queue will not grow anymore - */ - std::atomic wait_num = 0; +// bool wait_all = true; /// Needed to fill messageID property UInt64 payload_counter = 0; /// Record of pending acknowledgements from the server; its size never exceeds size of returned.queue - std::map> delivery_record; + std::map delivery_record; Poco::Logger * log; - const std::optional delim; - const size_t max_rows; - const size_t chunk_size; - size_t rows = 0; - std::list chunks; }; } diff --git a/src/Storages/RabbitMQ/RabbitMQSettings.h b/src/Storages/RabbitMQ/RabbitMQSettings.h index 38baeadad2a..739230d25f5 100644 --- a/src/Storages/RabbitMQ/RabbitMQSettings.h +++ b/src/Storages/RabbitMQ/RabbitMQSettings.h @@ -31,6 +31,7 @@ namespace DB M(String, rabbitmq_username, "", "RabbitMQ username", 0) \ M(String, rabbitmq_password, "", "RabbitMQ password", 0) \ M(Bool, rabbitmq_commit_on_select, false, "Commit messages when select query is made", 0) \ + M(UInt64, rabbitmq_max_rows_per_message, 1, "The maximum number of rows produced in one message for row-based formats.", 0) \ #define LIST_OF_RABBITMQ_SETTINGS(M) \ RABBITMQ_RELATED_SETTINGS(M) \ diff --git a/src/Storages/RabbitMQ/RabbitMQSink.cpp b/src/Storages/RabbitMQ/RabbitMQSink.cpp deleted file mode 100644 index 5254966af4c..00000000000 --- a/src/Storages/RabbitMQ/RabbitMQSink.cpp +++ /dev/null @@ -1,56 +0,0 @@ -#include -#include -#include -#include -#include -#include - - -namespace DB -{ - -RabbitMQSink::RabbitMQSink( - StorageRabbitMQ & storage_, - const StorageMetadataPtr & metadata_snapshot_, - ContextPtr context_) - : SinkToStorage(metadata_snapshot_->getSampleBlockNonMaterialized()) - , storage(storage_) - , metadata_snapshot(metadata_snapshot_) - , context(context_) -{ - storage.unbindExchange(); -} - - -void RabbitMQSink::onStart() -{ - buffer = storage.createWriteBuffer(); - buffer->activateWriting(); - - auto format_settings = getFormatSettings(context); - format_settings.protobuf.allow_multiple_rows_without_delimiter = true; - - format = FormatFactory::instance().getOutputFormat(storage.getFormatName(), *buffer, getHeader(), context, - [this](const Columns & /* columns */, size_t /* rows */) - { - buffer->countRow(); - }, - format_settings); -} - - -void RabbitMQSink::consume(Chunk chunk) -{ - format->write(getHeader().cloneWithColumns(chunk.detachColumns())); -} - - -void RabbitMQSink::onFinish() -{ - format->finalize(); - - if (buffer) - buffer->updateMaxWait(); -} - -} diff --git a/src/Storages/RabbitMQ/RabbitMQSink.h b/src/Storages/RabbitMQ/RabbitMQSink.h deleted file mode 100644 index 02014a3f89e..00000000000 --- a/src/Storages/RabbitMQ/RabbitMQSink.h +++ /dev/null @@ -1,32 +0,0 @@ -#pragma once - -#include -#include - - -namespace DB -{ - -class IOutputFormat; -using IOutputFormatPtr = std::shared_ptr; - -class RabbitMQSink : public SinkToStorage -{ - -public: - explicit RabbitMQSink(StorageRabbitMQ & storage_, const StorageMetadataPtr & metadata_snapshot_, ContextPtr context_); - - void onStart() override; - void consume(Chunk chunk) override; - void onFinish() override; - - String getName() const override { return "RabbitMQSink"; } - -private: - StorageRabbitMQ & storage; - StorageMetadataPtr metadata_snapshot; - ContextPtr context; - ProducerBufferPtr buffer; - 
IOutputFormatPtr format; -}; -} diff --git a/src/Storages/RabbitMQ/RabbitMQSource.cpp b/src/Storages/RabbitMQ/RabbitMQSource.cpp index f6e5bb84037..98c8bfa9189 100644 --- a/src/Storages/RabbitMQ/RabbitMQSource.cpp +++ b/src/Storages/RabbitMQ/RabbitMQSource.cpp @@ -3,7 +3,8 @@ #include #include #include -#include +#include +#include namespace DB { @@ -70,31 +71,31 @@ RabbitMQSource::~RabbitMQSource() { storage.decrementReader(); - if (!buffer) + if (!consumer) return; - storage.pushReadBuffer(buffer); + storage.pushConsumer(consumer); } bool RabbitMQSource::needChannelUpdate() { - if (!buffer) + if (!consumer) return false; - return buffer->needChannelUpdate(); + return consumer->needChannelUpdate(); } void RabbitMQSource::updateChannel() { - if (!buffer) + if (!consumer) return; - buffer->updateAckTracker(); + consumer->updateAckTracker(); - if (storage.updateChannel(buffer->getChannel())) - buffer->setupChannel(); + if (storage.updateChannel(consumer->getChannel())) + consumer->setupChannel(); } Chunk RabbitMQSource::generate() @@ -121,20 +122,21 @@ bool RabbitMQSource::checkTimeLimit() const Chunk RabbitMQSource::generateImpl() { - if (!buffer) + if (!consumer) { auto timeout = std::chrono::milliseconds(context->getSettingsRef().rabbitmq_max_wait_ms.totalMilliseconds()); - buffer = storage.popReadBuffer(timeout); + consumer = storage.popConsumer(timeout); } - if (!buffer || is_finished) + if (!consumer || is_finished) return {}; is_finished = true; MutableColumns virtual_columns = virtual_header.cloneEmptyColumns(); + EmptyReadBuffer empty_buf; auto input_format = FormatFactory::instance().getInputFormat( - storage.getFormatName(), *buffer, non_virtual_header, context, max_block_size); + storage.getFormatName(), empty_buf, non_virtual_header, context, max_block_size); StreamingFormatExecutor executor(non_virtual_header, input_format); @@ -142,21 +144,23 @@ Chunk RabbitMQSource::generateImpl() while (true) { - if (buffer->queueEmpty()) + if (consumer->queueEmpty()) break; - auto new_rows = executor.execute(); + size_t new_rows = 0; + if (auto buf = consumer->consume()) + new_rows = executor.execute(*buf); if (new_rows) { auto exchange_name = storage.getExchange(); - auto channel_id = buffer->getChannelID(); - auto delivery_tag = buffer->getDeliveryTag(); - auto redelivered = buffer->getRedelivered(); - auto message_id = buffer->getMessageID(); - auto timestamp = buffer->getTimestamp(); + auto channel_id = consumer->getChannelID(); + auto delivery_tag = consumer->getDeliveryTag(); + auto redelivered = consumer->getRedelivered(); + auto message_id = consumer->getMessageID(); + auto timestamp = consumer->getTimestamp(); - buffer->updateAckTracker({delivery_tag, channel_id}); + consumer->updateAckTracker({delivery_tag, channel_id}); for (size_t i = 0; i < new_rows; ++i) { @@ -171,9 +175,7 @@ Chunk RabbitMQSource::generateImpl() total_rows = total_rows + new_rows; } - buffer->allowNext(); - - if (total_rows >= max_block_size || buffer->queueEmpty() || buffer->isConsumerStopped() || !checkTimeLimit()) + if (total_rows >= max_block_size || consumer->queueEmpty() || consumer->isConsumerStopped() || !checkTimeLimit()) break; } @@ -190,10 +192,10 @@ Chunk RabbitMQSource::generateImpl() bool RabbitMQSource::sendAck() { - if (!buffer) + if (!consumer) return false; - if (!buffer->ackMessages()) + if (!consumer->ackMessages()) return false; return true; diff --git a/src/Storages/RabbitMQ/RabbitMQSource.h b/src/Storages/RabbitMQ/RabbitMQSource.h index bd2882d1938..6d948edfb59 100644 --- 
a/src/Storages/RabbitMQ/RabbitMQSource.h +++ b/src/Storages/RabbitMQ/RabbitMQSource.h @@ -2,7 +2,7 @@ #include #include -#include +#include namespace DB @@ -23,11 +23,11 @@ public: ~RabbitMQSource() override; String getName() const override { return storage.getName(); } - ConsumerBufferPtr getBuffer() { return buffer; } + RabbitMQConsumerPtr getBuffer() { return consumer; } Chunk generate() override; - bool queueEmpty() const { return !buffer || buffer->queueEmpty(); } + bool queueEmpty() const { return !consumer || consumer->queueEmpty(); } bool needChannelUpdate(); void updateChannel(); bool sendAck(); @@ -47,7 +47,7 @@ private: const Block non_virtual_header; const Block virtual_header; - ConsumerBufferPtr buffer; + RabbitMQConsumerPtr consumer; Poco::Timespan max_execution_time = 0; Stopwatch total_stopwatch {CLOCK_MONOTONIC_COARSE}; diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index bce3fee71f7..2a4549e79ca 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -13,11 +13,11 @@ #include #include #include +#include #include -#include #include #include -#include +#include #include #include #include @@ -77,12 +77,12 @@ StorageRabbitMQ::StorageRabbitMQ( , format_name(getContext()->getMacros()->expand(rabbitmq_settings->rabbitmq_format)) , exchange_type(defineExchangeType(getContext()->getMacros()->expand(rabbitmq_settings->rabbitmq_exchange_type))) , routing_keys(parseSettings(getContext()->getMacros()->expand(rabbitmq_settings->rabbitmq_routing_key_list))) - , row_delimiter(rabbitmq_settings->rabbitmq_row_delimiter.value) , schema_name(getContext()->getMacros()->expand(rabbitmq_settings->rabbitmq_schema)) , num_consumers(rabbitmq_settings->rabbitmq_num_consumers.value) , num_queues(rabbitmq_settings->rabbitmq_num_queues.value) , queue_base(getContext()->getMacros()->expand(rabbitmq_settings->rabbitmq_queue_base)) , queue_settings_list(parseSettings(getContext()->getMacros()->expand(rabbitmq_settings->rabbitmq_queue_settings_list))) + , max_rows_per_message(rabbitmq_settings->rabbitmq_max_rows_per_message) , persistent(rabbitmq_settings->rabbitmq_persistent.value) , use_user_setup(rabbitmq_settings->rabbitmq_queue_consume.value) , hash_exchange(num_consumers > 1 || num_queues > 1) @@ -607,18 +607,18 @@ bool StorageRabbitMQ::updateChannel(ChannelPtr & channel) } -void StorageRabbitMQ::prepareChannelForBuffer(ConsumerBufferPtr buffer) +void StorageRabbitMQ::prepareChannelForConsumer(RabbitMQConsumerPtr consumer) { - if (!buffer) + if (!consumer) return; - if (buffer->queuesCount() != queues.size()) - buffer->updateQueues(queues); + if (consumer->queuesCount() != queues.size()) + consumer->updateQueues(queues); - buffer->updateAckTracker(); + consumer->updateAckTracker(); - if (updateChannel(buffer->getChannel())) - buffer->setupChannel(); + if (updateChannel(consumer->getChannel())) + consumer->setupChannel(); } @@ -749,7 +749,19 @@ void StorageRabbitMQ::read( SinkToStoragePtr StorageRabbitMQ::write(const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context) { - return std::make_shared(*this, metadata_snapshot, local_context); + auto producer = std::make_unique( + configuration, routing_keys, exchange_name, exchange_type, producer_id.fetch_add(1), persistent, shutdown_called, log); + size_t max_rows = max_rows_per_message; + /// Need for backward compatibility. 
+ if (format_name == "Avro" && local_context->getSettingsRef().output_format_avro_rows_in_file.changed) + max_rows = local_context->getSettingsRef().output_format_avro_rows_in_file.value; + return std::make_shared( + metadata_snapshot->getSampleBlockNonMaterialized(), + getFormatName(), + max_rows, + std::move(producer), + getName(), + local_context); } @@ -780,8 +792,8 @@ void StorageRabbitMQ::startup() { try { - auto buffer = createReadBuffer(); - pushReadBuffer(std::move(buffer)); + auto consumer = createConsumer(); + pushConsumer(std::move(consumer)); ++num_created_consumers; } catch (...) @@ -813,18 +825,18 @@ void StorageRabbitMQ::shutdown() { if (drop_table) { - for (auto & buffer : buffers) - buffer->closeChannel(); + for (auto & consumer : consumers) + consumer->closeChannel(); cleanupRabbitMQ(); } - /// It is important to close connection here - before removing consumer buffers, because - /// it will finish and clean callbacks, which might use those buffers data. + /// It is important to close connection here - before removing consumers, because + /// it will finish and clean callbacks, which might use those consumers data. connection->disconnect(); for (size_t i = 0; i < num_created_consumers; ++i) - popReadBuffer(); + popConsumer(); } catch (...) { @@ -884,23 +896,23 @@ void StorageRabbitMQ::cleanupRabbitMQ() const } -void StorageRabbitMQ::pushReadBuffer(ConsumerBufferPtr buffer) +void StorageRabbitMQ::pushConsumer(RabbitMQConsumerPtr consumer) { - std::lock_guard lock(buffers_mutex); - buffers.push_back(buffer); + std::lock_guard lock(consumers_mutex); + consumers.push_back(consumer); semaphore.set(); } -ConsumerBufferPtr StorageRabbitMQ::popReadBuffer() +RabbitMQConsumerPtr StorageRabbitMQ::popConsumer() { - return popReadBuffer(std::chrono::milliseconds::zero()); + return popConsumer(std::chrono::milliseconds::zero()); } -ConsumerBufferPtr StorageRabbitMQ::popReadBuffer(std::chrono::milliseconds timeout) +RabbitMQConsumerPtr StorageRabbitMQ::popConsumer(std::chrono::milliseconds timeout) { - // Wait for the first free buffer + // Wait for the first free consumer if (timeout == std::chrono::milliseconds::zero()) semaphore.wait(); else @@ -909,32 +921,22 @@ ConsumerBufferPtr StorageRabbitMQ::popReadBuffer(std::chrono::milliseconds timeo return nullptr; } - // Take the first available buffer from the list - std::lock_guard lock(buffers_mutex); - auto buffer = buffers.back(); - buffers.pop_back(); + // Take the first available consumer from the list + std::lock_guard lock(consumers_mutex); + auto consumer = consumers.back(); + consumers.pop_back(); - return buffer; + return consumer; } -ConsumerBufferPtr StorageRabbitMQ::createReadBuffer() +RabbitMQConsumerPtr StorageRabbitMQ::createConsumer() { - return std::make_shared( + return std::make_shared( connection->getHandler(), queues, ++consumer_id, - unique_strbase, log, row_delimiter, queue_size, shutdown_called); + unique_strbase, log, queue_size, shutdown_called); } - -ProducerBufferPtr StorageRabbitMQ::createWriteBuffer() -{ - return std::make_shared( - configuration, getContext(), routing_keys, exchange_name, exchange_type, - producer_id.fetch_add(1), persistent, shutdown_called, log, - row_delimiter ? 
std::optional{row_delimiter} : std::nullopt, 1, 1024); -} - - bool StorageRabbitMQ::checkDependencies(const StorageID & table_id) { // Check if all dependencies are attached @@ -968,8 +970,8 @@ void StorageRabbitMQ::initializeBuffers() assert(rabbit_is_ready); if (!initialized) { - for (const auto & buffer : buffers) - prepareChannelForBuffer(buffer); + for (const auto & consumer : consumers) + prepareChannelForConsumer(consumer); initialized = true; } } @@ -1128,8 +1130,8 @@ bool StorageRabbitMQ::streamToViews() if (source->needChannelUpdate()) { - auto buffer = source->getBuffer(); - prepareChannelForBuffer(buffer); + auto consumer = source->getBuffer(); + prepareChannelForConsumer(consumer); } /* false is returned by the sendAck function in only two cases: diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.h b/src/Storages/RabbitMQ/StorageRabbitMQ.h index a1250f50829..a3f51e43baa 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.h +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.h @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include #include @@ -17,6 +17,8 @@ namespace DB { +using RabbitMQConsumerPtr = std::shared_ptr; + class StorageRabbitMQ final: public IStorage, WithContext { public: @@ -57,11 +59,12 @@ public: const StorageMetadataPtr & metadata_snapshot, ContextPtr context) override; - void pushReadBuffer(ConsumerBufferPtr buf); - ConsumerBufferPtr popReadBuffer(); - ConsumerBufferPtr popReadBuffer(std::chrono::milliseconds timeout); + /// We want to control the number of rows in a chunk inserted into RabbitMQ + bool prefersLargeBlocks() const override { return false; } - ProducerBufferPtr createWriteBuffer(); + void pushConsumer(RabbitMQConsumerPtr consumer); + RabbitMQConsumerPtr popConsumer(); + RabbitMQConsumerPtr popConsumer(std::chrono::milliseconds timeout); const String & getFormatName() const { return format_name; } NamesAndTypesList getVirtuals() const override; @@ -71,7 +74,7 @@ public: bool updateChannel(ChannelPtr & channel); void updateQueues(std::vector & queues_) { queues_ = queues; } - void prepareChannelForBuffer(ConsumerBufferPtr buffer); + void prepareChannelForConsumer(RabbitMQConsumerPtr consumer); void incrementReader(); void decrementReader(); @@ -84,12 +87,12 @@ private: const String format_name; AMQP::ExchangeType exchange_type; Names routing_keys; - char row_delimiter; const String schema_name; size_t num_consumers; size_t num_queues; String queue_base; Names queue_settings_list; + size_t max_rows_per_message; /// For insert query. Mark messages as durable. const bool persistent; @@ -107,17 +110,17 @@ private: size_t num_created_consumers = 0; Poco::Semaphore semaphore; - std::mutex buffers_mutex; - std::vector buffers; /// available buffers for RabbitMQ consumers + std::mutex consumers_mutex; + std::vector consumers; /// available RabbitMQ consumers String unique_strbase; /// to make unique consumer channel id /// maximum number of messages in RabbitMQ queue (x-max-length). Also used - /// to setup size of inner buffer for received messages + /// to setup size of inner consumer for received messages uint32_t queue_size; String sharding_exchange, bridge_exchange, consumer_exchange; - size_t consumer_id = 0; /// counter for consumer buffer, needed for channel id + size_t consumer_id = 0; /// counter for consumer, needed for channel id std::vector queues; @@ -135,8 +138,8 @@ private: /// Needed for tell MV or producer background tasks /// that they must finish as soon as possible. 
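The storage header above keeps a pool of free consumers: pushConsumer() returns one to the consumers vector and signals the Poco::Semaphore, while popConsumer() waits on the semaphore (optionally with a timeout) and takes one back out under consumers_mutex. The sketch below shows the same hand-out pattern using only the standard library; it is a simplification with invented names, not the StorageRabbitMQ implementation.

```cpp
#include <chrono>
#include <condition_variable>
#include <memory>
#include <mutex>
#include <vector>

struct Consumer {};                       // stand-in for the real RabbitMQ consumer type
using ConsumerPtr = std::shared_ptr<Consumer>;

/// Minimal pool: push() hands a free consumer back, pop() waits up to `timeout`
/// for one and returns nullptr if none becomes available in time.
class ConsumerPool
{
public:
    void push(ConsumerPtr consumer)
    {
        {
            std::lock_guard<std::mutex> lock(mutex);
            consumers.push_back(std::move(consumer));
        }
        cv.notify_one();
    }

    ConsumerPtr pop(std::chrono::milliseconds timeout)
    {
        std::unique_lock<std::mutex> lock(mutex);
        if (!cv.wait_for(lock, timeout, [this] { return !consumers.empty(); }))
            return nullptr;
        auto consumer = std::move(consumers.back());
        consumers.pop_back();
        return consumer;
    }

private:
    std::mutex mutex;
    std::condition_variable cv;
    std::vector<ConsumerPtr> consumers;
};
```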
std::atomic shutdown_called{false}; - /// Counter for producer buffers, needed for channel id. - /// Needed to generate unique producer buffer identifiers. + /// Counter for producers, needed for channel id. + /// Needed to generate unique producer identifiers. std::atomic producer_id = 1; /// Has connection background task completed successfully? /// It is started only once -- in constructor. @@ -160,7 +163,7 @@ private: mutable bool drop_table = false; bool is_attach; - ConsumerBufferPtr createReadBuffer(); + RabbitMQConsumerPtr createConsumer(); void initializeBuffers(); bool initialized = false; diff --git a/src/Storages/RenamingRestrictions.h b/src/Storages/RenamingRestrictions.h new file mode 100644 index 00000000000..1b53ed0358a --- /dev/null +++ b/src/Storages/RenamingRestrictions.h @@ -0,0 +1,13 @@ +#pragma once + +namespace DB +{ + +enum RenamingRestrictions +{ + ALLOW_ANY, + ALLOW_PRESERVING_UUID, + DO_NOT_ALLOW, +}; + +} diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.h b/src/Storages/RocksDB/StorageEmbeddedRocksDB.h index ca0ab7a1840..eec817acd55 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.h +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.h @@ -80,6 +80,8 @@ public: const std::vector & keys, PaddedPODArray * out_null_map) const; + bool supportsDelete() const override { return true; } + private: const String primary_key; using RocksDBPtr = std::unique_ptr; diff --git a/src/Storages/SelectQueryInfo.h b/src/Storages/SelectQueryInfo.h index bad2539ef07..a8a8ae4e877 100644 --- a/src/Storages/SelectQueryInfo.h +++ b/src/Storages/SelectQueryInfo.h @@ -163,6 +163,9 @@ struct ProjectionCandidate SortDescription group_by_elements_order_descr; MergeTreeDataSelectAnalysisResultPtr merge_tree_projection_select_result_ptr; MergeTreeDataSelectAnalysisResultPtr merge_tree_normal_select_result_ptr; + + /// Because projection analysis uses a separate interpreter. 
+ ContextPtr context; }; /** Query along with some additional data, @@ -171,7 +174,6 @@ struct ProjectionCandidate */ struct SelectQueryInfo { - SelectQueryInfo() : prepared_sets(std::make_shared()) {} diff --git a/src/Storages/StorageExecutable.cpp b/src/Storages/StorageExecutable.cpp index cd3cc4d48ac..9fabf1a9fb6 100644 --- a/src/Storages/StorageExecutable.cpp +++ b/src/Storages/StorageExecutable.cpp @@ -30,6 +30,7 @@ namespace DB namespace ErrorCodes { + extern const int BAD_ARGUMENTS; extern const int UNSUPPORTED_METHOD; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } @@ -192,10 +193,15 @@ void registerStorageExecutable(StorageFactory & factory) std::vector input_queries; for (size_t i = 2; i < args.engine_args.size(); ++i) { + if (args.engine_args[i]->children.empty()) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, "StorageExecutable argument \"{}\" is invalid query", + args.engine_args[i]->formatForErrorMessage()); + ASTPtr query = args.engine_args[i]->children.at(0); if (!query->as()) throw Exception( - ErrorCodes::UNSUPPORTED_METHOD, "StorageExecutable argument is invalid input query {}", + ErrorCodes::UNSUPPORTED_METHOD, "StorageExecutable argument \"{}\" is invalid input query", query->formatForErrorMessage()); input_queries.emplace_back(std::move(query)); diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index d355d021313..95bd0e7c53e 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -821,8 +821,7 @@ public: write_buf = wrapWriteBufferWithCompressionMethod(std::move(naked_buffer), compression_method, 3); writer = FormatFactory::instance().getOutputFormatParallelIfPossible(format_name, - *write_buf, metadata_snapshot->getSampleBlock(), context, - {}, format_settings); + *write_buf, metadata_snapshot->getSampleBlock(), context, format_settings); if (do_not_write_prefix) writer->doNotWritePrefix(); diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index f935fe2854e..a57b4afda7d 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -91,7 +91,6 @@ StorageMergeTree::StorageMergeTree( bool has_force_restore_data_flag) : MergeTreeData( table_id_, - relative_data_path_, metadata_, context_, date_column_name, @@ -101,8 +100,10 @@ StorageMergeTree::StorageMergeTree( attach) , reader(*this) , writer(*this) - , merger_mutator(*this, getContext()->getMergeMutateExecutor()->getMaxTasksCount()) + , merger_mutator(*this) { + initializeDirectoriesAndFormatVersion(relative_data_path_, attach, date_column_name); + loadDataParts(has_force_restore_data_flag); if (!attach && !getDataPartsForInternalUsage().empty()) @@ -1371,7 +1372,7 @@ MergeTreeDataPartPtr StorageMergeTree::outdatePart(MergeTreeTransaction * txn, c /// Forcefully stop merges and make part outdated auto merge_blocker = stopMergesAndWait(); auto parts_lock = lockParts(); - auto part = getPartIfExists(part_name, {MergeTreeDataPartState::Active}, &parts_lock); + auto part = getPartIfExistsUnlocked(part_name, {MergeTreeDataPartState::Active}, parts_lock); if (!part) throw Exception(ErrorCodes::NO_SUCH_DATA_PART, "Part {} not found, won't try to drop it.", part_name); @@ -1384,7 +1385,7 @@ MergeTreeDataPartPtr StorageMergeTree::outdatePart(MergeTreeTransaction * txn, c std::unique_lock lock(currently_processing_in_background_mutex); auto parts_lock = lockParts(); - auto part = getPartIfExists(part_name, {MergeTreeDataPartState::Active}, &parts_lock); + auto part = getPartIfExistsUnlocked(part_name, 
{MergeTreeDataPartState::Active}, parts_lock); /// It's okay, part was already removed if (!part) return nullptr; diff --git a/src/Storages/StorageMongoDB.cpp b/src/Storages/StorageMongoDB.cpp index 3ae9c974770..92d4c6c0686 100644 --- a/src/Storages/StorageMongoDB.cpp +++ b/src/Storages/StorageMongoDB.cpp @@ -72,16 +72,14 @@ void StorageMongoDB::connectIfNotConnected() auto auth_db = database_name; if (auth_source != query_params.end()) auth_db = auth_source->second; -#if POCO_VERSION >= 0x01070800 + if (!username.empty() && !password.empty()) { Poco::MongoDB::Database poco_db(auth_db); if (!poco_db.authenticate(*connection, username, password, Poco::MongoDB::Database::AUTH_SCRAM_SHA1)) throw Exception("Cannot authenticate in MongoDB, incorrect user or password", ErrorCodes::MONGODB_CANNOT_AUTHENTICATE); } -#else - authenticate(*connection, database_name, username, password); -#endif + authenticated = true; } } @@ -213,7 +211,6 @@ StorageMongoDBConfiguration StorageMongoDB::getConfiguration(ASTs engine_args, C if (engine_args.size() >= 6) configuration.options = checkAndGetLiteralArgument(engine_args[5], "database"); - } context->getRemoteHostFilter().checkHostAndPort(configuration.host, toString(configuration.port)); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 99ceb1d90ae..7056e6a6952 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -113,6 +113,7 @@ namespace ProfileEvents extern const Event NotCreatedLogEntryForMerge; extern const Event CreatedLogEntryForMutation; extern const Event NotCreatedLogEntryForMutation; + extern const Event ReplicaPartialShutdown; } namespace CurrentMetrics @@ -258,7 +259,6 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( bool has_force_restore_data_flag, RenamingRestrictions renaming_restrictions_) : MergeTreeData(table_id_, - relative_data_path_, metadata_, context_, date_column_name, @@ -273,7 +273,7 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( , replica_path(fs::path(zookeeper_path) / "replicas" / replica_name_) , reader(*this) , writer(*this) - , merger_mutator(*this, getContext()->getMergeMutateExecutor()->getMaxTasksCount()) + , merger_mutator(*this) , merge_strategy_picker(*this) , queue(*this, merge_strategy_picker) , fetcher(*this) @@ -286,6 +286,7 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( , replicated_fetches_throttler(std::make_shared(getSettings()->max_replicated_fetches_network_bandwidth, getContext()->getReplicatedFetchesThrottler())) , replicated_sends_throttler(std::make_shared(getSettings()->max_replicated_sends_network_bandwidth, getContext()->getReplicatedSendsThrottler())) { + initializeDirectoriesAndFormatVersion(relative_data_path_, attach, date_column_name); /// We create and deactivate all tasks for consistency. /// They all will be scheduled and activated by the restarting thread. 
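The outdatePart() hunks above replace getPartIfExists(part_name, ..., &parts_lock) with getPartIfExistsUnlocked(part_name, ..., parts_lock): the caller acquires the parts lock once and passes it in by reference, so the lookup can verify the lock is held rather than taking it again. A minimal sketch of that caller-holds-the-lock convention, with an invented container type standing in for MergeTreeData:

```cpp
#include <cassert>
#include <memory>
#include <mutex>
#include <string>
#include <unordered_map>

struct Part {};
using PartPtr = std::shared_ptr<Part>;

class PartsContainer
{
public:
    /// The caller keeps the returned lock alive for the whole sequence of *Unlocked calls.
    std::unique_lock<std::mutex> lockParts() const { return std::unique_lock<std::mutex>(mutex); }

    PartPtr getPartIfExistsUnlocked(const std::string & name, const std::unique_lock<std::mutex> & lock) const
    {
        assert(lock.owns_lock() && lock.mutex() == &mutex);  // lock must already be held by the caller
        auto it = parts.find(name);
        return it == parts.end() ? nullptr : it->second;
    }

private:
    mutable std::mutex mutex;
    std::unordered_map<std::string, PartPtr> parts;
};

// Usage sketch: auto lock = container.lockParts(); auto part = container.getPartIfExistsUnlocked("all_1_1_0", lock);
```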
queue_updating_task = getContext()->getSchedulePool().createTask( @@ -4246,10 +4247,10 @@ void StorageReplicatedMergeTree::startup() return; } - startupImpl(); + startupImpl(/* from_attach_thread */ false); } -void StorageReplicatedMergeTree::startupImpl() +void StorageReplicatedMergeTree::startupImpl(bool from_attach_thread) { /// Do not start replication if ZooKeeper is not configured or there is no metadata in zookeeper if (!has_metadata_in_zookeeper.has_value() || !*has_metadata_in_zookeeper) @@ -4291,7 +4292,16 @@ void StorageReplicatedMergeTree::startupImpl() /// It means that failed "startup" must not create any background tasks that we will have to wait. try { - shutdown(); + /// it's important to avoid full shutdown here, because it even tries to shutdown attach thread which was + /// designed exactly for this: try to start table if no zookeeper connection available. + if (from_attach_thread) + { + restarting_thread.shutdown(/* part_of_full_shutdown */false); + } + else + { + shutdown(); + } } catch (...) { @@ -4311,6 +4321,35 @@ void StorageReplicatedMergeTree::flush() flushAllInMemoryPartsIfNeeded(); } + +void StorageReplicatedMergeTree::partialShutdown() +{ + ProfileEvents::increment(ProfileEvents::ReplicaPartialShutdown); + + partial_shutdown_called = true; + partial_shutdown_event.set(); + replica_is_active_node = nullptr; + + LOG_TRACE(log, "Waiting for threads to finish"); + merge_selecting_task->deactivate(); + queue_updating_task->deactivate(); + mutations_updating_task->deactivate(); + mutations_finalizing_task->deactivate(); + + cleanup_thread.stop(); + part_check_thread.stop(); + + /// Stop queue processing + { + auto fetch_lock = fetcher.blocker.cancel(); + auto merge_lock = merger_mutator.merges_blocker.cancel(); + auto move_lock = parts_mover.moves_blocker.cancel(); + background_operations_assignee.finish(); + } + + LOG_TRACE(log, "Threads finished"); +} + void StorageReplicatedMergeTree::shutdown() { if (shutdown_called.exchange(true)) @@ -4327,7 +4366,8 @@ void StorageReplicatedMergeTree::shutdown() if (attach_thread) attach_thread->shutdown(); - restarting_thread.shutdown(); + + restarting_thread.shutdown(/* part_of_full_shutdown */true); background_operations_assignee.finish(); part_moves_between_shards_orchestrator.shutdown(); @@ -5051,8 +5091,9 @@ String getPartNamePossiblyFake(MergeTreeDataFormatVersion format_version, const return part_info.getPartName(); } -bool StorageReplicatedMergeTree::getFakePartCoveringAllPartsInPartition(const String & partition_id, MergeTreePartInfo & part_info, - std::optional & delimiting_block_lock, bool for_replace_range) +bool StorageReplicatedMergeTree::getFakePartCoveringAllPartsInPartition( + const String & partition_id, MergeTreePartInfo & part_info, + std::optional & delimiting_block_lock, bool for_replace_range) { /// Even if there is no data in the partition, you still need to mark the range for deletion. /// - Because before executing DETACH, tasks for downloading parts to this partition can be executed. 
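startupImpl() above gains a from_attach_thread flag because its failure path must not run the full shutdown() while executing on the attach thread: full shutdown also stops and waits for that very thread, which is exactly the thread designed to retry startup when ZooKeeper is unavailable. The sketch below shows only the shape of that guard; it is a simplification with invented names, not the StorageReplicatedMergeTree logic.

```cpp
#include <atomic>
#include <thread>

class TableSkeleton
{
public:
    void startupImpl(bool from_attach_thread)
    {
        try
        {
            startBackgroundMachinery();   // may throw, e.g. when ZooKeeper is unreachable
        }
        catch (...)
        {
            if (from_attach_thread)
                stop_requested = true;    // only ask the attach thread (i.e. ourselves) to stop
            else
                shutdown();               // safe: we are not the thread that shutdown() joins
            throw;
        }
    }

    void shutdown()
    {
        stop_requested = true;
        if (attach_thread.joinable())
            attach_thread.join();         // must never run on attach_thread itself
    }

private:
    void startBackgroundMachinery() { /* omitted */ }

    std::atomic<bool> stop_requested{false};
    std::thread attach_thread;
};
```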
@@ -5160,7 +5201,7 @@ void StorageReplicatedMergeTree::restoreMetadataInZooKeeper() LOG_INFO(log, "Attached all partitions, starting table"); - startupImpl(); + startupImpl(/* from_attach_thread */ false); } void StorageReplicatedMergeTree::dropPartNoWaitNoThrow(const String & part_name) @@ -5659,7 +5700,7 @@ bool StorageReplicatedMergeTree::tryWaitForReplicaToProcessLogEntry( } -void StorageReplicatedMergeTree::getStatus(Status & res, bool with_zk_fields) +void StorageReplicatedMergeTree::getStatus(ReplicatedTableStatus & res, bool with_zk_fields) { auto zookeeper = tryGetZooKeeper(); const auto storage_settings_ptr = getSettings(); @@ -8745,7 +8786,7 @@ void StorageReplicatedMergeTree::restoreDataFromBackup(RestorerFromBackup & rest { /// New parts could be in the replication queue but not fetched yet. /// In that case we consider the table as not empty. - StorageReplicatedMergeTree::Status status; + ReplicatedTableStatus status; getStatus(status, /* with_zk_fields = */ false); if (status.queue.inserts_in_queue) empty = false; diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 67e79378b93..c5e95ab7b39 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -17,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -89,13 +91,6 @@ using ZooKeeperWithFaultInjectionPtr = std::shared_ptr replica_is_active; - }; - /// Get the status of the table. If with_zk_fields = false - do not fill in the fields that require queries to ZK. - void getStatus(Status & res, bool with_zk_fields = true); + void getStatus(ReplicatedTableStatus & res, bool with_zk_fields = true); using LogEntriesData = std::vector; void getQueue(LogEntriesData & res, String & replica_name); @@ -879,7 +851,6 @@ private: // Create table id if needed void createTableSharedID() const; - bool checkZeroCopyLockExists(const String & part_name, const DiskPtr & disk); std::optional getZeroCopyPartPath(const String & part_name, const DiskPtr & disk); @@ -888,7 +859,7 @@ private: /// If somebody already holding the lock -- return std::nullopt. 
std::optional tryCreateZeroCopyExclusiveLock(const String & part_name, const DiskPtr & disk) override; - void startupImpl(); + void startupImpl(bool from_attach_thread); }; String getPartNamePossiblyFake(MergeTreeDataFormatVersion format_version, const MergeTreePartInfo & part_info); diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 9b50b41ddaf..ff70b4228ce 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -186,7 +186,7 @@ public: size_t getTotalSize() const { - return total_size; + return total_size.load(std::memory_order_relaxed); } ~Impl() @@ -283,7 +283,7 @@ private: buffer.reserve(block.rows()); for (UInt64 idx : idxs.getData()) { - total_size += temp_buffer[idx].info->size; + total_size.fetch_add(temp_buffer[idx].info->size, std::memory_order_relaxed); buffer.emplace_back(std::move(temp_buffer[idx])); } } @@ -291,7 +291,7 @@ private: { buffer = std::move(temp_buffer); for (const auto & [_, info] : buffer) - total_size += info->size; + total_size.fetch_add(info->size, std::memory_order_relaxed); } /// Set iterator only after the whole batch is processed @@ -357,7 +357,7 @@ private: ThreadPool list_objects_pool; ThreadPoolCallbackRunner list_objects_scheduler; std::future outcome_future; - size_t total_size = 0; + std::atomic total_size = 0; }; StorageS3Source::DisclosedGlobIterator::DisclosedGlobIterator( @@ -529,7 +529,8 @@ StorageS3Source::StorageS3Source( const String & bucket_, const String & version_id_, std::shared_ptr file_iterator_, - const size_t download_thread_num_) + const size_t download_thread_num_, + bool only_need_virtual_columns_) : ISource(getHeader(sample_block_, requested_virtual_columns_)) , WithContext(context_) , name(std::move(name_)) @@ -543,12 +544,17 @@ StorageS3Source::StorageS3Source( , client(client_) , sample_block(sample_block_) , format_settings(format_settings_) + , only_need_virtual_columns(only_need_virtual_columns_) , requested_virtual_columns(requested_virtual_columns_) , file_iterator(file_iterator_) , download_thread_num(download_thread_num_) , create_reader_pool(1) , create_reader_scheduler(threadPoolCallbackRunner(create_reader_pool, "CreateS3Reader")) { + /// If user only need virtual columns, StorageS3Source does not use ReaderHolder and does not initialize ReadBufferFromS3. 
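The StorageS3Source changes above add an only_need_virtual_columns fast path: when a query selects nothing but the _path and _file virtual columns, no read buffer or format reader is created and the two columns are synthesised straight from the object key. The helper below restates how _file is derived from _path; it is a free-standing illustration, not the ClickHouse code.

```cpp
#include <string>
#include <string_view>

/// _file is the last path component of _path (everything after the final '/').
std::string fileNameFromPath(std::string_view file_path)
{
    const auto last_slash_pos = file_path.find_last_of('/');
    if (last_slash_pos == std::string_view::npos)
        return std::string(file_path);
    return std::string(file_path.substr(last_slash_pos + 1));
}
```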
+ if (only_need_virtual_columns) + return; + reader = createReader(); if (reader) reader_future = createReaderAsync(); @@ -683,6 +689,35 @@ String StorageS3Source::getName() const Chunk StorageS3Source::generate() { + auto add_virtual_columns = [&](Chunk & chunk, const String & file_path, UInt64 num_rows) + { + for (const auto & virtual_column : requested_virtual_columns) + { + if (virtual_column.name == "_path") + { + chunk.addColumn(virtual_column.type->createColumnConst(num_rows, file_path)->convertToFullColumnIfConst()); + } + else if (virtual_column.name == "_file") + { + size_t last_slash_pos = file_path.find_last_of('/'); + auto column = virtual_column.type->createColumnConst(num_rows, file_path.substr(last_slash_pos + 1)); + chunk.addColumn(column->convertToFullColumnIfConst()); + } + } + }; + + if (only_need_virtual_columns) + { + Chunk chunk; + auto current_key = (*file_iterator)().key; + if (!current_key.empty()) + { + const auto & file_path = fs::path(bucket) / current_key; + add_virtual_columns(chunk, file_path, 1); + } + return chunk; + } + while (true) { if (!reader || isCancelled()) @@ -701,20 +736,7 @@ Chunk StorageS3Source::generate() *this, chunk, total_size, total_rows_approx_accumulated, total_rows_count_times, total_rows_approx_max); } - for (const auto & virtual_column : requested_virtual_columns) - { - if (virtual_column.name == "_path") - { - chunk.addColumn(virtual_column.type->createColumnConst(num_rows, file_path)->convertToFullColumnIfConst()); - } - else if (virtual_column.name == "_file") - { - size_t last_slash_pos = file_path.find_last_of('/'); - auto column = virtual_column.type->createColumnConst(num_rows, file_path.substr(last_slash_pos + 1)); - chunk.addColumn(column->convertToFullColumnIfConst()); - } - } - + add_virtual_columns(chunk, file_path, num_rows); return chunk; } @@ -765,7 +787,7 @@ public: compression_method, 3); writer - = FormatFactory::instance().getOutputFormatParallelIfPossible(format, *write_buf, sample_block, context, {}, format_settings); + = FormatFactory::instance().getOutputFormatParallelIfPossible(format, *write_buf, sample_block, context, format_settings); } String getName() const override { return "StorageS3Sink"; } @@ -1035,6 +1057,10 @@ Pipe StorageS3::read( requested_virtual_columns.push_back(virtual_column); } + bool only_need_virtual_columns = true; + if (column_names_set.size() > requested_virtual_columns.size()) + only_need_virtual_columns = false; + std::shared_ptr iterator_wrapper = createFileIterator( s3_configuration, keys, @@ -1047,25 +1073,28 @@ Pipe StorageS3::read( ColumnsDescription columns_description; Block block_for_format; - if (supportsSubsetOfColumns()) + if (!only_need_virtual_columns) { - auto fetch_columns = column_names; - const auto & virtuals = getVirtuals(); - std::erase_if( - fetch_columns, - [&](const String & col) - { return std::any_of(virtuals.begin(), virtuals.end(), [&](const NameAndTypePair & virtual_col){ return col == virtual_col.name; }); }); + if (supportsSubsetOfColumns()) + { + auto fetch_columns = column_names; + const auto & virtuals = getVirtuals(); + std::erase_if( + fetch_columns, + [&](const String & col) + { return std::any_of(virtuals.begin(), virtuals.end(), [&](const NameAndTypePair & virtual_col){ return col == virtual_col.name; }); }); - if (fetch_columns.empty()) - fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical())); + if (fetch_columns.empty()) + 
fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical())); - columns_description = storage_snapshot->getDescriptionForColumns(fetch_columns); - block_for_format = storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); - } - else - { - columns_description = storage_snapshot->metadata->getColumns(); - block_for_format = storage_snapshot->metadata->getSampleBlock(); + columns_description = storage_snapshot->getDescriptionForColumns(fetch_columns); + block_for_format = storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); + } + else + { + columns_description = storage_snapshot->metadata->getColumns(); + block_for_format = storage_snapshot->metadata->getSampleBlock(); + } } const size_t max_download_threads = local_context->getSettingsRef().max_download_threads; @@ -1086,7 +1115,8 @@ Pipe StorageS3::read( s3_configuration.uri.bucket, s3_configuration.uri.version_id, iterator_wrapper, - max_download_threads)); + max_download_threads, + only_need_virtual_columns)); } auto pipe = Pipe::unitePipes(std::move(pipes)); @@ -1291,7 +1321,7 @@ StorageS3Configuration StorageS3::getConfiguration(ASTs & engine_args, ContextPt "Storage S3 requires 1 to 5 arguments: url, [access_key_id, secret_access_key], name of used format and [compression_method].", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - auto header_it = StorageURL::collectHeaders(engine_args, configuration, local_context); + auto * header_it = StorageURL::collectHeaders(engine_args, configuration, local_context); if (header_it != engine_args.end()) engine_args.erase(header_it); diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index 671610173bd..acd5c264822 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -137,7 +137,8 @@ public: const String & bucket, const String & version_id, std::shared_ptr file_iterator_, - size_t download_thread_num); + size_t download_thread_num, + bool only_need_virtual_columns_ = false); ~StorageS3Source() override; @@ -159,6 +160,7 @@ private: std::shared_ptr client; Block sample_block; std::optional format_settings; + bool only_need_virtual_columns{false}; struct ReaderHolder { diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 0f01dc4288c..53d8d2f063e 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -440,7 +440,7 @@ StorageURLSink::StorageURLSink( std::make_unique(Poco::URI(uri), http_method, content_type, content_encoding, timeouts), compression_method, 3); - writer = FormatFactory::instance().getOutputFormat(format, *write_buf, sample_block, context, {} /* write callback */, format_settings); + writer = FormatFactory::instance().getOutputFormat(format, *write_buf, sample_block, context, format_settings); } @@ -982,7 +982,7 @@ ASTs::iterator StorageURL::collectHeaders( { ASTs::iterator headers_it = url_function_args.end(); - for (auto arg_it = url_function_args.begin(); arg_it != url_function_args.end(); ++arg_it) + for (auto * arg_it = url_function_args.begin(); arg_it != url_function_args.end(); ++arg_it) { const auto * headers_ast_function = (*arg_it)->as(); if (headers_ast_function && headers_ast_function->name == "headers") @@ -1068,7 +1068,7 @@ URLBasedDataSourceConfiguration StorageURL::getConfiguration(ASTs & args, Contex if (args.empty() || args.size() > 3) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, bad_arguments_error_message); - auto header_it = collectHeaders(args, 
configuration, local_context); + auto * header_it = collectHeaders(args, configuration, local_context); if (header_it != args.end()) args.erase(header_it); diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index 4f3003e68b0..f040e94e141 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -82,6 +82,20 @@ bool hasJoin(const ASTSelectWithUnionQuery & ast) return false; } +/** There are no limits on the maximum size of the result for the view. + * Since the result of the view is not the result of the entire query. + */ +ContextPtr getViewContext(ContextPtr context) +{ + auto view_context = Context::createCopy(context); + Settings view_settings = context->getSettings(); + view_settings.max_result_rows = 0; + view_settings.max_result_bytes = 0; + view_settings.extremes = false; + view_context->setSettings(view_settings); + return view_context; +} + } StorageView::StorageView( @@ -127,13 +141,13 @@ void StorageView::read( if (context->getSettingsRef().allow_experimental_analyzer) { - InterpreterSelectQueryAnalyzer interpreter(current_inner_query, options, context); + InterpreterSelectQueryAnalyzer interpreter(current_inner_query, options, getViewContext(context)); interpreter.addStorageLimits(*query_info.storage_limits); query_plan = std::move(interpreter).extractQueryPlan(); } else { - InterpreterSelectWithUnionQuery interpreter(current_inner_query, context, options, column_names); + InterpreterSelectWithUnionQuery interpreter(current_inner_query, getViewContext(context), options, column_names); interpreter.addStorageLimits(*query_info.storage_limits); interpreter.buildQueryPlan(query_plan); } diff --git a/src/Storages/System/StorageSystemBuildOptions.cpp.in b/src/Storages/System/StorageSystemBuildOptions.cpp.in index 47a448900a4..fd4e478004f 100644 --- a/src/Storages/System/StorageSystemBuildOptions.cpp.in +++ b/src/Storages/System/StorageSystemBuildOptions.cpp.in @@ -20,7 +20,6 @@ const char * auto_config_build[] "CXX_FLAGS", "@FULL_CXX_FLAGS_NORMALIZED@", "LINK_FLAGS", "@FULL_EXE_LINKER_FLAGS_NORMALIZED@", "BUILD_COMPILE_DEFINITIONS", "@BUILD_COMPILE_DEFINITIONS@", - "STATIC", "@USE_STATIC_LIBRARIES@", "USE_EMBEDDED_COMPILER", "@USE_EMBEDDED_COMPILER@", "USE_GLIBC_COMPATIBILITY", "@GLIBC_COMPATIBILITY@", "USE_JEMALLOC", "@ENABLE_JEMALLOC@", diff --git a/src/Storages/System/StorageSystemFilesystemCache.cpp b/src/Storages/System/StorageSystemFilesystemCache.cpp index cd9324b3253..bec92a60436 100644 --- a/src/Storages/System/StorageSystemFilesystemCache.cpp +++ b/src/Storages/System/StorageSystemFilesystemCache.cpp @@ -24,7 +24,8 @@ NamesAndTypesList StorageSystemFilesystemCache::getNamesAndTypes() {"cache_hits", std::make_shared()}, {"references", std::make_shared()}, {"downloaded_size", std::make_shared()}, - {"persistent", std::make_shared>()} + {"persistent", std::make_shared>()}, + {"kind", std::make_shared()}, }; } @@ -45,8 +46,11 @@ void StorageSystemFilesystemCache::fillData(MutableColumns & res_columns, Contex for (const auto & file_segment : file_segments) { res_columns[0]->insert(cache_base_path); + + /// Do not use `file_segment->getPathInLocalCache` here because it will lead to nullptr dereference + /// (because file_segments in getSnapshot doesn't have `cache` field set) res_columns[1]->insert( - cache->getPathInLocalCache(file_segment->key(), file_segment->offset(), file_segment->isPersistent())); + cache->getPathInLocalCache(file_segment->key(), file_segment->offset(), file_segment->getKind())); const auto & range = 
file_segment->range(); res_columns[2]->insert(range.left); @@ -57,6 +61,7 @@ void StorageSystemFilesystemCache::fillData(MutableColumns & res_columns, Contex res_columns[7]->insert(file_segment->getRefCount()); res_columns[8]->insert(file_segment->getDownloadedSize()); res_columns[9]->insert(file_segment->isPersistent()); + res_columns[10]->insert(toString(file_segment->getKind())); } } } diff --git a/src/Storages/System/StorageSystemParts.cpp b/src/Storages/System/StorageSystemParts.cpp index b205b7c224d..f6854e7d5d0 100644 --- a/src/Storages/System/StorageSystemParts.cpp +++ b/src/Storages/System/StorageSystemParts.cpp @@ -234,9 +234,12 @@ void StorageSystemParts::processNextStorage( if (columns_mask[src_index++]) { - // The full path changes at clean up thread under deleting state, do not read it, avoid the race - if (part->isStoredOnDisk() && part_state != State::Deleting) + /// The full path changes at clean up thread, so do not read it if parts can be deleted, avoid the race. + if (part->isStoredOnDisk() + && part_state != State::Deleting && part_state != State::DeleteOnDestroy && part_state != State::Temporary) + { columns[res_index++]->insert(part->getDataPartStorage().getFullPath()); + } else columns[res_index++]->insertDefault(); } diff --git a/src/Storages/System/StorageSystemPartsColumns.cpp b/src/Storages/System/StorageSystemPartsColumns.cpp index 65b5af0c8e9..00b958b015f 100644 --- a/src/Storages/System/StorageSystemPartsColumns.cpp +++ b/src/Storages/System/StorageSystemPartsColumns.cpp @@ -193,9 +193,12 @@ void StorageSystemPartsColumns::processNextStorage( columns[res_index++]->insert(part->getDataPartStorage().getDiskName()); if (columns_mask[src_index++]) { - // The full path changes at clean up thread under deleting state, do not read it, avoid the race - if (part_state != State::Deleting) + /// The full path changes at clean up thread, so do not read it if parts can be deleted, avoid the race. 
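The system.parts hunk above (and the matching system.parts_columns change that follows) widens the guard around reading a part's full path: the path can disappear under the cleanup thread, so it is only read for on-disk parts whose state guarantees the path is stable. The predicate below restates that condition in isolation; the enum is a stand-in for MergeTreeDataPartState.

```cpp
enum class PartState { Temporary, PreActive, Active, Outdated, Deleting, DeleteOnDestroy };

/// Read the full path only when the part is on disk and not in a state
/// in which the cleanup thread may remove its directory concurrently.
inline bool isPartPathSafeToRead(bool stored_on_disk, PartState state)
{
    return stored_on_disk
        && state != PartState::Deleting
        && state != PartState::DeleteOnDestroy
        && state != PartState::Temporary;
}
```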
+ if (part->isStoredOnDisk() + && part_state != State::Deleting && part_state != State::DeleteOnDestroy && part_state != State::Temporary) + { columns[res_index++]->insert(part->getDataPartStorage().getFullPath()); + } else columns[res_index++]->insertDefault(); } diff --git a/src/Storages/System/StorageSystemProcesses.cpp b/src/Storages/System/StorageSystemProcesses.cpp index 08d3666216f..213e3ed5dc0 100644 --- a/src/Storages/System/StorageSystemProcesses.cpp +++ b/src/Storages/System/StorageSystemProcesses.cpp @@ -119,7 +119,7 @@ void StorageSystemProcesses::fillData(MutableColumns & res_columns, ContextPtr c res_columns[i++]->insert(process.client_info.quota_key); res_columns[i++]->insert(process.client_info.distributed_depth); - res_columns[i++]->insert(process.elapsed_seconds); + res_columns[i++]->insert(static_cast(process.elapsed_microseconds) / 100000.0); res_columns[i++]->insert(process.is_cancelled); res_columns[i++]->insert(process.is_all_data_sent); res_columns[i++]->insert(process.read_rows); diff --git a/src/Storages/System/StorageSystemReplicas.cpp b/src/Storages/System/StorageSystemReplicas.cpp index 0f7877a6e41..363b47d96cb 100644 --- a/src/Storages/System/StorageSystemReplicas.cpp +++ b/src/Storages/System/StorageSystemReplicas.cpp @@ -153,7 +153,7 @@ Pipe StorageSystemReplicas::read( for (size_t i = 0, size = col_database->size(); i < size; ++i) { - StorageReplicatedMergeTree::Status status; + ReplicatedTableStatus status; dynamic_cast( *replicated_tables [(*col_database)[i].safeGet()] diff --git a/src/Storages/TTLDescription.cpp b/src/Storages/TTLDescription.cpp index 41c9c1996b1..2971d977099 100644 --- a/src/Storages/TTLDescription.cpp +++ b/src/Storages/TTLDescription.cpp @@ -61,7 +61,7 @@ void checkTTLExpression(const ExpressionActionsPtr & ttl_expression, const Strin { if (action.node->type == ActionsDAG::ActionType::FUNCTION) { - IFunctionBase & func = *action.node->function_base; + const IFunctionBase & func = *action.node->function_base; if (!func.isDeterministic()) throw Exception( "TTL expression cannot contain non-deterministic functions, " diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 2ada0fa3323..ffa04bcdd83 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -51,7 +51,7 @@ bool isValidFunction(const ASTPtr & expression, const std::function & is_constant, std::vector & result) +bool extractFunctions(const ASTPtr & expression, const std::function & is_constant, ASTs & result) { const auto * function = expression->as(); if (function && (function->name == "and" || function->name == "indexHint")) @@ -175,7 +175,7 @@ bool prepareFilterBlockWithQuery(const ASTPtr & query, ContextPtr context, Block }; /// Create an expression that evaluates the expressions in WHERE and PREWHERE, depending only on the existing columns. 
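The system.processes hunk above switches the elapsed column from the precomputed elapsed_seconds to a value derived from elapsed_microseconds. Assuming the column is still meant to be seconds (as the replaced line suggests), the conversion factor is 1,000,000 microseconds per second, so the 100000.0 divisor shown would report values ten times too large; a hedged conversion sketch:

```cpp
#include <cstdint>

/// One second is 1'000'000 microseconds, so dividing by 1e6 yields seconds;
/// a 1e5 divisor would overstate the elapsed time tenfold.
inline double elapsedSeconds(std::uint64_t elapsed_microseconds)
{
    return static_cast<double>(elapsed_microseconds) / 1'000'000.0;
}
```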
- std::vector functions; + ASTs functions; if (select.where()) unmodified &= extractFunctions(select.where(), is_constant, functions); if (select.prewhere()) diff --git a/src/TableFunctions/ITableFunction.cpp b/src/TableFunctions/ITableFunction.cpp index da0de7e47f6..df19e0ebad3 100644 --- a/src/TableFunctions/ITableFunction.cpp +++ b/src/TableFunctions/ITableFunction.cpp @@ -21,13 +21,13 @@ AccessType ITableFunction::getSourceAccessType() const } StoragePtr ITableFunction::execute(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, - ColumnsDescription cached_columns, bool use_global_context) const + ColumnsDescription cached_columns, bool use_global_context, bool is_insert_query) const { ProfileEvents::increment(ProfileEvents::TableFunctionExecute); AccessFlags required_access = getSourceAccessType(); auto table_function_properties = TableFunctionFactory::instance().tryGetProperties(getName()); - if (!(table_function_properties && table_function_properties->allow_readonly)) + if (is_insert_query || !(table_function_properties && table_function_properties->allow_readonly)) required_access |= AccessType::CREATE_TEMPORARY_TABLE; context->checkAccess(required_access); diff --git a/src/TableFunctions/ITableFunction.h b/src/TableFunctions/ITableFunction.h index 9d490105b17..dfcf9012068 100644 --- a/src/TableFunctions/ITableFunction.h +++ b/src/TableFunctions/ITableFunction.h @@ -74,7 +74,7 @@ public: /// Create storage according to the query. StoragePtr - execute(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns_ = {}, bool use_global_context = false) const; + execute(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns_ = {}, bool use_global_context = false, bool is_insert = false) const; virtual ~ITableFunction() = default; diff --git a/src/TableFunctions/TableFunctionDeltaLake.cpp b/src/TableFunctions/TableFunctionDeltaLake.cpp index f831d4ae609..221a512172d 100644 --- a/src/TableFunctions/TableFunctionDeltaLake.cpp +++ b/src/TableFunctions/TableFunctionDeltaLake.cpp @@ -33,7 +33,7 @@ void TableFunctionDeltaLake::parseArgumentsImpl( if (args.empty() || args.size() > 6) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, error_message); - auto header_it = StorageURL::collectHeaders(args, base_configuration, context); + auto * header_it = StorageURL::collectHeaders(args, base_configuration, context); if (header_it != args.end()) args.erase(header_it); diff --git a/src/TableFunctions/TableFunctionHudi.cpp b/src/TableFunctions/TableFunctionHudi.cpp index f39f3b515ec..49d9e7da97d 100644 --- a/src/TableFunctions/TableFunctionHudi.cpp +++ b/src/TableFunctions/TableFunctionHudi.cpp @@ -33,7 +33,7 @@ void TableFunctionHudi::parseArgumentsImpl( if (args.empty() || args.size() > 6) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, error_message); - auto header_it = StorageURL::collectHeaders(args, base_configuration, context); + auto * header_it = StorageURL::collectHeaders(args, base_configuration, context); if (header_it != args.end()) args.erase(header_it); diff --git a/src/TableFunctions/TableFunctionMySQL.cpp b/src/TableFunctions/TableFunctionMySQL.cpp index ab1c23afa7a..a34e87ee313 100644 --- a/src/TableFunctions/TableFunctionMySQL.cpp +++ b/src/TableFunctions/TableFunctionMySQL.cpp @@ -45,7 +45,7 @@ void TableFunctionMySQL::parseArguments(const ASTPtr & ast_function, ContextPtr mysql_settings.connect_timeout = 
settings.external_storage_connect_timeout_sec; mysql_settings.read_write_timeout = settings.external_storage_rw_timeout_sec; - for (auto it = args.begin(); it != args.end(); ++it) + for (auto * it = args.begin(); it != args.end(); ++it) { const ASTSetQuery * settings_ast = (*it)->as(); if (settings_ast) diff --git a/src/TableFunctions/TableFunctionS3.cpp b/src/TableFunctions/TableFunctionS3.cpp index 23822486c29..562cb3460c6 100644 --- a/src/TableFunctions/TableFunctionS3.cpp +++ b/src/TableFunctions/TableFunctionS3.cpp @@ -40,7 +40,7 @@ void TableFunctionS3::parseArgumentsImpl(const String & error_message, ASTs & ar if (args.empty() || args.size() > 6) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, error_message); - auto header_it = StorageURL::collectHeaders(args, s3_configuration, context); + auto * header_it = StorageURL::collectHeaders(args, s3_configuration, context); if (header_it != args.end()) args.erase(header_it); diff --git a/src/TableFunctions/TableFunctionURL.cpp b/src/TableFunctions/TableFunctionURL.cpp index 888f3e7b93d..46e09b02901 100644 --- a/src/TableFunctions/TableFunctionURL.cpp +++ b/src/TableFunctions/TableFunctionURL.cpp @@ -71,7 +71,7 @@ void TableFunctionURL::parseArguments(const ASTPtr & ast_function, ContextPtr co auto * url_function_args_expr = assert_cast(args[0].get()); auto & url_function_args = url_function_args_expr->children; - auto headers_it = StorageURL::collectHeaders(url_function_args, configuration, context); + auto * headers_it = StorageURL::collectHeaders(url_function_args, configuration, context); /// ITableFunctionFileLike cannot parse headers argument, so remove it. if (headers_it != url_function_args.end()) url_function_args.erase(headers_it); diff --git a/tests/ci/ast_fuzzer_check.py b/tests/ci/ast_fuzzer_check.py index 01637f928c0..096edeed149 100644 --- a/tests/ci/ast_fuzzer_check.py +++ b/tests/ci/ast_fuzzer_check.py @@ -95,7 +95,7 @@ if __name__ == "__main__": ) logging.info("Going to run %s", run_command) - run_log_path = os.path.join(temp_path, "runlog.log") + run_log_path = os.path.join(temp_path, "run.log") with open(run_log_path, "w", encoding="utf-8") as log: with subprocess.Popen( run_command, shell=True, stderr=log, stdout=log @@ -113,7 +113,7 @@ if __name__ == "__main__": ) s3_prefix = f"{pr_info.number}/{pr_info.sha}/fuzzer_{check_name_lower}/" paths = { - "runlog.log": run_log_path, + "run.log": run_log_path, "main.log": os.path.join(workspace_path, "main.log"), "server.log.gz": os.path.join(workspace_path, "server.log.gz"), "fuzzer.log": os.path.join(workspace_path, "fuzzer.log"), @@ -124,20 +124,12 @@ if __name__ == "__main__": s3_helper = S3Helper() for f in paths: try: - paths[f] = s3_helper.upload_test_report_to_s3(paths[f], s3_prefix + "/" + f) + paths[f] = s3_helper.upload_test_report_to_s3(paths[f], s3_prefix + f) except Exception as ex: logging.info("Exception uploading file %s text %s", f, ex) paths[f] = "" report_url = GITHUB_RUN_URL - if paths["runlog.log"]: - report_url = paths["runlog.log"] - if paths["main.log"]: - report_url = paths["main.log"] - if paths["server.log.gz"]: - report_url = paths["server.log.gz"] - if paths["fuzzer.log"]: - report_url = paths["fuzzer.log"] if paths["report.html"]: report_url = paths["report.html"] diff --git a/tests/ci/bugfix_validate_check.py b/tests/ci/bugfix_validate_check.py index 5050d6c38cb..6bdf3b1f7d2 100644 --- a/tests/ci/bugfix_validate_check.py +++ b/tests/ci/bugfix_validate_check.py @@ -77,7 +77,7 @@ def main(args): pr_info.number, pr_info.sha, 
test_results, - [], + args.status, check_name_with_group, ) diff --git a/tests/ci/build_check.py b/tests/ci/build_check.py index a718bd53418..603f4619d53 100644 --- a/tests/ci/build_check.py +++ b/tests/ci/build_check.py @@ -38,8 +38,6 @@ BUILD_LOG_NAME = "build_log.log" def _can_export_binaries(build_config: BuildConfig) -> bool: if build_config["package_type"] != "deb": return False - if build_config["libraries"] == "shared": - return False if build_config["sanitizer"] != "": return True if build_config["build_type"] != "": @@ -68,8 +66,6 @@ def get_packager_cmd( cmd += f" --build-type={build_config['build_type']}" if build_config["sanitizer"]: cmd += f" --sanitizer={build_config['sanitizer']}" - if build_config["libraries"] == "shared": - cmd += " --shared-libraries" if build_config["tidy"] == "enable": cmd += " --clang-tidy" diff --git a/tests/ci/build_download_helper.py b/tests/ci/build_download_helper.py index 1a2fdedefed..746ce13bf72 100644 --- a/tests/ci/build_download_helper.py +++ b/tests/ci/build_download_helper.py @@ -126,12 +126,6 @@ def download_all_deb_packages(check_name, reports_path, result_path): ) -def download_shared_build(check_name, reports_path, result_path): - download_builds_filter( - check_name, reports_path, result_path, lambda x: x.endswith("shared_build.tgz") - ) - - def download_unit_tests(check_name, reports_path, result_path): download_builds_filter( check_name, reports_path, result_path, lambda x: x.endswith("unit_tests_dbms") diff --git a/tests/ci/build_report_check.py b/tests/ci/build_report_check.py index 4ece21d5449..1de401cde9c 100644 --- a/tests/ci/build_report_check.py +++ b/tests/ci/build_report_check.py @@ -37,7 +37,6 @@ class BuildResult: compiler, build_type, sanitizer, - libraries, status, elapsed_seconds, with_coverage, @@ -45,7 +44,6 @@ class BuildResult: self.compiler = compiler self.build_type = build_type self.sanitizer = sanitizer - self.libraries = libraries self.status = status self.elapsed_seconds = elapsed_seconds self.with_coverage = with_coverage @@ -89,7 +87,6 @@ def get_failed_report( compiler="unknown", build_type="unknown", sanitizer="unknown", - libraries="unknown", status=message, elapsed_seconds=0, with_coverage=False, @@ -105,7 +102,6 @@ def process_report( compiler=build_config["compiler"], build_type=build_config["build_type"], sanitizer=build_config["sanitizer"], - libraries=build_config["libraries"], status="success" if build_report["status"] else "failure", elapsed_seconds=build_report["elapsed_seconds"], with_coverage=False, diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 753da25f300..c77acfb679f 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -13,7 +13,6 @@ CI_CONFIG = { "sanitizer": "", "package_type": "deb", "static_binary_name": "amd64", - "libraries": "static", "additional_pkgs": True, "tidy": "disable", "with_coverage": False, @@ -23,7 +22,6 @@ CI_CONFIG = { "build_type": "", "sanitizer": "", "package_type": "coverity", - "libraries": "static", "tidy": "disable", "with_coverage": False, "official": False, @@ -34,7 +32,6 @@ CI_CONFIG = { "sanitizer": "", "package_type": "deb", "static_binary_name": "aarch64", - "libraries": "static", "additional_pkgs": True, "tidy": "disable", "with_coverage": False, @@ -44,7 +41,6 @@ CI_CONFIG = { "build_type": "", "sanitizer": "address", "package_type": "deb", - "libraries": "static", "tidy": "disable", "with_coverage": False, }, @@ -53,7 +49,6 @@ CI_CONFIG = { "build_type": "", "sanitizer": "undefined", "package_type": "deb", - "libraries": 
"static", "tidy": "disable", "with_coverage": False, }, @@ -62,7 +57,6 @@ CI_CONFIG = { "build_type": "", "sanitizer": "thread", "package_type": "deb", - "libraries": "static", "tidy": "disable", "with_coverage": False, }, @@ -71,7 +65,6 @@ CI_CONFIG = { "build_type": "", "sanitizer": "memory", "package_type": "deb", - "libraries": "static", "tidy": "disable", "with_coverage": False, }, @@ -80,7 +73,6 @@ CI_CONFIG = { "build_type": "debug", "sanitizer": "", "package_type": "deb", - "libraries": "static", "tidy": "disable", "with_coverage": False, }, @@ -89,7 +81,6 @@ CI_CONFIG = { "build_type": "", "sanitizer": "", "package_type": "binary", - "libraries": "static", "tidy": "disable", "with_coverage": False, }, @@ -99,26 +90,15 @@ CI_CONFIG = { "sanitizer": "", "package_type": "binary", "static_binary_name": "debug-amd64", - "libraries": "static", "tidy": "enable", "with_coverage": False, }, - "binary_shared": { - "compiler": "clang-15", - "build_type": "", - "sanitizer": "", - "package_type": "binary", - "libraries": "shared", - "tidy": "disable", - "with_coverage": False, - }, "binary_darwin": { "compiler": "clang-15-darwin", "build_type": "", "sanitizer": "", "package_type": "binary", "static_binary_name": "macos", - "libraries": "static", "tidy": "disable", "with_coverage": False, }, @@ -127,7 +107,6 @@ CI_CONFIG = { "build_type": "", "sanitizer": "", "package_type": "binary", - "libraries": "static", "tidy": "disable", "with_coverage": False, }, @@ -137,7 +116,6 @@ CI_CONFIG = { "sanitizer": "", "package_type": "binary", "static_binary_name": "aarch64v80compat", - "libraries": "static", "tidy": "disable", "with_coverage": False, }, @@ -147,7 +125,6 @@ CI_CONFIG = { "sanitizer": "", "package_type": "binary", "static_binary_name": "freebsd", - "libraries": "static", "tidy": "disable", "with_coverage": False, }, @@ -157,7 +134,6 @@ CI_CONFIG = { "sanitizer": "", "package_type": "binary", "static_binary_name": "macos-aarch64", - "libraries": "static", "tidy": "disable", "with_coverage": False, }, @@ -167,7 +143,6 @@ CI_CONFIG = { "sanitizer": "", "package_type": "binary", "static_binary_name": "powerpc64le", - "libraries": "static", "tidy": "disable", "with_coverage": False, }, @@ -177,7 +152,6 @@ CI_CONFIG = { "sanitizer": "", "package_type": "binary", "static_binary_name": "amd64compat", - "libraries": "static", "tidy": "disable", "with_coverage": False, }, @@ -196,7 +170,6 @@ CI_CONFIG = { ], "ClickHouse special build check": [ "binary_tidy", - "binary_shared", "binary_darwin", "binary_aarch64", "binary_aarch64_v80compat", @@ -308,9 +281,6 @@ CI_CONFIG = { "Compatibility check": { "required_build": "package_release", }, - "Split build smoke test": { - "required_build": "binary_shared", - }, "Unit tests (release-clang)": { "required_build": "binary_release", }, diff --git a/tests/ci/codebrowser_check.py b/tests/ci/codebrowser_check.py index 412bcdf8818..a86749c794c 100644 --- a/tests/ci/codebrowser_check.py +++ b/tests/ci/codebrowser_check.py @@ -57,7 +57,7 @@ if __name__ == "__main__": logging.info("Going to run codebrowser: %s", run_command) - run_log_path = os.path.join(TEMP_PATH, "runlog.log") + run_log_path = os.path.join(TEMP_PATH, "run.log") with TeePopen(run_command, run_log_path) as process: retcode = process.wait() diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index 0618969f94c..034e0110e2f 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -476,7 +476,6 @@ def main(): url = upload_results(s3_helper, 
pr_info.number, pr_info.sha, test_results, [], NAME) print(f"::notice ::Report url: {url}") - print(f'::set-output name=url_output::"{url}"') if not args.reports: return diff --git a/tests/ci/docker_manifests_merge.py b/tests/ci/docker_manifests_merge.py index 2ba5a99de0a..14585159d47 100644 --- a/tests/ci/docker_manifests_merge.py +++ b/tests/ci/docker_manifests_merge.py @@ -208,7 +208,6 @@ def main(): url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [], NAME) print(f"::notice ::Report url: {url}") - print(f'::set-output name=url_output::"{url}"') if not args.reports: return diff --git a/tests/ci/docker_server.py b/tests/ci/docker_server.py index e0053f09664..fd28e5a1890 100644 --- a/tests/ci/docker_server.py +++ b/tests/ci/docker_server.py @@ -340,7 +340,6 @@ def main(): url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [], NAME) print(f"::notice ::Report url: {url}") - print(f'::set-output name=url_output::"{url}"') if not args.reports: return diff --git a/tests/ci/docs_check.py b/tests/ci/docs_check.py index c95770b646d..cac1c3aea7c 100644 --- a/tests/ci/docs_check.py +++ b/tests/ci/docs_check.py @@ -82,7 +82,7 @@ if __name__ == "__main__": f"{docker_image}" ) - run_log_path = os.path.join(test_output, "runlog.log") + run_log_path = os.path.join(test_output, "run.log") logging.info("Running command: '%s'", cmd) with TeePopen(cmd, run_log_path) as process: diff --git a/tests/ci/docs_release.py b/tests/ci/docs_release.py index 355e4af7426..f1f420318be 100644 --- a/tests/ci/docs_release.py +++ b/tests/ci/docs_release.py @@ -60,7 +60,7 @@ if __name__ == "__main__": else: user = f"{os.geteuid()}:{os.getegid()}" - run_log_path = os.path.join(test_output, "runlog.log") + run_log_path = os.path.join(test_output, "run.log") with SSHKey("ROBOT_CLICKHOUSE_SSH_KEY"): cmd = ( diff --git a/tests/ci/download_release_packets.py b/tests/ci/download_release_packages.py similarity index 61% rename from tests/ci/download_release_packets.py rename to tests/ci/download_release_packages.py index 0e0f1884fbc..26223de2f8a 100755 --- a/tests/ci/download_release_packets.py +++ b/tests/ci/download_release_packages.py @@ -15,23 +15,27 @@ CLICKHOUSE_TAGS_URL = "https://api.github.com/repos/ClickHouse/ClickHouse/tags" DOWNLOAD_PREFIX = ( "https://github.com/ClickHouse/ClickHouse/releases/download/v{version}-{type}/" ) -CLICKHOUSE_COMMON_STATIC_PACKET_NAME = "clickhouse-common-static_{version}_amd64.deb" -CLICKHOUSE_COMMON_STATIC_DBG_PACKET_NAME = ( +CLICKHOUSE_COMMON_STATIC_PACKAGE_NAME = "clickhouse-common-static_{version}_amd64.deb" +CLICKHOUSE_COMMON_STATIC_DBG_PACKAGE_NAME = ( "clickhouse-common-static-dbg_{version}_amd64.deb" ) -CLICKHOUSE_SERVER_PACKET_NAME = "clickhouse-server_{version}_amd64.deb" -CLICKHOUSE_SERVER_PACKET_FALLBACK = "clickhouse-server_{version}_all.deb" -CLICKHOUSE_CLIENT_PACKET_NAME = "clickhouse-client_{version}_amd64.deb" -CLICKHOUSE_CLIENT_PACKET_FALLBACK = "clickhouse-client_{version}_all.deb" +CLICKHOUSE_SERVER_PACKAGE_NAME = "clickhouse-server_{version}_amd64.deb" +CLICKHOUSE_SERVER_PACKAGE_FALLBACK = "clickhouse-server_{version}_all.deb" +CLICKHOUSE_CLIENT_PACKAGE_NAME = "clickhouse-client_{version}_amd64.deb" +CLICKHOUSE_CLIENT_PACKAGE_FALLBACK = "clickhouse-client_{version}_all.deb" -PACKETS_DIR = "previous_release_package_folder/" +PACKAGES_DIR = "previous_release_package_folder/" VERSION_PATTERN = r"((?:\d+\.)?(?:\d+\.)?(?:\d+\.)?\d+-[a-zA-Z]*)" -def download_packet(url, out_path, retries=10, backoff_factor=0.3): +def 
download_package(url, out_path, retries=10, backoff_factor=0.3): session = requests.Session() retry = Retry( - total=retries, read=retries, connect=retries, backoff_factor=backoff_factor + total=retries, + read=retries, + connect=retries, + backoff_factor=backoff_factor, + status_forcelist=[500, 502, 503, 504], ) adapter = HTTPAdapter(max_retries=retry) session.mount("http://", adapter) @@ -43,7 +47,7 @@ def download_packet(url, out_path, retries=10, backoff_factor=0.3): fd.write(response.content) -def download_packets(release, dest_path=PACKETS_DIR): +def download_packages(release, dest_path=PACKAGES_DIR): if not os.path.exists(dest_path): os.makedirs(dest_path) @@ -53,35 +57,35 @@ def download_packets(release, dest_path=PACKETS_DIR): return os.path.join(dest_path, pkg_name) for pkg in ( - CLICKHOUSE_COMMON_STATIC_PACKET_NAME, - CLICKHOUSE_COMMON_STATIC_DBG_PACKET_NAME, + CLICKHOUSE_COMMON_STATIC_PACKAGE_NAME, + CLICKHOUSE_COMMON_STATIC_DBG_PACKAGE_NAME, ): url = (DOWNLOAD_PREFIX + pkg).format(version=release.version, type=release.type) pkg_name = get_dest_path(pkg.format(version=release.version)) - download_packet(url, pkg_name) + download_package(url, pkg_name) for pkg, fallback in ( - (CLICKHOUSE_SERVER_PACKET_NAME, CLICKHOUSE_SERVER_PACKET_FALLBACK), - (CLICKHOUSE_CLIENT_PACKET_NAME, CLICKHOUSE_CLIENT_PACKET_FALLBACK), + (CLICKHOUSE_SERVER_PACKAGE_NAME, CLICKHOUSE_SERVER_PACKAGE_FALLBACK), + (CLICKHOUSE_CLIENT_PACKAGE_NAME, CLICKHOUSE_CLIENT_PACKAGE_FALLBACK), ): url = (DOWNLOAD_PREFIX + pkg).format(version=release.version, type=release.type) pkg_name = get_dest_path(pkg.format(version=release.version)) try: - download_packet(url, pkg_name) + download_package(url, pkg_name) except Exception: url = (DOWNLOAD_PREFIX + fallback).format( version=release.version, type=release.type ) pkg_name = get_dest_path(fallback.format(version=release.version)) - download_packet(url, pkg_name) + download_package(url, pkg_name) def download_last_release(dest_path): current_release = get_previous_release(None) - download_packets(current_release, dest_path=dest_path) + download_packages(current_release, dest_path=dest_path) if __name__ == "__main__": logging.basicConfig(level=logging.INFO) release = ReleaseInfo(input()) - download_packets(release) + download_packages(release) diff --git a/tests/ci/fast_test_check.py b/tests/ci/fast_test_check.py index 2a6a0d5fa57..0f4c1b19707 100644 --- a/tests/ci/fast_test_check.py +++ b/tests/ci/fast_test_check.py @@ -155,7 +155,7 @@ if __name__ == "__main__": if not os.path.exists(logs_path): os.makedirs(logs_path) - run_log_path = os.path.join(logs_path, "runlog.log") + run_log_path = os.path.join(logs_path, "run.log") with TeePopen(run_cmd, run_log_path, timeout=40 * 60) as process: retcode = process.wait() if retcode == 0: diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py index 87833d688af..e7689a198cd 100644 --- a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -16,7 +16,7 @@ from s3_helper import S3Helper from get_robot_token import get_best_robot_token from pr_info import FORCE_TESTS_LABEL, PRInfo from build_download_helper import download_all_deb_packages -from download_release_packets import download_last_release +from download_release_packages import download_last_release from upload_result_helper import upload_results from docker_pull_helper import get_image_with_version from commit_status_helper import ( @@ -203,20 +203,21 @@ if __name__ == "__main__": temp_path = TEMP_PATH repo_path = REPO_COPY 
reports_path = REPORTS_PATH + post_commit_path = os.path.join(temp_path, "functional_commit_status.tsv") args = parse_args() check_name = args.check_name kill_timeout = args.kill_timeout - validate_bugix_check = args.validate_bugfix + validate_bugfix_check = args.validate_bugfix flaky_check = "flaky" in check_name.lower() - run_changed_tests = flaky_check or validate_bugix_check + run_changed_tests = flaky_check or validate_bugfix_check gh = Github(get_best_robot_token(), per_page=100) - # For validate_bugix_check we need up to date information about labels, so pr_event_from_api is used + # For validate_bugfix_check we need up to date information about labels, so pr_event_from_api is used pr_info = PRInfo( - need_changed_files=run_changed_tests, pr_event_from_api=validate_bugix_check + need_changed_files=run_changed_tests, pr_event_from_api=validate_bugfix_check ) atexit.register(update_mergeable_check, gh, pr_info, check_name) @@ -224,10 +225,10 @@ if __name__ == "__main__": if not os.path.exists(temp_path): os.makedirs(temp_path) - if validate_bugix_check and "pr-bugfix" not in pr_info.labels: + if validate_bugfix_check and "pr-bugfix" not in pr_info.labels: if args.post_commit_status == "file": post_commit_status_to_file( - os.path.join(temp_path, "post_commit_status.tsv"), + post_commit_path, f"Skipped (no pr-bugfix in {pr_info.labels})", "success", "null", @@ -256,7 +257,7 @@ if __name__ == "__main__": tests_to_run = get_tests_to_run(pr_info) if not tests_to_run: commit = get_commit(gh, pr_info.sha) - state = override_status("success", check_name, validate_bugix_check) + state = override_status("success", check_name, validate_bugfix_check) if args.post_commit_status == "commit_status": commit.create_status( context=check_name_with_group, @@ -264,9 +265,11 @@ if __name__ == "__main__": state=state, ) elif args.post_commit_status == "file": - fpath = os.path.join(temp_path, "post_commit_status.tsv") post_commit_status_to_file( - fpath, description=NO_CHANGES_MSG, state=state, report_url="null" + post_commit_path, + description=NO_CHANGES_MSG, + state=state, + report_url="null", ) sys.exit(0) @@ -279,7 +282,7 @@ if __name__ == "__main__": if not os.path.exists(packages_path): os.makedirs(packages_path) - if validate_bugix_check: + if validate_bugfix_check: download_last_release(packages_path) else: download_all_deb_packages(check_name, reports_path, packages_path) @@ -292,12 +295,12 @@ if __name__ == "__main__": if not os.path.exists(result_path): os.makedirs(result_path) - run_log_path = os.path.join(result_path, "runlog.log") + run_log_path = os.path.join(result_path, "run.log") additional_envs = get_additional_envs( check_name, run_by_hash_num, run_by_hash_total ) - if validate_bugix_check: + if validate_bugfix_check: additional_envs.append("GLOBAL_TAGS=no-random-settings") run_command = get_run_command( @@ -327,7 +330,7 @@ if __name__ == "__main__": state, description, test_results, additional_logs = process_results( result_path, server_log_path ) - state = override_status(state, check_name, invert=validate_bugix_check) + state = override_status(state, check_name, invert=validate_bugfix_check) ch_helper = ClickHouseHelper() mark_flaky_tests(ch_helper, check_name, test_results) @@ -348,7 +351,7 @@ if __name__ == "__main__": ) elif args.post_commit_status == "file": post_commit_status_to_file( - os.path.join(temp_path, "post_commit_status.tsv"), + post_commit_path, description, state, report_url, diff --git a/tests/ci/integration_test_check.py b/tests/ci/integration_test_check.py index 
e61117a4b45..86f38a5b8b4 100644 --- a/tests/ci/integration_test_check.py +++ b/tests/ci/integration_test_check.py @@ -16,7 +16,7 @@ from s3_helper import S3Helper from get_robot_token import get_best_robot_token from pr_info import PRInfo from build_download_helper import download_all_deb_packages -from download_release_packets import download_last_release +from download_release_packages import download_last_release from upload_result_helper import upload_results from docker_pull_helper import get_images_with_versions from commit_status_helper import ( @@ -148,12 +148,13 @@ if __name__ == "__main__": stopwatch = Stopwatch() temp_path = TEMP_PATH + post_commit_path = os.path.join(temp_path, "integration_commit_status.tsv") repo_path = REPO_COPY reports_path = REPORTS_PATH args = parse_args() check_name = args.check_name - validate_bugix_check = args.validate_bugfix + validate_bugfix_check = args.validate_bugfix if "RUN_BY_HASH_NUM" in os.environ: run_by_hash_num = int(os.getenv("RUN_BY_HASH_NUM", "0")) @@ -171,16 +172,16 @@ if __name__ == "__main__": is_flaky_check = "flaky" in check_name - # For validate_bugix_check we need up to date information about labels, so pr_event_from_api is used + # For validate_bugfix_check we need up to date information about labels, so pr_event_from_api is used pr_info = PRInfo( - need_changed_files=is_flaky_check or validate_bugix_check, - pr_event_from_api=validate_bugix_check, + need_changed_files=is_flaky_check or validate_bugfix_check, + pr_event_from_api=validate_bugfix_check, ) - if validate_bugix_check and "pr-bugfix" not in pr_info.labels: + if validate_bugfix_check and "pr-bugfix" not in pr_info.labels: if args.post_commit_status == "file": post_commit_status_to_file( - os.path.join(temp_path, "post_commit_status.tsv"), + post_commit_path, f"Skipped (no pr-bugfix in {pr_info.labels})", "success", "null", @@ -209,7 +210,7 @@ if __name__ == "__main__": if not os.path.exists(build_path): os.makedirs(build_path) - if validate_bugix_check: + if validate_bugfix_check: download_last_release(build_path) else: download_all_deb_packages(check_name, reports_path, build_path) @@ -252,7 +253,7 @@ if __name__ == "__main__": subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) state, description, test_results, additional_logs = process_results(result_path) - state = override_status(state, check_name, invert=validate_bugix_check) + state = override_status(state, check_name, invert=validate_bugfix_check) ch_helper = ClickHouseHelper() mark_flaky_tests(ch_helper, check_name, test_results) @@ -275,7 +276,7 @@ if __name__ == "__main__": ) elif args.post_commit_status == "file": post_commit_status_to_file( - os.path.join(temp_path, "post_commit_status.tsv"), + post_commit_path, description, state, report_url, diff --git a/tests/ci/jepsen_check.py b/tests/ci/jepsen_check.py index 69964c0a0bc..3ddc0089791 100644 --- a/tests/ci/jepsen_check.py +++ b/tests/ci/jepsen_check.py @@ -251,7 +251,7 @@ if __name__ == "__main__": ) logging.info("Going to run jepsen: %s", cmd) - run_log_path = os.path.join(TEMP_PATH, "runlog.log") + run_log_path = os.path.join(TEMP_PATH, "run.log") with TeePopen(cmd, run_log_path) as process: retcode = process.wait() diff --git a/tests/ci/performance_comparison_check.py b/tests/ci/performance_comparison_check.py index acde5be5814..d0c84d56496 100644 --- a/tests/ci/performance_comparison_check.py +++ b/tests/ci/performance_comparison_check.py @@ -176,7 +176,7 @@ if __name__ == "__main__": ) logging.info("Going to run command 
%s", run_command) - run_log_path = os.path.join(temp_path, "runlog.log") + run_log_path = os.path.join(temp_path, "run.log") popen_env = os.environ.copy() popen_env.update(env_extra) @@ -198,7 +198,7 @@ if __name__ == "__main__": "all-query-metrics.tsv": os.path.join( result_path, "report/all-query-metrics.tsv" ), - "runlog.log": run_log_path, + "run.log": run_log_path, } s3_prefix = f"{pr_info.number}/{pr_info.sha}/{check_name_prefix}/" @@ -236,7 +236,7 @@ if __name__ == "__main__": # TODO: Remove me, always green mode for the first time, unless errors status = "success" - if "errors" in message: + if "errors" in message.lower(): status = "failure" # TODO: Remove until here except Exception: @@ -253,8 +253,8 @@ if __name__ == "__main__": report_url = GITHUB_RUN_URL - if uploaded["runlog.log"]: - report_url = uploaded["runlog.log"] + if uploaded["run.log"]: + report_url = uploaded["run.log"] if uploaded["compare.log"]: report_url = uploaded["compare.log"] diff --git a/tests/ci/report.py b/tests/ci/report.py index 2904a5519a9..97f6e18122f 100644 --- a/tests/ci/report.py +++ b/tests/ci/report.py @@ -275,7 +275,6 @@ tr:hover td {{filter: brightness(95%);}} Compiler Build type Sanitizer -Libraries Status Build log Build time @@ -319,8 +318,6 @@ def create_build_html_report( else: row += "none" - row += f"{build_result.libraries}" - if build_result.status: style = _get_status_style(build_result.status) row += f'{build_result.status}' diff --git a/tests/ci/split_build_smoke_check.py b/tests/ci/split_build_smoke_check.py deleted file mode 100644 index c6bf1051c87..00000000000 --- a/tests/ci/split_build_smoke_check.py +++ /dev/null @@ -1,154 +0,0 @@ -#!/usr/bin/env python3 - -import os -import logging -import subprocess -import sys - -from github import Github - -from env_helper import TEMP_PATH, REPO_COPY, REPORTS_PATH -from s3_helper import S3Helper -from get_robot_token import get_best_robot_token -from pr_info import PRInfo -from build_download_helper import download_shared_build -from upload_result_helper import upload_results -from docker_pull_helper import get_image_with_version -from commit_status_helper import post_commit_status -from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse -from stopwatch import Stopwatch -from rerun_helper import RerunHelper - - -DOCKER_IMAGE = "clickhouse/split-build-smoke-test" -DOWNLOAD_RETRIES_COUNT = 5 -RESULT_LOG_NAME = "run.log" -CHECK_NAME = "Split build smoke test" - - -def process_result(result_folder, server_log_folder): - status = "success" - description = "Server started and responded" - summary = [("Smoke test", "OK")] - with open(os.path.join(result_folder, RESULT_LOG_NAME), "r") as run_log: - lines = run_log.read().split("\n") - if not lines or lines[0].strip() != "OK": - status = "failure" - logging.info("Lines is not ok: %s", str("\n".join(lines))) - summary = [("Smoke test", "FAIL")] - description = "Server failed to respond, see result in logs" - - result_logs = [] - server_log_path = os.path.join(server_log_folder, "clickhouse-server.log") - stderr_log_path = os.path.join(result_folder, "stderr.log") - client_stderr_log_path = os.path.join(result_folder, "clientstderr.log") - run_log_path = os.path.join(result_folder, RESULT_LOG_NAME) - - for path in [ - server_log_path, - stderr_log_path, - client_stderr_log_path, - run_log_path, - ]: - if os.path.exists(path): - result_logs.append(path) - - return status, description, summary, result_logs - - -def get_run_command(build_path, result_folder, server_log_folder, 
docker_image): - return ( - f"docker run --network=host --volume={build_path}:/package_folder" - f" --volume={server_log_folder}:/var/log/clickhouse-server" - f" --volume={result_folder}:/test_output" - f" {docker_image} >{result_folder}/{RESULT_LOG_NAME}" - ) - - -if __name__ == "__main__": - logging.basicConfig(level=logging.INFO) - - stopwatch = Stopwatch() - - temp_path = TEMP_PATH - repo_path = REPO_COPY - reports_path = REPORTS_PATH - - pr_info = PRInfo() - - gh = Github(get_best_robot_token(), per_page=100) - - rerun_helper = RerunHelper(gh, pr_info, CHECK_NAME) - if rerun_helper.is_already_finished_by_status(): - logging.info("Check is already finished according to github status, exiting") - sys.exit(0) - - for root, _, files in os.walk(reports_path): - for f in files: - if f == "changed_images.json": - images_path = os.path.join(root, "changed_images.json") - break - - docker_image = get_image_with_version(reports_path, DOCKER_IMAGE) - - packages_path = os.path.join(temp_path, "packages") - if not os.path.exists(packages_path): - os.makedirs(packages_path) - - download_shared_build(CHECK_NAME, reports_path, packages_path) - - server_log_path = os.path.join(temp_path, "server_log") - if not os.path.exists(server_log_path): - os.makedirs(server_log_path) - - result_path = os.path.join(temp_path, "result_path") - if not os.path.exists(result_path): - os.makedirs(result_path) - - run_command = get_run_command( - packages_path, result_path, server_log_path, docker_image - ) - - logging.info("Going to run command %s", run_command) - with subprocess.Popen(run_command, shell=True) as process: - retcode = process.wait() - if retcode == 0: - logging.info("Run successfully") - else: - logging.info("Run failed") - - subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) - print("Result path", os.listdir(result_path)) - print("Server log path", os.listdir(server_log_path)) - - state, description, test_results, additional_logs = process_result( - result_path, server_log_path - ) - - ch_helper = ClickHouseHelper() - s3_helper = S3Helper() - report_url = upload_results( - s3_helper, - pr_info.number, - pr_info.sha, - test_results, - additional_logs, - CHECK_NAME, - ) - print(f"::notice ::Report url: {report_url}") - post_commit_status(gh, pr_info.sha, CHECK_NAME, description, state, report_url) - - prepared_events = prepare_tests_results_for_clickhouse( - pr_info, - test_results, - state, - stopwatch.duration_seconds, - stopwatch.start_time_str, - report_url, - CHECK_NAME, - ) - - ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) - - if state == "error": - sys.exit(1) diff --git a/tests/ci/sqlancer_check.py b/tests/ci/sqlancer_check.py index 5e94969d4b1..ce6d89a7267 100644 --- a/tests/ci/sqlancer_check.py +++ b/tests/ci/sqlancer_check.py @@ -95,7 +95,7 @@ if __name__ == "__main__": run_command = get_run_command(build_url, workspace_path, docker_image) logging.info("Going to run %s", run_command) - run_log_path = os.path.join(workspace_path, "runlog.log") + run_log_path = os.path.join(workspace_path, "run.log") with open(run_log_path, "w", encoding="utf-8") as log: with subprocess.Popen( run_command, shell=True, stderr=log, stdout=log diff --git a/tests/ci/stress_check.py b/tests/ci/stress_check.py index b7f74c5aeb7..37277538867 100644 --- a/tests/ci/stress_check.py +++ b/tests/ci/stress_check.py @@ -34,6 +34,7 @@ def get_run_command( "docker run --cap-add=SYS_PTRACE " # a static link, don't use S3_URL or S3_DOWNLOAD "-e 
S3_URL='https://s3.amazonaws.com/clickhouse-datasets' " + f"-e DISABLE_BC_CHECK={os.environ.get('DISABLE_BC_CHECK', '0')} " # For dmesg and sysctl "--privileged " f"--volume={build_path}:/package_folder " @@ -138,7 +139,7 @@ if __name__ == "__main__": if not os.path.exists(result_path): os.makedirs(result_path) - run_log_path = os.path.join(temp_path, "runlog.log") + run_log_path = os.path.join(temp_path, "run.log") run_command = get_run_command( packages_path, result_path, repo_tests_path, server_log_path, docker_image diff --git a/tests/ci/tests/docker_images.json b/tests/ci/tests/docker_images.json index ca5c516bccb..8c13c760168 100644 --- a/tests/ci/tests/docker_images.json +++ b/tests/ci/tests/docker_images.json @@ -6,8 +6,6 @@ "docker/packager/binary": { "name": "clickhouse/binary-builder", "dependent": [ - "docker/test/split_build_smoke_test", - "docker/test/pvs", "docker/test/codebrowser" ] }, @@ -31,10 +29,6 @@ "name": "clickhouse/performance-comparison", "dependent": [] }, - "docker/test/pvs": { - "name": "clickhouse/pvs-test", - "dependent": [] - }, "docker/test/util": { "name": "clickhouse/test-util", "dependent": [ @@ -63,10 +57,6 @@ "name": "clickhouse/stress-test", "dependent": [] }, - "docker/test/split_build_smoke_test": { - "name": "clickhouse/split-build-smoke-test", - "dependent": [] - }, "docker/test/codebrowser": { "name": "clickhouse/codebrowser", "dependent": [] diff --git a/tests/ci/unit_tests_check.py b/tests/ci/unit_tests_check.py index 4777296da18..7c4fa0e9fe4 100644 --- a/tests/ci/unit_tests_check.py +++ b/tests/ci/unit_tests_check.py @@ -140,7 +140,7 @@ if __name__ == "__main__": run_command = f"docker run --cap-add=SYS_PTRACE --volume={tests_binary_path}:/unit_tests_dbms --volume={test_output}:/test_output {docker_image}" - run_log_path = os.path.join(test_output, "runlog.log") + run_log_path = os.path.join(test_output, "run.log") logging.info("Going to run func tests: %s", run_command) diff --git a/tests/ci/workflow_approve_rerun_lambda/app.py b/tests/ci/workflow_approve_rerun_lambda/app.py index d285e29943d..9fc4266d9d4 100644 --- a/tests/ci/workflow_approve_rerun_lambda/app.py +++ b/tests/ci/workflow_approve_rerun_lambda/app.py @@ -25,7 +25,7 @@ MAX_RETRY = 5 # Number of times a check can re-run as a whole. 
# It is needed, because we are using AWS "spot" instances, that are terminated often -MAX_WORKFLOW_RERUN = 20 +MAX_WORKFLOW_RERUN = 30 WorkflowDescription = namedtuple( "WorkflowDescription", @@ -366,6 +366,7 @@ def check_need_to_rerun(workflow_description, token): jobs = get_workflow_jobs(workflow_description, token) print("Got jobs", len(jobs)) for job in jobs: + print(f"Job {job['name']} has a conclusion '{job['conclusion']}'") if job["conclusion"] not in ("success", "skipped"): print("Job", job["name"], "failed, checking steps") for step in job["steps"]: diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 6e912c7ca10..2709ad1eecf 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -111,12 +111,21 @@ def clickhouse_execute_http( if default_format is not None: params["default_format"] = default_format - client.request( - "POST", - f"/?{base_args.client_options_query_str}{urllib.parse.urlencode(params)}", - ) - res = client.getresponse() - data = res.read() + for i in range(MAX_RETRIES): + try: + client.request( + "POST", + f"/?{base_args.client_options_query_str}{urllib.parse.urlencode(params)}", + ) + res = client.getresponse() + data = res.read() + break + except Exception as ex: + if i == MAX_RETRIES - 1: + raise ex + + sleep(i + 1) + if res.status != 200: raise HTTPError(data.decode(), res.status) @@ -1001,18 +1010,28 @@ class TestCase: seconds_left = max( args.timeout - (datetime.now() - start_time).total_seconds(), 20 ) + drop_database_query = "DROP DATABASE " + database + if args.replicated_database: + drop_database_query += " ON CLUSTER test_cluster_database_replicated" + try: - drop_database_query = "DROP DATABASE " + database - if args.replicated_database: - drop_database_query += " ON CLUSTER test_cluster_database_replicated" - clickhouse_execute( - args, - drop_database_query, - timeout=seconds_left, - settings={ - "log_comment": args.testcase_basename, - }, - ) + # It's possible to get an error "New table appeared in database being dropped or detached. Try again." + for _ in range(1, 60): + try: + clickhouse_execute( + args, + drop_database_query, + timeout=seconds_left, + settings={ + "log_comment": args.testcase_basename, + }, + ) + except HTTPError as e: + if need_retry(args, e.message, e.message, 0): + continue + raise + break + except socket.timeout: total_time = (datetime.now() - start_time).total_seconds() return ( @@ -1099,7 +1118,7 @@ class TestCase: args, self.get_description_from_exception_info(sys.exc_info()) ), ) - except (ConnectionRefusedError, ConnectionResetError): + except (ConnectionError, http.client.ImproperConnectionState): return TestResult( self.name, TestStatus.FAIL, @@ -1525,7 +1544,7 @@ def check_server_started(args): print(" OK") sys.stdout.flush() return True - except (ConnectionRefusedError, ConnectionResetError): + except (ConnectionError, http.client.ImproperConnectionState): print(".", end="") sys.stdout.flush() retry_count -= 1 @@ -1535,7 +1554,7 @@ def check_server_started(args): print("\nConnection timeout, will not retry") break except Exception as e: - print("\nUexpected exception, will not retry: ", str(e)) + print("\nUexpected exception, will not retry: ", type(e).__name__, ": ", str(e)) break print("\nAll connection tries failed") @@ -1801,8 +1820,7 @@ def main(args): global restarted_tests if not check_server_started(args): - msg = "Server is not responding. Cannot execute 'SELECT 1' query. \ - If you are using split build, you have to specify -c option." + msg = "Server is not responding. 
Cannot execute 'SELECT 1' query." if args.hung_check: print(msg) pid = get_server_pid() @@ -1987,14 +2005,14 @@ if __name__ == "__main__": "-b", "--binary", default="clickhouse", - help="Path to clickhouse (if monolithic build, clickhouse-server otherwise) " + help="Path to clickhouse" "binary or name of binary in PATH", ) parser.add_argument( "-c", "--client", - help="Path to clickhouse-client (if split build, useless otherwise) binary of " + help="Path to clickhouse-client, this option is useless" "name of binary in PATH", ) @@ -2261,7 +2279,7 @@ if __name__ == "__main__": if find_binary(args.binary + "-client"): args.client = args.binary + "-client" - print("Using " + args.client + " as client program (expecting split build)") + print("Using " + args.client + " as client program") elif find_binary(args.binary): args.client = args.binary + " client" diff --git a/tests/config/config.d/zookeeper_fault_injection.xml b/tests/config/config.d/zookeeper_fault_injection.xml index 45d3cc8193d..1f13155a130 100644 --- a/tests/config/config.d/zookeeper_fault_injection.xml +++ b/tests/config/config.d/zookeeper_fault_injection.xml @@ -4,7 +4,6 @@ localhost 9181 - + true + + + encrypted + disk_s3 + 1234567812345678 + + + + diff --git a/tests/integration/test_endpoint_macro_substitution/configs/users.xml b/tests/integration/test_endpoint_macro_substitution/configs/users.xml new file mode 100644 index 00000000000..4555a2ed494 --- /dev/null +++ b/tests/integration/test_endpoint_macro_substitution/configs/users.xml @@ -0,0 +1,22 @@ + + + + + + + + + + + ::/0 + + default + default + + + + + + + + diff --git a/tests/integration/test_endpoint_macro_substitution/test.py b/tests/integration/test_endpoint_macro_substitution/test.py new file mode 100644 index 00000000000..42a8ddbda84 --- /dev/null +++ b/tests/integration/test_endpoint_macro_substitution/test.py @@ -0,0 +1,81 @@ +import pytest +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import TSV +from pyhdfs import HdfsClient + +disk_types = { + "default": "local", + "disk_s3": "s3", + "disk_hdfs": "hdfs", + "disk_encrypted": "s3", +} + + +@pytest.fixture(scope="module") +def cluster(): + try: + cluster = ClickHouseCluster(__file__) + cluster.add_instance( + "node", + main_configs=["configs/storage.xml", "configs/macros.xml"], + with_minio=True, + with_hdfs=True, + ) + cluster.start() + + fs = HdfsClient(hosts=cluster.hdfs_ip) + fs.mkdirs("/clickhouse") + + yield cluster + finally: + cluster.shutdown() + + +def test_different_types(cluster): + node = cluster.instances["node"] + fs = HdfsClient(hosts=cluster.hdfs_ip) + + response = TSV.toMat(node.query("SELECT * FROM system.disks FORMAT TSVWithNames")) + + assert len(response) > len(disk_types) # at least one extra line for header + + name_col_ix = response[0].index("name") + type_col_ix = response[0].index("type") + encrypted_col_ix = response[0].index("is_encrypted") + + for fields in response[1:]: # skip header + assert len(fields) >= 7 + assert ( + disk_types.get(fields[name_col_ix], "UNKNOWN") == fields[type_col_ix] + ), f"Wrong type ({fields[type_col_ix]}) for disk {fields[name_col_ix]}!" + if "encrypted" in fields[name_col_ix]: + assert ( + fields[encrypted_col_ix] == "1" + ), f"{fields[name_col_ix]} expected to be encrypted!" + else: + assert ( + fields[encrypted_col_ix] == "0" + ), f"{fields[name_col_ix]} expected to be non-encrypted!" 
+ + +def test_select_by_type(cluster): + node = cluster.instances["node"] + fs = HdfsClient(hosts=cluster.hdfs_ip) + + for name, disk_type in list(disk_types.items()): + if disk_type != "s3": + assert ( + node.query( + "SELECT name FROM system.disks WHERE type='" + disk_type + "'" + ) + == name + "\n" + ) + else: + assert ( + node.query( + "SELECT name FROM system.disks WHERE type='" + + disk_type + + "' ORDER BY name" + ) + == "disk_encrypted\ndisk_s3\n" + ) diff --git a/tests/integration/test_grpc_protocol/test.py b/tests/integration/test_grpc_protocol/test.py index a3f2650eac7..8c7e958bbd9 100644 --- a/tests/integration/test_grpc_protocol/test.py +++ b/tests/integration/test_grpc_protocol/test.py @@ -37,7 +37,13 @@ import clickhouse_grpc_pb2_grpc config_dir = os.path.join(SCRIPT_DIR, "./configs") cluster = ClickHouseCluster(__file__) -node = cluster.add_instance("node", main_configs=["configs/grpc_config.xml"]) +node = cluster.add_instance( + "node", + main_configs=["configs/grpc_config.xml"], + # Bug in TSAN reproduces in this test https://github.com/grpc/grpc/issues/29550#issuecomment-1188085387 + # second_deadlock_stack -- just ordinary option we use everywhere, don't want to overwrite it + env_variables={"TSAN_OPTIONS": "report_atomic_races=0 second_deadlock_stack=1"}, +) main_channel = None @@ -368,23 +374,24 @@ progress { read_bytes: 16 total_rows_to_read: 8 } -, output: "0\\t0\\n1\\t0\\n" +, output: "0\\t0\\n1\\t0" , progress { read_rows: 2 read_bytes: 16 } -, output: "2\\t0\\n3\\t0\\n" +, output: "\\n2\\t0\\n3\\t0" , progress { read_rows: 2 read_bytes: 16 } -, output: "4\\t0\\n5\\t0\\n" +, output: "\\n4\\t0\\n5\\t0" , progress { read_rows: 2 read_bytes: 16 } -, output: "6\\t0\\n7\\t0\\n" -, stats { +, output: "\\n6\\t0\\n7\\t0" +, output: "\\n" +stats { rows: 8 blocks: 4 allocated_bytes: 1092 diff --git a/tests/integration/test_grpc_protocol_ssl/test.py b/tests/integration/test_grpc_protocol_ssl/test.py index 80599126dcf..2c2a7f6c61e 100644 --- a/tests/integration/test_grpc_protocol_ssl/test.py +++ b/tests/integration/test_grpc_protocol_ssl/test.py @@ -42,6 +42,9 @@ node = cluster.add_instance( "configs/server-cert.pem", "configs/ca-cert.pem", ], + # Bug in TSAN reproduces in this test https://github.com/grpc/grpc/issues/29550#issuecomment-1188085387 + # second_deadlock_stack -- just ordinary option we use everywhere, don't want to overwrite it + env_variables={"TSAN_OPTIONS": "report_atomic_races=0 second_deadlock_stack=1"}, ) diff --git a/tests/integration/test_jbod_ha/test.py b/tests/integration/test_jbod_ha/test.py index 3dec61985b1..d82fca32f55 100644 --- a/tests/integration/test_jbod_ha/test.py +++ b/tests/integration/test_jbod_ha/test.py @@ -97,8 +97,8 @@ def test_jbod_ha(start_cluster): privileged=True, user="root", ) - node1.query("system restart disk jbod1") + node1.restart_clickhouse() time.sleep(5) assert ( diff --git a/tests/integration/test_keeper_auth/test.py b/tests/integration/test_keeper_auth/test.py index 364d93dfc53..e247984cc6a 100644 --- a/tests/integration/test_keeper_auth/test.py +++ b/tests/integration/test_keeper_auth/test.py @@ -1,4 +1,5 @@ import pytest +import time from helpers.cluster import ClickHouseCluster from kazoo.client import KazooClient, KazooState from kazoo.security import ACL, make_digest_acl, make_acl @@ -45,301 +46,405 @@ def get_genuine_zk(): return cluster.get_kazoo_client("zoo1") +# FIXME: this sleep is a workaround for the bug that is fixed by this patch [1]. 
+# +# The problem is that after AUTH_FAILED (that is caused by the line above) +# there can be a race, because of which, stop() will hang indefinitely. +# +# [1]: https://github.com/python-zk/kazoo/pull/688 +def zk_auth_failure_workaround(): + time.sleep(2) + + +def zk_stop_and_close(zk): + if zk: + zk.stop() + zk.close() + + @pytest.mark.parametrize(("get_zk"), [get_genuine_zk, get_fake_zk]) def test_remove_acl(started_cluster, get_zk): - auth_connection = get_zk() + auth_connection = None - auth_connection.add_auth("digest", "user1:password1") + try: + auth_connection = get_zk() - # Consistent with zookeeper, accept generated digest - auth_connection.create( - "/test_remove_acl1", - b"dataX", - acl=[ - make_acl( - "digest", - "user1:XDkd2dsEuhc9ImU3q8pa8UOdtpI=", - read=True, - write=False, - create=False, - delete=False, - admin=False, - ) - ], - ) - auth_connection.create( - "/test_remove_acl2", - b"dataX", - acl=[ - make_acl( - "digest", - "user1:XDkd2dsEuhc9ImU3q8pa8UOdtpI=", - read=True, - write=True, - create=False, - delete=False, - admin=False, - ) - ], - ) - auth_connection.create( - "/test_remove_acl3", - b"dataX", - acl=[make_acl("digest", "user1:XDkd2dsEuhc9ImU3q8pa8UOdtpI=", all=True)], - ) + auth_connection.add_auth("digest", "user1:password1") - auth_connection.delete("/test_remove_acl2") + # Consistent with zookeeper, accept generated digest + auth_connection.create( + "/test_remove_acl1", + b"dataX", + acl=[ + make_acl( + "digest", + "user1:XDkd2dsEuhc9ImU3q8pa8UOdtpI=", + read=True, + write=False, + create=False, + delete=False, + admin=False, + ) + ], + ) + auth_connection.create( + "/test_remove_acl2", + b"dataX", + acl=[ + make_acl( + "digest", + "user1:XDkd2dsEuhc9ImU3q8pa8UOdtpI=", + read=True, + write=True, + create=False, + delete=False, + admin=False, + ) + ], + ) + auth_connection.create( + "/test_remove_acl3", + b"dataX", + acl=[make_acl("digest", "user1:XDkd2dsEuhc9ImU3q8pa8UOdtpI=", all=True)], + ) - auth_connection.create( - "/test_remove_acl4", - b"dataX", - acl=[ - make_acl( - "digest", - "user1:XDkd2dsEuhc9ImU3q8pa8UOdtpI=", - read=True, - write=True, - create=True, - delete=False, - admin=False, - ) - ], - ) + auth_connection.delete("/test_remove_acl2") - acls, stat = auth_connection.get_acls("/test_remove_acl3") + auth_connection.create( + "/test_remove_acl4", + b"dataX", + acl=[ + make_acl( + "digest", + "user1:XDkd2dsEuhc9ImU3q8pa8UOdtpI=", + read=True, + write=True, + create=True, + delete=False, + admin=False, + ) + ], + ) - assert stat.aversion == 0 - assert len(acls) == 1 - for acl in acls: - assert acl.acl_list == ["ALL"] - assert acl.perms == 31 + acls, stat = auth_connection.get_acls("/test_remove_acl3") + + assert stat.aversion == 0 + assert len(acls) == 1 + for acl in acls: + assert acl.acl_list == ["ALL"] + assert acl.perms == 31 + finally: + zk_stop_and_close(auth_connection) @pytest.mark.parametrize(("get_zk"), [get_genuine_zk, get_fake_zk]) def test_digest_auth_basic(started_cluster, get_zk): - auth_connection = get_zk() + try: + auth_connection = None + no_auth_connection = None - auth_connection.add_auth("digest", "user1:password1") + auth_connection = get_zk() + auth_connection.add_auth("digest", "user1:password1") - auth_connection.create("/test_no_acl", b"") - auth_connection.create( - "/test_all_acl", b"data", acl=[make_acl("auth", "", all=True)] - ) - # Consistent with zookeeper, accept generated digest - auth_connection.create( - "/test_all_digest_acl", - b"dataX", - acl=[make_acl("digest", "user1:XDkd2dsEuhc9ImU3q8pa8UOdtpI=", 
all=True)], - ) + auth_connection.create("/test_no_acl", b"") + auth_connection.create( + "/test_all_acl", b"data", acl=[make_acl("auth", "", all=True)] + ) + # Consistent with zookeeper, accept generated digest + auth_connection.create( + "/test_all_digest_acl", + b"dataX", + acl=[make_acl("digest", "user1:XDkd2dsEuhc9ImU3q8pa8UOdtpI=", all=True)], + ) - assert auth_connection.get("/test_all_acl")[0] == b"data" - assert auth_connection.get("/test_all_digest_acl")[0] == b"dataX" + assert auth_connection.get("/test_all_acl")[0] == b"data" + assert auth_connection.get("/test_all_digest_acl")[0] == b"dataX" - no_auth_connection = get_zk() - no_auth_connection.set("/test_no_acl", b"hello") + no_auth_connection = get_zk() + no_auth_connection.set("/test_no_acl", b"hello") + assert no_auth_connection.get("/test_no_acl")[0] == b"hello" - # no ACL, so cannot access these nodes - assert no_auth_connection.get("/test_no_acl")[0] == b"hello" + # no ACL, so cannot access these nodes + with pytest.raises(NoAuthError): + no_auth_connection.set("/test_all_acl", b"hello") - with pytest.raises(NoAuthError): - no_auth_connection.set("/test_all_acl", b"hello") + with pytest.raises(NoAuthError): + no_auth_connection.get("/test_all_acl") - with pytest.raises(NoAuthError): - no_auth_connection.get("/test_all_acl") + with pytest.raises(NoAuthError): + no_auth_connection.get("/test_all_digest_acl") - with pytest.raises(NoAuthError): - no_auth_connection.get("/test_all_digest_acl") + # still doesn't help + with pytest.raises(AuthFailedError): + no_auth_connection.add_auth("world", "anyone") - # still doesn't help - with pytest.raises(AuthFailedError): - no_auth_connection.add_auth("world", "anyone") + zk_auth_failure_workaround() + zk_stop_and_close(no_auth_connection) + # session became broken, reconnect + no_auth_connection = get_zk() - # session became broken, reconnect - no_auth_connection = get_zk() + # wrong auth + no_auth_connection.add_auth("digest", "user2:password2") - # wrong auth - no_auth_connection.add_auth("digest", "user2:password2") + with pytest.raises(NoAuthError): + no_auth_connection.set("/test_all_acl", b"hello") - with pytest.raises(NoAuthError): - no_auth_connection.set("/test_all_acl", b"hello") + with pytest.raises(NoAuthError): + no_auth_connection.set("/test_all_acl", b"hello") - with pytest.raises(NoAuthError): - no_auth_connection.set("/test_all_acl", b"hello") + with pytest.raises(NoAuthError): + no_auth_connection.get("/test_all_acl") - with pytest.raises(NoAuthError): - no_auth_connection.get("/test_all_acl") + with pytest.raises(NoAuthError): + no_auth_connection.get("/test_all_digest_acl") - with pytest.raises(NoAuthError): - no_auth_connection.get("/test_all_digest_acl") + # but can access some non restricted nodes + no_auth_connection.create("/some_allowed_node", b"data") - # but can access some non restricted nodes - no_auth_connection.create("/some_allowed_node", b"data") - - # auth added, go on - no_auth_connection.add_auth("digest", "user1:password1") - for path in ["/test_no_acl", "/test_all_acl"]: - no_auth_connection.set(path, b"auth_added") - assert no_auth_connection.get(path)[0] == b"auth_added" + # auth added, go on + no_auth_connection.add_auth("digest", "user1:password1") + for path in ["/test_no_acl", "/test_all_acl"]: + no_auth_connection.set(path, b"auth_added") + assert no_auth_connection.get(path)[0] == b"auth_added" + finally: + zk_stop_and_close(auth_connection) + zk_stop_and_close(no_auth_connection) def test_super_auth(started_cluster): auth_connection = 
get_fake_zk() - - auth_connection.add_auth("digest", "user1:password1") - - auth_connection.create("/test_super_no_acl", b"") - auth_connection.create( - "/test_super_all_acl", b"data", acl=[make_acl("auth", "", all=True)] - ) + try: + auth_connection.add_auth("digest", "user1:password1") + auth_connection.create("/test_super_no_acl", b"") + auth_connection.create( + "/test_super_all_acl", b"data", acl=[make_acl("auth", "", all=True)] + ) + finally: + zk_stop_and_close(auth_connection) super_connection = get_fake_zk() - super_connection.add_auth("digest", "super:admin") - - for path in ["/test_super_no_acl", "/test_super_all_acl"]: - super_connection.set(path, b"value") - assert super_connection.get(path)[0] == b"value" + try: + super_connection.add_auth("digest", "super:admin") + for path in ["/test_super_no_acl", "/test_super_all_acl"]: + super_connection.set(path, b"value") + assert super_connection.get(path)[0] == b"value" + finally: + zk_stop_and_close(super_connection) @pytest.mark.parametrize(("get_zk"), [get_genuine_zk, get_fake_zk]) def test_digest_auth_multiple(started_cluster, get_zk): - auth_connection = get_zk() - auth_connection.add_auth("digest", "user1:password1") - auth_connection.add_auth("digest", "user2:password2") - auth_connection.add_auth("digest", "user3:password3") + auth_connection = None + one_auth_connection = None + other_auth_connection = None - auth_connection.create( - "/test_multi_all_acl", b"data", acl=[make_acl("auth", "", all=True)] - ) + try: + auth_connection = get_zk() + auth_connection.add_auth("digest", "user1:password1") + auth_connection.add_auth("digest", "user2:password2") + auth_connection.add_auth("digest", "user3:password3") - one_auth_connection = get_zk() - one_auth_connection.add_auth("digest", "user1:password1") + auth_connection.create( + "/test_multi_all_acl", b"data", acl=[make_acl("auth", "", all=True)] + ) - one_auth_connection.set("/test_multi_all_acl", b"X") - assert one_auth_connection.get("/test_multi_all_acl")[0] == b"X" + one_auth_connection = get_zk() + one_auth_connection.add_auth("digest", "user1:password1") - other_auth_connection = get_zk() - other_auth_connection.add_auth("digest", "user2:password2") + one_auth_connection.set("/test_multi_all_acl", b"X") + assert one_auth_connection.get("/test_multi_all_acl")[0] == b"X" - other_auth_connection.set("/test_multi_all_acl", b"Y") + other_auth_connection = get_zk() + other_auth_connection.add_auth("digest", "user2:password2") - assert other_auth_connection.get("/test_multi_all_acl")[0] == b"Y" + other_auth_connection.set("/test_multi_all_acl", b"Y") + + assert other_auth_connection.get("/test_multi_all_acl")[0] == b"Y" + finally: + zk_stop_and_close(auth_connection) + zk_stop_and_close(one_auth_connection) + zk_stop_and_close(other_auth_connection) @pytest.mark.parametrize(("get_zk"), [get_genuine_zk, get_fake_zk]) def test_partial_auth(started_cluster, get_zk): auth_connection = get_zk() - auth_connection.add_auth("digest", "user1:password1") + try: + auth_connection.add_auth("digest", "user1:password1") - auth_connection.create( - "/test_partial_acl", - b"data", - acl=[ - make_acl( - "auth", "", read=False, write=True, create=True, delete=True, admin=True - ) - ], - ) + auth_connection.create( + "/test_partial_acl", + b"data", + acl=[ + make_acl( + "auth", + "", + read=False, + write=True, + create=True, + delete=True, + admin=True, + ) + ], + ) - auth_connection.set("/test_partial_acl", b"X") - auth_connection.create( - "/test_partial_acl/subnode", - b"X", - acl=[ - 
make_acl( - "auth", "", read=False, write=True, create=True, delete=True, admin=True - ) - ], - ) + auth_connection.set("/test_partial_acl", b"X") + auth_connection.create( + "/test_partial_acl/subnode", + b"X", + acl=[ + make_acl( + "auth", + "", + read=False, + write=True, + create=True, + delete=True, + admin=True, + ) + ], + ) - with pytest.raises(NoAuthError): - auth_connection.get("/test_partial_acl") + with pytest.raises(NoAuthError): + auth_connection.get("/test_partial_acl") - with pytest.raises(NoAuthError): - auth_connection.get_children("/test_partial_acl") + with pytest.raises(NoAuthError): + auth_connection.get_children("/test_partial_acl") - # exists works without read perm - assert auth_connection.exists("/test_partial_acl") is not None + # exists works without read perm + assert auth_connection.exists("/test_partial_acl") is not None - auth_connection.create( - "/test_partial_acl_create", - b"data", - acl=[ - make_acl( - "auth", "", read=True, write=True, create=False, delete=True, admin=True - ) - ], - ) - with pytest.raises(NoAuthError): - auth_connection.create("/test_partial_acl_create/subnode") + auth_connection.create( + "/test_partial_acl_create", + b"data", + acl=[ + make_acl( + "auth", + "", + read=True, + write=True, + create=False, + delete=True, + admin=True, + ) + ], + ) + with pytest.raises(NoAuthError): + auth_connection.create("/test_partial_acl_create/subnode") - auth_connection.create( - "/test_partial_acl_set", - b"data", - acl=[ - make_acl( - "auth", "", read=True, write=False, create=True, delete=True, admin=True - ) - ], - ) - with pytest.raises(NoAuthError): - auth_connection.set("/test_partial_acl_set", b"X") + auth_connection.create( + "/test_partial_acl_set", + b"data", + acl=[ + make_acl( + "auth", + "", + read=True, + write=False, + create=True, + delete=True, + admin=True, + ) + ], + ) + with pytest.raises(NoAuthError): + auth_connection.set("/test_partial_acl_set", b"X") - # not allowed to delete child node - auth_connection.create( - "/test_partial_acl_delete", - b"data", - acl=[ - make_acl( - "auth", "", read=True, write=True, create=True, delete=False, admin=True - ) - ], - ) - auth_connection.create("/test_partial_acl_delete/subnode") - with pytest.raises(NoAuthError): - auth_connection.delete("/test_partial_acl_delete/subnode") + # not allowed to delete child node + auth_connection.create( + "/test_partial_acl_delete", + b"data", + acl=[ + make_acl( + "auth", + "", + read=True, + write=True, + create=True, + delete=False, + admin=True, + ) + ], + ) + auth_connection.create("/test_partial_acl_delete/subnode") + with pytest.raises(NoAuthError): + auth_connection.delete("/test_partial_acl_delete/subnode") + finally: + zk_stop_and_close(auth_connection) -def test_bad_auth(started_cluster): +def test_bad_auth_1(started_cluster): auth_connection = get_fake_zk() - with pytest.raises(AuthFailedError): auth_connection.add_auth("world", "anyone") + zk_auth_failure_workaround() + zk_stop_and_close(auth_connection) + +def test_bad_auth_2(started_cluster): auth_connection = get_fake_zk() with pytest.raises(AuthFailedError): print("Sending 1") auth_connection.add_auth("adssagf", "user1:password1") + zk_auth_failure_workaround() + zk_stop_and_close(auth_connection) + +def test_bad_auth_3(started_cluster): auth_connection = get_fake_zk() with pytest.raises(AuthFailedError): print("Sending 2") auth_connection.add_auth("digest", "") + zk_auth_failure_workaround() + zk_stop_and_close(auth_connection) + +def test_bad_auth_4(started_cluster): auth_connection 
= get_fake_zk() with pytest.raises(AuthFailedError): print("Sending 3") auth_connection.add_auth("", "user1:password1") + zk_auth_failure_workaround() + zk_stop_and_close(auth_connection) + +def test_bad_auth_5(started_cluster): auth_connection = get_fake_zk() with pytest.raises(AuthFailedError): print("Sending 4") auth_connection.add_auth("digest", "user1") + zk_auth_failure_workaround() + zk_stop_and_close(auth_connection) + +def test_bad_auth_6(started_cluster): auth_connection = get_fake_zk() with pytest.raises(AuthFailedError): print("Sending 5") auth_connection.add_auth("digest", "user1:password:otherpassword") + zk_auth_failure_workaround() + zk_stop_and_close(auth_connection) + +def test_bad_auth_7(started_cluster): auth_connection = get_fake_zk() with pytest.raises(AuthFailedError): print("Sending 6") auth_connection.add_auth("auth", "user1:password") + zk_auth_failure_workaround() + zk_stop_and_close(auth_connection) + +def test_bad_auth_8(started_cluster): auth_connection = get_fake_zk() with pytest.raises(AuthFailedError): print("Sending 7") auth_connection.add_auth("world", "somebody") + zk_auth_failure_workaround() + zk_stop_and_close(auth_connection) + +def test_bad_auth_9(started_cluster): auth_connection = get_fake_zk() with pytest.raises(InvalidACLError): print("Sending 8") @@ -358,7 +463,11 @@ def test_bad_auth(started_cluster): ) ], ) + zk_auth_failure_workaround() + zk_stop_and_close(auth_connection) + +def test_bad_auth_10(started_cluster): auth_connection = get_fake_zk() with pytest.raises(InvalidACLError): print("Sending 9") @@ -377,7 +486,11 @@ def test_bad_auth(started_cluster): ) ], ) + zk_auth_failure_workaround() + zk_stop_and_close(auth_connection) + +def test_bad_auth_11(started_cluster): auth_connection = get_fake_zk() with pytest.raises(InvalidACLError): print("Sending 10") @@ -390,7 +503,11 @@ def test_bad_auth(started_cluster): ) ], ) + zk_auth_failure_workaround() + zk_stop_and_close(auth_connection) + +def test_bad_auth_12(started_cluster): auth_connection = get_fake_zk() with pytest.raises(InvalidACLError): print("Sending 11") @@ -409,7 +526,11 @@ def test_bad_auth(started_cluster): ) ], ) + zk_auth_failure_workaround() + zk_stop_and_close(auth_connection) + +def test_bad_auth_13(started_cluster): auth_connection = get_fake_zk() with pytest.raises(InvalidACLError): print("Sending 12") @@ -428,117 +549,145 @@ def test_bad_auth(started_cluster): ) ], ) + zk_auth_failure_workaround() + zk_stop_and_close(auth_connection) def test_auth_snapshot(started_cluster): - connection = get_fake_zk() - connection.add_auth("digest", "user1:password1") + connection = None + connection1 = None + connection2 = None - connection.create( - "/test_snapshot_acl", b"data", acl=[make_acl("auth", "", all=True)] - ) + try: + connection = get_fake_zk() + connection.add_auth("digest", "user1:password1") - connection1 = get_fake_zk() - connection1.add_auth("digest", "user2:password2") - - connection1.create( - "/test_snapshot_acl1", b"data", acl=[make_acl("auth", "", all=True)] - ) - - connection2 = get_fake_zk() - - connection2.create("/test_snapshot_acl2", b"data") - - for i in range(100): connection.create( - f"/test_snapshot_acl/path{i}", b"data", acl=[make_acl("auth", "", all=True)] + "/test_snapshot_acl", b"data", acl=[make_acl("auth", "", all=True)] ) - node.restart_clickhouse() + connection1 = get_fake_zk() + connection1.add_auth("digest", "user2:password2") - connection = get_fake_zk() + connection1.create( + "/test_snapshot_acl1", b"data", acl=[make_acl("auth", "", 
all=True)] + ) - with pytest.raises(NoAuthError): - connection.get("/test_snapshot_acl") + connection2 = get_fake_zk() - connection.add_auth("digest", "user1:password1") + connection2.create("/test_snapshot_acl2", b"data") - assert connection.get("/test_snapshot_acl")[0] == b"data" + for i in range(100): + connection.create( + f"/test_snapshot_acl/path{i}", + b"data", + acl=[make_acl("auth", "", all=True)], + ) - with pytest.raises(NoAuthError): - connection.get("/test_snapshot_acl1") + node.restart_clickhouse() - assert connection.get("/test_snapshot_acl2")[0] == b"data" + zk_stop_and_close(connection) + connection = get_fake_zk() - for i in range(100): - assert connection.get(f"/test_snapshot_acl/path{i}")[0] == b"data" + with pytest.raises(NoAuthError): + connection.get("/test_snapshot_acl") - connection1 = get_fake_zk() - connection1.add_auth("digest", "user2:password2") + connection.add_auth("digest", "user1:password1") - assert connection1.get("/test_snapshot_acl1")[0] == b"data" + assert connection.get("/test_snapshot_acl")[0] == b"data" - with pytest.raises(NoAuthError): - connection1.get("/test_snapshot_acl") + with pytest.raises(NoAuthError): + connection.get("/test_snapshot_acl1") - connection2 = get_fake_zk() - assert connection2.get("/test_snapshot_acl2")[0] == b"data" - with pytest.raises(NoAuthError): - connection2.get("/test_snapshot_acl") + assert connection.get("/test_snapshot_acl2")[0] == b"data" - with pytest.raises(NoAuthError): - connection2.get("/test_snapshot_acl1") + for i in range(100): + assert connection.get(f"/test_snapshot_acl/path{i}")[0] == b"data" + + zk_stop_and_close(connection1) + connection1 = get_fake_zk() + connection1.add_auth("digest", "user2:password2") + + assert connection1.get("/test_snapshot_acl1")[0] == b"data" + + with pytest.raises(NoAuthError): + connection1.get("/test_snapshot_acl") + + zk_stop_and_close(connection2) + connection2 = get_fake_zk() + assert connection2.get("/test_snapshot_acl2")[0] == b"data" + with pytest.raises(NoAuthError): + connection2.get("/test_snapshot_acl") + + with pytest.raises(NoAuthError): + connection2.get("/test_snapshot_acl1") + finally: + zk_stop_and_close(connection) + zk_stop_and_close(connection1) + zk_stop_and_close(connection2) @pytest.mark.parametrize(("get_zk"), [get_genuine_zk, get_fake_zk]) def test_get_set_acl(started_cluster, get_zk): - auth_connection = get_zk() - auth_connection.add_auth("digest", "username1:secret1") - auth_connection.add_auth("digest", "username2:secret2") + auth_connection = None + other_auth_connection = None + try: + auth_connection = get_zk() + auth_connection.add_auth("digest", "username1:secret1") + auth_connection.add_auth("digest", "username2:secret2") - auth_connection.create( - "/test_set_get_acl", b"data", acl=[make_acl("auth", "", all=True)] - ) - - acls, stat = auth_connection.get_acls("/test_set_get_acl") - - assert stat.aversion == 0 - assert len(acls) == 2 - for acl in acls: - assert acl.acl_list == ["ALL"] - assert acl.id.scheme == "digest" - assert acl.perms == 31 - assert acl.id.id in ( - "username1:eGncMdBgOfGS/TCojt51xWsWv/Y=", - "username2:qgSSumukVlhftkVycylbHNvxhFU=", + auth_connection.create( + "/test_set_get_acl", b"data", acl=[make_acl("auth", "", all=True)] ) - other_auth_connection = get_zk() - other_auth_connection.add_auth("digest", "username1:secret1") - other_auth_connection.add_auth("digest", "username3:secret3") - other_auth_connection.set_acls( - "/test_set_get_acl", - acls=[ - make_acl( - "auth", "", read=True, write=False, create=True, 
delete=True, admin=True + acls, stat = auth_connection.get_acls("/test_set_get_acl") + + assert stat.aversion == 0 + assert len(acls) == 2 + for acl in acls: + assert acl.acl_list == ["ALL"] + assert acl.id.scheme == "digest" + assert acl.perms == 31 + assert acl.id.id in ( + "username1:eGncMdBgOfGS/TCojt51xWsWv/Y=", + "username2:qgSSumukVlhftkVycylbHNvxhFU=", ) - ], - ) - acls, stat = other_auth_connection.get_acls("/test_set_get_acl") - - assert stat.aversion == 1 - assert len(acls) == 2 - for acl in acls: - assert acl.acl_list == ["READ", "CREATE", "DELETE", "ADMIN"] - assert acl.id.scheme == "digest" - assert acl.perms == 29 - assert acl.id.id in ( - "username1:eGncMdBgOfGS/TCojt51xWsWv/Y=", - "username3:CvWITOxxTwk+u6S5PoGlQ4hNoWI=", - ) - - with pytest.raises(KazooException): + other_auth_connection = get_zk() + other_auth_connection.add_auth("digest", "username1:secret1") + other_auth_connection.add_auth("digest", "username3:secret3") other_auth_connection.set_acls( - "/test_set_get_acl", acls=[make_acl("auth", "", all=True)], version=0 + "/test_set_get_acl", + acls=[ + make_acl( + "auth", + "", + read=True, + write=False, + create=True, + delete=True, + admin=True, + ) + ], ) + + acls, stat = other_auth_connection.get_acls("/test_set_get_acl") + + assert stat.aversion == 1 + assert len(acls) == 2 + for acl in acls: + assert acl.acl_list == ["READ", "CREATE", "DELETE", "ADMIN"] + assert acl.id.scheme == "digest" + assert acl.perms == 29 + assert acl.id.id in ( + "username1:eGncMdBgOfGS/TCojt51xWsWv/Y=", + "username3:CvWITOxxTwk+u6S5PoGlQ4hNoWI=", + ) + + with pytest.raises(KazooException): + other_auth_connection.set_acls( + "/test_set_get_acl", acls=[make_acl("auth", "", all=True)], version=0 + ) + finally: + zk_stop_and_close(auth_connection) + zk_stop_and_close(other_auth_connection) diff --git a/tests/integration/test_keeper_multinode_simple/test.py b/tests/integration/test_keeper_multinode_simple/test.py index 1dcbb290fa8..1999f361dd4 100644 --- a/tests/integration/test_keeper_multinode_simple/test.py +++ b/tests/integration/test_keeper_multinode_simple/test.py @@ -63,6 +63,10 @@ def test_read_write_multinode(started_cluster): node2_zk = get_fake_zk("node2") node3_zk = get_fake_zk("node3") + # Cleanup + if node1_zk.exists("/test_read_write_multinode_node1") != None: + node1_zk.delete("/test_read_write_multinode_node1") + node1_zk.create("/test_read_write_multinode_node1", b"somedata1") node2_zk.create("/test_read_write_multinode_node2", b"somedata2") node3_zk.create("/test_read_write_multinode_node3", b"somedata3") @@ -105,6 +109,10 @@ def test_watch_on_follower(started_cluster): node2_zk = get_fake_zk("node2") node3_zk = get_fake_zk("node3") + # Cleanup + if node1_zk.exists("/test_data_watches") != None: + node1_zk.delete("/test_data_watches") + node1_zk.create("/test_data_watches") node2_zk.set("/test_data_watches", b"hello") node3_zk.set("/test_data_watches", b"world") @@ -163,6 +171,10 @@ def test_session_expiration(started_cluster): node3_zk = get_fake_zk("node3", timeout=3.0) print("Node3 session id", node3_zk._session_id) + # Cleanup + if node3_zk.exists("/test_ephemeral_node") != None: + node3_zk.delete("/test_ephemeral_node") + node3_zk.create("/test_ephemeral_node", b"world", ephemeral=True) with PartitionManager() as pm: @@ -201,13 +213,18 @@ def test_follower_restart(started_cluster): try: wait_nodes() node1_zk = get_fake_zk("node1") - - node1_zk.create("/test_restart_node", b"hello") - - node3.restart_clickhouse(kill=True) - node3_zk = get_fake_zk("node3") + # 
Cleanup + if node1_zk.exists("/test_restart_node") != None: + node1_zk.delete("/test_restart_node") + + node1_zk.create("/test_restart_node", b"hello") + node3.restart_clickhouse(kill=True) + + wait_nodes() + + node3_zk = get_fake_zk("node3") # got data from log assert node3_zk.get("/test_restart_node")[0] == b"hello" @@ -225,11 +242,11 @@ def test_follower_restart(started_cluster): def test_simple_replicated_table(started_cluster): wait_nodes() + for i, node in enumerate([node1, node2, node3]): + node.query("DROP TABLE IF EXISTS t SYNC") node.query( - "CREATE TABLE t (value UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/t', '{}') ORDER BY tuple()".format( - i + 1 - ) + f"CREATE TABLE t (value UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/t', '{i + 1}') ORDER BY tuple()" ) node2.query("INSERT INTO t SELECT number FROM numbers(10)") diff --git a/tests/integration/test_keeper_nodes_add/test.py b/tests/integration/test_keeper_nodes_add/test.py index aad674332ac..c5282de1bc8 100644 --- a/tests/integration/test_keeper_nodes_add/test.py +++ b/tests/integration/test_keeper_nodes_add/test.py @@ -46,72 +46,85 @@ def start(node): def test_nodes_add(started_cluster): - keeper_utils.wait_until_connected(cluster, node1) - zk_conn = get_fake_zk(node1) + zk_conn = None + zk_conn2 = None + zk_conn3 = None - for i in range(100): - zk_conn.create("/test_two_" + str(i), b"somedata") + try: + keeper_utils.wait_until_connected(cluster, node1) + zk_conn = get_fake_zk(node1) - p = Pool(3) - node2.stop_clickhouse() - node2.copy_file_to_container( - os.path.join(CONFIG_DIR, "enable_keeper_two_nodes_2.xml"), - "/etc/clickhouse-server/config.d/enable_keeper2.xml", - ) - waiter = p.apply_async(start, (node2,)) - node1.copy_file_to_container( - os.path.join(CONFIG_DIR, "enable_keeper_two_nodes_1.xml"), - "/etc/clickhouse-server/config.d/enable_keeper1.xml", - ) - node1.query("SYSTEM RELOAD CONFIG") - waiter.wait() - keeper_utils.wait_until_connected(cluster, node2) + for i in range(100): + zk_conn.create("/test_two_" + str(i), b"somedata") - zk_conn2 = get_fake_zk(node2) + p = Pool(3) + node2.stop_clickhouse() + node2.copy_file_to_container( + os.path.join(CONFIG_DIR, "enable_keeper_two_nodes_2.xml"), + "/etc/clickhouse-server/config.d/enable_keeper2.xml", + ) + waiter = p.apply_async(start, (node2,)) + node1.copy_file_to_container( + os.path.join(CONFIG_DIR, "enable_keeper_two_nodes_1.xml"), + "/etc/clickhouse-server/config.d/enable_keeper1.xml", + ) + node1.query("SYSTEM RELOAD CONFIG") + waiter.wait() + keeper_utils.wait_until_connected(cluster, node2) - for i in range(100): - assert zk_conn2.exists("/test_two_" + str(i)) is not None + zk_conn2 = get_fake_zk(node2) - zk_conn = get_fake_zk(node1) + for i in range(100): + assert zk_conn2.exists("/test_two_" + str(i)) is not None - for i in range(100): - zk_conn.create("/test_three_" + str(i), b"somedata") + zk_conn.stop() + zk_conn.close() - node3.stop_clickhouse() + zk_conn = get_fake_zk(node1) - node3.copy_file_to_container( - os.path.join(CONFIG_DIR, "enable_keeper_three_nodes_3.xml"), - "/etc/clickhouse-server/config.d/enable_keeper3.xml", - ) - waiter = p.apply_async(start, (node3,)) - node2.copy_file_to_container( - os.path.join(CONFIG_DIR, "enable_keeper_three_nodes_2.xml"), - "/etc/clickhouse-server/config.d/enable_keeper2.xml", - ) - node1.copy_file_to_container( - os.path.join(CONFIG_DIR, "enable_keeper_three_nodes_1.xml"), - "/etc/clickhouse-server/config.d/enable_keeper1.xml", - ) + for i in range(100): + zk_conn.create("/test_three_" + str(i), 
b"somedata") - node1.query("SYSTEM RELOAD CONFIG") - node2.query("SYSTEM RELOAD CONFIG") + node3.stop_clickhouse() - waiter.wait() - keeper_utils.wait_until_connected(cluster, node3) - zk_conn3 = get_fake_zk(node3) + node3.copy_file_to_container( + os.path.join(CONFIG_DIR, "enable_keeper_three_nodes_3.xml"), + "/etc/clickhouse-server/config.d/enable_keeper3.xml", + ) + waiter = p.apply_async(start, (node3,)) + node2.copy_file_to_container( + os.path.join(CONFIG_DIR, "enable_keeper_three_nodes_2.xml"), + "/etc/clickhouse-server/config.d/enable_keeper2.xml", + ) + node1.copy_file_to_container( + os.path.join(CONFIG_DIR, "enable_keeper_three_nodes_1.xml"), + "/etc/clickhouse-server/config.d/enable_keeper1.xml", + ) - for i in range(100): - assert zk_conn3.exists("/test_three_" + str(i)) is not None + node1.query("SYSTEM RELOAD CONFIG") + node2.query("SYSTEM RELOAD CONFIG") - # configs which change endpoints of server should not be allowed - node1.replace_in_config( - "/etc/clickhouse-server/config.d/enable_keeper1.xml", - "node3", - "non_existing_node", - ) + waiter.wait() + keeper_utils.wait_until_connected(cluster, node3) + zk_conn3 = get_fake_zk(node3) - node1.query("SYSTEM RELOAD CONFIG") - time.sleep(2) - assert node1.contains_in_log( - "Config will be ignored because a server with ID 3 is already present in the cluster" - ) + for i in range(100): + assert zk_conn3.exists("/test_three_" + str(i)) is not None + + # configs which change endpoints of server should not be allowed + node1.replace_in_config( + "/etc/clickhouse-server/config.d/enable_keeper1.xml", + "node3", + "non_existing_node", + ) + + node1.query("SYSTEM RELOAD CONFIG") + time.sleep(2) + assert node1.contains_in_log( + "Config will be ignored because a server with ID 3 is already present in the cluster" + ) + finally: + for zk in [zk_conn, zk_conn2, zk_conn3]: + if zk: + zk.stop() + zk.close() diff --git a/tests/integration/test_keeper_nodes_move/test.py b/tests/integration/test_keeper_nodes_move/test.py index 1e3bd95c5e7..6884ff29607 100644 --- a/tests/integration/test_keeper_nodes_move/test.py +++ b/tests/integration/test_keeper_nodes_move/test.py @@ -54,51 +54,62 @@ def get_fake_zk(node, timeout=30.0): def test_node_move(started_cluster): - zk_conn = get_fake_zk(node1) + zk_conn = None + zk_conn2 = None + zk_conn3 = None + zk_conn4 = None - for i in range(100): - zk_conn.create("/test_four_" + str(i), b"somedata") + try: + zk_conn = get_fake_zk(node1) - zk_conn2 = get_fake_zk(node2) - zk_conn2.sync("/test_four_0") + for i in range(100): + zk_conn.create("/test_four_" + str(i), b"somedata") - zk_conn3 = get_fake_zk(node3) - zk_conn3.sync("/test_four_0") + zk_conn2 = get_fake_zk(node2) + zk_conn2.sync("/test_four_0") - for i in range(100): - assert zk_conn2.exists("test_four_" + str(i)) is not None - assert zk_conn3.exists("test_four_" + str(i)) is not None + zk_conn3 = get_fake_zk(node3) + zk_conn3.sync("/test_four_0") - node4.stop_clickhouse() - node4.copy_file_to_container( - os.path.join(CONFIG_DIR, "enable_keeper_node4_4.xml"), - "/etc/clickhouse-server/config.d/enable_keeper4.xml", - ) - p = Pool(3) - waiter = p.apply_async(start, (node4,)) - node1.copy_file_to_container( - os.path.join(CONFIG_DIR, "enable_keeper_node4_1.xml"), - "/etc/clickhouse-server/config.d/enable_keeper1.xml", - ) - node2.copy_file_to_container( - os.path.join(CONFIG_DIR, "enable_keeper_node4_2.xml"), - "/etc/clickhouse-server/config.d/enable_keeper2.xml", - ) + for i in range(100): + assert zk_conn2.exists("test_four_" + str(i)) is not None + 
assert zk_conn3.exists("test_four_" + str(i)) is not None - node1.query("SYSTEM RELOAD CONFIG") - node2.query("SYSTEM RELOAD CONFIG") + node4.stop_clickhouse() + node4.copy_file_to_container( + os.path.join(CONFIG_DIR, "enable_keeper_node4_4.xml"), + "/etc/clickhouse-server/config.d/enable_keeper4.xml", + ) + p = Pool(3) + waiter = p.apply_async(start, (node4,)) + node1.copy_file_to_container( + os.path.join(CONFIG_DIR, "enable_keeper_node4_1.xml"), + "/etc/clickhouse-server/config.d/enable_keeper1.xml", + ) + node2.copy_file_to_container( + os.path.join(CONFIG_DIR, "enable_keeper_node4_2.xml"), + "/etc/clickhouse-server/config.d/enable_keeper2.xml", + ) - waiter.wait() + node1.query("SYSTEM RELOAD CONFIG") + node2.query("SYSTEM RELOAD CONFIG") - zk_conn4 = get_fake_zk(node4) - zk_conn4.sync("/test_four_0") + waiter.wait() - for i in range(100): - assert zk_conn4.exists("/test_four_" + str(i)) is not None + zk_conn4 = get_fake_zk(node4) + zk_conn4.sync("/test_four_0") - with pytest.raises(Exception): - # Adding and removing nodes is async operation - for i in range(10): - zk_conn3 = get_fake_zk(node3) - zk_conn3.sync("/test_four_0") - time.sleep(i) + for i in range(100): + assert zk_conn4.exists("/test_four_" + str(i)) is not None + + with pytest.raises(Exception): + # Adding and removing nodes is async operation + for i in range(10): + zk_conn3 = get_fake_zk(node3) + zk_conn3.sync("/test_four_0") + time.sleep(i) + finally: + for zk in [zk_conn, zk_conn2, zk_conn3, zk_conn4]: + if zk: + zk.stop() + zk.close() diff --git a/tests/integration/test_keeper_nodes_remove/test.py b/tests/integration/test_keeper_nodes_remove/test.py index 59bdaadf2e2..12c37e54927 100644 --- a/tests/integration/test_keeper_nodes_remove/test.py +++ b/tests/integration/test_keeper_nodes_remove/test.py @@ -40,74 +40,94 @@ def get_fake_zk(node, timeout=30.0): def test_nodes_remove(started_cluster): - zk_conn = get_fake_zk(node1) - - for i in range(100): - zk_conn.create("/test_two_" + str(i), b"somedata") - - zk_conn2 = get_fake_zk(node2) - zk_conn2.sync("/test_two_0") - - zk_conn3 = get_fake_zk(node3) - zk_conn3.sync("/test_two_0") - - for i in range(100): - assert zk_conn2.exists("test_two_" + str(i)) is not None - assert zk_conn3.exists("test_two_" + str(i)) is not None - - node2.copy_file_to_container( - os.path.join(CONFIG_DIR, "enable_keeper_two_nodes_2.xml"), - "/etc/clickhouse-server/config.d/enable_keeper2.xml", - ) - node1.copy_file_to_container( - os.path.join(CONFIG_DIR, "enable_keeper_two_nodes_1.xml"), - "/etc/clickhouse-server/config.d/enable_keeper1.xml", - ) - - node1.query("SYSTEM RELOAD CONFIG") - node2.query("SYSTEM RELOAD CONFIG") - - zk_conn2 = get_fake_zk(node2) - - for i in range(100): - assert zk_conn2.exists("test_two_" + str(i)) is not None - zk_conn2.create("/test_two_" + str(100 + i), b"otherdata") - - zk_conn = get_fake_zk(node1) - zk_conn.sync("/test_two_0") - - for i in range(100): - assert zk_conn.exists("test_two_" + str(i)) is not None - assert zk_conn.exists("test_two_" + str(100 + i)) is not None + zk_conn = None + zk_conn2 = None + zk_conn3 = None try: - zk_conn3 = get_fake_zk(node3) - zk_conn3.sync("/test_two_0") - time.sleep(0.1) - except Exception: - pass + zk_conn = get_fake_zk(node1) - node3.stop_clickhouse() + for i in range(100): + zk_conn.create("/test_two_" + str(i), b"somedata") - node1.copy_file_to_container( - os.path.join(CONFIG_DIR, "enable_single_keeper1.xml"), - "/etc/clickhouse-server/config.d/enable_keeper1.xml", - ) - - node1.query("SYSTEM RELOAD CONFIG") - - 
zk_conn = get_fake_zk(node1) - zk_conn.sync("/test_two_0") - - for i in range(100): - assert zk_conn.exists("test_two_" + str(i)) is not None - assert zk_conn.exists("test_two_" + str(100 + i)) is not None - - try: zk_conn2 = get_fake_zk(node2) zk_conn2.sync("/test_two_0") - time.sleep(0.1) - except Exception: - pass - node2.stop_clickhouse() + zk_conn3 = get_fake_zk(node3) + zk_conn3.sync("/test_two_0") + + for i in range(100): + assert zk_conn2.exists("test_two_" + str(i)) is not None + assert zk_conn3.exists("test_two_" + str(i)) is not None + + node2.copy_file_to_container( + os.path.join(CONFIG_DIR, "enable_keeper_two_nodes_2.xml"), + "/etc/clickhouse-server/config.d/enable_keeper2.xml", + ) + node1.copy_file_to_container( + os.path.join(CONFIG_DIR, "enable_keeper_two_nodes_1.xml"), + "/etc/clickhouse-server/config.d/enable_keeper1.xml", + ) + + node1.query("SYSTEM RELOAD CONFIG") + node2.query("SYSTEM RELOAD CONFIG") + + zk_conn2.stop() + zk_conn2.close() + zk_conn2 = get_fake_zk(node2) + + for i in range(100): + assert zk_conn2.exists("test_two_" + str(i)) is not None + zk_conn2.create("/test_two_" + str(100 + i), b"otherdata") + + zk_conn.stop() + zk_conn.close() + zk_conn = get_fake_zk(node1) + zk_conn.sync("/test_two_0") + + for i in range(100): + assert zk_conn.exists("test_two_" + str(i)) is not None + assert zk_conn.exists("test_two_" + str(100 + i)) is not None + + try: + zk_conn3.stop() + zk_conn3.close() + zk_conn3 = get_fake_zk(node3) + zk_conn3.sync("/test_two_0") + time.sleep(0.1) + except Exception: + pass + + node3.stop_clickhouse() + + node1.copy_file_to_container( + os.path.join(CONFIG_DIR, "enable_single_keeper1.xml"), + "/etc/clickhouse-server/config.d/enable_keeper1.xml", + ) + + node1.query("SYSTEM RELOAD CONFIG") + + zk_conn.stop() + zk_conn.close() + zk_conn = get_fake_zk(node1) + zk_conn.sync("/test_two_0") + + for i in range(100): + assert zk_conn.exists("test_two_" + str(i)) is not None + assert zk_conn.exists("test_two_" + str(100 + i)) is not None + + try: + zk_conn2.stop() + zk_conn2.close() + zk_conn2 = get_fake_zk(node2) + zk_conn2.sync("/test_two_0") + time.sleep(0.1) + except Exception: + pass + + node2.stop_clickhouse() + finally: + for zk in [zk_conn, zk_conn2, zk_conn3]: + if zk: + zk.stop() + zk.close() diff --git a/tests/integration/test_keeper_snapshot_on_exit/test.py b/tests/integration/test_keeper_snapshot_on_exit/test.py index 1ca5888ab4d..327affc8372 100644 --- a/tests/integration/test_keeper_snapshot_on_exit/test.py +++ b/tests/integration/test_keeper_snapshot_on_exit/test.py @@ -35,18 +35,23 @@ def started_cluster(): def test_snapshot_on_exit(started_cluster): - zk_conn = get_fake_zk(node1) + zk_conn = None + try: + zk_conn = get_fake_zk(node1) + zk_conn.create("/some_path", b"some_data") - zk_conn.create("/some_path", b"some_data") + node1.stop_clickhouse() + assert node1.contains_in_log("Created persistent snapshot") - node1.stop_clickhouse() - assert node1.contains_in_log("Created persistent snapshot") + node1.start_clickhouse() + assert node1.contains_in_log("Loaded snapshot") - node1.start_clickhouse() - assert node1.contains_in_log("Loaded snapshot") + node2.stop_clickhouse() + assert not node2.contains_in_log("Created persistent snapshot") - node2.stop_clickhouse() - assert not node2.contains_in_log("Created persistent snapshot") - - node2.start_clickhouse() - assert node2.contains_in_log("No existing snapshots") + node2.start_clickhouse() + assert node2.contains_in_log("No existing snapshots") + finally: + if zk_conn: + 
zk_conn.stop() + zk_conn.close() diff --git a/tests/integration/test_keeper_snapshot_small_distance/test.py b/tests/integration/test_keeper_snapshot_small_distance/test.py index 6a64cf0ac92..6234a24817c 100644 --- a/tests/integration/test_keeper_snapshot_small_distance/test.py +++ b/tests/integration/test_keeper_snapshot_small_distance/test.py @@ -4,7 +4,8 @@ import pytest from helpers.cluster import ClickHouseCluster import helpers.keeper_utils as keeper_utils from multiprocessing.dummy import Pool -from kazoo.client import KazooClient, KazooState +from kazoo.client import KazooClient, KazooRetry +from kazoo.handlers.threading import KazooTimeoutError import random import string import os @@ -28,6 +29,11 @@ def start_zookeeper(node): def stop_zookeeper(node): node.exec_in_container(["bash", "-c", "/opt/zookeeper/bin/zkServer.sh stop"]) + timeout = time.time() + 60 + while node.get_process_pid("zookeeper") != None: + if time.time() > timeout: + raise Exception("Failed to stop ZooKeeper in 60 secs") + time.sleep(0.2) def clear_zookeeper(node): @@ -40,6 +46,11 @@ def restart_and_clear_zookeeper(node): start_zookeeper(node) +def restart_zookeeper(node): + stop_zookeeper(node) + start_zookeeper(node) + + def clear_clickhouse_data(node): node.exec_in_container( [ @@ -51,6 +62,13 @@ def clear_clickhouse_data(node): def convert_zookeeper_data(node): + node.exec_in_container( + [ + "bash", + "-c", + "tar -cvzf /var/lib/clickhouse/zk-data.tar.gz /zookeeper/version-2", + ] + ) cmd = "/usr/bin/clickhouse keeper-converter --zookeeper-logs-dir /zookeeper/version-2/ --zookeeper-snapshots-dir /zookeeper/version-2/ --output-dir /var/lib/clickhouse/coordination/snapshots" node.exec_in_container(["bash", "-c", cmd]) return os.path.join( @@ -104,53 +122,79 @@ def get_fake_zk(node, timeout=30.0): def get_genuine_zk(node, timeout=30.0): - _genuine_zk_instance = KazooClient( - hosts=cluster.get_instance_ip(node.name) + ":2181", timeout=timeout - ) - _genuine_zk_instance.start() - return _genuine_zk_instance + CONNECTION_RETRIES = 100 + for i in range(CONNECTION_RETRIES): + try: + _genuine_zk_instance = KazooClient( + hosts=cluster.get_instance_ip(node.name) + ":2181", + timeout=timeout, + connection_retry=KazooRetry(max_tries=20), + ) + _genuine_zk_instance.start() + return _genuine_zk_instance + except KazooTimeoutError: + if i == CONNECTION_RETRIES - 1: + raise + + print( + "Failed to connect to ZK cluster because of timeout. Restarting cluster and trying again." 
+ ) + time.sleep(0.2) + restart_zookeeper(node) def test_snapshot_and_load(started_cluster): - restart_and_clear_zookeeper(node1) - genuine_connection = get_genuine_zk(node1) - for node in [node1, node2, node3]: - print("Stop and clear", node.name, "with dockerid", node.docker_id) - stop_clickhouse(node) - clear_clickhouse_data(node) + genuine_connection = None + fake_zks = [] - for i in range(1000): - genuine_connection.create("/test" + str(i), b"data") + try: + restart_and_clear_zookeeper(node1) + genuine_connection = get_genuine_zk(node1) + for node in [node1, node2, node3]: + print("Stop and clear", node.name, "with dockerid", node.docker_id) + stop_clickhouse(node) + clear_clickhouse_data(node) - print("Data loaded to zookeeper") + for i in range(1000): + genuine_connection.create("/test" + str(i), b"data") - stop_zookeeper(node1) - start_zookeeper(node1) - stop_zookeeper(node1) + print("Data loaded to zookeeper") - print("Data copied to node1") - resulted_path = convert_zookeeper_data(node1) - print("Resulted path", resulted_path) - for node in [node2, node3]: - print("Copy snapshot from", node1.name, "to", node.name) - cluster.copy_file_from_container_to_container( - node1, resulted_path, node, "/var/lib/clickhouse/coordination/snapshots" - ) + stop_zookeeper(node1) + start_zookeeper(node1) + stop_zookeeper(node1) - print("Starting clickhouses") + print("Data copied to node1") + resulted_path = convert_zookeeper_data(node1) + print("Resulted path", resulted_path) + for node in [node2, node3]: + print("Copy snapshot from", node1.name, "to", node.name) + cluster.copy_file_from_container_to_container( + node1, resulted_path, node, "/var/lib/clickhouse/coordination/snapshots" + ) - p = Pool(3) - result = p.map_async(start_clickhouse, [node1, node2, node3]) - result.wait() + print("Starting clickhouses") - print("Loading additional data") - fake_zks = [get_fake_zk(node) for node in [node1, node2, node3]] - for i in range(1000): - fake_zk = random.choice(fake_zks) - try: - fake_zk.create("/test" + str(i + 1000), b"data") - except Exception as ex: - print("Got exception:" + str(ex)) + p = Pool(3) + result = p.map_async(start_clickhouse, [node1, node2, node3]) + result.wait() - print("Final") - fake_zks[0].create("/test10000", b"data") + print("Loading additional data") + fake_zks = [get_fake_zk(node) for node in [node1, node2, node3]] + for i in range(1000): + fake_zk = random.choice(fake_zks) + try: + fake_zk.create("/test" + str(i + 1000), b"data") + except Exception as ex: + print("Got exception:" + str(ex)) + + print("Final") + fake_zks[0].create("/test10000", b"data") + finally: + for zk in fake_zks: + if zk: + zk.stop() + zk.close() + if genuine_connection: + genuine_connection.stop() + genuine_connection.close() diff --git a/tests/integration/test_keeper_three_nodes_start/test.py b/tests/integration/test_keeper_three_nodes_start/test.py index e451f969b37..bc93a6089cb 100644 --- a/tests/integration/test_keeper_three_nodes_start/test.py +++ b/tests/integration/test_keeper_three_nodes_start/test.py @@ -29,6 +29,8 @@ def get_fake_zk(nodename, timeout=30.0): def test_smoke(): + node1_zk = None + try: cluster.start() @@ -37,3 +39,7 @@ def test_smoke(): finally: cluster.shutdown() + + if node1_zk: + node1_zk.stop() + node1_zk.close() diff --git a/tests/integration/test_keeper_three_nodes_two_alive/test.py b/tests/integration/test_keeper_three_nodes_two_alive/test.py index bd29ded357f..1b57bf602df 100644 --- a/tests/integration/test_keeper_three_nodes_two_alive/test.py +++ 
b/tests/integration/test_keeper_three_nodes_two_alive/test.py @@ -54,11 +54,16 @@ def start(node): def delete_with_retry(node_name, path): for _ in range(30): + zk = None try: - get_fake_zk(node_name).delete(path) + zk = get_fake_zk(node_name) + zk.delete(path) return except: time.sleep(0.5) + finally: + if zk: # get_fake_zk() may have raised, leaving zk as None + zk.stop() + zk.close() raise Exception(f"Cannot delete {path} from node {node_name}") @@ -89,9 +94,14 @@ def test_start_offline(started_cluster): p.map(start, [node1, node2, node3]) delete_with_retry("node1", "/test_alive") + node1_zk.stop() + node1_zk.close() + def test_start_non_existing(started_cluster): p = Pool(3) + node2_zk = None + try: node1.stop_clickhouse() node2.stop_clickhouse() @@ -134,15 +144,23 @@ def test_start_non_existing(started_cluster): p.map(start, [node1, node2, node3]) delete_with_retry("node2", "/test_non_exising") + if node2_zk: + node2_zk.stop() + node2_zk.close() + def test_restart_third_node(started_cluster): - node1_zk = get_fake_zk("node1") - node1_zk.create("/test_restart", b"aaaa") + try: + node1_zk = get_fake_zk("node1") + node1_zk.create("/test_restart", b"aaaa") - node3.restart_clickhouse() - keeper_utils.wait_until_connected(cluster, node3) + node3.restart_clickhouse() + keeper_utils.wait_until_connected(cluster, node3) - assert node3.contains_in_log( - "Connected to ZooKeeper (or Keeper) before internal Keeper start" - ) - node1_zk.delete("/test_restart") + assert node3.contains_in_log( + "Connected to ZooKeeper (or Keeper) before internal Keeper start" + ) + node1_zk.delete("/test_restart") + finally: + node1_zk.stop() + node1_zk.close() diff --git a/tests/integration/test_keeper_zookeeper_converter/test.py b/tests/integration/test_keeper_zookeeper_converter/test.py index 3c48e9cec22..2691eab02f7 100644 --- a/tests/integration/test_keeper_zookeeper_converter/test.py +++ b/tests/integration/test_keeper_zookeeper_converter/test.py @@ -24,6 +24,11 @@ def start_zookeeper(): def stop_zookeeper(): node.exec_in_container(["bash", "-c", "/opt/zookeeper/bin/zkServer.sh stop"]) + timeout = time.time() + 60 + while node.get_process_pid("zookeeper") != None: + if time.time() > timeout: + raise Exception("Failed to stop ZooKeeper in 60 secs") + time.sleep(0.2) def clear_zookeeper(): diff --git a/tests/integration/test_library_bridge/test.py b/tests/integration/test_library_bridge/test.py index 6e2c2ec0597..a4dca545d44 100644 --- a/tests/integration/test_library_bridge/test.py +++ b/tests/integration/test_library_bridge/test.py @@ -16,9 +16,9 @@ instance = cluster.add_instance( ) -def create_dict_simple(): - instance.query("DROP DICTIONARY IF EXISTS lib_dict_c") - instance.query( +def create_dict_simple(ch_instance): + ch_instance.query("DROP DICTIONARY IF EXISTS lib_dict_c") + ch_instance.query( """ CREATE DICTIONARY lib_dict_c (key UInt64, value1 UInt64, value2 UInt64, value3 UInt64) PRIMARY KEY key SOURCE(library(PATH '/etc/clickhouse-server/config.d/dictionaries_lib/dict_lib.so')) @@ -242,7 +242,7 @@ def test_recover_after_bridge_crash(ch_cluster): if instance.is_built_with_memory_sanitizer(): pytest.skip("Memory Sanitizer cannot work with third-party shared libraries") - create_dict_simple()
+ create_dict_simple(instance) result = instance.query("""select dictGet(lib_dict_c, 'value1', toUInt64(1));""") assert result.strip() == "101" @@ -290,53 +290,6 @@ def test_server_restart_bridge_might_be_stil_alive(ch_cluster): instance.query("DROP DICTIONARY lib_dict_c") -def test_bridge_dies_with_parent(ch_cluster): - if instance.is_built_with_memory_sanitizer(): - pytest.skip("Memory Sanitizer cannot work with third-party shared libraries") - if instance.is_built_with_address_sanitizer(): - pytest.skip( - "Leak sanitizer falsely reports about a leak of 16 bytes in clickhouse-odbc-bridge" - ) - - create_dict_simple() - result = instance.query("""select dictGet(lib_dict_c, 'value1', toUInt64(1));""") - assert result.strip() == "101" - - clickhouse_pid = instance.get_process_pid("clickhouse server") - bridge_pid = instance.get_process_pid("library-bridge") - assert clickhouse_pid is not None - assert bridge_pid is not None - - while clickhouse_pid is not None: - try: - instance.exec_in_container( - ["kill", str(clickhouse_pid)], privileged=True, user="root" - ) - except: - pass - clickhouse_pid = instance.get_process_pid("clickhouse server") - time.sleep(1) - - for i in range(30): - time.sleep(1) - bridge_pid = instance.get_process_pid("library-bridge") - if bridge_pid is None: - break - - if bridge_pid: - out = instance.exec_in_container( - ["gdb", "-p", str(bridge_pid), "--ex", "thread apply all bt", "--ex", "q"], - privileged=True, - user="root", - ) - logging.debug(f"Bridge is running, gdb output:\n{out}") - - assert clickhouse_pid is None - assert bridge_pid is None - instance.start_clickhouse(20) - instance.query("DROP DICTIONARY lib_dict_c") - - def test_path_validation(ch_cluster): if instance.is_built_with_memory_sanitizer(): pytest.skip("Memory Sanitizer cannot work with third-party shared libraries") diff --git a/tests/integration/test_library_bridge/test_exiled.py b/tests/integration/test_library_bridge/test_exiled.py new file mode 100644 index 00000000000..56be1bec20a --- /dev/null +++ b/tests/integration/test_library_bridge/test_exiled.py @@ -0,0 +1,98 @@ +import os +import os.path as p +import pytest +import time +import logging + +from helpers.cluster import ClickHouseCluster, run_and_check +from test_library_bridge.test import create_dict_simple + +cluster = ClickHouseCluster(__file__) + +instance = cluster.add_instance( + "instance", + dictionaries=["configs/dictionaries/dict1.xml"], + main_configs=["configs/config.d/config.xml"], + stay_alive=True, +) + + +@pytest.fixture(scope="module") +def ch_cluster(): + try: + cluster.start() + instance.query("CREATE DATABASE test") + + instance.copy_file_to_container( + os.path.join( + os.path.dirname(os.path.realpath(__file__)), "configs/dict_lib.cpp" + ), + "/etc/clickhouse-server/config.d/dictionaries_lib/dict_lib.cpp", + ) + + instance.query("SYSTEM RELOAD CONFIG") + + instance.exec_in_container( + [ + "bash", + "-c", + "/usr/bin/g++ -shared -o /etc/clickhouse-server/config.d/dictionaries_lib/dict_lib.so -fPIC /etc/clickhouse-server/config.d/dictionaries_lib/dict_lib.cpp", + ], + user="root", + ) + yield cluster + + finally: + cluster.shutdown() + + +def test_bridge_dies_with_parent(ch_cluster): + if instance.is_built_with_memory_sanitizer(): + pytest.skip("Memory Sanitizer cannot work with third-party shared libraries") + if instance.is_built_with_address_sanitizer(): + pytest.skip( + "Leak sanitizer falsely reports about a leak of 16 bytes in clickhouse-odbc-bridge" + ) + + create_dict_simple(instance) + result = 
instance.query("""select dictGet(lib_dict_c, 'value1', toUInt64(1));""") + assert result.strip() == "101" + + clickhouse_pid = instance.get_process_pid("clickhouse server") + bridge_pid = instance.get_process_pid("library-bridge") + assert clickhouse_pid is not None + assert bridge_pid is not None + + try: + instance.exec_in_container( + ["kill", str(clickhouse_pid)], privileged=True, user="root" + ) + except: + pass + + for i in range(30): + time.sleep(1) + clickhouse_pid = instance.get_process_pid("clickhouse server") + if clickhouse_pid is None: + break + + for i in range(30): + time.sleep(1) + bridge_pid = instance.get_process_pid("library-bridge") + if bridge_pid is None: + break + + if bridge_pid: + out = instance.exec_in_container( + ["gdb", "-p", str(bridge_pid), "--ex", "thread apply all bt", "--ex", "q"], + privileged=True, + user="root", + ) + logging.debug(f"Bridge is running, gdb output:\n{out}") + + try: + assert clickhouse_pid is None + assert bridge_pid is None + finally: + instance.start_clickhouse(20) + instance.query("DROP DICTIONARY lib_dict_c") diff --git a/tests/integration/test_merge_tree_azure_blob_storage/test.py b/tests/integration/test_merge_tree_azure_blob_storage/test.py index e32df110a18..e41529eb385 100644 --- a/tests/integration/test_merge_tree_azure_blob_storage/test.py +++ b/tests/integration/test_merge_tree_azure_blob_storage/test.py @@ -514,61 +514,6 @@ def test_apply_new_settings(cluster): ) -# NOTE: this test takes a couple of minutes when run together with other tests -@pytest.mark.long_run -def test_restart_during_load(cluster): - node = cluster.instances[NODE_NAME] - create_table(node, TABLE_NAME) - config_path = os.path.join( - SCRIPT_DIR, - "./{}/node/configs/config.d/storage_conf.xml".format( - cluster.instances_dir_name - ), - ) - - # Force multi-part upload mode. 
- replace_config( - config_path, "false", "" - ) - - azure_query( - node, f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-04', 4096)}" - ) - azure_query( - node, - f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-05', 4096, -1)}", - ) - - def read(): - for ii in range(0, 5): - logging.info(f"Executing {ii} query") - assert ( - azure_query(node, f"SELECT sum(id) FROM {TABLE_NAME} FORMAT Values") - == "(0)" - ) - logging.info(f"Query {ii} executed") - time.sleep(0.2) - - def restart_disk(): - for iii in range(0, 2): - logging.info(f"Restarting disk, attempt {iii}") - node.query(f"SYSTEM RESTART DISK {AZURE_BLOB_STORAGE_DISK}") - logging.info(f"Disk restarted, attempt {iii}") - time.sleep(0.5) - - threads = [] - for _ in range(0, 4): - threads.append(SafeThread(target=read)) - - threads.append(SafeThread(target=restart_disk)) - - for thread in threads: - thread.start() - - for thread in threads: - thread.join() - - def test_big_insert(cluster): node = cluster.instances[NODE_NAME] create_table(node, TABLE_NAME) diff --git a/tests/integration/test_merge_tree_hdfs/test.py b/tests/integration/test_merge_tree_hdfs/test.py index 8a6703be2dc..3950077e619 100644 --- a/tests/integration/test_merge_tree_hdfs/test.py +++ b/tests/integration/test_merge_tree_hdfs/test.py @@ -213,9 +213,9 @@ def test_attach_detach_partition(cluster): assert node.query("SELECT count(*) FROM hdfs_test FORMAT Values") == "(4096)" wait_for_delete_empty_parts(node, "hdfs_test") wait_for_delete_inactive_parts(node, "hdfs_test") - - hdfs_objects = fs.listdir("/clickhouse") - assert len(hdfs_objects) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2 + wait_for_delete_hdfs_objects( + cluster, FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2 + ) node.query("ALTER TABLE hdfs_test ATTACH PARTITION '2020-01-03'") assert node.query("SELECT count(*) FROM hdfs_test FORMAT Values") == "(8192)" @@ -227,9 +227,7 @@ def test_attach_detach_partition(cluster): assert node.query("SELECT count(*) FROM hdfs_test FORMAT Values") == "(4096)" wait_for_delete_empty_parts(node, "hdfs_test") wait_for_delete_inactive_parts(node, "hdfs_test") - - hdfs_objects = fs.listdir("/clickhouse") - assert len(hdfs_objects) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE + wait_for_delete_hdfs_objects(cluster, FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE) node.query("ALTER TABLE hdfs_test DETACH PARTITION '2020-01-04'") node.query( @@ -239,9 +237,7 @@ def test_attach_detach_partition(cluster): assert node.query("SELECT count(*) FROM hdfs_test FORMAT Values") == "(0)" wait_for_delete_empty_parts(node, "hdfs_test") wait_for_delete_inactive_parts(node, "hdfs_test") - - hdfs_objects = fs.listdir("/clickhouse") - assert len(hdfs_objects) == FILES_OVERHEAD + wait_for_delete_hdfs_objects(cluster, FILES_OVERHEAD) def test_move_partition_to_another_disk(cluster): @@ -307,9 +303,7 @@ def test_table_manipulations(cluster): assert node.query("SELECT count(*) FROM hdfs_test FORMAT Values") == "(0)" wait_for_delete_empty_parts(node, "hdfs_test") wait_for_delete_inactive_parts(node, "hdfs_test") - - hdfs_objects = fs.listdir("/clickhouse") - assert len(hdfs_objects) == FILES_OVERHEAD + wait_for_delete_hdfs_objects(cluster, FILES_OVERHEAD) def test_move_replace_partition_to_another_table(cluster): @@ -376,7 +370,6 @@ def test_move_replace_partition_to_another_table(cluster): assert node.query("SELECT count(*) FROM hdfs_clone FORMAT Values") == "(8192)" # Wait for outdated partitions deletion. 
- print(1) wait_for_delete_hdfs_objects( cluster, FILES_OVERHEAD * 2 + FILES_OVERHEAD_PER_PART_WIDE * 4 ) diff --git a/tests/integration/test_merge_tree_s3/test.py b/tests/integration/test_merge_tree_s3/test.py index 417fa436471..ec59d6f9cb2 100644 --- a/tests/integration/test_merge_tree_s3/test.py +++ b/tests/integration/test_merge_tree_s3/test.py @@ -650,49 +650,6 @@ def test_s3_disk_apply_new_settings(cluster, node_name): assert get_s3_requests() - s3_requests_before == s3_requests_to_write_partition * 3 -@pytest.mark.parametrize("node_name", ["node"]) -def test_s3_disk_restart_during_load(cluster, node_name): - node = cluster.instances[node_name] - create_table(node, "s3_test") - - node.query( - "INSERT INTO s3_test VALUES {}".format( - generate_values("2020-01-04", 1024 * 1024) - ) - ) - node.query( - "INSERT INTO s3_test VALUES {}".format( - generate_values("2020-01-05", 1024 * 1024, -1) - ) - ) - - def read(): - for ii in range(0, 20): - logging.info("Executing %d query", ii) - assert node.query("SELECT sum(id) FROM s3_test FORMAT Values") == "(0)" - logging.info("Query %d executed", ii) - time.sleep(0.2) - - def restart_disk(): - for iii in range(0, 5): - logging.info("Restarting disk, attempt %d", iii) - node.query("SYSTEM RESTART DISK s3") - logging.info("Disk restarted, attempt %d", iii) - time.sleep(0.5) - - threads = [] - for i in range(0, 4): - threads.append(SafeThread(target=read)) - - threads.append(SafeThread(target=restart_disk)) - - for thread in threads: - thread.start() - - for thread in threads: - thread.join() - - @pytest.mark.parametrize("node_name", ["node"]) def test_s3_no_delete_objects(cluster, node_name): node = cluster.instances[node_name] diff --git a/tests/integration/test_merge_tree_s3_restore/test.py b/tests/integration/test_merge_tree_s3_restore/test.py index d29bb1e34ac..ffb0d9477cf 100644 --- a/tests/integration/test_merge_tree_s3_restore/test.py +++ b/tests/integration/test_merge_tree_s3_restore/test.py @@ -242,8 +242,7 @@ def test_full_restore(cluster, replicated, db_atomic): node.query("DETACH TABLE s3.test") drop_s3_metadata(node) create_restore_file(node) - node.query("SYSTEM RESTART DISK s3") - node.query("ATTACH TABLE s3.test") + node.restart_clickhouse() assert node.query("SELECT count(*) FROM s3.test FORMAT Values") == "({})".format( 4096 * 4 @@ -283,7 +282,7 @@ def test_restore_another_bucket_path(cluster, db_atomic): node_another_bucket = cluster.instances["node_another_bucket"] create_restore_file(node_another_bucket, bucket="root") - node_another_bucket.query("SYSTEM RESTART DISK s3") + node_another_bucket.restart_clickhouse() create_table( node_another_bucket, "test", attach=True, db_atomic=db_atomic, uuid=uuid ) @@ -298,7 +297,7 @@ def test_restore_another_bucket_path(cluster, db_atomic): node_another_bucket_path = cluster.instances["node_another_bucket_path"] create_restore_file(node_another_bucket_path, bucket="root2", path="data") - node_another_bucket_path.query("SYSTEM RESTART DISK s3") + node_another_bucket_path.restart_clickhouse() create_table( node_another_bucket_path, "test", attach=True, db_atomic=db_atomic, uuid=uuid ) @@ -357,7 +356,7 @@ def test_restore_different_revisions(cluster, db_atomic): # Restore to revision 1 (2 parts). 
create_restore_file(node_another_bucket, revision=revision1, bucket="root") - node_another_bucket.query("SYSTEM RESTART DISK s3") + node_another_bucket.restart_clickhouse() create_table( node_another_bucket, "test", attach=True, db_atomic=db_atomic, uuid=uuid ) @@ -378,8 +377,7 @@ def test_restore_different_revisions(cluster, db_atomic): # Restore to revision 2 (4 parts). node_another_bucket.query("DETACH TABLE s3.test") create_restore_file(node_another_bucket, revision=revision2, bucket="root") - node_another_bucket.query("SYSTEM RESTART DISK s3") - node_another_bucket.query("ATTACH TABLE s3.test") + node_another_bucket.restart_clickhouse() assert node_another_bucket.query( "SELECT count(*) FROM s3.test FORMAT Values" @@ -397,8 +395,7 @@ def test_restore_different_revisions(cluster, db_atomic): # Restore to revision 3 (4 parts + 1 merged). node_another_bucket.query("DETACH TABLE s3.test") create_restore_file(node_another_bucket, revision=revision3, bucket="root") - node_another_bucket.query("SYSTEM RESTART DISK s3") - node_another_bucket.query("ATTACH TABLE s3.test") + node_another_bucket.restart_clickhouse() assert node_another_bucket.query( "SELECT count(*) FROM s3.test FORMAT Values" @@ -444,7 +441,7 @@ def test_restore_mutations(cluster, db_atomic): create_restore_file( node_another_bucket, revision=revision_before_mutation, bucket="root" ) - node_another_bucket.query("SYSTEM RESTART DISK s3") + node_another_bucket.restart_clickhouse() create_table( node_another_bucket, "test", attach=True, db_atomic=db_atomic, uuid=uuid ) @@ -464,8 +461,7 @@ def test_restore_mutations(cluster, db_atomic): create_restore_file( node_another_bucket, revision=revision_after_mutation, bucket="root" ) - node_another_bucket.query("SYSTEM RESTART DISK s3") - node_another_bucket.query("ATTACH TABLE s3.test") + node_another_bucket.restart_clickhouse() assert node_another_bucket.query( "SELECT count(*) FROM s3.test FORMAT Values" @@ -485,8 +481,7 @@ def test_restore_mutations(cluster, db_atomic): node_another_bucket.query("DETACH TABLE s3.test") revision = (revision_before_mutation + revision_after_mutation) // 2 create_restore_file(node_another_bucket, revision=revision, bucket="root") - node_another_bucket.query("SYSTEM RESTART DISK s3") - node_another_bucket.query("ATTACH TABLE s3.test") + node_another_bucket.restart_clickhouse() # Wait for unfinished mutation completion. 
time.sleep(3) @@ -556,7 +551,7 @@ def test_migrate_to_restorable_schema(cluster): create_restore_file( node_another_bucket, revision=revision, bucket="root", path="another_data" ) - node_another_bucket.query("SYSTEM RESTART DISK s3") + node_another_bucket.restart_clickhouse() create_table( node_another_bucket, "test", attach=True, db_atomic=db_atomic, uuid=uuid ) @@ -615,7 +610,7 @@ def test_restore_to_detached(cluster, replicated, db_atomic): path="data", detached=True, ) - node_another_bucket.query("SYSTEM RESTART DISK s3") + node_another_bucket.restart_clickhouse() create_table( node_another_bucket, "test", @@ -681,7 +676,7 @@ def test_restore_without_detached(cluster, replicated, db_atomic): path="data", detached=True, ) - node_another_bucket.query("SYSTEM RESTART DISK s3") + node_another_bucket.restart_clickhouse() create_table( node_another_bucket, "test", diff --git a/tests/integration/test_multiple_disks/test.py b/tests/integration/test_multiple_disks/test.py index 9b7bad2b256..160c77b5688 100644 --- a/tests/integration/test_multiple_disks/test.py +++ b/tests/integration/test_multiple_disks/test.py @@ -590,6 +590,13 @@ def test_jbod_overflow(start_cluster, name, engine): ) node1.query(f"SYSTEM STOP MERGES {name}") + # The test tries to utilize 35/40=87.5% of the space, while during the last + # INSERT the parts mover may see up to ~100% of used space on the disk due to + # reservations (INSERT first reserves the space and only later writes + # roughly the same amount of data, and until the reservation has + # been destroyed it is counted as space reserved on the + # disk). + node1.query(f"SYSTEM STOP MOVES {name}") # small jbod size is 40MB, so lets insert 5MB batch 7 times for i in range(7): @@ -621,6 +628,7 @@ def test_jbod_overflow(start_cluster, name, engine): assert used_disks[-1] == "external" node1.query(f"SYSTEM START MERGES {name}") + node1.query(f"SYSTEM START MOVES {name}") time.sleep(1) node1.query_with_retry("OPTIMIZE TABLE {} FINAL".format(name)) diff --git a/tests/integration/test_mysql_database_engine/test.py b/tests/integration/test_mysql_database_engine/test.py index 8626980a768..65fd54c7f34 100644 --- a/tests/integration/test_mysql_database_engine/test.py +++ b/tests/integration/test_mysql_database_engine/test.py @@ -75,6 +75,7 @@ def test_mysql_ddl_for_mysql_database(started_cluster): mysql_node.query("DROP DATABASE IF EXISTS test_database") mysql_node.query("CREATE DATABASE test_database DEFAULT CHARACTER SET 'utf8'") + clickhouse_node.query("DROP DATABASE IF EXISTS test_database") clickhouse_node.query( "CREATE DATABASE test_database ENGINE = MySQL('mysql57:3306', 'test_database', 'root', 'clickhouse')" ) @@ -122,11 +123,13 @@ def test_clickhouse_ddl_for_mysql_database(started_cluster): "root", "clickhouse", started_cluster.mysql_ip, started_cluster.mysql_port ) ) as mysql_node: + mysql_node.query("DROP DATABASE IF EXISTS test_database") mysql_node.query("CREATE DATABASE test_database DEFAULT CHARACTER SET 'utf8'") mysql_node.query( "CREATE TABLE `test_database`.`test_table` ( `id` int(11) NOT NULL, PRIMARY KEY (`id`) ) ENGINE=InnoDB;" ) + clickhouse_node.query("DROP DATABASE IF EXISTS test_database") clickhouse_node.query( "CREATE DATABASE test_database ENGINE = MySQL('mysql57:3306', 'test_database', 'root', 'clickhouse')" ) @@ -157,10 +160,13 @@ def test_clickhouse_dml_for_mysql_database(started_cluster): "root", "clickhouse", started_cluster.mysql_ip, started_cluster.mysql_port ) ) as mysql_node: + mysql_node.query("DROP DATABASE IF EXISTS test_database")
mysql_node.query("CREATE DATABASE test_database DEFAULT CHARACTER SET 'utf8'") mysql_node.query( "CREATE TABLE `test_database`.`test_table` ( `i``d` int(11) NOT NULL, PRIMARY KEY (`i``d`)) ENGINE=InnoDB;" ) + + clickhouse_node.query("DROP DATABASE IF EXISTS test_database") clickhouse_node.query( "CREATE DATABASE test_database ENGINE = MySQL('mysql57:3306', test_database, 'root', 'clickhouse')" ) @@ -193,9 +199,8 @@ def test_clickhouse_join_for_mysql_database(started_cluster): "root", "clickhouse", started_cluster.mysql_ip, started_cluster.mysql_port ) ) as mysql_node: - mysql_node.query( - "CREATE DATABASE IF NOT EXISTS test DEFAULT CHARACTER SET 'utf8'" - ) + mysql_node.query("DROP DATABASE IF EXISTS test") + mysql_node.query("CREATE DATABASE test DEFAULT CHARACTER SET 'utf8'") mysql_node.query( "CREATE TABLE test.t1_mysql_local (" "pays VARCHAR(55) DEFAULT 'FRA' NOT NULL," @@ -209,6 +214,8 @@ def test_clickhouse_join_for_mysql_database(started_cluster): "opco VARCHAR(5) DEFAULT ''" ")" ) + clickhouse_node.query("DROP TABLE IF EXISTS default.t1_remote_mysql SYNC") + clickhouse_node.query("DROP TABLE IF EXISTS default.t2_remote_mysql SYNC") clickhouse_node.query( "CREATE TABLE default.t1_remote_mysql AS mysql('mysql57:3306','test','t1_mysql_local','root','clickhouse')" ) @@ -266,6 +273,7 @@ def test_column_comments_for_mysql_database_engine(started_cluster): mysql_node.query("DROP DATABASE IF EXISTS test_database") mysql_node.query("CREATE DATABASE test_database DEFAULT CHARACTER SET 'utf8'") + clickhouse_node.query("DROP DATABASE IF EXISTS test_database") clickhouse_node.query( "CREATE DATABASE test_database ENGINE = MySQL('mysql57:3306', 'test_database', 'root', 'clickhouse')" ) @@ -298,9 +306,11 @@ def test_data_types_support_level_for_mysql_database_engine(started_cluster): "root", "clickhouse", started_cluster.mysql_ip, started_cluster.mysql_port ) ) as mysql_node: + mysql_node.query("DROP DATABASE IF EXISTS test") mysql_node.query( "CREATE DATABASE IF NOT EXISTS test DEFAULT CHARACTER SET 'utf8'" ) + clickhouse_node.query("DROP DATABASE IF EXISTS test_database") clickhouse_node.query( "CREATE DATABASE test_database ENGINE = MySQL('mysql57:3306', test, 'root', 'clickhouse')", settings={"mysql_datatypes_support_level": "decimal,datetime64"}, diff --git a/tests/integration/test_odbc_interaction/test.py b/tests/integration/test_odbc_interaction/test.py index 06028af63c5..ed925759114 100644 --- a/tests/integration/test_odbc_interaction/test.py +++ b/tests/integration/test_odbc_interaction/test.py @@ -4,7 +4,6 @@ import psycopg2 import pymysql.cursors import pytest import logging -import os.path from helpers.cluster import ClickHouseCluster from helpers.test_tools import assert_eq_with_retry @@ -23,7 +22,6 @@ node1 = cluster.add_instance( "configs/dictionaries/sqlite3_odbc_cached_dictionary.xml", "configs/dictionaries/postgres_odbc_hashed_dictionary.xml", ], - stay_alive=True, ) @@ -80,10 +78,14 @@ def create_mysql_db(conn, name): def create_mysql_table(conn, table_name): with conn.cursor() as cursor: - cursor.execute(drop_table_sql_template.format(table_name)) cursor.execute(create_table_sql_template.format(table_name)) +def drop_mysql_table(conn, table_name): + with conn.cursor() as cursor: + cursor.execute(drop_table_sql_template.format(table_name)) + + def get_postgres_conn(started_cluster): conn_string = "host={} port={} user='postgres' password='mysecretpassword'".format( started_cluster.postgres_ip, started_cluster.postgres_port @@ -267,6 +269,8 @@ CREATE TABLE {}(id UInt32, name 
String, age UInt32, money UInt32, column_x Nulla # just to be sure :) assert node1.query("select 1") == "1\n" + node1.query(f"DROP TABLE {table_name}") + drop_mysql_table(conn, table_name) conn.close() @@ -309,6 +313,9 @@ def test_mysql_insert(started_cluster): == "3\tinsert\t33\t333\t3333\n4\tTEST\t44\t444\t\\N\n" ) + node1.query("DROP TABLE mysql_insert") + drop_mysql_table(conn, table_name) + def test_sqlite_simple_select_function_works(started_cluster): skip_test_msan(node1) @@ -367,6 +374,12 @@ def test_sqlite_simple_select_function_works(started_cluster): == "1\t1\n" ) + node1.exec_in_container( + ["sqlite3", sqlite_db, "DELETE FROM t1;"], + privileged=True, + user="root", + ) + def test_sqlite_table_function(started_cluster): skip_test_msan(node1) @@ -392,6 +405,12 @@ def test_sqlite_table_function(started_cluster): assert node1.query("select x, y from odbc_tf") == "1\t2\n" assert node1.query("select z, x, y from odbc_tf") == "3\t1\t2\n" assert node1.query("select count(), sum(x) from odbc_tf group by x") == "1\t1\n" + node1.query("DROP TABLE odbc_tf") + node1.exec_in_container( + ["sqlite3", sqlite_db, "DELETE FROM tf1;"], + privileged=True, + user="root", + ) def test_sqlite_simple_select_storage_works(started_cluster): @@ -418,6 +437,13 @@ def test_sqlite_simple_select_storage_works(started_cluster): assert node1.query("select x, y from SqliteODBC") == "1\t2\n" assert node1.query("select z, x, y from SqliteODBC") == "3\t1\t2\n" assert node1.query("select count(), sum(x) from SqliteODBC group by x") == "1\t1\n" + node1.query("DROP TABLE SqliteODBC") + + node1.exec_in_container( + ["sqlite3", sqlite_db, "DELETE FROM t4;"], + privileged=True, + user="root", + ) def test_sqlite_odbc_hashed_dictionary(started_cluster): @@ -496,6 +522,12 @@ def test_sqlite_odbc_hashed_dictionary(started_cluster): node1, "select dictGetUInt8('sqlite3_odbc_hashed', 'Z', toUInt64(200))", "7" ) + node1.exec_in_container( + ["sqlite3", sqlite_db, "DELETE FROM t2;"], + privileged=True, + user="root", + ) + def test_sqlite_odbc_cached_dictionary(started_cluster): skip_test_msan(node1) @@ -537,13 +569,20 @@ def test_sqlite_odbc_cached_dictionary(started_cluster): node1, "select dictGetUInt8('sqlite3_odbc_cached', 'Z', toUInt64(1))", "12" ) + node1.exec_in_container( + ["sqlite3", sqlite_db, "DELETE FROM t3;"], + privileged=True, + user="root", + ) + + node1.query("SYSTEM RELOAD DICTIONARIES") + def test_postgres_odbc_hashed_dictionary_with_schema(started_cluster): skip_test_msan(node1) conn = get_postgres_conn(started_cluster) cursor = conn.cursor() - cursor.execute("truncate table clickhouse.test_table") cursor.execute( "insert into clickhouse.test_table values(1, 1, 'hello'),(2, 2, 'world')" ) @@ -562,6 +601,7 @@ def test_postgres_odbc_hashed_dictionary_with_schema(started_cluster): "select dictGetString('postgres_odbc_hashed', 'column2', toUInt64(2))", "world", ) + cursor.execute("truncate table clickhouse.test_table") def test_postgres_odbc_hashed_dictionary_no_tty_pipe_overflow(started_cluster): @@ -569,7 +609,6 @@ def test_postgres_odbc_hashed_dictionary_no_tty_pipe_overflow(started_cluster): conn = get_postgres_conn(started_cluster) cursor = conn.cursor() - cursor.execute("truncate table clickhouse.test_table") cursor.execute("insert into clickhouse.test_table values(3, 3, 'xxx')") for i in range(100): try: @@ -582,13 +621,13 @@ def test_postgres_odbc_hashed_dictionary_no_tty_pipe_overflow(started_cluster): "select dictGetString('postgres_odbc_hashed', 'column2', toUInt64(3))", "xxx", ) + 
cursor.execute("truncate table clickhouse.test_table") def test_postgres_insert(started_cluster): skip_test_msan(node1) conn = get_postgres_conn(started_cluster) - conn.cursor().execute("truncate table clickhouse.test_table") # Also test with Servername containing '.' and '-' symbols (defined in # postgres .yml file). This is needed to check parsing, validation and @@ -615,53 +654,8 @@ def test_postgres_insert(started_cluster): ) == "55\t10\n" ) - - -def test_bridge_dies_with_parent(started_cluster): - skip_test_msan(node1) - - if node1.is_built_with_address_sanitizer(): - # TODO: Leak sanitizer falsely reports about a leak of 16 bytes in clickhouse-odbc-bridge in this test and - # that's linked somehow with that we have replaced getauxval() in glibc-compatibility. - # The leak sanitizer calls getauxval() for its own purposes, and our replaced version doesn't seem to be equivalent in that case. - pytest.skip( - "Leak sanitizer falsely reports about a leak of 16 bytes in clickhouse-odbc-bridge" - ) - - node1.query("select dictGetString('postgres_odbc_hashed', 'column2', toUInt64(1))") - - clickhouse_pid = node1.get_process_pid("clickhouse server") - bridge_pid = node1.get_process_pid("odbc-bridge") - assert clickhouse_pid is not None - assert bridge_pid is not None - - while clickhouse_pid is not None: - try: - node1.exec_in_container( - ["kill", str(clickhouse_pid)], privileged=True, user="root" - ) - except: - pass - clickhouse_pid = node1.get_process_pid("clickhouse server") - time.sleep(1) - - for i in range(30): - time.sleep(1) # just for sure, that odbc-bridge caught signal - bridge_pid = node1.get_process_pid("odbc-bridge") - if bridge_pid is None: - break - - if bridge_pid: - out = node1.exec_in_container( - ["gdb", "-p", str(bridge_pid), "--ex", "thread apply all bt", "--ex", "q"], - privileged=True, - user="root", - ) - logging.debug(f"Bridge is running, gdb output:\n{out}") - - assert clickhouse_pid is None - assert bridge_pid is None - node1.start_clickhouse(20) + node1.query("DROP TABLE pg_insert") + conn.cursor().execute("truncate table clickhouse.test_table") def test_odbc_postgres_date_data_type(started_cluster): @@ -670,7 +664,7 @@ def test_odbc_postgres_date_data_type(started_cluster): conn = get_postgres_conn(started_cluster) cursor = conn.cursor() cursor.execute( - "CREATE TABLE IF NOT EXISTS clickhouse.test_date (id integer, column1 integer, column2 date)" + "CREATE TABLE clickhouse.test_date (id integer, column1 integer, column2 date)" ) cursor.execute("INSERT INTO clickhouse.test_date VALUES (1, 1, '2020-12-01')") @@ -687,8 +681,8 @@ def test_odbc_postgres_date_data_type(started_cluster): expected = "1\t1\t2020-12-01\n2\t2\t2020-12-02\n3\t3\t2020-12-03\n" result = node1.query("SELECT * FROM test_date") assert result == expected - cursor.execute("DROP TABLE IF EXISTS clickhouse.test_date") - node1.query("DROP TABLE IF EXISTS test_date") + cursor.execute("DROP TABLE clickhouse.test_date") + node1.query("DROP TABLE test_date") def test_odbc_postgres_conversions(started_cluster): @@ -698,7 +692,7 @@ def test_odbc_postgres_conversions(started_cluster): cursor = conn.cursor() cursor.execute( - """CREATE TABLE IF NOT EXISTS clickhouse.test_types ( + """CREATE TABLE clickhouse.test_types ( a smallint, b integer, c bigint, d real, e double precision, f serial, g bigserial, h timestamp)""" ) @@ -724,7 +718,7 @@ def test_odbc_postgres_conversions(started_cluster): cursor.execute("DROP TABLE IF EXISTS clickhouse.test_types") cursor.execute( - """CREATE TABLE IF NOT EXISTS 
clickhouse.test_types (column1 Timestamp, column2 Numeric)""" + """CREATE TABLE clickhouse.test_types (column1 Timestamp, column2 Numeric)""" ) node1.query( @@ -742,8 +736,8 @@ def test_odbc_postgres_conversions(started_cluster): "SELECT toDateTime64('2019-01-01 00:00:00', 3, 'Etc/UTC'), toDecimal32(1.1, 1)" ) result = node1.query("SELECT * FROM test_types") - logging.debug(result) - cursor.execute("DROP TABLE IF EXISTS clickhouse.test_types") + cursor.execute("DROP TABLE clickhouse.test_types") + node1.query("DROP TABLE test_types") assert result == expected @@ -771,6 +765,7 @@ def test_odbc_cyrillic_with_varchar(started_cluster): """ SELECT name FROM odbc('DSN=postgresql_odbc; Servername=postgre-sql.local', 'clickhouse', 'test_cyrillic') """ ) assert result == "A-nice-word\nКрасивенько\n" + node1.query("DROP TABLE test_cyrillic") def test_many_connections(started_cluster): @@ -779,7 +774,6 @@ def test_many_connections(started_cluster): conn = get_postgres_conn(started_cluster) cursor = conn.cursor() - cursor.execute("DROP TABLE IF EXISTS clickhouse.test_pg_table") cursor.execute("CREATE TABLE clickhouse.test_pg_table (key integer, value integer)") node1.query( @@ -797,6 +791,7 @@ def test_many_connections(started_cluster): query += "SELECT key FROM {t})" assert node1.query(query.format(t="test_pg_table")) == "250\n" + cursor.execute("DROP TABLE clickhouse.test_pg_table") def test_concurrent_queries(started_cluster): @@ -896,7 +891,6 @@ def test_odbc_long_text(started_cluster): conn = get_postgres_conn(started_cluster) cursor = conn.cursor() - cursor.execute("drop table if exists clickhouse.test_long_text") cursor.execute("create table clickhouse.test_long_text(flen int, field1 text)") # sample test from issue 9363 @@ -924,3 +918,5 @@ def test_odbc_long_text(started_cluster): ) result = node1.query("select field1 from test_long_text where flen=400000;") assert result.strip() == long_text + node1.query("DROP TABLE test_long_text") + cursor.execute("drop table clickhouse.test_long_text") diff --git a/tests/integration/test_odbc_interaction/test_exiled.py b/tests/integration/test_odbc_interaction/test_exiled.py new file mode 100644 index 00000000000..bdc819b8221 --- /dev/null +++ b/tests/integration/test_odbc_interaction/test_exiled.py @@ -0,0 +1,111 @@ +import time +import logging +import pytest + +from helpers.cluster import ClickHouseCluster, assert_eq_with_retry +from test_odbc_interaction.test import ( + create_mysql_db, + create_mysql_table, + get_mysql_conn, + skip_test_msan, +) + + +cluster = ClickHouseCluster(__file__) +node1 = cluster.add_instance( + "node1", + with_odbc_drivers=True, + main_configs=["configs/openssl.xml", "configs/odbc_logging.xml"], + stay_alive=True, + dictionaries=["configs/dictionaries/sqlite3_odbc_hashed_dictionary.xml"], +) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + sqlite_db = node1.odbc_drivers["SQLite3"]["Database"] + logging.debug(f"sqlite data received: {sqlite_db}") + node1.exec_in_container( + [ + "sqlite3", + sqlite_db, + "CREATE TABLE t2(id INTEGER PRIMARY KEY ASC, X INTEGER, Y, Z);", + ], + privileged=True, + user="root", + ) + + node1.exec_in_container( + ["sqlite3", sqlite_db, "INSERT INTO t2 values(1, 1, 2, 3);"], + privileged=True, + user="root", + ) + + node1.query("SYSTEM RELOAD DICTIONARY sqlite3_odbc_hashed") + + yield cluster + except Exception as ex: + logging.exception(ex) + raise ex + finally: + cluster.shutdown() + + +# This test kills ClickHouse server and ODBC bridge and in worst 
scenario +# may cause group test crashes. Thus, this test is executed in a separate "module" +# with separate environment. +def test_bridge_dies_with_parent(started_cluster): + skip_test_msan(node1) + + if node1.is_built_with_address_sanitizer(): + # TODO: Leak sanitizer falsely reports about a leak of 16 bytes in clickhouse-odbc-bridge in this test and + # that's linked somehow with that we have replaced getauxval() in glibc-compatibility. + # The leak sanitizer calls getauxval() for its own purposes, and our replaced version doesn't seem to be equivalent in that case. + pytest.skip( + "Leak sanitizer falsely reports about a leak of 16 bytes in clickhouse-odbc-bridge" + ) + + assert_eq_with_retry( + node1, "select dictGetUInt8('sqlite3_odbc_hashed', 'Z', toUInt64(1))", "3" + ) + + clickhouse_pid = node1.get_process_pid("clickhouse server") + bridge_pid = node1.get_process_pid("odbc-bridge") + assert clickhouse_pid is not None + assert bridge_pid is not None + + try: + node1.exec_in_container( + ["kill", str(clickhouse_pid)], privileged=True, user="root" + ) + except: + pass + + for _ in range(30): + time.sleep(1) + clickhouse_pid = node1.get_process_pid("clickhouse server") + if clickhouse_pid is None: + break + + for _ in range(30): + time.sleep(1) # just for sure, that odbc-bridge caught signal + bridge_pid = node1.get_process_pid("odbc-bridge") + if bridge_pid is None: + break + + if bridge_pid: + out = node1.exec_in_container( + ["gdb", "-p", str(bridge_pid), "--ex", "thread apply all bt", "--ex", "q"], + privileged=True, + user="root", + ) + logging.debug(f"Bridge is running, gdb output:\n{out}") + + try: + assert clickhouse_pid is None + assert bridge_pid is None + finally: + node1.start_clickhouse(20) diff --git a/tests/integration/test_replicated_database/test.py b/tests/integration/test_replicated_database/test.py index 1e6a39ee1bd..d3fcc89561a 100644 --- a/tests/integration/test_replicated_database/test.py +++ b/tests/integration/test_replicated_database/test.py @@ -836,6 +836,153 @@ def test_recover_staled_replica(started_cluster): dummy_node.query("DROP DATABASE recover SYNC") +def test_recover_staled_replica_many_mvs(started_cluster): + main_node.query("DROP DATABASE IF EXISTS recover_mvs") + dummy_node.query("DROP DATABASE IF EXISTS recover_mvs") + + main_node.query_with_retry( + "CREATE DATABASE IF NOT EXISTS recover_mvs ENGINE = Replicated('/clickhouse/databases/recover_mvs', 'shard1', 'replica1');" + ) + started_cluster.get_kazoo_client("zoo1").set( + "/clickhouse/databases/recover_mvs/logs_to_keep", b"10" + ) + dummy_node.query_with_retry( + "CREATE DATABASE IF NOT EXISTS recover_mvs ENGINE = Replicated('/clickhouse/databases/recover_mvs', 'shard1', 'replica2');" + ) + + settings = {"distributed_ddl_task_timeout": 0} + + with PartitionManager() as pm: + pm.drop_instance_zk_connections(dummy_node) + dummy_node.query_and_get_error("RENAME TABLE recover_mvs.t1 TO recover_mvs.m1") + + for identifier in ["1", "2", "3", "4"]: + main_node.query( + f"CREATE TABLE recover_mvs.rmt{identifier} (n int) ENGINE=ReplicatedMergeTree ORDER BY n", + settings=settings, + ) + + print("Created tables") + + for identifier in ["1", "2", "3", "4"]: + main_node.query( + f"CREATE TABLE recover_mvs.mv_inner{identifier} (n int) ENGINE=ReplicatedMergeTree ORDER BY n", + settings=settings, + ) + + for identifier in ["1", "2", "3", "4"]: + main_node.query_with_retry( + f"""CREATE MATERIALIZED VIEW recover_mvs.mv{identifier} + TO recover_mvs.mv_inner{identifier} + AS SELECT * FROM 
recover_mvs.rmt{identifier}""", + settings=settings, + ) + + print("Created MVs") + + for identifier in ["1", "2", "3", "4"]: + main_node.query_with_retry( + f"""CREATE VIEW recover_mvs.view_from_mv{identifier} + AS SELECT * FROM recover_mvs.mv{identifier}""", + settings=settings, + ) + + print("Created Views on top of MVs") + + for identifier in ["1", "2", "3", "4"]: + main_node.query_with_retry( + f"""CREATE MATERIALIZED VIEW recover_mvs.cascade_mv{identifier} + ENGINE=MergeTree() ORDER BY tuple() + POPULATE AS SELECT * FROM recover_mvs.mv_inner{identifier};""", + settings=settings, + ) + + print("Created cascade MVs") + + for identifier in ["1", "2", "3", "4"]: + main_node.query_with_retry( + f"""CREATE VIEW recover_mvs.view_from_cascade_mv{identifier} + AS SELECT * FROM recover_mvs.cascade_mv{identifier}""", + settings=settings, + ) + + print("Created Views on top of cascade MVs") + + for identifier in ["1", "2", "3", "4"]: + main_node.query_with_retry( + f"""CREATE MATERIALIZED VIEW recover_mvs.double_cascade_mv{identifier} + ENGINE=MergeTree() ORDER BY tuple() + POPULATE AS SELECT * FROM recover_mvs.`.inner_id.{get_table_uuid("recover_mvs", f"cascade_mv{identifier}")}`""", + settings=settings, + ) + + print("Created double cascade MVs") + + for identifier in ["1", "2", "3", "4"]: + main_node.query_with_retry( + f"""CREATE VIEW recover_mvs.view_from_double_cascade_mv{identifier} + AS SELECT * FROM recover_mvs.double_cascade_mv{identifier}""", + settings=settings, + ) + + print("Created Views on top of double cascade MVs") + + # This weird table name actually makes sense because it starts with the letter `a` and may break some internal sorting + main_node.query_with_retry( + """ + CREATE VIEW recover_mvs.anime + AS + SELECT n + FROM + ( + SELECT * + FROM + ( + SELECT * + FROM + ( + SELECT * + FROM recover_mvs.mv_inner1 AS q1 + INNER JOIN recover_mvs.mv_inner2 AS q2 ON q1.n = q2.n + ) AS new_table_1 + INNER JOIN recover_mvs.mv_inner3 AS q3 ON new_table_1.n = q3.n + ) AS new_table_2 + INNER JOIN recover_mvs.mv_inner4 AS q4 ON new_table_2.n = q4.n + ) + """, + settings=settings, + ) + + print("Created final boss") + + for identifier in ["1", "2", "3", "4"]: + main_node.query_with_retry( + f"""CREATE DICTIONARY recover_mvs.`11111d{identifier}` (n UInt64) + PRIMARY KEY n + SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() TABLE 'double_cascade_mv{identifier}' DB 'recover_mvs')) + LAYOUT(FLAT()) LIFETIME(1)""", + settings=settings, + ) + + print("Created dictionaries") + + for identifier in ["1", "2", "3", "4"]: + main_node.query_with_retry( + f"""CREATE VIEW recover_mvs.`00000vd{identifier}` + AS SELECT * FROM recover_mvs.`11111d{identifier}`""", + settings=settings, + ) + + print("Created Views on top of dictionaries") + + dummy_node.query("SYSTEM SYNC DATABASE REPLICA recover_mvs") + query = "SELECT name FROM system.tables WHERE database='recover_mvs' ORDER BY name" + assert main_node.query(query) == dummy_node.query(query) + + main_node.query("DROP DATABASE IF EXISTS recover_mvs") + dummy_node.query("DROP DATABASE IF EXISTS recover_mvs") + + def test_startup_without_zk(started_cluster): with PartitionManager() as pm: pm.drop_instance_zk_connections(main_node) @@ -1005,6 +1152,9 @@ def test_force_synchronous_settings(started_cluster): def test_recover_digest_mismatch(started_cluster): + main_node.query("DROP DATABASE IF EXISTS recover_digest_mismatch") + dummy_node.query("DROP DATABASE IF EXISTS recover_digest_mismatch") + main_node.query( "CREATE DATABASE recover_digest_mismatch ENGINE = 
Replicated('/clickhouse/databases/recover_digest_mismatch', 'shard1', 'replica1');" ) @@ -1014,19 +1164,22 @@ def test_recover_digest_mismatch(started_cluster): create_some_tables("recover_digest_mismatch") + main_node.query("SYSTEM SYNC DATABASE REPLICA recover_digest_mismatch") + dummy_node.query("SYSTEM SYNC DATABASE REPLICA recover_digest_mismatch") + ways_to_corrupt_metadata = [ - f"mv /var/lib/clickhouse/metadata/recover_digest_mismatch/t1.sql /var/lib/clickhouse/metadata/recover_digest_mismatch/m1.sql", - f"sed --follow-symlinks -i 's/Int32/String/' /var/lib/clickhouse/metadata/recover_digest_mismatch/mv1.sql", - f"rm -f /var/lib/clickhouse/metadata/recover_digest_mismatch/d1.sql", + "mv /var/lib/clickhouse/metadata/recover_digest_mismatch/t1.sql /var/lib/clickhouse/metadata/recover_digest_mismatch/m1.sql", + "sed --follow-symlinks -i 's/Int32/String/' /var/lib/clickhouse/metadata/recover_digest_mismatch/mv1.sql", + "rm -f /var/lib/clickhouse/metadata/recover_digest_mismatch/d1.sql", # f"rm -rf /var/lib/clickhouse/metadata/recover_digest_mismatch/", # Directory already exists - f"rm -rf /var/lib/clickhouse/store", + "rm -rf /var/lib/clickhouse/store", ] for command in ways_to_corrupt_metadata: + print(f"Corrupting data using `{command}`") need_remove_is_active_node = "rm -rf" in command dummy_node.stop_clickhouse(kill=not need_remove_is_active_node) dummy_node.exec_in_container(["bash", "-c", command]) - dummy_node.start_clickhouse() query = ( "SELECT name, uuid, create_table_query FROM system.tables WHERE database='recover_digest_mismatch' AND name NOT LIKE '.inner_id.%' " @@ -1034,10 +1187,18 @@ def test_recover_digest_mismatch(started_cluster): ) expected = main_node.query(query) - if "rm -rf" in command: + if need_remove_is_active_node: # NOTE Otherwise it fails to recreate ReplicatedMergeTree table due to "Replica already exists" main_node.query( "SYSTEM DROP REPLICA '2' FROM DATABASE recover_digest_mismatch" ) + # There is a race condition between deleting active node and creating it on server startup + # So we start a server only after we deleted all table replicas from the Keeper + dummy_node.start_clickhouse() assert_eq_with_retry(dummy_node, query, expected) + + main_node.query("DROP DATABASE IF EXISTS recover_digest_mismatch") + dummy_node.query("DROP DATABASE IF EXISTS recover_digest_mismatch") + + print("Everything Okay") diff --git a/tests/integration/test_replicated_merge_tree_s3_restore/test.py b/tests/integration/test_replicated_merge_tree_s3_restore/test.py index fc13c8a1184..822a81d2655 100644 --- a/tests/integration/test_replicated_merge_tree_s3_restore/test.py +++ b/tests/integration/test_replicated_merge_tree_s3_restore/test.py @@ -249,7 +249,7 @@ def test_restore_another_bucket_path(cluster, db_atomic, zero_copy): node_another_bucket = cluster.instances["node_another_bucket"] create_restore_file(node_another_bucket, bucket="root") - node_another_bucket.query("SYSTEM RESTART DISK s3") + node_another_bucket.restart_clickhouse() create_table( node_another_bucket, "test", schema, attach=True, db_atomic=db_atomic, uuid=uuid ) diff --git a/tests/integration/test_s3_ec2_metadata/__init__.py b/tests/integration/test_s3_ec2_metadata/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_s3_ec2_metadata/configs/use_environment_credentials.xml b/tests/integration/test_s3_ec2_metadata/configs/use_environment_credentials.xml new file mode 100644 index 00000000000..db901ec0823 --- /dev/null +++ 
b/tests/integration/test_s3_ec2_metadata/configs/use_environment_credentials.xml @@ -0,0 +1,5 @@ + + + 1 + + diff --git a/tests/integration/test_s3_ec2_metadata/ec2_metadata_server/request_response_server.py b/tests/integration/test_s3_ec2_metadata/ec2_metadata_server/request_response_server.py new file mode 100644 index 00000000000..f347866be58 --- /dev/null +++ b/tests/integration/test_s3_ec2_metadata/ec2_metadata_server/request_response_server.py @@ -0,0 +1,36 @@ +import http.server +import sys + + +class RequestHandler(http.server.BaseHTTPRequestHandler): + def get_response(self): + if self.path == "/": + return "OK" + elif self.path == "/latest/meta-data/iam/security-credentials": + return "myrole" + elif self.path == "/latest/meta-data/iam/security-credentials/myrole": + return '{ "Code" : "Success", "Type" : "AWS-HMAC", "AccessKeyId" : "minio", "SecretAccessKey" : "minio123" }' + else: + return None + + def do_HEAD(self): + response = self.get_response() + if response: + self.send_response(200) + self.send_header("Content-Type", "text/plain") + self.send_header("Content-Length", len(response.encode())) + self.end_headers() + else: + self.send_response(404) + self.send_header("Content-Type", "text/plain") + self.end_headers() + + def do_GET(self): + self.do_HEAD() + response = self.get_response() + if response: + self.wfile.write(response.encode()) + + +httpd = http.server.HTTPServer(("0.0.0.0", int(sys.argv[1])), RequestHandler) +httpd.serve_forever() diff --git a/tests/integration/test_s3_ec2_metadata/test.py b/tests/integration/test_s3_ec2_metadata/test.py new file mode 100644 index 00000000000..982656df009 --- /dev/null +++ b/tests/integration/test_s3_ec2_metadata/test.py @@ -0,0 +1,94 @@ +import pytest +from helpers.cluster import ClickHouseCluster +import logging +import os +import time + +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) + +EC2_METADATA_SERVER_HOSTNAME = "resolver" +EC2_METADATA_SERVER_PORT = 8080 + +cluster = ClickHouseCluster(__file__) + +node = cluster.add_instance( + "node", + with_minio=True, + main_configs=["configs/use_environment_credentials.xml"], + env_variables={ + "AWS_EC2_METADATA_SERVICE_ENDPOINT": f"{EC2_METADATA_SERVER_HOSTNAME}:{EC2_METADATA_SERVER_PORT}", + }, +) + + +def start_ec2_metadata_server(): + logging.info("Starting EC2 metadata server") + container_id = cluster.get_container_id("resolver") + + cluster.copy_file_to_container( + container_id, + os.path.join(SCRIPT_DIR, "ec2_metadata_server/request_response_server.py"), + "request_response_server.py", + ) + + cluster.exec_in_container( + container_id, + ["python", "request_response_server.py", str(EC2_METADATA_SERVER_PORT)], + detach=True, + ) + + # Wait for the server to start. 
+ num_attempts = 100 + for attempt in range(num_attempts): + ping_response = cluster.exec_in_container( + container_id, + ["curl", "-s", f"http://localhost:{EC2_METADATA_SERVER_PORT}/"], + nothrow=True, + ) + if ping_response != "OK": + if attempt == num_attempts - 1: + assert ping_response == "OK", 'Expected "OK", but got "{}"'.format( + ping_response + ) + else: + time.sleep(1) + else: + logging.debug( + f"request_response_server.py answered {ping_response} on attempt {attempt}" + ) + break + + logging.info("EC2 metadata server started") + + +@pytest.fixture(scope="module", autouse=True) +def start_cluster(): + try: + cluster.start() + start_ec2_metadata_server() + yield + finally: + cluster.shutdown() + + +def test_credentials_from_ec2_metadata(): + node.query( + f"INSERT INTO FUNCTION s3('http://{cluster.minio_host}:{cluster.minio_port}/{cluster.minio_bucket}/test1.jsonl') SELECT * FROM numbers(100)" + ) + + assert ( + "100" + == node.query( + f"SELECT count() FROM s3('http://{cluster.minio_host}:{cluster.minio_port}/{cluster.minio_bucket}/test1.jsonl')" + ).strip() + ) + + expected_logs = [ + "Getting default credentials for ec2 instance from resolver:8080", + "Calling EC2MetadataService resource, /latest/meta-data/iam/security-credentials returned credential string myrole", + "Calling EC2MetadataService resource /latest/meta-data/iam/security-credentials/myrole", + "Successfully pulled credentials from EC2MetadataService with access key", + ] + + for expected_msg in expected_logs: + node.contains_in_log("AWSEC2InstanceProfileConfigLoader: " + expected_msg) diff --git a/tests/integration/test_storage_kafka/test.py b/tests/integration/test_storage_kafka/test.py index 46bf7b0b3a0..9f617369859 100644 --- a/tests/integration/test_storage_kafka/test.py +++ b/tests/integration/test_storage_kafka/test.py @@ -2271,7 +2271,11 @@ def test_kafka_produce_key_timestamp(kafka_cluster): ) topic_name = "insert3" - kafka_create_topic(admin_client, topic_name) + topic_config = { + # default retention, since predefined timestamp_ms is used. 
+ "retention.ms": "-1", + } + kafka_create_topic(admin_client, topic_name, config=topic_config) instance.query( """ @@ -3528,7 +3532,7 @@ def test_kafka_formats_with_broken_message(kafka_cluster): # broken message '0,"BAD","AM",0.5,1\n', ], - "expected": """{"raw_message":"0,\\"BAD\\",\\"AM\\",0.5,1\\n","error":"Cannot parse input: expected '\\"' before: 'BAD\\",\\"AM\\",0.5,1\\\\n': Could not print diagnostic info because two last rows aren't in buffer (rare case)\\n"}""", + "expected": """{"raw_message":"0,\\"BAD\\",\\"AM\\",0.5,1\\n","error":"Cannot parse input: expected '\\"' before: 'BAD\\",\\"AM\\",0.5,1\\\\n'"}""", "printable": True, "supports_empty_value": True, }, @@ -3540,7 +3544,7 @@ def test_kafka_formats_with_broken_message(kafka_cluster): # broken message "0\tBAD\tAM\t0.5\t1\n", ], - "expected": """{"raw_message":"0\\tBAD\\tAM\\t0.5\\t1\\n","error":"Cannot parse input: expected '\\\\t' before: 'BAD\\\\tAM\\\\t0.5\\\\t1\\\\n': Could not print diagnostic info because two last rows aren't in buffer (rare case)\\n"}""", + "expected": """{"raw_message":"0\\tBAD\\tAM\\t0.5\\t1\\n","error":"Cannot parse input: expected '\\\\t' before: 'BAD\\\\tAM\\\\t0.5\\\\t1\\\\n'"}""", "supports_empty_value": True, "printable": True, }, @@ -3552,7 +3556,7 @@ def test_kafka_formats_with_broken_message(kafka_cluster): # broken message '"id","blockNo","val1","val2","val3"\n0,"BAD","AM",0.5,1\n', ], - "expected": """{"raw_message":"\\"id\\",\\"blockNo\\",\\"val1\\",\\"val2\\",\\"val3\\"\\n0,\\"BAD\\",\\"AM\\",0.5,1\\n","error":"Cannot parse input: expected '\\"' before: 'BAD\\",\\"AM\\",0.5,1\\\\n': Could not print diagnostic info because two last rows aren't in buffer (rare case)\\n"}""", + "expected": """{"raw_message":"\\"id\\",\\"blockNo\\",\\"val1\\",\\"val2\\",\\"val3\\"\\n0,\\"BAD\\",\\"AM\\",0.5,1\\n","error":"Cannot parse input: expected '\\"' before: 'BAD\\",\\"AM\\",0.5,1\\\\n'"}""", "printable": True, }, "Values": { @@ -3575,7 +3579,7 @@ def test_kafka_formats_with_broken_message(kafka_cluster): # broken message "id\tblockNo\tval1\tval2\tval3\n0\tBAD\tAM\t0.5\t1\n", ], - "expected": """{"raw_message":"id\\tblockNo\\tval1\\tval2\\tval3\\n0\\tBAD\\tAM\\t0.5\\t1\\n","error":"Cannot parse input: expected '\\\\t' before: 'BAD\\\\tAM\\\\t0.5\\\\t1\\\\n': Could not print diagnostic info because two last rows aren't in buffer (rare case)\\n"}""", + "expected": """{"raw_message":"id\\tblockNo\\tval1\\tval2\\tval3\\n0\\tBAD\\tAM\\t0.5\\t1\\n","error":"Cannot parse input: expected '\\\\t' before: 'BAD\\\\tAM\\\\t0.5\\\\t1\\\\n"}""", "supports_empty_value": True, "printable": True, }, @@ -3587,7 +3591,7 @@ def test_kafka_formats_with_broken_message(kafka_cluster): # broken message "id\tblockNo\tval1\tval2\tval3\nInt64\tUInt16\tString\tFloat32\tUInt8\n0\tBAD\tAM\t0.5\t1\n", ], - "expected": """{"raw_message":"id\\tblockNo\\tval1\\tval2\\tval3\\nInt64\\tUInt16\\tString\\tFloat32\\tUInt8\\n0\\tBAD\\tAM\\t0.5\\t1\\n","error":"Cannot parse input: expected '\\\\t' before: 'BAD\\\\tAM\\\\t0.5\\\\t1\\\\n': Could not print diagnostic info because two last rows aren't in buffer (rare case)\\n"}""", + "expected": """{"raw_message":"id\\tblockNo\\tval1\\tval2\\tval3\\nInt64\\tUInt16\\tString\\tFloat32\\tUInt8\\n0\\tBAD\\tAM\\t0.5\\t1\\n","error":"Cannot parse input: expected '\\\\t' before: 'BAD\\\\tAM\\\\t0.5\\\\t1\\\\n'"}""", "printable": True, }, "Native": { @@ -4127,6 +4131,315 @@ def test_num_consumers_limit(kafka_cluster): instance.query("DROP TABLE test.kafka") +def 
test_format_with_prefix_and_suffix(kafka_cluster): + admin_client = KafkaAdminClient( + bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) + ) + + kafka_create_topic(admin_client, "custom") + + instance.query( + """ + DROP TABLE IF EXISTS test.kafka; + + CREATE TABLE test.kafka (key UInt64, value UInt64) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'custom', + kafka_group_name = 'custom', + kafka_format = 'CustomSeparated'; + """ + ) + + instance.query( + "INSERT INTO test.kafka select number*10 as key, number*100 as value from numbers(2) settings format_custom_result_before_delimiter='\n', format_custom_result_after_delimiter='\n'" + ) + + messages = [] + + attempt = 0 + while attempt < 100: + messages.extend(kafka_consume(kafka_cluster, "custom")) + if len(messages) == 2: + break + attempt += 1 + + assert len(messages) == 2 + + assert ( + "".join(messages) == "\n0\t0\n\n\n10\t100\n\n" + ) + + kafka_delete_topic(admin_client, "custom") + + +def test_max_rows_per_message(kafka_cluster): + admin_client = KafkaAdminClient( + bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) + ) + + topic = "custom_max_rows_per_message" + + kafka_create_topic(admin_client, topic) + + num_rows = 5 + + instance.query( + f""" + DROP TABLE IF EXISTS test.view; + DROP TABLE IF EXISTS test.kafka; + + CREATE TABLE test.kafka (key UInt64, value UInt64) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = '{topic}', + kafka_group_name = '{topic}', + kafka_format = 'CustomSeparated', + format_custom_result_before_delimiter = '\n', + format_custom_result_after_delimiter = '\n', + kafka_max_rows_per_message = 3; + + CREATE MATERIALIZED VIEW test.view Engine=Log AS + SELECT key, value FROM test.kafka; + """ + ) + + instance.query( + f"INSERT INTO test.kafka select number*10 as key, number*100 as value from numbers({num_rows}) settings format_custom_result_before_delimiter='\n', format_custom_result_after_delimiter='\n'" + ) + + messages = [] + + attempt = 0 + while attempt < 500: + messages.extend(kafka_consume(kafka_cluster, topic)) + if len(messages) == 2: + break + attempt += 1 + + assert len(messages) == 2 + + assert ( + "".join(messages) + == "\n0\t0\n10\t100\n20\t200\n\n\n30\t300\n40\t400\n\n" + ) + + attempt = 0 + rows = 0 + while attempt < 500: + rows = int(instance.query("SELECT count() FROM test.view")) + if rows == num_rows: + break + attempt += 1 + + assert rows == num_rows + + result = instance.query("SELECT * FROM test.view") + assert result == "0\t0\n10\t100\n20\t200\n30\t300\n40\t400\n" + + kafka_delete_topic(admin_client, topic) + + +def test_row_based_formats(kafka_cluster): + admin_client = KafkaAdminClient( + bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) + ) + + for format_name in [ + "TSV", + "TSVWithNamesAndTypes", + "TSKV", + "CSV", + "CSVWithNamesAndTypes", + "CustomSeparatedWithNamesAndTypes", + "Values", + "JSON", + "JSONEachRow", + "JSONCompactEachRow", + "JSONCompactEachRowWithNamesAndTypes", + "JSONObjectEachRow", + "Avro", + "RowBinary", + "RowBinaryWithNamesAndTypes", + "MsgPack", + ]: + + print(format_name) + + kafka_create_topic(admin_client, format_name) + + num_rows = 10 + + instance.query( + f""" + DROP TABLE IF EXISTS test.view; + DROP TABLE IF EXISTS test.kafka; + + CREATE TABLE test.kafka (key UInt64, value UInt64) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = '{format_name}', + kafka_group_name = '{format_name}', + 
kafka_format = '{format_name}', + kafka_max_rows_per_message = 5; + + CREATE MATERIALIZED VIEW test.view Engine=Log AS + SELECT key, value FROM test.kafka; + + INSERT INTO test.kafka SELECT number * 10 as key, number * 100 as value FROM numbers({num_rows}); + """ + ) + + messages = [] + + attempt = 0 + while attempt < 500: + messages.extend(kafka_consume(kafka_cluster, format_name, needDecode=False)) + if len(messages) == 2: + break + attempt += 1 + + assert len(messages) == 2 + + attempt = 0 + rows = 0 + while attempt < 500: + rows = int(instance.query("SELECT count() FROM test.view")) + if rows == num_rows: + break + attempt += 1 + + assert rows == num_rows + + result = instance.query("SELECT * FROM test.view") + expected = "" + for i in range(num_rows): + expected += str(i * 10) + "\t" + str(i * 100) + "\n" + assert result == expected + + kafka_delete_topic(admin_client, format_name) + + +def test_block_based_formats_1(kafka_cluster): + admin_client = KafkaAdminClient( + bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) + ) + + topic = "pretty_space" + kafka_create_topic(admin_client, topic) + + instance.query( + f""" + DROP TABLE IF EXISTS test.kafka; + + CREATE TABLE test.kafka (key UInt64, value UInt64) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = '{topic}', + kafka_group_name = '{topic}', + kafka_format = 'PrettySpace'; + + INSERT INTO test.kafka SELECT number * 10 as key, number * 100 as value FROM numbers(5) settings max_block_size=2, optimize_trivial_insert_select=0; + """ + ) + + messages = [] + + attempt = 0 + while attempt < 500: + messages.extend(kafka_consume(kafka_cluster, topic)) + if len(messages) == 3: + break + attempt += 1 + + assert len(messages) == 3 + + data = [] + for message in messages: + splitted = message.split("\n") + assert splitted[0] == " \x1b[1mkey\x1b[0m \x1b[1mvalue\x1b[0m" + assert splitted[1] == "" + assert splitted[-1] == "" + data += [line.split() for line in splitted[2:-1]] + + assert data == [ + ["0", "0"], + ["10", "100"], + ["20", "200"], + ["30", "300"], + ["40", "400"], + ] + + kafka_delete_topic(admin_client, topic) + + +def test_block_based_formats_2(kafka_cluster): + admin_client = KafkaAdminClient( + bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) + ) + + num_rows = 100 + + for format_name in [ + "JSONColumns", + "Native", + "Arrow", + "Parquet", + "ORC", + "JSONCompactColumns", + ]: + + kafka_create_topic(admin_client, format_name) + + instance.query( + f""" + DROP TABLE IF EXISTS test.view; + DROP TABLE IF EXISTS test.kafka; + + CREATE TABLE test.kafka (key UInt64, value UInt64) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = '{format_name}', + kafka_group_name = '{format_name}', + kafka_format = '{format_name}'; + + CREATE MATERIALIZED VIEW test.view Engine=Log AS + SELECT key, value FROM test.kafka; + + INSERT INTO test.kafka SELECT number * 10 as key, number * 100 as value FROM numbers({num_rows}) settings max_block_size=12, optimize_trivial_insert_select=0; + """ + ) + + messages = [] + + attempt = 0 + while attempt < 500: + messages.extend(kafka_consume(kafka_cluster, format_name, needDecode=False)) + if len(messages) == 9: + break + attempt += 1 + + assert len(messages) == 9 + + attempt = 0 + rows = 0 + while attempt < 500: + rows = int(instance.query("SELECT count() FROM test.view")) + if rows == num_rows: + break + attempt += 1 + + assert rows == num_rows + + result = instance.query("SELECT * FROM test.view ORDER by key") 
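The message counts asserted in these Kafka tests follow from how the INSERTs are batched: row-based formats appear to be framed into at most kafka_max_rows_per_message rows per message, while block-based formats produce one message per block of max_block_size rows. A small sketch of that arithmetic, reproducing the counts expected above (the expected_messages helper is illustrative):

```python
import math

# Illustrative helper: expected number of Kafka messages for the INSERTs in the
# tests above, assuming rows are framed per block and then, for row-based
# formats, into chunks of kafka_max_rows_per_message.
def expected_messages(num_rows, max_block_size=None, max_rows_per_message=None):
    blocks = math.ceil(num_rows / max_block_size) if max_block_size else 1
    if max_rows_per_message:
        rows_per_block = math.ceil(num_rows / blocks)
        return blocks * math.ceil(rows_per_block / max_rows_per_message)
    return blocks

# Counts asserted in the tests above:
assert expected_messages(5, max_rows_per_message=3) == 2    # test_max_rows_per_message
assert expected_messages(10, max_rows_per_message=5) == 2   # test_row_based_formats
assert expected_messages(5, max_block_size=2) == 3          # test_block_based_formats_1
assert expected_messages(100, max_block_size=12) == 9       # test_block_based_formats_2
```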
+ expected = "" + for i in range(num_rows): + expected += str(i * 10) + "\t" + str(i * 100) + "\n" + assert result == expected + + kafka_delete_topic(admin_client, format_name) + + if __name__ == "__main__": cluster.start() input("Cluster created, press any key to destroy...") diff --git a/tests/integration/test_storage_nats/test.py b/tests/integration/test_storage_nats/test.py index 77db3008524..2988c67bf63 100644 --- a/tests/integration/test_storage_nats/test.py +++ b/tests/integration/test_storage_nats/test.py @@ -1498,6 +1498,376 @@ def test_nats_predefined_configuration(nats_cluster): break +def test_format_with_prefix_and_suffix(nats_cluster): + instance.query( + """ + DROP TABLE IF EXISTS test.nats; + + CREATE TABLE test.nats (key UInt64, value UInt64) + ENGINE = NATS + SETTINGS nats_url = 'nats1:4444', + nats_subjects = 'custom', + nats_format = 'CustomSeparated'; + """ + ) + while not check_table_is_ready(instance, "test.nats"): + logging.debug("Table test.nats is not yet ready") + time.sleep(0.5) + + insert_messages = [] + + async def sub_to_nats(): + nc = await nats_connect_ssl( + nats_cluster.nats_port, + user="click", + password="house", + ssl_ctx=nats_cluster.nats_ssl_context, + ) + sub = await nc.subscribe("custom") + await sub.unsubscribe(2) + async for msg in sub.messages: + insert_messages.append(msg.data.decode()) + + await sub.drain() + await nc.drain() + + def run_sub(): + asyncio.run(sub_to_nats()) + + thread = threading.Thread(target=run_sub) + thread.start() + time.sleep(1) + + instance.query( + "INSERT INTO test.nats select number*10 as key, number*100 as value from numbers(2) settings format_custom_result_before_delimiter='\n', format_custom_result_after_delimiter='\n'" + ) + + thread.join() + + assert ( + "".join(insert_messages) + == "\n0\t0\n\n\n10\t100\n\n" + ) + + +def test_max_rows_per_message(nats_cluster): + instance.query( + """ + DROP TABLE IF EXISTS test.view; + DROP TABLE IF EXISTS test.nats; + + CREATE TABLE test.nats (key UInt64, value UInt64) + ENGINE = NATS + SETTINGS nats_url = 'nats1:4444', + nats_subjects = 'custom1', + nats_format = 'CustomSeparated', + nats_max_rows_per_message = 3, + format_custom_result_before_delimiter = '\n', + format_custom_result_after_delimiter = '\n'; + + CREATE MATERIALIZED VIEW test.view Engine=Log AS + SELECT key, value FROM test.nats; + """ + ) + while not check_table_is_ready(instance, "test.nats"): + logging.debug("Table test.nats is not yet ready") + time.sleep(0.5) + + num_rows = 5 + + insert_messages = [] + + async def sub_to_nats(): + nc = await nats_connect_ssl( + nats_cluster.nats_port, + user="click", + password="house", + ssl_ctx=nats_cluster.nats_ssl_context, + ) + sub = await nc.subscribe("custom1") + await sub.unsubscribe(2) + async for msg in sub.messages: + insert_messages.append(msg.data.decode()) + + await sub.drain() + await nc.drain() + + def run_sub(): + asyncio.run(sub_to_nats()) + + thread = threading.Thread(target=run_sub) + thread.start() + time.sleep(1) + + instance.query( + f"INSERT INTO test.nats select number*10 as key, number*100 as value from numbers({num_rows}) settings format_custom_result_before_delimiter='\n', format_custom_result_after_delimiter='\n'" + ) + + thread.join() + + assert ( + "".join(insert_messages) + == "\n0\t0\n10\t100\n20\t200\n\n\n30\t300\n40\t400\n\n" + ) + + attempt = 0 + rows = 0 + while attempt < 100: + rows = int(instance.query("SELECT count() FROM test.view")) + if rows == num_rows: + break + attempt += 1 + + assert rows == num_rows + + result = 
instance.query("SELECT * FROM test.view") + assert result == "0\t0\n10\t100\n20\t200\n30\t300\n40\t400\n" + + +def test_row_based_formats(nats_cluster): + num_rows = 10 + + for format_name in [ + "TSV", + "TSVWithNamesAndTypes", + "TSKV", + "CSV", + "CSVWithNamesAndTypes", + "CustomSeparatedWithNamesAndTypes", + "Values", + "JSON", + "JSONEachRow", + "JSONCompactEachRow", + "JSONCompactEachRowWithNamesAndTypes", + "JSONObjectEachRow", + "Avro", + "RowBinary", + "RowBinaryWithNamesAndTypes", + "MsgPack", + ]: + print(format_name) + + instance.query( + f""" + DROP TABLE IF EXISTS test.view; + DROP TABLE IF EXISTS test.nats; + + CREATE TABLE test.nats (key UInt64, value UInt64) + ENGINE = NATS + SETTINGS nats_url = 'nats1:4444', + nats_subjects = '{format_name}', + nats_format = '{format_name}'; + + CREATE MATERIALIZED VIEW test.view Engine=Log AS + SELECT key, value FROM test.nats; + """ + ) + + while not check_table_is_ready(instance, "test.nats"): + logging.debug("Table test.nats is not yet ready") + time.sleep(0.5) + + insert_messages = 0 + + async def sub_to_nats(): + nc = await nats_connect_ssl( + nats_cluster.nats_port, + user="click", + password="house", + ssl_ctx=nats_cluster.nats_ssl_context, + ) + sub = await nc.subscribe(format_name) + await sub.unsubscribe(2) + async for msg in sub.messages: + nonlocal insert_messages + insert_messages += 1 + + await sub.drain() + await nc.drain() + + def run_sub(): + asyncio.run(sub_to_nats()) + + thread = threading.Thread(target=run_sub) + thread.start() + time.sleep(1) + + instance.query( + f"INSERT INTO test.nats select number*10 as key, number*100 as value from numbers({num_rows})" + ) + + thread.join() + + assert insert_messages == 2 + + attempt = 0 + rows = 0 + while attempt < 100: + rows = int(instance.query("SELECT count() FROM test.view")) + if rows == num_rows: + break + attempt += 1 + + assert rows == num_rows + + expected = "" + for i in range(num_rows): + expected += str(i * 10) + "\t" + str(i * 100) + "\n" + + result = instance.query("SELECT * FROM test.view") + assert result == expected + + +def test_block_based_formats_1(nats_cluster): + instance.query( + """ + DROP TABLE IF EXISTS test.nats; + + CREATE TABLE test.nats (key UInt64, value UInt64) + ENGINE = NATS + SETTINGS nats_url = 'nats1:4444', + nats_subjects = 'PrettySpace', + nats_format = 'PrettySpace'; + """ + ) + + insert_messages = [] + + async def sub_to_nats(): + nc = await nats_connect_ssl( + nats_cluster.nats_port, + user="click", + password="house", + ssl_ctx=nats_cluster.nats_ssl_context, + ) + sub = await nc.subscribe("PrettySpace") + await sub.unsubscribe(3) + async for msg in sub.messages: + insert_messages.append(msg.data.decode()) + + await sub.drain() + await nc.drain() + + def run_sub(): + asyncio.run(sub_to_nats()) + + thread = threading.Thread(target=run_sub) + thread.start() + time.sleep(1) + + attempt = 0 + while attempt < 100: + try: + instance.query( + "INSERT INTO test.nats SELECT number * 10 as key, number * 100 as value FROM numbers(5) settings max_block_size=2, optimize_trivial_insert_select=0;" + ) + break + except Exception: + logging.debug("Table test.nats is not yet ready") + time.sleep(0.5) + attempt += 1 + thread.join() + + data = [] + for message in insert_messages: + splitted = message.split("\n") + assert splitted[0] == " \x1b[1mkey\x1b[0m \x1b[1mvalue\x1b[0m" + assert splitted[1] == "" + assert splitted[-1] == "" + data += [line.split() for line in splitted[2:-1]] + + assert data == [ + ["0", "0"], + ["10", "100"], + ["20", "200"], + 
["30", "300"], + ["40", "400"], + ] + + +def test_block_based_formats_2(nats_cluster): + num_rows = 100 + + for format_name in [ + "JSONColumns", + "Native", + "Arrow", + "Parquet", + "ORC", + "JSONCompactColumns", + ]: + print(format_name) + + instance.query( + f""" + DROP TABLE IF EXISTS test.view; + DROP TABLE IF EXISTS test.nats; + + CREATE TABLE test.nats (key UInt64, value UInt64) + ENGINE = NATS + SETTINGS nats_url = 'nats1:4444', + nats_subjects = '{format_name}', + nats_format = '{format_name}'; + + CREATE MATERIALIZED VIEW test.view Engine=Log AS + SELECT key, value FROM test.nats; + """ + ) + + while not check_table_is_ready(instance, "test.nats"): + logging.debug("Table test.nats is not yet ready") + time.sleep(0.5) + + insert_messages = 0 + + async def sub_to_nats(): + nc = await nats_connect_ssl( + nats_cluster.nats_port, + user="click", + password="house", + ssl_ctx=nats_cluster.nats_ssl_context, + ) + sub = await nc.subscribe(format_name) + await sub.unsubscribe(9) + async for msg in sub.messages: + nonlocal insert_messages + insert_messages += 1 + + await sub.drain() + await nc.drain() + + def run_sub(): + asyncio.run(sub_to_nats()) + + thread = threading.Thread(target=run_sub) + thread.start() + time.sleep(1) + + instance.query( + f"INSERT INTO test.nats SELECT number * 10 as key, number * 100 as value FROM numbers({num_rows}) settings max_block_size=12, optimize_trivial_insert_select=0;" + ) + + thread.join() + + assert insert_messages == 9 + + attempt = 0 + rows = 0 + while attempt < 100: + rows = int(instance.query("SELECT count() FROM test.view")) + if rows == num_rows: + break + attempt += 1 + + assert rows == num_rows + + result = instance.query("SELECT * FROM test.view ORDER by key") + expected = "" + for i in range(num_rows): + expected += str(i * 10) + "\t" + str(i * 100) + "\n" + assert result == expected + + if __name__ == "__main__": cluster.start() input("Cluster created, press any key to destroy...") diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index f5d216a8b92..63b8d1215aa 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -2848,3 +2848,355 @@ def test_rabbitmq_address(rabbitmq_cluster): instance2.query("drop table rabbit_in sync") instance2.query("drop table rabbit_out sync") + + +def test_format_with_prefix_and_suffix(rabbitmq_cluster): + instance.query( + """ + CREATE TABLE test.rabbitmq (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_exchange_name = 'insert', + rabbitmq_exchange_type = 'direct', + rabbitmq_routing_key_list = 'custom', + rabbitmq_format = 'CustomSeparated'; + """ + ) + + credentials = pika.PlainCredentials("root", "clickhouse") + parameters = pika.ConnectionParameters( + rabbitmq_cluster.rabbitmq_ip, rabbitmq_cluster.rabbitmq_port, "/", credentials + ) + consumer_connection = pika.BlockingConnection(parameters) + + consumer = consumer_connection.channel() + result = consumer.queue_declare(queue="") + queue_name = result.method.queue + consumer.queue_bind(exchange="insert", queue=queue_name, routing_key="custom") + + instance.query( + "INSERT INTO test.rabbitmq select number*10 as key, number*100 as value from numbers(2) settings format_custom_result_before_delimiter='\n', format_custom_result_after_delimiter='\n'" + ) + + insert_messages = [] + + def onReceived(channel, method, properties, body): + insert_messages.append(body.decode()) + if 
len(insert_messages) == 2: + channel.stop_consuming() + + consumer.basic_consume(onReceived, queue_name) + consumer.start_consuming() + consumer_connection.close() + + assert ( + "".join(insert_messages) + == "\n0\t0\n\n\n10\t100\n\n" + ) + + +def test_max_rows_per_message(rabbitmq_cluster): + num_rows = 5 + + instance.query( + """ + DROP TABLE IF EXISTS test.view; + DROP TABLE IF EXISTS test.rabbit; + + CREATE TABLE test.rabbit (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_format = 'CustomSeparated', + rabbitmq_exchange_name = 'custom', + rabbitmq_exchange_type = 'direct', + rabbitmq_routing_key_list = 'custom1', + rabbitmq_max_rows_per_message = 3, + format_custom_result_before_delimiter = '\n', + format_custom_result_after_delimiter = '\n'; + + CREATE MATERIALIZED VIEW test.view Engine=Log AS + SELECT key, value FROM test.rabbit; + """ + ) + + credentials = pika.PlainCredentials("root", "clickhouse") + parameters = pika.ConnectionParameters( + rabbitmq_cluster.rabbitmq_ip, rabbitmq_cluster.rabbitmq_port, "/", credentials + ) + consumer_connection = pika.BlockingConnection(parameters) + + consumer = consumer_connection.channel() + result = consumer.queue_declare(queue="") + queue_name = result.method.queue + consumer.queue_bind(exchange="custom", queue=queue_name, routing_key="custom1") + + instance.query( + f"INSERT INTO test.rabbit select number*10 as key, number*100 as value from numbers({num_rows}) settings format_custom_result_before_delimiter='\n', format_custom_result_after_delimiter='\n'" + ) + + insert_messages = [] + + def onReceived(channel, method, properties, body): + insert_messages.append(body.decode()) + if len(insert_messages) == 2: + channel.stop_consuming() + + consumer.basic_consume(onReceived, queue_name) + consumer.start_consuming() + consumer_connection.close() + + assert len(insert_messages) == 2 + + assert ( + "".join(insert_messages) + == "\n0\t0\n10\t100\n20\t200\n\n\n30\t300\n40\t400\n\n" + ) + + attempt = 0 + rows = 0 + while attempt < 100: + rows = int(instance.query("SELECT count() FROM test.view")) + if rows == num_rows: + break + attempt += 1 + + assert rows == num_rows + + result = instance.query("SELECT * FROM test.view") + assert result == "0\t0\n10\t100\n20\t200\n30\t300\n40\t400\n" + + +def test_row_based_formats(rabbitmq_cluster): + num_rows = 10 + + for format_name in [ + "TSV", + "TSVWithNamesAndTypes", + "TSKV", + "CSV", + "CSVWithNamesAndTypes", + "CustomSeparatedWithNamesAndTypes", + "Values", + "JSON", + "JSONEachRow", + "JSONCompactEachRow", + "JSONCompactEachRowWithNamesAndTypes", + "JSONObjectEachRow", + "Avro", + "RowBinary", + "RowBinaryWithNamesAndTypes", + "MsgPack", + ]: + print(format_name) + + instance.query( + f""" + DROP TABLE IF EXISTS test.view; + DROP TABLE IF EXISTS test.rabbit; + + CREATE TABLE test.rabbit (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_format = '{format_name}', + rabbitmq_exchange_name = '{format_name}', + rabbitmq_exchange_type = 'direct', + rabbitmq_routing_key_list = '{format_name}', + rabbitmq_max_rows_per_message = 5; + + CREATE MATERIALIZED VIEW test.view Engine=Log AS + SELECT key, value FROM test.rabbit; + """ + ) + + credentials = pika.PlainCredentials("root", "clickhouse") + parameters = pika.ConnectionParameters( + rabbitmq_cluster.rabbitmq_ip, + rabbitmq_cluster.rabbitmq_port, + "/", + credentials, + ) + consumer_connection = pika.BlockingConnection(parameters) + + consumer = 
consumer_connection.channel() + result = consumer.queue_declare(queue="") + queue_name = result.method.queue + consumer.queue_bind( + exchange=format_name, queue=queue_name, routing_key=format_name + ) + + instance.query( + f"INSERT INTO test.rabbit SELECT number * 10 as key, number * 100 as value FROM numbers({num_rows});" + ) + + insert_messages = 0 + + def onReceived(channel, method, properties, body): + nonlocal insert_messages + insert_messages += 1 + if insert_messages == 2: + channel.stop_consuming() + + consumer.basic_consume(onReceived, queue_name) + consumer.start_consuming() + consumer_connection.close() + + assert insert_messages == 2 + + attempt = 0 + rows = 0 + while attempt < 100: + rows = int(instance.query("SELECT count() FROM test.view")) + if rows == num_rows: + break + attempt += 1 + + assert rows == num_rows + + expected = "" + for i in range(num_rows): + expected += str(i * 10) + "\t" + str(i * 100) + "\n" + + result = instance.query("SELECT * FROM test.view") + assert result == expected + + +def test_block_based_formats_1(rabbitmq_cluster): + instance.query( + """ + CREATE TABLE test.rabbitmq (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_exchange_name = 'PrettySpace', + rabbitmq_exchange_type = 'direct', + rabbitmq_routing_key_list = 'PrettySpace', + rabbitmq_format = 'PrettySpace'; + """ + ) + + credentials = pika.PlainCredentials("root", "clickhouse") + parameters = pika.ConnectionParameters( + rabbitmq_cluster.rabbitmq_ip, rabbitmq_cluster.rabbitmq_port, "/", credentials + ) + consumer_connection = pika.BlockingConnection(parameters) + + consumer = consumer_connection.channel() + result = consumer.queue_declare(queue="") + queue_name = result.method.queue + consumer.queue_bind( + exchange="PrettySpace", queue=queue_name, routing_key="PrettySpace" + ) + + instance.query( + "INSERT INTO test.rabbitmq SELECT number * 10 as key, number * 100 as value FROM numbers(5) settings max_block_size=2, optimize_trivial_insert_select=0;" + ) + insert_messages = [] + + def onReceived(channel, method, properties, body): + insert_messages.append(body.decode()) + if len(insert_messages) == 3: + channel.stop_consuming() + + consumer.basic_consume(onReceived, queue_name) + consumer.start_consuming() + consumer_connection.close() + + assert len(insert_messages) == 3 + + data = [] + for message in insert_messages: + splitted = message.split("\n") + assert splitted[0] == " \x1b[1mkey\x1b[0m \x1b[1mvalue\x1b[0m" + assert splitted[1] == "" + assert splitted[-1] == "" + data += [line.split() for line in splitted[2:-1]] + + assert data == [ + ["0", "0"], + ["10", "100"], + ["20", "200"], + ["30", "300"], + ["40", "400"], + ] + + +def test_block_based_formats_2(rabbitmq_cluster): + num_rows = 100 + + for format_name in [ + "JSONColumns", + "Native", + "Arrow", + "Parquet", + "ORC", + "JSONCompactColumns", + ]: + + print(format_name) + + instance.query( + f""" + DROP TABLE IF EXISTS test.view; + DROP TABLE IF EXISTS test.rabbit; + + CREATE TABLE test.rabbit (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_format = '{format_name}', + rabbitmq_exchange_name = '{format_name}', + rabbitmq_exchange_type = 'direct', + rabbitmq_routing_key_list = '{format_name}'; + + CREATE MATERIALIZED VIEW test.view Engine=Log AS + SELECT key, value FROM test.rabbit; + """ + ) + + credentials = pika.PlainCredentials("root", "clickhouse") + parameters = pika.ConnectionParameters( + 
rabbitmq_cluster.rabbitmq_ip, + rabbitmq_cluster.rabbitmq_port, + "/", + credentials, + ) + consumer_connection = pika.BlockingConnection(parameters) + + consumer = consumer_connection.channel() + result = consumer.queue_declare(queue="") + queue_name = result.method.queue + consumer.queue_bind( + exchange=format_name, queue=queue_name, routing_key=format_name + ) + + instance.query( + f"INSERT INTO test.rabbit SELECT number * 10 as key, number * 100 as value FROM numbers({num_rows}) settings max_block_size=12, optimize_trivial_insert_select=0;" + ) + + insert_messages = 0 + + def onReceived(channel, method, properties, body): + nonlocal insert_messages + insert_messages += 1 + if insert_messages == 9: + channel.stop_consuming() + + consumer.basic_consume(onReceived, queue_name) + consumer.start_consuming() + consumer_connection.close() + + assert insert_messages == 9 + + attempt = 0 + rows = 0 + while attempt < 100: + rows = int(instance.query("SELECT count() FROM test.view")) + if rows == num_rows: + break + attempt += 1 + + assert rows == num_rows + + result = instance.query("SELECT * FROM test.view ORDER by key") + expected = "" + for i in range(num_rows): + expected += str(i * 10) + "\t" + str(i * 100) + "\n" + assert result == expected diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index 937f14bb878..2fa499eb78e 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -1013,6 +1013,9 @@ def test_predefined_connection_configuration(started_cluster): ) assert result == instance.query("SELECT number FROM numbers(10)") + result = instance.query_and_get_error("SELECT * FROM s3(no_collection)") + assert "There is no named collection `no_collection`" in result + result = "" diff --git a/tests/integration/test_temporary_data_in_cache/__init__.py b/tests/integration/test_temporary_data_in_cache/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_temporary_data_in_cache/configs/config.d/storage_configuration.xml b/tests/integration/test_temporary_data_in_cache/configs/config.d/storage_configuration.xml new file mode 100644 index 00000000000..acf0f765c6c --- /dev/null +++ b/tests/integration/test_temporary_data_in_cache/configs/config.d/storage_configuration.xml @@ -0,0 +1,39 @@ + + + + + local + /local_disk/ + + + + cache + local_disk + /tiny_local_cache/ + 10M + 1M + 1 + 0 + + + + + + local + /tiny_local_cache/ + + + + + + +
+ tiny_local_cache +
+
+
+
+
+ + tiny_local_cache +
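The Kafka, NATS and RabbitMQ tests above all repeat the same polling idiom: re-query SELECT count() FROM test.view until it reaches the expected row count or the attempt budget is exhausted. A hedged sketch of a reusable helper for that loop, assuming a ClickHouseInstance like the instance object used in those tests (the helper name and the sleep interval are illustrative; the originals poll without sleeping):

```python
import time

# Illustrative refactoring of the row-count polling loop that the Kafka, NATS
# and RabbitMQ tests above repeat inline; `instance` is a ClickHouseInstance
# from helpers.cluster, as in those tests.
def wait_for_row_count(instance, table, expected, attempts=500, delay=0.1):
    rows = 0
    for _ in range(attempts):
        rows = int(instance.query(f"SELECT count() FROM {table}"))
        if rows == expected:
            break
        time.sleep(delay)
    assert rows == expected, f"expected {expected} rows in {table}, got {rows}"
    return rows

# Usage, mirroring the loops above:
#   wait_for_row_count(instance, "test.view", num_rows)
```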
diff --git a/tests/integration/test_temporary_data_in_cache/test.py b/tests/integration/test_temporary_data_in_cache/test.py new file mode 100644 index 00000000000..0e8c7305405 --- /dev/null +++ b/tests/integration/test_temporary_data_in_cache/test.py @@ -0,0 +1,81 @@ +# pylint: disable=unused-argument +# pylint: disable=redefined-outer-name + +import pytest + +from helpers.cluster import ClickHouseCluster +from helpers.client import QueryRuntimeException + +cluster = ClickHouseCluster(__file__) + +node = cluster.add_instance( + "node", + main_configs=["configs/config.d/storage_configuration.xml"], + tmpfs=["/local_disk:size=50M", "/tiny_local_cache:size=12M"], +) + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def test_cache_evicted_by_temporary_data(start_cluster): + q = node.query + qi = lambda query: int(node.query(query).strip()) + + cache_size_initial = qi("SELECT sum(size) FROM system.filesystem_cache") + assert cache_size_initial == 0 + + free_space_initial = qi( + "SELECT free_space FROM system.disks WHERE name = 'tiny_local_cache_local_disk'" + ) + assert free_space_initial > 8 * 1024 * 1024 + + q( + "CREATE TABLE t1 (x UInt64) ENGINE = MergeTree ORDER BY x SETTINGS storage_policy = 'tiny_local_cache'" + ) + q("INSERT INTO t1 SELECT number FROM numbers(1024 * 1024)") + + # To be sure that nothing is reading the cache and entries for t1 can be evited + q("OPTIMIZE TABLE t1 FINAL") + q("SYSTEM STOP MERGES t1") + + # Read some data to fill the cache + q("SELECT sum(x) FROM t1") + + cache_size_with_t1 = qi("SELECT sum(size) FROM system.filesystem_cache") + assert cache_size_with_t1 > 8 * 1024 * 1024 + + # Almost all disk space is occupied by t1 cache + free_space_with_t1 = qi( + "SELECT free_space FROM system.disks WHERE name = 'tiny_local_cache_local_disk'" + ) + assert free_space_with_t1 < 4 * 1024 * 1024 + + # Try to sort the table, but fail because of lack of disk space + with pytest.raises(QueryRuntimeException) as exc: + q( + "SELECT ignore(*) FROM numbers(10 * 1024 * 1024) ORDER BY sipHash64(number)", + settings={ + "max_bytes_before_external_group_by": "4M", + "max_bytes_before_external_sort": "4M", + }, + ) + assert "Failed to reserve space for the file cache" in str(exc.value) + + # Some data evicted from cache by temporary data + cache_size_after_eviction = qi("SELECT sum(size) FROM system.filesystem_cache") + assert cache_size_after_eviction < cache_size_with_t1 + + # Disk space freed, at least 3 MB, because temporary data tried to write 4 MB + free_space_after_eviction = qi( + "SELECT free_space FROM system.disks WHERE name = 'tiny_local_cache_local_disk'" + ) + assert free_space_after_eviction > free_space_with_t1 + 3 * 1024 * 1024 + + q("DROP TABLE IF EXISTS t1") diff --git a/tests/integration/test_tmp_policy/test.py b/tests/integration/test_tmp_policy/test.py index c919d9a0c3d..870a70b127a 100644 --- a/tests/integration/test_tmp_policy/test.py +++ b/tests/integration/test_tmp_policy/test.py @@ -23,7 +23,7 @@ def start_cluster(): cluster.shutdown() -def test_different_versions(start_cluster): +def test_disk_selection(start_cluster): query = "SELECT count(ignore(*)) FROM (SELECT * FROM system.numbers LIMIT 1e7) GROUP BY number" settings = { "max_bytes_before_external_group_by": 1 << 20, diff --git a/tests/performance/explain_ast.xml b/tests/performance/explain_ast.xml index 0daa748de83..5bcdd96c10e 100644 --- a/tests/performance/explain_ast.xml +++ 
b/tests/performance/explain_ast.xml @@ -1,6 +1,6 @@ - - + + 0, c3, NULL)) AS c3_q, +quantiles(0.25, 0.5, 0.75)(if(c4 > 0, c4, NULL)) AS c4_q, +quantiles(0.25, 0.5, 0.75)(t.c17 / t.c19) AS c5_q, +quantiles(0.25, 0.5, 0.75)(c6) AS c6_q, +quantiles(0.25, 0.5, 0.75)(c7) AS c7_q, +quantiles(0.25, 0.5, 0.75)(c8) AS c8_q, +quantiles(0.25, 0.5, 0.75)(c9) AS c9_q, +quantiles(0.25, 0.5, 0.75)(c10) AS c10_q, +quantiles(0.25, 0.5, 0.75)(c11) AS c11_q, +quantiles(0.25, 0.5, 0.75)(c12) AS c12_q, +quantiles(0.25, 0.5, 0.75)(c13) AS c13_q, +quantiles(0.25, 0.5, 0.75)(if(t.c24 > 0, t.c24, NULL) / t.c19) AS c14_q, +quantiles(0.25, 0.5, 0.75)(if(t.c24 > 0, t.c24, NULL) / t.c17) AS c15_q, +quantiles(0.25, 0.5, 0.75)(t.c16) AS c16_q, +quantiles(0.25, 0.5, 0.75)(t.c17) AS c17_q, +quantiles(0.25, 0.5, 0.75)(if(t.c18 > 0, t.c18, NULL)) AS c18_q, +max(if(c3 > 0, c3, NULL)) AS c3_max, +min(if(c3 > 0, c3, NULL)) AS c3_min, +avg(if(c3 > 0, c3, NULL)) AS c3_avg, +max(if(c4 > 0, c4, NULL)) AS c4_max, +min(if(c4 > 0, c4, NULL)) AS c4_min, +avg(if(c4 > 0, c4, NULL)) AS c4_avg, +max(t.c17 / t.c19) AS c5_max, +min(t.c17 / t.c19) AS c5_min, +avg(t.c17 / t.c19) AS c5_avg, +max(if(c6 > 0, c6, NULL)) AS c6_max, +min(if(c6 > 0, c6, NULL)) AS c6_min, +avg(if(c6 > 0, c6, NULL)) AS c6_avg, +max(if(c7 > 0, c7, NULL)) AS c7_max, +min(if(c7 > 0, c7, NULL)) AS c7_min, +avg(if(c7 > 0, c7, NULL)) AS c7_avg, +max(if(c10 > 0, c10, NULL)) AS c10_max, +min(if(c10 > 0, c10, NULL)) AS c10_min, +avg(if(c10 > 0, c10, NULL)) AS c10_avg, +max(if(c8 > 0, c8, NULL)) AS c8_max, +min(if(c8 > 0, c8, NULL)) AS c8_min, +avg(if(c8 > 0, c8, NULL)) AS c8_avg, +max(if(c9 > 0, c9, NULL)) AS c9_max, +min(if(c9 > 0, c9, NULL)) AS c9_min, +avg(if(c9 > 0, c9, NULL)) AS c9_avg, +max(if(c11 > 0, c11, NULL)) AS c11_max, +min(if(c11 > 0, c11, NULL)) AS c11_min, +avg(if(c11 > 0, c11, NULL)) AS c11_avg, +max(if(c12 > 0, c12, NULL)) AS c12_max, +min(if(c12 > 0, c12, NULL)) AS c12_min, +avg(if(c12 > 0, c12, NULL)) AS c12_avg, +max(if(c13 > 0, c13, NULL)) AS c13_max, +min(if(c13 > 0, c13, NULL)) AS c13_min, +avg(if(c13 > 0, c13, NULL)) AS c13_avg, +max(if(t.c24 > 0, t.c24, NULL) / t.c19) AS c14_max, +min(if(t.c24 > 0, t.c24, NULL) / t.c19) AS c14_min, +avg(if(t.c24 > 0, t.c24, NULL) / t.c19) AS c14_avg, +max(if(t.c24 > 0, t.c24, NULL) / t.c17) AS c15_max, +min(if(t.c24 > 0, t.c24, NULL) / t.c17) AS c15_min, +avg(if(t.c24 > 0, t.c24, NULL) / t.c17) AS c15_avg, +max(t.c16) AS c16_max, +min(t.c16) AS c16_min, +avg(t.c16) AS c16_avg, +max(t.c17) AS c17_max, +min(t.c17) AS c17_min, +avg(t.c17) AS c17_avg, +max(if(t.c18 > 0, t.c18, NULL)) AS c18_max, +min(if(t.c18 > 0, t.c18, NULL)) AS c18_min, +avg(if(t.c18 > 0, t.c18, NULL)) AS c18_avg, +sum(t.c19) AS c19, +sum(if(t.c18 > 0, t.c18, NULL)) AS c18, +sum(t.c16) AS c16, +sum(c23) AS c23, +sum(t.c17) AS c17, +sum(if(t.c24 > 0, t.c24, NULL)) AS c24, +c24 / c19 AS c14, +c24 / c17 AS c15, +median(if(isNotNull(c29) AND (t.c22 > 0), c13 * (t.c22 / c29), NULL)) AS c21, +sum(c22) AS c22 +FROM +( +SELECT +c27, +c39 AS c1, +c29, +c19, +c23, +c17, +c16, +c18, +c22, +c24, +c3, +c4, +c8, +c9, +c10, +c11, +c12, +c13, +c6, +c7 +FROM +( +SELECT +c27, +uniqExact(c30, c31) AS c19, +uniqExact(c30, c31, c32) AS c23, +uniqExactIf(c30, c31, c33 IN ('c37', 'c38')) AS c17, +countIf(c33 IN ('c37', 'c38')) AS c16, +countIf(c33 = 'c39') AS c18, +coalesce(sumIf(c29, c33 = 'c39'), 0) AS c22, +coalesce(sumIf(c37, c33 = 'c39'), 0) AS c24, +if((c18 > 0) AND (c19 > 0), c18 / c19, NULL) AS c3, +if(c17 != 0, c18 / c17, NULL) AS c4, +coalesce(avgIf(c34, (c34 > 0) 
AND (c33 IN ('c37', 'c38'))), NULL) AS c8, +coalesce(avgIf(c35, (c35 > 0) AND (c33 IN ('c37', 'c38'))), NULL) AS c9, +coalesce(avgIf(c34, (c34 > 0) AND (c33 = 'c39')), NULL) AS c10, +coalesce(avgIf(c35, (c35 > 0) AND (c33 = 'c39')), NULL) AS c11, +coalesce(avgIf(c37, c33 = 'c39'), NULL) AS c12, +coalesce(avgIf(c37 / c34, (c34 > 0) AND (c33 = 'c39')), NULL) AS c13, +coalesce(avgIf(c37, (c37 > 0) AND (c33 IN ('c37', 'c38'))), NULL) AS c6, +coalesce(minIf(c37, (c37 > 0) AND (c33 IN ('c37', 'c38')) AND (c37 > (c36 / 2))), NULL) AS c7 +FROM +( +SELECT +c27, +c30, +c32, +c31, +NULL AS c29, +NULL AS c33, +NULL AS c37, +NULL AS c34, +NULL AS c35 +FROM +( +SELECT +c27, +c30, +c32, +c31 +FROM database.t1 +PREWHERE ((c32 >= parseDateTimeBestEffort('2020-01-01')) AND (c32 <= parseDateTimeBestEffort('2020-01-01 23:59:59'))) AND (c27 IN +( +SELECT comp_c27 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE 
(parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +)) +WHERE c61 = 0 +) AS table25 +UNION ALL +SELECT +c27, +c30, +c32, +c31, +c29, +c33, +c37, +c34, +c35 +FROM +( +SELECT +c27, +c30, +c32, +'c37' AS c33, +coalesce(c37 * joinGet('database.table18', 'c60', concat(c26, '_', 'CH')), 0) AS c37, +if(c53 > 0, c53, 2) AS c53, +c54, +if(c29 > 0, c29, 1) AS c29, +c55, +c56, +datediff('day', c55, c56) AS c34, +datediff('day', c32, c55) AS c35, +c31 +FROM database.table24 +PREWHERE ((c32 >= parseDateTimeBestEffort('2020-01-01')) AND (c32 <= parseDateTimeBestEffort('2020-01-01 23:59:59'))) AND (c27 IN +( +SELECT comp_c27 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) 
+) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +)) +WHERE (c61 = 0) AND (c37 < (666 * (1 / joinGet('database.table18', 'c60', concat(c26, '_', 'CH'))))) +) AS table23 +UNION ALL +SELECT +c27, +c30, +c32, +c31, +c29, +c33, +c37, +c34, +c35 +FROM +( +SELECT +c27, +c30, +c32, +'c39' AS c33, +coalesce(c37 * joinGet('database.table18', 'c60', concat(c26, '_', 'CH')), 0) AS c37, +if(c53 > 0, c53, 2) AS c53, +c54, +if(c29 > 0, c29, 1) AS c29, +c55, +c56, +datediff('day', c55, c56) AS c34, +datediff('day', c32, c55) AS c35, +c31 +FROM database.table22 +PREWHERE ((c32 >= parseDateTimeBestEffort('2020-01-01')) AND (c32 <= parseDateTimeBestEffort('2020-01-01 23:59:59'))) AND (c27 IN +( +SELECT comp_c27 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN 
c39 +WHERE isNotNull(c39) +) AS table4 +)) +WHERE (c61 = 0) AND (c37 < (666 * (1 / joinGet('database.table18', 'c60', concat(c26, '_', 'CH'))))) +) AS table21 +) AS table20 +ALL LEFT JOIN +( +SELECT +c27, +avgMerge(avg_c37) * joinGet('database.table18', 'c60', concat('USD', '_', 'CH')) AS c36 +FROM database.table19 +PREWHERE c27 IN +( +SELECT comp_c27 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +) +WHERE date > (now() - toIntervalMonth(3)) +GROUP BY c27 +) AS table17 USING (c27) +GROUP BY c27 +) AS table16 +ALL LEFT JOIN +( +SELECT +comp_c27 AS c27, +assumeNotNull(c39) AS c39, +c29 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, 
+min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +) USING (c27) +) AS t +ALL LEFT JOIN +( +SELECT +c1, +c2 +FROM +( +SELECT +c39 AS c1, +groupArray(comp_c27) AS c49, +multiIf(c1 = 'c58', if(length(c49) <= 2, 0, 1), c1 = 'c57', 1, if(length(c49) <= 3, 0, 1)) AS c2 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) 
AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +GROUP BY c39 +) AS table3 +) USING (c1) +GROUP BY +c1, +c2 +) AS table2 +ORDER BY c1 ASC +) AS table1 +UNION ALL +SELECT * +FROM +( +SELECT +c1, +c2, +c3_q[1] AS c3_q1, +c3_q[3] AS c3_q3, +c3_q[2] AS c3_median, +least(c3_max, c3_q3 + (1.5 * (c3_q3 - c3_q1))) AS c3_max, +greatest(c3_min, c3_q1 - (1.5 * (c3_q3 - c3_q1))) AS c3_min, +c3_avg, +c4_q[1] AS c4_q1, +c4_q[3] AS c4_q3, +c4_q[2] AS c4_median, +least(c4_max, c4_q3 + (1.5 * (c4_q3 - c4_q1))) AS c4_max, +greatest(c4_min, c4_q1 - (1.5 * (c4_q3 - c4_q1))) AS c4_min, +c4_avg, +c5_q[1] AS c5_q1, +c5_q[3] AS c5_q3, +c5_q[2] AS c5_median, +least(c5_max, c5_q3 + (1.5 * (c5_q3 - c5_q1))) AS c5_max, +greatest(c5_min, c5_q1 - (1.5 * (c5_q3 - c5_q1))) AS c5_min, +c5_avg, +c6_q[1] AS c6_q1, +c6_q[3] AS c6_q3, +c6_q[2] AS c6_median, +least(c6_max, c6_q3 + (1.5 * (c6_q3 - c6_q1))) AS c6_max, +greatest(c6_min, c6_q1 - (1.5 * (c6_q3 - c6_q1))) AS c6_min, +c6_avg, +c7_q[1] AS c7_q1, +c7_q[3] AS c7_q3, +c7_q[2] AS c7_median, +least(c7_max, c7_q3 + (1.5 * (c7_q3 - c7_q1))) AS c7_max, +greatest(c7_min, c7_q1 - (1.5 * (c7_q3 - c7_q1))) AS c7_min, +c7_avg, +c8_q[1] AS c8_q1, +c8_q[3] AS c8_q3, +c8_q[2] AS c8_median, +least(c8_max, c8_q3 + (1.5 * (c8_q3 - c8_q1))) AS c8_max, +greatest(c8_min, c8_q1 - (1.5 * (c8_q3 - c8_q1))) AS 
c8_min, +c8_avg, +c9_q[1] AS c9_q1, +c9_q[3] AS c9_q3, +c9_q[2] AS c9_median, +least(c9_max, c9_q3 + (1.5 * (c9_q3 - c9_q1))) AS c9_max, +greatest(c9_min, c9_q1 - (1.5 * (c9_q3 - c9_q1))) AS c9_min, +c9_avg, +c10_q[1] AS c10_q1, +c10_q[3] AS c10_q3, +c10_q[2] AS c10_median, +least(c10_max, c10_q3 + (1.5 * (c10_q3 - c10_q1))) AS c10_max, +greatest(c10_min, c10_q1 - (1.5 * (c10_q3 - c10_q1))) AS c10_min, +c10_avg, +c10_avg, +c11_q[1] AS c11_q1, +c11_q[3] AS c11_q3, +c11_q[2] AS c11_median, +least(c11_max, c11_q3 + (1.5 * (c11_q3 - c11_q1))) AS c11_max, +greatest(c11_min, c11_q1 - (1.5 * (c11_q3 - c11_q1))) AS c11_min, +c11_avg, +c12_q[1] AS c12_q1, +c12_q[3] AS c12_q3, +c12_q[2] AS c12_median, +least(c12_max, c12_q3 + (1.5 * (c12_q3 - c12_q1))) AS c12_max, +greatest(c12_min, c12_q1 - (1.5 * (c12_q3 - c12_q1))) AS c12_min, +c12_avg, +c13_q[1] AS c13_q1, +c13_q[3] AS c13_q3, +c13_q[2] AS c13_median, +least(c13_max, c13_q3 + (1.5 * (c13_q3 - c13_q1))) AS c13_max, +greatest(c13_min, c13_q1 - (1.5 * (c13_q3 - c13_q1))) AS c13_min, +c13_avg, +c14_q[1] AS c14_q1, +c14_q[3] AS c14_q3, +c14_q[2] AS c14_median, +least(c14_max, c14_q3 + (1.5 * (c14_q3 - c14_q1))) AS c14_max, +greatest(c14_min, c14_q1 - (1.5 * (c14_q3 - c14_q1))) AS c14_min, +c14_avg, +c15_q[1] AS c15_q1, +c15_q[3] AS c15_q3, +c15_q[2] AS c15_median, +least(c15_max, c15_q3 + (1.5 * (c15_q3 - c15_q1))) AS c15_max, +greatest(c15_min, c15_q1 - (1.5 * (c15_q3 - c15_q1))) AS c15_min, +c15_avg, +c16_q[1] AS c16_q1, +c16_q[3] AS c16_q3, +c16_q[2] AS c16_median, +least(toFloat64(c16_max), c16_q3 + (1.5 * (c16_q3 - c16_q1))) AS c16_max, +greatest(toFloat64(c16_min), c16_q1 - (1.5 * (c16_q3 - c16_q1))) AS c16_min, +c16_avg, +c17_q[1] AS c17_q1, +c17_q[3] AS c17_q3, +c17_q[2] AS c17_median, +least(toFloat64(c17_max), c17_q3 + (1.5 * (c17_q3 - c17_q1))) AS c17_max, +greatest(toFloat64(c17_min), c17_q1 - (1.5 * (c17_q3 - c17_q1))) AS c17_min, +c17_avg, +c18_q[1] AS c18_q1, +c18_q[3] AS c18_q3, +c18_q[2] AS c18_median, +least(toFloat64(c18_max), c18_q3 + (1.5 * (c18_q3 - c18_q1))) AS c18_max, +greatest(toFloat64(c18_min), c18_q1 - (1.5 * (c18_q3 - c18_q1))) AS c18_min, +c18_avg, +round(if(c19 != 0, c24 / c19, 0), 2) AS c20, +c21, +c22, +c23 AS c23, +c19 AS c19, +c16 AS c16, +c17 AS c17, +c18 AS c18, +round(c24, 2) AS c24, +round(if(c17 != 0, c24 / c17, 0), 2) AS c25, +'CH' AS c26 +FROM +( +SELECT +c1, +c2, +groupUniqArray(c27) AS c28, +groupUniqArrayIf(c27, isNotNull(c29)) AS c28_with_c29, +quantiles(0.25, 0.5, 0.75)(if(c3 > 0, c3, NULL)) AS c3_q, +quantiles(0.25, 0.5, 0.75)(if(c4 > 0, c4, NULL)) AS c4_q, +quantiles(0.25, 0.5, 0.75)(t.c17 / t.c19) AS c5_q, +quantiles(0.25, 0.5, 0.75)(c6) AS c6_q, +quantiles(0.25, 0.5, 0.75)(c7) AS c7_q, +quantiles(0.25, 0.5, 0.75)(c8) AS c8_q, +quantiles(0.25, 0.5, 0.75)(c9) AS c9_q, +quantiles(0.25, 0.5, 0.75)(c10) AS c10_q, +quantiles(0.25, 0.5, 0.75)(c11) AS c11_q, +quantiles(0.25, 0.5, 0.75)(c12) AS c12_q, +quantiles(0.25, 0.5, 0.75)(c13) AS c13_q, +quantiles(0.25, 0.5, 0.75)(if(t.c24 > 0, t.c24, NULL) / t.c19) AS c14_q, +quantiles(0.25, 0.5, 0.75)(if(t.c24 > 0, t.c24, NULL) / t.c17) AS c15_q, +quantiles(0.25, 0.5, 0.75)(t.c16) AS c16_q, +quantiles(0.25, 0.5, 0.75)(t.c17) AS c17_q, +quantiles(0.25, 0.5, 0.75)(if(t.c18 > 0, t.c18, NULL)) AS c18_q, +max(if(c3 > 0, c3, NULL)) AS c3_max, +min(if(c3 > 0, c3, NULL)) AS c3_min, +avg(if(c3 > 0, c3, NULL)) AS c3_avg, +max(if(c4 > 0, c4, NULL)) AS c4_max, +min(if(c4 > 0, c4, NULL)) AS c4_min, +avg(if(c4 > 0, c4, NULL)) AS c4_avg, +max(t.c17 / t.c19) AS c5_max, +min(t.c17 / 
t.c19) AS c5_min, +avg(t.c17 / t.c19) AS c5_avg, +max(if(c6 > 0, c6, NULL)) AS c6_max, +min(if(c6 > 0, c6, NULL)) AS c6_min, +avg(if(c6 > 0, c6, NULL)) AS c6_avg, +max(if(c7 > 0, c7, NULL)) AS c7_max, +min(if(c7 > 0, c7, NULL)) AS c7_min, +avg(if(c7 > 0, c7, NULL)) AS c7_avg, +max(if(c10 > 0, c10, NULL)) AS c10_max, +min(if(c10 > 0, c10, NULL)) AS c10_min, +avg(if(c10 > 0, c10, NULL)) AS c10_avg, +max(if(c8 > 0, c8, NULL)) AS c8_max, +min(if(c8 > 0, c8, NULL)) AS c8_min, +avg(if(c8 > 0, c8, NULL)) AS c8_avg, +max(if(c9 > 0, c9, NULL)) AS c9_max, +min(if(c9 > 0, c9, NULL)) AS c9_min, +avg(if(c9 > 0, c9, NULL)) AS c9_avg, +max(if(c11 > 0, c11, NULL)) AS c11_max, +min(if(c11 > 0, c11, NULL)) AS c11_min, +avg(if(c11 > 0, c11, NULL)) AS c11_avg, +max(if(c12 > 0, c12, NULL)) AS c12_max, +min(if(c12 > 0, c12, NULL)) AS c12_min, +avg(if(c12 > 0, c12, NULL)) AS c12_avg, +max(if(c13 > 0, c13, NULL)) AS c13_max, +min(if(c13 > 0, c13, NULL)) AS c13_min, +avg(if(c13 > 0, c13, NULL)) AS c13_avg, +max(if(t.c24 > 0, t.c24, NULL) / t.c19) AS c14_max, +min(if(t.c24 > 0, t.c24, NULL) / t.c19) AS c14_min, +avg(if(t.c24 > 0, t.c24, NULL) / t.c19) AS c14_avg, +max(if(t.c24 > 0, t.c24, NULL) / t.c17) AS c15_max, +min(if(t.c24 > 0, t.c24, NULL) / t.c17) AS c15_min, +avg(if(t.c24 > 0, t.c24, NULL) / t.c17) AS c15_avg, +max(t.c16) AS c16_max, +min(t.c16) AS c16_min, +avg(t.c16) AS c16_avg, +max(t.c17) AS c17_max, +min(t.c17) AS c17_min, +avg(t.c17) AS c17_avg, +max(if(t.c18 > 0, t.c18, NULL)) AS c18_max, +min(if(t.c18 > 0, t.c18, NULL)) AS c18_min, +avg(if(t.c18 > 0, t.c18, NULL)) AS c18_avg, +sum(t.c19) AS c19, +sum(if(t.c18 > 0, t.c18, NULL)) AS c18, +sum(t.c16) AS c16, +sum(c23) AS c23, +sum(t.c17) AS c17, +sum(if(t.c24 > 0, t.c24, NULL)) AS c24, +c24 / c19 AS c14, +c24 / c17 AS c15, +median(if(isNotNull(c29) AND (t.c22 > 0), c13 * (t.c22 / c29), NULL)) AS c21, +sum(c22) AS c22 +FROM +( +SELECT +c27, +c39 AS c1, +c29, +c19, +c23, +c17, +c16, +c18, +c22, +c24, +c3, +c4, +c8, +c9, +c10, +c11, +c12, +c13, +c6, +c7 +FROM +( +SELECT +c27, +uniqExact(c30, c31) AS c19, +uniqExact(c30, c31, c32) AS c23, +uniqExactIf(c30, c31, c33 IN ('c37', 'c38')) AS c17, +countIf(c33 IN ('c37', 'c38')) AS c16, +countIf(c33 = 'c39') AS c18, +coalesce(sumIf(c29, c33 = 'c39'), 0) AS c22, +coalesce(sumIf(c37, c33 = 'c39'), 0) AS c24, +if((c18 > 0) AND (c19 > 0), c18 / c19, NULL) AS c3, +if(c17 != 0, c18 / c17, NULL) AS c4, +coalesce(avgIf(c34, (c34 > 0) AND (c33 IN ('c37', 'c38'))), NULL) AS c8, +coalesce(avgIf(c35, (c35 > 0) AND (c33 IN ('c37', 'c38'))), NULL) AS c9, +coalesce(avgIf(c34, (c34 > 0) AND (c33 = 'c39')), NULL) AS c10, +coalesce(avgIf(c35, (c35 > 0) AND (c33 = 'c39')), NULL) AS c11, +coalesce(avgIf(c37, c33 = 'c39'), NULL) AS c12, +coalesce(avgIf(c37 / c34, (c34 > 0) AND (c33 = 'c39')), NULL) AS c13, +coalesce(avgIf(c37, (c37 > 0) AND (c33 IN ('c37', 'c38'))), NULL) AS c6, +coalesce(minIf(c37, (c37 > 0) AND (c33 IN ('c37', 'c38')) AND (c37 > (c36 / 2))), NULL) AS c7 +FROM +( +SELECT +c27, +c30, +c32, +c31, +NULL AS c29, +NULL AS c33, +NULL AS c37, +NULL AS c34, +NULL AS c35 +FROM +( +SELECT +c27, +c30, +c32, +c31 +FROM database.t1 +PREWHERE ((c32 >= parseDateTimeBestEffort('2020-01-01')) AND (c32 <= parseDateTimeBestEffort('2020-01-01 23:59:59'))) AND (c27 IN +( +SELECT comp_c27 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, 
+groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +)) +WHERE c61 = 0 +) AS table25 +UNION ALL +SELECT +c27, +c30, +c32, +c31, +c29, +c33, +c37, +c34, +c35 +FROM +( +SELECT +c27, +c30, +c32, +'c37' AS c33, +coalesce(c37 * joinGet('database.table18', 'c60', concat(c26, '_', 'CH')), 0) AS c37, +if(c53 > 0, c53, 2) AS c53, +c54, +if(c29 > 0, c29, 1) AS c29, +c55, +c56, +datediff('day', c55, c56) AS c34, +datediff('day', c32, c55) AS c35, +c31 +FROM database.table24 +PREWHERE ((c32 >= parseDateTimeBestEffort('2020-01-01')) AND (c32 <= parseDateTimeBestEffort('2020-01-01 23:59:59'))) AND (c27 IN +( +SELECT comp_c27 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS 
c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +)) +WHERE (c61 = 0) AND (c37 < (666 * (1 / joinGet('database.table18', 'c60', concat(c26, '_', 'CH'))))) +) AS table23 +UNION ALL +SELECT +c27, +c30, +c32, +c31, +c29, +c33, +c37, +c34, +c35 +FROM +( +SELECT +c27, +c30, +c32, +'c39' AS c33, +coalesce(c37 * joinGet('database.table18', 'c60', concat(c26, '_', 'CH')), 0) AS c37, +if(c53 > 0, c53, 2) AS c53, +c54, +if(c29 > 0, c29, 1) AS c29, +c55, +c56, +datediff('day', c55, c56) AS c34, +datediff('day', c32, c55) AS c35, +c31 +FROM database.table22 +PREWHERE ((c32 >= parseDateTimeBestEffort('2020-01-01')) AND (c32 <= parseDateTimeBestEffort('2020-01-01 23:59:59'))) AND (c27 IN +( +SELECT comp_c27 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, 
+any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +)) +WHERE (c61 = 0) AND (c37 < (666 * (1 / joinGet('database.table18', 'c60', concat(c26, '_', 'CH'))))) +) AS table21 +) AS table20 +ALL LEFT JOIN +( +SELECT +c27, +avgMerge(avg_c37) * joinGet('database.table18', 'c60', concat('USD', '_', 'CH')) AS c36 +FROM database.table19 +PREWHERE c27 IN +( +SELECT comp_c27 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS 
c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +) +WHERE date > (now() - toIntervalMonth(3)) +GROUP BY c27 +) AS table17 USING (c27) +GROUP BY c27 +) AS table16 +ALL LEFT JOIN +( +SELECT +comp_c27 AS c27, +assumeNotNull(c39) AS c39, +c29 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, 
+joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +) USING (c27) +) AS t +ALL LEFT JOIN +( +SELECT +c1, +c2 +FROM +( +SELECT +c39 AS c1, +groupArray(comp_c27) AS c49, +multiIf(c1 = 'c58', if(length(c49) <= 2, 0, 1), c1 = 'c57', 1, if(length(c49) <= 3, 0, 1)) AS c2 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, 
+arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +GROUP BY c39 +) AS table3 +) USING (c1) +GROUP BY +c1, +c2 +) AS table2 +ORDER BY c1 ASC +) AS table1 +UNION ALL +SELECT * +FROM +( +SELECT +c1, +c2, +c3_q[1] AS c3_q1, +c3_q[3] AS c3_q3, +c3_q[2] AS c3_median, +least(c3_max, c3_q3 + (1.5 * (c3_q3 - c3_q1))) AS c3_max, +greatest(c3_min, c3_q1 - (1.5 * (c3_q3 - c3_q1))) AS c3_min, +c3_avg, +c4_q[1] AS c4_q1, +c4_q[3] AS c4_q3, +c4_q[2] AS c4_median, +least(c4_max, c4_q3 + (1.5 * (c4_q3 - c4_q1))) AS c4_max, +greatest(c4_min, c4_q1 - (1.5 * (c4_q3 - c4_q1))) AS c4_min, +c4_avg, +c5_q[1] AS c5_q1, +c5_q[3] AS c5_q3, +c5_q[2] AS c5_median, +least(c5_max, c5_q3 + (1.5 * (c5_q3 - c5_q1))) AS c5_max, +greatest(c5_min, c5_q1 - (1.5 * (c5_q3 - c5_q1))) AS c5_min, +c5_avg, +c6_q[1] AS c6_q1, +c6_q[3] AS c6_q3, +c6_q[2] AS c6_median, +least(c6_max, c6_q3 + (1.5 * (c6_q3 - c6_q1))) AS c6_max, +greatest(c6_min, c6_q1 - (1.5 * (c6_q3 - c6_q1))) AS c6_min, +c6_avg, +c7_q[1] AS c7_q1, +c7_q[3] AS c7_q3, +c7_q[2] AS c7_median, +least(c7_max, c7_q3 + (1.5 * (c7_q3 - c7_q1))) AS c7_max, +greatest(c7_min, c7_q1 - (1.5 * (c7_q3 - c7_q1))) AS c7_min, +c7_avg, +c8_q[1] AS c8_q1, +c8_q[3] AS c8_q3, +c8_q[2] AS c8_median, +least(c8_max, c8_q3 + (1.5 * (c8_q3 - c8_q1))) AS c8_max, +greatest(c8_min, c8_q1 - (1.5 * (c8_q3 - c8_q1))) AS c8_min, +c8_avg, +c9_q[1] AS c9_q1, +c9_q[3] AS c9_q3, +c9_q[2] AS c9_median, +least(c9_max, c9_q3 + (1.5 * (c9_q3 - c9_q1))) AS c9_max, +greatest(c9_min, c9_q1 - (1.5 * (c9_q3 - c9_q1))) AS c9_min, +c9_avg, +c10_q[1] AS c10_q1, +c10_q[3] AS c10_q3, +c10_q[2] AS c10_median, +least(c10_max, c10_q3 + (1.5 * (c10_q3 - c10_q1))) AS c10_max, +greatest(c10_min, c10_q1 - (1.5 * (c10_q3 - c10_q1))) AS c10_min, +c10_avg, +c10_avg, +c11_q[1] AS c11_q1, +c11_q[3] AS c11_q3, +c11_q[2] AS c11_median, +least(c11_max, c11_q3 + (1.5 * (c11_q3 - c11_q1))) AS c11_max, +greatest(c11_min, c11_q1 - (1.5 * (c11_q3 - c11_q1))) AS c11_min, +c11_avg, +c12_q[1] AS c12_q1, +c12_q[3] AS c12_q3, +c12_q[2] AS c12_median, +least(c12_max, c12_q3 + (1.5 * (c12_q3 - c12_q1))) AS c12_max, +greatest(c12_min, c12_q1 - (1.5 * (c12_q3 - c12_q1))) AS c12_min, +c12_avg, +c13_q[1] AS c13_q1, +c13_q[3] AS c13_q3, +c13_q[2] AS c13_median, +least(c13_max, c13_q3 + (1.5 * (c13_q3 - c13_q1))) AS c13_max, +greatest(c13_min, c13_q1 - (1.5 * (c13_q3 - c13_q1))) AS c13_min, +c13_avg, +c14_q[1] 
AS c14_q1, +c14_q[3] AS c14_q3, +c14_q[2] AS c14_median, +least(c14_max, c14_q3 + (1.5 * (c14_q3 - c14_q1))) AS c14_max, +greatest(c14_min, c14_q1 - (1.5 * (c14_q3 - c14_q1))) AS c14_min, +c14_avg, +c15_q[1] AS c15_q1, +c15_q[3] AS c15_q3, +c15_q[2] AS c15_median, +least(c15_max, c15_q3 + (1.5 * (c15_q3 - c15_q1))) AS c15_max, +greatest(c15_min, c15_q1 - (1.5 * (c15_q3 - c15_q1))) AS c15_min, +c15_avg, +c16_q[1] AS c16_q1, +c16_q[3] AS c16_q3, +c16_q[2] AS c16_median, +least(toFloat64(c16_max), c16_q3 + (1.5 * (c16_q3 - c16_q1))) AS c16_max, +greatest(toFloat64(c16_min), c16_q1 - (1.5 * (c16_q3 - c16_q1))) AS c16_min, +c16_avg, +c17_q[1] AS c17_q1, +c17_q[3] AS c17_q3, +c17_q[2] AS c17_median, +least(toFloat64(c17_max), c17_q3 + (1.5 * (c17_q3 - c17_q1))) AS c17_max, +greatest(toFloat64(c17_min), c17_q1 - (1.5 * (c17_q3 - c17_q1))) AS c17_min, +c17_avg, +c18_q[1] AS c18_q1, +c18_q[3] AS c18_q3, +c18_q[2] AS c18_median, +least(toFloat64(c18_max), c18_q3 + (1.5 * (c18_q3 - c18_q1))) AS c18_max, +greatest(toFloat64(c18_min), c18_q1 - (1.5 * (c18_q3 - c18_q1))) AS c18_min, +c18_avg, +round(if(c19 != 0, c24 / c19, 0), 2) AS c20, +c21, +c22, +c23 AS c23, +c19 AS c19, +c16 AS c16, +c17 AS c17, +c18 AS c18, +round(c24, 2) AS c24, +round(if(c17 != 0, c24 / c17, 0), 2) AS c25, +'CH' AS c26 +FROM +( +SELECT +c1, +c2, +groupUniqArray(c27) AS c28, +groupUniqArrayIf(c27, isNotNull(c29)) AS c28_with_c29, +quantiles(0.25, 0.5, 0.75)(if(c3 > 0, c3, NULL)) AS c3_q, +quantiles(0.25, 0.5, 0.75)(if(c4 > 0, c4, NULL)) AS c4_q, +quantiles(0.25, 0.5, 0.75)(t.c17 / t.c19) AS c5_q, +quantiles(0.25, 0.5, 0.75)(c6) AS c6_q, +quantiles(0.25, 0.5, 0.75)(c7) AS c7_q, +quantiles(0.25, 0.5, 0.75)(c8) AS c8_q, +quantiles(0.25, 0.5, 0.75)(c9) AS c9_q, +quantiles(0.25, 0.5, 0.75)(c10) AS c10_q, +quantiles(0.25, 0.5, 0.75)(c11) AS c11_q, +quantiles(0.25, 0.5, 0.75)(c12) AS c12_q, +quantiles(0.25, 0.5, 0.75)(c13) AS c13_q, +quantiles(0.25, 0.5, 0.75)(if(t.c24 > 0, t.c24, NULL) / t.c19) AS c14_q, +quantiles(0.25, 0.5, 0.75)(if(t.c24 > 0, t.c24, NULL) / t.c17) AS c15_q, +quantiles(0.25, 0.5, 0.75)(t.c16) AS c16_q, +quantiles(0.25, 0.5, 0.75)(t.c17) AS c17_q, +quantiles(0.25, 0.5, 0.75)(if(t.c18 > 0, t.c18, NULL)) AS c18_q, +max(if(c3 > 0, c3, NULL)) AS c3_max, +min(if(c3 > 0, c3, NULL)) AS c3_min, +avg(if(c3 > 0, c3, NULL)) AS c3_avg, +max(if(c4 > 0, c4, NULL)) AS c4_max, +min(if(c4 > 0, c4, NULL)) AS c4_min, +avg(if(c4 > 0, c4, NULL)) AS c4_avg, +max(t.c17 / t.c19) AS c5_max, +min(t.c17 / t.c19) AS c5_min, +avg(t.c17 / t.c19) AS c5_avg, +max(if(c6 > 0, c6, NULL)) AS c6_max, +min(if(c6 > 0, c6, NULL)) AS c6_min, +avg(if(c6 > 0, c6, NULL)) AS c6_avg, +max(if(c7 > 0, c7, NULL)) AS c7_max, +min(if(c7 > 0, c7, NULL)) AS c7_min, +avg(if(c7 > 0, c7, NULL)) AS c7_avg, +max(if(c10 > 0, c10, NULL)) AS c10_max, +min(if(c10 > 0, c10, NULL)) AS c10_min, +avg(if(c10 > 0, c10, NULL)) AS c10_avg, +max(if(c8 > 0, c8, NULL)) AS c8_max, +min(if(c8 > 0, c8, NULL)) AS c8_min, +avg(if(c8 > 0, c8, NULL)) AS c8_avg, +max(if(c9 > 0, c9, NULL)) AS c9_max, +min(if(c9 > 0, c9, NULL)) AS c9_min, +avg(if(c9 > 0, c9, NULL)) AS c9_avg, +max(if(c11 > 0, c11, NULL)) AS c11_max, +min(if(c11 > 0, c11, NULL)) AS c11_min, +avg(if(c11 > 0, c11, NULL)) AS c11_avg, +max(if(c12 > 0, c12, NULL)) AS c12_max, +min(if(c12 > 0, c12, NULL)) AS c12_min, +avg(if(c12 > 0, c12, NULL)) AS c12_avg, +max(if(c13 > 0, c13, NULL)) AS c13_max, +min(if(c13 > 0, c13, NULL)) AS c13_min, +avg(if(c13 > 0, c13, NULL)) AS c13_avg, +max(if(t.c24 > 0, t.c24, NULL) / t.c19) AS c14_max, 
+min(if(t.c24 > 0, t.c24, NULL) / t.c19) AS c14_min, +avg(if(t.c24 > 0, t.c24, NULL) / t.c19) AS c14_avg, +max(if(t.c24 > 0, t.c24, NULL) / t.c17) AS c15_max, +min(if(t.c24 > 0, t.c24, NULL) / t.c17) AS c15_min, +avg(if(t.c24 > 0, t.c24, NULL) / t.c17) AS c15_avg, +max(t.c16) AS c16_max, +min(t.c16) AS c16_min, +avg(t.c16) AS c16_avg, +max(t.c17) AS c17_max, +min(t.c17) AS c17_min, +avg(t.c17) AS c17_avg, +max(if(t.c18 > 0, t.c18, NULL)) AS c18_max, +min(if(t.c18 > 0, t.c18, NULL)) AS c18_min, +avg(if(t.c18 > 0, t.c18, NULL)) AS c18_avg, +sum(t.c19) AS c19, +sum(if(t.c18 > 0, t.c18, NULL)) AS c18, +sum(t.c16) AS c16, +sum(c23) AS c23, +sum(t.c17) AS c17, +sum(if(t.c24 > 0, t.c24, NULL)) AS c24, +c24 / c19 AS c14, +c24 / c17 AS c15, +median(if(isNotNull(c29) AND (t.c22 > 0), c13 * (t.c22 / c29), NULL)) AS c21, +sum(c22) AS c22 +FROM +( +SELECT +c27, +c39 AS c1, +c29, +c19, +c23, +c17, +c16, +c18, +c22, +c24, +c3, +c4, +c8, +c9, +c10, +c11, +c12, +c13, +c6, +c7 +FROM +( +SELECT +c27, +uniqExact(c30, c31) AS c19, +uniqExact(c30, c31, c32) AS c23, +uniqExactIf(c30, c31, c33 IN ('c37', 'c38')) AS c17, +countIf(c33 IN ('c37', 'c38')) AS c16, +countIf(c33 = 'c39') AS c18, +coalesce(sumIf(c29, c33 = 'c39'), 0) AS c22, +coalesce(sumIf(c37, c33 = 'c39'), 0) AS c24, +if((c18 > 0) AND (c19 > 0), c18 / c19, NULL) AS c3, +if(c17 != 0, c18 / c17, NULL) AS c4, +coalesce(avgIf(c34, (c34 > 0) AND (c33 IN ('c37', 'c38'))), NULL) AS c8, +coalesce(avgIf(c35, (c35 > 0) AND (c33 IN ('c37', 'c38'))), NULL) AS c9, +coalesce(avgIf(c34, (c34 > 0) AND (c33 = 'c39')), NULL) AS c10, +coalesce(avgIf(c35, (c35 > 0) AND (c33 = 'c39')), NULL) AS c11, +coalesce(avgIf(c37, c33 = 'c39'), NULL) AS c12, +coalesce(avgIf(c37 / c34, (c34 > 0) AND (c33 = 'c39')), NULL) AS c13, +coalesce(avgIf(c37, (c37 > 0) AND (c33 IN ('c37', 'c38'))), NULL) AS c6, +coalesce(minIf(c37, (c37 > 0) AND (c33 IN ('c37', 'c38')) AND (c37 > (c36 / 2))), NULL) AS c7 +FROM +( +SELECT +c27, +c30, +c32, +c31, +NULL AS c29, +NULL AS c33, +NULL AS c37, +NULL AS c34, +NULL AS c35 +FROM +( +SELECT +c27, +c30, +c32, +c31 +FROM database.t1 +PREWHERE ((c32 >= parseDateTimeBestEffort('2020-01-01')) AND (c32 <= parseDateTimeBestEffort('2020-01-01 23:59:59'))) AND (c27 IN +( +SELECT comp_c27 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) 
AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +)) +WHERE c61 = 0 +) AS table25 +UNION ALL +SELECT +c27, +c30, +c32, +c31, +c29, +c33, +c37, +c34, +c35 +FROM +( +SELECT +c27, +c30, +c32, +'c37' AS c33, +coalesce(c37 * joinGet('database.table18', 'c60', concat(c26, '_', 'CH')), 0) AS c37, +if(c53 > 0, c53, 2) AS c53, +c54, +if(c29 > 0, c29, 1) AS c29, +c55, +c56, +datediff('day', c55, c56) AS c34, +datediff('day', c32, c55) AS c35, +c31 +FROM database.table24 +PREWHERE ((c32 >= parseDateTimeBestEffort('2020-01-01')) AND (c32 <= parseDateTimeBestEffort('2020-01-01 23:59:59'))) AND (c27 IN +( +SELECT comp_c27 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', 
assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +)) +WHERE (c61 = 0) AND (c37 < (666 * (1 / joinGet('database.table18', 'c60', concat(c26, '_', 'CH'))))) +) AS table23 +UNION ALL +SELECT +c27, +c30, +c32, +c31, +c29, +c33, +c37, +c34, +c35 +FROM +( +SELECT +c27, +c30, +c32, +'c39' AS c33, +coalesce(c37 * joinGet('database.table18', 'c60', concat(c26, '_', 'CH')), 0) AS c37, +if(c53 > 0, c53, 2) AS c53, +c54, +if(c29 > 0, c29, 1) AS c29, +c55, +c56, +datediff('day', c55, c56) AS c34, +datediff('day', c32, c55) AS c35, +c31 +FROM database.table22 +PREWHERE ((c32 >= parseDateTimeBestEffort('2020-01-01')) AND (c32 <= parseDateTimeBestEffort('2020-01-01 23:59:59'))) AND (c27 IN +( +SELECT comp_c27 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', 
assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +)) +WHERE (c61 = 0) AND (c37 < (666 * (1 / joinGet('database.table18', 'c60', concat(c26, '_', 'CH'))))) +) AS table21 +) AS table20 +ALL LEFT JOIN +( +SELECT +c27, +avgMerge(avg_c37) * joinGet('database.table18', 'c60', concat('USD', '_', 'CH')) AS c36 +FROM database.table19 +PREWHERE c27 IN +( +SELECT comp_c27 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', 
', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +) +WHERE date > (now() - toIntervalMonth(3)) +GROUP BY c27 +) AS table17 USING (c27) +GROUP BY c27 +) AS table16 +ALL LEFT JOIN +( +SELECT +comp_c27 AS c27, +assumeNotNull(c39) AS c39, +c29 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, 
+assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +) USING (c27) +) AS t +ALL LEFT JOIN +( +SELECT +c1, +c2 +FROM +( +SELECT +c39 AS c1, +groupArray(comp_c27) AS c49, +multiIf(c1 = 'c58', if(length(c49) <= 2, 0, 1), c1 = 'c57', 1, if(length(c49) <= 3, 0, 1)) AS c2 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS 
table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +GROUP BY c39 +) AS table3 +) USING (c1) +GROUP BY +c1, +c2 +) AS table2 +ORDER BY c1 ASC +) AS table1 +UNION ALL +SELECT * +FROM +( +SELECT +c1, +c2, +c3_q[1] AS c3_q1, +c3_q[3] AS c3_q3, +c3_q[2] AS c3_median, +least(c3_max, c3_q3 + (1.5 * (c3_q3 - c3_q1))) AS c3_max, +greatest(c3_min, c3_q1 - (1.5 * (c3_q3 - c3_q1))) AS c3_min, +c3_avg, +c4_q[1] AS c4_q1, +c4_q[3] AS c4_q3, +c4_q[2] AS c4_median, +least(c4_max, c4_q3 + (1.5 * (c4_q3 - c4_q1))) AS c4_max, +greatest(c4_min, c4_q1 - (1.5 * (c4_q3 - c4_q1))) AS c4_min, +c4_avg, +c5_q[1] AS c5_q1, +c5_q[3] AS c5_q3, +c5_q[2] AS c5_median, +least(c5_max, c5_q3 + (1.5 * (c5_q3 - c5_q1))) AS c5_max, +greatest(c5_min, c5_q1 - (1.5 * (c5_q3 - c5_q1))) AS c5_min, +c5_avg, +c6_q[1] AS c6_q1, +c6_q[3] AS c6_q3, +c6_q[2] AS c6_median, +least(c6_max, c6_q3 + (1.5 * (c6_q3 - c6_q1))) AS c6_max, +greatest(c6_min, c6_q1 - (1.5 * (c6_q3 - c6_q1))) AS c6_min, +c6_avg, +c7_q[1] AS c7_q1, +c7_q[3] AS c7_q3, +c7_q[2] AS c7_median, +least(c7_max, c7_q3 + (1.5 * (c7_q3 - c7_q1))) AS c7_max, +greatest(c7_min, c7_q1 - (1.5 * (c7_q3 - c7_q1))) AS c7_min, +c7_avg, +c8_q[1] AS c8_q1, +c8_q[3] AS c8_q3, +c8_q[2] AS c8_median, +least(c8_max, c8_q3 + (1.5 * (c8_q3 - c8_q1))) AS c8_max, +greatest(c8_min, c8_q1 - (1.5 * (c8_q3 - c8_q1))) AS c8_min, +c8_avg, +c9_q[1] AS c9_q1, +c9_q[3] AS c9_q3, +c9_q[2] AS c9_median, +least(c9_max, c9_q3 + (1.5 * (c9_q3 - c9_q1))) AS c9_max, +greatest(c9_min, c9_q1 - (1.5 * (c9_q3 - c9_q1))) AS c9_min, +c9_avg, +c10_q[1] AS c10_q1, +c10_q[3] AS c10_q3, +c10_q[2] AS c10_median, +least(c10_max, c10_q3 + (1.5 * (c10_q3 - c10_q1))) AS c10_max, +greatest(c10_min, c10_q1 - (1.5 * (c10_q3 - c10_q1))) AS c10_min, +c10_avg, +c10_avg, +c11_q[1] AS c11_q1, +c11_q[3] AS c11_q3, +c11_q[2] AS c11_median, +least(c11_max, c11_q3 + (1.5 * (c11_q3 - c11_q1))) AS c11_max, +greatest(c11_min, c11_q1 - (1.5 * (c11_q3 - c11_q1))) AS c11_min, +c11_avg, +c12_q[1] AS c12_q1, +c12_q[3] AS c12_q3, +c12_q[2] AS c12_median, +least(c12_max, c12_q3 + (1.5 * (c12_q3 - c12_q1))) AS c12_max, +greatest(c12_min, c12_q1 - (1.5 * (c12_q3 - c12_q1))) AS c12_min, +c12_avg, +c13_q[1] AS c13_q1, +c13_q[3] AS c13_q3, +c13_q[2] AS c13_median, +least(c13_max, c13_q3 + (1.5 * (c13_q3 - c13_q1))) AS c13_max, +greatest(c13_min, c13_q1 - (1.5 * (c13_q3 - c13_q1))) AS c13_min, +c13_avg, +c14_q[1] AS c14_q1, +c14_q[3] AS c14_q3, +c14_q[2] AS c14_median, +least(c14_max, c14_q3 + (1.5 * (c14_q3 - c14_q1))) AS c14_max, +greatest(c14_min, c14_q1 - (1.5 * (c14_q3 - c14_q1))) AS c14_min, +c14_avg, +c15_q[1] AS c15_q1, +c15_q[3] AS c15_q3, +c15_q[2] AS c15_median, +least(c15_max, c15_q3 + (1.5 * (c15_q3 - c15_q1))) AS c15_max, +greatest(c15_min, c15_q1 - (1.5 * (c15_q3 - c15_q1))) AS c15_min, +c15_avg, +c16_q[1] AS c16_q1, +c16_q[3] AS c16_q3, +c16_q[2] AS c16_median, +least(toFloat64(c16_max), c16_q3 + (1.5 * (c16_q3 - c16_q1))) AS c16_max, +greatest(toFloat64(c16_min), c16_q1 - (1.5 * (c16_q3 - c16_q1))) AS c16_min, +c16_avg, +c17_q[1] AS c17_q1, +c17_q[3] AS c17_q3, +c17_q[2] AS c17_median, +least(toFloat64(c17_max), c17_q3 + (1.5 * (c17_q3 - c17_q1))) AS c17_max, +greatest(toFloat64(c17_min), c17_q1 - (1.5 * (c17_q3 - c17_q1))) AS c17_min, +c17_avg, +c18_q[1] AS c18_q1, +c18_q[3] AS c18_q3, +c18_q[2] AS c18_median, +least(toFloat64(c18_max), c18_q3 + (1.5 * (c18_q3 - c18_q1))) AS c18_max, 
+greatest(toFloat64(c18_min), c18_q1 - (1.5 * (c18_q3 - c18_q1))) AS c18_min, +c18_avg, +round(if(c19 != 0, c24 / c19, 0), 2) AS c20, +c21, +c22, +c23 AS c23, +c19 AS c19, +c16 AS c16, +c17 AS c17, +c18 AS c18, +round(c24, 2) AS c24, +round(if(c17 != 0, c24 / c17, 0), 2) AS c25, +'CH' AS c26 +FROM +( +SELECT +c1, +c2, +groupUniqArray(c27) AS c28, +groupUniqArrayIf(c27, isNotNull(c29)) AS c28_with_c29, +quantiles(0.25, 0.5, 0.75)(if(c3 > 0, c3, NULL)) AS c3_q, +quantiles(0.25, 0.5, 0.75)(if(c4 > 0, c4, NULL)) AS c4_q, +quantiles(0.25, 0.5, 0.75)(t.c17 / t.c19) AS c5_q, +quantiles(0.25, 0.5, 0.75)(c6) AS c6_q, +quantiles(0.25, 0.5, 0.75)(c7) AS c7_q, +quantiles(0.25, 0.5, 0.75)(c8) AS c8_q, +quantiles(0.25, 0.5, 0.75)(c9) AS c9_q, +quantiles(0.25, 0.5, 0.75)(c10) AS c10_q, +quantiles(0.25, 0.5, 0.75)(c11) AS c11_q, +quantiles(0.25, 0.5, 0.75)(c12) AS c12_q, +quantiles(0.25, 0.5, 0.75)(c13) AS c13_q, +quantiles(0.25, 0.5, 0.75)(if(t.c24 > 0, t.c24, NULL) / t.c19) AS c14_q, +quantiles(0.25, 0.5, 0.75)(if(t.c24 > 0, t.c24, NULL) / t.c17) AS c15_q, +quantiles(0.25, 0.5, 0.75)(t.c16) AS c16_q, +quantiles(0.25, 0.5, 0.75)(t.c17) AS c17_q, +quantiles(0.25, 0.5, 0.75)(if(t.c18 > 0, t.c18, NULL)) AS c18_q, +max(if(c3 > 0, c3, NULL)) AS c3_max, +min(if(c3 > 0, c3, NULL)) AS c3_min, +avg(if(c3 > 0, c3, NULL)) AS c3_avg, +max(if(c4 > 0, c4, NULL)) AS c4_max, +min(if(c4 > 0, c4, NULL)) AS c4_min, +avg(if(c4 > 0, c4, NULL)) AS c4_avg, +max(t.c17 / t.c19) AS c5_max, +min(t.c17 / t.c19) AS c5_min, +avg(t.c17 / t.c19) AS c5_avg, +max(if(c6 > 0, c6, NULL)) AS c6_max, +min(if(c6 > 0, c6, NULL)) AS c6_min, +avg(if(c6 > 0, c6, NULL)) AS c6_avg, +max(if(c7 > 0, c7, NULL)) AS c7_max, +min(if(c7 > 0, c7, NULL)) AS c7_min, +avg(if(c7 > 0, c7, NULL)) AS c7_avg, +max(if(c10 > 0, c10, NULL)) AS c10_max, +min(if(c10 > 0, c10, NULL)) AS c10_min, +avg(if(c10 > 0, c10, NULL)) AS c10_avg, +max(if(c8 > 0, c8, NULL)) AS c8_max, +min(if(c8 > 0, c8, NULL)) AS c8_min, +avg(if(c8 > 0, c8, NULL)) AS c8_avg, +max(if(c9 > 0, c9, NULL)) AS c9_max, +min(if(c9 > 0, c9, NULL)) AS c9_min, +avg(if(c9 > 0, c9, NULL)) AS c9_avg, +max(if(c11 > 0, c11, NULL)) AS c11_max, +min(if(c11 > 0, c11, NULL)) AS c11_min, +avg(if(c11 > 0, c11, NULL)) AS c11_avg, +max(if(c12 > 0, c12, NULL)) AS c12_max, +min(if(c12 > 0, c12, NULL)) AS c12_min, +avg(if(c12 > 0, c12, NULL)) AS c12_avg, +max(if(c13 > 0, c13, NULL)) AS c13_max, +min(if(c13 > 0, c13, NULL)) AS c13_min, +avg(if(c13 > 0, c13, NULL)) AS c13_avg, +max(if(t.c24 > 0, t.c24, NULL) / t.c19) AS c14_max, +min(if(t.c24 > 0, t.c24, NULL) / t.c19) AS c14_min, +avg(if(t.c24 > 0, t.c24, NULL) / t.c19) AS c14_avg, +max(if(t.c24 > 0, t.c24, NULL) / t.c17) AS c15_max, +min(if(t.c24 > 0, t.c24, NULL) / t.c17) AS c15_min, +avg(if(t.c24 > 0, t.c24, NULL) / t.c17) AS c15_avg, +max(t.c16) AS c16_max, +min(t.c16) AS c16_min, +avg(t.c16) AS c16_avg, +max(t.c17) AS c17_max, +min(t.c17) AS c17_min, +avg(t.c17) AS c17_avg, +max(if(t.c18 > 0, t.c18, NULL)) AS c18_max, +min(if(t.c18 > 0, t.c18, NULL)) AS c18_min, +avg(if(t.c18 > 0, t.c18, NULL)) AS c18_avg, +sum(t.c19) AS c19, +sum(if(t.c18 > 0, t.c18, NULL)) AS c18, +sum(t.c16) AS c16, +sum(c23) AS c23, +sum(t.c17) AS c17, +sum(if(t.c24 > 0, t.c24, NULL)) AS c24, +c24 / c19 AS c14, +c24 / c17 AS c15, +median(if(isNotNull(c29) AND (t.c22 > 0), c13 * (t.c22 / c29), NULL)) AS c21, +sum(c22) AS c22 +FROM +( +SELECT +c27, +c39 AS c1, +c29, +c19, +c23, +c17, +c16, +c18, +c22, +c24, +c3, +c4, +c8, +c9, +c10, +c11, +c12, +c13, +c6, +c7 +FROM +( +SELECT +c27, +uniqExact(c30, c31) 
AS c19, +uniqExact(c30, c31, c32) AS c23, +uniqExactIf(c30, c31, c33 IN ('c37', 'c38')) AS c17, +countIf(c33 IN ('c37', 'c38')) AS c16, +countIf(c33 = 'c39') AS c18, +coalesce(sumIf(c29, c33 = 'c39'), 0) AS c22, +coalesce(sumIf(c37, c33 = 'c39'), 0) AS c24, +if((c18 > 0) AND (c19 > 0), c18 / c19, NULL) AS c3, +if(c17 != 0, c18 / c17, NULL) AS c4, +coalesce(avgIf(c34, (c34 > 0) AND (c33 IN ('c37', 'c38'))), NULL) AS c8, +coalesce(avgIf(c35, (c35 > 0) AND (c33 IN ('c37', 'c38'))), NULL) AS c9, +coalesce(avgIf(c34, (c34 > 0) AND (c33 = 'c39')), NULL) AS c10, +coalesce(avgIf(c35, (c35 > 0) AND (c33 = 'c39')), NULL) AS c11, +coalesce(avgIf(c37, c33 = 'c39'), NULL) AS c12, +coalesce(avgIf(c37 / c34, (c34 > 0) AND (c33 = 'c39')), NULL) AS c13, +coalesce(avgIf(c37, (c37 > 0) AND (c33 IN ('c37', 'c38'))), NULL) AS c6, +coalesce(minIf(c37, (c37 > 0) AND (c33 IN ('c37', 'c38')) AND (c37 > (c36 / 2))), NULL) AS c7 +FROM +( +SELECT +c27, +c30, +c32, +c31, +NULL AS c29, +NULL AS c33, +NULL AS c37, +NULL AS c34, +NULL AS c35 +FROM +( +SELECT +c27, +c30, +c32, +c31 +FROM database.t1 +PREWHERE ((c32 >= parseDateTimeBestEffort('2020-01-01')) AND (c32 <= parseDateTimeBestEffort('2020-01-01 23:59:59'))) AND (c27 IN +( +SELECT comp_c27 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT 
* +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +)) +WHERE c61 = 0 +) AS table25 +UNION ALL +SELECT +c27, +c30, +c32, +c31, +c29, +c33, +c37, +c34, +c35 +FROM +( +SELECT +c27, +c30, +c32, +'c37' AS c33, +coalesce(c37 * joinGet('database.table18', 'c60', concat(c26, '_', 'CH')), 0) AS c37, +if(c53 > 0, c53, 2) AS c53, +c54, +if(c29 > 0, c29, 1) AS c29, +c55, +c56, +datediff('day', c55, c56) AS c34, +datediff('day', c32, c55) AS c35, +c31 +FROM database.table24 +PREWHERE ((c32 >= parseDateTimeBestEffort('2020-01-01')) AND (c32 <= parseDateTimeBestEffort('2020-01-01 23:59:59'))) AND (c27 IN +( +SELECT comp_c27 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT 
+'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +)) +WHERE (c61 = 0) AND (c37 < (666 * (1 / joinGet('database.table18', 'c60', concat(c26, '_', 'CH'))))) +) AS table23 +UNION ALL +SELECT +c27, +c30, +c32, +c31, +c29, +c33, +c37, +c34, +c35 +FROM +( +SELECT +c27, +c30, +c32, +'c39' AS c33, +coalesce(c37 * joinGet('database.table18', 'c60', concat(c26, '_', 'CH')), 0) AS c37, +if(c53 > 0, c53, 2) AS c53, +c54, +if(c29 > 0, c29, 1) AS c29, +c55, +c56, +datediff('day', c55, c56) AS c34, +datediff('day', c32, c55) AS c35, +c31 +FROM database.table22 +PREWHERE ((c32 >= parseDateTimeBestEffort('2020-01-01')) AND (c32 <= parseDateTimeBestEffort('2020-01-01 23:59:59'))) AND (c27 IN +( +SELECT comp_c27 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT 
+'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +)) +WHERE (c61 = 0) AND (c37 < (666 * (1 / joinGet('database.table18', 'c60', concat(c26, '_', 'CH'))))) +) AS table21 +) AS table20 +ALL LEFT JOIN +( +SELECT +c27, +avgMerge(avg_c37) * joinGet('database.table18', 'c60', concat('USD', '_', 'CH')) AS c36 +FROM database.table19 +PREWHERE c27 IN +( +SELECT comp_c27 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= 
(parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +) +WHERE date > (now() - toIntervalMonth(3)) +GROUP BY c27 +) AS table17 USING (c27) +GROUP BY c27 +) AS table16 +ALL LEFT JOIN +( +SELECT +comp_c27 AS c27, +assumeNotNull(c39) AS c39, +c29 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +) USING (c27) +) AS t +ALL LEFT JOIN +( +SELECT +c1, +c2 +FROM +( +SELECT +c39 AS c1, +groupArray(comp_c27) AS c49, +multiIf(c1 = 'c58', if(length(c49) <= 2, 0, 1), c1 = 'c57', 1, if(length(c49) <= 3, 0, 1)) AS c2 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, 
+c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +GROUP BY c39 +) AS table3 +) USING (c1) +GROUP BY +c1, +c2 +) AS table2 +ORDER BY c1 ASC ) AS table1 FORMAT Null ]]> diff --git a/tests/performance/questdb_sum_float32.xml b/tests/performance/questdb_sum_float32.xml deleted file mode 100644 index 0b830857e62..00000000000 --- a/tests/performance/questdb_sum_float32.xml +++ /dev/null @@ -1,33 +0,0 @@ - - - 4 - 20G - 1 - 2000000000 - 10G - - - - - engine - - Memory - MergeTree ORDER BY tuple() - - - - type - - Float32 - Float32 NULL - - - - - CREATE TABLE `zz_{type}_{engine}` (x {type}) ENGINE {engine} - INSERT INTO `zz_{type}_{engine}` SELECT rand() FROM numbers(500000000) - - SELECT sum(x) FROM `zz_{type}_{engine}` - - DROP TABLE IF EXISTS `zz_{type}_{engine}` - diff --git a/tests/performance/questdb_sum_float64.xml b/tests/performance/questdb_sum_float64.xml deleted file mode 
100644 index fde475a1431..00000000000 --- a/tests/performance/questdb_sum_float64.xml +++ /dev/null @@ -1,33 +0,0 @@ - - - 4 - 20G - 1 - 2000000000 - 10G - - - - - engine - - Memory - MergeTree ORDER BY tuple() - - - - type - - Float64 - Float64 NULL - - - - - CREATE TABLE `zz_{type}_{engine}` (x {type}) ENGINE {engine} - INSERT INTO `zz_{type}_{engine}` SELECT rand() FROM numbers(500000000) - - SELECT sum(x) FROM `zz_{type}_{engine}` - - DROP TABLE IF EXISTS `zz_{type}_{engine}` - diff --git a/tests/performance/questdb_sum_int32.xml b/tests/performance/questdb_sum_int32.xml deleted file mode 100644 index ba1eed6b074..00000000000 --- a/tests/performance/questdb_sum_int32.xml +++ /dev/null @@ -1,34 +0,0 @@ - - - 4 - 20G - 1 - 2000000000 - 10G - - - - - engine - - Memory - MergeTree ORDER BY tuple() - - - - type - - Int32 - Int32 NULL - - - - - CREATE TABLE `zz_{type}_{engine}` (x {type}) ENGINE {engine} - INSERT INTO `zz_{type}_{engine}` SELECT rand() FROM numbers_mt(300000000) SETTINGS max_insert_threads = 8 - OPTIMIZE TABLE `zz_{type}_MergeTree ORDER BY tuple()` FINAL - - SELECT sum(x) FROM `zz_{type}_{engine}` - - DROP TABLE IF EXISTS `zz_{type}_{engine}` - diff --git a/tests/queries/0_stateless/00057_join_aliases.sql b/tests/queries/0_stateless/00057_join_aliases.sql index 481b0621ed7..b994e26a747 100644 --- a/tests/queries/0_stateless/00057_join_aliases.sql +++ b/tests/queries/0_stateless/00057_join_aliases.sql @@ -3,4 +3,5 @@ SELECT * FROM ( FROM system.numbers ANY LEFT JOIN (SELECT number / 3 AS n, number AS j1, 'Hello' AS j2 FROM system.numbers LIMIT 10) js2 USING n LIMIT 10 -) ORDER BY n; +) ORDER BY n +SETTINGS join_algorithm = 'hash'; -- the query does not finish with merge join diff --git a/tests/queries/0_stateless/00609_prewhere_and_default.sql b/tests/queries/0_stateless/00609_prewhere_and_default.sql index 7da809cd140..f1aa69c1320 100644 --- a/tests/queries/0_stateless/00609_prewhere_and_default.sql +++ b/tests/queries/0_stateless/00609_prewhere_and_default.sql @@ -3,11 +3,25 @@ create table `table_00609` (key UInt64, val UInt64) engine = MergeTree order by insert into `table_00609` select number, number / 8192 from system.numbers limit 100000; alter table `table_00609` add column def UInt64 default val + 1; select * from `table_00609` prewhere val > 2 format Null; +select * from `table_00609` prewhere val > 2 format Null SETTINGS max_block_size=100; +select * from `table_00609` prewhere val > 2 format Null SETTINGS max_block_size=1000; +select * from `table_00609` prewhere val > 2 format Null SETTINGS max_block_size=10000; +select * from `table_00609` prewhere val > 2 format Null SETTINGS max_block_size=20000; +select * from `table_00609` prewhere val > 2 format Null SETTINGS max_block_size=30000; +select * from `table_00609` prewhere val > 2 format Null SETTINGS max_block_size=40000; +select * from `table_00609` prewhere val > 2 format Null SETTINGS max_block_size=80000; drop table if exists `table_00609`; create table `table_00609` (key UInt64, val UInt64) engine = MergeTree order by key settings index_granularity=8192; insert into `table_00609` select number, number / 8192 from system.numbers limit 100000; alter table `table_00609` add column def UInt64; select * from `table_00609` prewhere val > 2 format Null; +select * from `table_00609` prewhere val > 2 format Null SETTINGS max_block_size=100; +select * from `table_00609` prewhere val > 2 format Null SETTINGS max_block_size=1000; +select * from `table_00609` prewhere val > 2 format Null SETTINGS max_block_size=10000; 
+select * from `table_00609` prewhere val > 2 format Null SETTINGS max_block_size=20000; +select * from `table_00609` prewhere val > 2 format Null SETTINGS max_block_size=30000; +select * from `table_00609` prewhere val > 2 format Null SETTINGS max_block_size=40000; +select * from `table_00609` prewhere val > 2 format Null SETTINGS max_block_size=80000; drop table if exists `table_00609`; diff --git a/tests/queries/0_stateless/00855_join_with_array_join.sql b/tests/queries/0_stateless/00855_join_with_array_join.sql index 05180573525..c1ea0bbb429 100644 --- a/tests/queries/0_stateless/00855_join_with_array_join.sql +++ b/tests/queries/0_stateless/00855_join_with_array_join.sql @@ -43,7 +43,7 @@ JOIN system.one AS y USING dummy; SELECT * FROM ( SELECT [toUInt32(dummy), toUInt32(dummy)] AS dummy FROM system.one ) AS x ARRAY JOIN dummy JOIN (select toInt32(dummy) as dummy from system.one ) AS y USING dummy; -SELECT dummy > 0, toTypeName(any(dummy)), any(toTypeName(dummy)) +SELECT dummy > 0, toTypeName(any(dummy)), any(toTypeName(dummy)) FROM ( SELECT [toUInt32(dummy), toUInt32(dummy)] AS dummy FROM system.one ) AS x ARRAY JOIN dummy JOIN ( SELECT toInt32(dummy) AS dummy FROM system.one ) AS y USING dummy GROUP BY (dummy > 0); diff --git a/tests/queries/0_stateless/01044_great_circle_angle.reference b/tests/queries/0_stateless/01044_great_circle_angle.reference index ebdeaa10067..c247e398824 100644 --- a/tests/queries/0_stateless/01044_great_circle_angle.reference +++ b/tests/queries/0_stateless/01044_great_circle_angle.reference @@ -29,24 +29,24 @@ ██████████████████████████████████▎ ████████████████████████████████████▏ ██████████████████████████████████████ -███████████████████████████████████████▊ -█████████████████████████████████████████▋ +███████████████████████████████████████▉ +█████████████████████████████████████████▊ ███████████████████████████████████████████▌ █████████████████████████████████████████████▍ ███████████████████████████████████████████████▏ -████████████████████████████████████████████████▊ +████████████████████████████████████████████████▉ ██████████████████████████████████████████████████▌ ████████████████████████████████████████████████████▏ -█████████████████████████████████████████████████████▊ +█████████████████████████████████████████████████████▉ ███████████████████████████████████████████████████████▍ █████████████████████████████████████████████████████████ ██████████████████████████████████████████████████████████▌ ████████████████████████████████████████████████████████████ █████████████████████████████████████████████████████████████▌ -██████████████████████████████████████████████████████████████▊ +██████████████████████████████████████████████████████████████▉ ████████████████████████████████████████████████████████████████▎ █████████████████████████████████████████████████████████████████▌ -██████████████████████████████████████████████████████████████████▋ +██████████████████████████████████████████████████████████████████▊ ████████████████████████████████████████████████████████████████████ █████████████████████████████████████████████████████████████████████▏ ██████████████████████████████████████████████████████████████████████▎ @@ -59,13 +59,13 @@ ████████████████████████████████████████████████████████████████████████████▍ █████████████████████████████████████████████████████████████████████████████ █████████████████████████████████████████████████████████████████████████████▌ 
-█████████████████████████████████████████████████████████████████████████████▊ +█████████████████████████████████████████████████████████████████████████████▉ ██████████████████████████████████████████████████████████████████████████████▎ ██████████████████████████████████████████████████████████████████████████████▋ -██████████████████████████████████████████████████████████████████████████████▋ -██████████████████████████████████████████████████████████████████████████████▊ -██████████████████████████████████████████████████████████████████████████████▊ ██████████████████████████████████████████████████████████████████████████████▊ +██████████████████████████████████████████████████████████████████████████████▉ +██████████████████████████████████████████████████████████████████████████████▉ +██████████████████████████████████████████████████████████████████████████████▉ ██████████████████████████████████████████████████████████████████████████████▋ ██████████████████████████████████████████████████████████████████████████████▍ ██████████████████████████████████████████████████████████████████████████████ @@ -84,18 +84,18 @@ ██████████████████████████████████████████████████████████████▌ ████████████████████████████████████████████████████████████▍ ██████████████████████████████████████████████████████████▏ -███████████████████████████████████████████████████████▋ +███████████████████████████████████████████████████████▊ █████████████████████████████████████████████████████▏ ██████████████████████████████████████████████████▍ ███████████████████████████████████████████████▌ ████████████████████████████████████████████▌ █████████████████████████████████████████▎ -█████████████████████████████████████▊ +█████████████████████████████████████▉ ██████████████████████████████████▍ ██████████████████████████████▋ -██████████████████████████▋ -██████████████████████▋ +██████████████████████████▊ +██████████████████████▊ ██████████████████▌ ██████████████▏ █████████▋ -████▊ +████▉ diff --git a/tests/queries/0_stateless/01097_one_more_range_reader_test_wide_part.reference b/tests/queries/0_stateless/01097_one_more_range_reader_test_wide_part.reference new file mode 100644 index 00000000000..b4dfe343bbe --- /dev/null +++ b/tests/queries/0_stateless/01097_one_more_range_reader_test_wide_part.reference @@ -0,0 +1,3 @@ +foo +foo +foo diff --git a/tests/queries/0_stateless/01097_one_more_range_reader_test_wide_part.sql b/tests/queries/0_stateless/01097_one_more_range_reader_test_wide_part.sql new file mode 100644 index 00000000000..244f58b6717 --- /dev/null +++ b/tests/queries/0_stateless/01097_one_more_range_reader_test_wide_part.sql @@ -0,0 +1,17 @@ +drop table if exists t; + +create table t (id UInt32, a Int) engine = MergeTree order by id settings min_bytes_for_wide_part=0; + +insert into t values (1, 0) (2, 1) (3, 0) (4, 0) (5, 0); +alter table t add column s String default 'foo'; +select s from t prewhere a = 1; + +drop table t; + +create table t (id UInt32, a Int) engine = MergeTree order by id settings min_bytes_for_wide_part=0; + +insert into t values (1, 1) (2, 1) (3, 0) (4, 0) (5, 0); +alter table t add column s String default 'foo'; +select s from t prewhere a = 1; + +drop table t; diff --git a/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh b/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh index 983cb515d8e..bbe3a5a51c0 100755 --- a/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh +++ 
b/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh @@ -64,7 +64,7 @@ function alter_table() if [ -z "$table" ]; then continue; fi $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=0 -q \ "alter table $table update n = n + (select max(n) from merge(REGEXP('${CLICKHOUSE_DATABASE}.*'), '.*')) where 1 settings allow_nondeterministic_mutations=1" \ - 2>&1| grep -Fa "Exception: " | grep -Fv "Cannot enqueue query" | grep -Fv "ZooKeeper session expired" | grep -Fv UNKNOWN_DATABASE | grep -Fv UNKNOWN_TABLE | grep -Fv TABLE_IS_READ_ONLY + 2>&1| grep -Fa "Exception: " | grep -Fv "Cannot enqueue query" | grep -Fv "ZooKeeper session expired" | grep -Fv UNKNOWN_DATABASE | grep -Fv UNKNOWN_TABLE | grep -Fv TABLE_IS_READ_ONLY | grep -Fv TABLE_IS_DROPPED sleep 0.$RANDOM done } @@ -75,7 +75,7 @@ function insert() table=$($CLICKHOUSE_CLIENT -q "select database || '.' || name from system.tables where database like '${CLICKHOUSE_DATABASE}%' order by rand() limit 1") if [ -z "$table" ]; then continue; fi $CLICKHOUSE_CLIENT -q \ - "insert into $table values ($RANDOM)" 2>&1| grep -Fa "Exception: " | grep -Fv UNKNOWN_DATABASE | grep -Fv UNKNOWN_TABLE | grep -Fv TABLE_IS_READ_ONLY + "insert into $table values ($RANDOM)" 2>&1| grep -Fa "Exception: " | grep -Fv UNKNOWN_DATABASE | grep -Fv UNKNOWN_TABLE | grep -Fv TABLE_IS_READ_ONLY | grep -Fv TABLE_IS_DROPPED done } diff --git a/tests/queries/0_stateless/01159_combinators_with_parameters.reference b/tests/queries/0_stateless/01159_combinators_with_parameters.reference index cc0cb604bf3..c1edc826fcb 100644 --- a/tests/queries/0_stateless/01159_combinators_with_parameters.reference +++ b/tests/queries/0_stateless/01159_combinators_with_parameters.reference @@ -3,7 +3,6 @@ AggregateFunction(topKDistinct(10), String) AggregateFunction(topKForEach(10), Array(String)) AggregateFunction(topKIf(10), String, UInt8) AggregateFunction(topK(10), String) -AggregateFunction(topKOrNull(10), String) AggregateFunction(topKOrDefault(10), String) AggregateFunction(topKResample(10, 1, 2, 42), String, UInt64) AggregateFunction(topK(10), String) diff --git a/tests/queries/0_stateless/01159_combinators_with_parameters.sql b/tests/queries/0_stateless/01159_combinators_with_parameters.sql index 69508d8e304..8b2dbde6480 100644 --- a/tests/queries/0_stateless/01159_combinators_with_parameters.sql +++ b/tests/queries/0_stateless/01159_combinators_with_parameters.sql @@ -3,7 +3,7 @@ SELECT toTypeName(topKDistinctState(10)(toString(number))) FROM numbers(100); SELECT toTypeName(topKForEachState(10)([toString(number)])) FROM numbers(100); SELECT toTypeName(topKIfState(10)(toString(number), number % 2)) FROM numbers(100); SELECT toTypeName(topKMergeState(10)(state)) FROM (SELECT topKState(10)(toString(number)) as state FROM numbers(100)); -SELECT toTypeName(topKOrNullState(10)(toString(number))) FROM numbers(100); +SELECT toTypeName(topKOrNullState(10)(toString(number))) FROM numbers(100); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT toTypeName(topKOrDefaultState(10)(toString(number))) FROM numbers(100); SELECT toTypeName(topKResampleState(10, 1, 2, 42)(toString(number), number)) FROM numbers(100); SELECT toTypeName(topKState(10)(toString(number))) FROM numbers(100); diff --git a/tests/queries/0_stateless/01246_insert_into_watch_live_view.py b/tests/queries/0_stateless/01246_insert_into_watch_live_view.py index 67c79778736..02d03abc8c3 100755 --- a/tests/queries/0_stateless/01246_insert_into_watch_live_view.py +++ 
b/tests/queries/0_stateless/01246_insert_into_watch_live_view.py @@ -53,15 +53,15 @@ with client(name="client1>", log=log) as client1, client( client1.send("INSERT INTO test.sums WATCH test.lv") client1.expect(r"INSERT INTO") - client3.expect("0,1.*\r\n") + client3.expect("0,1.*\n") client2.send("INSERT INTO test.mt VALUES (1),(2),(3)") client2.expect(prompt) - client3.expect("6,2.*\r\n") + client3.expect("6,2.*\n") client2.send("INSERT INTO test.mt VALUES (4),(5),(6)") client2.expect(prompt) - client3.expect("21,3.*\r\n") + client3.expect("21,3.*\n") # send Ctrl-C client3.send("\x03", eol="") diff --git a/tests/queries/0_stateless/01249_flush_interactive.sh b/tests/queries/0_stateless/01249_flush_interactive.sh index 8eb06cf4f06..551e11c8c8d 100755 --- a/tests/queries/0_stateless/01249_flush_interactive.sh +++ b/tests/queries/0_stateless/01249_flush_interactive.sh @@ -16,10 +16,10 @@ function test() { timeout 5 ${CLICKHOUSE_LOCAL} --max_execution_time 10 --query " SELECT DISTINCT number % 5 FROM system.numbers" ||: - echo '---' + echo -e '---' timeout 5 ${CLICKHOUSE_CURL} -sS --no-buffer "${CLICKHOUSE_URL}&max_execution_time=10" --data-binary " SELECT DISTINCT number % 5 FROM system.numbers" ||: - echo '---' + echo -e '---' } # The test depends on timeouts. And there is a chance that under high system load the query diff --git a/tests/queries/0_stateless/01420_format_row.reference b/tests/queries/0_stateless/01420_format_row.reference index 95eaed7b156..560e1e038cb 100644 --- a/tests/queries/0_stateless/01420_format_row.reference +++ b/tests/queries/0_stateless/01420_format_row.reference @@ -1,12 +1,117 @@ +CSV +formatRow 0,"good"\n 1,"good"\n 2,"good"\n -0\t2001-12-12\t1.4 -1\t2001-12-12\t1.4 -2\t2001-12-12\t1.4 -{"number":"0","toNullable(3)":3,"NULL":null}\n -{"number":"1","toNullable(3)":3,"NULL":null}\n -{"number":"2","toNullable(3)":3,"NULL":null}\n -{"number":"0"} -{"number":"1"} -{"number":"2"} +formatRowNoNewline +0,"good" +1,"good" +2,"good" +TSV +formatRow +0\tgood\n +1\tgood\n +2\tgood\n +formatRowNoNewline +0\tgood +1\tgood +2\tgood +JSONEachRow +formatRow +{"number":"0","good":"good"}\n +{"number":"1","good":"good"}\n +{"number":"2","good":"good"}\n +formatRowNoNewline +{"number":"0","good":"good"} +{"number":"1","good":"good"} +{"number":"2","good":"good"} +JSONCompactEachRow +formatRow +["0", "good"]\n +["1", "good"]\n +["2", "good"]\n +formatRowNoNewline +["0", "good"] +["1", "good"] +["2", "good"] +TSKV +formatRow +number=0\tgood=good\n +number=1\tgood=good\n +number=2\tgood=good\n +formatRowNoNewline +number=0\tgood=good +number=1\tgood=good +number=2\tgood=good +XML +formatRow +\n\n\t\n\t\t\n\t\t\t\n\t\t\t\tnumber\n\t\t\t\tUInt64\n\t\t\t\n\t\t\t\n\t\t\t\tgood\n\t\t\t\tString\n\t\t\t\n\t\t\n\t\n\t\n\t\t\n\t\t\t0\n\t\t\tgood\n\t\t\n\t\n\t1\n\n +\n\n\t\n\t\t\n\t\t\t\n\t\t\t\tnumber\n\t\t\t\tUInt64\n\t\t\t\n\t\t\t\n\t\t\t\tgood\n\t\t\t\tString\n\t\t\t\n\t\t\n\t\n\t\n\t\t\n\t\t\t1\n\t\t\tgood\n\t\t\n\t\n\t1\n\n +\n\n\t\n\t\t\n\t\t\t\n\t\t\t\tnumber\n\t\t\t\tUInt64\n\t\t\t\n\t\t\t\n\t\t\t\tgood\n\t\t\t\tString\n\t\t\t\n\t\t\n\t\n\t\n\t\t\n\t\t\t2\n\t\t\tgood\n\t\t\n\t\n\t1\n\n +formatRowNoNewline +\n\n\t\n\t\t\n\t\t\t\n\t\t\t\tnumber\n\t\t\t\tUInt64\n\t\t\t\n\t\t\t\n\t\t\t\tgood\n\t\t\t\tString\n\t\t\t\n\t\t\n\t\n\t\n\t\t\n\t\t\t0\n\t\t\tgood\n\t\t\n\t\n\t1\n +\n\n\t\n\t\t\n\t\t\t\n\t\t\t\tnumber\n\t\t\t\tUInt64\n\t\t\t\n\t\t\t\n\t\t\t\tgood\n\t\t\t\tString\n\t\t\t\n\t\t\n\t\n\t\n\t\t\n\t\t\t1\n\t\t\tgood\n\t\t\n\t\n\t1\n 
+\n\n\t\n\t\t\n\t\t\t\n\t\t\t\tnumber\n\t\t\t\tUInt64\n\t\t\t\n\t\t\t\n\t\t\t\tgood\n\t\t\t\tString\n\t\t\t\n\t\t\n\t\n\t\n\t\t\n\t\t\t2\n\t\t\tgood\n\t\t\n\t\n\t1\n +Markdown +formatRow +| number | good |\n|-:|:-|\n| 0 | good |\n +| number | good |\n|-:|:-|\n| 1 | good |\n +| number | good |\n|-:|:-|\n| 2 | good |\n +formatRowNoNewline +| number | good |\n|-:|:-|\n| 0 | good | +| number | good |\n|-:|:-|\n| 1 | good | +| number | good |\n|-:|:-|\n| 2 | good | +CustomSeparated +formatRow +0\tgood\n +1\tgood\n +2\tgood\n +formatRowNoNewline +0\tgood +1\tgood +2\tgood +SQLInsert +formatRow +INSERT INTO table (`number`, `good`) VALUES (0, \'good\');\n +INSERT INTO table (`number`, `good`) VALUES (1, \'good\');\n +INSERT INTO table (`number`, `good`) VALUES (2, \'good\');\n +formatRowNoNewline +INSERT INTO table (`number`, `good`) VALUES (0, \'good\'); +INSERT INTO table (`number`, `good`) VALUES (1, \'good\'); +INSERT INTO table (`number`, `good`) VALUES (2, \'good\'); +Vertical +formatRow +Row 1:\n──────\nnumber: 0\ngood: good\n +Row 1:\n──────\nnumber: 1\ngood: good\n +Row 1:\n──────\nnumber: 2\ngood: good\n +formatRowNoNewline +Row 1:\n──────\nnumber: 0\ngood: good +Row 1:\n──────\nnumber: 1\ngood: good +Row 1:\n──────\nnumber: 2\ngood: good +JSON +formatRow +{\n\t"meta":\n\t[\n\t\t{\n\t\t\t"name": "number",\n\t\t\t"type": "UInt64"\n\t\t},\n\t\t{\n\t\t\t"name": "good",\n\t\t\t"type": "String"\n\t\t}\n\t],\n\n\t"data":\n\t[\n\t\t{\n\t\t\t"number": "0",\n\t\t\t"good": "good"\n\t\t}\n\t],\n\n\t"rows": 1\n}\n +{\n\t"meta":\n\t[\n\t\t{\n\t\t\t"name": "number",\n\t\t\t"type": "UInt64"\n\t\t},\n\t\t{\n\t\t\t"name": "good",\n\t\t\t"type": "String"\n\t\t}\n\t],\n\n\t"data":\n\t[\n\t\t{\n\t\t\t"number": "1",\n\t\t\t"good": "good"\n\t\t}\n\t],\n\n\t"rows": 1\n}\n +{\n\t"meta":\n\t[\n\t\t{\n\t\t\t"name": "number",\n\t\t\t"type": "UInt64"\n\t\t},\n\t\t{\n\t\t\t"name": "good",\n\t\t\t"type": "String"\n\t\t}\n\t],\n\n\t"data":\n\t[\n\t\t{\n\t\t\t"number": "2",\n\t\t\t"good": "good"\n\t\t}\n\t],\n\n\t"rows": 1\n}\n +formatRowNoNewline +{\n\t"meta":\n\t[\n\t\t{\n\t\t\t"name": "number",\n\t\t\t"type": "UInt64"\n\t\t},\n\t\t{\n\t\t\t"name": "good",\n\t\t\t"type": "String"\n\t\t}\n\t],\n\n\t"data":\n\t[\n\t\t{\n\t\t\t"number": "0",\n\t\t\t"good": "good"\n\t\t}\n\t],\n\n\t"rows": 1\n} +{\n\t"meta":\n\t[\n\t\t{\n\t\t\t"name": "number",\n\t\t\t"type": "UInt64"\n\t\t},\n\t\t{\n\t\t\t"name": "good",\n\t\t\t"type": "String"\n\t\t}\n\t],\n\n\t"data":\n\t[\n\t\t{\n\t\t\t"number": "1",\n\t\t\t"good": "good"\n\t\t}\n\t],\n\n\t"rows": 1\n} +{\n\t"meta":\n\t[\n\t\t{\n\t\t\t"name": "number",\n\t\t\t"type": "UInt64"\n\t\t},\n\t\t{\n\t\t\t"name": "good",\n\t\t\t"type": "String"\n\t\t}\n\t],\n\n\t"data":\n\t[\n\t\t{\n\t\t\t"number": "2",\n\t\t\t"good": "good"\n\t\t}\n\t],\n\n\t"rows": 1\n} +JSONCompact +formatRow +{\n\t"meta":\n\t[\n\t\t{\n\t\t\t"name": "number",\n\t\t\t"type": "UInt64"\n\t\t},\n\t\t{\n\t\t\t"name": "good",\n\t\t\t"type": "String"\n\t\t}\n\t],\n\n\t"data":\n\t[\n\t\t["0", "good"]\n\t],\n\n\t"rows": 1\n}\n +{\n\t"meta":\n\t[\n\t\t{\n\t\t\t"name": "number",\n\t\t\t"type": "UInt64"\n\t\t},\n\t\t{\n\t\t\t"name": "good",\n\t\t\t"type": "String"\n\t\t}\n\t],\n\n\t"data":\n\t[\n\t\t["1", "good"]\n\t],\n\n\t"rows": 1\n}\n +{\n\t"meta":\n\t[\n\t\t{\n\t\t\t"name": "number",\n\t\t\t"type": "UInt64"\n\t\t},\n\t\t{\n\t\t\t"name": "good",\n\t\t\t"type": "String"\n\t\t}\n\t],\n\n\t"data":\n\t[\n\t\t["2", "good"]\n\t],\n\n\t"rows": 1\n}\n +formatRowNoNewline +{\n\t"meta":\n\t[\n\t\t{\n\t\t\t"name": "number",\n\t\t\t"type": 
"UInt64"\n\t\t},\n\t\t{\n\t\t\t"name": "good",\n\t\t\t"type": "String"\n\t\t}\n\t],\n\n\t"data":\n\t[\n\t\t["0", "good"]\n\t],\n\n\t"rows": 1\n} +{\n\t"meta":\n\t[\n\t\t{\n\t\t\t"name": "number",\n\t\t\t"type": "UInt64"\n\t\t},\n\t\t{\n\t\t\t"name": "good",\n\t\t\t"type": "String"\n\t\t}\n\t],\n\n\t"data":\n\t[\n\t\t["1", "good"]\n\t],\n\n\t"rows": 1\n} +{\n\t"meta":\n\t[\n\t\t{\n\t\t\t"name": "number",\n\t\t\t"type": "UInt64"\n\t\t},\n\t\t{\n\t\t\t"name": "good",\n\t\t\t"type": "String"\n\t\t}\n\t],\n\n\t"data":\n\t[\n\t\t["2", "good"]\n\t],\n\n\t"rows": 1\n} +Values +formatRow +(0,\'good\') +(1,\'good\') +(2,\'good\') +formatRowNoNewline +(0,\'good\' +(1,\'good\' +(2,\'good\' diff --git a/tests/queries/0_stateless/01420_format_row.sql b/tests/queries/0_stateless/01420_format_row.sql deleted file mode 100644 index c8eeecc4158..00000000000 --- a/tests/queries/0_stateless/01420_format_row.sql +++ /dev/null @@ -1,7 +0,0 @@ -select formatRow('CSV', number, 'good') from numbers(3); -select formatRowNoNewline('TSV', number, DATE '2001-12-12', 1.4) from numbers(3); -select formatRow('JSONEachRow', number, toNullable(3), Null) from numbers(3); -select formatRowNoNewline('JSONEachRow', *) from numbers(3); - --- unknown format -select formatRow('aaa', *) from numbers(3); -- { serverError 73 } diff --git a/tests/queries/0_stateless/01420_format_row.sql.j2 b/tests/queries/0_stateless/01420_format_row.sql.j2 new file mode 100644 index 00000000000..98a6858f2a9 --- /dev/null +++ b/tests/queries/0_stateless/01420_format_row.sql.j2 @@ -0,0 +1,17 @@ +-- Tags: no-fasttest + +set output_format_write_statistics=0; + +{% for format in ['CSV', 'TSV', 'JSONEachRow', 'JSONCompactEachRow', 'TSKV', 'XML', 'Markdown', 'CustomSeparated', 'SQLInsert', 'Vertical', 'JSON', 'JSONCompact', 'Values'] -%} + +select '{{ format }}'; +select 'formatRow'; +select formatRow('{{ format }}', number, good) from (select number, 'good' as good from numbers(3)); +select 'formatRowNoNewline'; +select formatRowNoNewline('{{ format }}', number, good) from (select number, 'good' as good from numbers(3)); + +{% endfor -%} + +-- unknown format +select formatRow('aaa', *) from numbers(3); -- { serverError 73 } + diff --git a/tests/queries/0_stateless/01459_manual_write_to_replicas_quorum_detach_attach.reference b/tests/queries/0_stateless/01459_manual_write_to_replicas_quorum_detach_attach.reference new file mode 100644 index 00000000000..0d6e68f032f --- /dev/null +++ b/tests/queries/0_stateless/01459_manual_write_to_replicas_quorum_detach_attach.reference @@ -0,0 +1,6 @@ +10 0 9 45 +10 0 9 45 +10 0 9 45 +10 0 9 45 +10 0 9 45 +10 0 9 45 diff --git a/tests/queries/0_stateless/01459_manual_write_to_replicas_quorum_detach_attach.sh b/tests/queries/0_stateless/01459_manual_write_to_replicas_quorum_detach_attach.sh new file mode 100755 index 00000000000..01c88336282 --- /dev/null +++ b/tests/queries/0_stateless/01459_manual_write_to_replicas_quorum_detach_attach.sh @@ -0,0 +1,51 @@ +#!/usr/bin/env bash +# Tags: replica, no-replicated-database, no-parallel +# Tag no-replicated-database: Fails due to additional replicas or shards + +set -e + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +NUM_REPLICAS=6 + +for i in $(seq 1 $NUM_REPLICAS); do + $CLICKHOUSE_CLIENT -n -q " + DROP TABLE IF EXISTS r$i SYNC; + CREATE TABLE r$i (x UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/r', 'r$i') ORDER BY x; + " +done + +valid_exceptions_to_retry='Quorum for previous write has not been satisfied yet|Another quorum insert has been already started|Unexpected logical error while adding block' + +function thread { + for x in {0..9}; do + while true; do + $CLICKHOUSE_CLIENT --query "DETACH TABLE r$1" + $CLICKHOUSE_CLIENT --query "ATTACH TABLE r$1" + $CLICKHOUSE_CLIENT --insert_quorum 3 --insert_quorum_parallel 0 --insert_keeper_fault_injection_probability=0 --query "INSERT INTO r$1 SELECT $x" 2>&1 | grep -qE "$valid_exceptions_to_retry" || break + done + done +} + +for i in $(seq 1 $NUM_REPLICAS); do + thread $i & +done + +wait + +for i in $(seq 1 $NUM_REPLICAS); do + $CLICKHOUSE_CLIENT -n -q " + SYSTEM SYNC REPLICA r$i; + SELECT count(), min(x), max(x), sum(x) FROM r$i;" +done + +for i in $(seq 1 $NUM_REPLICAS); do + # We filter out 'Removing temporary directory' on table DROP because in this test + # we constantly DETACH and ATTACH tables. So some replica can start fetching some part + # and other replica can be DETACHed during fetch. We will get unfinished tmp directory + # which should be removed in background, but it's async operation so the tmp directory can + # left on disk until table DROP. + $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS r$i SYNC;" 2>&1 | grep -v 'Removing temporary directory' ||: +done diff --git a/tests/queries/0_stateless/01502_long_log_tinylog_deadlock_race.sh b/tests/queries/0_stateless/01502_long_log_tinylog_deadlock_race.sh index 1087a7ed96b..3cf94a0b2bd 100755 --- a/tests/queries/0_stateless/01502_long_log_tinylog_deadlock_race.sh +++ b/tests/queries/0_stateless/01502_long_log_tinylog_deadlock_race.sh @@ -88,3 +88,9 @@ test_with_engine Log $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS t1" $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS t2" + +# It is not enough to kill the commands running the queries, we also have to kill the queries, the server might be still running +# to avoid the following error: +# Code: 219. DB::Exception: New table appeared in database being dropped or detached. Try again. (DATABASE_NOT_EMPTY) + +$CLICKHOUSE_CLIENT -q "KILL QUERY WHERE current_database = currentDatabase() SYNC FORMAT Null" diff --git a/tests/queries/0_stateless/01509_output_format_pretty_row_numbers.reference b/tests/queries/0_stateless/01509_output_format_pretty_row_numbers.reference index 9010c371fdf..a270447ffc9 100644 --- a/tests/queries/0_stateless/01509_output_format_pretty_row_numbers.reference +++ b/tests/queries/0_stateless/01509_output_format_pretty_row_numbers.reference @@ -209,4 +209,284 @@ 7. 6 8. 7 9. 8 +10. 9 + ┏━━━┓ + ┃ a ┃ + ┡━━━┩ +1. │ 1 │ + └───┘ + ┏━━━┓ + ┃ a ┃ + ┡━━━┩ +2. │ 2 │ + └───┘ + ┌─a─┐ +1. │ 1 │ + └───┘ + ┌─a─┐ +2. │ 2 │ + └───┘ + ┌─a─┐ +1. │ 1 │ +2. │ 2 │ + └───┘ + ┏━━━┓ + ┃ a ┃ + ┡━━━┩ +1. │ 1 │ + └───┘ + ┏━━━┓ + ┃ a ┃ + ┡━━━┩ +2. │ 2 │ + └───┘ + ┌─a─┐ +1. │ 1 │ + └───┘ + ┌─a─┐ +2. │ 2 │ + └───┘ + a + +1. 1 + a + +2. 2 + a + +1. 1 + a + +2. 2 + ┏━━━━━━━━┓ + ┃ number ┃ + ┡━━━━━━━━┩ +1. │ 0 │ + └────────┘ + ┏━━━━━━━━┓ + ┃ number ┃ + ┡━━━━━━━━┩ +2. │ 1 │ + └────────┘ + ┏━━━━━━━━┓ + ┃ number ┃ + ┡━━━━━━━━┩ +3. │ 2 │ + └────────┘ + ┏━━━━━━━━┓ + ┃ number ┃ + ┡━━━━━━━━┩ +4. │ 3 │ + └────────┘ + ┏━━━━━━━━┓ + ┃ number ┃ + ┡━━━━━━━━┩ +5. 
│ 4 │ + └────────┘ + ┏━━━━━━━━┓ + ┃ number ┃ + ┡━━━━━━━━┩ +6. │ 5 │ + └────────┘ + ┏━━━━━━━━┓ + ┃ number ┃ + ┡━━━━━━━━┩ +7. │ 6 │ + └────────┘ + ┏━━━━━━━━┓ + ┃ number ┃ + ┡━━━━━━━━┩ +8. │ 7 │ + └────────┘ + ┏━━━━━━━━┓ + ┃ number ┃ + ┡━━━━━━━━┩ +9. │ 8 │ + └────────┘ + ┏━━━━━━━━┓ + ┃ number ┃ + ┡━━━━━━━━┩ +10. │ 9 │ + └────────┘ + ┌─number─┐ +1. │ 0 │ + └────────┘ + ┌─number─┐ +2. │ 1 │ + └────────┘ + ┌─number─┐ +3. │ 2 │ + └────────┘ + ┌─number─┐ +4. │ 3 │ + └────────┘ + ┌─number─┐ +5. │ 4 │ + └────────┘ + ┌─number─┐ +6. │ 5 │ + └────────┘ + ┌─number─┐ +7. │ 6 │ + └────────┘ + ┌─number─┐ +8. │ 7 │ + └────────┘ + ┌─number─┐ +9. │ 8 │ + └────────┘ + ┌─number─┐ +10. │ 9 │ + └────────┘ + ┌─number─┐ + 1. │ 0 │ + 2. │ 1 │ + 3. │ 2 │ + 4. │ 3 │ + 5. │ 4 │ + 6. │ 5 │ + 7. │ 6 │ + 8. │ 7 │ + 9. │ 8 │ +10. │ 9 │ + └────────┘ + ┏━━━━━━━━┓ + ┃ number ┃ + ┡━━━━━━━━┩ +1. │ 0 │ + └────────┘ + ┏━━━━━━━━┓ + ┃ number ┃ + ┡━━━━━━━━┩ +2. │ 1 │ + └────────┘ + ┏━━━━━━━━┓ + ┃ number ┃ + ┡━━━━━━━━┩ +3. │ 2 │ + └────────┘ + ┏━━━━━━━━┓ + ┃ number ┃ + ┡━━━━━━━━┩ +4. │ 3 │ + └────────┘ + ┏━━━━━━━━┓ + ┃ number ┃ + ┡━━━━━━━━┩ +5. │ 4 │ + └────────┘ + ┏━━━━━━━━┓ + ┃ number ┃ + ┡━━━━━━━━┩ +6. │ 5 │ + └────────┘ + ┏━━━━━━━━┓ + ┃ number ┃ + ┡━━━━━━━━┩ +7. │ 6 │ + └────────┘ + ┏━━━━━━━━┓ + ┃ number ┃ + ┡━━━━━━━━┩ +8. │ 7 │ + └────────┘ + ┏━━━━━━━━┓ + ┃ number ┃ + ┡━━━━━━━━┩ +9. │ 8 │ + └────────┘ + ┏━━━━━━━━┓ + ┃ number ┃ + ┡━━━━━━━━┩ +10. │ 9 │ + └────────┘ + ┌─number─┐ +1. │ 0 │ + └────────┘ + ┌─number─┐ +2. │ 1 │ + └────────┘ + ┌─number─┐ +3. │ 2 │ + └────────┘ + ┌─number─┐ +4. │ 3 │ + └────────┘ + ┌─number─┐ +5. │ 4 │ + └────────┘ + ┌─number─┐ +6. │ 5 │ + └────────┘ + ┌─number─┐ +7. │ 6 │ + └────────┘ + ┌─number─┐ +8. │ 7 │ + └────────┘ + ┌─number─┐ +9. │ 8 │ + └────────┘ + ┌─number─┐ +10. │ 9 │ + └────────┘ + number + +1. 0 + number + +2. 1 + number + +3. 2 + number + +4. 3 + number + +5. 4 + number + +6. 5 + number + +7. 6 + number + +8. 7 + number + +9. 8 + number + +10. 9 + number + +1. 0 + number + +2. 1 + number + +3. 2 + number + +4. 3 + number + +5. 4 + number + +6. 5 + number + +7. 6 + number + +8. 7 + number + +9. 8 + number + 10. 
9 diff --git a/tests/queries/0_stateless/01509_output_format_pretty_row_numbers.sql b/tests/queries/0_stateless/01509_output_format_pretty_row_numbers.sql index 3c0eeaf0e53..f8ec0be74d7 100644 --- a/tests/queries/0_stateless/01509_output_format_pretty_row_numbers.sql +++ b/tests/queries/0_stateless/01509_output_format_pretty_row_numbers.sql @@ -13,3 +13,21 @@ SELECT * FROM numbers(10) FORMAT PrettyNoEscapes; SELECT * FROM numbers(10) FORMAT PrettyCompactNoEscapes; SELECT * FROM numbers(10) FORMAT PrettySpaceNoEscapes; SELECT * FROM numbers(10) FORMAT PrettySpace; + +SET max_block_size=1; + +SELECT * FROM (SELECT 1 AS a UNION ALL SELECT 2 as a) ORDER BY a FORMAT Pretty; +SELECT * FROM (SELECT 1 AS a UNION ALL SELECT 2 as a) ORDER BY a FORMAT PrettyCompact; +SELECT * FROM (SELECT 1 AS a UNION ALL SELECT 2 as a) ORDER BY a FORMAT PrettyCompactMonoBlock; +SELECT * FROM (SELECT 1 AS a UNION ALL SELECT 2 as a) ORDER BY a FORMAT PrettyNoEscapes; +SELECT * FROM (SELECT 1 AS a UNION ALL SELECT 2 as a) ORDER BY a FORMAT PrettyCompactNoEscapes; +SELECT * FROM (SELECT 1 AS a UNION ALL SELECT 2 as a) ORDER BY a FORMAT PrettySpace; +SELECT * FROM (SELECT 1 AS a UNION ALL SELECT 2 as a) ORDER BY a FORMAT PrettySpaceNoEscapes; + +SELECT * FROM numbers(10) ORDER BY number FORMAT Pretty; +SELECT * FROM numbers(10) ORDER BY number FORMAT PrettyCompact; +SELECT * FROM numbers(10) ORDER BY number FORMAT PrettyCompactMonoBlock; +SELECT * FROM numbers(10) ORDER BY number FORMAT PrettyNoEscapes; +SELECT * FROM numbers(10) ORDER BY number FORMAT PrettyCompactNoEscapes; +SELECT * FROM numbers(10) ORDER BY number FORMAT PrettySpace; +SELECT * FROM numbers(10) ORDER BY number FORMAT PrettySpaceNoEscapes; diff --git a/tests/queries/0_stateless/01543_avro_deserialization_with_lc.sh b/tests/queries/0_stateless/01543_avro_deserialization_with_lc.sh index df43fd273e8..6f48456f71b 100755 --- a/tests/queries/0_stateless/01543_avro_deserialization_with_lc.sh +++ b/tests/queries/0_stateless/01543_avro_deserialization_with_lc.sh @@ -23,6 +23,6 @@ $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS test_01543" $CLICKHOUSE_CLIENT --query "SELECT number % 2 ? 
number: NULL as x from numbers(10) FORMAT Avro" > $USER_FILES_PATH/test_01543.avro -$CLICKHOUSE_CLIENT --query "SELECT * FROM file('test_01543.avro', 'Avro', 'x LowCardinality(Nullable(UInt64))')" +$CLICKHOUSE_CLIENT --query "SELECT * FROM file('test_01543.avro', 'Avro', 'x LowCardinality(Nullable(UInt64))')" --allow_suspicious_low_cardinality_types 1 rm $USER_FILES_PATH/test_01543.avro diff --git a/tests/queries/0_stateless/01553_settings_early_apply.reference b/tests/queries/0_stateless/01553_settings_early_apply.reference index e4e4738c6ab..a42a7daadaa 100644 --- a/tests/queries/0_stateless/01553_settings_early_apply.reference +++ b/tests/queries/0_stateless/01553_settings_early_apply.reference @@ -1,3 +1,39 @@ +{ + "meta": + [ + { + "name": "number", + "type": "UInt64" + } + ], + + "data": + [ + + ], + + "rows": 0 +} +number +UInt64 +"number" +"UInt64" +["number"] +["UInt64"] + + + + + + number + UInt64 + + + + + + 0 + number 0 diff --git a/tests/queries/0_stateless/01553_settings_early_apply.sql b/tests/queries/0_stateless/01553_settings_early_apply.sql index 13bb2d30d4d..e217f20a926 100644 --- a/tests/queries/0_stateless/01553_settings_early_apply.sql +++ b/tests/queries/0_stateless/01553_settings_early_apply.sql @@ -1,3 +1,5 @@ +set output_format_write_statistics=0; + select * from numbers(100) settings max_result_rows = 1; -- { serverError 396 } select * from numbers(100) FORMAT JSON settings max_result_rows = 1; -- { serverError 396 } select * from numbers(100) FORMAT TSVWithNamesAndTypes settings max_result_rows = 1; -- { serverError 396 } diff --git a/tests/queries/0_stateless/01606_git_import.sh b/tests/queries/0_stateless/01606_git_import.sh index 585b39e21ab..c9aa2c7d82e 100755 --- a/tests/queries/0_stateless/01606_git_import.sh +++ b/tests/queries/0_stateless/01606_git_import.sh @@ -1,5 +1,6 @@ #!/usr/bin/env bash -# Tags: no-debug +# Tags: no-debug, no-tsan, no-msan, no-ubsan, no-asan +# ^ because inserting a 50 MB file can be slow. 
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -126,4 +127,3 @@ DROP TABLE commits; DROP TABLE file_changes; DROP TABLE line_changes; " - diff --git a/tests/queries/0_stateless/01674_filter_by_uint8.reference b/tests/queries/0_stateless/01674_filter_by_uint8.reference index 6b522898280..435423ba455 100644 --- a/tests/queries/0_stateless/01674_filter_by_uint8.reference +++ b/tests/queries/0_stateless/01674_filter_by_uint8.reference @@ -2,7 +2,12 @@ 0 255 1 ['foo','bar'] 1 1 -2 ['foo','bar'] 2 1 -3 ['foo','bar'] 3 1 -4 ['foo','bar'] 4 1 -5 ['foo','bar'] 5 1 +2 ['foo','bar'] 2 2 +3 ['foo','bar'] 3 3 +4 ['foo','bar'] 4 4 +5 ['foo','bar'] 5 5 +1 ['foo','bar'] 1 1 +2 ['foo','bar'] 2 2 +3 ['foo','bar'] 3 3 +4 ['foo','bar'] 4 4 +5 ['foo','bar'] 5 5 diff --git a/tests/queries/0_stateless/01674_filter_by_uint8.sql b/tests/queries/0_stateless/01674_filter_by_uint8.sql index 960153d9c5a..0bf11cea59b 100644 --- a/tests/queries/0_stateless/01674_filter_by_uint8.sql +++ b/tests/queries/0_stateless/01674_filter_by_uint8.sql @@ -10,5 +10,6 @@ ENGINE = MergeTree ORDER BY u; INSERT INTO t_filter SELECT toString(number), ['foo', 'bar'], number, toUInt8(number) FROM numbers(1000); SELECT * FROM t_filter WHERE f LIMIT 5; +SELECT * FROM t_filter WHERE f != 0 LIMIT 5; DROP TABLE IF EXISTS t_filter; diff --git a/tests/queries/0_stateless/01823_explain_json.sh b/tests/queries/0_stateless/01823_explain_json.sh index 4d7aa5f88d6..7868bc0cc78 100755 --- a/tests/queries/0_stateless/01823_explain_json.sh +++ b/tests/queries/0_stateless/01823_explain_json.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT -q "EXPLAIN json = 1, description = 0 SELECT 1 UNION ALL SELECT 2 FORMAT TSVRaw" echo "--------" -$CLICKHOUSE_CLIENT -q "explain json = 1, description = 0, header = 1 select 1, 2 + dummy FORMAT TSVRaw" | grep Header -m 1 -A 8 +$CLICKHOUSE_CLIENT -q "explain json = 1, description = 0, header = 1 select 1, 2 + dummy FORMAT TSVRaw" 2> /dev/null | grep Header -m 1 -A 8 echo "--------" $CLICKHOUSE_CLIENT -q "EXPLAIN json = 1, actions = 1, header = 1, description = 0 diff --git a/tests/queries/0_stateless/01825_type_json_multiple_files.sh b/tests/queries/0_stateless/01825_type_json_multiple_files.sh index 9120568bb17..91e7ffb5edf 100755 --- a/tests/queries/0_stateless/01825_type_json_multiple_files.sh +++ b/tests/queries/0_stateless/01825_type_json_multiple_files.sh @@ -17,7 +17,7 @@ done ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS t_json_files" ${CLICKHOUSE_CLIENT} -q "CREATE TABLE t_json_files (file String, data JSON) ENGINE = MergeTree ORDER BY tuple()" --allow_experimental_object_type 1 -${CLICKHOUSE_CLIENT} -q "INSERT INTO t_json_files SELECT _file, data FROM file('01825_file_*.json', 'JSONAsObject', 'data JSON')" +${CLICKHOUSE_CLIENT} -q "INSERT INTO t_json_files SELECT _file, data FROM file('01825_file_*.json', 'JSONAsObject', 'data JSON')" --allow_experimental_object_type 1 ${CLICKHOUSE_CLIENT} -q "SELECT data FROM t_json_files ORDER BY file FORMAT JSONEachRow" --output_format_json_named_tuples_as_objects 1 ${CLICKHOUSE_CLIENT} -q "SELECT toTypeName(data) FROM t_json_files LIMIT 1" @@ -26,7 +26,7 @@ ${CLICKHOUSE_CLIENT} -q "TRUNCATE TABLE IF EXISTS t_json_files" ${CLICKHOUSE_CLIENT} -q "INSERT INTO t_json_files \ SELECT _file, data FROM file('01825_file_*.json', 'JSONAsObject', 'data JSON') \ - ORDER BY _file LIMIT 3" --max_threads 1 --min_insert_block_size_rows 1 --max_insert_block_size 1 --max_block_size 1 + ORDER BY _file 
LIMIT 3" --max_threads 1 --min_insert_block_size_rows 1 --max_insert_block_size 1 --max_block_size 1 --allow_experimental_object_type 1 ${CLICKHOUSE_CLIENT} -q "SELECT data FROM t_json_files ORDER BY file FORMAT JSONEachRow" --output_format_json_named_tuples_as_objects 1 ${CLICKHOUSE_CLIENT} -q "SELECT toTypeName(data) FROM t_json_files LIMIT 1" @@ -35,7 +35,7 @@ ${CLICKHOUSE_CLIENT} -q "TRUNCATE TABLE IF EXISTS t_json_files" ${CLICKHOUSE_CLIENT} -q "INSERT INTO t_json_files \ SELECT _file, data FROM file('01825_file_*.json', 'JSONAsObject', 'data JSON') \ - WHERE _file IN ('01825_file_1.json', '01825_file_3.json')" + WHERE _file IN ('01825_file_1.json', '01825_file_3.json')" --allow_experimental_object_type 1 ${CLICKHOUSE_CLIENT} -q "SELECT data FROM t_json_files ORDER BY file FORMAT JSONEachRow" --output_format_json_named_tuples_as_objects 1 ${CLICKHOUSE_CLIENT} -q "SELECT toTypeName(data) FROM t_json_files LIMIT 1" diff --git a/tests/queries/0_stateless/01825_type_json_order_by.sql b/tests/queries/0_stateless/01825_type_json_order_by.sql index 1d5768fe0b4..98b2a6105d9 100644 --- a/tests/queries/0_stateless/01825_type_json_order_by.sql +++ b/tests/queries/0_stateless/01825_type_json_order_by.sql @@ -1,5 +1,6 @@ -- Tags: no-fasttest +SET allow_experimental_object_type = 1; SELECT dummy FROM system.one ORDER BY materialize('{"k":"v"}'::JSON); SELECT dummy FROM system.one ORDER BY materialize('{"k":"v"}'::JSON), dummy; SELECT materialize('{"k":"v"}'::JSON) SETTINGS extremes = 1; diff --git a/tests/queries/0_stateless/01906_lc_in_bug.reference b/tests/queries/0_stateless/01906_lc_in_bug.reference index 9fe1650abf0..adce940e346 100644 --- a/tests/queries/0_stateless/01906_lc_in_bug.reference +++ b/tests/queries/0_stateless/01906_lc_in_bug.reference @@ -1,2 +1,3 @@ 1 0 3 1 +0 diff --git a/tests/queries/0_stateless/01906_lc_in_bug.sql b/tests/queries/0_stateless/01906_lc_in_bug.sql index f8f41da31ae..581053e14e1 100644 --- a/tests/queries/0_stateless/01906_lc_in_bug.sql +++ b/tests/queries/0_stateless/01906_lc_in_bug.sql @@ -6,3 +6,8 @@ insert into tab values ('a'), ('bb'), ('a'), ('cc'); select count() as c, x in ('a', 'bb') as g from tab group by g order by c; drop table if exists tab; + +-- https://github.com/ClickHouse/ClickHouse/issues/44503 +CREATE TABLE test(key Int32) ENGINE = MergeTree ORDER BY (key); +insert into test select intDiv(number,100) from numbers(10000000); +SELECT COUNT() FROM test WHERE key <= 100000 AND (NOT (toLowCardinality('') IN (SELECT ''))); diff --git a/tests/queries/0_stateless/01956_fuse_quantile_optimization.reference b/tests/queries/0_stateless/01956_fuse_quantile_optimization.reference deleted file mode 100644 index dddab828a25..00000000000 --- a/tests/queries/0_stateless/01956_fuse_quantile_optimization.reference +++ /dev/null @@ -1,99 +0,0 @@ -2016-06-15 23:00:00 2016-06-15 23:00:00 -2016-06-15 23:00:00 2016-06-15 23:00:00 -2016-06-15 23:00:00 2016-06-15 23:00:00 -2016-06-15 23:00:00 2016-06-15 23:00:00 2016-06-15 23:00:00 -30000 30000 30000 -30000 30000 30000 -2016-06-15 23:00:16 2016-06-15 23:00:16 2016-06-15 23:00:16 -2016-06-15 23:00:16 2016-06-15 23:00:16 2016-06-15 23:00:16 -2016-04-02 17:23:12 2016-04-02 17:23:12 2016-04-02 17:23:12 ----------After fuse result----------- -quantile: -SELECT - quantiles(0.2, 0.3)(d)[1], - quantiles(0.2, 0.3)(d)[2] -FROM datetime -2016-06-15 23:00:00 2016-06-15 23:00:00 -quantileDeterministic: -SELECT - quantilesDeterministic(0.2, 0.5)(d, 1)[1], - quantilesDeterministic(0.2, 0.5)(d, 1)[2] -FROM datetime -2016-06-15 
23:00:00 2016-06-15 23:00:00 -quantileExact: -SELECT - quantilesExact(0.2, 0.5)(d)[1], - quantilesExact(0.2, 0.5)(d)[2] -FROM datetime -2016-06-15 23:00:00 2016-06-15 23:00:00 -quantileExactWeighted: -SELECT - quantilesExactWeighted(0.2, 0.4)(d, 1)[1], - quantilesExactWeighted(0.2, 0.4)(d, 1)[2], - quantileExactWeighted(0.3)(d, 2) -FROM datetime -2016-06-15 23:00:00 2016-06-15 23:00:00 2016-06-15 23:00:00 -quantileTiming: -SELECT - quantilesTiming(0.2, 0.3)(d)[1], - quantilesTiming(0.2, 0.3)(d)[2], - quantileTiming(0.2)(d + 1) -FROM datetime -30000 30000 30000 -quantileTimingWeighted: -SELECT - quantilesTimingWeighted(0.2, 0.3)(d, 1)[1], - quantilesTimingWeighted(0.2, 0.3)(d, 1)[2], - quantileTimingWeighted(0.2)(d, 2) -FROM datetime -30000 30000 30000 -quantileTDigest: -SELECT - quantilesTDigest(0.2, 0.3)(d)[1], - quantilesTDigest(0.2, 0.3)(d)[2], - quantileTDigest(0.2)(d + 1) -FROM datetime -2016-06-15 23:00:16 2016-06-15 23:00:16 2016-06-15 23:00:16 -quantileTDigestWeighted: -SELECT - quantilesTDigestWeighted(0.2, 0.3)(d, 1)[1], - quantilesTDigestWeighted(0.2, 0.3)(d, 1)[2], - quantileTDigestWeighted(0.4)(d, 2) -FROM datetime -2016-06-15 23:00:16 2016-06-15 23:00:16 2016-06-15 23:00:16 -quantileBFloat16: -SELECT - quantilesBFloat16(0.2, 0.3)(d)[1], - quantilesBFloat16(0.2, 0.3)(d)[2], - quantileBFloat16(0.4)(d + 1) -FROM datetime -2016-04-02 17:23:12 2016-04-02 17:23:12 2016-04-02 17:23:12 -quantileBFloat16Weighted: -SELECT - quantilesBFloat16Weighted(0.2, 0.3)(d, 1)[1], - quantilesBFloat16Weighted(0.2, 0.3)(d, 1)[2], - quantileBFloat16Weighted(0.2)(d, 2) -FROM datetime -2016-04-02 17:23:12 2016-04-02 17:23:12 2016-04-02 17:23:12 -SELECT - quantiles(0.2, 0.3, 0.2)(d)[1] AS k, - quantiles(0.2, 0.3, 0.2)(d)[2] -FROM datetime -ORDER BY quantiles(0.2, 0.3, 0.2)(d)[3] ASC -0 4 7.2 7.6 -1 5 8.2 8.6 -SELECT - b, - quantiles(0.5, 0.9, 0.95)(x)[1] AS a, - quantiles(0.5, 0.9, 0.95)(x)[2] AS y, - quantiles(0.5, 0.9, 0.95)(x)[3] -FROM -( - SELECT - number AS x, - number % 2 AS b - FROM numbers(10) -) -GROUP BY b -ORDER BY b ASC -1 1 1 diff --git a/tests/queries/0_stateless/01956_fuse_quantile_optimization.sql b/tests/queries/0_stateless/01956_fuse_quantile_optimization.sql deleted file mode 100644 index 1f08439c0b6..00000000000 --- a/tests/queries/0_stateless/01956_fuse_quantile_optimization.sql +++ /dev/null @@ -1,76 +0,0 @@ -DROP TABLE IF EXISTS datetime; -CREATE TABLE datetime (d DateTime('UTC')) ENGINE = Memory; -INSERT INTO datetime(d) VALUES(toDateTime('2016-06-15 23:00:00', 'UTC')) - -SET optimize_syntax_fuse_functions = true; - -SELECT quantile(0.2)(d), quantile(0.3)(d) FROM datetime; -SELECT quantileDeterministic(0.2)(d, 1), quantileDeterministic(0.5)(d, 1) FROM datetime; -SELECT quantileExact(0.2)(d), quantileExact(0.5)(d) FROM datetime; -SELECT quantileExactWeighted(0.2)(d, 1), quantileExactWeighted(0.4)(d, 1), quantileExactWeighted(0.3)(d, 2) FROM datetime; -SELECT quantileTiming(0.2)(d), quantileTiming(0.3)(d), quantileTiming(0.2)(d+1) FROM datetime; -SELECT quantileTimingWeighted(0.2)(d, 1), quantileTimingWeighted(0.3)(d, 1), quantileTimingWeighted(0.2)(d, 2) FROM datetime; -SELECT quantileTDigest(0.2)(d), quantileTDigest(0.3)(d), quantileTDigest(0.2)(d + 1) FROM datetime; -SELECT quantileTDigestWeighted(0.2)(d, 1), quantileTDigestWeighted(0.3)(d, 1), quantileTDigestWeighted(0.4)(d, 2) FROM datetime; -SELECT quantileBFloat16(0.2)(d), quantileBFloat16(0.3)(d), quantileBFloat16(0.4)(d + 1) FROM datetime; - - -SELECT '---------After fuse result-----------'; -SELECT 'quantile:'; -EXPLAIN 
SYNTAX SELECT quantile(0.2)(d), quantile(0.3)(d) FROM datetime; -SELECT quantile(0.2)(d), quantile(0.3)(d) FROM datetime; - -SELECT 'quantileDeterministic:'; -EXPLAIN SYNTAX SELECT quantileDeterministic(0.2)(d, 1), quantileDeterministic(0.5)(d, 1) FROM datetime; -SELECT quantileDeterministic(0.2)(d, 1), quantileDeterministic(0.5)(d, 1) FROM datetime; - -SELECT 'quantileExact:'; -EXPLAIN SYNTAX SELECT quantileExact(0.2)(d), quantileExact(0.5)(d) FROM datetime; -SELECT quantileExact(0.2)(d), quantileExact(0.5)(d) FROM datetime; - -SELECT 'quantileExactWeighted:'; -EXPLAIN SYNTAX SELECT quantileExactWeighted(0.2)(d, 1), quantileExactWeighted(0.4)(d, 1), quantileExactWeighted(0.3)(d, 2) FROM datetime; -SELECT quantileExactWeighted(0.2)(d, 1), quantileExactWeighted(0.4)(d, 1), quantileExactWeighted(0.3)(d, 2) FROM datetime; - -SELECT 'quantileTiming:'; -EXPLAIN SYNTAX SELECT quantileTiming(0.2)(d), quantileTiming(0.3)(d), quantileTiming(0.2)(d+1) FROM datetime; -SELECT quantileTiming(0.2)(d), quantileTiming(0.3)(d), quantileTiming(0.2)(d+1) FROM datetime; - -SELECT 'quantileTimingWeighted:'; -EXPLAIN SYNTAX SELECT quantileTimingWeighted(0.2)(d, 1), quantileTimingWeighted(0.3)(d, 1), quantileTimingWeighted(0.2)(d, 2) FROM datetime; -SELECT quantileTimingWeighted(0.2)(d, 1), quantileTimingWeighted(0.3)(d, 1), quantileTimingWeighted(0.2)(d, 2) FROM datetime; - -SELECT 'quantileTDigest:'; -EXPLAIN SYNTAX SELECT quantileTDigest(0.2)(d), quantileTDigest(0.3)(d), quantileTDigest(0.2)(d + 1) FROM datetime; -SELECT quantileTDigest(0.2)(d), quantileTDigest(0.3)(d), quantileTDigest(0.2)(d + 1) FROM datetime; - -SELECT 'quantileTDigestWeighted:'; -EXPLAIN SYNTAX SELECT quantileTDigestWeighted(0.2)(d, 1), quantileTDigestWeighted(0.3)(d, 1), quantileTDigestWeighted(0.4)(d, 2) FROM datetime; -SELECT quantileTDigestWeighted(0.2)(d, 1), quantileTDigestWeighted(0.3)(d, 1), quantileTDigestWeighted(0.4)(d, 2) FROM datetime; - -SELECT 'quantileBFloat16:'; -EXPLAIN SYNTAX SELECT quantileBFloat16(0.2)(d), quantileBFloat16(0.3)(d), quantileBFloat16(0.4)(d + 1) FROM datetime; -SELECT quantileBFloat16(0.2)(d), quantileBFloat16(0.3)(d), quantileBFloat16(0.4)(d + 1) FROM datetime; - -SELECT 'quantileBFloat16Weighted:'; -EXPLAIN SYNTAX SELECT quantileBFloat16Weighted(0.2)(d, 1), quantileBFloat16Weighted(0.3)(d, 1), quantileBFloat16Weighted(0.2)(d, 2) FROM datetime; -SELECT quantileBFloat16Weighted(0.2)(d, 1), quantileBFloat16Weighted(0.3)(d, 1), quantileBFloat16Weighted(0.2)(d, 2) FROM datetime; - -EXPLAIN SYNTAX SELECT quantile(0.2)(d) as k, quantile(0.3)(d) FROM datetime order by quantile(0.2)(d); - -SELECT b, quantile(0.5)(x) as a, quantile(0.9)(x) as y, quantile(0.95)(x) FROM (select number as x, number % 2 as b from numbers(10)) group by b order by b; -EXPLAIN SYNTAX SELECT b, quantile(0.5)(x) as a, quantile(0.9)(x) as y, quantile(0.95)(x) FROM (select number as x, number % 2 as b from numbers(10)) group by b order by b; - --- fuzzer -SELECT quantileDeterministic(0.99)(1023) FROM datetime FORMAT Null; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT quantileTiming(0.5)(NULL, NULL, quantileTiming(-inf)(NULL), NULL) FROM datetime FORMAT Null; -- { serverError ILLEGAL_AGGREGATION } -SELECT quantileTDigest(NULL)(NULL, quantileTDigest(3.14)(NULL, d + NULL), 2.), NULL FORMAT Null; -- { serverError ILLEGAL_AGGREGATION } -SELECT quantile(1, 0.3)(d), quantile(0.3)(d) FROM datetime; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT quantile(quantileDeterministic('', '2.47')('0.02', '0.2', NULL), 0.9)(d), 
quantile(0.3)(d) FROM datetime; -- { serverError ILLEGAL_AGGREGATION } -SELECT quantileTimingWeighted([[[[['-214748364.8'], NULL]], [[[quantileTimingWeighted([[[[['-214748364.8'], NULL], '-922337203.6854775808'], [[['-214748364.7']]], NULL]])([NULL], NULL), '-214748364.7']]], NULL]])([NULL], NULL); -- { serverError ILLEGAL_AGGREGATION } -SELECT quantileTimingWeighted([quantileTimingWeighted(0.5)(1, 1)])(1, 1); -- { serverError ILLEGAL_AGGREGATION } - -DROP TABLE datetime; - -SET optimize_syntax_fuse_functions = 1; -SELECT quantile(1 AS a), quantile(a AS b), quantile(b AS c); diff --git a/tests/queries/0_stateless/02000_join_on_const.sql b/tests/queries/0_stateless/02000_join_on_const.sql index 7496e754a0a..cab5a838250 100644 --- a/tests/queries/0_stateless/02000_join_on_const.sql +++ b/tests/queries/0_stateless/02000_join_on_const.sql @@ -43,14 +43,29 @@ SELECT * FROM t1 RIGHT JOIN t2 ON NULL ORDER BY t1.id NULLS FIRST, t2.id SETTING SELECT '- full -'; SELECT * FROM t1 FULL JOIN t2 ON NULL ORDER BY t1.id NULLS FIRST, t2.id SETTINGS join_use_nulls = 1; -SELECT * FROM t1 JOIN t2 ON 1 = 1 SETTINGS join_algorithm = 'full_sorting_merge'; -- { serverError 48 } -SELECT * FROM t1 JOIN t2 ON 1 = 1 SETTINGS join_algorithm = 'partial_merge'; -- { serverError 48 } -SELECT * FROM t1 JOIN t2 ON 1 = 1 SETTINGS join_algorithm = 'auto'; -- { serverError 48 } -SELECT * FROM t1 JOIN t2 ON NULL SETTINGS join_algorithm = 'full_sorting_merge'; -- { serverError 48 } -SELECT * FROM t1 JOIN t2 ON NULL SETTINGS join_algorithm = 'partial_merge'; -- { serverError 48 } -SELECT * FROM t1 LEFT JOIN t2 ON NULL SETTINGS join_algorithm = 'partial_merge'; -- { serverError 48 } -SELECT * FROM t1 RIGHT JOIN t2 ON NULL SETTINGS join_algorithm = 'auto'; -- { serverError 48 } -SELECT * FROM t1 FULL JOIN t2 ON NULL SETTINGS join_algorithm = 'partial_merge'; -- { serverError 48 } +-- in this cases we have AMBIGUOUS_COLUMN_NAME instead of INVALID_JOIN_ON_EXPRESSION +-- because there's some function in ON expression is not constant itself (result is constant) +SELECT * FROM t1 JOIN t2 ON 1 = 1 SETTINGS join_algorithm = 'full_sorting_merge'; -- { serverError AMBIGUOUS_COLUMN_NAME } +SELECT * FROM t1 JOIN t2 ON 1 = 1 SETTINGS join_algorithm = 'partial_merge'; -- { serverError AMBIGUOUS_COLUMN_NAME } +SELECT * FROM t1 JOIN t2 ON 1 = 1 SETTINGS join_algorithm = 'auto'; -- { serverError AMBIGUOUS_COLUMN_NAME } + +SELECT * FROM t1 JOIN t2 ON NULL SETTINGS join_algorithm = 'full_sorting_merge'; -- { serverError INVALID_JOIN_ON_EXPRESSION } +SELECT * FROM t1 JOIN t2 ON NULL SETTINGS join_algorithm = 'partial_merge'; -- { serverError INVALID_JOIN_ON_EXPRESSION } +SELECT * FROM t1 LEFT JOIN t2 ON NULL SETTINGS join_algorithm = 'partial_merge'; -- { serverError INVALID_JOIN_ON_EXPRESSION } +SELECT * FROM t1 RIGHT JOIN t2 ON NULL SETTINGS join_algorithm = 'auto'; -- { serverError INVALID_JOIN_ON_EXPRESSION } +SELECT * FROM t1 FULL JOIN t2 ON NULL SETTINGS join_algorithm = 'partial_merge'; -- { serverError INVALID_JOIN_ON_EXPRESSION } + +-- mixing of constant and non-constant expressions in ON is not allowed +SELECT * FROM t1 JOIN t2 ON t1.id = t2.id AND 1 == 1; -- { serverError AMBIGUOUS_COLUMN_NAME } +SELECT * FROM t1 JOIN t2 ON t1.id = t2.id AND 1 == 2; -- { serverError AMBIGUOUS_COLUMN_NAME } + +SELECT * FROM t1 JOIN t2 ON t1.id = t2.id AND 1 != 1; -- { serverError INVALID_JOIN_ON_EXPRESSION } +SELECT * FROM t1 JOIN t2 ON t1.id = t2.id AND NULL; -- { serverError INVALID_JOIN_ON_EXPRESSION } +SELECT * FROM t1 JOIN t2 ON t1.id = t2.id AND 
'aaa'; -- { serverError INVALID_JOIN_ON_EXPRESSION } +SELECT * FROM t1 JOIN t2 ON 'aaa'; -- { serverError INVALID_JOIN_ON_EXPRESSION } + +SELECT * FROM t1 JOIN t2 ON t1.id = t2.id AND 0; -- { serverError INVALID_JOIN_ON_EXPRESSION } +SELECT * FROM t1 JOIN t2 ON t1.id = t2.id AND 1; -- { serverError INVALID_JOIN_ON_EXPRESSION } DROP TABLE IF EXISTS t1; DROP TABLE IF EXISTS t2; diff --git a/tests/queries/0_stateless/02015_async_inserts_2.sh b/tests/queries/0_stateless/02015_async_inserts_2.sh index fd20f846897..8934dcc66e0 100755 --- a/tests/queries/0_stateless/02015_async_inserts_2.sh +++ b/tests/queries/0_stateless/02015_async_inserts_2.sh @@ -5,10 +5,10 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -url="${CLICKHOUSE_URL}&async_insert=1&wait_for_async_insert=1&max_insert_threads=0&group_by_two_level_threshold=100000&group_by_two_level_threshold_bytes=50000000&distributed_aggregation_memory_efficient=1&fsync_metadata=1&priority=1&output_format_parallel_formatting=0&input_format_parallel_parsing=0&min_chunk_bytes_for_parallel_parsing=4031398&max_read_buffer_size=554729&prefer_localhost_replica=0&max_block_size=51672&max_threads=20" +url="${CLICKHOUSE_URL}&async_insert=1&wait_for_async_insert=1&async_insert_busy_timeout_ms=600000&async_insert_max_query_number=3" -${CLICKHOUSE_CLIENT} --max_insert_threads=0 --group_by_two_level_threshold=100000 --group_by_two_level_threshold_bytes=50000000 --distributed_aggregation_memory_efficient=1 --fsync_metadata=1 --priority=1 --output_format_parallel_formatting=0 --input_format_parallel_parsing=0 --min_chunk_bytes_for_parallel_parsing=4031398 --max_read_buffer_size=554729 --prefer_localhost_replica=0 --max_block_size=51672 --max_threads=20 -q "DROP TABLE IF EXISTS async_inserts" -${CLICKHOUSE_CLIENT} --max_insert_threads=0 --group_by_two_level_threshold=100000 --group_by_two_level_threshold_bytes=50000000 --distributed_aggregation_memory_efficient=1 --fsync_metadata=1 --priority=1 --output_format_parallel_formatting=0 --input_format_parallel_parsing=0 --min_chunk_bytes_for_parallel_parsing=4031398 --max_read_buffer_size=554729 --prefer_localhost_replica=0 --max_block_size=51672 --max_threads=20 -q "CREATE TABLE async_inserts (id UInt32, s String) ENGINE = MergeTree ORDER BY id" +${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS async_inserts" +${CLICKHOUSE_CLIENT} -q "CREATE TABLE async_inserts (id UInt32, s String) ENGINE = MergeTree ORDER BY id" ${CLICKHOUSE_CURL} -sS "$url" -d 'INSERT INTO async_inserts FORMAT CSV 1,"a" @@ -23,7 +23,7 @@ ${CLICKHOUSE_CURL} -sS "$url" -d 'INSERT INTO async_inserts FORMAT CSV wait -${CLICKHOUSE_CLIENT} --max_insert_threads=0 --group_by_two_level_threshold=100000 --group_by_two_level_threshold_bytes=50000000 --distributed_aggregation_memory_efficient=1 --fsync_metadata=1 --priority=1 --output_format_parallel_formatting=0 --input_format_parallel_parsing=0 --min_chunk_bytes_for_parallel_parsing=4031398 --max_read_buffer_size=554729 --prefer_localhost_replica=0 --max_block_size=51672 --max_threads=20 -q "SELECT * FROM async_inserts ORDER BY id" -${CLICKHOUSE_CLIENT} --max_insert_threads=0 --group_by_two_level_threshold=100000 --group_by_two_level_threshold_bytes=50000000 --distributed_aggregation_memory_efficient=1 --fsync_metadata=1 --priority=1 --output_format_parallel_formatting=0 --input_format_parallel_parsing=0 --min_chunk_bytes_for_parallel_parsing=4031398 --max_read_buffer_size=554729 --prefer_localhost_replica=0 --max_block_size=51672 
--max_threads=20 -q "SELECT name, rows, level FROM system.parts WHERE table = 'async_inserts' AND database = '$CLICKHOUSE_DATABASE' ORDER BY name" +${CLICKHOUSE_CLIENT} -q "SELECT * FROM async_inserts ORDER BY id" +${CLICKHOUSE_CLIENT} -q "SELECT name, rows, level FROM system.parts WHERE table = 'async_inserts' AND database = '$CLICKHOUSE_DATABASE' ORDER BY name" -${CLICKHOUSE_CLIENT} --max_insert_threads=0 --group_by_two_level_threshold=100000 --group_by_two_level_threshold_bytes=50000000 --distributed_aggregation_memory_efficient=1 --fsync_metadata=1 --priority=1 --output_format_parallel_formatting=0 --input_format_parallel_parsing=0 --min_chunk_bytes_for_parallel_parsing=4031398 --max_read_buffer_size=554729 --prefer_localhost_replica=0 --max_block_size=51672 --max_threads=20 -q "DROP TABLE async_inserts" +${CLICKHOUSE_CLIENT} -q "DROP TABLE async_inserts" diff --git a/tests/queries/0_stateless/02020_exponential_smoothing.reference b/tests/queries/0_stateless/02020_exponential_smoothing.reference index 95f5cb0c310..1e98bca8a23 100644 --- a/tests/queries/0_stateless/02020_exponential_smoothing.reference +++ b/tests/queries/0_stateless/02020_exponential_smoothing.reference @@ -31,14 +31,14 @@ exponentialMovingAverage 9 9 8.002 1 0 0.067 ███▎ 0 1 0.062 ███ -0 2 0.058 ██▊ +0 2 0.058 ██▉ 0 3 0.054 ██▋ 0 4 0.051 ██▌ 0 5 0.047 ██▎ 0 6 0.044 ██▏ 0 7 0.041 ██ -0 8 0.038 █▊ -0 9 0.036 █▋ +0 8 0.038 █▉ +0 9 0.036 █▊ 0 10 0.033 █▋ 0 11 0.031 █▌ 0 12 0.029 █▍ @@ -47,16 +47,16 @@ exponentialMovingAverage 0 15 0.024 █▏ 0 16 0.022 █ 0 17 0.021 █ -0 18 0.019 ▊ -0 19 0.018 ▊ -0 20 0.017 ▋ -0 21 0.016 ▋ +0 18 0.019 ▉ +0 19 0.018 ▉ +0 20 0.017 ▊ +0 21 0.016 ▊ 0 22 0.015 ▋ 0 23 0.014 ▋ 0 24 0.013 ▋ -1 25 0.079 ███▊ +1 25 0.079 ███▉ 1 26 0.14 ███████ -1 27 0.198 █████████▊ +1 27 0.198 █████████▉ 1 28 0.252 ████████████▌ 1 29 0.302 ███████████████ 1 30 0.349 █████████████████▍ @@ -64,68 +64,68 @@ exponentialMovingAverage 1 32 0.433 █████████████████████▋ 1 33 0.471 ███████████████████████▌ 1 34 0.506 █████████████████████████▎ -1 35 0.539 ██████████████████████████▊ +1 35 0.539 ██████████████████████████▉ 1 36 0.57 ████████████████████████████▌ -1 37 0.599 █████████████████████████████▊ +1 37 0.599 █████████████████████████████▉ 1 38 0.626 ███████████████████████████████▎ 1 39 0.651 ████████████████████████████████▌ 1 40 0.674 █████████████████████████████████▋ -1 41 0.696 ██████████████████████████████████▋ -1 42 0.716 ███████████████████████████████████▋ -1 43 0.735 ████████████████████████████████████▋ +1 41 0.696 ██████████████████████████████████▊ +1 42 0.716 ███████████████████████████████████▊ +1 43 0.735 ████████████████████████████████████▊ 1 44 0.753 █████████████████████████████████████▋ 1 45 0.77 ██████████████████████████████████████▍ 1 46 0.785 ███████████████████████████████████████▎ -1 47 0.8 ███████████████████████████████████████▊ +1 47 0.8 ███████████████████████████████████████▉ 1 48 0.813 ████████████████████████████████████████▋ 1 49 0.825 █████████████████████████████████████████▎ 1 0 0.5 █████████████████████████ 0 1 0.25 ████████████▌ 0 2 0.125 ██████▎ -0 3 0.062 ███ +0 3 0.062 ███▏ 0 4 0.031 █▌ -1 5 0.516 █████████████████████████▋ -0 6 0.258 ████████████▊ +1 5 0.516 █████████████████████████▊ +0 6 0.258 ████████████▉ 0 7 0.129 ██████▍ 0 8 0.064 ███▏ 0 9 0.032 █▌ -1 10 0.516 █████████████████████████▋ -0 11 0.258 ████████████▊ +1 10 0.516 █████████████████████████▊ +0 11 0.258 ████████████▉ 0 12 0.129 ██████▍ 0 13 0.065 ███▏ 0 14 0.032 █▌ -1 15 0.516 █████████████████████████▋ -0 16 
0.258 ████████████▊ +1 15 0.516 █████████████████████████▊ +0 16 0.258 ████████████▉ 0 17 0.129 ██████▍ 0 18 0.065 ███▏ 0 19 0.032 █▌ -1 20 0.516 █████████████████████████▋ -0 21 0.258 ████████████▊ +1 20 0.516 █████████████████████████▊ +0 21 0.258 ████████████▉ 0 22 0.129 ██████▍ 0 23 0.065 ███▏ 0 24 0.032 █▌ -1 25 0.516 █████████████████████████▋ -0 26 0.258 ████████████▊ +1 25 0.516 █████████████████████████▊ +0 26 0.258 ████████████▉ 0 27 0.129 ██████▍ 0 28 0.065 ███▏ 0 29 0.032 █▌ -1 30 0.516 █████████████████████████▋ -0 31 0.258 ████████████▊ +1 30 0.516 █████████████████████████▊ +0 31 0.258 ████████████▉ 0 32 0.129 ██████▍ 0 33 0.065 ███▏ 0 34 0.032 █▌ -1 35 0.516 █████████████████████████▋ -0 36 0.258 ████████████▊ +1 35 0.516 █████████████████████████▊ +0 36 0.258 ████████████▉ 0 37 0.129 ██████▍ 0 38 0.065 ███▏ 0 39 0.032 █▌ -1 40 0.516 █████████████████████████▋ -0 41 0.258 ████████████▊ +1 40 0.516 █████████████████████████▊ +0 41 0.258 ████████████▉ 0 42 0.129 ██████▍ 0 43 0.065 ███▏ 0 44 0.032 █▌ -1 45 0.516 █████████████████████████▋ -0 46 0.258 ████████████▊ +1 45 0.516 █████████████████████████▊ +0 46 0.258 ████████████▉ 0 47 0.129 ██████▍ 0 48 0.065 ███▏ 0 49 0.032 █▌ @@ -170,15 +170,15 @@ exponentialTimeDecayedSum 0 7 0.497 ██▍ 0 8 0.449 ██▏ 0 9 0.407 ██ -0 10 0.368 █▋ +0 10 0.368 █▊ 0 11 0.333 █▋ 0 12 0.301 █▌ 0 13 0.273 █▎ 0 14 0.247 █▏ 0 15 0.223 █ 0 16 0.202 █ -0 17 0.183 ▊ -0 18 0.165 ▋ +0 17 0.183 ▉ +0 18 0.165 ▊ 0 19 0.15 ▋ 0 20 0.135 ▋ 0 21 0.122 ▌ @@ -186,80 +186,80 @@ exponentialTimeDecayedSum 0 23 0.1 ▌ 0 24 0.091 ▍ 1 25 1.082 █████▍ -1 26 1.979 █████████▊ -1 27 2.791 █████████████▊ +1 26 1.979 █████████▉ +1 27 2.791 █████████████▉ 1 28 3.525 █████████████████▋ -1 29 4.19 ████████████████████▊ -1 30 4.791 ███████████████████████▊ +1 29 4.19 ████████████████████▉ +1 30 4.791 ███████████████████████▉ 1 31 5.335 ██████████████████████████▋ 1 32 5.827 █████████████████████████████▏ 1 33 6.273 ███████████████████████████████▎ 1 34 6.676 █████████████████████████████████▍ 1 35 7.041 ███████████████████████████████████▏ -1 36 7.371 ████████████████████████████████████▋ +1 36 7.371 ████████████████████████████████████▊ 1 37 7.669 ██████████████████████████████████████▎ 1 38 7.939 ███████████████████████████████████████▋ -1 39 8.184 ████████████████████████████████████████▊ +1 39 8.184 ████████████████████████████████████████▉ 1 40 8.405 ██████████████████████████████████████████ 1 41 8.605 ███████████████████████████████████████████ -1 42 8.786 ███████████████████████████████████████████▊ -1 43 8.95 ████████████████████████████████████████████▋ +1 42 8.786 ███████████████████████████████████████████▉ +1 43 8.95 ████████████████████████████████████████████▊ 1 44 9.098 █████████████████████████████████████████████▍ 1 45 9.233 ██████████████████████████████████████████████▏ -1 46 9.354 ██████████████████████████████████████████████▋ +1 46 9.354 ██████████████████████████████████████████████▊ 1 47 9.464 ███████████████████████████████████████████████▎ -1 48 9.563 ███████████████████████████████████████████████▋ +1 48 9.563 ███████████████████████████████████████████████▊ 1 49 9.653 ████████████████████████████████████████████████▎ 1 0 1 ██████████████████████████████████████████████████ 0 1 0.368 ██████████████████▍ -0 2 0.135 ██████▋ +0 2 0.135 ██████▊ 0 3 0.05 ██▍ -0 4 0.018 ▊ +0 4 0.018 ▉ 1 5 1.007 ██████████████████████████████████████████████████ 0 6 0.37 ██████████████████▌ -0 7 0.136 ██████▋ +0 7 0.136 ██████▊ 0 8 0.05 ██▌ -0 9 0.018 ▊ +0 9 0.018 ▉ 1 10 1.007 
██████████████████████████████████████████████████ 0 11 0.37 ██████████████████▌ -0 12 0.136 ██████▋ +0 12 0.136 ██████▊ 0 13 0.05 ██▌ -0 14 0.018 ▊ +0 14 0.018 ▉ 1 15 1.007 ██████████████████████████████████████████████████ 0 16 0.37 ██████████████████▌ -0 17 0.136 ██████▋ +0 17 0.136 ██████▊ 0 18 0.05 ██▌ -0 19 0.018 ▊ +0 19 0.018 ▉ 1 20 1.007 ██████████████████████████████████████████████████ 0 21 0.37 ██████████████████▌ -0 22 0.136 ██████▋ +0 22 0.136 ██████▊ 0 23 0.05 ██▌ -0 24 0.018 ▊ +0 24 0.018 ▉ 1 25 1.007 ██████████████████████████████████████████████████ 0 26 0.37 ██████████████████▌ -0 27 0.136 ██████▋ +0 27 0.136 ██████▊ 0 28 0.05 ██▌ -0 29 0.018 ▊ +0 29 0.018 ▉ 1 30 1.007 ██████████████████████████████████████████████████ 0 31 0.37 ██████████████████▌ -0 32 0.136 ██████▋ +0 32 0.136 ██████▊ 0 33 0.05 ██▌ -0 34 0.018 ▊ +0 34 0.018 ▉ 1 35 1.007 ██████████████████████████████████████████████████ 0 36 0.37 ██████████████████▌ -0 37 0.136 ██████▋ +0 37 0.136 ██████▊ 0 38 0.05 ██▌ -0 39 0.018 ▊ +0 39 0.018 ▉ 1 40 1.007 ██████████████████████████████████████████████████ 0 41 0.37 ██████████████████▌ -0 42 0.136 ██████▋ +0 42 0.136 ██████▊ 0 43 0.05 ██▌ -0 44 0.018 ▊ +0 44 0.018 ▉ 1 45 1.007 ██████████████████████████████████████████████████ 0 46 0.37 ██████████████████▌ -0 47 0.136 ██████▋ +0 47 0.136 ██████▊ 0 48 0.05 ██▌ -0 49 0.018 ▊ +0 49 0.018 ▉ exponentialTimeDecayedMax 1 0 1 0 1 0.368 @@ -301,15 +301,15 @@ exponentialTimeDecayedMax 0 7 0.497 ██▍ 0 8 0.449 ██▏ 0 9 0.407 ██ -0 10 0.368 █▋ +0 10 0.368 █▊ 0 11 0.333 █▋ 0 12 0.301 █▌ 0 13 0.273 █▎ 0 14 0.247 █▏ 0 15 0.223 █ 0 16 0.202 █ -0 17 0.183 ▊ -0 18 0.165 ▋ +0 17 0.183 ▉ +0 18 0.165 ▊ 0 19 0.15 ▋ 0 20 0.135 ▋ 0 21 0.122 ▌ @@ -343,54 +343,54 @@ exponentialTimeDecayedMax 1 49 1 █████ 1 0 1 ██████████████████████████████████████████████████ 0 1 0.368 ██████████████████▍ -0 2 0.135 ██████▋ +0 2 0.135 ██████▊ 0 3 0.05 ██▍ -0 4 0.018 ▊ +0 4 0.018 ▉ 1 5 1 ██████████████████████████████████████████████████ 0 6 0.368 ██████████████████▍ -0 7 0.135 ██████▋ +0 7 0.135 ██████▊ 0 8 0.05 ██▍ -0 9 0.018 ▊ +0 9 0.018 ▉ 1 10 1 ██████████████████████████████████████████████████ 0 11 0.368 ██████████████████▍ -0 12 0.135 ██████▋ +0 12 0.135 ██████▊ 0 13 0.05 ██▍ -0 14 0.018 ▊ +0 14 0.018 ▉ 1 15 1 ██████████████████████████████████████████████████ 0 16 0.368 ██████████████████▍ -0 17 0.135 ██████▋ +0 17 0.135 ██████▊ 0 18 0.05 ██▍ -0 19 0.018 ▊ +0 19 0.018 ▉ 1 20 1 ██████████████████████████████████████████████████ 0 21 0.368 ██████████████████▍ -0 22 0.135 ██████▋ +0 22 0.135 ██████▊ 0 23 0.05 ██▍ -0 24 0.018 ▊ +0 24 0.018 ▉ 1 25 1 ██████████████████████████████████████████████████ 0 26 0.368 ██████████████████▍ -0 27 0.135 ██████▋ +0 27 0.135 ██████▊ 0 28 0.05 ██▍ -0 29 0.018 ▊ +0 29 0.018 ▉ 1 30 1 ██████████████████████████████████████████████████ 0 31 0.368 ██████████████████▍ -0 32 0.135 ██████▋ +0 32 0.135 ██████▊ 0 33 0.05 ██▍ -0 34 0.018 ▊ +0 34 0.018 ▉ 1 35 1 ██████████████████████████████████████████████████ 0 36 0.368 ██████████████████▍ -0 37 0.135 ██████▋ +0 37 0.135 ██████▊ 0 38 0.05 ██▍ -0 39 0.018 ▊ +0 39 0.018 ▉ 1 40 1 ██████████████████████████████████████████████████ 0 41 0.368 ██████████████████▍ -0 42 0.135 ██████▋ +0 42 0.135 ██████▊ 0 43 0.05 ██▍ -0 44 0.018 ▊ +0 44 0.018 ▉ 1 45 1 ██████████████████████████████████████████████████ 0 46 0.368 ██████████████████▍ -0 47 0.135 ██████▋ +0 47 0.135 ██████▊ 0 48 0.05 ██▍ -0 49 0.018 ▊ +0 49 0.018 ▉ exponentialTimeDecayedCount 1 0 1 0 1 1.368 @@ -428,19 +428,19 @@ 
exponentialTimeDecayedCount 0 3 3.038 ███████████████▏ 0 4 3.487 █████████████████▍ 0 5 3.855 ███████████████████▎ -0 6 4.156 ████████████████████▋ +0 6 4.156 ████████████████████▊ 0 7 4.403 ██████████████████████ 0 8 4.605 ███████████████████████ -0 9 4.77 ███████████████████████▋ +0 9 4.77 ███████████████████████▊ 0 10 4.905 ████████████████████████▌ 0 11 5.016 █████████████████████████ 0 12 5.107 █████████████████████████▌ -0 13 5.181 █████████████████████████▊ +0 13 5.181 █████████████████████████▉ 0 14 5.242 ██████████████████████████▏ 0 15 5.292 ██████████████████████████▍ 0 16 5.333 ██████████████████████████▋ -0 17 5.366 ██████████████████████████▋ -0 18 5.393 ██████████████████████████▊ +0 17 5.366 ██████████████████████████▊ +0 18 5.393 ██████████████████████████▉ 0 19 5.416 ███████████████████████████ 0 20 5.434 ███████████████████████████▏ 0 21 5.449 ███████████████████████████▏ @@ -473,11 +473,11 @@ exponentialTimeDecayedCount 1 48 5.516 ███████████████████████████▌ 1 49 5.516 ███████████████████████████▌ 1 0 1 ██▌ -0 1 1.905 ████▋ -0 2 2.724 ██████▋ +0 1 1.905 ████▊ +0 2 2.724 ██████▊ 0 3 3.464 ████████▋ 0 4 4.135 ██████████▎ -1 5 4.741 ███████████▋ +1 5 4.741 ███████████▊ 0 6 5.29 █████████████▏ 0 7 5.787 ██████████████▍ 0 8 6.236 ███████████████▌ @@ -485,23 +485,23 @@ exponentialTimeDecayedCount 1 10 7.01 █████████████████▌ 0 11 7.343 ██████████████████▎ 0 12 7.644 ███████████████████ -0 13 7.917 ███████████████████▋ +0 13 7.917 ███████████████████▊ 0 14 8.164 ████████████████████▍ -1 15 8.387 ████████████████████▊ +1 15 8.387 ████████████████████▉ 0 16 8.589 █████████████████████▍ -0 17 8.771 █████████████████████▊ +0 17 8.771 █████████████████████▉ 0 18 8.937 ██████████████████████▎ 0 19 9.086 ██████████████████████▋ 1 20 9.222 ███████████████████████ 0 21 9.344 ███████████████████████▎ 0 22 9.455 ███████████████████████▋ -0 23 9.555 ███████████████████████▊ +0 23 9.555 ███████████████████████▉ 0 24 9.646 ████████████████████████ 1 25 9.728 ████████████████████████▎ 0 26 9.802 ████████████████████████▌ 0 27 9.869 ████████████████████████▋ -0 28 9.93 ████████████████████████▋ -0 29 9.985 ████████████████████████▊ +0 28 9.93 ████████████████████████▊ +0 29 9.985 ████████████████████████▉ 1 30 10.035 █████████████████████████ 0 31 10.08 █████████████████████████▏ 0 32 10.121 █████████████████████████▎ @@ -511,12 +511,12 @@ exponentialTimeDecayedCount 0 36 10.249 █████████████████████████▌ 0 37 10.273 █████████████████████████▋ 0 38 10.296 █████████████████████████▋ -0 39 10.316 █████████████████████████▋ -1 40 10.334 █████████████████████████▋ -0 41 10.351 █████████████████████████▊ -0 42 10.366 █████████████████████████▊ -0 43 10.379 █████████████████████████▊ -0 44 10.392 █████████████████████████▊ +0 39 10.316 █████████████████████████▊ +1 40 10.334 █████████████████████████▊ +0 41 10.351 █████████████████████████▉ +0 42 10.366 █████████████████████████▉ +0 43 10.379 █████████████████████████▉ +0 44 10.392 █████████████████████████▉ 1 45 10.403 ██████████████████████████ 0 46 10.413 ██████████████████████████ 0 47 10.422 ██████████████████████████ @@ -554,13 +554,13 @@ exponentialTimeDecayedAvg 8 8 7.419 9 9 8.418 1 0 1 ██████████ -0 1 0.475 ████▋ +0 1 0.475 ████▊ 0 2 0.301 ███ 0 3 0.214 ██▏ 0 4 0.162 █▌ 0 5 0.128 █▎ 0 6 0.104 █ -0 7 0.086 ▋ +0 7 0.086 ▊ 0 8 0.072 ▋ 0 9 0.061 ▌ 0 10 0.052 ▌ @@ -580,42 +580,42 @@ exponentialTimeDecayedAvg 0 24 0.009 1 25 0.111 █ 1 26 0.202 ██ -1 27 0.283 ██▋ +1 27 0.283 ██▊ 1 28 0.355 ███▌ 1 29 0.42 ████▏ -1 30 0.477 ████▋ +1 30 0.477 
████▊ 1 31 0.529 █████▎ -1 32 0.576 █████▋ +1 32 0.576 █████▊ 1 33 0.618 ██████▏ 1 34 0.655 ██████▌ -1 35 0.689 ██████▊ +1 35 0.689 ██████▉ 1 36 0.719 ███████▏ 1 37 0.747 ███████▍ 1 38 0.771 ███████▋ -1 39 0.793 ███████▊ +1 39 0.793 ███████▉ 1 40 0.813 ████████▏ 1 41 0.831 ████████▎ 1 42 0.848 ████████▍ 1 43 0.862 ████████▌ -1 44 0.876 ████████▋ -1 45 0.888 ████████▊ -1 46 0.898 ████████▊ +1 44 0.876 ████████▊ +1 45 0.888 ████████▉ +1 46 0.898 ████████▉ 1 47 0.908 █████████ 1 48 0.917 █████████▏ 1 49 0.925 █████████▏ 1 0 1 ██████████████████████████████████████████████████ -0 1 0.498 █████████████████████████████████████████████████▋ +0 1 0.498 █████████████████████████████████████████████████▊ 0 2 0.33 █████████████████████████████████ 0 3 0.246 ████████████████████████▋ 0 4 0.196 ███████████████████▌ 1 5 0.333 █████████████████████████████████▎ 0 6 0.284 ████████████████████████████▍ -0 7 0.248 ████████████████████████▋ -0 8 0.219 █████████████████████▊ +0 7 0.248 ████████████████████████▊ +0 8 0.219 █████████████████████▉ 0 9 0.196 ███████████████████▌ 1 10 0.273 ███████████████████████████▎ -0 11 0.249 ████████████████████████▊ -0 12 0.229 ██████████████████████▋ +0 11 0.249 ████████████████████████▉ +0 12 0.229 ██████████████████████▊ 0 13 0.211 █████████████████████ 0 14 0.196 ███████████████████▌ 1 15 0.25 █████████████████████████ @@ -623,7 +623,7 @@ exponentialTimeDecayedAvg 0 17 0.22 ██████████████████████ 0 18 0.207 ████████████████████▋ 0 19 0.196 ███████████████████▌ -1 20 0.238 ███████████████████████▋ +1 20 0.238 ███████████████████████▊ 0 21 0.226 ██████████████████████▌ 0 22 0.215 █████████████████████▌ 0 23 0.205 ████████████████████▌ @@ -634,21 +634,21 @@ exponentialTimeDecayedAvg 0 28 0.204 ████████████████████▍ 0 29 0.196 ███████████████████▌ 1 30 0.226 ██████████████████████▌ -0 31 0.218 █████████████████████▋ +0 31 0.218 █████████████████████▊ 0 32 0.21 █████████████████████ 0 33 0.203 ████████████████████▎ 0 34 0.196 ███████████████████▌ 1 35 0.222 ██████████████████████▏ 0 36 0.215 █████████████████████▌ -0 37 0.209 ████████████████████▋ +0 37 0.209 ████████████████████▊ 0 38 0.202 ████████████████████▏ 0 39 0.196 ███████████████████▌ -1 40 0.22 █████████████████████▊ +1 40 0.22 █████████████████████▉ 0 41 0.213 █████████████████████▎ 0 42 0.207 ████████████████████▋ 0 43 0.202 ████████████████████▏ 0 44 0.196 ███████████████████▌ -1 45 0.218 █████████████████████▋ +1 45 0.218 █████████████████████▊ 0 46 0.212 █████████████████████▏ 0 47 0.206 ████████████████████▋ 0 48 0.201 ████████████████████ diff --git a/tests/queries/0_stateless/02049_lowcardinality_shortcircuit_crash.sql b/tests/queries/0_stateless/02049_lowcardinality_shortcircuit_crash.sql index 2a837380250..84d64d3166e 100644 --- a/tests/queries/0_stateless/02049_lowcardinality_shortcircuit_crash.sql +++ b/tests/queries/0_stateless/02049_lowcardinality_shortcircuit_crash.sql @@ -1,4 +1,6 @@ -- https://github.com/ClickHouse/ClickHouse/issues/30231 +SET allow_suspicious_low_cardinality_types=1; + SELECT * FROM ( SELECT number, diff --git a/tests/queries/0_stateless/02102_row_binary_with_names_and_types.reference b/tests/queries/0_stateless/02102_row_binary_with_names_and_types.reference index 9011f20cd6a..4b057f29039 100644 --- a/tests/queries/0_stateless/02102_row_binary_with_names_and_types.reference +++ b/tests/queries/0_stateless/02102_row_binary_with_names_and_types.reference @@ -8,7 +8,7 @@ 1 default 1970-01-01 1 1970-01-01 1 1970-01-01 -OK +1 1 default 1970-01-01 -OK -OK +1 +1 diff --git 
a/tests/queries/0_stateless/02102_row_binary_with_names_and_types.sh b/tests/queries/0_stateless/02102_row_binary_with_names_and_types.sh index e7307ad3ad5..ba7aac94ddb 100755 --- a/tests/queries/0_stateless/02102_row_binary_with_names_and_types.sh +++ b/tests/queries/0_stateless/02102_row_binary_with_names_and_types.sh @@ -55,7 +55,7 @@ $CLICKHOUSE_CLIENT -q "SELECT * FROM test_02102" $CLICKHOUSE_CLIENT -q "TRUNCATE TABLE test_02102" -$CLICKHOUSE_CLIENT -q "SELECT toUInt32(1) AS x, [[1, 2, 3], [4, 5], []] as a FORMAT RowBinaryWithNames" | $CLICKHOUSE_CLIENT --input_format_skip_unknown_fields=1 --input_format_with_names_use_header=1 -q "INSERT INTO test_02102 FORMAT RowBinaryWithNames" 2>&1 | grep -F -q "CANNOT_SKIP_UNKNOWN_FIELD" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT toUInt32(1) AS x, [[1, 2, 3], [4, 5], []] as a FORMAT RowBinaryWithNames" | $CLICKHOUSE_CLIENT --input_format_skip_unknown_fields=1 --input_format_with_names_use_header=1 -q "INSERT INTO test_02102 FORMAT RowBinaryWithNames" 2>&1 | grep -F -c "CANNOT_SKIP_UNKNOWN_FIELD" $CLICKHOUSE_CLIENT -q "SELECT toUInt32(1) AS x, [[1, 2, 3], [4, 5], []] as a FORMAT RowBinaryWithNamesAndTypes" | $CLICKHOUSE_CLIENT --input_format_skip_unknown_fields=1 --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02102 FORMAT RowBinaryWithNamesAndTypes" @@ -63,9 +63,8 @@ $CLICKHOUSE_CLIENT -q "SELECT * FROM test_02102" $CLICKHOUSE_CLIENT -q "TRUNCATE TABLE test_02102" -$CLICKHOUSE_CLIENT -q "SELECT 'text' AS x, toDate('2020-01-01') AS y, toUInt32(1) AS z FORMAT RowBinaryWithNamesAndTypes" | $CLICKHOUSE_CLIENT --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02102 FORMAT RowBinaryWithNamesAndTypes" 2>&1 | grep -F -q "INCORRECT_DATA" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT 'text' AS x, toDate('2020-01-01') AS y, toUInt32(1) AS z FORMAT RowBinaryWithNamesAndTypes" | $CLICKHOUSE_CLIENT --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02102 FORMAT RowBinaryWithNamesAndTypes" 2>&1 | grep -F -c "INCORRECT_DATA" -$CLICKHOUSE_CLIENT -q "SELECT toUInt32(1) AS x, 'text' as z, toDate('2020-01-01') AS y FORMAT RowBinaryWithNamesAndTypes" | $CLICKHOUSE_CLIENT --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02102 FORMAT RowBinaryWithNamesAndTypes" 2>&1 | grep -F -q "INCORRECT_DATA" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT toUInt32(1) AS x, 'text' as z, toDate('2020-01-01') AS y FORMAT RowBinaryWithNamesAndTypes" | $CLICKHOUSE_CLIENT --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02102 FORMAT RowBinaryWithNamesAndTypes" 2>&1 | grep -F -c "INCORRECT_DATA" $CLICKHOUSE_CLIENT -q "DROP TABLE test_02102" - diff --git a/tests/queries/0_stateless/02113_format_row.reference b/tests/queries/0_stateless/02113_format_row.reference index 0ac3a15b115..d137fad60f5 100644 --- a/tests/queries/0_stateless/02113_format_row.reference +++ b/tests/queries/0_stateless/02113_format_row.reference @@ -1,20 +1,20 @@ -0\t1970-01-01\n -1\t1970-01-02\n -2\t1970-01-03\n -3\t1970-01-04\n -4\t1970-01-05\n -0,"1970-01-01"\n -1,"1970-01-02"\n -2,"1970-01-03"\n -3,"1970-01-04"\n -4,"1970-01-05"\n -["0", "1970-01-01"]\n -["1", "1970-01-02"]\n -["2", "1970-01-03"]\n -["3", "1970-01-04"]\n -["4", "1970-01-05"]\n -\t\t\n\t\t\t0\n\t\t\t1970-01-01\n\t\t\n -\t\t\n\t\t\t1\n\t\t\t1970-01-02\n\t\t\n 
-\t\t\n\t\t\t2\n\t\t\t1970-01-03\n\t\t\n -\t\t\n\t\t\t3\n\t\t\t1970-01-04\n\t\t\n -\t\t\n\t\t\t4\n\t\t\t1970-01-05\n\t\t\n +number\ttoDate(number)\nUInt64\tDate\n0\t1970-01-01\n +number\ttoDate(number)\nUInt64\tDate\n1\t1970-01-02\n +number\ttoDate(number)\nUInt64\tDate\n2\t1970-01-03\n +number\ttoDate(number)\nUInt64\tDate\n3\t1970-01-04\n +number\ttoDate(number)\nUInt64\tDate\n4\t1970-01-05\n +"number","toDate(number)"\n"UInt64","Date"\n0,"1970-01-01"\n +"number","toDate(number)"\n"UInt64","Date"\n1,"1970-01-02"\n +"number","toDate(number)"\n"UInt64","Date"\n2,"1970-01-03"\n +"number","toDate(number)"\n"UInt64","Date"\n3,"1970-01-04"\n +"number","toDate(number)"\n"UInt64","Date"\n4,"1970-01-05"\n +["number", "toDate(number)"]\n["UInt64", "Date"]\n["0", "1970-01-01"]\n +["number", "toDate(number)"]\n["UInt64", "Date"]\n["1", "1970-01-02"]\n +["number", "toDate(number)"]\n["UInt64", "Date"]\n["2", "1970-01-03"]\n +["number", "toDate(number)"]\n["UInt64", "Date"]\n["3", "1970-01-04"]\n +["number", "toDate(number)"]\n["UInt64", "Date"]\n["4", "1970-01-05"]\n +\n\n\t\n\t\t\n\t\t\t\n\t\t\t\tnumber\n\t\t\t\tUInt64\n\t\t\t\n\t\t\t\n\t\t\t\ttoDate(number)\n\t\t\t\tDate\n\t\t\t\n\t\t\n\t\n\t\n\t\t\n\t\t\t0\n\t\t\t1970-01-01\n\t\t\n\t\n\t1\n\n +\n\n\t\n\t\t\n\t\t\t\n\t\t\t\tnumber\n\t\t\t\tUInt64\n\t\t\t\n\t\t\t\n\t\t\t\ttoDate(number)\n\t\t\t\tDate\n\t\t\t\n\t\t\n\t\n\t\n\t\t\n\t\t\t1\n\t\t\t1970-01-02\n\t\t\n\t\n\t1\n\n +\n\n\t\n\t\t\n\t\t\t\n\t\t\t\tnumber\n\t\t\t\tUInt64\n\t\t\t\n\t\t\t\n\t\t\t\ttoDate(number)\n\t\t\t\tDate\n\t\t\t\n\t\t\n\t\n\t\n\t\t\n\t\t\t2\n\t\t\t1970-01-03\n\t\t\n\t\n\t1\n\n +\n\n\t\n\t\t\n\t\t\t\n\t\t\t\tnumber\n\t\t\t\tUInt64\n\t\t\t\n\t\t\t\n\t\t\t\ttoDate(number)\n\t\t\t\tDate\n\t\t\t\n\t\t\n\t\n\t\n\t\t\n\t\t\t3\n\t\t\t1970-01-04\n\t\t\n\t\n\t1\n\n +\n\n\t\n\t\t\n\t\t\t\n\t\t\t\tnumber\n\t\t\t\tUInt64\n\t\t\t\n\t\t\t\n\t\t\t\ttoDate(number)\n\t\t\t\tDate\n\t\t\t\n\t\t\n\t\n\t\n\t\t\n\t\t\t4\n\t\t\t1970-01-05\n\t\t\n\t\n\t1\n\n diff --git a/tests/queries/0_stateless/02113_format_row.sql b/tests/queries/0_stateless/02113_format_row.sql index 93ee6d0f1dd..1af6f7cc740 100644 --- a/tests/queries/0_stateless/02113_format_row.sql +++ b/tests/queries/0_stateless/02113_format_row.sql @@ -1,3 +1,4 @@ +set output_format_write_statistics=0; select formatRow('TSVWithNamesAndTypes', number, toDate(number)) from numbers(5); select formatRow('CSVWithNamesAndTypes', number, toDate(number)) from numbers(5); select formatRow('JSONCompactEachRowWithNamesAndTypes', number, toDate(number)) from numbers(5); diff --git a/tests/queries/0_stateless/02119_sumcount.sql b/tests/queries/0_stateless/02119_sumcount.sql index 22cb8b657da..86625996f44 100644 --- a/tests/queries/0_stateless/02119_sumcount.sql +++ b/tests/queries/0_stateless/02119_sumcount.sql @@ -22,6 +22,9 @@ SELECT toTypeName(sumCount(v)), sumCount(v) FROM ) ORDER BY v ); + +SET allow_suspicious_low_cardinality_types=1; + SELECT toTypeName(sumCount(v)), sumCount(v) FROM ( SELECT v FROM diff --git a/tests/queries/0_stateless/02166_arrow_dictionary_inference.sh b/tests/queries/0_stateless/02166_arrow_dictionary_inference.sh index 7d313b571d9..04888f3a1f3 100755 --- a/tests/queries/0_stateless/02166_arrow_dictionary_inference.sh +++ b/tests/queries/0_stateless/02166_arrow_dictionary_inference.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -q "insert into table function file('arrow.dict', 'Arrow', 'x LowCardinality(UInt64)') select number from numbers(10) settings output_format_arrow_low_cardinality_as_dictionary=1, engine_file_truncate_on_insert=1" +$CLICKHOUSE_CLIENT -q "insert into table function file('arrow.dict', 'Arrow', 'x LowCardinality(UInt64)') select number from numbers(10) settings output_format_arrow_low_cardinality_as_dictionary=1, engine_file_truncate_on_insert=1, allow_suspicious_low_cardinality_types=1" $CLICKHOUSE_CLIENT -q "desc file('arrow.dict', 'Arrow')" diff --git a/tests/queries/0_stateless/02205_ephemeral_1.reference b/tests/queries/0_stateless/02205_ephemeral_1.reference index ba39033668f..7c034ca72ea 100644 --- a/tests/queries/0_stateless/02205_ephemeral_1.reference +++ b/tests/queries/0_stateless/02205_ephemeral_1.reference @@ -7,7 +7,7 @@ z UInt32 DEFAULT 5 7 5 21 5 x UInt32 DEFAULT y -y UInt32 EPHEMERAL 0 +y UInt32 EPHEMERAL defaultValueOfTypeName(\'UInt32\') z UInt32 DEFAULT 5 1 2 0 2 diff --git a/tests/queries/0_stateless/02226_filesystem_cache_profile_events.reference b/tests/queries/0_stateless/02226_filesystem_cache_profile_events.reference index a2c04163136..d895040ef59 100644 --- a/tests/queries/0_stateless/02226_filesystem_cache_profile_events.reference +++ b/tests/queries/0_stateless/02226_filesystem_cache_profile_events.reference @@ -3,16 +3,13 @@ Using storage policy: s3_cache 0 1 0 0 0 1 0 - Using storage policy: local_cache 1 0 1 0 1 0 0 0 1 0 - Using storage policy: azure_cache 1 0 1 0 1 0 0 0 1 0 - diff --git a/tests/queries/0_stateless/02226_filesystem_cache_profile_events.sh b/tests/queries/0_stateless/02226_filesystem_cache_profile_events.sh index 1807fd6bc8e..96e51a58cc4 100755 --- a/tests/queries/0_stateless/02226_filesystem_cache_profile_events.sh +++ b/tests/queries/0_stateless/02226_filesystem_cache_profile_events.sh @@ -7,15 +7,90 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CUR_DIR"/../shell_config.sh -TMP_PATH=${CLICKHOUSE_TEST_UNIQUE_NAME} -QUERIES_FILE=02226_filesystem_cache_profile_events.sh -TEST_FILE=$CUR_DIR/filesystem_cache_queries/$QUERIES_FILE -for storagePolicy in 's3_cache' 'local_cache' 'azure_cache'; do - echo "Using storage policy: $storagePolicy" - cat $TEST_FILE | sed -e "s/_storagePolicy/${storagePolicy}/" > $TMP_PATH - chmod +x $TMP_PATH - ./$TMP_PATH - rm $TMP_PATH - echo +for STORAGE_POLICY in 's3_cache' 'local_cache' 'azure_cache'; do + echo "Using storage policy: $STORAGE_POLICY" + + clickhouse client --multiquery --multiline --query """ + SET max_memory_usage='20G'; + SET enable_filesystem_cache_on_write_operations = 0; + + DROP TABLE IF EXISTS test_02226; + CREATE TABLE test_02226 (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='$STORAGE_POLICY'; + INSERT INTO test_02226 SELECT * FROM generateRandom('key UInt32, value String') LIMIT 10000; + + SET remote_filesystem_read_method='threadpool'; + """ + + query="SELECT * FROM test_02226 LIMIT 10" + + query_id=$(clickhouse client --query "select queryID() from ($query) limit 1" 2>&1) + + clickhouse client --multiquery --multiline --query """ + SYSTEM FLUSH LOGS; + SELECT ProfileEvents['CachedReadBufferReadFromSourceBytes'] > 0 as remote_fs_read, + ProfileEvents['CachedReadBufferReadFromCacheBytes'] > 0 as remote_fs_cache_read, + ProfileEvents['CachedReadBufferCacheWriteBytes'] > 0 as remote_fs_read_and_download + FROM system.query_log + WHERE query_id='$query_id' + AND type = 'QueryFinish' + AND current_database = currentDatabase() + ORDER BY query_start_time DESC + LIMIT 1; + """ + + clickhouse client --multiquery --multiline --query """ + set remote_filesystem_read_method = 'read'; + set local_filesystem_read_method = 'pread'; + """ + + query_id=$(clickhouse client --query "select queryID() from ($query) limit 1" 2>&1) + + clickhouse client --multiquery --multiline --query """ + SYSTEM FLUSH LOGS; + SELECT ProfileEvents['CachedReadBufferReadFromSourceBytes'] > 0 as remote_fs_read, + ProfileEvents['CachedReadBufferReadFromCacheBytes'] > 0 as remote_fs_cache_read, + ProfileEvents['CachedReadBufferCacheWriteBytes'] > 0 as remote_fs_read_and_download + FROM system.query_log + WHERE query_id='$query_id' + AND type = 'QueryFinish' + AND current_database = currentDatabase() + ORDER BY query_start_time DESC + LIMIT 1; + """ + + + clickhouse client --multiquery --multiline --query """ + set remote_filesystem_read_method='threadpool'; + """ + + clickhouse client --multiquery --multiline --query """ + SELECT * FROM test_02226 WHERE value LIKE '%abc%' ORDER BY value LIMIT 10 FORMAT Null; + + SET enable_filesystem_cache_on_write_operations = 1; + + TRUNCATE TABLE test_02226; + SELECT count() FROM test_02226; + + SYSTEM DROP FILESYSTEM CACHE; + + INSERT INTO test_02226 SELECT * FROM generateRandom('key UInt32, value String') LIMIT 10000; + """ + + query_id=$(clickhouse client --query "select queryID() from ($query) limit 1") + + clickhouse client --multiquery --multiline --query """ + SYSTEM FLUSH LOGS; + SELECT ProfileEvents['CachedReadBufferReadFromSourceBytes'] > 0 as remote_fs_read, + ProfileEvents['CachedReadBufferReadFromCacheBytes'] > 0 as remote_fs_cache_read, + ProfileEvents['CachedReadBufferCacheWriteBytes'] > 0 as remote_fs_read_and_download + FROM system.query_log + WHERE query_id='$query_id' + AND type = 'QueryFinish' + AND current_database = currentDatabase() + ORDER BY query_start_time DESC + LIMIT 1; + + DROP TABLE test_02226; + """ done diff --git 
a/tests/queries/0_stateless/02232_allow_only_replicated_engine.sh b/tests/queries/0_stateless/02232_allow_only_replicated_engine.sh index 3ff2dabfa43..193d5fdb6d5 100755 --- a/tests/queries/0_stateless/02232_allow_only_replicated_engine.sh +++ b/tests/queries/0_stateless/02232_allow_only_replicated_engine.sh @@ -16,3 +16,5 @@ ${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none -n --query "CREATE TABLE ${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --user "user_${CLICKHOUSE_DATABASE}" -n --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.tab_rmt (x UInt32) engine = ReplicatedMergeTree order by x;" ${CLICKHOUSE_CLIENT} --query "DROP DATABASE ${CLICKHOUSE_DATABASE}_db" ${CLICKHOUSE_CLIENT} -q "DROP USER user_${CLICKHOUSE_DATABASE}" + +${CLICKHOUSE_CLIENT} -q "drop table mute_stylecheck" diff --git a/tests/queries/0_stateless/02235_add_part_offset_virtual_column.reference b/tests/queries/0_stateless/02235_add_part_offset_virtual_column.reference index 0edbea64065..2455f50b7f2 100644 --- a/tests/queries/0_stateless/02235_add_part_offset_virtual_column.reference +++ b/tests/queries/0_stateless/02235_add_part_offset_virtual_column.reference @@ -32,9 +32,27 @@ 0 0 198401_1_1_1 1 1 198401_1_1_1 999998 999998 198401_1_1_1 +0 +1 +2 +0 foo +1 foo +2 foo SOME GRANULES FILTERED OUT 335872 166463369216 166463369216 34464 1510321840 1510321840 301408 164953047376 164953047376 +100000 +100001 +100002 +100000 foo +100001 foo +100002 foo PREWHERE 301408 164953047376 164953047376 +42 +10042 +20042 +42 foo +10042 foo +20042 foo diff --git a/tests/queries/0_stateless/02235_add_part_offset_virtual_column.sql b/tests/queries/0_stateless/02235_add_part_offset_virtual_column.sql index eb1f01e65f7..1de6447172d 100644 --- a/tests/queries/0_stateless/02235_add_part_offset_virtual_column.sql +++ b/tests/queries/0_stateless/02235_add_part_offset_virtual_column.sql @@ -24,6 +24,8 @@ INSERT INTO t_1 select rowNumberInAllBlocks(), *, '1984-01-01' from t_random_1 l OPTIMIZE TABLE t_1 FINAL; +ALTER TABLE t_1 ADD COLUMN foo String DEFAULT 'foo'; + SELECT COUNT(DISTINCT(_part)) FROM t_1; SELECT min(_part_offset), max(_part_offset) FROM t_1; @@ -37,13 +39,19 @@ SELECT order_0, _part_offset, _part FROM t_1 WHERE order_0 <= 1 OR (order_0 BETW SELECT order_0, _part_offset, computed FROM t_1 ORDER BY order_0, _part_offset, computed LIMIT 3; SELECT order_0, _part_offset, computed FROM t_1 ORDER BY order_0 DESC, _part_offset DESC, computed DESC LIMIT 3; SELECT order_0, _part_offset, _part FROM t_1 WHERE order_0 <= 1 OR order_0 >= 999998 ORDER BY order_0 LIMIT 3; +SELECT _part_offset FROM t_1 ORDER BY order_0 LIMIT 3; +SELECT _part_offset, foo FROM t_1 ORDER BY order_0 LIMIT 3; SELECT 'SOME GRANULES FILTERED OUT'; SELECT count(*), sum(_part_offset), sum(order_0) from t_1 where granule == 0; SELECT count(*), sum(_part_offset), sum(order_0) from t_1 where granule == 0 AND _part_offset < 100000; SELECT count(*), sum(_part_offset), sum(order_0) from t_1 where granule == 0 AND _part_offset >= 100000; +SELECT _part_offset FROM t_1 where granule == 0 AND _part_offset >= 100000 ORDER BY order_0 LIMIT 3; +SELECT _part_offset, foo FROM t_1 where granule == 0 AND _part_offset >= 100000 ORDER BY order_0 LIMIT 3; SELECT 'PREWHERE'; SELECT count(*), sum(_part_offset), sum(order_0) from t_1 prewhere granule == 0 where _part_offset >= 100000; SELECT count(*), sum(_part_offset), sum(order_0) from t_1 prewhere _part != '' where granule == 0; -- { serverError 10 } SELECT count(*), sum(_part_offset), sum(order_0) from t_1 prewhere _part_offset 
> 100000 where granule == 0; -- { serverError 10 } +SELECT _part_offset FROM t_1 PREWHERE order_0 % 10000 == 42 ORDER BY order_0 LIMIT 3; +SELECT _part_offset, foo FROM t_1 PREWHERE order_0 % 10000 == 42 ORDER BY order_0 LIMIT 3; diff --git a/tests/queries/0_stateless/02240_system_filesystem_cache_table.reference b/tests/queries/0_stateless/02240_system_filesystem_cache_table.reference index 6b96da0be59..d3be4855b36 100644 --- a/tests/queries/0_stateless/02240_system_filesystem_cache_table.reference +++ b/tests/queries/0_stateless/02240_system_filesystem_cache_table.reference @@ -1,70 +1,10 @@ Using storage policy: s3_cache --- { echo } - -SYSTEM DROP FILESYSTEM CACHE; -SET enable_filesystem_cache_on_write_operations=0; -DROP TABLE IF EXISTS test; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; -SYSTEM STOP MERGES test; -INSERT INTO test SELECT number, toString(number) FROM numbers(100); -SELECT * FROM test FORMAT Null; -SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size; 0 79 80 0 745 746 -SYSTEM DROP FILESYSTEM CACHE; -SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; -SELECT * FROM test FORMAT Null; -SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; 0 745 746 -SYSTEM DROP FILESYSTEM CACHE; -SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; -DROP TABLE IF EXISTS test; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_3', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; -INSERT INTO test SELECT number, toString(number) FROM numbers(100); -SELECT * FROM test FORMAT Null; -SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size; -SELECT * FROM test FORMAT Null; -SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size; 0 745 746 -SYSTEM DROP FILESYSTEM CACHE; -SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; -SELECT * FROM test FORMAT Null; -SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; -SYSTEM DROP FILESYSTEM CACHE; -SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; - Using storage policy: local_cache --- { echo } - -SYSTEM DROP FILESYSTEM CACHE; -SET enable_filesystem_cache_on_write_operations=0; -DROP TABLE IF EXISTS test; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='local_cache', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; -SYSTEM STOP MERGES test; -INSERT INTO test SELECT number, toString(number) FROM numbers(100); -SELECT * FROM test FORMAT Null; -SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size; 0 79 80 0 745 746 -SYSTEM DROP FILESYSTEM CACHE; -SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; -SELECT * FROM test FORMAT Null; -SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; 0 745 746 -SYSTEM DROP 
FILESYSTEM CACHE; -SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; -DROP TABLE IF EXISTS test; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='local_cache_3', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; -INSERT INTO test SELECT number, toString(number) FROM numbers(100); -SELECT * FROM test FORMAT Null; -SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size; -SELECT * FROM test FORMAT Null; -SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size; 0 745 746 -SYSTEM DROP FILESYSTEM CACHE; -SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; -SELECT * FROM test FORMAT Null; -SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; -SYSTEM DROP FILESYSTEM CACHE; -SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; - diff --git a/tests/queries/0_stateless/02240_system_filesystem_cache_table.sh b/tests/queries/0_stateless/02240_system_filesystem_cache_table.sh index 5f0412cd237..a487f3ca739 100755 --- a/tests/queries/0_stateless/02240_system_filesystem_cache_table.sh +++ b/tests/queries/0_stateless/02240_system_filesystem_cache_table.sh @@ -7,14 +7,36 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -TMP_PATH=${CLICKHOUSE_TEST_UNIQUE_NAME} -QUERIES_FILE=02240_system_filesystem_cache_table.queries -TEST_FILE=$CUR_DIR/filesystem_cache_queries/$QUERIES_FILE +for STORAGE_POLICY in 's3_cache' 'local_cache'; do + echo "Using storage policy: $STORAGE_POLICY" + ${CLICKHOUSE_CLIENT} --query "SYSTEM DROP FILESYSTEM CACHE" -for storagePolicy in 's3_cache' 'local_cache'; do - echo "Using storage policy: $storagePolicy" - cat $TEST_FILE | sed -e "s/_storagePolicy/${storagePolicy}/" > $TMP_PATH - ${CLICKHOUSE_CLIENT} --queries-file $TMP_PATH - rm $TMP_PATH - echo + ${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS test_02240_storage_policy" + ${CLICKHOUSE_CLIENT} --query "CREATE TABLE test_02240_storage_policy (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='${STORAGE_POLICY}', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false" + ${CLICKHOUSE_CLIENT} --query "SYSTEM STOP MERGES test_02240_storage_policy" + ${CLICKHOUSE_CLIENT} --enable_filesystem_cache_on_write_operations=0 --query "INSERT INTO test_02240_storage_policy SELECT number, toString(number) FROM numbers(100)" + ${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_02240_storage_policy FORMAT Null" + ${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size" + + ${CLICKHOUSE_CLIENT} --query "SYSTEM DROP FILESYSTEM CACHE" + ${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache" + ${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_02240_storage_policy FORMAT Null" + ${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache" + ${CLICKHOUSE_CLIENT} --query "SYSTEM DROP FILESYSTEM CACHE" + ${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM 
system.filesystem_cache" + + ${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS test_02240_storage_policy_3" + ${CLICKHOUSE_CLIENT} --query "CREATE TABLE test_02240_storage_policy_3 (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='${STORAGE_POLICY}_3', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false" + ${CLICKHOUSE_CLIENT} --enable_filesystem_cache_on_write_operations=0 --query "INSERT INTO test_02240_storage_policy_3 SELECT number, toString(number) FROM numbers(100)" + ${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_02240_storage_policy_3 FORMAT Null" + ${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size" + ${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_02240_storage_policy_3 FORMAT Null" + ${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size" + + ${CLICKHOUSE_CLIENT} --query "SYSTEM DROP FILESYSTEM CACHE" + ${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache" + ${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_02240_storage_policy_3 FORMAT Null" + ${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache" + ${CLICKHOUSE_CLIENT} --query "SYSTEM DROP FILESYSTEM CACHE" + ${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache" done diff --git a/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.reference b/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.reference index 9405b9eb614..bbca9bbbfee 100644 --- a/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.reference +++ b/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.reference @@ -1,252 +1,60 @@ Using storage policy: s3_cache --- { echo } - -SET enable_filesystem_cache_on_write_operations=1; -DROP TABLE IF EXISTS test; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; -SYSTEM STOP MERGES test; -SYSTEM DROP FILESYSTEM CACHE; -SELECT file_segment_range_begin, file_segment_range_end, size, state -FROM -( - SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path - FROM - ( - SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path - FROM system.remote_data_paths - ) AS data_paths - INNER JOIN - system.filesystem_cache AS caches - ON data_paths.cache_path = caches.cache_path -) -WHERE endsWith(local_path, 'data.bin') -FORMAT Vertical; -SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; 0 -SELECT count() FROM system.filesystem_cache; 0 -INSERT INTO test SELECT number, toString(number) FROM numbers(100); -SELECT file_segment_range_begin, file_segment_range_end, size, state -FROM -( - SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path - FROM - ( - SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path - FROM system.remote_data_paths - ) AS data_paths - INNER JOIN - system.filesystem_cache AS 
caches - ON data_paths.cache_path = caches.cache_path -) -WHERE endsWith(local_path, 'data.bin') -FORMAT Vertical; Row 1: ────── file_segment_range_begin: 0 file_segment_range_end: 745 size: 746 state: DOWNLOADED -SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; 7 -SELECT count() FROM system.filesystem_cache; 7 -SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0; 0 -SELECT * FROM test FORMAT Null; -SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0; 2 -SELECT * FROM test FORMAT Null; -SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0; 2 -SELECT count() size FROM system.filesystem_cache; 7 -SYSTEM DROP FILESYSTEM CACHE; -INSERT INTO test SELECT number, toString(number) FROM numbers(100, 200); -SELECT file_segment_range_begin, file_segment_range_end, size, state -FROM -( - SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path - FROM - ( - SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path - FROM system.remote_data_paths - ) AS data_paths - INNER JOIN - system.filesystem_cache AS caches - ON data_paths.cache_path = caches.cache_path -) -WHERE endsWith(local_path, 'data.bin') -FORMAT Vertical; Row 1: ────── file_segment_range_begin: 0 file_segment_range_end: 1659 size: 1660 state: DOWNLOADED -SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; 7 -SELECT count() FROM system.filesystem_cache; 7 -SELECT count() FROM system.filesystem_cache; 7 -INSERT INTO test SELECT number, toString(number) FROM numbers(100) SETTINGS enable_filesystem_cache_on_write_operations=0; -SELECT count() FROM system.filesystem_cache; 7 -INSERT INTO test SELECT number, toString(number) FROM numbers(100); -INSERT INTO test SELECT number, toString(number) FROM numbers(300, 10000); -SELECT count() FROM system.filesystem_cache; 21 -SYSTEM START MERGES test; -OPTIMIZE TABLE test FINAL; -SELECT count() FROM system.filesystem_cache; 31 -SET mutations_sync=2; -ALTER TABLE test UPDATE value = 'kek' WHERE key = 100; -SELECT count() FROM system.filesystem_cache; 38 -INSERT INTO test SELECT number, toString(number) FROM numbers(5000000); -SYSTEM FLUSH LOGS; -SELECT - query, ProfileEvents['RemoteFSReadBytes'] > 0 as remote_fs_read -FROM - system.query_log -WHERE - query LIKE 'SELECT number, toString(number) FROM numbers(5000000)%' - AND type = 'QueryFinish' - AND current_database = currentDatabase() -ORDER BY - query_start_time - DESC -LIMIT 1; -SELECT count() FROM test; 5010500 -SELECT count() FROM test WHERE value LIKE '%010%'; 18816 - Using storage policy: local_cache --- { echo } - -SET enable_filesystem_cache_on_write_operations=1; -DROP TABLE IF EXISTS test; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='local_cache', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; -SYSTEM STOP MERGES test; -SYSTEM DROP FILESYSTEM CACHE; -SELECT file_segment_range_begin, file_segment_range_end, size, state -FROM -( - SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path - FROM - ( - SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path - FROM system.remote_data_paths - ) AS 
data_paths - INNER JOIN - system.filesystem_cache AS caches - ON data_paths.cache_path = caches.cache_path -) -WHERE endsWith(local_path, 'data.bin') -FORMAT Vertical; -SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; 0 -SELECT count() FROM system.filesystem_cache; 0 -INSERT INTO test SELECT number, toString(number) FROM numbers(100); -SELECT file_segment_range_begin, file_segment_range_end, size, state -FROM -( - SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path - FROM - ( - SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path - FROM system.remote_data_paths - ) AS data_paths - INNER JOIN - system.filesystem_cache AS caches - ON data_paths.cache_path = caches.cache_path -) -WHERE endsWith(local_path, 'data.bin') -FORMAT Vertical; Row 1: ────── file_segment_range_begin: 0 file_segment_range_end: 745 size: 746 state: DOWNLOADED -SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; 7 -SELECT count() FROM system.filesystem_cache; 7 -SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0; 0 -SELECT * FROM test FORMAT Null; -SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0; 2 -SELECT * FROM test FORMAT Null; -SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0; 2 -SELECT count() size FROM system.filesystem_cache; 7 -SYSTEM DROP FILESYSTEM CACHE; -INSERT INTO test SELECT number, toString(number) FROM numbers(100, 200); -SELECT file_segment_range_begin, file_segment_range_end, size, state -FROM -( - SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path - FROM - ( - SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path - FROM system.remote_data_paths - ) AS data_paths - INNER JOIN - system.filesystem_cache AS caches - ON data_paths.cache_path = caches.cache_path -) -WHERE endsWith(local_path, 'data.bin') -FORMAT Vertical; Row 1: ────── file_segment_range_begin: 0 file_segment_range_end: 1659 size: 1660 state: DOWNLOADED -SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; 7 -SELECT count() FROM system.filesystem_cache; 7 -SELECT count() FROM system.filesystem_cache; 7 -INSERT INTO test SELECT number, toString(number) FROM numbers(100) SETTINGS enable_filesystem_cache_on_write_operations=0; -SELECT count() FROM system.filesystem_cache; 7 -INSERT INTO test SELECT number, toString(number) FROM numbers(100); -INSERT INTO test SELECT number, toString(number) FROM numbers(300, 10000); -SELECT count() FROM system.filesystem_cache; 21 -SYSTEM START MERGES test; -OPTIMIZE TABLE test FINAL; -SELECT count() FROM system.filesystem_cache; 31 -SET mutations_sync=2; -ALTER TABLE test UPDATE value = 'kek' WHERE key = 100; -SELECT count() FROM system.filesystem_cache; 38 -INSERT INTO test SELECT number, toString(number) FROM numbers(5000000); -SYSTEM FLUSH LOGS; -SELECT - query, ProfileEvents['RemoteFSReadBytes'] > 0 as remote_fs_read -FROM - system.query_log -WHERE - query LIKE 'SELECT number, toString(number) FROM numbers(5000000)%' - AND type = 'QueryFinish' - AND 
current_database = currentDatabase() -ORDER BY - query_start_time - DESC -LIMIT 1; -SELECT count() FROM test; 5010500 -SELECT count() FROM test WHERE value LIKE '%010%'; 18816 - diff --git a/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.sh b/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.sh index baf1fdf7fed..048fb792e6e 100755 --- a/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.sh +++ b/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.sh @@ -7,14 +7,121 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -TMP_PATH=${CLICKHOUSE_TEST_UNIQUE_NAME} -QUERIES_FILE=02241_filesystem_cache_on_write_operations.queries -TEST_FILE=$CUR_DIR/filesystem_cache_queries/$QUERIES_FILE +for STORAGE_POLICY in 's3_cache' 'local_cache'; do + echo "Using storage policy: $STORAGE_POLICY" -for storagePolicy in 's3_cache' 'local_cache'; do - echo "Using storage policy: $storagePolicy" - cat $TEST_FILE | sed -e "s/_storagePolicy/${storagePolicy}/" > $TMP_PATH - ${CLICKHOUSE_CLIENT} --queries-file $TMP_PATH - rm $TMP_PATH - echo + $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS test_02241" + $CLICKHOUSE_CLIENT --query "CREATE TABLE test_02241 (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='$STORAGE_POLICY', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false" + $CLICKHOUSE_CLIENT --query "SYSTEM STOP MERGES test_02241" + + $CLICKHOUSE_CLIENT --query "SYSTEM DROP FILESYSTEM CACHE" + + $CLICKHOUSE_CLIENT -n --query "SELECT file_segment_range_begin, file_segment_range_end, size, state + FROM + ( + SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path + FROM + ( + SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path + FROM system.remote_data_paths + ) AS data_paths + INNER JOIN + system.filesystem_cache AS caches + ON data_paths.cache_path = caches.cache_path + ) + WHERE endsWith(local_path, 'data.bin') + FORMAT Vertical" + + $CLICKHOUSE_CLIENT --query "SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path" + $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache" + + $CLICKHOUSE_CLIENT --enable_filesystem_cache_on_write_operations=1 --query "INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(100)" + + $CLICKHOUSE_CLIENT -n --query "SELECT file_segment_range_begin, file_segment_range_end, size, state + FROM + ( + SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path + FROM + ( + SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path + FROM system.remote_data_paths + ) AS data_paths + INNER JOIN + system.filesystem_cache AS caches + ON data_paths.cache_path = caches.cache_path + ) + WHERE endsWith(local_path, 'data.bin') + FORMAT Vertical" + + $CLICKHOUSE_CLIENT --query "SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path" + $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache" + + $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0" + + $CLICKHOUSE_CLIENT --query "SELECT * FROM 
test_02241 FORMAT Null" + $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0" + + $CLICKHOUSE_CLIENT --query "SELECT * FROM test_02241 FORMAT Null" + $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0" + + $CLICKHOUSE_CLIENT --query "SELECT count() size FROM system.filesystem_cache" + + $CLICKHOUSE_CLIENT --query "SYSTEM DROP FILESYSTEM CACHE" + + $CLICKHOUSE_CLIENT --enable_filesystem_cache_on_write_operations=1 --query "INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(100, 200)" + + $CLICKHOUSE_CLIENT -n --query "SELECT file_segment_range_begin, file_segment_range_end, size, state + FROM + ( + SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path + FROM + ( + SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path + FROM system.remote_data_paths + ) AS data_paths + INNER JOIN + system.filesystem_cache AS caches + ON data_paths.cache_path = caches.cache_path + ) + WHERE endsWith(local_path, 'data.bin') + FORMAT Vertical;" + + $CLICKHOUSE_CLIENT --query "SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path" + $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache" + + $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache" + $CLICKHOUSE_CLIENT --enable_filesystem_cache_on_write_operations=1 --query "INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(100) SETTINGS enable_filesystem_cache_on_write_operations=0" + $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache" + + $CLICKHOUSE_CLIENT --enable_filesystem_cache_on_write_operations=1 --query "INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(100)" + $CLICKHOUSE_CLIENT --enable_filesystem_cache_on_write_operations=1 --query "INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(300, 10000)" + $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache" + + $CLICKHOUSE_CLIENT --query "SYSTEM START MERGES test_02241" + + $CLICKHOUSE_CLIENT --enable_filesystem_cache_on_write_operations=1 --query "OPTIMIZE TABLE test_02241 FINAL" + $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache" + + $CLICKHOUSE_CLIENT --enable_filesystem_cache_on_write_operations=1 --mutations_sync=2 --query "ALTER TABLE test_02241 UPDATE value = 'kek' WHERE key = 100" + $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache" + + $CLICKHOUSE_CLIENT --enable_filesystem_cache_on_write_operations=1 --query "INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(5000000)" + + $CLICKHOUSE_CLIENT --query "SYSTEM FLUSH LOGS" + + $CLICKHOUSE_CLIENT -n --query "SELECT + query, ProfileEvents['RemoteFSReadBytes'] > 0 as remote_fs_read + FROM + system.query_log + WHERE + query LIKE 'SELECT number, toString(number) FROM numbers(5000000)%' + AND type = 'QueryFinish' + AND current_database = currentDatabase() + ORDER BY + query_start_time + DESC + LIMIT 1" + + $CLICKHOUSE_CLIENT --query "SELECT count() FROM test_02241" + $CLICKHOUSE_CLIENT --query "SELECT count() FROM test_02241 WHERE value LIKE '%010%'" done diff --git a/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.reference b/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.reference index 91587dc8e79..99f31df7def 100644 --- 
a/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.reference +++ b/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.reference @@ -1,42 +1,8 @@ Using storage policy: s3_cache --- { echo } - -SYSTEM DROP FILESYSTEM CACHE; -SET enable_filesystem_cache_log=1; -SET enable_filesystem_cache_on_write_operations=0; -DROP TABLE IF EXISTS test; -DROP TABLE IF EXISTS system.filesystem_cache_log; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; -SYSTEM STOP MERGES test; -INSERT INTO test SELECT number, toString(number) FROM numbers(100000); -SELECT 2240, 's3_cache', * FROM test FORMAT Null; -SYSTEM FLUSH LOGS; -SELECT file_segment_range, read_type FROM system.filesystem_cache_log WHERE query_id = (SELECT query_id from system.query_log where query LIKE '%SELECT 2240%s3_cache%' AND current_database = currentDatabase() AND type = 'QueryFinish' ORDER BY event_time desc LIMIT 1) ORDER BY file_segment_range, read_type; (0,519) READ_FROM_FS_AND_DOWNLOADED_TO_CACHE (0,808110) READ_FROM_FS_AND_DOWNLOADED_TO_CACHE -SELECT 2241, 's3_cache', * FROM test FORMAT Null; -SYSTEM FLUSH LOGS; -SELECT file_segment_range, read_type FROM system.filesystem_cache_log WHERE query_id = (SELECT query_id from system.query_log where query LIKE '%SELECT 2241%s3_cache%' AND current_database = currentDatabase() AND type = 'QueryFinish' ORDER BY event_time desc LIMIT 1) ORDER BY file_segment_range, read_type; (0,808110) READ_FROM_CACHE - Using storage policy: local_cache --- { echo } - -SYSTEM DROP FILESYSTEM CACHE; -SET enable_filesystem_cache_log=1; -SET enable_filesystem_cache_on_write_operations=0; -DROP TABLE IF EXISTS test; -DROP TABLE IF EXISTS system.filesystem_cache_log; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='local_cache', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; -SYSTEM STOP MERGES test; -INSERT INTO test SELECT number, toString(number) FROM numbers(100000); -SELECT 2240, 'local_cache', * FROM test FORMAT Null; -SYSTEM FLUSH LOGS; -SELECT file_segment_range, read_type FROM system.filesystem_cache_log WHERE query_id = (SELECT query_id from system.query_log where query LIKE '%SELECT 2240%local_cache%' AND current_database = currentDatabase() AND type = 'QueryFinish' ORDER BY event_time desc LIMIT 1) ORDER BY file_segment_range, read_type; (0,519) READ_FROM_FS_AND_DOWNLOADED_TO_CACHE (0,808110) READ_FROM_FS_AND_DOWNLOADED_TO_CACHE -SELECT 2241, 'local_cache', * FROM test FORMAT Null; -SYSTEM FLUSH LOGS; -SELECT file_segment_range, read_type FROM system.filesystem_cache_log WHERE query_id = (SELECT query_id from system.query_log where query LIKE '%SELECT 2241%local_cache%' AND current_database = currentDatabase() AND type = 'QueryFinish' ORDER BY event_time desc LIMIT 1) ORDER BY file_segment_range, read_type; (0,808110) READ_FROM_CACHE - diff --git a/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.sh b/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.sh index b5dbc5c9f78..4c92d1d2954 100755 --- a/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.sh +++ b/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.sh @@ -7,14 +7,23 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CUR_DIR"/../shell_config.sh -TMP_PATH=${CLICKHOUSE_TEST_UNIQUE_NAME} -QUERIES_FILE=02242_system_filesystem_cache_log_table.queries -TEST_FILE=$CUR_DIR/filesystem_cache_queries/$QUERIES_FILE +for STORAGE_POLICY in 's3_cache' 'local_cache'; do + echo "Using storage policy: $STORAGE_POLICY" + + $CLICKHOUSE_CLIENT --query "SYSTEM DROP FILESYSTEM CACHE" + + $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS test_2242" + $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS system.filesystem_cache_log" + $CLICKHOUSE_CLIENT --query "CREATE TABLE test_2242 (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='$STORAGE_POLICY', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false" + $CLICKHOUSE_CLIENT --query "SYSTEM STOP MERGES test_2242" + $CLICKHOUSE_CLIENT --enable_filesystem_cache_on_write_operations=0 --enable_filesystem_cache_log=1 --query "INSERT INTO test_2242 SELECT number, toString(number) FROM numbers(100000)" + + $CLICKHOUSE_CLIENT --enable_filesystem_cache_on_write_operations=0 --enable_filesystem_cache_log=1 --query "SELECT 2242, '$STORAGE_POLICY', * FROM test_2242 FORMAT Null" + $CLICKHOUSE_CLIENT --query "SYSTEM FLUSH LOGS" + $CLICKHOUSE_CLIENT --query "SELECT file_segment_range, read_type FROM system.filesystem_cache_log WHERE query_id = (SELECT query_id from system.query_log where query LIKE '%SELECT 2242%$STORAGE_POLICY%' AND current_database = currentDatabase() AND type = 'QueryFinish' ORDER BY event_time desc LIMIT 1) ORDER BY file_segment_range, read_type" + + $CLICKHOUSE_CLIENT --enable_filesystem_cache_on_write_operations=0 --enable_filesystem_cache_log=1 --query "SELECT 2243, '$STORAGE_POLICY', * FROM test_2242 FORMAT Null" + $CLICKHOUSE_CLIENT --query "SYSTEM FLUSH LOGS" + $CLICKHOUSE_CLIENT --query "SELECT file_segment_range, read_type FROM system.filesystem_cache_log WHERE query_id = (SELECT query_id from system.query_log where query LIKE '%SELECT 2243%$STORAGE_POLICY%' AND current_database = currentDatabase() AND type = 'QueryFinish' ORDER BY event_time desc LIMIT 1) ORDER BY file_segment_range, read_type" -for storagePolicy in 's3_cache' 'local_cache'; do - echo "Using storage policy: $storagePolicy" - cat $TEST_FILE | sed -e "s/_storagePolicy/${storagePolicy}/" > $TMP_PATH - ${CLICKHOUSE_CLIENT} --queries-file $TMP_PATH - rm $TMP_PATH - echo done diff --git a/tests/queries/0_stateless/02286_drop_filesystem_cache.reference b/tests/queries/0_stateless/02286_drop_filesystem_cache.reference index b37f87afc28..62907a7c81c 100644 --- a/tests/queries/0_stateless/02286_drop_filesystem_cache.reference +++ b/tests/queries/0_stateless/02286_drop_filesystem_cache.reference @@ -1,138 +1,22 @@ Using storage policy: s3_cache --- { echo } - -SET enable_filesystem_cache_on_write_operations=0; -DROP TABLE IF EXISTS test; -CREATE TABLE test (key UInt32, value String) -Engine=MergeTree() -ORDER BY key -SETTINGS storage_policy='s3_cache', min_bytes_for_wide_part = 10485760; -SYSTEM STOP MERGES; -SYSTEM DROP FILESYSTEM CACHE; -SELECT count() FROM system.filesystem_cache; 0 -INSERT INTO test SELECT number, toString(number) FROM numbers(100); -SELECT * FROM test FORMAT Null; -SELECT count() FROM system.filesystem_cache; 2 -SYSTEM DROP FILESYSTEM CACHE; -SELECT count() FROM system.filesystem_cache; 0 -SELECT * FROM test FORMAT Null; -SELECT count() FROM system.filesystem_cache; 1 -SYSTEM DROP FILESYSTEM CACHE './data'; -- { serverError 36 } -SELECT count() FROM system.filesystem_cache; 1 -SELECT * FROM test FORMAT Null; -SELECT count() 
FROM system.filesystem_cache; 1 -SELECT count() -FROM ( - SELECT - arrayJoin(cache_paths) AS cache_path, - local_path, - remote_path - FROM - system.remote_data_paths - ) AS data_paths -INNER JOIN system.filesystem_cache AS caches -ON data_paths.cache_path = caches.cache_path; 1 -DROP TABLE test NO DELAY; -SELECT count() FROM system.filesystem_cache; 0 -SELECT cache_path FROM system.filesystem_cache; -SELECT cache_path, local_path -FROM ( - SELECT - arrayJoin(cache_paths) AS cache_path, - local_path, - remote_path - FROM - system.remote_data_paths - ) AS data_paths -INNER JOIN system.filesystem_cache AS caches -ON data_paths.cache_path = caches.cache_path; -DROP TABLE IF EXISTS test2; -CREATE TABLE test2 (key UInt32, value String) -Engine=MergeTree() -ORDER BY key -SETTINGS storage_policy='s3_cache_2', min_bytes_for_wide_part = 10485760; -INSERT INTO test2 SELECT number, toString(number) FROM numbers(100); -SELECT * FROM test2 FORMAT Null; -SELECT count() FROM system.filesystem_cache; 2 -SYSTEM DROP FILESYSTEM CACHE 's3_cache_2/'; -SELECT count() FROM system.filesystem_cache; 0 - Using storage policy: local_cache --- { echo } - -SET enable_filesystem_cache_on_write_operations=0; -DROP TABLE IF EXISTS test; -CREATE TABLE test (key UInt32, value String) -Engine=MergeTree() -ORDER BY key -SETTINGS storage_policy='local_cache', min_bytes_for_wide_part = 10485760; -SYSTEM STOP MERGES; -SYSTEM DROP FILESYSTEM CACHE; -SELECT count() FROM system.filesystem_cache; 0 -INSERT INTO test SELECT number, toString(number) FROM numbers(100); -SELECT * FROM test FORMAT Null; -SELECT count() FROM system.filesystem_cache; 2 -SYSTEM DROP FILESYSTEM CACHE; -SELECT count() FROM system.filesystem_cache; 0 -SELECT * FROM test FORMAT Null; -SELECT count() FROM system.filesystem_cache; 1 -SYSTEM DROP FILESYSTEM CACHE './data'; -- { serverError 36 } -SELECT count() FROM system.filesystem_cache; 1 -SELECT * FROM test FORMAT Null; -SELECT count() FROM system.filesystem_cache; 1 -SELECT count() -FROM ( - SELECT - arrayJoin(cache_paths) AS cache_path, - local_path, - remote_path - FROM - system.remote_data_paths - ) AS data_paths -INNER JOIN system.filesystem_cache AS caches -ON data_paths.cache_path = caches.cache_path; 1 -DROP TABLE test NO DELAY; -SELECT count() FROM system.filesystem_cache; 0 -SELECT cache_path FROM system.filesystem_cache; -SELECT cache_path, local_path -FROM ( - SELECT - arrayJoin(cache_paths) AS cache_path, - local_path, - remote_path - FROM - system.remote_data_paths - ) AS data_paths -INNER JOIN system.filesystem_cache AS caches -ON data_paths.cache_path = caches.cache_path; -DROP TABLE IF EXISTS test2; -CREATE TABLE test2 (key UInt32, value String) -Engine=MergeTree() -ORDER BY key -SETTINGS storage_policy='local_cache_2', min_bytes_for_wide_part = 10485760; -INSERT INTO test2 SELECT number, toString(number) FROM numbers(100); -SELECT * FROM test2 FORMAT Null; -SELECT count() FROM system.filesystem_cache; 2 -SYSTEM DROP FILESYSTEM CACHE 'local_cache_2/'; -SELECT count() FROM system.filesystem_cache; 0 - diff --git a/tests/queries/0_stateless/02286_drop_filesystem_cache.sh b/tests/queries/0_stateless/02286_drop_filesystem_cache.sh index 30d04743b34..b563c487646 100755 --- a/tests/queries/0_stateless/02286_drop_filesystem_cache.sh +++ b/tests/queries/0_stateless/02286_drop_filesystem_cache.sh @@ -7,14 +7,78 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CUR_DIR"/../shell_config.sh -TMP_PATH=${CLICKHOUSE_TEST_UNIQUE_NAME} -QUERIES_FILE=02286_drop_filesystem_cache.queries -TEST_FILE=$CUR_DIR/filesystem_cache_queries/$QUERIES_FILE +for STORAGE_POLICY in 's3_cache' 'local_cache'; do + echo "Using storage policy: $STORAGE_POLICY" + $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS test_02286" -for storagePolicy in 's3_cache' 'local_cache'; do - echo "Using storage policy: $storagePolicy" - cat $TEST_FILE | sed -e "s/_storagePolicy/${storagePolicy}/" > $TMP_PATH - ${CLICKHOUSE_CLIENT} --queries-file $TMP_PATH - rm $TMP_PATH - echo + $CLICKHOUSE_CLIENT -n --query "CREATE TABLE test_02286 (key UInt32, value String) + Engine=MergeTree() + ORDER BY key + SETTINGS storage_policy='$STORAGE_POLICY', min_bytes_for_wide_part = 10485760" + + $CLICKHOUSE_CLIENT --query "SYSTEM STOP MERGES" + $CLICKHOUSE_CLIENT --query "SYSTEM DROP FILESYSTEM CACHE" + + $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache" + $CLICKHOUSE_CLIENT --enable_filesystem_cache_on_write_operations=0 --query "INSERT INTO test_02286 SELECT number, toString(number) FROM numbers(100)" + + $CLICKHOUSE_CLIENT --query "SELECT * FROM test_02286 FORMAT Null" + $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache" + + $CLICKHOUSE_CLIENT --query "SYSTEM DROP FILESYSTEM CACHE" + $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache" + + $CLICKHOUSE_CLIENT --query "SELECT * FROM test_02286 FORMAT Null" + $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache" + + $CLICKHOUSE_CLIENT --multiline --multiquery --query "SYSTEM DROP FILESYSTEM CACHE './data'; --{serverError 36}" + + $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache" + + $CLICKHOUSE_CLIENT --query "SELECT * FROM test_02286 FORMAT Null" + $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache" + + $CLICKHOUSE_CLIENT -n --query "SELECT count() + FROM ( + SELECT + arrayJoin(cache_paths) AS cache_path, + local_path, + remote_path + FROM + system.remote_data_paths + ) AS data_paths + INNER JOIN system.filesystem_cache AS caches + ON data_paths.cache_path = caches.cache_path" + + $CLICKHOUSE_CLIENT --query "DROP TABLE test_02286 NO DELAY" + $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache" + + $CLICKHOUSE_CLIENT --query "SELECT cache_path FROM system.filesystem_cache" + $CLICKHOUSE_CLIENT -n --query "SELECT cache_path, local_path + FROM ( + SELECT + arrayJoin(cache_paths) AS cache_path, + local_path, + remote_path + FROM + system.remote_data_paths + ) AS data_paths + INNER JOIN system.filesystem_cache AS caches + ON data_paths.cache_path = caches.cache_path" + + $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS test_022862" + + $CLICKHOUSE_CLIENT -n --query "CREATE TABLE test_022862 (key UInt32, value String) + Engine=MergeTree() + ORDER BY key + SETTINGS storage_policy='${STORAGE_POLICY}_2', min_bytes_for_wide_part = 10485760" + + $CLICKHOUSE_CLIENT --enable_filesystem_cache_on_write_operations=0 --query "INSERT INTO test_022862 SELECT number, toString(number) FROM numbers(100)" + $CLICKHOUSE_CLIENT --query "SELECT * FROM test_022862 FORMAT Null" + $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache" + + $CLICKHOUSE_CLIENT --query "SYSTEM DROP FILESYSTEM CACHE '${STORAGE_POLICY}_2/'" + $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache" + + $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS test_022862" done diff --git 
a/tests/queries/0_stateless/02287_ephemeral_format_crash.reference b/tests/queries/0_stateless/02287_ephemeral_format_crash.reference index e69de29bb2d..39bbe7c68eb 100644 --- a/tests/queries/0_stateless/02287_ephemeral_format_crash.reference +++ b/tests/queries/0_stateless/02287_ephemeral_format_crash.reference @@ -0,0 +1,2 @@ +CREATE TABLE default.test\n(\n `a` UInt8,\n `b` String EPHEMERAL\n)\nENGINE = Memory +CREATE TABLE default.test\n(\n `a` UInt8,\n `b` String EPHEMERAL 1 + 2\n)\nENGINE = Memory diff --git a/tests/queries/0_stateless/02287_ephemeral_format_crash.sql b/tests/queries/0_stateless/02287_ephemeral_format_crash.sql index 8fd9a4b4332..466532970ab 100644 --- a/tests/queries/0_stateless/02287_ephemeral_format_crash.sql +++ b/tests/queries/0_stateless/02287_ephemeral_format_crash.sql @@ -1,10 +1,13 @@ DROP TABLE IF EXISTS test; CREATE TABLE test(a UInt8, b String EPHEMERAL) Engine=Memory(); - +SHOW CREATE TABLE test; DROP TABLE test; -CREATE TABLE test(a UInt8, b EPHEMERAL String) Engine=Memory(); -- { clientError SYNTAX_ERROR } +CREATE TABLE test(a UInt8, b EPHEMERAL String) Engine=Memory(); -- { serverError UNKNOWN_IDENTIFIER } CREATE TABLE test(a UInt8, b EPHEMERAL 'a' String) Engine=Memory(); -- { clientError SYNTAX_ERROR } -CREATE TABLE test(a UInt8, b String EPHEMERAL test) Engine=Memory(); -- { clientError SYNTAX_ERROR } -CREATE TABLE test(a UInt8, b String EPHEMERAL 1+2) Engine=Memory(); -- { clientError SYNTAX_ERROR } +CREATE TABLE test(a UInt8, b String EPHEMERAL test) Engine=Memory(); -- { serverError UNKNOWN_IDENTIFIER } + +CREATE TABLE test(a UInt8, b String EPHEMERAL 1+2) Engine=Memory(); +SHOW CREATE TABLE test; +DROP TABLE test; diff --git a/tests/queries/0_stateless/02293_compatibility_ignore_auto_increment_in_create_table.reference b/tests/queries/0_stateless/02293_compatibility_ignore_auto_increment_in_create_table.reference index 2db591f7e5a..63d74e4ea1e 100644 --- a/tests/queries/0_stateless/02293_compatibility_ignore_auto_increment_in_create_table.reference +++ b/tests/queries/0_stateless/02293_compatibility_ignore_auto_increment_in_create_table.reference @@ -11,7 +11,7 @@ s String create table, several columns with different default specifiers di UInt8 DEFAULT 1 id Int32 -s String EPHEMERAL \'\' +s String EPHEMERAL defaultValueOfTypeName(\'String\') create table failed, column +type +DEFAULT +AUTO_INCREMENT create table failed, column -type +DEFAULT +AUTO_INCREMENT create table failed, column +type +AUTO_INCREMENT +DEFAULT diff --git a/tests/queries/0_stateless/02313_filesystem_cache_seeks.reference b/tests/queries/0_stateless/02313_filesystem_cache_seeks.reference index 73871f55856..062aac259a4 100644 --- a/tests/queries/0_stateless/02313_filesystem_cache_seeks.reference +++ b/tests/queries/0_stateless/02313_filesystem_cache_seeks.reference @@ -1,6 +1,3 @@ Using storage policy: s3_cache - Using storage policy: local_cache - Using storage policy: s3_cache_multi - diff --git a/tests/queries/0_stateless/02313_filesystem_cache_seeks.sh b/tests/queries/0_stateless/02313_filesystem_cache_seeks.sh index 7ed92cbf36d..f5de4346fd6 100755 --- a/tests/queries/0_stateless/02313_filesystem_cache_seeks.sh +++ b/tests/queries/0_stateless/02313_filesystem_cache_seeks.sh @@ -7,14 +7,31 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CUR_DIR"/../shell_config.sh -TMP_PATH=${CLICKHOUSE_TEST_UNIQUE_NAME} -QUERIES_FILE=02313_filesystem_cache_seeks.queries -TEST_FILE=$CUR_DIR/filesystem_cache_queries/$QUERIES_FILE -for storagePolicy in 's3_cache' 'local_cache' 's3_cache_multi'; do - echo "Using storage policy: $storagePolicy" - cat $TEST_FILE | sed -e "s/_storagePolicy/${storagePolicy}/" > $TMP_PATH - ${CLICKHOUSE_CLIENT} --queries-file $TMP_PATH - rm $TMP_PATH - echo +for STORAGE_POLICY in 's3_cache' 'local_cache' 's3_cache_multi'; do + echo "Using storage policy: $STORAGE_POLICY" + $CLICKHOUSE_CLIENT --query "SYSTEM DROP FILESYSTEM CACHE" + + $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS test_02313" + + $CLICKHOUSE_CLIENT --query "CREATE TABLE test_02313 (id Int32, val String) + ENGINE = MergeTree() + ORDER BY tuple() + SETTINGS storage_policy = '$STORAGE_POLICY'" + + $CLICKHOUSE_CLIENT --enable_filesystem_cache_on_write_operations=0 -n --query "INSERT INTO test_02313 + SELECT * FROM + generateRandom('id Int32, val String') + LIMIT 100000" + + $CLICKHOUSE_CLIENT --query "SELECT * FROM test_02313 WHERE val LIKE concat('%', randomPrintableASCII(3), '%') FORMAT Null" + $CLICKHOUSE_CLIENT --query "SELECT * FROM test_02313 WHERE val LIKE concat('%', randomPrintableASCII(3), '%') FORMAT Null" + $CLICKHOUSE_CLIENT --query "SELECT * FROM test_02313 WHERE val LIKE concat('%', randomPrintableASCII(3), '%') FORMAT Null" + $CLICKHOUSE_CLIENT --query "SELECT * FROM test_02313 WHERE val LIKE concat('%', randomPrintableASCII(3), '%') FORMAT Null" + $CLICKHOUSE_CLIENT --query "SELECT * FROM test_02313 WHERE val LIKE concat('%', randomPrintableASCII(3), '%') FORMAT Null" + $CLICKHOUSE_CLIENT --query "SELECT * FROM test_02313 WHERE val LIKE concat('%', randomPrintableASCII(3), '%') FORMAT Null" + $CLICKHOUSE_CLIENT --query "SELECT * FROM test_02313 WHERE val LIKE concat('%', randomPrintableASCII(3), '%') FORMAT Null" + + $CLICKHOUSE_CLIENT --query "DROP TABLE test_02313" + done diff --git a/tests/queries/0_stateless/02319_lightweight_delete_on_merge_tree.sql b/tests/queries/0_stateless/02319_lightweight_delete_on_merge_tree.sql index 67513a1cdff..c7f8b67e740 100644 --- a/tests/queries/0_stateless/02319_lightweight_delete_on_merge_tree.sql +++ b/tests/queries/0_stateless/02319_lightweight_delete_on_merge_tree.sql @@ -103,7 +103,7 @@ ALTER TABLE t_proj ADD PROJECTION p_1 (SELECT avg(a), avg(b), count()); INSERT INTO t_proj SELECT number + 1, number + 1 FROM numbers(1000); -DELETE FROM t_proj WHERE a < 100; -- { serverError NOT_IMPLEMENTED } +DELETE FROM t_proj WHERE a < 100; -- { serverError BAD_ARGUMENTS } SELECT avg(a), avg(b), count() FROM t_proj; diff --git a/tests/queries/0_stateless/02361_fsync_profile_events.sh b/tests/queries/0_stateless/02361_fsync_profile_events.sh index d54da9a49e5..85f82c59c71 100755 --- a/tests/queries/0_stateless/02361_fsync_profile_events.sh +++ b/tests/queries/0_stateless/02361_fsync_profile_events.sh @@ -45,13 +45,16 @@ for i in {1..100}; do # Non retriable errors if [[ $FileSync -ne 7 ]]; then + echo "FileSync: $FileSync != 11" >&2 exit 2 fi # Check that all files was synced if [[ $FileSync -ne $FileOpen ]]; then + echo "$FileSync (FileSync) != $FileOpen (FileOpen)" >&2 exit 3 fi if [[ $DirectorySync -ne 2 ]]; then + echo "DirectorySync: $DirectorySync != 2" >&2 exit 4 fi diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference index 34180020680..0996ad37e6e 100644 --- 
a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference @@ -82,6 +82,7 @@ addYears addressToLine addressToLineWithInlines addressToSymbol +age alphaTokens and appendTrailingCharIfAbsent @@ -330,6 +331,7 @@ flattenTuple floor format formatDateTime +formatDateTimeInJodaSyntax formatReadableQuantity formatReadableSize formatReadableTimeDelta @@ -342,6 +344,7 @@ fromUnixTimestamp fromUnixTimestamp64Micro fromUnixTimestamp64Milli fromUnixTimestamp64Nano +fromUnixTimestampInJodaSyntax fullHostName fuzzBits gccMurmurHash diff --git a/tests/queries/0_stateless/02416_json_object_inference.sql b/tests/queries/0_stateless/02416_json_object_inference.sql index 24f50930a68..837c06bd127 100644 --- a/tests/queries/0_stateless/02416_json_object_inference.sql +++ b/tests/queries/0_stateless/02416_json_object_inference.sql @@ -1,6 +1,6 @@ -- Tags: no-fasttest set allow_experimental_object_type=1; desc format(JSONEachRow, '{"a" : {"b" : {"c" : 1, "d" : "str"}}}'); -set allow_experimental_object_type=0; +set allow_experimental_object_type=0, input_format_json_read_objects_as_strings=0; desc format(JSONEachRow, '{"a" : {"b" : {"c" : 1, "d" : "str"}}}'); -- {serverError 652} diff --git a/tests/queries/0_stateless/02416_json_tuple_to_array_schema_inference.reference b/tests/queries/0_stateless/02416_json_tuple_to_array_schema_inference.reference new file mode 100644 index 00000000000..f44e051e6bf --- /dev/null +++ b/tests/queries/0_stateless/02416_json_tuple_to_array_schema_inference.reference @@ -0,0 +1,3 @@ +x Array(Array(Nullable(Int64))) +x Tuple(Array(Array(Nullable(Int64))), Nullable(Int64)) +x Map(String, Array(Nullable(Int64))) diff --git a/tests/queries/0_stateless/02416_json_tuple_to_array_schema_inference.sql b/tests/queries/0_stateless/02416_json_tuple_to_array_schema_inference.sql new file mode 100644 index 00000000000..ae3142f0b21 --- /dev/null +++ b/tests/queries/0_stateless/02416_json_tuple_to_array_schema_inference.sql @@ -0,0 +1,4 @@ +desc format(JSONEachRow, '{"x" : [[42, null], [24, null]]}'); +desc format(JSONEachRow, '{"x" : [[[42, null], []], 24]}'); +desc format(JSONEachRow, '{"x" : {"key" : [42, null]}}'); + diff --git a/tests/queries/0_stateless/02447_drop_database_replica.reference b/tests/queries/0_stateless/02447_drop_database_replica.reference new file mode 100644 index 00000000000..1d65fe66c6e --- /dev/null +++ b/tests/queries/0_stateless/02447_drop_database_replica.reference @@ -0,0 +1,15 @@ +t +1 +2 +2 +2 +2 +2 +2 +rdb_default 1 1 +rdb_default 1 2 +2 +2 +2 +t +rdb_default_3 1 1 diff --git a/tests/queries/0_stateless/02447_drop_database_replica.sh b/tests/queries/0_stateless/02447_drop_database_replica.sh new file mode 100755 index 00000000000..4bfd6243c2e --- /dev/null +++ b/tests/queries/0_stateless/02447_drop_database_replica.sh @@ -0,0 +1,47 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +db="rdb_$CLICKHOUSE_DATABASE" + +$CLICKHOUSE_CLIENT -q "system flush logs" +$CLICKHOUSE_CLIENT --allow_experimental_database_replicated=1 -q "create database $db engine=Replicated('/test/$CLICKHOUSE_DATABASE/rdb', 's1', 'r1')" +$CLICKHOUSE_CLIENT --distributed_ddl_output_mode=none -q "create table $db.t as system.query_log" # Suppress style check: current_database=$CLICKHOUSE_DATABASE +$CLICKHOUSE_CLIENT -q "show tables from $db" + +$CLICKHOUSE_CLIENT -q "system drop database replica 's1|r1' from table t" 2>&1| grep -Fac "SYNTAX_ERROR" +$CLICKHOUSE_CLIENT -q "system drop database replica 's1|r1' from database $db" 2>&1| grep -Fac "There is a local database" +$CLICKHOUSE_CLIENT -q "system drop database replica 's1|r1' from zkpath '/test/$CLICKHOUSE_DATABASE/rdb'" 2>&1| grep -Fac "There is a local database" +$CLICKHOUSE_CLIENT -q "system drop database replica 's1|r1' from zkpath '/test/$CLICKHOUSE_DATABASE/rdb/'" 2>&1| grep -Fac "There is a local database" + +$CLICKHOUSE_CLIENT -q "system drop database replica 's1|r1' from zkpath '/test/$CLICKHOUSE_DATABASE/'" 2>&1| grep -Fac "does not look like a path of Replicated database" +$CLICKHOUSE_CLIENT -q "system drop database replica 's2|r1' from zkpath '/test/$CLICKHOUSE_DATABASE/rdb'" 2>&1| grep -Fac "does not exist" +$CLICKHOUSE_CLIENT -q "system drop database replica 's2/r1' from zkpath '/test/$CLICKHOUSE_DATABASE/rdb'" 2>&1| grep -Fac "Invalid replica name" + +db2="${db}_2" +$CLICKHOUSE_CLIENT --allow_experimental_database_replicated=1 -q "create database $db2 engine=Replicated('/test/$CLICKHOUSE_DATABASE/rdb', 's1', 'r2')" +$CLICKHOUSE_CLIENT -q "system sync database replica $db" +$CLICKHOUSE_CLIENT -q "select cluster, shard_num, replica_num from system.clusters where cluster='$db' order by shard_num, replica_num" +$CLICKHOUSE_CLIENT -q "system drop database replica 's1|r1' from database $db2" 2>&1| grep -Fac "is active, cannot drop it" + +$CLICKHOUSE_CLIENT -q "detach database $db2" +$CLICKHOUSE_CLIENT -q "system drop database replica 's1|r2' from database $db" +$CLICKHOUSE_CLIENT -q "attach database $db2" 2>/dev/null +$CLICKHOUSE_CLIENT --distributed_ddl_output_mode=none -q "create table $db2.t2 as system.query_log" 2>&1| grep -Fac "Database is in readonly mode" # Suppress style check: current_database=$CLICKHOUSE_DATABASE + +$CLICKHOUSE_CLIENT -q "detach database $db" +$CLICKHOUSE_CLIENT -q "system drop database replica 's1|r1' from zkpath '/test/$CLICKHOUSE_DATABASE/rdb/'" +$CLICKHOUSE_CLIENT -q "attach database $db" 2>/dev/null +$CLICKHOUSE_CLIENT --distributed_ddl_output_mode=none -q "create table $db.t2 as system.query_log" 2>&1| grep -Fac "Database is in readonly mode" # Suppress style check: current_database=$CLICKHOUSE_DATABASE +$CLICKHOUSE_CLIENT -q "show tables from $db" + +db3="${db}_3" +$CLICKHOUSE_CLIENT --allow_experimental_database_replicated=1 -q "create database $db3 engine=Replicated('/test/$CLICKHOUSE_DATABASE/rdb', 's1', 'r1')" +$CLICKHOUSE_CLIENT -q "system sync database replica $db3" +$CLICKHOUSE_CLIENT -q "select cluster, shard_num, replica_num from system.clusters where cluster='$db3'" + +$CLICKHOUSE_CLIENT -q "drop database $db" +$CLICKHOUSE_CLIENT -q "drop database $db2" +$CLICKHOUSE_CLIENT -q "drop database $db3" diff --git a/tests/queries/0_stateless/02448_clone_replica_lost_part.sql b/tests/queries/0_stateless/02448_clone_replica_lost_part.sql index 67a329ee1f0..4befe952a14 100644 --- a/tests/queries/0_stateless/02448_clone_replica_lost_part.sql +++ 
b/tests/queries/0_stateless/02448_clone_replica_lost_part.sql @@ -120,9 +120,14 @@ insert into rmt1 values (100); insert into rmt2 values (100); insert into rmt1 values (200); insert into rmt2 values (200); + +-- otherwise we can get exception on drop part +system sync replica rmt2; +system sync replica rmt1; + detach table rmt1; --- create a gap in block numbers buy dropping part +-- create a gap in block numbers by dropping part insert into rmt2 values (300); alter table rmt2 drop part 'all_19_19_0'; -- remove 200 insert into rmt2 values (400); diff --git a/tests/queries/0_stateless/02460_prewhere_row_level_policy.reference b/tests/queries/0_stateless/02460_prewhere_row_level_policy.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02460_prewhere_row_level_policy.sql b/tests/queries/0_stateless/02460_prewhere_row_level_policy.sql new file mode 100644 index 00000000000..fc98fa773b4 --- /dev/null +++ b/tests/queries/0_stateless/02460_prewhere_row_level_policy.sql @@ -0,0 +1,9 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/40956#issuecomment-1262096612 +DROP TABLE IF EXISTS row_level_policy_prewhere; +DROP ROW POLICY IF EXISTS row_level_policy_prewhere_policy0 ON row_level_policy_prewhere; + +CREATE TABLE row_level_policy_prewhere (x Int16, y String) ENGINE = MergeTree ORDER BY x; +INSERT INTO row_level_policy_prewhere(y, x) VALUES ('A',1), ('B',2), ('C',3); +CREATE ROW POLICY row_level_policy_prewhere_policy0 ON row_level_policy_prewhere FOR SELECT USING x >= 0 TO default; +SELECT * FROM row_level_policy_prewhere PREWHERE y = 'foo'; +DROP TABLE row_level_policy_prewhere; diff --git a/tests/queries/0_stateless/02461_prewhere_row_level_policy_lightweight_delete.reference.j2 b/tests/queries/0_stateless/02461_prewhere_row_level_policy_lightweight_delete.reference.j2 new file mode 100644 index 00000000000..ca7b300e00e --- /dev/null +++ b/tests/queries/0_stateless/02461_prewhere_row_level_policy_lightweight_delete.reference.j2 @@ -0,0 +1,29 @@ +{% for index_granularity in [999, 1000, 1001, 9999, 10000, 10001] -%} +-- { echoOn } + +SELECT count() FROM url_na_log; +130000 +SELECT rows FROM system.parts WHERE database = currentDatabase() AND table = 'url_na_log' AND active; +130000 +SELECT count() FROM url_na_log PREWHERE DateVisit >= '2022-08-10' AND DateVisit <= '2022-08-20' WHERE SiteId = 209 SETTINGS max_block_size = 200000, max_threads = 1; +110000 +-- Delete more than a half rows (60K) from the range 2022-08-10 .. 2022-08-20 +-- There should be 50K rows remaining in this range +DELETE FROM url_na_log WHERE SiteId = 209 AND DateVisit >= '2022-08-13' AND DateVisit <= '2022-08-18'; +SELECT count() FROM url_na_log; +70000 +SELECT rows FROM system.parts WHERE database = currentDatabase() AND table = 'url_na_log' AND active; +130000 +SELECT count() FROM url_na_log PREWHERE DateVisit >= '2022-08-10' AND DateVisit <= '2022-08-20' WHERE SiteId = 209 SETTINGS max_block_size = 200000, max_threads = 1; +50000 +-- Hide more than a half of remaining rows (30K) from the range 2022-08-10 .. 
2022-08-20 using row policy
+-- Now this range should have 20K rows left
+CREATE ROW POLICY url_na_log_policy0 ON url_na_log FOR SELECT USING DateVisit < '2022-08-11' or DateVisit > '2022-08-19' TO default;
+SELECT count() FROM url_na_log;
+40000
+SELECT rows FROM system.parts WHERE database = currentDatabase() AND table = 'url_na_log' AND active;
+130000
+SELECT count() FROM url_na_log PREWHERE DateVisit >= '2022-08-10' AND DateVisit <= '2022-08-20' WHERE SiteId = 209 SETTINGS max_block_size = 200000, max_threads = 1;
+20000
+DROP ROW POLICY url_na_log_policy0 ON url_na_log;
+{% endfor -%}
diff --git a/tests/queries/0_stateless/02461_prewhere_row_level_policy_lightweight_delete.sql.j2 b/tests/queries/0_stateless/02461_prewhere_row_level_policy_lightweight_delete.sql.j2
new file mode 100644
index 00000000000..e1ec348e6ac
--- /dev/null
+++ b/tests/queries/0_stateless/02461_prewhere_row_level_policy_lightweight_delete.sql.j2
@@ -0,0 +1,59 @@
+{% for index_granularity in [999, 1000, 1001, 9999, 10000, 10001] %}
+
+DROP TABLE IF EXISTS url_na_log;
+
+CREATE TABLE url_na_log(SiteId UInt32, DateVisit Date, PRIMARY KEY (SiteId))
+ENGINE = MergeTree()
+ORDER BY (SiteId, DateVisit)
+SETTINGS index_granularity = {{ index_granularity }}, min_bytes_for_wide_part = 0;
+
+-- Insert some data to have 110K rows in the range 2022-08-10 .. 2022-08-20 and some more rows before and after that range
+insert into url_na_log select 209, '2022-08-09' from numbers(10000);
+insert into url_na_log select 209, '2022-08-10' from numbers(10000);
+insert into url_na_log select 209, '2022-08-11' from numbers(10000);
+insert into url_na_log select 209, '2022-08-12' from numbers(10000);
+insert into url_na_log select 209, '2022-08-13' from numbers(10000);
+insert into url_na_log select 209, '2022-08-14' from numbers(10000);
+insert into url_na_log select 209, '2022-08-15' from numbers(10000);
+insert into url_na_log select 209, '2022-08-16' from numbers(10000);
+insert into url_na_log select 209, '2022-08-17' from numbers(10000);
+insert into url_na_log select 209, '2022-08-18' from numbers(10000);
+insert into url_na_log select 209, '2022-08-19' from numbers(10000);
+insert into url_na_log select 209, '2022-08-20' from numbers(10000);
+insert into url_na_log select 209, '2022-08-21' from numbers(10000);
+
+
+SET mutations_sync=2;
+SET allow_experimental_lightweight_delete=1;
+
+OPTIMIZE TABLE url_na_log FINAL;
+
+-- { echoOn }
+
+SELECT count() FROM url_na_log;
+SELECT rows FROM system.parts WHERE database = currentDatabase() AND table = 'url_na_log' AND active;
+SELECT count() FROM url_na_log PREWHERE DateVisit >= '2022-08-10' AND DateVisit <= '2022-08-20' WHERE SiteId = 209 SETTINGS max_block_size = 200000, max_threads = 1;
+
+
+-- Delete more than a half rows (60K) from the range 2022-08-10 .. 2022-08-20
+-- There should be 50K rows remaining in this range
+DELETE FROM url_na_log WHERE SiteId = 209 AND DateVisit >= '2022-08-13' AND DateVisit <= '2022-08-18';
+
+SELECT count() FROM url_na_log;
+SELECT rows FROM system.parts WHERE database = currentDatabase() AND table = 'url_na_log' AND active;
+SELECT count() FROM url_na_log PREWHERE DateVisit >= '2022-08-10' AND DateVisit <= '2022-08-20' WHERE SiteId = 209 SETTINGS max_block_size = 200000, max_threads = 1;
+
+
+-- Hide more than a half of remaining rows (30K) from the range 2022-08-10 ..
2022-08-20 using row policy +-- Now the this range should have 20K rows left +CREATE ROW POLICY url_na_log_policy0 ON url_na_log FOR SELECT USING DateVisit < '2022-08-11' or DateVisit > '2022-08-19' TO default; + +SELECT count() FROM url_na_log; +SELECT rows FROM system.parts WHERE database = currentDatabase() AND table = 'url_na_log' AND active; +SELECT count() FROM url_na_log PREWHERE DateVisit >= '2022-08-10' AND DateVisit <= '2022-08-20' WHERE SiteId = 209 SETTINGS max_block_size = 200000, max_threads = 1; + +DROP ROW POLICY url_na_log_policy0 ON url_na_log; + +-- { echoOff } + +{% endfor %} diff --git a/tests/queries/0_stateless/02473_functions_in_readonly_mode.reference b/tests/queries/0_stateless/02473_functions_in_readonly_mode.reference index 500004a06b2..2c33a7b807f 100644 --- a/tests/queries/0_stateless/02473_functions_in_readonly_mode.reference +++ b/tests/queries/0_stateless/02473_functions_in_readonly_mode.reference @@ -1,3 +1,5 @@ 0 123 0 +ERROR +ERROR diff --git a/tests/queries/0_stateless/02473_functions_in_readonly_mode.sh b/tests/queries/0_stateless/02473_functions_in_readonly_mode.sh new file mode 100755 index 00000000000..5e11704e6ce --- /dev/null +++ b/tests/queries/0_stateless/02473_functions_in_readonly_mode.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash + +unset CLICKHOUSE_LOG_COMMENT + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT --query="SELECT * from numbers(1)" +$CLICKHOUSE_CLIENT --query="SELECT * from format('TSV', '123')" + +$CLICKHOUSE_CLIENT --readonly=1 --query="SELECT * from numbers(1)" +$CLICKHOUSE_CLIENT --readonly=1 --query="SELECT * from format('TSV', '123')" 2>&1 | grep -Fq "Cannot execute query in readonly mode. (READONLY)" && echo 'ERROR' || echo 'OK' +$CLICKHOUSE_CLIENT --readonly=1 --query="INSERT INTO FUNCTION null('x String') (x) FORMAT TSV '123'" 2>&1 | grep -Fq "Cannot execute query in readonly mode. 
(READONLY)" && echo 'ERROR' || echo 'OK' \ No newline at end of file diff --git a/tests/queries/0_stateless/02473_functions_in_readonly_mode.sql b/tests/queries/0_stateless/02473_functions_in_readonly_mode.sql deleted file mode 100644 index c5c82d2e2bf..00000000000 --- a/tests/queries/0_stateless/02473_functions_in_readonly_mode.sql +++ /dev/null @@ -1,4 +0,0 @@ -SELECT * from numbers(1); -SELECT * from format('TSV', '123'); -SELECT * from numbers(1) SETTINGS readonly=1; -SELECT * from format('TSV', '123') SETTINGS readonly=1; -- { serverError READONLY } \ No newline at end of file diff --git a/tests/queries/0_stateless/02473_multistep_prewhere.python b/tests/queries/0_stateless/02473_multistep_prewhere.python new file mode 100644 index 00000000000..a12656f636b --- /dev/null +++ b/tests/queries/0_stateless/02473_multistep_prewhere.python @@ -0,0 +1,148 @@ +#!/usr/bin/env python3 +import requests +import os +import sys + +CURDIR = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, os.path.join(CURDIR, 'helpers')) + +from pure_http_client import ClickHouseClient + + +class Tester: + ''' + - Creates test table + - Deletes the specified range of rows + - Masks another range using row-level policy + - Runs some read queries and checks that the results + ''' + def __init__(self, session, url, index_granularity, total_rows): + self.session = session + self.url = url + self.index_granularity = index_granularity + self.total_rows = total_rows + self.reported_errors = set() + self.repro_queries = [] + + def report_error(self): + print('Repro steps:', '\n\n\t'.join(self.repro_queries)) + exit(1) + + def query(self, query_text, include_in_repro_steps = True, expected_data = None): + self.repro_queries.append(query_text) + resp = self.session.post(self.url, data=query_text) + if resp.status_code != 200: + # Group similar errors + error = resp.text[0:40] + if error not in self.reported_errors: + self.reported_errors.add(error) + print('Code:', resp.status_code) + print('Result:', resp.text) + self.report_error() + + result = resp.text + # Check that the result is as expected + if ((not expected_data is None) and (int(result) != len(expected_data))): + print('Expected {} rows, got {}'.format(len(expected_data), result)) + print('Expected data:' + str(expected_data)) + self.report_error() + + if not include_in_repro_steps: + self.repro_queries.pop() + + + def check_data(self, all_data, delete_range_start, delete_range_end, row_level_policy_range_start, row_level_policy_range_end): + all_data_after_delete = all_data[ + ~((all_data.a == 0) & + (all_data.b > delete_range_start) & + (all_data.b <= delete_range_end))] + all_data_after_row_policy = all_data_after_delete[ + (all_data_after_delete.b <= row_level_policy_range_start) | + (all_data_after_delete.b > row_level_policy_range_end)] + + for to_select in ['count()', 'sum(d)']: # Test reading with and without column with default value + self.query('SELECT {} FROM tab_02473;'.format(to_select), False, all_data_after_row_policy) + + delta = 10 + for query_range_start in [0, delta]: + for query_range_end in [self.total_rows - delta]: #, self.total_rows]: + expected = all_data_after_row_policy[ + (all_data_after_row_policy.a == 0) & + (all_data_after_row_policy.b > query_range_start) & + (all_data_after_row_policy.b <= query_range_end)] + self.query('SELECT {} from tab_02473 PREWHERE b > {} AND b <= {} WHERE a == 0;'.format( + to_select, query_range_start, query_range_end), False, expected) + + expected = all_data_after_row_policy[ + 
(all_data_after_row_policy.a == 0) & + (all_data_after_row_policy.c > query_range_start) & + (all_data_after_row_policy.c <= query_range_end)] + self.query('SELECT {} from tab_02473 PREWHERE c > {} AND c <= {} WHERE a == 0;'.format( + to_select, query_range_start, query_range_end), False, expected) + + expected = all_data_after_row_policy[ + (all_data_after_row_policy.a == 0) & + ((all_data_after_row_policy.c <= query_range_start) | + (all_data_after_row_policy.c > query_range_end))] + self.query('SELECT {} from tab_02473 PREWHERE c <= {} OR c > {} WHERE a == 0;'.format( + to_select, query_range_start, query_range_end), False, expected) + + + def run_test(self, delete_range_start, delete_range_end, row_level_policy_range_start, row_level_policy_range_end): + self.repro_queries = [] + + self.query(''' + CREATE TABLE tab_02473 (a Int8, b Int32, c Int32, PRIMARY KEY (a)) + ENGINE = MergeTree() ORDER BY (a, b) + SETTINGS min_bytes_for_wide_part = 0, index_granularity = {};'''.format(self.index_granularity)) + + self.query('INSERT INTO tab_02473 select 0, number+1, number+1 FROM numbers({});'.format(self.total_rows)) + + client = ClickHouseClient() + all_data = client.query_return_df("SELECT a, b, c, 1 as d FROM tab_02473 FORMAT TabSeparatedWithNames;") + + self.query('OPTIMIZE TABLE tab_02473 FINAL SETTINGS mutations_sync=2;') + + # After all data has been written add a column with default value + self.query('ALTER TABLE tab_02473 ADD COLUMN d Int64 DEFAULT 1;') + + self.check_data(all_data, -100, -100, -100, -100) + + self.query('DELETE FROM tab_02473 WHERE a = 0 AND b > {} AND b <= {};'.format( + delete_range_start, delete_range_end)) + + self.check_data(all_data, delete_range_start, delete_range_end, -100, -100) + + self.query('CREATE ROW POLICY policy_tab_02473 ON tab_02473 FOR SELECT USING b <= {} OR b > {} TO default;'.format( + row_level_policy_range_start, row_level_policy_range_end)) + + self.check_data(all_data, delete_range_start, delete_range_end, row_level_policy_range_start, row_level_policy_range_end) + + self.query('DROP POLICY policy_tab_02473 ON tab_02473;') + + self.query('DROP TABLE tab_02473;') + + + +def main(): + # Set mutations to synchronous mode and enable lightweight DELETE's + url = os.environ['CLICKHOUSE_URL'] + '&mutations_sync=2&allow_experimental_lightweight_delete=1&max_threads=1' + + default_index_granularity = 10; + total_rows = 8 * default_index_granularity + step = default_index_granularity + session = requests.Session() + for index_granularity in [default_index_granularity-1, default_index_granularity]: # [default_index_granularity-1, default_index_granularity+1, default_index_granularity]: + tester = Tester(session, url, index_granularity, total_rows) + # Test combinations of ranges of various size masked by lightweight DELETES + # along with ranges of various size masked by row-level policies + for delete_range_start in range(0, total_rows, 3 * step): + for delete_range_end in range(delete_range_start + 3 * step, total_rows, 2 * step): + for row_level_policy_range_start in range(0, total_rows, 3 * step): + for row_level_policy_range_end in range(row_level_policy_range_start + 3 * step, total_rows, 2 * step): + tester.run_test(delete_range_start, delete_range_end, row_level_policy_range_start, row_level_policy_range_end) + + +if __name__ == "__main__": + main() + diff --git a/tests/queries/0_stateless/02473_multistep_prewhere.reference b/tests/queries/0_stateless/02473_multistep_prewhere.reference new file mode 100644 index 00000000000..e69de29bb2d diff 
--git a/tests/queries/0_stateless/02473_multistep_prewhere.sh b/tests/queries/0_stateless/02473_multistep_prewhere.sh new file mode 100755 index 00000000000..bbb411b0a32 --- /dev/null +++ b/tests/queries/0_stateless/02473_multistep_prewhere.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash +# Tags: long + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# We should have correct env vars from shell_config.sh to run this test + +python3 "$CURDIR"/02473_multistep_prewhere.python + diff --git a/tests/queries/0_stateless/02476_fix_lambda_parsing.reference b/tests/queries/0_stateless/02476_fix_lambda_parsing.reference index 18cb46ce23c..de508c7a0d3 100644 --- a/tests/queries/0_stateless/02476_fix_lambda_parsing.reference +++ b/tests/queries/0_stateless/02476_fix_lambda_parsing.reference @@ -1,8 +1,4 @@ SELECT f(x, y -> z) -SELECT f(x, y -> z) -SELECT f((x, y) -> z) SELECT f((x, y) -> z) SELECT f((x, y) -> z) SELECT f(x, (x, y) -> z) -SELECT f(x, (x, y) -> z) -CREATE FUNCTION func AS x -> plus(x, (x -> ('2' + 2)) -> plus(1), 1) diff --git a/tests/queries/0_stateless/02476_fix_lambda_parsing.sh b/tests/queries/0_stateless/02476_fix_lambda_parsing.sh index 641ef59a170..d47588c100c 100755 --- a/tests/queries/0_stateless/02476_fix_lambda_parsing.sh +++ b/tests/queries/0_stateless/02476_fix_lambda_parsing.sh @@ -8,14 +8,7 @@ set -e format="$CLICKHOUSE_FORMAT" -echo "SELECT f(x, tuple(y) -> z)" | $format echo "SELECT f(x, (y) -> z)" | $format - echo "SELECT f(x, y -> z)" | $format echo "SELECT f((x, y) -> z)" | $format -echo "SELECT f(tuple(x, y) -> z)" | $format - echo "SELECT f(x, (x, y) -> z)" | $format -echo "SELECT f(x, tuple(x, y) -> z)" | $format - -echo "CREATE FUNCTION func AS x -> plus(x, (x -> ('2' + 2)) -> plus(1), 1)" | $format | $format diff --git a/tests/queries/0_stateless/02477_age.reference b/tests/queries/0_stateless/02477_age.reference new file mode 100644 index 00000000000..249c413d923 --- /dev/null +++ b/tests/queries/0_stateless/02477_age.reference @@ -0,0 +1,76 @@ +Various intervals +-1 +0 +0 +-7 +-3 +0 +-23 +-11 +0 +-103 +-52 +0 +-730 +-364 +1 +-17520 +-8736 +24 +-1051200 +-524160 +1440 +-63072000 +-31449600 +86400 +DateTime arguments +0 +23 +1439 +86399 +Date and DateTime arguments +-63072000 +-31449600 +86400 +Constant and non-constant arguments +-1051200 +-524160 +1440 +Case insensitive +-10 +Dependance of timezones +0 +0 +1 +25 +1500 +90000 +0 +0 +1 +24 +1440 +86400 +0 +0 +1 +25 +1500 +90000 +0 +0 +1 +24 +1440 +86400 +Additional test +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/02477_age.sql b/tests/queries/0_stateless/02477_age.sql new file mode 100644 index 00000000000..9b612276b01 --- /dev/null +++ b/tests/queries/0_stateless/02477_age.sql @@ -0,0 +1,82 @@ +SELECT 'Various intervals'; + +SELECT age('year', toDate('2017-12-31'), toDate('2016-01-01')); +SELECT age('year', toDate('2017-12-31'), toDate('2017-01-01')); +SELECT age('year', toDate('2017-12-31'), toDate('2018-01-01')); +SELECT age('quarter', toDate('2017-12-31'), toDate('2016-01-01')); +SELECT age('quarter', toDate('2017-12-31'), toDate('2017-01-01')); +SELECT age('quarter', toDate('2017-12-31'), toDate('2018-01-01')); +SELECT age('month', toDate('2017-12-31'), toDate('2016-01-01')); +SELECT age('month', toDate('2017-12-31'), toDate('2017-01-01')); +SELECT age('month', toDate('2017-12-31'), toDate('2018-01-01')); +SELECT age('week', toDate('2017-12-31'), toDate('2016-01-01')); +SELECT age('week', toDate('2017-12-31'), 
toDate('2017-01-01')); +SELECT age('week', toDate('2017-12-31'), toDate('2018-01-01')); +SELECT age('day', toDate('2017-12-31'), toDate('2016-01-01')); +SELECT age('day', toDate('2017-12-31'), toDate('2017-01-01')); +SELECT age('day', toDate('2017-12-31'), toDate('2018-01-01')); +SELECT age('hour', toDate('2017-12-31'), toDate('2016-01-01'), 'UTC'); +SELECT age('hour', toDate('2017-12-31'), toDate('2017-01-01'), 'UTC'); +SELECT age('hour', toDate('2017-12-31'), toDate('2018-01-01'), 'UTC'); +SELECT age('minute', toDate('2017-12-31'), toDate('2016-01-01'), 'UTC'); +SELECT age('minute', toDate('2017-12-31'), toDate('2017-01-01'), 'UTC'); +SELECT age('minute', toDate('2017-12-31'), toDate('2018-01-01'), 'UTC'); +SELECT age('second', toDate('2017-12-31'), toDate('2016-01-01'), 'UTC'); +SELECT age('second', toDate('2017-12-31'), toDate('2017-01-01'), 'UTC'); +SELECT age('second', toDate('2017-12-31'), toDate('2018-01-01'), 'UTC'); + +SELECT 'DateTime arguments'; +SELECT age('day', toDateTime('2016-01-01 00:00:01', 'UTC'), toDateTime('2016-01-02 00:00:00', 'UTC'), 'UTC'); +SELECT age('hour', toDateTime('2016-01-01 00:00:01', 'UTC'), toDateTime('2016-01-02 00:00:00', 'UTC'), 'UTC'); +SELECT age('minute', toDateTime('2016-01-01 00:00:01', 'UTC'), toDateTime('2016-01-02 00:00:00', 'UTC'), 'UTC'); +SELECT age('second', toDateTime('2016-01-01 00:00:01', 'UTC'), toDateTime('2016-01-02 00:00:00', 'UTC'), 'UTC'); + +SELECT 'Date and DateTime arguments'; + +SELECT age('second', toDate('2017-12-31'), toDateTime('2016-01-01 00:00:00', 'UTC'), 'UTC'); +SELECT age('second', toDateTime('2017-12-31 00:00:00', 'UTC'), toDate('2017-01-01'), 'UTC'); +SELECT age('second', toDateTime('2017-12-31 00:00:00', 'UTC'), toDateTime('2018-01-01 00:00:00', 'UTC')); + +SELECT 'Constant and non-constant arguments'; + +SELECT age('minute', materialize(toDate('2017-12-31')), toDate('2016-01-01'), 'UTC'); +SELECT age('minute', toDate('2017-12-31'), materialize(toDate('2017-01-01')), 'UTC'); +SELECT age('minute', materialize(toDate('2017-12-31')), materialize(toDate('2018-01-01')), 'UTC'); + +SELECT 'Case insensitive'; + +SELECT age('year', today(), today() - INTERVAL 10 YEAR); + +SELECT 'Dependance of timezones'; + +SELECT age('month', toDate('2014-10-26'), toDate('2014-10-27'), 'Asia/Istanbul'); +SELECT age('week', toDate('2014-10-26'), toDate('2014-10-27'), 'Asia/Istanbul'); +SELECT age('day', toDate('2014-10-26'), toDate('2014-10-27'), 'Asia/Istanbul'); +SELECT age('hour', toDate('2014-10-26'), toDate('2014-10-27'), 'Asia/Istanbul'); +SELECT age('minute', toDate('2014-10-26'), toDate('2014-10-27'), 'Asia/Istanbul'); +SELECT age('second', toDate('2014-10-26'), toDate('2014-10-27'), 'Asia/Istanbul'); + +SELECT age('month', toDate('2014-10-26'), toDate('2014-10-27'), 'UTC'); +SELECT age('week', toDate('2014-10-26'), toDate('2014-10-27'), 'UTC'); +SELECT age('day', toDate('2014-10-26'), toDate('2014-10-27'), 'UTC'); +SELECT age('hour', toDate('2014-10-26'), toDate('2014-10-27'), 'UTC'); +SELECT age('minute', toDate('2014-10-26'), toDate('2014-10-27'), 'UTC'); +SELECT age('second', toDate('2014-10-26'), toDate('2014-10-27'), 'UTC'); + +SELECT age('month', toDateTime('2014-10-26 00:00:00', 'Asia/Istanbul'), toDateTime('2014-10-27 00:00:00', 'Asia/Istanbul')); +SELECT age('week', toDateTime('2014-10-26 00:00:00', 'Asia/Istanbul'), toDateTime('2014-10-27 00:00:00', 'Asia/Istanbul')); +SELECT age('day', toDateTime('2014-10-26 00:00:00', 'Asia/Istanbul'), toDateTime('2014-10-27 00:00:00', 'Asia/Istanbul')); +SELECT age('hour', 
toDateTime('2014-10-26 00:00:00', 'Asia/Istanbul'), toDateTime('2014-10-27 00:00:00', 'Asia/Istanbul')); +SELECT age('minute', toDateTime('2014-10-26 00:00:00', 'Asia/Istanbul'), toDateTime('2014-10-27 00:00:00', 'Asia/Istanbul')); +SELECT age('second', toDateTime('2014-10-26 00:00:00', 'Asia/Istanbul'), toDateTime('2014-10-27 00:00:00', 'Asia/Istanbul')); + +SELECT age('month', toDateTime('2014-10-26 00:00:00', 'UTC'), toDateTime('2014-10-27 00:00:00', 'UTC')); +SELECT age('week', toDateTime('2014-10-26 00:00:00', 'UTC'), toDateTime('2014-10-27 00:00:00', 'UTC')); +SELECT age('day', toDateTime('2014-10-26 00:00:00', 'UTC'), toDateTime('2014-10-27 00:00:00', 'UTC')); +SELECT age('hour', toDateTime('2014-10-26 00:00:00', 'UTC'), toDateTime('2014-10-27 00:00:00', 'UTC')); +SELECT age('minute', toDateTime('2014-10-26 00:00:00', 'UTC'), toDateTime('2014-10-27 00:00:00', 'UTC')); +SELECT age('second', toDateTime('2014-10-26 00:00:00', 'UTC'), toDateTime('2014-10-27 00:00:00', 'UTC')); + +SELECT 'Additional test'; + +SELECT number = age('month', now() - INTERVAL number MONTH, now()) FROM system.numbers LIMIT 10; diff --git a/tests/queries/0_stateless/02477_age_date32.reference b/tests/queries/0_stateless/02477_age_date32.reference new file mode 100644 index 00000000000..69f27a10acc --- /dev/null +++ b/tests/queries/0_stateless/02477_age_date32.reference @@ -0,0 +1,169 @@ +-- { echo } + +-- Date32 vs Date32 +SELECT age('second', toDate32('1927-01-01', 'UTC'), toDate32('1927-01-02', 'UTC'), 'UTC'); +86400 +SELECT age('minute', toDate32('1927-01-01', 'UTC'), toDate32('1927-01-02', 'UTC'), 'UTC'); +1440 +SELECT age('hour', toDate32('1927-01-01', 'UTC'), toDate32('1927-01-02', 'UTC'), 'UTC'); +24 +SELECT age('day', toDate32('1927-01-01', 'UTC'), toDate32('1927-01-02', 'UTC'), 'UTC'); +1 +SELECT age('week', toDate32('1927-01-01', 'UTC'), toDate32('1927-01-08', 'UTC'), 'UTC'); +1 +SELECT age('month', toDate32('1927-01-01', 'UTC'), toDate32('1927-02-01', 'UTC'), 'UTC'); +1 +SELECT age('quarter', toDate32('1927-01-01', 'UTC'), toDate32('1927-04-01', 'UTC'), 'UTC'); +1 +SELECT age('year', toDate32('1927-01-01', 'UTC'), toDate32('1928-01-01', 'UTC'), 'UTC'); +1 +-- With DateTime64 +-- Date32 vs DateTime64 +SELECT age('second', toDate32('1927-01-01', 'UTC'), toDateTime64('1927-01-02 00:00:00', 3, 'UTC'), 'UTC'); +86400 +SELECT age('minute', toDate32('1927-01-01', 'UTC'), toDateTime64('1927-01-02 00:00:00', 3, 'UTC'), 'UTC'); +1440 +SELECT age('hour', toDate32('1927-01-01', 'UTC'), toDateTime64('1927-01-02 00:00:00', 3, 'UTC'), 'UTC'); +24 +SELECT age('day', toDate32('1927-01-01', 'UTC'), toDateTime64('1927-01-02 00:00:00', 3, 'UTC'), 'UTC'); +1 +SELECT age('week', toDate32('1927-01-01', 'UTC'), toDateTime64('1927-01-08 00:00:00', 3, 'UTC'), 'UTC'); +1 +SELECT age('month', toDate32('1927-01-01', 'UTC'), toDateTime64('1927-02-01 00:00:00', 3, 'UTC'), 'UTC'); +1 +SELECT age('quarter', toDate32('1927-01-01', 'UTC'), toDateTime64('1927-04-01 00:00:00', 3, 'UTC'), 'UTC'); +1 +SELECT age('year', toDate32('1927-01-01', 'UTC'), toDateTime64('1928-01-01 00:00:00', 3, 'UTC'), 'UTC'); +1 +-- DateTime64 vs Date32 +SELECT age('second', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), toDate32('1927-01-02', 'UTC'), 'UTC'); +86400 +SELECT age('minute', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), toDate32('1927-01-02', 'UTC'), 'UTC'); +1440 +SELECT age('hour', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), toDate32('1927-01-02', 'UTC'), 'UTC'); +24 +SELECT age('day', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), 
toDate32('1927-01-02', 'UTC'), 'UTC'); +1 +SELECT age('week', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), toDate32('1927-01-08', 'UTC'), 'UTC'); +1 +SELECT age('month', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), toDate32('1927-02-01', 'UTC'), 'UTC'); +1 +SELECT age('quarter', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), toDate32('1927-04-01', 'UTC'), 'UTC'); +1 +SELECT age('year', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), toDate32('1928-01-01', 'UTC'), 'UTC'); +1 +-- With DateTime +-- Date32 vs DateTime +SELECT age('second', toDate32('2015-08-18', 'UTC'), toDateTime('2015-08-19 00:00:00', 'UTC'), 'UTC'); +86400 +SELECT age('minute', toDate32('2015-08-18', 'UTC'), toDateTime('2015-08-19 00:00:00', 'UTC'), 'UTC'); +1440 +SELECT age('hour', toDate32('2015-08-18', 'UTC'), toDateTime('2015-08-19 00:00:00', 'UTC'), 'UTC'); +24 +SELECT age('day', toDate32('2015-08-18', 'UTC'), toDateTime('2015-08-19 00:00:00', 'UTC'), 'UTC'); +1 +SELECT age('week', toDate32('2015-08-18', 'UTC'), toDateTime('2015-08-25 00:00:00', 'UTC'), 'UTC'); +1 +SELECT age('month', toDate32('2015-08-18', 'UTC'), toDateTime('2015-09-18 00:00:00', 'UTC'), 'UTC'); +1 +SELECT age('quarter', toDate32('2015-08-18', 'UTC'), toDateTime('2015-11-18 00:00:00', 'UTC'), 'UTC'); +1 +SELECT age('year', toDate32('2015-08-18', 'UTC'), toDateTime('2016-08-18 00:00:00', 'UTC'), 'UTC'); +1 +-- DateTime vs Date32 +SELECT age('second', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-08-19', 'UTC'), 'UTC'); +86400 +SELECT age('minute', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-08-19', 'UTC'), 'UTC'); +1440 +SELECT age('hour', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-08-19', 'UTC'), 'UTC'); +24 +SELECT age('day', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-08-19', 'UTC'), 'UTC'); +1 +SELECT age('week', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-08-25', 'UTC'), 'UTC'); +1 +SELECT age('month', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-09-18', 'UTC'), 'UTC'); +1 +SELECT age('quarter', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-11-18', 'UTC'), 'UTC'); +1 +SELECT age('year', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2016-08-18', 'UTC'), 'UTC'); +1 +-- With Date +-- Date32 vs Date +SELECT age('second', toDate32('2015-08-18', 'UTC'), toDate('2015-08-19', 'UTC'), 'UTC'); +86400 +SELECT age('minute', toDate32('2015-08-18', 'UTC'), toDate('2015-08-19', 'UTC'), 'UTC'); +1440 +SELECT age('hour', toDate32('2015-08-18', 'UTC'), toDate('2015-08-19', 'UTC'), 'UTC'); +24 +SELECT age('day', toDate32('2015-08-18', 'UTC'), toDate('2015-08-19', 'UTC'), 'UTC'); +1 +SELECT age('week', toDate32('2015-08-18', 'UTC'), toDate('2015-08-25', 'UTC'), 'UTC'); +1 +SELECT age('month', toDate32('2015-08-18', 'UTC'), toDate('2015-09-18', 'UTC'), 'UTC'); +1 +SELECT age('quarter', toDate32('2015-08-18', 'UTC'), toDate('2015-11-18', 'UTC'), 'UTC'); +1 +SELECT age('year', toDate32('2015-08-18', 'UTC'), toDate('2016-08-18', 'UTC'), 'UTC'); +1 +-- Date vs Date32 +SELECT age('second', toDate('2015-08-18', 'UTC'), toDate32('2015-08-19', 'UTC'), 'UTC'); +86400 +SELECT age('minute', toDate('2015-08-18', 'UTC'), toDate32('2015-08-19', 'UTC'), 'UTC'); +1440 +SELECT age('hour', toDate('2015-08-18', 'UTC'), toDate32('2015-08-19', 'UTC'), 'UTC'); +24 +SELECT age('day', toDate('2015-08-18', 'UTC'), toDate32('2015-08-19', 'UTC'), 'UTC'); +1 +SELECT age('week', toDate('2015-08-18', 'UTC'), toDate32('2015-08-25', 'UTC'), 'UTC'); +1 +SELECT age('month', toDate('2015-08-18', 'UTC'), 
toDate32('2015-09-18', 'UTC'), 'UTC'); +1 +SELECT age('quarter', toDate('2015-08-18', 'UTC'), toDate32('2015-11-18', 'UTC'), 'UTC'); +1 +SELECT age('year', toDate('2015-08-18', 'UTC'), toDate32('2016-08-18', 'UTC'), 'UTC'); +1 +-- Const vs non-const columns +SELECT age('day', toDate32('1927-01-01', 'UTC'), materialize(toDate32('1927-01-02', 'UTC')), 'UTC'); +1 +SELECT age('day', toDate32('1927-01-01', 'UTC'), materialize(toDateTime64('1927-01-02 00:00:00', 3, 'UTC')), 'UTC'); +1 +SELECT age('day', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), materialize(toDate32('1927-01-02', 'UTC')), 'UTC'); +1 +SELECT age('day', toDate32('2015-08-18', 'UTC'), materialize(toDateTime('2015-08-19 00:00:00', 'UTC')), 'UTC'); +1 +SELECT age('day', toDateTime('2015-08-18 00:00:00', 'UTC'), materialize(toDate32('2015-08-19', 'UTC')), 'UTC'); +1 +SELECT age('day', toDate32('2015-08-18', 'UTC'), materialize(toDate('2015-08-19', 'UTC')), 'UTC'); +1 +SELECT age('day', toDate('2015-08-18', 'UTC'), materialize(toDate32('2015-08-19', 'UTC')), 'UTC'); +1 +-- Non-const vs const columns +SELECT age('day', materialize(toDate32('1927-01-01', 'UTC')), toDate32('1927-01-02', 'UTC'), 'UTC'); +1 +SELECT age('day', materialize(toDate32('1927-01-01', 'UTC')), toDateTime64('1927-01-02 00:00:00', 3, 'UTC'), 'UTC'); +1 +SELECT age('day', materialize(toDateTime64('1927-01-01 00:00:00', 3, 'UTC')), toDate32('1927-01-02', 'UTC'), 'UTC'); +1 +SELECT age('day', materialize(toDate32('2015-08-18', 'UTC')), toDateTime('2015-08-19 00:00:00', 'UTC'), 'UTC'); +1 +SELECT age('day', materialize(toDateTime('2015-08-18 00:00:00', 'UTC')), toDate32('2015-08-19', 'UTC'), 'UTC'); +1 +SELECT age('day', materialize(toDate32('2015-08-18', 'UTC')), toDate('2015-08-19', 'UTC'), 'UTC'); +1 +SELECT age('day', materialize(toDate('2015-08-18', 'UTC')), toDate32('2015-08-19', 'UTC'), 'UTC'); +1 +-- Non-const vs non-const columns +SELECT age('day', materialize(toDate32('1927-01-01', 'UTC')), materialize(toDate32('1927-01-02', 'UTC')), 'UTC'); +1 +SELECT age('day', materialize(toDate32('1927-01-01', 'UTC')), materialize(toDateTime64('1927-01-02 00:00:00', 3, 'UTC')), 'UTC'); +1 +SELECT age('day', materialize(toDateTime64('1927-01-01 00:00:00', 3, 'UTC')), materialize(toDate32('1927-01-02', 'UTC')), 'UTC'); +1 +SELECT age('day', materialize(toDate32('2015-08-18', 'UTC')), materialize(toDateTime('2015-08-19 00:00:00', 'UTC')), 'UTC'); +1 +SELECT age('day', materialize(toDateTime('2015-08-18 00:00:00', 'UTC')), materialize(toDate32('2015-08-19', 'UTC')), 'UTC'); +1 +SELECT age('day', materialize(toDate32('2015-08-18', 'UTC')), materialize(toDate('2015-08-19', 'UTC')), 'UTC'); +1 +SELECT age('day', materialize(toDate('2015-08-18', 'UTC')), materialize(toDate32('2015-08-19', 'UTC')), 'UTC'); +1 diff --git a/tests/queries/0_stateless/02477_age_date32.sql b/tests/queries/0_stateless/02477_age_date32.sql new file mode 100644 index 00000000000..43ff458c2d1 --- /dev/null +++ b/tests/queries/0_stateless/02477_age_date32.sql @@ -0,0 +1,101 @@ +-- { echo } + +-- Date32 vs Date32 +SELECT age('second', toDate32('1927-01-01', 'UTC'), toDate32('1927-01-02', 'UTC'), 'UTC'); +SELECT age('minute', toDate32('1927-01-01', 'UTC'), toDate32('1927-01-02', 'UTC'), 'UTC'); +SELECT age('hour', toDate32('1927-01-01', 'UTC'), toDate32('1927-01-02', 'UTC'), 'UTC'); +SELECT age('day', toDate32('1927-01-01', 'UTC'), toDate32('1927-01-02', 'UTC'), 'UTC'); +SELECT age('week', toDate32('1927-01-01', 'UTC'), toDate32('1927-01-08', 'UTC'), 'UTC'); +SELECT age('month', toDate32('1927-01-01', 
'UTC'), toDate32('1927-02-01', 'UTC'), 'UTC'); +SELECT age('quarter', toDate32('1927-01-01', 'UTC'), toDate32('1927-04-01', 'UTC'), 'UTC'); +SELECT age('year', toDate32('1927-01-01', 'UTC'), toDate32('1928-01-01', 'UTC'), 'UTC'); + +-- With DateTime64 +-- Date32 vs DateTime64 +SELECT age('second', toDate32('1927-01-01', 'UTC'), toDateTime64('1927-01-02 00:00:00', 3, 'UTC'), 'UTC'); +SELECT age('minute', toDate32('1927-01-01', 'UTC'), toDateTime64('1927-01-02 00:00:00', 3, 'UTC'), 'UTC'); +SELECT age('hour', toDate32('1927-01-01', 'UTC'), toDateTime64('1927-01-02 00:00:00', 3, 'UTC'), 'UTC'); +SELECT age('day', toDate32('1927-01-01', 'UTC'), toDateTime64('1927-01-02 00:00:00', 3, 'UTC'), 'UTC'); +SELECT age('week', toDate32('1927-01-01', 'UTC'), toDateTime64('1927-01-08 00:00:00', 3, 'UTC'), 'UTC'); +SELECT age('month', toDate32('1927-01-01', 'UTC'), toDateTime64('1927-02-01 00:00:00', 3, 'UTC'), 'UTC'); +SELECT age('quarter', toDate32('1927-01-01', 'UTC'), toDateTime64('1927-04-01 00:00:00', 3, 'UTC'), 'UTC'); +SELECT age('year', toDate32('1927-01-01', 'UTC'), toDateTime64('1928-01-01 00:00:00', 3, 'UTC'), 'UTC'); + +-- DateTime64 vs Date32 +SELECT age('second', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), toDate32('1927-01-02', 'UTC'), 'UTC'); +SELECT age('minute', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), toDate32('1927-01-02', 'UTC'), 'UTC'); +SELECT age('hour', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), toDate32('1927-01-02', 'UTC'), 'UTC'); +SELECT age('day', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), toDate32('1927-01-02', 'UTC'), 'UTC'); +SELECT age('week', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), toDate32('1927-01-08', 'UTC'), 'UTC'); +SELECT age('month', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), toDate32('1927-02-01', 'UTC'), 'UTC'); +SELECT age('quarter', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), toDate32('1927-04-01', 'UTC'), 'UTC'); +SELECT age('year', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), toDate32('1928-01-01', 'UTC'), 'UTC'); + +-- With DateTime +-- Date32 vs DateTime +SELECT age('second', toDate32('2015-08-18', 'UTC'), toDateTime('2015-08-19 00:00:00', 'UTC'), 'UTC'); +SELECT age('minute', toDate32('2015-08-18', 'UTC'), toDateTime('2015-08-19 00:00:00', 'UTC'), 'UTC'); +SELECT age('hour', toDate32('2015-08-18', 'UTC'), toDateTime('2015-08-19 00:00:00', 'UTC'), 'UTC'); +SELECT age('day', toDate32('2015-08-18', 'UTC'), toDateTime('2015-08-19 00:00:00', 'UTC'), 'UTC'); +SELECT age('week', toDate32('2015-08-18', 'UTC'), toDateTime('2015-08-25 00:00:00', 'UTC'), 'UTC'); +SELECT age('month', toDate32('2015-08-18', 'UTC'), toDateTime('2015-09-18 00:00:00', 'UTC'), 'UTC'); +SELECT age('quarter', toDate32('2015-08-18', 'UTC'), toDateTime('2015-11-18 00:00:00', 'UTC'), 'UTC'); +SELECT age('year', toDate32('2015-08-18', 'UTC'), toDateTime('2016-08-18 00:00:00', 'UTC'), 'UTC'); + +-- DateTime vs Date32 +SELECT age('second', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-08-19', 'UTC'), 'UTC'); +SELECT age('minute', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-08-19', 'UTC'), 'UTC'); +SELECT age('hour', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-08-19', 'UTC'), 'UTC'); +SELECT age('day', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-08-19', 'UTC'), 'UTC'); +SELECT age('week', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-08-25', 'UTC'), 'UTC'); +SELECT age('month', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-09-18', 'UTC'), 'UTC'); +SELECT age('quarter', toDateTime('2015-08-18 
00:00:00', 'UTC'), toDate32('2015-11-18', 'UTC'), 'UTC'); +SELECT age('year', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2016-08-18', 'UTC'), 'UTC'); + +-- With Date +-- Date32 vs Date +SELECT age('second', toDate32('2015-08-18', 'UTC'), toDate('2015-08-19', 'UTC'), 'UTC'); +SELECT age('minute', toDate32('2015-08-18', 'UTC'), toDate('2015-08-19', 'UTC'), 'UTC'); +SELECT age('hour', toDate32('2015-08-18', 'UTC'), toDate('2015-08-19', 'UTC'), 'UTC'); +SELECT age('day', toDate32('2015-08-18', 'UTC'), toDate('2015-08-19', 'UTC'), 'UTC'); +SELECT age('week', toDate32('2015-08-18', 'UTC'), toDate('2015-08-25', 'UTC'), 'UTC'); +SELECT age('month', toDate32('2015-08-18', 'UTC'), toDate('2015-09-18', 'UTC'), 'UTC'); +SELECT age('quarter', toDate32('2015-08-18', 'UTC'), toDate('2015-11-18', 'UTC'), 'UTC'); +SELECT age('year', toDate32('2015-08-18', 'UTC'), toDate('2016-08-18', 'UTC'), 'UTC'); + +-- Date vs Date32 +SELECT age('second', toDate('2015-08-18', 'UTC'), toDate32('2015-08-19', 'UTC'), 'UTC'); +SELECT age('minute', toDate('2015-08-18', 'UTC'), toDate32('2015-08-19', 'UTC'), 'UTC'); +SELECT age('hour', toDate('2015-08-18', 'UTC'), toDate32('2015-08-19', 'UTC'), 'UTC'); +SELECT age('day', toDate('2015-08-18', 'UTC'), toDate32('2015-08-19', 'UTC'), 'UTC'); +SELECT age('week', toDate('2015-08-18', 'UTC'), toDate32('2015-08-25', 'UTC'), 'UTC'); +SELECT age('month', toDate('2015-08-18', 'UTC'), toDate32('2015-09-18', 'UTC'), 'UTC'); +SELECT age('quarter', toDate('2015-08-18', 'UTC'), toDate32('2015-11-18', 'UTC'), 'UTC'); +SELECT age('year', toDate('2015-08-18', 'UTC'), toDate32('2016-08-18', 'UTC'), 'UTC'); + +-- Const vs non-const columns +SELECT age('day', toDate32('1927-01-01', 'UTC'), materialize(toDate32('1927-01-02', 'UTC')), 'UTC'); +SELECT age('day', toDate32('1927-01-01', 'UTC'), materialize(toDateTime64('1927-01-02 00:00:00', 3, 'UTC')), 'UTC'); +SELECT age('day', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), materialize(toDate32('1927-01-02', 'UTC')), 'UTC'); +SELECT age('day', toDate32('2015-08-18', 'UTC'), materialize(toDateTime('2015-08-19 00:00:00', 'UTC')), 'UTC'); +SELECT age('day', toDateTime('2015-08-18 00:00:00', 'UTC'), materialize(toDate32('2015-08-19', 'UTC')), 'UTC'); +SELECT age('day', toDate32('2015-08-18', 'UTC'), materialize(toDate('2015-08-19', 'UTC')), 'UTC'); +SELECT age('day', toDate('2015-08-18', 'UTC'), materialize(toDate32('2015-08-19', 'UTC')), 'UTC'); + +-- Non-const vs const columns +SELECT age('day', materialize(toDate32('1927-01-01', 'UTC')), toDate32('1927-01-02', 'UTC'), 'UTC'); +SELECT age('day', materialize(toDate32('1927-01-01', 'UTC')), toDateTime64('1927-01-02 00:00:00', 3, 'UTC'), 'UTC'); +SELECT age('day', materialize(toDateTime64('1927-01-01 00:00:00', 3, 'UTC')), toDate32('1927-01-02', 'UTC'), 'UTC'); +SELECT age('day', materialize(toDate32('2015-08-18', 'UTC')), toDateTime('2015-08-19 00:00:00', 'UTC'), 'UTC'); +SELECT age('day', materialize(toDateTime('2015-08-18 00:00:00', 'UTC')), toDate32('2015-08-19', 'UTC'), 'UTC'); +SELECT age('day', materialize(toDate32('2015-08-18', 'UTC')), toDate('2015-08-19', 'UTC'), 'UTC'); +SELECT age('day', materialize(toDate('2015-08-18', 'UTC')), toDate32('2015-08-19', 'UTC'), 'UTC'); + +-- Non-const vs non-const columns +SELECT age('day', materialize(toDate32('1927-01-01', 'UTC')), materialize(toDate32('1927-01-02', 'UTC')), 'UTC'); +SELECT age('day', materialize(toDate32('1927-01-01', 'UTC')), materialize(toDateTime64('1927-01-02 00:00:00', 3, 'UTC')), 'UTC'); +SELECT age('day', 
materialize(toDateTime64('1927-01-01 00:00:00', 3, 'UTC')), materialize(toDate32('1927-01-02', 'UTC')), 'UTC'); +SELECT age('day', materialize(toDate32('2015-08-18', 'UTC')), materialize(toDateTime('2015-08-19 00:00:00', 'UTC')), 'UTC'); +SELECT age('day', materialize(toDateTime('2015-08-18 00:00:00', 'UTC')), materialize(toDate32('2015-08-19', 'UTC')), 'UTC'); +SELECT age('day', materialize(toDate32('2015-08-18', 'UTC')), materialize(toDate('2015-08-19', 'UTC')), 'UTC'); +SELECT age('day', materialize(toDate('2015-08-18', 'UTC')), materialize(toDate32('2015-08-19', 'UTC')), 'UTC'); diff --git a/tests/queries/0_stateless/02477_age_datetime64.reference b/tests/queries/0_stateless/02477_age_datetime64.reference new file mode 100644 index 00000000000..3b4459dd26d --- /dev/null +++ b/tests/queries/0_stateless/02477_age_datetime64.reference @@ -0,0 +1,113 @@ +-- { echo } + +-- DateTime64 vs DateTime64 same scale +SELECT age('second', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), toDateTime64('1927-01-01 00:00:10', 0, 'UTC')); +10 +SELECT age('second', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), toDateTime64('1927-01-01 00:10:00', 0, 'UTC')); +600 +SELECT age('second', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), toDateTime64('1927-01-01 01:00:00', 0, 'UTC')); +3600 +SELECT age('second', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), toDateTime64('1927-01-01 01:10:10', 0, 'UTC')); +4210 +SELECT age('minute', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), toDateTime64('1927-01-01 00:10:00', 0, 'UTC')); +10 +SELECT age('minute', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), toDateTime64('1927-01-01 10:00:00', 0, 'UTC')); +600 +SELECT age('hour', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), toDateTime64('1927-01-01 10:00:00', 0, 'UTC')); +10 +SELECT age('day', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), toDateTime64('1927-01-02 00:00:00', 0, 'UTC')); +1 +SELECT age('month', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), toDateTime64('1927-02-01 00:00:00', 0, 'UTC')); +1 +SELECT age('year', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), toDateTime64('1928-01-01 00:00:00', 0, 'UTC')); +1 +-- DateTime64 vs DateTime64 different scale +SELECT age('second', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), toDateTime64('1927-01-01 00:00:10', 3, 'UTC')); +10 +SELECT age('second', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), toDateTime64('1927-01-01 00:10:00', 3, 'UTC')); +600 +SELECT age('second', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), toDateTime64('1927-01-01 01:00:00', 3, 'UTC')); +3600 +SELECT age('second', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), toDateTime64('1927-01-01 01:10:10', 3, 'UTC')); +4210 +SELECT age('minute', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), toDateTime64('1927-01-01 00:10:00', 3, 'UTC')); +10 +SELECT age('minute', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), toDateTime64('1927-01-01 10:00:00', 3, 'UTC')); +600 +SELECT age('hour', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), toDateTime64('1927-01-01 10:00:00', 3, 'UTC')); +10 +SELECT age('day', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), toDateTime64('1927-01-02 00:00:00', 3, 'UTC')); +1 +SELECT age('month', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), toDateTime64('1927-02-01 00:00:00', 3, 'UTC')); +1 +SELECT age('year', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), toDateTime64('1928-01-01 00:00:00', 3, 'UTC')); +1 +-- With DateTime +-- DateTime64 vs DateTime +SELECT age('second', toDateTime64('2015-08-18 00:00:00', 0, 'UTC'), toDateTime('2015-08-18 00:00:00', 'UTC')); +0 +SELECT 
age('second', toDateTime64('2015-08-18 00:00:00', 0, 'UTC'), toDateTime('2015-08-18 00:00:10', 'UTC')); +10 +SELECT age('second', toDateTime64('2015-08-18 00:00:00', 0, 'UTC'), toDateTime('2015-08-18 00:10:00', 'UTC')); +600 +SELECT age('second', toDateTime64('2015-08-18 00:00:00', 0, 'UTC'), toDateTime('2015-08-18 01:00:00', 'UTC')); +3600 +SELECT age('second', toDateTime64('2015-08-18 00:00:00', 0, 'UTC'), toDateTime('2015-08-18 01:10:10', 'UTC')); +4210 +-- DateTime vs DateTime64 +SELECT age('second', toDateTime('2015-08-18 00:00:00', 'UTC'), toDateTime64('2015-08-18 00:00:00', 3, 'UTC')); +0 +SELECT age('second', toDateTime('2015-08-18 00:00:00', 'UTC'), toDateTime64('2015-08-18 00:00:10', 3, 'UTC')); +10 +SELECT age('second', toDateTime('2015-08-18 00:00:00', 'UTC'), toDateTime64('2015-08-18 00:10:00', 3, 'UTC')); +600 +SELECT age('second', toDateTime('2015-08-18 00:00:00', 'UTC'), toDateTime64('2015-08-18 01:00:00', 3, 'UTC')); +3600 +SELECT age('second', toDateTime('2015-08-18 00:00:00', 'UTC'), toDateTime64('2015-08-18 01:10:10', 3, 'UTC')); +4210 +-- With Date +-- DateTime64 vs Date +SELECT age('day', toDateTime64('2015-08-18 00:00:00', 0, 'UTC'), toDate('2015-08-19', 'UTC')); +1 +-- Date vs DateTime64 +SELECT age('day', toDate('2015-08-18', 'UTC'), toDateTime64('2015-08-19 00:00:00', 3, 'UTC')); +1 +-- Same thing but const vs non-const columns +SELECT age('second', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), materialize(toDateTime64('1927-01-01 00:00:10', 0, 'UTC'))); +10 +SELECT age('second', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), materialize(toDateTime64('1927-01-01 00:00:10', 3, 'UTC'))); +10 +SELECT age('second', toDateTime64('2015-08-18 00:00:00', 0, 'UTC'), materialize(toDateTime('2015-08-18 00:00:10', 'UTC'))); +10 +SELECT age('second', toDateTime('2015-08-18 00:00:00', 'UTC'), materialize(toDateTime64('2015-08-18 00:00:10', 3, 'UTC'))); +10 +SELECT age('day', toDateTime64('2015-08-18 00:00:00', 0, 'UTC'), materialize(toDate('2015-08-19', 'UTC'))); +1 +SELECT age('day', toDate('2015-08-18', 'UTC'), materialize(toDateTime64('2015-08-19 00:00:00', 3, 'UTC'))); +1 +-- Same thing but non-const vs const columns +SELECT age('second', materialize(toDateTime64('1927-01-01 00:00:00', 0, 'UTC')), toDateTime64('1927-01-01 00:00:10', 0, 'UTC')); +10 +SELECT age('second', materialize(toDateTime64('1927-01-01 00:00:00', 6, 'UTC')), toDateTime64('1927-01-01 00:00:10', 3, 'UTC')); +10 +SELECT age('second', materialize(toDateTime64('2015-08-18 00:00:00', 0, 'UTC')), toDateTime('2015-08-18 00:00:10', 'UTC')); +10 +SELECT age('second', materialize(toDateTime('2015-08-18 00:00:00', 'UTC')), toDateTime64('2015-08-18 00:00:10', 3, 'UTC')); +10 +SELECT age('day', materialize(toDateTime64('2015-08-18 00:00:00', 0, 'UTC')), toDate('2015-08-19', 'UTC')); +1 +SELECT age('day', materialize(toDate('2015-08-18', 'UTC')), toDateTime64('2015-08-19 00:00:00', 3, 'UTC')); +1 +-- Same thing but non-const vs non-const columns +SELECT age('second', materialize(toDateTime64('1927-01-01 00:00:00', 0, 'UTC')), materialize(toDateTime64('1927-01-01 00:00:10', 0, 'UTC'))); +10 +SELECT age('second', materialize(toDateTime64('1927-01-01 00:00:00', 6, 'UTC')), materialize(toDateTime64('1927-01-01 00:00:10', 3, 'UTC'))); +10 +SELECT age('second', materialize(toDateTime64('2015-08-18 00:00:00', 0, 'UTC')), materialize(toDateTime('2015-08-18 00:00:10', 'UTC'))); +10 +SELECT age('second', materialize(toDateTime('2015-08-18 00:00:00', 'UTC')), materialize(toDateTime64('2015-08-18 00:00:10', 3, 'UTC'))); +10 
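[Editorial aside, not part of the patch] The const/non-const permutations above rely on materialize(), which wraps a constant literal into an ordinary column, so each age() call is exercised on both the constant-argument and the column-argument code paths. A minimal illustrative query, reusing values and expected results from the reference above (the aliases and comments are the editor's, not ClickHouse's):

-- Illustrative sketch only: both columns are expected to return 10,
-- matching the const/const and non-const/non-const rows in the reference above.
SELECT
    age('second', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), toDateTime64('1927-01-01 00:00:10', 0, 'UTC')) AS const_args,
    age('second', materialize(toDateTime64('1927-01-01 00:00:00', 0, 'UTC')), materialize(toDateTime64('1927-01-01 00:00:10', 0, 'UTC'))) AS non_const_args;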
+SELECT age('day', materialize(toDateTime64('2015-08-18 00:00:00', 0, 'UTC')), materialize(toDate('2015-08-19', 'UTC'))); +1 +SELECT age('day', materialize(toDate('2015-08-18', 'UTC')), materialize(toDateTime64('2015-08-19 00:00:00', 3, 'UTC'))); +1 diff --git a/tests/queries/0_stateless/02477_age_datetime64.sql b/tests/queries/0_stateless/02477_age_datetime64.sql new file mode 100644 index 00000000000..1bed93991ca --- /dev/null +++ b/tests/queries/0_stateless/02477_age_datetime64.sql @@ -0,0 +1,77 @@ +-- { echo } + +-- DateTime64 vs DateTime64 same scale +SELECT age('second', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), toDateTime64('1927-01-01 00:00:10', 0, 'UTC')); +SELECT age('second', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), toDateTime64('1927-01-01 00:10:00', 0, 'UTC')); +SELECT age('second', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), toDateTime64('1927-01-01 01:00:00', 0, 'UTC')); +SELECT age('second', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), toDateTime64('1927-01-01 01:10:10', 0, 'UTC')); + +SELECT age('minute', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), toDateTime64('1927-01-01 00:10:00', 0, 'UTC')); +SELECT age('minute', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), toDateTime64('1927-01-01 10:00:00', 0, 'UTC')); + +SELECT age('hour', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), toDateTime64('1927-01-01 10:00:00', 0, 'UTC')); + +SELECT age('day', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), toDateTime64('1927-01-02 00:00:00', 0, 'UTC')); +SELECT age('month', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), toDateTime64('1927-02-01 00:00:00', 0, 'UTC')); +SELECT age('year', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), toDateTime64('1928-01-01 00:00:00', 0, 'UTC')); + +-- DateTime64 vs DateTime64 different scale +SELECT age('second', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), toDateTime64('1927-01-01 00:00:10', 3, 'UTC')); +SELECT age('second', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), toDateTime64('1927-01-01 00:10:00', 3, 'UTC')); +SELECT age('second', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), toDateTime64('1927-01-01 01:00:00', 3, 'UTC')); +SELECT age('second', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), toDateTime64('1927-01-01 01:10:10', 3, 'UTC')); + +SELECT age('minute', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), toDateTime64('1927-01-01 00:10:00', 3, 'UTC')); +SELECT age('minute', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), toDateTime64('1927-01-01 10:00:00', 3, 'UTC')); + +SELECT age('hour', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), toDateTime64('1927-01-01 10:00:00', 3, 'UTC')); + +SELECT age('day', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), toDateTime64('1927-01-02 00:00:00', 3, 'UTC')); +SELECT age('month', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), toDateTime64('1927-02-01 00:00:00', 3, 'UTC')); +SELECT age('year', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), toDateTime64('1928-01-01 00:00:00', 3, 'UTC')); + +-- With DateTime +-- DateTime64 vs DateTime +SELECT age('second', toDateTime64('2015-08-18 00:00:00', 0, 'UTC'), toDateTime('2015-08-18 00:00:00', 'UTC')); +SELECT age('second', toDateTime64('2015-08-18 00:00:00', 0, 'UTC'), toDateTime('2015-08-18 00:00:10', 'UTC')); +SELECT age('second', toDateTime64('2015-08-18 00:00:00', 0, 'UTC'), toDateTime('2015-08-18 00:10:00', 'UTC')); +SELECT age('second', toDateTime64('2015-08-18 00:00:00', 0, 'UTC'), toDateTime('2015-08-18 01:00:00', 'UTC')); +SELECT age('second', toDateTime64('2015-08-18 00:00:00', 0, 'UTC'), toDateTime('2015-08-18 01:10:10', 'UTC')); + 
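[Editorial aside, not part of the patch] The property these tests pin down is that age() counts only fully elapsed units, which is why 02477_age.reference expects 0 for age('year', toDate('2017-12-31'), toDate('2018-01-01')). A hedged sketch of how this is assumed to differ from dateDiff(), which counts crossed calendar boundaries rather than elapsed time:

-- Sketch only. The age() result (0) is taken from 02477_age.reference;
-- the dateDiff() result (1) is the editor's assumption that dateDiff
-- counts the crossed year boundary even though a full year has not elapsed.
SELECT
    age('year', toDate('2017-12-31'), toDate('2018-01-01'))      AS full_years_elapsed,      -- 0
    dateDiff('year', toDate('2017-12-31'), toDate('2018-01-01')) AS year_boundaries_crossed; -- assumed 1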
+-- DateTime vs DateTime64 +SELECT age('second', toDateTime('2015-08-18 00:00:00', 'UTC'), toDateTime64('2015-08-18 00:00:00', 3, 'UTC')); +SELECT age('second', toDateTime('2015-08-18 00:00:00', 'UTC'), toDateTime64('2015-08-18 00:00:10', 3, 'UTC')); +SELECT age('second', toDateTime('2015-08-18 00:00:00', 'UTC'), toDateTime64('2015-08-18 00:10:00', 3, 'UTC')); +SELECT age('second', toDateTime('2015-08-18 00:00:00', 'UTC'), toDateTime64('2015-08-18 01:00:00', 3, 'UTC')); +SELECT age('second', toDateTime('2015-08-18 00:00:00', 'UTC'), toDateTime64('2015-08-18 01:10:10', 3, 'UTC')); + +-- With Date +-- DateTime64 vs Date +SELECT age('day', toDateTime64('2015-08-18 00:00:00', 0, 'UTC'), toDate('2015-08-19', 'UTC')); + +-- Date vs DateTime64 +SELECT age('day', toDate('2015-08-18', 'UTC'), toDateTime64('2015-08-19 00:00:00', 3, 'UTC')); + +-- Same thing but const vs non-const columns +SELECT age('second', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), materialize(toDateTime64('1927-01-01 00:00:10', 0, 'UTC'))); +SELECT age('second', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), materialize(toDateTime64('1927-01-01 00:00:10', 3, 'UTC'))); +SELECT age('second', toDateTime64('2015-08-18 00:00:00', 0, 'UTC'), materialize(toDateTime('2015-08-18 00:00:10', 'UTC'))); +SELECT age('second', toDateTime('2015-08-18 00:00:00', 'UTC'), materialize(toDateTime64('2015-08-18 00:00:10', 3, 'UTC'))); +SELECT age('day', toDateTime64('2015-08-18 00:00:00', 0, 'UTC'), materialize(toDate('2015-08-19', 'UTC'))); +SELECT age('day', toDate('2015-08-18', 'UTC'), materialize(toDateTime64('2015-08-19 00:00:00', 3, 'UTC'))); + +-- Same thing but non-const vs const columns +SELECT age('second', materialize(toDateTime64('1927-01-01 00:00:00', 0, 'UTC')), toDateTime64('1927-01-01 00:00:10', 0, 'UTC')); +SELECT age('second', materialize(toDateTime64('1927-01-01 00:00:00', 6, 'UTC')), toDateTime64('1927-01-01 00:00:10', 3, 'UTC')); +SELECT age('second', materialize(toDateTime64('2015-08-18 00:00:00', 0, 'UTC')), toDateTime('2015-08-18 00:00:10', 'UTC')); +SELECT age('second', materialize(toDateTime('2015-08-18 00:00:00', 'UTC')), toDateTime64('2015-08-18 00:00:10', 3, 'UTC')); +SELECT age('day', materialize(toDateTime64('2015-08-18 00:00:00', 0, 'UTC')), toDate('2015-08-19', 'UTC')); +SELECT age('day', materialize(toDate('2015-08-18', 'UTC')), toDateTime64('2015-08-19 00:00:00', 3, 'UTC')); + +-- Same thing but non-const vs non-const columns +SELECT age('second', materialize(toDateTime64('1927-01-01 00:00:00', 0, 'UTC')), materialize(toDateTime64('1927-01-01 00:00:10', 0, 'UTC'))); +SELECT age('second', materialize(toDateTime64('1927-01-01 00:00:00', 6, 'UTC')), materialize(toDateTime64('1927-01-01 00:00:10', 3, 'UTC'))); +SELECT age('second', materialize(toDateTime64('2015-08-18 00:00:00', 0, 'UTC')), materialize(toDateTime('2015-08-18 00:00:10', 'UTC'))); +SELECT age('second', materialize(toDateTime('2015-08-18 00:00:00', 'UTC')), materialize(toDateTime64('2015-08-18 00:00:10', 3, 'UTC'))); +SELECT age('day', materialize(toDateTime64('2015-08-18 00:00:00', 0, 'UTC')), materialize(toDate('2015-08-19', 'UTC'))); +SELECT age('day', materialize(toDate('2015-08-18', 'UTC')), materialize(toDateTime64('2015-08-19 00:00:00', 3, 'UTC'))); diff --git a/tests/queries/0_stateless/02479_mysql_connect_to_self.sql b/tests/queries/0_stateless/02479_mysql_connect_to_self.sql index a7aa6a96c1d..7ff5b3e3382 100644 --- a/tests/queries/0_stateless/02479_mysql_connect_to_self.sql +++ b/tests/queries/0_stateless/02479_mysql_connect_to_self.sql @@ 
-1,3 +1,4 @@ -- Tags: no-fasttest SELECT * FROM mysql('127.0.0.1:9004', system, one, 'default', '') +SETTINGS send_logs_level = 'fatal'; -- failed connection tries are ok, if it succeeded after retry. diff --git a/tests/queries/0_stateless/02481_prewhere_filtered_rows_div_by_zero.reference b/tests/queries/0_stateless/02481_prewhere_filtered_rows_div_by_zero.reference new file mode 100644 index 00000000000..bb8ce4a8396 --- /dev/null +++ b/tests/queries/0_stateless/02481_prewhere_filtered_rows_div_by_zero.reference @@ -0,0 +1,76 @@ +-- { echoOn } +CREATE TABLE test_filter(a Int32, b Int32, c Int32) ENGINE = MergeTree() ORDER BY a SETTINGS index_granularity = 3; +INSERT INTO test_filter SELECT number, number+1, (number/2 + 1) % 2 FROM numbers(15); +SELECT _part_offset, intDiv(_part_offset, 3) as granule, * FROM test_filter ORDER BY _part_offset; +0 0 0 1 1 +1 0 1 2 1 +2 0 2 3 0 +3 1 3 4 0 +4 1 4 5 1 +5 1 5 6 1 +6 2 6 7 0 +7 2 7 8 0 +8 2 8 9 1 +9 3 9 10 1 +10 3 10 11 0 +11 3 11 12 0 +12 4 12 13 1 +13 4 13 14 1 +14 4 14 15 0 +-- Check that division by zero occurs on some rows +SELECT intDiv(b, c) FROM test_filter; -- { serverError ILLEGAL_DIVISION } +-- Filter out those rows using WHERE or PREWHERE +SELECT intDiv(b, c) FROM test_filter WHERE c != 0; +1 +2 +5 +6 +9 +10 +13 +14 +SELECT intDiv(b, c) FROM test_filter PREWHERE c != 0; +1 +2 +5 +6 +9 +10 +13 +14 +SELECT intDiv(b, c) FROM test_filter PREWHERE c != 0 WHERE b%2 != 0; +1 +5 +9 +13 +SET mutations_sync = 2, allow_experimental_lightweight_delete = 1; +-- Delete all rows where division by zero could occur +DELETE FROM test_filter WHERE c = 0; +-- Test that now division by zero doesn't occur without explicit condition +SELECT intDiv(b, c) FROM test_filter; +1 +2 +5 +6 +9 +10 +13 +14 +SELECT * FROM test_filter PREWHERE intDiv(b, c) > 0; +0 1 1 +1 2 1 +4 5 1 +5 6 1 +8 9 1 +9 10 1 +12 13 1 +13 14 1 +SELECT * FROM test_filter PREWHERE b != 0 WHERE intDiv(b, c) > 0; +0 1 1 +1 2 1 +4 5 1 +5 6 1 +8 9 1 +9 10 1 +12 13 1 +13 14 1 diff --git a/tests/queries/0_stateless/02481_prewhere_filtered_rows_div_by_zero.sql b/tests/queries/0_stateless/02481_prewhere_filtered_rows_div_by_zero.sql new file mode 100644 index 00000000000..94ffb1b8730 --- /dev/null +++ b/tests/queries/0_stateless/02481_prewhere_filtered_rows_div_by_zero.sql @@ -0,0 +1,28 @@ +DROP TABLE IF EXISTS test_filter; + +-- { echoOn } +CREATE TABLE test_filter(a Int32, b Int32, c Int32) ENGINE = MergeTree() ORDER BY a SETTINGS index_granularity = 3; + +INSERT INTO test_filter SELECT number, number+1, (number/2 + 1) % 2 FROM numbers(15); + +SELECT _part_offset, intDiv(_part_offset, 3) as granule, * FROM test_filter ORDER BY _part_offset; + +-- Check that division by zero occurs on some rows +SELECT intDiv(b, c) FROM test_filter; -- { serverError ILLEGAL_DIVISION } +-- Filter out those rows using WHERE or PREWHERE +SELECT intDiv(b, c) FROM test_filter WHERE c != 0; +SELECT intDiv(b, c) FROM test_filter PREWHERE c != 0; +SELECT intDiv(b, c) FROM test_filter PREWHERE c != 0 WHERE b%2 != 0; + + +SET mutations_sync = 2, allow_experimental_lightweight_delete = 1; + +-- Delete all rows where division by zero could occur +DELETE FROM test_filter WHERE c = 0; +-- Test that now division by zero doesn't occur without explicit condition +SELECT intDiv(b, c) FROM test_filter; +SELECT * FROM test_filter PREWHERE intDiv(b, c) > 0; +SELECT * FROM test_filter PREWHERE b != 0 WHERE intDiv(b, c) > 0; + +-- { echoOff } +DROP TABLE test_filter; diff --git 
a/tests/queries/0_stateless/02482_capnp_list_of_structs.reference b/tests/queries/0_stateless/02482_capnp_list_of_structs.reference new file mode 100644 index 00000000000..002eae70f97 --- /dev/null +++ b/tests/queries/0_stateless/02482_capnp_list_of_structs.reference @@ -0,0 +1,4 @@ +[(1,3),(2,4)] +[1,2] [3,4] +[1,2] [3,4] +[1,2] diff --git a/tests/queries/0_stateless/02482_capnp_list_of_structs.sh b/tests/queries/0_stateless/02482_capnp_list_of_structs.sh new file mode 100755 index 00000000000..091bd4dba2a --- /dev/null +++ b/tests/queries/0_stateless/02482_capnp_list_of_structs.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-parallel + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') +touch $USER_FILES_PATH/data.capnp + +SCHEMADIR=$(clickhouse-client --query "select * from file('data.capnp', 'CapnProto', 'val1 char') settings format_schema='nonexist:Message'" 2>&1 | grep Exception | grep -oP "file \K.*(?=/nonexist.capnp)") +CLIENT_SCHEMADIR=$CURDIR/format_schemas +SERVER_SCHEMADIR=test_02482 +mkdir -p $SCHEMADIR/$SERVER_SCHEMADIR +cp -r $CLIENT_SCHEMADIR/02482_* $SCHEMADIR/$SERVER_SCHEMADIR/ + + +$CLICKHOUSE_CLIENT -q "insert into function file(02482_data.capnp, auto, 'nested Nested(x Int64, y Int64)') select [1,2], [3,4] settings format_schema='$SERVER_SCHEMADIR/02482_list_of_structs.capnp:Nested', engine_file_truncate_on_insert=1" +$CLICKHOUSE_CLIENT -q "select * from file(02482_data.capnp) settings format_schema='$SERVER_SCHEMADIR/02482_list_of_structs.capnp:Nested'" +$CLICKHOUSE_CLIENT -q "select * from file(02482_data.capnp, auto, 'nested Nested(x Int64, y Int64)') settings format_schema='$SERVER_SCHEMADIR/02482_list_of_structs.capnp:Nested'" +$CLICKHOUSE_CLIENT -q "select * from file(02482_data.capnp, auto, '\`nested.x\` Array(Int64), \`nested.y\` Array(Int64)') settings format_schema='$SERVER_SCHEMADIR/02482_list_of_structs.capnp:Nested'" +$CLICKHOUSE_CLIENT -q "select * from file(02482_data.capnp, auto, '\`nested.x\` Array(Int64)') settings format_schema='$SERVER_SCHEMADIR/02482_list_of_structs.capnp:Nested'" + +rm $USER_FILES_PATH/data.capnp +rm $USER_FILES_PATH/02482_data.capnp diff --git a/tests/queries/0_stateless/02482_value_block_assert.reference b/tests/queries/0_stateless/02482_value_block_assert.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02482_value_block_assert.sql b/tests/queries/0_stateless/02482_value_block_assert.sql new file mode 100644 index 00000000000..8684776f45f --- /dev/null +++ b/tests/queries/0_stateless/02482_value_block_assert.sql @@ -0,0 +1,24 @@ +SET allow_suspicious_low_cardinality_types=1; +CREATE TABLE range_key_dictionary_source_table__fuzz_323 +( + `key` UInt256, + `start_date` Int8, + `end_date` LowCardinality(UInt256), + `value` Tuple(UInt8, Array(DateTime), Decimal(9, 1), Array(Int16), Array(UInt8)), + `value_nullable` UUID +) +ENGINE = TinyLog; +INSERT INTO range_key_dictionary_source_table__fuzz_323 FORMAT Values +(1, toDate('2019-05-20'), toDate('2019-05-20'), 'First', 'First'); -- { clientError CANNOT_PARSE_INPUT_ASSERTION_FAILED } + + +CREATE TABLE complex_key_dictionary_source_table__fuzz_267 +( + `id` Decimal(38, 30), + `id_key` Array(UUID), + `value` Array(Nullable(DateTime64(3))), + `value_nullable` 
Nullable(UUID) +) +ENGINE = TinyLog; +INSERT INTO complex_key_dictionary_source_table__fuzz_267 FORMAT Values +(1, 'key', 'First', 'First'); -- { clientError CANNOT_READ_ARRAY_FROM_TEXT } diff --git a/tests/queries/0_stateless/02482_value_block_parsing.reference b/tests/queries/0_stateless/02482_value_block_parsing.reference new file mode 100644 index 00000000000..e8183f05f5d --- /dev/null +++ b/tests/queries/0_stateless/02482_value_block_parsing.reference @@ -0,0 +1,3 @@ +1 +1 +1 diff --git a/tests/queries/0_stateless/02482_value_block_parsing.sh b/tests/queries/0_stateless/02482_value_block_parsing.sh new file mode 100755 index 00000000000..b74d3f395f0 --- /dev/null +++ b/tests/queries/0_stateless/02482_value_block_parsing.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --query=" + CREATE TABLE simple_key_dictionary_source_table__fuzz_48 + ( + id Nullable(Int8), + value Array(Date), + value_nullable UUID + ) + ENGINE = TinyLog;" + +echo "INSERT INTO simple_key_dictionary_source_table__fuzz_48 FORMAT Values (null, [], '61f0c404-5cb3-11e7-907b-a6006ad3dba0') +( -- Bu " | ${CLICKHOUSE_CURL} -s "${CLICKHOUSE_URL}" --data-binary @- -v 2>&1 | grep -c 'X-ClickHouse-Exception-Code: 62' + + +echo "INSERT INTO simple_key_dictionary_source_table__fuzz_48 FORMAT Values + (!Invalid" | ${CLICKHOUSE_CURL} -s "${CLICKHOUSE_URL}" --data-binary @- -v 2>&1 | grep -c 'X-ClickHouse-Exception-Code: 62' + +echo "INSERT INTO simple_key_dictionary_source_table__fuzz_48 FORMAT Values (null, [], '61f0c404-5cb3-11e7-907b-a6006ad3dba0') + ,(null, [], '61f0c404-5cb3-11e7-907b-a6006ad3dba0'), + (!!!!!!3adas + )" | ${CLICKHOUSE_CURL} -s "${CLICKHOUSE_URL}" --data-binary @- -v 2>&1 | grep -c 'X-ClickHouse-Exception-Code: 62' diff --git a/tests/queries/0_stateless/02483_capnp_decimals.reference b/tests/queries/0_stateless/02483_capnp_decimals.reference new file mode 100644 index 00000000000..9885da95ce2 --- /dev/null +++ b/tests/queries/0_stateless/02483_capnp_decimals.reference @@ -0,0 +1,2 @@ +4242424242 42420 +4242.424242 42.42 diff --git a/tests/queries/0_stateless/02483_capnp_decimals.sh b/tests/queries/0_stateless/02483_capnp_decimals.sh new file mode 100755 index 00000000000..bdfa9dac3d5 --- /dev/null +++ b/tests/queries/0_stateless/02483_capnp_decimals.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-parallel + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') +touch $USER_FILES_PATH/data.capnp + +SCHEMADIR=$(clickhouse-client --query "select * from file('data.capnp', 'CapnProto', 'val1 char') settings format_schema='nonexist:Message'" 2>&1 | grep Exception | grep -oP "file \K.*(?=/nonexist.capnp)") +CLIENT_SCHEMADIR=$CURDIR/format_schemas +SERVER_SCHEMADIR=test_02483 +mkdir -p $SCHEMADIR/$SERVER_SCHEMADIR +cp -r $CLIENT_SCHEMADIR/02483_* $SCHEMADIR/$SERVER_SCHEMADIR/ + + +$CLICKHOUSE_CLIENT -q "insert into function file(02483_data.capnp, auto, 'decimal32 Decimal32(3), decimal64 Decimal64(6)') select 42.42, 4242.424242 settings format_schema='$SERVER_SCHEMADIR/02483_decimals.capnp:Message', engine_file_truncate_on_insert=1" +$CLICKHOUSE_CLIENT -q "select * from file(02483_data.capnp) settings format_schema='$SERVER_SCHEMADIR/02483_decimals.capnp:Message'" +$CLICKHOUSE_CLIENT -q "select * from file(02483_data.capnp, auto, 'decimal64 Decimal64(6), decimal32 Decimal32(3)') settings format_schema='$SERVER_SCHEMADIR/02483_decimals.capnp:Message'" + +rm $USER_FILES_PATH/data.capnp +rm $USER_FILES_PATH/02483_data.capnp + diff --git a/tests/queries/0_stateless/02483_elapsed_time.reference b/tests/queries/0_stateless/02483_elapsed_time.reference new file mode 100644 index 00000000000..0fc2ca00f45 --- /dev/null +++ b/tests/queries/0_stateless/02483_elapsed_time.reference @@ -0,0 +1,14 @@ +1 1 +Greater (Ok) +Greater (Ok) +Row 1: +────── +type: QueryFinish +elapsed_more_than_one_second: 1 +end_minus_start_more_than_a_second: 1 + +Row 2: +────── +type: QueryFinish +elapsed_more_than_one_second: 1 +end_minus_start_more_than_a_second: 1 diff --git a/tests/queries/0_stateless/02483_elapsed_time.sh b/tests/queries/0_stateless/02483_elapsed_time.sh new file mode 100755 index 00000000000..724bd1d297b --- /dev/null +++ b/tests/queries/0_stateless/02483_elapsed_time.sh @@ -0,0 +1,69 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + + +# The following query fails during query interpretation so it throws an ExceptionBeforeStart +EXCEPTION_BEFORE_START_QUERY="WITH + ( + SELECT sleepEachRow(1) + ) AS sub + SELECT * + FROM + ( + SELECT * + FROM system.numbers + WHERE number IN (sub) + ) + SETTINGS enable_global_with_statement = 0" + + +# For this query the system.query_log needs to show ExceptionBeforeStart and elapsed seconds >= 1.0 +QUERY_ID="${CLICKHOUSE_DATABASE}_$(date +%s)_02883_q1" +${CLICKHOUSE_CLIENT} -m --query "$EXCEPTION_BEFORE_START_QUERY" --query_id="$QUERY_ID" >/dev/null 2>&1 + +${CLICKHOUSE_CLIENT} --query "SYSTEM FLUSH LOGS" +${CLICKHOUSE_CLIENT} --query "SELECT type == 'ExceptionBeforeStart' as expected_type, query_duration_ms >= 1000 as elapsed_more_than_one_second FROM system.query_log WHERE query_id='$QUERY_ID'" + +# Now we test with a query that will take 1+ seconds. 
The CLI should show that as part of the output format +OK_QUERY_JSON=" +WITH ( + SELECT sleepEachRow(1.0) + ) AS sub +SELECT * +FROM +( + SELECT * + FROM system.one +) +FORMAT JSON +SETTINGS enable_global_with_statement = 1" +QUERY_ID_2="${CLICKHOUSE_DATABASE}_$(date +%s)_02883_q2" +${CLICKHOUSE_CLIENT} --query "$OK_QUERY_JSON" --query_id="${QUERY_ID_2}" | grep elapsed | awk '{ if($2 >= 1.0) { print "Greater (Ok)" } else { print "Smaller than expected: " $2 } }' + +OK_QUERY_XML=" +WITH ( + SELECT sleepEachRow(1.0) + ) AS sub +SELECT * +FROM +( + SELECT * + FROM system.one +) +FORMAT XML +SETTINGS enable_global_with_statement = 1" +QUERY_ID_3="${CLICKHOUSE_DATABASE}_$(date +%s)_02883_q3" +${CLICKHOUSE_CLIENT} --query "$OK_QUERY_XML" --query_id="${QUERY_ID_3}" | grep elapsed | awk -F '[<>]' '{ if($3 >= 1.0) { print "Greater (Ok)" } else { print "Smaller than expected: " $3 } }' + +${CLICKHOUSE_CLIENT} --query "SYSTEM FLUSH LOGS" +${CLICKHOUSE_CLIENT} --query " + SELECT + type, + query_duration_ms >= 1000 as elapsed_more_than_one_second, + (toDecimal64(event_time_microseconds, 6) - toDecimal64(query_start_time_microseconds, 6)) > 1.0 AS end_minus_start_more_than_a_second + FROM system.query_log + WHERE type='QueryFinish' AND (query_id='$QUERY_ID_2' OR query_id='${QUERY_ID_3}') + FORMAT Vertical" diff --git a/tests/queries/0_stateless/02494_combinators_with_null_argument.reference b/tests/queries/0_stateless/02494_combinators_with_null_argument.reference new file mode 100644 index 00000000000..a891c305dde --- /dev/null +++ b/tests/queries/0_stateless/02494_combinators_with_null_argument.reference @@ -0,0 +1,18 @@ +-- { echoOn } + +select sumIf(1, NULL); +0 +select sumIf(NULL, 1); +\N +select sumIf(NULL, NULL); +\N +select countIf(1, NULL); +0 +select countIf(NULL, 1); +0 +select countIf(1, NULL); +0 +select sumArray([NULL, NULL]); +\N +select countArray([NULL, NULL]); +0 diff --git a/tests/queries/0_stateless/02494_combinators_with_null_argument.sql b/tests/queries/0_stateless/02494_combinators_with_null_argument.sql new file mode 100644 index 00000000000..e18fd741aab --- /dev/null +++ b/tests/queries/0_stateless/02494_combinators_with_null_argument.sql @@ -0,0 +1,11 @@ +-- { echoOn } + +select sumIf(1, NULL); +select sumIf(NULL, 1); +select sumIf(NULL, NULL); +select countIf(1, NULL); +select countIf(NULL, 1); +select countIf(1, NULL); +select sumArray([NULL, NULL]); +select countArray([NULL, NULL]); + diff --git a/tests/queries/0_stateless/02495_sum_if_to_count_if_bug.reference b/tests/queries/0_stateless/02495_sum_if_to_count_if_bug.reference new file mode 100644 index 00000000000..4bda3243d2e --- /dev/null +++ b/tests/queries/0_stateless/02495_sum_if_to_count_if_bug.reference @@ -0,0 +1,3 @@ +1024 +0 +1024 diff --git a/tests/queries/0_stateless/02495_sum_if_to_count_if_bug.sql b/tests/queries/0_stateless/02495_sum_if_to_count_if_bug.sql new file mode 100644 index 00000000000..0791b374668 --- /dev/null +++ b/tests/queries/0_stateless/02495_sum_if_to_count_if_bug.sql @@ -0,0 +1,4 @@ +select sum(if((number % NULL) = 2, 0, 1)) FROM numbers(1024) settings optimize_rewrite_sum_if_to_count_if=0; +select sum(if((number % NULL) = 2, 0, 1)) FROM numbers(1024) settings optimize_rewrite_sum_if_to_count_if=1, allow_experimental_analyzer=0; +select sum(if((number % NULL) = 2, 0, 1)) FROM numbers(1024) settings optimize_rewrite_sum_if_to_count_if=1, allow_experimental_analyzer=1; + diff --git a/tests/queries/0_stateless/02496_format_datetime_in_joda_syntax.reference 
b/tests/queries/0_stateless/02496_format_datetime_in_joda_syntax.reference new file mode 100644 index 00000000000..c3a931a5ebb --- /dev/null +++ b/tests/queries/0_stateless/02496_format_datetime_in_joda_syntax.reference @@ -0,0 +1,123 @@ +-- { echoOn } +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'G'), formatDateTimeInJodaSyntax(datetime64, 'G'), formatDateTimeInJodaSyntax(date, 'G'), formatDateTimeInJodaSyntax(date32, 'G'); +AD AD AD AD +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'GG'), formatDateTimeInJodaSyntax(datetime64, 'GG'), formatDateTimeInJodaSyntax(date, 'GG'), formatDateTimeInJodaSyntax(date32, 'GG'); +AD AD AD AD +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'GGG'), formatDateTimeInJodaSyntax(datetime64, 'GGG'), formatDateTimeInJodaSyntax(date, 'GGG'), formatDateTimeInJodaSyntax(date32, 'GGG'); +AD AD AD AD +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'GGGG'), formatDateTimeInJodaSyntax(datetime64, 'GGGG'), formatDateTimeInJodaSyntax(date, 'GGGG'), formatDateTimeInJodaSyntax(date32, 'GGGG'); +Anno Domini Anno Domini Anno Domini Anno Domini +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'GGGGG'), formatDateTimeInJodaSyntax(datetime64, 'GGGGG'), formatDateTimeInJodaSyntax(date, 'GGGGG'), formatDateTimeInJodaSyntax(date32, 'GGGGG'); +Anno Domini Anno Domini Anno Domini Anno Domini +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'C'), formatDateTimeInJodaSyntax(datetime64, 'C'), formatDateTimeInJodaSyntax(date, 'C'), formatDateTimeInJodaSyntax(date32, 'C'); +20 20 20 20 +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'CC'), formatDateTimeInJodaSyntax(datetime64, 'CC'), formatDateTimeInJodaSyntax(date, 'CC'), formatDateTimeInJodaSyntax(date32, 'CC'); +20 20 20 20 +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'CCC'), formatDateTimeInJodaSyntax(datetime64, 'CCC'), formatDateTimeInJodaSyntax(date, 'CCC'), formatDateTimeInJodaSyntax(date32, 'CCC'); +020 020 020 020 +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'Y'), formatDateTimeInJodaSyntax(datetime64, 'Y'), formatDateTimeInJodaSyntax(date, 'Y'), formatDateTimeInJodaSyntax(date32, 'Y'); +2018 2018 2018 2018 +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'YY'), formatDateTimeInJodaSyntax(datetime64, 'YY'), 
formatDateTimeInJodaSyntax(date, 'YY'), formatDateTimeInJodaSyntax(date32, 'YY'); +18 18 18 18 +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'YYY'), formatDateTimeInJodaSyntax(datetime64, 'YYY'), formatDateTimeInJodaSyntax(date, 'YYY'), formatDateTimeInJodaSyntax(date32, 'YYY'); +2018 2018 2018 2018 +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'YYYY'), formatDateTimeInJodaSyntax(datetime64, 'YYYY'), formatDateTimeInJodaSyntax(date, 'YYYY'), formatDateTimeInJodaSyntax(date32, 'YYYY'); +2018 2018 2018 2018 +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'YYYYY'), formatDateTimeInJodaSyntax(datetime64, 'YYYYY'), formatDateTimeInJodaSyntax(date, 'YYYYY'), formatDateTimeInJodaSyntax(date32, 'YYYYY'); +02018 02018 02018 02018 +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'e'), formatDateTimeInJodaSyntax(datetime64, 'e'), formatDateTimeInJodaSyntax(date, 'e'), formatDateTimeInJodaSyntax(date32, 'e'); +5 5 5 5 +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'ee'), formatDateTimeInJodaSyntax(datetime64, 'ee'), formatDateTimeInJodaSyntax(date, 'ee'), formatDateTimeInJodaSyntax(date32, 'ee'); +05 05 05 05 +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'E'), formatDateTimeInJodaSyntax(datetime64, 'E'), formatDateTimeInJodaSyntax(date, 'E'), formatDateTimeInJodaSyntax(date32, 'E'); +Fri Fri Fri Fri +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'EE'), formatDateTimeInJodaSyntax(datetime64, 'EE'), formatDateTimeInJodaSyntax(date, 'EE'), formatDateTimeInJodaSyntax(date32, 'EE'); +Fri Fri Fri Fri +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'EEE'), formatDateTimeInJodaSyntax(datetime64, 'EEE'), formatDateTimeInJodaSyntax(date, 'EEE'), formatDateTimeInJodaSyntax(date32, 'EEE'); +Fri Fri Fri Fri +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'EEEE'), formatDateTimeInJodaSyntax(datetime64, 'EEEE'), formatDateTimeInJodaSyntax(date, 'EEEE'), formatDateTimeInJodaSyntax(date32, 'EEEE'); +Friday Friday Friday Friday +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'EEEEE'), formatDateTimeInJodaSyntax(datetime64, 'EEEEE'), formatDateTimeInJodaSyntax(date, 'EEEEE'), formatDateTimeInJodaSyntax(date32, 'EEEEE'); +Friday Friday Friday Friday +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, 
toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'y'), formatDateTimeInJodaSyntax(datetime64, 'y'), formatDateTimeInJodaSyntax(date, 'y'), formatDateTimeInJodaSyntax(date32, 'y'); +2018 2018 2018 2018 +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'yy'), formatDateTimeInJodaSyntax(datetime64, 'yy'), formatDateTimeInJodaSyntax(date, 'yy'), formatDateTimeInJodaSyntax(date32, 'yy'); +18 18 18 18 +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'yyy'), formatDateTimeInJodaSyntax(datetime64, 'yyy'), formatDateTimeInJodaSyntax(date, 'yyy'), formatDateTimeInJodaSyntax(date32, 'yyy'); +2018 2018 2018 2018 +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'yyyy'), formatDateTimeInJodaSyntax(datetime64, 'yyyy'), formatDateTimeInJodaSyntax(date, 'yyyy'), formatDateTimeInJodaSyntax(date32, 'yyyy'); +2018 2018 2018 2018 +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'yyyyy'), formatDateTimeInJodaSyntax(datetime64, 'yyyyy'), formatDateTimeInJodaSyntax(date, 'yyyyy'), formatDateTimeInJodaSyntax(date32, 'yyyyy'); +02018 02018 02018 02018 +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'D'), formatDateTimeInJodaSyntax(datetime64, 'D'), formatDateTimeInJodaSyntax(date, 'D'), formatDateTimeInJodaSyntax(date32, 'D'); +12 12 12 12 +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'DD'), formatDateTimeInJodaSyntax(datetime64, 'DD'), formatDateTimeInJodaSyntax(date, 'DD'), formatDateTimeInJodaSyntax(date32, 'DD'); +12 12 12 12 +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'DDD'), formatDateTimeInJodaSyntax(datetime64, 'DDD'), formatDateTimeInJodaSyntax(date, 'DDD'), formatDateTimeInJodaSyntax(date32, 'DDD'); +012 012 012 012 +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'M'), formatDateTimeInJodaSyntax(datetime64, 'M'), formatDateTimeInJodaSyntax(date, 'M'), formatDateTimeInJodaSyntax(date32, 'M'); +1 1 1 1 +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'MM'), formatDateTimeInJodaSyntax(datetime64, 'MM'), formatDateTimeInJodaSyntax(date, 'MM'), formatDateTimeInJodaSyntax(date32, 'MM'); +01 01 01 01 +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'MMM'), formatDateTimeInJodaSyntax(datetime64, 'MMM'), formatDateTimeInJodaSyntax(date, 'MMM'), 
formatDateTimeInJodaSyntax(date32, 'MMM'); +Jan Jan Jan Jan +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'MMMM'), formatDateTimeInJodaSyntax(datetime64, 'MMMM'), formatDateTimeInJodaSyntax(date, 'MMMM'), formatDateTimeInJodaSyntax(date32, 'MMMM'); +January January January January +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'd'), formatDateTimeInJodaSyntax(datetime64, 'd'), formatDateTimeInJodaSyntax(date, 'd'), formatDateTimeInJodaSyntax(date32, 'd'); +12 12 12 12 +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'dd'), formatDateTimeInJodaSyntax(datetime64, 'dd'), formatDateTimeInJodaSyntax(date, 'dd'), formatDateTimeInJodaSyntax(date32, 'dd'); +12 12 12 12 +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'ddd'), formatDateTimeInJodaSyntax(datetime64, 'ddd'), formatDateTimeInJodaSyntax(date, 'ddd'), formatDateTimeInJodaSyntax(date32, 'ddd'); +012 012 012 012 +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'a'), formatDateTimeInJodaSyntax(datetime64, 'a'), formatDateTimeInJodaSyntax(date, 'a'), formatDateTimeInJodaSyntax(date32, 'a'); +PM PM AM AM +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'aa'), formatDateTimeInJodaSyntax(datetime64, 'aa'), formatDateTimeInJodaSyntax(date, 'aa'), formatDateTimeInJodaSyntax(date32, 'aa'); +PM PM AM AM +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'aaa'), formatDateTimeInJodaSyntax(datetime64, 'aaa'), formatDateTimeInJodaSyntax(date, 'aaa'), formatDateTimeInJodaSyntax(date32, 'aaa'); +PM PM AM AM +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'K'), formatDateTimeInJodaSyntax(datetime64, 'K'), formatDateTimeInJodaSyntax(date, 'K'), formatDateTimeInJodaSyntax(date32, 'K'); +10 10 0 0 +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'KK'), formatDateTimeInJodaSyntax(datetime64, 'KK'), formatDateTimeInJodaSyntax(date, 'KK'), formatDateTimeInJodaSyntax(date32, 'KK'); +10 10 00 00 +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'KKK'), formatDateTimeInJodaSyntax(datetime64, 'KKK'), formatDateTimeInJodaSyntax(date, 'KKK'), formatDateTimeInJodaSyntax(date32, 'KKK'); +010 010 000 000 +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 
'h'), formatDateTimeInJodaSyntax(datetime64, 'h'), formatDateTimeInJodaSyntax(date, 'h'), formatDateTimeInJodaSyntax(date32, 'h'); +10 10 12 12 +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'hh'), formatDateTimeInJodaSyntax(datetime64, 'hh'), formatDateTimeInJodaSyntax(date, 'hh'), formatDateTimeInJodaSyntax(date32, 'hh'); +10 10 12 12 +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'hhh'), formatDateTimeInJodaSyntax(datetime64, 'hhh'), formatDateTimeInJodaSyntax(date, 'hhh'), formatDateTimeInJodaSyntax(date32, 'hhh'); +010 010 012 012 +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'H'), formatDateTimeInJodaSyntax(datetime64, 'H'), formatDateTimeInJodaSyntax(date, 'H'), formatDateTimeInJodaSyntax(date32, 'H'); +22 22 0 0 +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'HH'), formatDateTimeInJodaSyntax(datetime64, 'HH'), formatDateTimeInJodaSyntax(date, 'HH'), formatDateTimeInJodaSyntax(date32, 'HH'); +22 22 00 00 +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'HHH'), formatDateTimeInJodaSyntax(datetime64, 'HHH'), formatDateTimeInJodaSyntax(date, 'HHH'), formatDateTimeInJodaSyntax(date32, 'HHH'); +022 022 000 000 +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'k'), formatDateTimeInJodaSyntax(datetime64, 'k'), formatDateTimeInJodaSyntax(date, 'k'), formatDateTimeInJodaSyntax(date32, 'k'); +22 22 24 24 +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'kk'), formatDateTimeInJodaSyntax(datetime64, 'kk'), formatDateTimeInJodaSyntax(date, 'kk'), formatDateTimeInJodaSyntax(date32, 'kk'); +22 22 24 24 +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'kkk'), formatDateTimeInJodaSyntax(datetime64, 'kkk'), formatDateTimeInJodaSyntax(date, 'kkk'), formatDateTimeInJodaSyntax(date32, 'kkk'); +022 022 024 024 +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'm'), formatDateTimeInJodaSyntax(datetime64, 'm'), formatDateTimeInJodaSyntax(date, 'm'), formatDateTimeInJodaSyntax(date32, 'm'); +33 33 0 0 +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'mm'), formatDateTimeInJodaSyntax(datetime64, 'mm'), formatDateTimeInJodaSyntax(date, 'mm'), formatDateTimeInJodaSyntax(date32, 'mm'); +33 33 00 00 +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, 
toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'mmm'), formatDateTimeInJodaSyntax(datetime64, 'mmm'), formatDateTimeInJodaSyntax(date, 'mmm'), formatDateTimeInJodaSyntax(date32, 'mmm'); +033 033 000 000 +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 's'), formatDateTimeInJodaSyntax(datetime64, 's'), formatDateTimeInJodaSyntax(date, 's'), formatDateTimeInJodaSyntax(date32, 's'); +44 44 0 0 +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'ss'), formatDateTimeInJodaSyntax(datetime64, 'ss'), formatDateTimeInJodaSyntax(date, 'ss'), formatDateTimeInJodaSyntax(date32, 'ss'); +44 44 00 00 +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'sss'), formatDateTimeInJodaSyntax(datetime64, 'sss'), formatDateTimeInJodaSyntax(date, 'sss'), formatDateTimeInJodaSyntax(date32, 'sss'); +044 044 000 000 +with '2018-01-12 22:33:44' as s, toDateTime(s, 'UTC') as datetime, toDateTime64(s, 6, 'UTC') as datetime64, toDate(s) as date, toDate32(s) as date32 select formatDateTimeInJodaSyntax(datetime, 'zzzz'), formatDateTimeInJodaSyntax(datetime64, 'zzzz'); +UTC UTC +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'G123DDD'), formatDateTimeInJodaSyntax(datetime64, 'G123DDD'), formatDateTimeInJodaSyntax(date, 'G123DDD'), formatDateTimeInJodaSyntax(date32, 'G123DDD'); +AD123012 AD123012 AD123012 AD123012 +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'G\'\'DDD'), formatDateTimeInJodaSyntax(datetime64, 'G\'\'DDD'), formatDateTimeInJodaSyntax(date, 'G\'\'DDD'), formatDateTimeInJodaSyntax(date32, 'G\'\'DDD'); +AD\'012 AD\'012 AD\'012 AD\'012 +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'G\'aaa\'DDD'), formatDateTimeInJodaSyntax(datetime64, 'G\'aaa\'DDD'), formatDateTimeInJodaSyntax(date, 'G\'aaa\'DDD'), formatDateTimeInJodaSyntax(date32, 'G\'aaa\'DDD'); +ADaaa012 ADaaa012 ADaaa012 ADaaa012 +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'G\'a\'\'aa\'DDD'), formatDateTimeInJodaSyntax(datetime64, 'G\'a\'\'aa\'DDD'), formatDateTimeInJodaSyntax(date, 'G\'a\'\'aa\'DDD'), formatDateTimeInJodaSyntax(date32, 'G\'a\'\'aa\'DDD'); +ADa\'aa012 ADa\'aa012 ADa\'aa012 ADa\'aa012 diff --git a/tests/queries/0_stateless/02496_format_datetime_in_joda_syntax.sql b/tests/queries/0_stateless/02496_format_datetime_in_joda_syntax.sql new file mode 100644 index 00000000000..4c1839b04e6 --- /dev/null +++ b/tests/queries/0_stateless/02496_format_datetime_in_joda_syntax.sql @@ -0,0 +1,85 @@ +-- { echoOn } +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'G'), 
formatDateTimeInJodaSyntax(datetime64, 'G'), formatDateTimeInJodaSyntax(date, 'G'), formatDateTimeInJodaSyntax(date32, 'G'); +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'GG'), formatDateTimeInJodaSyntax(datetime64, 'GG'), formatDateTimeInJodaSyntax(date, 'GG'), formatDateTimeInJodaSyntax(date32, 'GG'); +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'GGG'), formatDateTimeInJodaSyntax(datetime64, 'GGG'), formatDateTimeInJodaSyntax(date, 'GGG'), formatDateTimeInJodaSyntax(date32, 'GGG'); +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'GGGG'), formatDateTimeInJodaSyntax(datetime64, 'GGGG'), formatDateTimeInJodaSyntax(date, 'GGGG'), formatDateTimeInJodaSyntax(date32, 'GGGG'); +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'GGGGG'), formatDateTimeInJodaSyntax(datetime64, 'GGGGG'), formatDateTimeInJodaSyntax(date, 'GGGGG'), formatDateTimeInJodaSyntax(date32, 'GGGGG'); +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'C'), formatDateTimeInJodaSyntax(datetime64, 'C'), formatDateTimeInJodaSyntax(date, 'C'), formatDateTimeInJodaSyntax(date32, 'C'); +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'CC'), formatDateTimeInJodaSyntax(datetime64, 'CC'), formatDateTimeInJodaSyntax(date, 'CC'), formatDateTimeInJodaSyntax(date32, 'CC'); +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'CCC'), formatDateTimeInJodaSyntax(datetime64, 'CCC'), formatDateTimeInJodaSyntax(date, 'CCC'), formatDateTimeInJodaSyntax(date32, 'CCC'); +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'Y'), formatDateTimeInJodaSyntax(datetime64, 'Y'), formatDateTimeInJodaSyntax(date, 'Y'), formatDateTimeInJodaSyntax(date32, 'Y'); +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'YY'), formatDateTimeInJodaSyntax(datetime64, 'YY'), formatDateTimeInJodaSyntax(date, 'YY'), formatDateTimeInJodaSyntax(date32, 'YY'); +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'YYY'), formatDateTimeInJodaSyntax(datetime64, 'YYY'), formatDateTimeInJodaSyntax(date, 'YYY'), formatDateTimeInJodaSyntax(date32, 'YYY'); +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'YYYY'), formatDateTimeInJodaSyntax(datetime64, 'YYYY'), 
formatDateTimeInJodaSyntax(date, 'YYYY'), formatDateTimeInJodaSyntax(date32, 'YYYY'); +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'YYYYY'), formatDateTimeInJodaSyntax(datetime64, 'YYYYY'), formatDateTimeInJodaSyntax(date, 'YYYYY'), formatDateTimeInJodaSyntax(date32, 'YYYYY'); +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'e'), formatDateTimeInJodaSyntax(datetime64, 'e'), formatDateTimeInJodaSyntax(date, 'e'), formatDateTimeInJodaSyntax(date32, 'e'); +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'ee'), formatDateTimeInJodaSyntax(datetime64, 'ee'), formatDateTimeInJodaSyntax(date, 'ee'), formatDateTimeInJodaSyntax(date32, 'ee'); +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'E'), formatDateTimeInJodaSyntax(datetime64, 'E'), formatDateTimeInJodaSyntax(date, 'E'), formatDateTimeInJodaSyntax(date32, 'E'); +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'EE'), formatDateTimeInJodaSyntax(datetime64, 'EE'), formatDateTimeInJodaSyntax(date, 'EE'), formatDateTimeInJodaSyntax(date32, 'EE'); +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'EEE'), formatDateTimeInJodaSyntax(datetime64, 'EEE'), formatDateTimeInJodaSyntax(date, 'EEE'), formatDateTimeInJodaSyntax(date32, 'EEE'); +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'EEEE'), formatDateTimeInJodaSyntax(datetime64, 'EEEE'), formatDateTimeInJodaSyntax(date, 'EEEE'), formatDateTimeInJodaSyntax(date32, 'EEEE'); +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'EEEEE'), formatDateTimeInJodaSyntax(datetime64, 'EEEEE'), formatDateTimeInJodaSyntax(date, 'EEEEE'), formatDateTimeInJodaSyntax(date32, 'EEEEE'); +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'y'), formatDateTimeInJodaSyntax(datetime64, 'y'), formatDateTimeInJodaSyntax(date, 'y'), formatDateTimeInJodaSyntax(date32, 'y'); +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'yy'), formatDateTimeInJodaSyntax(datetime64, 'yy'), formatDateTimeInJodaSyntax(date, 'yy'), formatDateTimeInJodaSyntax(date32, 'yy'); +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'yyy'), formatDateTimeInJodaSyntax(datetime64, 'yyy'), formatDateTimeInJodaSyntax(date, 'yyy'), 
formatDateTimeInJodaSyntax(date32, 'yyy'); +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'yyyy'), formatDateTimeInJodaSyntax(datetime64, 'yyyy'), formatDateTimeInJodaSyntax(date, 'yyyy'), formatDateTimeInJodaSyntax(date32, 'yyyy'); +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'yyyyy'), formatDateTimeInJodaSyntax(datetime64, 'yyyyy'), formatDateTimeInJodaSyntax(date, 'yyyyy'), formatDateTimeInJodaSyntax(date32, 'yyyyy'); +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'D'), formatDateTimeInJodaSyntax(datetime64, 'D'), formatDateTimeInJodaSyntax(date, 'D'), formatDateTimeInJodaSyntax(date32, 'D'); +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'DD'), formatDateTimeInJodaSyntax(datetime64, 'DD'), formatDateTimeInJodaSyntax(date, 'DD'), formatDateTimeInJodaSyntax(date32, 'DD'); +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'DDD'), formatDateTimeInJodaSyntax(datetime64, 'DDD'), formatDateTimeInJodaSyntax(date, 'DDD'), formatDateTimeInJodaSyntax(date32, 'DDD'); +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'M'), formatDateTimeInJodaSyntax(datetime64, 'M'), formatDateTimeInJodaSyntax(date, 'M'), formatDateTimeInJodaSyntax(date32, 'M'); +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'MM'), formatDateTimeInJodaSyntax(datetime64, 'MM'), formatDateTimeInJodaSyntax(date, 'MM'), formatDateTimeInJodaSyntax(date32, 'MM'); +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'MMM'), formatDateTimeInJodaSyntax(datetime64, 'MMM'), formatDateTimeInJodaSyntax(date, 'MMM'), formatDateTimeInJodaSyntax(date32, 'MMM'); +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'MMMM'), formatDateTimeInJodaSyntax(datetime64, 'MMMM'), formatDateTimeInJodaSyntax(date, 'MMMM'), formatDateTimeInJodaSyntax(date32, 'MMMM'); +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'd'), formatDateTimeInJodaSyntax(datetime64, 'd'), formatDateTimeInJodaSyntax(date, 'd'), formatDateTimeInJodaSyntax(date32, 'd'); +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'dd'), formatDateTimeInJodaSyntax(datetime64, 'dd'), formatDateTimeInJodaSyntax(date, 'dd'), formatDateTimeInJodaSyntax(date32, 'dd'); +with '2018-01-12 
22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'ddd'), formatDateTimeInJodaSyntax(datetime64, 'ddd'), formatDateTimeInJodaSyntax(date, 'ddd'), formatDateTimeInJodaSyntax(date32, 'ddd'); +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'a'), formatDateTimeInJodaSyntax(datetime64, 'a'), formatDateTimeInJodaSyntax(date, 'a'), formatDateTimeInJodaSyntax(date32, 'a'); +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'aa'), formatDateTimeInJodaSyntax(datetime64, 'aa'), formatDateTimeInJodaSyntax(date, 'aa'), formatDateTimeInJodaSyntax(date32, 'aa'); +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'aaa'), formatDateTimeInJodaSyntax(datetime64, 'aaa'), formatDateTimeInJodaSyntax(date, 'aaa'), formatDateTimeInJodaSyntax(date32, 'aaa'); +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'K'), formatDateTimeInJodaSyntax(datetime64, 'K'), formatDateTimeInJodaSyntax(date, 'K'), formatDateTimeInJodaSyntax(date32, 'K'); +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'KK'), formatDateTimeInJodaSyntax(datetime64, 'KK'), formatDateTimeInJodaSyntax(date, 'KK'), formatDateTimeInJodaSyntax(date32, 'KK'); +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'KKK'), formatDateTimeInJodaSyntax(datetime64, 'KKK'), formatDateTimeInJodaSyntax(date, 'KKK'), formatDateTimeInJodaSyntax(date32, 'KKK'); +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'h'), formatDateTimeInJodaSyntax(datetime64, 'h'), formatDateTimeInJodaSyntax(date, 'h'), formatDateTimeInJodaSyntax(date32, 'h'); +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'hh'), formatDateTimeInJodaSyntax(datetime64, 'hh'), formatDateTimeInJodaSyntax(date, 'hh'), formatDateTimeInJodaSyntax(date32, 'hh'); +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'hhh'), formatDateTimeInJodaSyntax(datetime64, 'hhh'), formatDateTimeInJodaSyntax(date, 'hhh'), formatDateTimeInJodaSyntax(date32, 'hhh'); +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'H'), formatDateTimeInJodaSyntax(datetime64, 'H'), formatDateTimeInJodaSyntax(date, 'H'), formatDateTimeInJodaSyntax(date32, 'H'); +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as 
date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'HH'), formatDateTimeInJodaSyntax(datetime64, 'HH'), formatDateTimeInJodaSyntax(date, 'HH'), formatDateTimeInJodaSyntax(date32, 'HH'); +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'HHH'), formatDateTimeInJodaSyntax(datetime64, 'HHH'), formatDateTimeInJodaSyntax(date, 'HHH'), formatDateTimeInJodaSyntax(date32, 'HHH'); +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'k'), formatDateTimeInJodaSyntax(datetime64, 'k'), formatDateTimeInJodaSyntax(date, 'k'), formatDateTimeInJodaSyntax(date32, 'k'); +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'kk'), formatDateTimeInJodaSyntax(datetime64, 'kk'), formatDateTimeInJodaSyntax(date, 'kk'), formatDateTimeInJodaSyntax(date32, 'kk'); +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'kkk'), formatDateTimeInJodaSyntax(datetime64, 'kkk'), formatDateTimeInJodaSyntax(date, 'kkk'), formatDateTimeInJodaSyntax(date32, 'kkk'); +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'm'), formatDateTimeInJodaSyntax(datetime64, 'm'), formatDateTimeInJodaSyntax(date, 'm'), formatDateTimeInJodaSyntax(date32, 'm'); +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'mm'), formatDateTimeInJodaSyntax(datetime64, 'mm'), formatDateTimeInJodaSyntax(date, 'mm'), formatDateTimeInJodaSyntax(date32, 'mm'); +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'mmm'), formatDateTimeInJodaSyntax(datetime64, 'mmm'), formatDateTimeInJodaSyntax(date, 'mmm'), formatDateTimeInJodaSyntax(date32, 'mmm'); +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 's'), formatDateTimeInJodaSyntax(datetime64, 's'), formatDateTimeInJodaSyntax(date, 's'), formatDateTimeInJodaSyntax(date32, 's'); +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'ss'), formatDateTimeInJodaSyntax(datetime64, 'ss'), formatDateTimeInJodaSyntax(date, 'ss'), formatDateTimeInJodaSyntax(date32, 'ss'); +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'sss'), formatDateTimeInJodaSyntax(datetime64, 'sss'), formatDateTimeInJodaSyntax(date, 'sss'), formatDateTimeInJodaSyntax(date32, 'sss'); + +with '2018-01-12 22:33:44' as s, toDateTime(s, 'UTC') as datetime, toDateTime64(s, 6, 'UTC') as datetime64, toDate(s) as date, toDate32(s) as date32 select 
formatDateTimeInJodaSyntax(datetime, 'zzzz'), formatDateTimeInJodaSyntax(datetime64, 'zzzz'); + +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'G123DDD'), formatDateTimeInJodaSyntax(datetime64, 'G123DDD'), formatDateTimeInJodaSyntax(date, 'G123DDD'), formatDateTimeInJodaSyntax(date32, 'G123DDD'); +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'G\'\'DDD'), formatDateTimeInJodaSyntax(datetime64, 'G\'\'DDD'), formatDateTimeInJodaSyntax(date, 'G\'\'DDD'), formatDateTimeInJodaSyntax(date32, 'G\'\'DDD'); +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'G\'aaa\'DDD'), formatDateTimeInJodaSyntax(datetime64, 'G\'aaa\'DDD'), formatDateTimeInJodaSyntax(date, 'G\'aaa\'DDD'), formatDateTimeInJodaSyntax(date32, 'G\'aaa\'DDD'); +with '2018-01-12 22:33:44' as s, toDateTime(s) as datetime, toDateTime64(s, 6) as datetime64, toDate(s) as date, toDate32(s) as date32 SELECT formatDateTimeInJodaSyntax(datetime, 'G\'a\'\'aa\'DDD'), formatDateTimeInJodaSyntax(datetime64, 'G\'a\'\'aa\'DDD'), formatDateTimeInJodaSyntax(date, 'G\'a\'\'aa\'DDD'), formatDateTimeInJodaSyntax(date32, 'G\'a\'\'aa\'DDD'); +-- { echoOff } + +SELECT formatDateTimeInJodaSyntax(toDateTime('2018-01-12 22:33:44'), 'x'); -- { serverError 48 } +SELECT formatDateTimeInJodaSyntax(toDateTime('2018-01-12 22:33:44'), 'w'); -- { serverError 48 } +SELECT formatDateTimeInJodaSyntax(toDateTime('2018-01-12 22:33:44'), 'S'); -- { serverError 48 } +SELECT formatDateTimeInJodaSyntax(toDateTime('2018-01-12 22:33:44'), 'z'); -- { serverError 48 } +SELECT formatDateTimeInJodaSyntax(toDateTime('2018-01-12 22:33:44'), 'zz'); -- { serverError 48 } +SELECT formatDateTimeInJodaSyntax(toDateTime('2018-01-12 22:33:44'), 'zzz'); -- { serverError 48 } +SELECT formatDateTimeInJodaSyntax(toDateTime('2018-01-12 22:33:44'), 'Z'); -- { serverError 48 } +SELECT formatDateTimeInJodaSyntax(toDateTime('2018-01-12 22:33:44'), 'b'); -- { serverError 48 } + +SELECT formatDateTimeInJodaSyntax(toDate32('2018-01-12 22:33:44'), 'x'); -- { serverError 48 } +SELECT formatDateTimeInJodaSyntax(toDate32('2018-01-12 22:33:44'), 'w'); -- { serverError 48 } +SELECT formatDateTimeInJodaSyntax(toDate32('2018-01-12 22:33:44'), 'S'); -- { serverError 48 } +SELECT formatDateTimeInJodaSyntax(toDate32('2018-01-12 22:33:44'), 'z'); -- { serverError 48 } +SELECT formatDateTimeInJodaSyntax(toDate32('2018-01-12 22:33:44'), 'zz'); -- { serverError 48 } +SELECT formatDateTimeInJodaSyntax(toDate32('2018-01-12 22:33:44'), 'zzz'); -- { serverError 48 } +SELECT formatDateTimeInJodaSyntax(toDate32('2018-01-12 22:33:44'), 'Z'); -- { serverError 48 } +SELECT formatDateTimeInJodaSyntax(toDate32('2018-01-12 22:33:44'), 'b'); -- { serverError 48 } + +SELECT formatDateTimeInJodaSyntax(toDate32('2018-01-12 22:33:44'), '\'aaaa\'\''); -- { serverError 36 } diff --git a/tests/queries/0_stateless/02496_from_unixtime_in_joda_syntax.reference b/tests/queries/0_stateless/02496_from_unixtime_in_joda_syntax.reference new file mode 100644 index 00000000000..3d98a1c53b0 --- /dev/null +++ b/tests/queries/0_stateless/02496_from_unixtime_in_joda_syntax.reference @@ -0,0 +1,115 @@ +-- { echoOn } +SELECT 
fromUnixTimestampInJodaSyntax(1669804872, 'G', 'UTC'); +AD +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'GG', 'UTC'); +AD +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'GGG', 'UTC'); +AD +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'GGGG', 'UTC'); +Anno Domini +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'GGGGG', 'UTC'); +Anno Domini +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'C', 'UTC'); +20 +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'CC', 'UTC'); +20 +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'CCC', 'UTC'); +020 +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'Y', 'UTC'); +2022 +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'YY', 'UTC'); +22 +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'YYY', 'UTC'); +2022 +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'YYYY', 'UTC'); +2022 +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'YYYYY', 'UTC'); +02022 +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'e', 'UTC'); +3 +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'ee', 'UTC'); +03 +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'E', 'UTC'); +Wed +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'EE', 'UTC'); +Wed +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'EEE', 'UTC'); +Wed +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'EEEE', 'UTC'); +Wednesday +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'EEEEE', 'UTC'); +Wednesday +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'y', 'UTC'); +2022 +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'yy', 'UTC'); +22 +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'yyy', 'UTC'); +2022 +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'yyyy', 'UTC'); +2022 +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'yyyyy', 'UTC'); +02022 +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'D', 'UTC'); +334 +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'DD', 'UTC'); +334 +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'DDD', 'UTC'); +334 +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'M', 'UTC'); +11 +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'MM', 'UTC'); +11 +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'MMM', 'UTC'); +Nov +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'MMMM', 'UTC'); +November +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'd', 'UTC'); +30 +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'dd', 'UTC'); +30 +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'ddd', 'UTC'); +030 +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'a', 'UTC'); +AM +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'aa', 'UTC'); +AM +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'aaa', 'UTC'); +AM +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'K', 'UTC'); +10 +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'KK', 'UTC'); +10 +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'KKK', 'UTC'); +010 +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'h', 'UTC'); +10 +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'hh', 'UTC'); +10 +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'hhh', 'UTC'); +010 +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'H', 'UTC'); +10 +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'HH', 'UTC'); +10 +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'HHH', 'UTC'); +010 +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'k', 'UTC'); +10 +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'kk', 'UTC'); +10 +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'kkk', 'UTC'); +010 +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'm', 'UTC'); +41 
+SELECT fromUnixTimestampInJodaSyntax(1669804872, 'mm', 'UTC'); +41 +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'mmm', 'UTC'); +041 +SELECT fromUnixTimestampInJodaSyntax(1669804872, 's', 'UTC'); +12 +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'ss', 'UTC'); +12 +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'sss', 'UTC'); +012 +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'zzzz', 'UTC'); +UTC diff --git a/tests/queries/0_stateless/02496_from_unixtime_in_joda_syntax.sql b/tests/queries/0_stateless/02496_from_unixtime_in_joda_syntax.sql new file mode 100644 index 00000000000..b84338cc195 --- /dev/null +++ b/tests/queries/0_stateless/02496_from_unixtime_in_joda_syntax.sql @@ -0,0 +1,59 @@ +-- { echoOn } +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'G', 'UTC'); +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'GG', 'UTC'); +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'GGG', 'UTC'); +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'GGGG', 'UTC'); +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'GGGGG', 'UTC'); +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'C', 'UTC'); +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'CC', 'UTC'); +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'CCC', 'UTC'); +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'Y', 'UTC'); +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'YY', 'UTC'); +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'YYY', 'UTC'); +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'YYYY', 'UTC'); +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'YYYYY', 'UTC'); +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'e', 'UTC'); +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'ee', 'UTC'); +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'E', 'UTC'); +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'EE', 'UTC'); +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'EEE', 'UTC'); +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'EEEE', 'UTC'); +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'EEEEE', 'UTC'); +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'y', 'UTC'); +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'yy', 'UTC'); +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'yyy', 'UTC'); +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'yyyy', 'UTC'); +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'yyyyy', 'UTC'); +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'D', 'UTC'); +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'DD', 'UTC'); +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'DDD', 'UTC'); +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'M', 'UTC'); +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'MM', 'UTC'); +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'MMM', 'UTC'); +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'MMMM', 'UTC'); +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'd', 'UTC'); +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'dd', 'UTC'); +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'ddd', 'UTC'); +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'a', 'UTC'); +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'aa', 'UTC'); +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'aaa', 'UTC'); +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'K', 'UTC'); +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'KK', 'UTC'); +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'KKK', 'UTC'); +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'h', 'UTC'); +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'hh', 'UTC'); +SELECT fromUnixTimestampInJodaSyntax(1669804872, 
'hhh', 'UTC'); +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'H', 'UTC'); +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'HH', 'UTC'); +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'HHH', 'UTC'); +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'k', 'UTC'); +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'kk', 'UTC'); +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'kkk', 'UTC'); +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'm', 'UTC'); +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'mm', 'UTC'); +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'mmm', 'UTC'); +SELECT fromUnixTimestampInJodaSyntax(1669804872, 's', 'UTC'); +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'ss', 'UTC'); +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'sss', 'UTC'); +SELECT fromUnixTimestampInJodaSyntax(1669804872, 'zzzz', 'UTC'); +-- { echoOff } diff --git a/tests/queries/0_stateless/02497_if_transform_strings_to_enum.reference b/tests/queries/0_stateless/02497_if_transform_strings_to_enum.reference index 06863f1858b..c6265e195c4 100644 --- a/tests/queries/0_stateless/02497_if_transform_strings_to_enum.reference +++ b/tests/queries/0_stateless/02497_if_transform_strings_to_enum.reference @@ -19,7 +19,7 @@ QUERY id: 0 FUNCTION id: 2, function_name: toString, function_type: ordinary, result_type: String ARGUMENTS LIST id: 3, nodes: 1 - FUNCTION id: 4, function_name: transform, function_type: ordinary, result_type: String + FUNCTION id: 4, function_name: transform, function_type: ordinary, result_type: Enum8(\'censor.net\' = 1, \'google\' = 2, \'other\' = 3, \'yahoo\' = 4) ARGUMENTS LIST id: 5, nodes: 4 COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7 @@ -59,7 +59,7 @@ QUERY id: 0 FUNCTION id: 2, function_name: toString, function_type: ordinary, result_type: String ARGUMENTS LIST id: 3, nodes: 1 - FUNCTION id: 4, function_name: if, function_type: ordinary, result_type: String + FUNCTION id: 4, function_name: if, function_type: ordinary, result_type: Enum8(\'censor.net\' = 1, \'google\' = 2) ARGUMENTS LIST id: 5, nodes: 3 FUNCTION id: 6, function_name: greater, function_type: ordinary, result_type: UInt8 @@ -105,7 +105,7 @@ QUERY id: 0 FUNCTION id: 4, function_name: toString, function_type: ordinary, result_type: String ARGUMENTS LIST id: 5, nodes: 1 - FUNCTION id: 6, function_name: transform, function_type: ordinary, result_type: String + FUNCTION id: 6, function_name: transform, function_type: ordinary, result_type: Enum8(\'censor.net\' = 1, \'google\' = 2, \'other\' = 3, \'yahoo\' = 4) ARGUMENTS LIST id: 7, nodes: 4 COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 9 @@ -149,7 +149,7 @@ QUERY id: 0 FUNCTION id: 4, function_name: toString, function_type: ordinary, result_type: String ARGUMENTS LIST id: 5, nodes: 1 - FUNCTION id: 6, function_name: if, function_type: ordinary, result_type: String + FUNCTION id: 6, function_name: if, function_type: ordinary, result_type: Enum8(\'censor.net\' = 1, \'google\' = 2) ARGUMENTS LIST id: 7, nodes: 3 FUNCTION id: 8, function_name: greater, function_type: ordinary, result_type: UInt8 @@ -204,7 +204,7 @@ QUERY id: 0 FUNCTION id: 5, function_name: toString, function_type: ordinary, result_type: String ARGUMENTS LIST id: 6, nodes: 1 - FUNCTION id: 7, function_name: if, function_type: ordinary, result_type: String + FUNCTION id: 7, function_name: if, function_type: ordinary, result_type: Enum8(\'censor.net\' = 1, \'google\' = 2) ARGUMENTS LIST id: 8, nodes: 3 FUNCTION id: 9, function_name: greater, function_type: 
ordinary, result_type: UInt8 @@ -258,7 +258,7 @@ QUERY id: 0 FUNCTION id: 5, function_name: toString, function_type: ordinary, result_type: String ARGUMENTS LIST id: 6, nodes: 1 - FUNCTION id: 7, function_name: transform, function_type: ordinary, result_type: String + FUNCTION id: 7, function_name: transform, function_type: ordinary, result_type: Enum8(\'censor.net\' = 1, \'google\' = 2, \'other\' = 3, \'yahoo\' = 4) ARGUMENTS LIST id: 8, nodes: 4 COLUMN id: 9, column_name: number, result_type: UInt64, source_id: 10 @@ -301,7 +301,7 @@ QUERY id: 0 FUNCTION id: 2, function_name: toString, function_type: ordinary, result_type: String ARGUMENTS LIST id: 3, nodes: 1 - FUNCTION id: 4, function_name: if, function_type: ordinary, result_type: String + FUNCTION id: 4, function_name: if, function_type: ordinary, result_type: Enum8(\'censor.net\' = 1, \'google\' = 2) ARGUMENTS LIST id: 5, nodes: 3 FUNCTION id: 6, function_name: greater, function_type: ordinary, result_type: UInt8 @@ -322,7 +322,7 @@ QUERY id: 0 FUNCTION id: 2, function_name: toString, function_type: ordinary, result_type: String ARGUMENTS LIST id: 3, nodes: 1 - FUNCTION id: 4, function_name: if, function_type: ordinary, result_type: String + FUNCTION id: 4, function_name: if, function_type: ordinary, result_type: Enum8(\'censor.net\' = 1, \'google\' = 2) ARGUMENTS LIST id: 5, nodes: 3 FUNCTION id: 6, function_name: greater, function_type: ordinary, result_type: UInt8 @@ -368,7 +368,7 @@ QUERY id: 0 FUNCTION id: 2, function_name: toString, function_type: ordinary, result_type: String ARGUMENTS LIST id: 3, nodes: 1 - FUNCTION id: 4, function_name: transform, function_type: ordinary, result_type: String + FUNCTION id: 4, function_name: transform, function_type: ordinary, result_type: Enum8(\'censor.net\' = 1, \'google\' = 2, \'other\' = 3, \'yahoo\' = 4) ARGUMENTS LIST id: 5, nodes: 4 COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7 @@ -386,7 +386,7 @@ QUERY id: 0 FUNCTION id: 2, function_name: toString, function_type: ordinary, result_type: String ARGUMENTS LIST id: 3, nodes: 1 - FUNCTION id: 4, function_name: transform, function_type: ordinary, result_type: String + FUNCTION id: 4, function_name: transform, function_type: ordinary, result_type: Enum8(\'censor.net\' = 1, \'google\' = 2, \'other\' = 3, \'yahoo\' = 4) ARGUMENTS LIST id: 5, nodes: 4 COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7 diff --git a/tests/queries/0_stateless/02498_storage_join_key_positions.sql.j2 b/tests/queries/0_stateless/02498_storage_join_key_positions.sql.j2 index 697f37fd535..e2dad61a93e 100644 --- a/tests/queries/0_stateless/02498_storage_join_key_positions.sql.j2 +++ b/tests/queries/0_stateless/02498_storage_join_key_positions.sql.j2 @@ -33,24 +33,34 @@ SELECT * FROM (SELECT key3 AS c, key1 AS a, key2 AS b FROM t1) AS t1 ALL INNER J SELECT * FROM (SELECT key3 AS c, key1 AS a, key2 AS b FROM t1) AS t1 ALL INNER JOIN tj ON t1.a = tj.key1 AND t1.b = tj.key2 AND t1.c = tj.key3 ORDER BY t1.a; SELECT * FROM (SELECT key3 AS c, key1 AS a, key2 AS b FROM t1) AS t1 ALL INNER JOIN tj ON t1.c = tj.key3 AND t1.a = tj.key1 AND t1.b = tj.key2 ORDER BY t1.a; --- TODO (vdimir): uncomment after https://github.com/ClickHouse/ClickHouse/pull/44016 --- SELECT * FROM t1 ALL INNER JOIN tj ON t1.key1 = tj.key1 AND t1.key3 = tj.key3 AND t1.key2 = tj.key2 AND 1; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } --- SELECT * FROM t1 ALL INNER JOIN tj ON t1.key1 = tj.key1 AND t1.key3 = tj.key3 AND t1.key2 = tj.key2 AND 0; -- { serverError 
INCOMPATIBLE_TYPE_OF_JOIN } --- SELECT * FROM t1 ALL INNER JOIN tj ON t1.key1 = tj.key1 AND t1.key3 = tj.key3 AND t1.key2 = tj.key2 AND 1 == 1; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } --- SELECT * FROM t1 ALL INNER JOIN tj ON t1.key1 = tj.key1 AND t1.key3 = tj.key3 AND t1.key2 = tj.key2 AND 1 > 1; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } +{% set expected_error = 'INCOMPATIBLE_TYPE_OF_JOIN' if use_analyzer else 'INVALID_JOIN_ON_EXPRESSION' %} + +SELECT * FROM t1 ALL INNER JOIN tj ON t1.key1 = tj.key1 AND t1.key3 = tj.key3 AND t1.key2 = tj.key2 AND 1; -- { serverError {{ expected_error }} } +SELECT * FROM t1 ALL INNER JOIN tj ON t1.key1 = tj.key1 AND t1.key3 = tj.key3 AND t1.key2 = tj.key2 AND 0; -- { serverError {{ expected_error }} } +SELECT * FROM t1 ALL INNER JOIN tj ON t1.key1 = tj.key1 AND t1.key3 = tj.key3 AND t1.key2 = tj.key2 AND 1 > 1; -- { serverError {{ expected_error }} } SELECT '--- incompatible ---'; -SELECT * FROM t1 ALL INNER JOIN tj ON 1; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } -SELECT * FROM t1 ALL INNER JOIN tj ON 0; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } -SELECT * FROM t1 ALL INNER JOIN tj ON NULL; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } -SELECT * FROM t1 ALL INNER JOIN tj ON 1 == 1; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } -SELECT * FROM t1 ALL INNER JOIN tj ON 1 != 1; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } +SELECT * FROM t1 ALL INNER JOIN tj ON 1; -- { serverError {{ expected_error }} } +SELECT * FROM t1 ALL INNER JOIN tj ON 0; -- { serverError {{ expected_error }} } +SELECT * FROM t1 ALL INNER JOIN tj ON NULL; -- { serverError {{ expected_error }} } +SELECT * FROM t1 ALL INNER JOIN tj ON 1 != 1; -- { serverError {{ expected_error }} } + +{% set expected_error = 'INCOMPATIBLE_TYPE_OF_JOIN' if use_analyzer else 'AMBIGUOUS_COLUMN_NAME' %} + +-- Here is another error code because equality is handled differently in CollectJoinOnKeysVisitor. +-- We can change the error code, but it will become inconsistent for other cases +-- where we actually expect AMBIGUOUS_COLUMN_NAME instead of INVALID_JOIN_ON_EXPRESSION/INCOMPATIBLE_TYPE_OF_JOIN. 
+-- These checks are more reliable after switching to the new analyzer; they return INCOMPATIBLE_TYPE_OF_JOIN, consistent with the cases above. +SELECT * FROM t1 ALL INNER JOIN tj ON t1.key1 = tj.key1 AND t1.key3 = tj.key3 AND t1.key2 = tj.key2 AND 1 == 1; -- { serverError {{ expected_error }} } +SELECT * FROM t1 ALL INNER JOIN tj ON 1 == 1; -- { serverError {{ expected_error }} } +SELECT * FROM t1 ALL INNER JOIN tj ON 1 == 2; -- { serverError {{ expected_error }} } + +{% set expected_error = 'UNKNOWN_IDENTIFIER' if use_analyzer else 'INCOMPATIBLE_TYPE_OF_JOIN' %} + +SELECT * FROM t1 ALL INNER JOIN tj USING (key1, key2, attr); -- { serverError {{ expected_error }} } +SELECT * FROM t1 ALL INNER JOIN tj USING (key1, key2, key3, attr); -- { serverError {{ expected_error }} } SELECT * FROM t1 ALL INNER JOIN tj USING (key2, key3); -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } -SELECT * FROM t1 ALL INNER JOIN tj USING (key1, key2, attr) SETTINGS allow_experimental_analyzer = 0; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } -SELECT * FROM t1 ALL INNER JOIN tj USING (key1, key2, attr) SETTINGS allow_experimental_analyzer = 1; -- { serverError UNKNOWN_IDENTIFIER } -SELECT * FROM t1 ALL INNER JOIN tj USING (key1, key2, key3, attr) SETTINGS allow_experimental_analyzer = 0; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } -SELECT * FROM t1 ALL INNER JOIN tj USING (key1, key2, key3, attr) SETTINGS allow_experimental_analyzer = 1; -- { serverError UNKNOWN_IDENTIFIER } SELECT * FROM t1 ALL INNER JOIN tj ON t1.key1 = tj.attr; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } SELECT * FROM t1 ALL INNER JOIN tj ON t1.key1 = tj.key1; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } SELECT * FROM t1 ALL INNER JOIN tj ON t1.key1 = tj.key1 AND t1.key2 = tj.key2 AND t1.key3 = tj.attr; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } diff --git a/tests/queries/0_stateless/02499_monotonicity_toUnixTimestamp64.reference b/tests/queries/0_stateless/02499_monotonicity_toUnixTimestamp64.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02499_monotonicity_toUnixTimestamp64.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02499_monotonicity_toUnixTimestamp64.sh b/tests/queries/0_stateless/02499_monotonicity_toUnixTimestamp64.sh new file mode 100755 index 00000000000..d3cbc6ec861 --- /dev/null +++ b/tests/queries/0_stateless/02499_monotonicity_toUnixTimestamp64.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash + +# shellcheck disable=SC2154 + +unset CLICKHOUSE_LOG_COMMENT + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + + +$CLICKHOUSE_CLIENT -q "create table t(ts DateTime64) engine=MergeTree order by ts as select * from numbers_mt(1e6);" + +max_block_size=8192 + +query_id="${CLICKHOUSE_DATABASE}_02499_$RANDOM$RANDOM" +$CLICKHOUSE_CLIENT --query_id="$query_id" -q "select ts from t order by toUnixTimestamp64Nano(ts) limit 10 format Null settings max_block_size = $max_block_size, optimize_read_in_order = 1;" + +$CLICKHOUSE_CLIENT -q "system flush logs;" +$CLICKHOUSE_CLIENT --param_query_id="$query_id" -q "select read_rows <= $max_block_size from system.query_log where event_date >= yesterday() and query_id = {query_id:String} and type = 'QueryFinish';" + diff --git a/tests/queries/0_stateless/02499_read_json_objects_as_strings.reference b/tests/queries/0_stateless/02499_read_json_objects_as_strings.reference index 4042c1f4389..a78bd4be3e8 100644 --- a/tests/queries/0_stateless/02499_read_json_objects_as_strings.reference +++ b/tests/queries/0_stateless/02499_read_json_objects_as_strings.reference @@ -1,3 +1,6 @@ x Nullable(String) abc {"a" : 10, "b" : "abc"} +x Nullable(String) +{"a" : "b"} +{"a" : 1, "b" : [1,2,3]} diff --git a/tests/queries/0_stateless/02499_read_json_objects_as_strings.sql b/tests/queries/0_stateless/02499_read_json_objects_as_strings.sql index bb4c9e9da0f..12d709bdde1 100644 --- a/tests/queries/0_stateless/02499_read_json_objects_as_strings.sql +++ b/tests/queries/0_stateless/02499_read_json_objects_as_strings.sql @@ -2,3 +2,5 @@ set input_format_json_read_objects_as_strings=1; desc format(JSONEachRow, '{"x" : "abc"}, {"x" : {"a" : 10, "b" : "abc"}}'); select * from format(JSONEachRow, '{"x" : "abc"}, {"x" : {"a" : 10, "b" : "abc"}}'); +desc format(JSONEachRow, '{"x" : {"a" : "b"}}, {"x" : {"a" : 1, "b" : [1,2,3]}}'); +select * from format(JSONEachRow, '{"x" : {"a" : "b"}}, {"x" : {"a" : 1, "b" : [1,2,3]}}'); diff --git a/tests/queries/0_stateless/02501_limits_on_result_for_view.reference b/tests/queries/0_stateless/02501_limits_on_result_for_view.reference new file mode 100644 index 00000000000..0691f67b202 --- /dev/null +++ b/tests/queries/0_stateless/02501_limits_on_result_for_view.reference @@ -0,0 +1 @@ +52 diff --git a/tests/queries/0_stateless/02501_limits_on_result_for_view.sql b/tests/queries/0_stateless/02501_limits_on_result_for_view.sql new file mode 100644 index 00000000000..17e6024d973 --- /dev/null +++ b/tests/queries/0_stateless/02501_limits_on_result_for_view.sql @@ -0,0 +1,25 @@ +DROP TABLE IF EXISTS 02501_test; +DROP TABLE IF EXISTS 02501_dist; +DROP VIEW IF EXISTS 02501_view; + + +-- create local table +CREATE TABLE 02501_test(`a` UInt64) ENGINE = Memory; + +-- create dist table +CREATE TABLE 02501_dist(`a` UInt64) ENGINE = Distributed(test_cluster_two_shards, currentDatabase(), 02501_test); + +-- create view +CREATE VIEW 02501_view(`a` UInt64) AS SELECT a FROM 02501_dist; + +-- insert data +insert into 02501_test values(5),(6),(7),(8); + +-- test +SELECT * from 02501_view settings max_result_rows = 1; -- { serverError 396 } +SELECT sum(a) from 02501_view settings max_result_rows = 1; + + +DROP TABLE IF EXISTS 02501_test; +DROP TABLE IF EXISTS 02501_dist; +DROP VIEW IF EXISTS 02501_view; \ No newline at end of file diff --git a/tests/queries/0_stateless/02503_bad_compatibility_setting.reference b/tests/queries/0_stateless/02503_bad_compatibility_setting.reference new file mode 100644 index 00000000000..5b7d2a449a0 --- /dev/null +++ b/tests/queries/0_stateless/02503_bad_compatibility_setting.reference @@ -0,0 +1 @@ + 0 diff --git 
a/tests/queries/0_stateless/02503_bad_compatibility_setting.sql b/tests/queries/0_stateless/02503_bad_compatibility_setting.sql new file mode 100644 index 00000000000..178c6a87531 --- /dev/null +++ b/tests/queries/0_stateless/02503_bad_compatibility_setting.sql @@ -0,0 +1,3 @@ +set compatibility='a.a'; -- { serverError BAD_ARGUMENTS } +select value, changed from system.settings where name = 'compatibility' + diff --git a/tests/queries/0_stateless/02503_insert_storage_snapshot.reference b/tests/queries/0_stateless/02503_insert_storage_snapshot.reference index 4e07416f18a..9f9dfd29dc6 100644 --- a/tests/queries/0_stateless/02503_insert_storage_snapshot.reference +++ b/tests/queries/0_stateless/02503_insert_storage_snapshot.reference @@ -1 +1 @@ -all_1_1_0 1 1 +"all_1_1_0",1,1 diff --git a/tests/queries/0_stateless/02503_insert_storage_snapshot.sh b/tests/queries/0_stateless/02503_insert_storage_snapshot.sh index af2952839df..b494adeb785 100755 --- a/tests/queries/0_stateless/02503_insert_storage_snapshot.sh +++ b/tests/queries/0_stateless/02503_insert_storage_snapshot.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: no-parallel set -e @@ -13,7 +14,21 @@ $CLICKHOUSE_CLIENT -q "INSERT INTO t_insert_storage_snapshot VALUES (1)" query_id="$CLICKHOUSE_DATABASE-$RANDOM" $CLICKHOUSE_CLIENT --query_id $query_id -q "INSERT INTO t_insert_storage_snapshot SELECT sleep(1) FROM numbers(1000) SETTINGS max_block_size = 1" 2>/dev/null & -$CLICKHOUSE_CLIENT -q "SELECT name, active, refcount FROM system.parts WHERE database = '$CLICKHOUSE_DATABASE' AND table = 't_insert_storage_snapshot'" +counter=0 retries=60 + +# There can be different background processes that hold references to parts +# for a short period of time. To avoid flakiness, we check that the refcount became 1 at least once during the long INSERT query. +# It proves that the INSERT query doesn't hold redundant references to parts. 
+while [[ $counter -lt $retries ]]; do + query_result=$($CLICKHOUSE_CLIENT -q "SELECT name, active, refcount FROM system.parts WHERE database = '$CLICKHOUSE_DATABASE' AND table = 't_insert_storage_snapshot' FORMAT CSV") + if [ "$query_result" == '"all_1_1_0",1,1' ]; then + echo "$query_result" + break; + fi + sleep 0.1 + ((++counter)) +done + $CLICKHOUSE_CLIENT -q "KILL QUERY WHERE query_id = '$query_id' SYNC" >/dev/null wait diff --git a/tests/queries/0_stateless/02503_join_switch_alias_fuzz.reference b/tests/queries/0_stateless/02503_join_switch_alias_fuzz.reference new file mode 100644 index 00000000000..af591cd7818 --- /dev/null +++ b/tests/queries/0_stateless/02503_join_switch_alias_fuzz.reference @@ -0,0 +1 @@ +1 \N 1 \N diff --git a/tests/queries/0_stateless/02503_join_switch_alias_fuzz.sql b/tests/queries/0_stateless/02503_join_switch_alias_fuzz.sql new file mode 100644 index 00000000000..28d64bf3881 --- /dev/null +++ b/tests/queries/0_stateless/02503_join_switch_alias_fuzz.sql @@ -0,0 +1,4 @@ +SELECT * FROM (SELECT 1 AS id, '' AS test) AS a +LEFT JOIN (SELECT test, 1 AS id, NULL AS test) AS b ON b.id = a.id +SETTINGS join_algorithm = 'auto', max_rows_in_join = 1, allow_experimental_analyzer = 1 +; diff --git a/tests/queries/0_stateless/02503_mysql_compat_utc_timestamp.reference b/tests/queries/0_stateless/02503_mysql_compat_utc_timestamp.reference new file mode 100644 index 00000000000..573541ac970 --- /dev/null +++ b/tests/queries/0_stateless/02503_mysql_compat_utc_timestamp.reference @@ -0,0 +1 @@ +0 diff --git a/tests/queries/0_stateless/02503_mysql_compat_utc_timestamp.sql b/tests/queries/0_stateless/02503_mysql_compat_utc_timestamp.sql new file mode 100644 index 00000000000..d6716f272c6 --- /dev/null +++ b/tests/queries/0_stateless/02503_mysql_compat_utc_timestamp.sql @@ -0,0 +1,2 @@ +-- PowerBI is doing this query. It should work at least somehow, not necessarily in the same way as in MySQL. 
+SELECT TIMEDIFF(NOW(), UTC_TIMESTAMP()) DIV 600; diff --git a/tests/queries/0_stateless/02504_bar_fractions.reference b/tests/queries/0_stateless/02504_bar_fractions.reference new file mode 100644 index 00000000000..2a7b46069df --- /dev/null +++ b/tests/queries/0_stateless/02504_bar_fractions.reference @@ -0,0 +1,20 @@ +0 +0.125 ▏ E2968F +0.25 ▎ ▏ E2968E E2968F +0.375 ▍ ▎ E2968D E2968E +0.5 ▌ ▍ E2968C E2968D +0.625 ▋ ▌ E2968B E2968C +0.75 ▊ ▋ E2968A E2968B +0.875 ▉ ▊ E29689 E2968A +1 █ ▉ E29688 E29689 +1.125 █▏ █ E29688E2968F E29688 +1.25 █▎ █▏ E29688E2968E E29688E2968F +1.375 █▍ █▎ E29688E2968D E29688E2968E +1.5 █▌ █▍ E29688E2968C E29688E2968D +1.625 █▋ █▌ E29688E2968B E29688E2968C +1.75 █▊ █▋ E29688E2968A E29688E2968B +1.875 █▉ █▊ E29688E29689 E29688E2968A +2 ██ █▉ E29688E29688 E29688E29689 +2.125 ██▏ ██ E29688E29688E2968F E29688E29688 +2.25 ██▎ ██▏ E29688E29688E2968E E29688E29688E2968F +2.375 ██▍ ██▎ E29688E29688E2968D E29688E29688E2968E diff --git a/tests/queries/0_stateless/02504_bar_fractions.sql b/tests/queries/0_stateless/02504_bar_fractions.sql new file mode 100644 index 00000000000..d182bced55e --- /dev/null +++ b/tests/queries/0_stateless/02504_bar_fractions.sql @@ -0,0 +1,7 @@ +SELECT + number / 8 AS width, + bar(width, 0, 3, 3) AS b, + bar(width - 0.001, 0, 3, 3) AS `b_minus`, + hex(b), + hex(b_minus) +FROM numbers(20); diff --git a/tests/queries/0_stateless/02504_explain_ast_insert.reference b/tests/queries/0_stateless/02504_explain_ast_insert.reference new file mode 100644 index 00000000000..1c149a0f2f4 --- /dev/null +++ b/tests/queries/0_stateless/02504_explain_ast_insert.reference @@ -0,0 +1,4 @@ +InsertQuery (children 1) + Identifier test +InsertQuery (children 1) + Identifier test diff --git a/tests/queries/0_stateless/02504_explain_ast_insert.sql b/tests/queries/0_stateless/02504_explain_ast_insert.sql new file mode 100644 index 00000000000..fc50feebaa4 --- /dev/null +++ b/tests/queries/0_stateless/02504_explain_ast_insert.sql @@ -0,0 +1,2 @@ +explain ast insert into test values balabala; +explain ast insert into test format TabSeparated balabala; \ No newline at end of file diff --git a/tests/queries/0_stateless/02504_parse_datetime_best_effort_calebeaires.reference b/tests/queries/0_stateless/02504_parse_datetime_best_effort_calebeaires.reference new file mode 100644 index 00000000000..f0fc06bc742 --- /dev/null +++ b/tests/queries/0_stateless/02504_parse_datetime_best_effort_calebeaires.reference @@ -0,0 +1 @@ +2148 1969-01-01 2105 2105 1969-01-01 10:42:00.000 diff --git a/tests/queries/0_stateless/02504_parse_datetime_best_effort_calebeaires.sql b/tests/queries/0_stateless/02504_parse_datetime_best_effort_calebeaires.sql new file mode 100644 index 00000000000..e551ec51524 --- /dev/null +++ b/tests/queries/0_stateless/02504_parse_datetime_best_effort_calebeaires.sql @@ -0,0 +1,5 @@ +CREATE TEMPORARY TABLE my_table (col_date Date, col_date32 Date32, col_datetime DateTime('UTC'), col_datetime32 DateTime32('UTC'), col_datetime64 DateTime64); +insert into `my_table` (`col_date`, `col_date32`, `col_datetime`, `col_datetime32`, `col_datetime64`) values (parseDateTime64BestEffort('1969-01-01'), '1969-01-01', parseDateTime64BestEffort('1969-01-01 10:42:00'), parseDateTime64BestEffort('1969-01-01 10:42:00'), parseDateTime64BestEffort('1969-01-01 10:42:00')); + +-- The values for Date32 and DateTime64 will be year 1969, while the values of Date, DateTime will contain a value affected by implementation-defined overflow and can be arbitrary. 
+SELECT toYear(col_date), col_date32, toYear(col_datetime), toYear(col_datetime32), col_datetime64 FROM my_table; diff --git a/tests/queries/0_stateless/02505_forbid_paths_in_datetime_timezone.reference b/tests/queries/0_stateless/02505_forbid_paths_in_datetime_timezone.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02505_forbid_paths_in_datetime_timezone.sql b/tests/queries/0_stateless/02505_forbid_paths_in_datetime_timezone.sql new file mode 100644 index 00000000000..b08abcb8a19 --- /dev/null +++ b/tests/queries/0_stateless/02505_forbid_paths_in_datetime_timezone.sql @@ -0,0 +1,6 @@ +select toDateTime(0, '/abc'); -- { serverError POCO_EXCEPTION } +select toDateTime(0, './abc'); -- { serverError POCO_EXCEPTION } +select toDateTime(0, '../abc'); -- { serverError POCO_EXCEPTION } +select toDateTime(0, '~/abc'); -- { serverError POCO_EXCEPTION } +select toDateTime(0, 'abc/../../cba'); -- { serverError POCO_EXCEPTION } + diff --git a/tests/queries/0_stateless/02506_date_time64_floating_point_negative_value.reference b/tests/queries/0_stateless/02506_date_time64_floating_point_negative_value.reference new file mode 100644 index 00000000000..b5d0547dc4a --- /dev/null +++ b/tests/queries/0_stateless/02506_date_time64_floating_point_negative_value.reference @@ -0,0 +1,4 @@ +-3600001 +-1 +1970-01-01 00:59:59.999 +1969-12-31 23:59:59.999 diff --git a/tests/queries/0_stateless/02506_date_time64_floating_point_negative_value.sql b/tests/queries/0_stateless/02506_date_time64_floating_point_negative_value.sql new file mode 100644 index 00000000000..dd663c7806e --- /dev/null +++ b/tests/queries/0_stateless/02506_date_time64_floating_point_negative_value.sql @@ -0,0 +1,4 @@ +select toUnixTimestamp64Milli(toDateTime64('1969-12-31 23:59:59.999', 3, 'Europe/Amsterdam')); +select toUnixTimestamp64Milli(toDateTime64('1969-12-31 23:59:59.999', 3, 'UTC')); +select fromUnixTimestamp64Milli(toInt64(-1), 'Europe/Amsterdam'); +select fromUnixTimestamp64Milli(toInt64(-1), 'UTC'); diff --git a/tests/queries/0_stateless/02507_to_unix_timestamp_overflow.reference b/tests/queries/0_stateless/02507_to_unix_timestamp_overflow.reference new file mode 100644 index 00000000000..f6e8cd50296 --- /dev/null +++ b/tests/queries/0_stateless/02507_to_unix_timestamp_overflow.reference @@ -0,0 +1 @@ +-1293882467 diff --git a/tests/queries/0_stateless/02507_to_unix_timestamp_overflow.sql b/tests/queries/0_stateless/02507_to_unix_timestamp_overflow.sql new file mode 100644 index 00000000000..42479f6dbec --- /dev/null +++ b/tests/queries/0_stateless/02507_to_unix_timestamp_overflow.sql @@ -0,0 +1,2 @@ +SELECT toUnixTimestamp(toDateTime64('1928-12-31 12:12:12.123', 3, 'UTC')); -- { serverError DECIMAL_OVERFLOW } +SELECT toInt64(toDateTime64('1928-12-31 12:12:12.123', 3, 'UTC')); diff --git a/tests/queries/0_stateless/02508_bad_graphite.reference b/tests/queries/0_stateless/02508_bad_graphite.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02508_bad_graphite.sql b/tests/queries/0_stateless/02508_bad_graphite.sql new file mode 100644 index 00000000000..a0ca9dcf690 --- /dev/null +++ b/tests/queries/0_stateless/02508_bad_graphite.sql @@ -0,0 +1,6 @@ +DROP TABLE IF EXISTS test_graphite; +create table test_graphite (key UInt32, Path String, Time DateTime('UTC'), Value UInt8, Version UInt32, col UInt64) + engine = GraphiteMergeTree('graphite_rollup') order by key; + +INSERT INTO test_graphite (key) VALUES (0); -- { serverError 
BAD_ARGUMENTS } +DROP TABLE test_graphite; diff --git a/tests/queries/0_stateless/02508_index_analysis_to_date_timezone.reference b/tests/queries/0_stateless/02508_index_analysis_to_date_timezone.reference new file mode 100644 index 00000000000..28c3774e947 --- /dev/null +++ b/tests/queries/0_stateless/02508_index_analysis_to_date_timezone.reference @@ -0,0 +1,11 @@ +4c36abda-8bd8-11eb-8204-005056aa8bf6 2021-03-24 01:04:27 1 +4c408902-8bd8-11eb-8204-005056aa8bf6 2021-03-24 01:04:27 1 +4c5bf20a-8bd8-11eb-8204-005056aa8bf6 2021-03-24 01:04:27 1 +4c61623a-8bd8-11eb-8204-005056aa8bf6 2021-03-24 01:04:27 1 +4c6efab2-8bd8-11eb-a952-005056aa8bf6 2021-03-24 01:04:27 1 +--- +4c36abda-8bd8-11eb-8204-005056aa8bf6 2021-03-24 01:04:27 1 +4c408902-8bd8-11eb-8204-005056aa8bf6 2021-03-24 01:04:27 1 +4c5bf20a-8bd8-11eb-8204-005056aa8bf6 2021-03-24 01:04:27 1 +4c61623a-8bd8-11eb-8204-005056aa8bf6 2021-03-24 01:04:27 1 +4c6efab2-8bd8-11eb-a952-005056aa8bf6 2021-03-24 01:04:27 1 diff --git a/tests/queries/0_stateless/02508_index_analysis_to_date_timezone.sql b/tests/queries/0_stateless/02508_index_analysis_to_date_timezone.sql new file mode 100644 index 00000000000..a7e4f6e7a0e --- /dev/null +++ b/tests/queries/0_stateless/02508_index_analysis_to_date_timezone.sql @@ -0,0 +1,10 @@ +DROP TABLE IF EXISTS table; +CREATE TABLE table (uid UUID, date DateTime('Asia/Kamchatka')) ENGINE = MergeTree ORDER BY date; + +INSERT INTO `table` VALUES ('4c36abda-8bd8-11eb-8204-005056aa8bf6', '2021-03-24 01:04:27'), ('4c408902-8bd8-11eb-8204-005056aa8bf6', '2021-03-24 01:04:27'), ('4c5bf20a-8bd8-11eb-8204-005056aa8bf6', '2021-03-24 01:04:27'), ('4c61623a-8bd8-11eb-8204-005056aa8bf6', '2021-03-24 01:04:27'), ('4c6efab2-8bd8-11eb-a952-005056aa8bf6', '2021-03-24 01:04:27'); + +SELECT uid, date, toDate(date) = toDate('2021-03-24') AS res FROM table WHERE res = 1 ORDER BY uid, date; +SELECT '---'; +SELECT uid, date, toDate(date) = toDate('2021-03-24') AS res FROM table WHERE toDate(date) = toDate('2021-03-24') ORDER BY uid, date; + +DROP TABLE table; diff --git a/tests/queries/0_stateless/02509_h3_arguments.reference b/tests/queries/0_stateless/02509_h3_arguments.reference new file mode 100644 index 00000000000..3054598cf87 --- /dev/null +++ b/tests/queries/0_stateless/02509_h3_arguments.reference @@ -0,0 +1,23 @@ +583031433791012863 +583031433791012863 +587531185127686143 +614047082918969343 +614047153853038591 +614048195802038271 +614054260742553599 +614054419345965055 +614552348391374847 +614553222213795839 +614554538768072703 +614555412668088319 +614790495813500927 +614047082918969343 +614047153853038591 +614048195802038271 +614054260742553599 +614054419345965055 +614552348391374847 +614553222213795839 +614554538768072703 +614555412668088319 +614790495813500927 diff --git a/tests/queries/0_stateless/02509_h3_arguments.sql b/tests/queries/0_stateless/02509_h3_arguments.sql new file mode 100644 index 00000000000..b5b8b9497f9 --- /dev/null +++ b/tests/queries/0_stateless/02509_h3_arguments.sql @@ -0,0 +1,13 @@ +-- Tags: no-fasttest + +select h3ToParent(641573946153969375, 1); +select h3ToParent(641573946153969375, arrayJoin([1,2])); + +DROP TABLE IF EXISTS data_table; + +CREATE TABLE data_table (id UInt64, longitude Float64, latitude Float64) ENGINE=MergeTree ORDER BY id; +INSERT INTO data_table SELECT number, number, number FROM numbers(10); +SELECT geoToH3(longitude, latitude, toUInt8(8)) AS h3Index FROM data_table ORDER BY 1; +SELECT geoToH3(longitude, latitude, toUInt8(longitude - longitude + 8)) AS h3Index FROM data_table 
ORDER BY 1; + +DROP TABLE data_table; diff --git a/tests/queries/0_stateless/02510_group_by_prewhere_null.reference b/tests/queries/0_stateless/02510_group_by_prewhere_null.reference new file mode 100644 index 00000000000..d2bd2bb4dc6 --- /dev/null +++ b/tests/queries/0_stateless/02510_group_by_prewhere_null.reference @@ -0,0 +1 @@ +1 6 diff --git a/tests/queries/0_stateless/02510_group_by_prewhere_null.sql b/tests/queries/0_stateless/02510_group_by_prewhere_null.sql new file mode 100644 index 00000000000..90a638d0b5c --- /dev/null +++ b/tests/queries/0_stateless/02510_group_by_prewhere_null.sql @@ -0,0 +1,25 @@ +DROP TABLE IF EXISTS table1; + +create table table1 ( + col1 Int32, + col2 Int32 +) +ENGINE = MergeTree +partition by tuple() +order by col1; + +INSERT INTO table1 VALUES (1, 2), (1, 4); + +with NULL as pid +select a.col1, sum(a.col2) as summ +from table1 a +prewhere (pid is null or a.col2 = pid) +group by a.col1; + +with 123 as pid +select a.col1, sum(a.col2) as summ +from table1 a +prewhere (pid is null or a.col2 = pid) +group by a.col1; + +DROP TABLE table1; diff --git a/tests/queries/0_stateless/02511_complex_literals_as_aggregate_function_parameters.reference b/tests/queries/0_stateless/02511_complex_literals_as_aggregate_function_parameters.reference new file mode 100644 index 00000000000..ab6afce21ef --- /dev/null +++ b/tests/queries/0_stateless/02511_complex_literals_as_aggregate_function_parameters.reference @@ -0,0 +1,4 @@ +AggregateFunction(1, sumMapFiltered([1, 2]), Array(UInt8), Array(UInt8)) +02010A00000000000000020A00000000000000 +02010A00000000000000020A00000000000000 +([1,2],[20,20]) diff --git a/tests/queries/0_stateless/02511_complex_literals_as_aggregate_function_parameters.sql b/tests/queries/0_stateless/02511_complex_literals_as_aggregate_function_parameters.sql new file mode 100644 index 00000000000..92b5f0143ed --- /dev/null +++ b/tests/queries/0_stateless/02511_complex_literals_as_aggregate_function_parameters.sql @@ -0,0 +1,4 @@ +SELECT toTypeName(sumMapFilteredState([1, 2])([1, 2, 3], [10, 10, 10])); +SELECT hex(sumMapFilteredState([1, 2])([1, 2, 3], [10, 10, 10])); +SELECT hex(unhex('02010A00000000000000020A00000000000000')::AggregateFunction(1, sumMapFiltered([1, 2]), Array(UInt8), Array(UInt8))); +SELECT sumMapFilteredMerge([1, 2])(*) FROM remote('127.0.0.{1,2}', view(SELECT sumMapFilteredState([1, 2])([1, 2, 3], [10, 10, 10]))); diff --git a/tests/queries/0_stateless/02511_parquet_orc_missing_columns.reference b/tests/queries/0_stateless/02511_parquet_orc_missing_columns.reference new file mode 100644 index 00000000000..d5318a96f1a --- /dev/null +++ b/tests/queries/0_stateless/02511_parquet_orc_missing_columns.reference @@ -0,0 +1,8 @@ +Hello +Hello +Hello +6 6 +Hello +Hello +Hello +6 6 diff --git a/tests/queries/0_stateless/02511_parquet_orc_missing_columns.sh b/tests/queries/0_stateless/02511_parquet_orc_missing_columns.sh new file mode 100755 index 00000000000..455dccafbb9 --- /dev/null +++ b/tests/queries/0_stateless/02511_parquet_orc_missing_columns.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash +#Tags: no-fasttest, no-parallel + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL -q "select number as x from numbers(3) format Parquet" > 02511_data1.parquet +$CLICKHOUSE_LOCAL -q "select y from file(02511_data1.parquet, auto, 'x UInt64, y String default \'Hello\'') settings input_format_parquet_allow_missing_columns=1" +$CLICKHOUSE_LOCAL -q "select number as x, 'Hello' as y from numbers(3) format Parquet" > 02511_data2.parquet +$CLICKHOUSE_LOCAL -q "select count(*), count(y) from file('02511_data*.parquet', auto, 'x UInt64, y String') settings input_format_parquet_allow_missing_columns=1" + +$CLICKHOUSE_LOCAL -q "select number as x from numbers(3) format ORC" > 02511_data1.orc +$CLICKHOUSE_LOCAL -q "select y from file(02511_data1.orc, auto, 'x UInt64, y String default \'Hello\'') settings input_format_orc_allow_missing_columns=1" +$CLICKHOUSE_LOCAL -q "select number as x, 'Hello' as y from numbers(3) format ORC" > 02511_data2.orc +$CLICKHOUSE_LOCAL -q "select count(*), count(y) from file('02511_data*.orc', auto, 'x UInt64, y String') settings input_format_orc_allow_missing_columns=1" + +rm 02511_data* + diff --git a/tests/queries/0_stateless/02513_broken_datetime64_init_on_mac.reference b/tests/queries/0_stateless/02513_broken_datetime64_init_on_mac.reference new file mode 100644 index 00000000000..aff5a0d01c6 --- /dev/null +++ b/tests/queries/0_stateless/02513_broken_datetime64_init_on_mac.reference @@ -0,0 +1 @@ +2022-12-12 14:06:09.000 diff --git a/tests/queries/0_stateless/02513_broken_datetime64_init_on_mac.sql b/tests/queries/0_stateless/02513_broken_datetime64_init_on_mac.sql new file mode 100644 index 00000000000..f8d3b2847e9 --- /dev/null +++ b/tests/queries/0_stateless/02513_broken_datetime64_init_on_mac.sql @@ -0,0 +1 @@ +select 1670853969::DateTime64(3, 'UTC'); diff --git a/tests/queries/0_stateless/02513_csv_bool_allow_crlf.reference b/tests/queries/0_stateless/02513_csv_bool_allow_crlf.reference new file mode 100644 index 00000000000..da29283aaa4 --- /dev/null +++ b/tests/queries/0_stateless/02513_csv_bool_allow_crlf.reference @@ -0,0 +1,2 @@ +true +false diff --git a/tests/queries/0_stateless/02513_csv_bool_allow_crlf.sh b/tests/queries/0_stateless/02513_csv_bool_allow_crlf.sh new file mode 100755 index 00000000000..ef75514cac6 --- /dev/null +++ b/tests/queries/0_stateless/02513_csv_bool_allow_crlf.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +echo -ne "True\r\nFalse\r\n" | $CLICKHOUSE_LOCAL --structure='x Bool' --input-format=CSV -q "select * from table"; diff --git a/tests/queries/0_stateless/02513_insert_without_materialized_columns.reference b/tests/queries/0_stateless/02513_insert_without_materialized_columns.reference new file mode 100644 index 00000000000..6ed281c757a --- /dev/null +++ b/tests/queries/0_stateless/02513_insert_without_materialized_columns.reference @@ -0,0 +1,2 @@ +1 +1 diff --git a/tests/queries/0_stateless/02513_insert_without_materialized_columns.sh b/tests/queries/0_stateless/02513_insert_without_materialized_columns.sh new file mode 100755 index 00000000000..3faa404917d --- /dev/null +++ b/tests/queries/0_stateless/02513_insert_without_materialized_columns.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash +# Tags: no-parallel + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + + +FILE_NAME="${CLICKHOUSE_DATABASE}_test.native.zstd" + +${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS test" + +${CLICKHOUSE_CLIENT} --query "CREATE TABLE test (a Int64, b Int64 MATERIALIZED a) ENGINE = MergeTree() PRIMARY KEY tuple()" + +${CLICKHOUSE_CLIENT} --query "INSERT INTO test VALUES (1)" + +${CLICKHOUSE_CLIENT} --query "SELECT * FROM test" + +${CLICKHOUSE_CLIENT} --query "SELECT * FROM test INTO OUTFILE '${CLICKHOUSE_TMP}/${FILE_NAME}' FORMAT Native" + +${CLICKHOUSE_CLIENT} --query "TRUNCATE TABLE test" + +${CLICKHOUSE_CLIENT} --query "INSERT INTO test FROM INFILE '${CLICKHOUSE_TMP}/${FILE_NAME}'" + +${CLICKHOUSE_CLIENT} --query "SELECT * FROM test" + +${CLICKHOUSE_CLIENT} --query "DROP TABLE test" + +rm -f "${CLICKHOUSE_TMP}/${FILE_NAME}" diff --git a/tests/queries/0_stateless/02513_parquet_orc_arrow_nullable_schema_inference.reference b/tests/queries/0_stateless/02513_parquet_orc_arrow_nullable_schema_inference.reference new file mode 100644 index 00000000000..a0062820297 --- /dev/null +++ b/tests/queries/0_stateless/02513_parquet_orc_arrow_nullable_schema_inference.reference @@ -0,0 +1,6 @@ +number Nullable(UInt64) +number UInt64 +number Nullable(Int64) +number Int64 +number Nullable(UInt64) +number UInt64 diff --git a/tests/queries/0_stateless/02513_parquet_orc_arrow_nullable_schema_inference.sh b/tests/queries/0_stateless/02513_parquet_orc_arrow_nullable_schema_inference.sh new file mode 100755 index 00000000000..986ca178af3 --- /dev/null +++ b/tests/queries/0_stateless/02513_parquet_orc_arrow_nullable_schema_inference.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL -q "select * from numbers(3) format Parquet" | $CLICKHOUSE_LOCAL --input-format=Parquet --table=test -q "desc test" --schema_inference_make_columns_nullable=1; +$CLICKHOUSE_LOCAL -q "select * from numbers(3) format Parquet" | $CLICKHOUSE_LOCAL --input-format=Parquet --table=test -q "desc test" --schema_inference_make_columns_nullable=0; + +$CLICKHOUSE_LOCAL -q "select * from numbers(3) format ORC" | $CLICKHOUSE_LOCAL --input-format=ORC --table=test -q "desc test" --schema_inference_make_columns_nullable=1; +$CLICKHOUSE_LOCAL -q "select * from numbers(3) format ORC" | $CLICKHOUSE_LOCAL --input-format=ORC --table=test -q "desc test" --schema_inference_make_columns_nullable=0; + +$CLICKHOUSE_LOCAL -q "select * from numbers(3) format Arrow" | $CLICKHOUSE_LOCAL --input-format=Arrow --table=test -q "desc test" --schema_inference_make_columns_nullable=1; +$CLICKHOUSE_LOCAL -q "select * from numbers(3) format Arrow" | $CLICKHOUSE_LOCAL --input-format=Arrow --table=test -q "desc test" --schema_inference_make_columns_nullable=0; + diff --git a/tests/queries/0_stateless/02513_prewhere_combine_step_filters.reference b/tests/queries/0_stateless/02513_prewhere_combine_step_filters.reference new file mode 100644 index 00000000000..85adb1850d4 --- /dev/null +++ b/tests/queries/0_stateless/02513_prewhere_combine_step_filters.reference @@ -0,0 +1,110 @@ +-- { echoOn } +SELECT * FROM table_02513; +143001 +143002 +143003 +143004 +143005 +143006 +143007 +143008 +143009 +143011 +143012 +143013 +143014 +143015 +143016 +143017 +143018 +143019 +SELECT * FROM table_02513 WHERE n%11; +143001 +143002 +143003 +143004 +143005 +143006 +143007 +143008 +143009 +143012 +143013 +143014 +143015 +143016 +143017 +143018 +143019 +SELECT * FROM table_02513 
PREWHERE n%11; +143001 +143002 +143003 +143004 +143005 +143006 +143007 +143008 +143009 +143012 +143013 +143014 +143015 +143016 +143017 +143018 +143019 +SELECT * FROM table_02513 WHERE n%11 AND n%13; +143001 +143002 +143003 +143004 +143005 +143006 +143007 +143008 +143009 +143012 +143014 +143015 +143016 +143017 +143018 +143019 +SELECT * FROM table_02513 PREWHERE n%11 WHERE n%13; +143001 +143002 +143003 +143004 +143005 +143006 +143007 +143008 +143009 +143012 +143014 +143015 +143016 +143017 +143018 +143019 +SELECT * FROM table_02513 WHERE n%143011; -- { serverError ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER } +SELECT * FROM table_02513 PREWHERE n%143011; -- { serverError ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER } +SELECT * FROM table_02513 WHERE n%143011 AND n%13; +143001 +143002 +143003 +143004 +143005 +143006 +143007 +143008 +143009 +143012 +143014 +143015 +143016 +143017 +143018 +143019 +SELECT * FROM table_02513 PREWHERE n%143011 WHERE n%13; -- { serverError ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER } diff --git a/tests/queries/0_stateless/02513_prewhere_combine_step_filters.sql b/tests/queries/0_stateless/02513_prewhere_combine_step_filters.sql new file mode 100644 index 00000000000..771893ce674 --- /dev/null +++ b/tests/queries/0_stateless/02513_prewhere_combine_step_filters.sql @@ -0,0 +1,26 @@ +DROP TABLE IF EXISTS table_02513; + +CREATE TABLE table_02513 (n UInt64) ENGINE=MergeTree() ORDER BY tuple() SETTINGS index_granularity=100; + +INSERT INTO table_02513 SELECT number+11*13*1000 FROM numbers(20); + +SET allow_experimental_lightweight_delete=1; +SET mutations_sync=2; +SET max_threads=1; + +DELETE FROM table_02513 WHERE n%10=0; + +-- { echoOn } +SELECT * FROM table_02513; +SELECT * FROM table_02513 WHERE n%11; +SELECT * FROM table_02513 PREWHERE n%11; +SELECT * FROM table_02513 WHERE n%11 AND n%13; +SELECT * FROM table_02513 PREWHERE n%11 WHERE n%13; + +SELECT * FROM table_02513 WHERE n%143011; -- { serverError ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER } +SELECT * FROM table_02513 PREWHERE n%143011; -- { serverError ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER } +SELECT * FROM table_02513 WHERE n%143011 AND n%13; +SELECT * FROM table_02513 PREWHERE n%143011 WHERE n%13; -- { serverError ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER } +-- { echoOff } + +DROP TABLE table_02513; diff --git a/tests/queries/0_stateless/02513_validate_data_types.reference b/tests/queries/0_stateless/02513_validate_data_types.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02513_validate_data_types.sql b/tests/queries/0_stateless/02513_validate_data_types.sql new file mode 100644 index 00000000000..1235d00ba79 --- /dev/null +++ b/tests/queries/0_stateless/02513_validate_data_types.sql @@ -0,0 +1,19 @@ +-- Tags: no-fasttest + +set allow_experimental_object_type=0; +select CAST('{"x" : 1}', 'JSON'); -- {serverError ILLEGAL_COLUMN} +desc file(nonexist.json, JSONAsObject); -- {serverError ILLEGAL_COLUMN} +desc file(nonexist.json, JSONEachRow, 'x JSON'); -- {serverError ILLEGAL_COLUMN} + +set allow_experimental_geo_types=0; +select CAST([[(20, 20), (50, 20), (50, 50), (20, 50)], [(30, 30), (50, 50), (50, 30)]], 'Polygon'); -- {serverError ILLEGAL_COLUMN} +desc file(nonexist.json, JSONEachRow, 'pg Polygon'); -- {serverError ILLEGAL_COLUMN} + +set allow_suspicious_low_cardinality_types=0; +select CAST(1000000, 'LowCardinality(UInt64)'); -- {serverError SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY} +desc file(nonexist.json, JSONEachRow, 'lc LowCardinality(UInt64)'); -- {serverError SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY} + +set 
allow_suspicious_fixed_string_types=0; +select CAST('', 'FixedString(1000)'); -- {serverError ILLEGAL_COLUMN} +desc file(nonexist.json, JSONEachRow, 'fs FixedString(1000)'); -- {serverError ILLEGAL_COLUMN} + diff --git a/tests/queries/0_stateless/02514_bad_index_granularity.reference b/tests/queries/0_stateless/02514_bad_index_granularity.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02514_bad_index_granularity.sql b/tests/queries/0_stateless/02514_bad_index_granularity.sql new file mode 100644 index 00000000000..975af2d0728 --- /dev/null +++ b/tests/queries/0_stateless/02514_bad_index_granularity.sql @@ -0,0 +1,7 @@ +CREATE TABLE t +( + id Int64, + d String, + p Map(String, String) +) +ENGINE = ReplacingMergeTree order by id settings index_granularity = 0; -- { serverError BAD_ARGUMENTS } diff --git a/tests/queries/0_stateless/02514_database_replicated_no_arguments_for_rmt.reference b/tests/queries/0_stateless/02514_database_replicated_no_arguments_for_rmt.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02514_database_replicated_no_arguments_for_rmt.sh b/tests/queries/0_stateless/02514_database_replicated_no_arguments_for_rmt.sh new file mode 100755 index 00000000000..ee51640488e --- /dev/null +++ b/tests/queries/0_stateless/02514_database_replicated_no_arguments_for_rmt.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash +# Tags: replica, no-replicated-database +# I don't understand why this test fails in the ReplicatedDatabase run, +# but there is too much magic involved in it, so I just disabled it for the ReplicatedDatabase run because +# here we explicitly create the database and check that it is alright. + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} -q "create table mute_stylecheck (x UInt32) engine = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/root', '1') order by x" + +${CLICKHOUSE_CLIENT} -q "CREATE USER user_${CLICKHOUSE_DATABASE} settings database_replicated_allow_replicated_engine_arguments=0" +${CLICKHOUSE_CLIENT} -q "GRANT CREATE TABLE ON ${CLICKHOUSE_DATABASE}_db.* TO user_${CLICKHOUSE_DATABASE}" +${CLICKHOUSE_CLIENT} --allow_experimental_database_replicated=1 --query "CREATE DATABASE ${CLICKHOUSE_DATABASE}_db engine = Replicated('/clickhouse/databases/${CLICKHOUSE_TEST_ZOOKEEPER_PREFIX}/${CLICKHOUSE_DATABASE}_db', '{shard}', '{replica}')" +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --user "user_${CLICKHOUSE_DATABASE}" -n --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.tab_rmt_ok (x UInt32) engine = ReplicatedMergeTree order by x;" +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --user "user_${CLICKHOUSE_DATABASE}" -n --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.tab_rmt_fail (x UInt32) engine = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/root/{shard}', '{replica}') order by x; -- { serverError 80 }" +${CLICKHOUSE_CLIENT} --query "DROP DATABASE ${CLICKHOUSE_DATABASE}_db" +${CLICKHOUSE_CLIENT} -q "DROP USER user_${CLICKHOUSE_DATABASE}" + +${CLICKHOUSE_CLIENT} -q "drop table mute_stylecheck" diff --git a/tests/queries/0_stateless/02514_if_with_lazy_low_cardinality.reference b/tests/queries/0_stateless/02514_if_with_lazy_low_cardinality.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02514_if_with_lazy_low_cardinality.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02514_if_with_lazy_low_cardinality.sql b/tests/queries/0_stateless/02514_if_with_lazy_low_cardinality.sql new file mode 100644 index 00000000000..80e3c0a9ece --- /dev/null +++ b/tests/queries/0_stateless/02514_if_with_lazy_low_cardinality.sql @@ -0,0 +1,5 @@ +create table if not exists t (`arr.key` Array(LowCardinality(String)), `arr.value` Array(LowCardinality(String))) engine = Memory; +insert into t (`arr.key`, `arr.value`) values (['a'], ['b']); +select if(true, if(lowerUTF8(arr.key) = 'a', 1, 2), 3) as x from t left array join arr; +drop table t; + diff --git a/tests/queries/0_stateless/02514_null_dictionary_source.reference b/tests/queries/0_stateless/02514_null_dictionary_source.reference new file mode 100644 index 00000000000..bb08ece2bcf --- /dev/null +++ b/tests/queries/0_stateless/02514_null_dictionary_source.reference @@ -0,0 +1,4 @@ +0 \N 111 0 111 +123 \N 111 123 111 +\N \N 111 +77 diff --git a/tests/queries/0_stateless/02514_null_dictionary_source.sql b/tests/queries/0_stateless/02514_null_dictionary_source.sql new file mode 100644 index 00000000000..74fb57707ff --- /dev/null +++ b/tests/queries/0_stateless/02514_null_dictionary_source.sql @@ -0,0 +1,48 @@ +-- Tags: no-parallel + +DROP DICTIONARY IF EXISTS null_dict; +CREATE DICTIONARY null_dict ( + id UInt64, + val UInt8, + default_val UInt8 DEFAULT 123, + nullable_val Nullable(UInt8) +) +PRIMARY KEY id +SOURCE(NULL()) +LAYOUT(FLAT()) +LIFETIME(0); + +SELECT + dictGet('null_dict', 'val', 1337), + dictGetOrNull('null_dict', 'val', 1337), + dictGetOrDefault('null_dict', 'val', 1337, 111), + dictGetUInt8('null_dict', 'val', 1337), + dictGetUInt8OrDefault('null_dict', 'val', 1337, 111); + +SELECT + dictGet('null_dict', 'default_val', 1337), + dictGetOrNull('null_dict', 'default_val', 1337), + 
dictGetOrDefault('null_dict', 'default_val', 1337, 111), + dictGetUInt8('null_dict', 'default_val', 1337), + dictGetUInt8OrDefault('null_dict', 'default_val', 1337, 111); + +SELECT + dictGet('null_dict', 'nullable_val', 1337), + dictGetOrNull('null_dict', 'nullable_val', 1337), + dictGetOrDefault('null_dict', 'nullable_val', 1337, 111); + +SELECT val, nullable_val FROM null_dict; + +DROP DICTIONARY IF EXISTS null_ip_dict; +CREATE DICTIONARY null_ip_dict ( + network String, + val UInt8 DEFAULT 77 +) +PRIMARY KEY network +SOURCE(NULL()) +LAYOUT(IP_TRIE()) +LIFETIME(0); + +SELECT dictGet('null_ip_dict', 'val', toIPv4('127.0.0.1')); + +SELECT network, val FROM null_ip_dict; diff --git a/tests/queries/0_stateless/02514_tsv_zero_started_number.reference b/tests/queries/0_stateless/02514_tsv_zero_started_number.reference new file mode 100644 index 00000000000..829ab6bc4d0 --- /dev/null +++ b/tests/queries/0_stateless/02514_tsv_zero_started_number.reference @@ -0,0 +1 @@ +Nullable(String) 0123 diff --git a/tests/queries/0_stateless/02514_tsv_zero_started_number.sql b/tests/queries/0_stateless/02514_tsv_zero_started_number.sql new file mode 100644 index 00000000000..d2058ea8f94 --- /dev/null +++ b/tests/queries/0_stateless/02514_tsv_zero_started_number.sql @@ -0,0 +1,2 @@ +select toTypeName(*), * from format(TSV, '0123'); + diff --git a/tests/queries/0_stateless/02515_analyzer_null_for_empty.reference b/tests/queries/0_stateless/02515_analyzer_null_for_empty.reference new file mode 100644 index 00000000000..13e4ff9b55a --- /dev/null +++ b/tests/queries/0_stateless/02515_analyzer_null_for_empty.reference @@ -0,0 +1 @@ +92233720368547758.06 diff --git a/tests/queries/0_stateless/02515_analyzer_null_for_empty.sql b/tests/queries/0_stateless/02515_analyzer_null_for_empty.sql new file mode 100644 index 00000000000..de21e9b475e --- /dev/null +++ b/tests/queries/0_stateless/02515_analyzer_null_for_empty.sql @@ -0,0 +1,4 @@ +SET allow_experimental_analyzer = 1; +SET aggregate_functions_null_for_empty = 1; + +SELECT max(aggr) FROM (SELECT max('92233720368547758.06') AS aggr FROM system.one); diff --git a/tests/queries/0_stateless/02515_and_or_if_multiif_not_return_lc.reference b/tests/queries/0_stateless/02515_and_or_if_multiif_not_return_lc.reference new file mode 100644 index 00000000000..805bbdf7a59 --- /dev/null +++ b/tests/queries/0_stateless/02515_and_or_if_multiif_not_return_lc.reference @@ -0,0 +1,4 @@ +UInt8 +UInt8 +UInt8 +UInt8 diff --git a/tests/queries/0_stateless/02515_and_or_if_multiif_not_return_lc.sql b/tests/queries/0_stateless/02515_and_or_if_multiif_not_return_lc.sql new file mode 100644 index 00000000000..0ccccd4d9a7 --- /dev/null +++ b/tests/queries/0_stateless/02515_and_or_if_multiif_not_return_lc.sql @@ -0,0 +1,5 @@ +select toTypeName(if(toLowCardinality(number % 2), 1, 2)) from numbers(1); +select toTypeName(multiIf(toLowCardinality(number % 2), 1, 1, 2, 3)) from numbers(1); +select toTypeName(toLowCardinality(number % 2) and 2) from numbers(1); +select toTypeName(toLowCardinality(number % 2) or 2) from numbers(1); + diff --git a/tests/queries/0_stateless/02515_cleanup_async_insert_block_ids.reference b/tests/queries/0_stateless/02515_cleanup_async_insert_block_ids.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02515_cleanup_async_insert_block_ids.sh b/tests/queries/0_stateless/02515_cleanup_async_insert_block_ids.sh new file mode 100755 index 00000000000..9e22089d5e1 --- /dev/null +++ 
b/tests/queries/0_stateless/02515_cleanup_async_insert_block_ids.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash +# Tags: zookeeper, no-parallel, long, no-fasttest, no-replicated-database + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# Check that the background cleanup thread works correctly. +CLICKHOUSE_TEST_ZOOKEEPER_PREFIX="${CLICKHOUSE_TEST_ZOOKEEPER_PREFIX}/${CLICKHOUSE_DATABASE}" + +$CLICKHOUSE_CLIENT -n --query " + DROP TABLE IF EXISTS t_async_insert_cleanup NO DELAY; + CREATE TABLE t_async_insert_cleanup ( + KeyID UInt32 + ) Engine = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/t_async_insert_cleanup', '{replica}') + ORDER BY (KeyID) SETTINGS cleanup_delay_period = 1, cleanup_delay_period_random_add = 1, replicated_deduplication_window_for_async_inserts=10 +" + +for i in {1..100}; do + $CLICKHOUSE_CLIENT --async_insert 1 --async_insert_deduplicate 1 --wait_for_async_insert 0 --query "insert into t_async_insert_cleanup values ($i), ($((i + 1))), ($((i + 2)))" +done + +sleep 1 + +old_answer=$($CLICKHOUSE_CLIENT --query "SELECT count(*) FROM system.zookeeper WHERE path like '/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/t_async_insert_cleanup/async_blocks%' settings allow_unrestricted_reads_from_keeper = 'true'") + +for i in {1..300}; do + answer=$($CLICKHOUSE_CLIENT --query "SELECT count(*) FROM system.zookeeper WHERE path like '/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/t_async_insert_cleanup/async_blocks%' settings allow_unrestricted_reads_from_keeper = 'true'") + if [ $answer == '10' ]; then + $CLICKHOUSE_CLIENT -n --query "DROP TABLE t_async_insert_cleanup NO DELAY;" + exit 0 + fi + sleep 1 +done + +$CLICKHOUSE_CLIENT --query "SELECT count(*) FROM t_async_insert_cleanup" +echo $old_answer +$CLICKHOUSE_CLIENT --query "SELECT count(*) FROM system.zookeeper WHERE path like '/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/t_async_insert_cleanup/async_blocks%' settings allow_unrestricted_reads_from_keeper = 'true'" +$CLICKHOUSE_CLIENT -n --query "DROP TABLE t_async_insert_cleanup NO DELAY;" diff --git a/tests/queries/0_stateless/02515_distinct_zero_size_key_bug_44831.reference b/tests/queries/0_stateless/02515_distinct_zero_size_key_bug_44831.reference new file mode 100644 index 00000000000..8908480c027 --- /dev/null +++ b/tests/queries/0_stateless/02515_distinct_zero_size_key_bug_44831.reference @@ -0,0 +1 @@ +\N diff --git a/tests/queries/0_stateless/02515_distinct_zero_size_key_bug_44831.sql b/tests/queries/0_stateless/02515_distinct_zero_size_key_bug_44831.sql new file mode 100644 index 00000000000..96072b281db --- /dev/null +++ b/tests/queries/0_stateless/02515_distinct_zero_size_key_bug_44831.sql @@ -0,0 +1 @@ +SELECT DISTINCT NULL, if(number > 0, 't', '') AS res FROM numbers(1) ORDER BY res; diff --git a/tests/queries/0_stateless/02515_fix_any_parsing.reference b/tests/queries/0_stateless/02515_fix_any_parsing.reference new file mode 100644 index 00000000000..427fa0c4442 --- /dev/null +++ b/tests/queries/0_stateless/02515_fix_any_parsing.reference @@ -0,0 +1,2 @@ +SELECT any(0) = any(1) +SELECT any((NULL + NULL) = 0.0001), '1', NULL + -2147483647, any(NULL), (NULL + NULL) = 1000.0001, (NULL + NULL) = ((NULL + 10.0001) = (NULL, (NULL + 0.9999) = any(inf, 0., NULL, (NULL + 1.0001) = '214748364.6')), (NULL + NULL) = (NULL + nan)) diff --git a/tests/queries/0_stateless/02515_fix_any_parsing.sh b/tests/queries/0_stateless/02515_fix_any_parsing.sh new 
file mode 100755 index 00000000000..ed7316bdbb8 --- /dev/null +++ b/tests/queries/0_stateless/02515_fix_any_parsing.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +set -e + +format="$CLICKHOUSE_FORMAT --oneline" + +echo "SELECT any(0) = any(1)" | $format +echo "SELECT any((NULL + NULL) = 0.0001), '1', NULL + -2147483647, any(NULL), (NULL + NULL) = 1000.0001, (NULL + NULL) = ((NULL + 10.0001) = (NULL, (NULL + 0.9999) = any(inf, 0., NULL, (NULL + 1.0001) = '214748364.6')), (NULL + NULL) = (NULL + nan))" | $format | $format diff --git a/tests/queries/0_stateless/02515_projections_with_totals.reference b/tests/queries/0_stateless/02515_projections_with_totals.reference new file mode 100644 index 00000000000..c6359cae032 --- /dev/null +++ b/tests/queries/0_stateless/02515_projections_with_totals.reference @@ -0,0 +1,3 @@ +0 + +0 diff --git a/tests/queries/0_stateless/02515_projections_with_totals.sql b/tests/queries/0_stateless/02515_projections_with_totals.sql new file mode 100644 index 00000000000..4d43d5381da --- /dev/null +++ b/tests/queries/0_stateless/02515_projections_with_totals.sql @@ -0,0 +1,6 @@ +DROP TABLE IF EXISTS t; +CREATE TABLE t (x UInt8, PROJECTION p (SELECT x GROUP BY x)) ENGINE = MergeTree ORDER BY (); +INSERT INTO t VALUES (0); +SET group_by_overflow_mode = 'any', max_rows_to_group_by = 1000, totals_mode = 'after_having_auto'; +SELECT x FROM t GROUP BY x WITH TOTALS; +DROP TABLE t; diff --git a/tests/queries/0_stateless/02515_tuple_lambda_parsing.reference b/tests/queries/0_stateless/02515_tuple_lambda_parsing.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02515_tuple_lambda_parsing.sql b/tests/queries/0_stateless/02515_tuple_lambda_parsing.sql new file mode 100644 index 00000000000..4ec49f30e39 --- /dev/null +++ b/tests/queries/0_stateless/02515_tuple_lambda_parsing.sql @@ -0,0 +1,7 @@ +explain ast select tuple(a) -> f(a); -- { clientError SYNTAX_ERROR } +explain ast select tuple(a, b) -> f(a); -- { clientError SYNTAX_ERROR } +explain ast select (tuple(a)) -> f(a); -- { clientError SYNTAX_ERROR } +explain ast select (f(a)) -> f(a); -- { clientError SYNTAX_ERROR } +explain ast select (a::UInt64) -> f(a); -- { clientError SYNTAX_ERROR } +explain ast select (1) -> f(a); -- { clientError SYNTAX_ERROR } +explain ast select (1::UInt64) -> f(a); -- { clientError SYNTAX_ERROR } diff --git a/tests/queries/0_stateless/02516_join_with_totals_and_subquery_bug.reference b/tests/queries/0_stateless/02516_join_with_totals_and_subquery_bug.reference new file mode 100644 index 00000000000..fd0b223f8e5 --- /dev/null +++ b/tests/queries/0_stateless/02516_join_with_totals_and_subquery_bug.reference @@ -0,0 +1,7 @@ +1 +1 + +0 +\N + +100000000000000000000 diff --git a/tests/queries/0_stateless/02516_join_with_totals_and_subquery_bug.sql b/tests/queries/0_stateless/02516_join_with_totals_and_subquery_bug.sql new file mode 100644 index 00000000000..b6e60aa2e1f --- /dev/null +++ b/tests/queries/0_stateless/02516_join_with_totals_and_subquery_bug.sql @@ -0,0 +1,53 @@ +SELECT * +FROM +( + SELECT 1 AS a +) AS t1 +INNER JOIN +( + SELECT 1 AS a + GROUP BY 1 + WITH TOTALS + UNION ALL + SELECT 1 + GROUP BY 1 + WITH TOTALS +) AS t2 USING (a); + +SELECT a +FROM +( + SELECT + NULL AS a, + NULL AS b, + NULL AS c + UNION ALL + SELECT + 100000000000000000000., + NULL, + NULL + WHERE 0 + GROUP BY + GROUPING SETS ((NULL)) + WITH TOTALS 
+) AS js1 +ALL LEFT JOIN +( + SELECT + NULL AS a, + 2147483647 AS d + GROUP BY + NULL, + '214748364.8' + WITH CUBE + WITH TOTALS + UNION ALL + SELECT + 2147483646, + NULL + GROUP BY + base58Encode(materialize(NULL)), + NULL + WITH TOTALS +) AS js2 USING (a) +ORDER BY b ASC NULLS FIRST; diff --git a/tests/queries/0_stateless/02516_projections_and_context.reference b/tests/queries/0_stateless/02516_projections_and_context.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02516_projections_and_context.sql b/tests/queries/0_stateless/02516_projections_and_context.sql new file mode 100644 index 00000000000..a7c143c7900 --- /dev/null +++ b/tests/queries/0_stateless/02516_projections_and_context.sql @@ -0,0 +1,6 @@ +DROP TABLE IF EXISTS test1__fuzz_37; +CREATE TABLE test1__fuzz_37 (`i` Date) ENGINE = MergeTree ORDER BY i; +insert into test1__fuzz_37 values ('2020-10-10'); +SELECT count() FROM test1__fuzz_37 GROUP BY dictHas(NULL, (dictHas(NULL, (('', materialize(NULL)), materialize(NULL))), 'KeyKey')), dictHas('test_dictionary', tuple(materialize('Ke\0'))), tuple(dictHas(NULL, (tuple('Ke\0Ke\0Ke\0Ke\0Ke\0Ke\0\0\0\0Ke\0'), materialize(NULL)))), 'test_dicti\0nary', (('', materialize(NULL)), dictHas(NULL, (dictHas(NULL, tuple(materialize(NULL))), 'KeyKeyKeyKeyKeyKeyKeyKey')), materialize(NULL)); -- { serverError BAD_ARGUMENTS } +SELECT count() FROM test1__fuzz_37 GROUP BY dictHas('non_existing_dictionary', materialize('a')); -- { serverError BAD_ARGUMENTS } +DROP TABLE test1__fuzz_37; diff --git a/tests/queries/0_stateless/02516_projections_with_rollup.reference b/tests/queries/0_stateless/02516_projections_with_rollup.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02516_projections_with_rollup.sql b/tests/queries/0_stateless/02516_projections_with_rollup.sql new file mode 100644 index 00000000000..e670fbb7827 --- /dev/null +++ b/tests/queries/0_stateless/02516_projections_with_rollup.sql @@ -0,0 +1,120 @@ +DROP TABLE IF EXISTS video_log; +DROP TABLE IF EXISTS video_log_result__fuzz_0; +DROP TABLE IF EXISTS rng; + +CREATE TABLE video_log +( + `datetime` DateTime, + `user_id` UInt64, + `device_id` UInt64, + `domain` LowCardinality(String), + `bytes` UInt64, + `duration` UInt64 +) +ENGINE = MergeTree +PARTITION BY toDate(datetime) +ORDER BY (user_id, device_id); + +CREATE TABLE video_log_result__fuzz_0 +( + `hour` Nullable(DateTime), + `sum_bytes` UInt64, + `avg_duration` Float64 +) +ENGINE = MergeTree +PARTITION BY toDate(hour) +ORDER BY sum_bytes +SETTINGS allow_nullable_key = 1; + +CREATE TABLE rng +( + `user_id_raw` UInt64, + `device_id_raw` UInt64, + `domain_raw` UInt64, + `bytes_raw` UInt64, + `duration_raw` UInt64 +) +ENGINE = GenerateRandom(1024); + +INSERT INTO video_log SELECT + toUnixTimestamp('2022-07-22 01:00:00') + (rowNumberInAllBlocks() / 20000), + user_id_raw % 100000000 AS user_id, + device_id_raw % 200000000 AS device_id, + domain_raw % 100, + (bytes_raw % 1024) + 128, + (duration_raw % 300) + 100 +FROM rng +LIMIT 1728000; + +INSERT INTO video_log SELECT + toUnixTimestamp('2022-07-22 01:00:00') + (rowNumberInAllBlocks() / 20000), + user_id_raw % 100000000 AS user_id, + 100 AS device_id, + domain_raw % 100, + (bytes_raw % 1024) + 128, + (duration_raw % 300) + 100 +FROM rng +LIMIT 10; + +ALTER TABLE video_log + ADD PROJECTION p_norm + ( + SELECT + datetime, + device_id, + bytes, + duration + ORDER BY device_id + ); + +ALTER TABLE video_log + MATERIALIZE PROJECTION p_norm +SETTINGS 
mutations_sync = 1; + +ALTER TABLE video_log + ADD PROJECTION p_agg + ( + SELECT + toStartOfHour(datetime) AS hour, + domain, + sum(bytes), + avg(duration) + GROUP BY + hour, + domain + ); + +ALTER TABLE video_log + MATERIALIZE PROJECTION p_agg +SETTINGS mutations_sync = 1; + +-- We are not interested in the result of this query, but it should not produce a logical error. +SELECT + avg_duration1, + avg_duration1 = avg_duration2 +FROM +( + SELECT + sum(bytes), + hour, + toStartOfHour(datetime) AS hour, + avg(duration) AS avg_duration1 + FROM video_log + GROUP BY hour + WITH ROLLUP + WITH TOTALS +) +LEFT JOIN +( + SELECT + hour, + sum_bytes AS sum_bytes2, + avg_duration AS avg_duration2 + FROM video_log_result__fuzz_0 +) USING (hour) +SETTINGS joined_subquery_requires_alias = 0 +FORMAT Null; + +DROP TABLE video_log; +DROP TABLE video_log_result__fuzz_0; +DROP TABLE rng; diff --git a/tests/queries/0_stateless/02517_avro_bool_type.reference b/tests/queries/0_stateless/02517_avro_bool_type.reference new file mode 100644 index 00000000000..c383ecf3857 --- /dev/null +++ b/tests/queries/0_stateless/02517_avro_bool_type.reference @@ -0,0 +1 @@ +true Bool diff --git a/tests/queries/0_stateless/02517_avro_bool_type.sh b/tests/queries/0_stateless/02517_avro_bool_type.sh new file mode 100755 index 00000000000..a26dfbd06ea --- /dev/null +++ b/tests/queries/0_stateless/02517_avro_bool_type.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL -q "select true::Bool as b format Avro" | $CLICKHOUSE_LOCAL --table=test --input-format=Avro -q "select b, toTypeName(b) from test"; + diff --git a/tests/queries/0_stateless/02517_executable_pool_bad_input_query.reference b/tests/queries/0_stateless/02517_executable_pool_bad_input_query.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02517_executable_pool_bad_input_query.sql b/tests/queries/0_stateless/02517_executable_pool_bad_input_query.sql new file mode 100644 index 00000000000..c016c93b672 --- /dev/null +++ b/tests/queries/0_stateless/02517_executable_pool_bad_input_query.sql @@ -0,0 +1,4 @@ +CREATE TABLE test_table (value String) ENGINE=ExecutablePool('nonexist.py', 'TabSeparated', (foobar)); -- {serverError BAD_ARGUMENTS} +CREATE TABLE test_table (value String) ENGINE=ExecutablePool('nonexist.py', 'TabSeparated', '(SELECT 1)'); -- {serverError BAD_ARGUMENTS} +CREATE TABLE test_table (value String) ENGINE=ExecutablePool('nonexist.py', 'TabSeparated', [1,2,3]); -- {serverError BAD_ARGUMENTS} + diff --git a/tests/queries/0_stateless/02517_fuse_bug_44712.reference b/tests/queries/0_stateless/02517_fuse_bug_44712.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02517_fuse_bug_44712.sql b/tests/queries/0_stateless/02517_fuse_bug_44712.sql new file mode 100644 index 00000000000..894bf9e06d5 --- /dev/null +++ b/tests/queries/0_stateless/02517_fuse_bug_44712.sql @@ -0,0 +1,10 @@ +DROP TABLE IF EXISTS fuse_tbl__fuzz_35; + +CREATE TABLE fuse_tbl__fuzz_35 (`a` UInt8, `b` Nullable(Int16)) ENGINE = Log; +INSERT INTO fuse_tbl__fuzz_35 SELECT number, number + 1 FROM numbers(1000); + +set allow_experimental_analyzer = 0, optimize_syntax_fuse_functions = 1, optimize_fuse_sum_count_avg = 1; + +SELECT quantile(0.5)(b), quantile(0.9)(b) FROM (SELECT x + 2147483648 AS b FROM (SELECT quantile(0.5)(b) AS x FROM 
fuse_tbl__fuzz_35) GROUP BY x) FORMAT Null; + +DROP TABLE IF EXISTS fuse_tbl__fuzz_35; diff --git a/tests/queries/0_stateless/02517_wrong_total_structure_crash.reference b/tests/queries/0_stateless/02517_wrong_total_structure_crash.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02517_wrong_total_structure_crash.sql b/tests/queries/0_stateless/02517_wrong_total_structure_crash.sql new file mode 100644 index 00000000000..86bc8c1128d --- /dev/null +++ b/tests/queries/0_stateless/02517_wrong_total_structure_crash.sql @@ -0,0 +1,26 @@ +-- Tags: no-ordinary-database +CREATE OR REPLACE TABLE alias10__fuzz_13 (`Id` Array(Array(UInt256)), `EventDate` Array(String), `field1` Array(Array(Nullable(Int8))), `field2` Array(Date), `field3` Array(Array(Array(UInt128)))) ENGINE = Distributed(test_shard_localhost, currentDatabase(), alias_local10); + +set allow_deprecated_syntax_for_merge_tree=1; +CREATE OR REPLACE TABLE alias_local10 ( + Id Int8, + EventDate Date DEFAULT '2000-01-01', + field1 Int8, + field2 String, + field3 ALIAS CASE WHEN field1 = 1 THEN field2 ELSE '0' END +) ENGINE = MergeTree(EventDate, (Id, EventDate), 8192); + +SET prefer_localhost_replica = 0; + +SELECT field1 FROM alias10__fuzz_13 WHERE arrayEnumerateDense(NULL, tuple('0.2147483646'), NULL) GROUP BY field1, arrayEnumerateDense(('0.02', '0.1', '0'), NULL) WITH TOTALS; -- { serverError TYPE_MISMATCH } + + +CREATE OR REPLACE TABLE local (x Int8) ENGINE = Memory; +CREATE OR REPLACE TABLE distributed (x Array(Int8)) ENGINE = Distributed(test_shard_localhost, currentDatabase(), local); +SET prefer_localhost_replica = 0; +SELECT x FROM distributed GROUP BY x WITH TOTALS; -- { serverError TYPE_MISMATCH } + +DROP TABLE distributed; +DROP TABLE local; +DROP TABLE alias_local10; +DROP TABLE alias10__fuzz_13; diff --git a/tests/queries/0_stateless/02518_delete_on_materialized_view.reference b/tests/queries/0_stateless/02518_delete_on_materialized_view.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02518_delete_on_materialized_view.sql b/tests/queries/0_stateless/02518_delete_on_materialized_view.sql new file mode 100644 index 00000000000..73abca4ea53 --- /dev/null +++ b/tests/queries/0_stateless/02518_delete_on_materialized_view.sql @@ -0,0 +1,14 @@ +DROP TABLE IF EXISTS kek; +DROP TABLE IF EXISTS kekv; + +CREATE TABLE kek (a UInt32) ENGINE = MergeTree ORDER BY a; +CREATE MATERIALIZED VIEW kekv ENGINE = MergeTree ORDER BY tuple() AS SELECT * FROM kek; + +INSERT INTO kek VALUES (1); +DELETE FROM kekv WHERE a = 1; -- { serverError BAD_ARGUMENTS} + +SET allow_experimental_lightweight_delete=1; +DELETE FROM kekv WHERE a = 1; -- { serverError BAD_ARGUMENTS} + +DROP TABLE IF EXISTS kek; +DROP TABLE IF EXISTS kekv; diff --git a/tests/queries/0_stateless/02519_monotonicity_fuzz.reference b/tests/queries/0_stateless/02519_monotonicity_fuzz.reference new file mode 100644 index 00000000000..0fa4fc055c9 --- /dev/null +++ b/tests/queries/0_stateless/02519_monotonicity_fuzz.reference @@ -0,0 +1,2 @@ +1.1 +1 diff --git a/tests/queries/0_stateless/02519_monotonicity_fuzz.sql b/tests/queries/0_stateless/02519_monotonicity_fuzz.sql new file mode 100644 index 00000000000..57da691715d --- /dev/null +++ b/tests/queries/0_stateless/02519_monotonicity_fuzz.sql @@ -0,0 +1,10 @@ +DROP TABLE IF EXISTS t; +CREATE TABLE t (x Decimal(18, 3)) ENGINE = MergeTree ORDER BY x; +INSERT INTO t VALUES (1.1); +SELECT * FROM t WHERE toUInt64(x) = 1; +DROP TABLE t; + +CREATE 
TABLE t (x DateTime64(3)) ENGINE = MergeTree ORDER BY x; +INSERT INTO t VALUES (1000); +SELECT x::UInt64 FROM t WHERE toUInt64(x) = 1; +DROP TABLE t; diff --git a/tests/queries/0_stateless/02520_group_array_last.reference b/tests/queries/0_stateless/02520_group_array_last.reference new file mode 100644 index 00000000000..52b445290ee --- /dev/null +++ b/tests/queries/0_stateless/02520_group_array_last.reference @@ -0,0 +1,53 @@ +-- { echo } +-- BAD_ARGUMENTS +select groupArrayLast(number+1) from numbers(5); -- { serverError BAD_ARGUMENTS } +select groupArrayLastArray([number+1]) from numbers(5); -- { serverError BAD_ARGUMENTS } +-- groupArrayLast by number +select groupArrayLast(1)(number+1) from numbers(5); +[5] +select groupArrayLast(3)(number+1) from numbers(5); +[4,5,3] +select groupArrayLast(3)(number+1) from numbers(10); +[10,8,9] +-- groupArrayLast by String +select groupArrayLast(3)((number+1)::String) from numbers(5); +['4','5','3'] +select groupArrayLast(3)((number+1)::String) from numbers(10); +['10','8','9'] +-- groupArrayLastArray +select groupArrayLastArray(3)([1,2,3,4,5,6]); +[4,5,6] +select groupArrayLastArray(3)(['1','2','3','4','5','6']); +['4','5','6'] +-- groupArrayLastMerge +-- [10,8,9] + [10,8,9] => [10,10,9] => [10,10,8] => [9,10,8] +-- ^ ^ ^ ^^ +-- (position to insert at) +select groupArrayLast(3)(number+1) state from remote('127.{1,1}', view(select * from numbers(10))); +[9,10,8] +select groupArrayLast(3)((number+1)::String) state from remote('127.{1,1}', view(select * from numbers(10))); +['9','10','8'] +select groupArrayLast(3)([number+1]) state from remote('127.{1,1}', view(select * from numbers(10))); +[[9],[10],[8]] +select groupArrayLast(100)(number+1) state from remote('127.{1,1}', view(select * from numbers(10))); +[1,2,3,4,5,6,7,8,9,10,1,2,3,4,5,6,7,8,9,10] +select groupArrayLast(100)((number+1)::String) state from remote('127.{1,1}', view(select * from numbers(10))); +['1','2','3','4','5','6','7','8','9','10','1','2','3','4','5','6','7','8','9','10'] +select groupArrayLast(100)([number+1]) state from remote('127.{1,1}', view(select * from numbers(10))); +[[1],[2],[3],[4],[5],[6],[7],[8],[9],[10],[1],[2],[3],[4],[5],[6],[7],[8],[9],[10]] +-- SimpleAggregateFunction +create table simple_agg_groupArrayLastArray (key Int, value SimpleAggregateFunction(groupArrayLastArray(5), Array(UInt64))) engine=AggregatingMergeTree() order by key; +insert into simple_agg_groupArrayLastArray values (1, [1,2,3]), (1, [4,5,6]), (2, [4,5,6]), (2, [1,2,3]); +select * from simple_agg_groupArrayLastArray order by key, value; +1 [6,2,3,4,5] +2 [3,5,6,1,2] +system stop merges simple_agg_groupArrayLastArray; +insert into simple_agg_groupArrayLastArray values (1, [7,8]), (2, [7,8]); +select * from simple_agg_groupArrayLastArray order by key, value; +1 [6,2,3,4,5] +1 [7,8] +2 [3,5,6,1,2] +2 [7,8] +select * from simple_agg_groupArrayLastArray final order by key, value; +1 [7,8,3,4,5] +2 [7,8,6,1,2] diff --git a/tests/queries/0_stateless/02520_group_array_last.sql b/tests/queries/0_stateless/02520_group_array_last.sql new file mode 100644 index 00000000000..94773d5d522 --- /dev/null +++ b/tests/queries/0_stateless/02520_group_array_last.sql @@ -0,0 +1,34 @@ +drop table if exists simple_agg_groupArrayLastArray; + +-- { echo } +-- BAD_ARGUMENTS +select groupArrayLast(number+1) from numbers(5); -- { serverError BAD_ARGUMENTS } +select groupArrayLastArray([number+1]) from numbers(5); -- { serverError BAD_ARGUMENTS } +-- groupArrayLast by number +select groupArrayLast(1)(number+1) from 
numbers(5); +select groupArrayLast(3)(number+1) from numbers(5); +select groupArrayLast(3)(number+1) from numbers(10); +-- groupArrayLast by String +select groupArrayLast(3)((number+1)::String) from numbers(5); +select groupArrayLast(3)((number+1)::String) from numbers(10); +-- groupArrayLastArray +select groupArrayLastArray(3)([1,2,3,4,5,6]); +select groupArrayLastArray(3)(['1','2','3','4','5','6']); +-- groupArrayLastMerge +-- [10,8,9] + [10,8,9] => [10,10,9] => [10,10,8] => [9,10,8] +-- ^ ^ ^ ^^ +-- (position to insert at) +select groupArrayLast(3)(number+1) state from remote('127.{1,1}', view(select * from numbers(10))); +select groupArrayLast(3)((number+1)::String) state from remote('127.{1,1}', view(select * from numbers(10))); +select groupArrayLast(3)([number+1]) state from remote('127.{1,1}', view(select * from numbers(10))); +select groupArrayLast(100)(number+1) state from remote('127.{1,1}', view(select * from numbers(10))); +select groupArrayLast(100)((number+1)::String) state from remote('127.{1,1}', view(select * from numbers(10))); +select groupArrayLast(100)([number+1]) state from remote('127.{1,1}', view(select * from numbers(10))); +-- SimpleAggregateFunction +create table simple_agg_groupArrayLastArray (key Int, value SimpleAggregateFunction(groupArrayLastArray(5), Array(UInt64))) engine=AggregatingMergeTree() order by key; +insert into simple_agg_groupArrayLastArray values (1, [1,2,3]), (1, [4,5,6]), (2, [4,5,6]), (2, [1,2,3]); +select * from simple_agg_groupArrayLastArray order by key, value; +system stop merges simple_agg_groupArrayLastArray; +insert into simple_agg_groupArrayLastArray values (1, [7,8]), (2, [7,8]); +select * from simple_agg_groupArrayLastArray order by key, value; +select * from simple_agg_groupArrayLastArray final order by key, value; diff --git a/tests/queries/0_stateless/add-test b/tests/queries/0_stateless/add-test index 2173a4d8cc2..39f6742f71c 100755 --- a/tests/queries/0_stateless/add-test +++ b/tests/queries/0_stateless/add-test @@ -25,4 +25,7 @@ fi set -x touch ${TESTS_PATH}/${NEW_TEST_NO}_${FILENAME}.${FILEEXT} +if [[ $FILEEXT == "sh" ]] ; then + chmod +x ${TESTS_PATH}/${NEW_TEST_NO}_${FILENAME}.${FILEEXT} +fi touch ${TESTS_PATH}/${NEW_TEST_NO}_${FILENAME}.reference diff --git a/tests/queries/0_stateless/filesystem_cache_queries/02226_filesystem_cache_profile_events.sh b/tests/queries/0_stateless/filesystem_cache_queries/02226_filesystem_cache_profile_events.sh deleted file mode 100755 index ab8511d85b3..00000000000 --- a/tests/queries/0_stateless/filesystem_cache_queries/02226_filesystem_cache_profile_events.sh +++ /dev/null @@ -1,85 +0,0 @@ -#!/usr/bin/env bash -# Tags: no-fasttest, no-parallel, no-s3-storage, no-random-settings, no-cpu-aarch64, no-replicated-database - -clickhouse client --multiquery --multiline --query """ -SET max_memory_usage='20G'; -SET enable_filesystem_cache_on_write_operations = 0; - -DROP TABLE IF EXISTS test; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='_storagePolicy'; -INSERT INTO test SELECT * FROM generateRandom('key UInt32, value String') LIMIT 10000; - -SET remote_filesystem_read_method='threadpool'; -""" - -query="SELECT * FROM test LIMIT 10" - -query_id=$(clickhouse client --query "select queryID() from ($query) limit 1" 2>&1) - -clickhouse client --multiquery --multiline --query """ -SYSTEM FLUSH LOGS; -SELECT ProfileEvents['CachedReadBufferReadFromSourceBytes'] > 0 as remote_fs_read, - ProfileEvents['CachedReadBufferReadFromCacheBytes'] > 0 as 
remote_fs_cache_read, - ProfileEvents['CachedReadBufferCacheWriteBytes'] > 0 as remote_fs_read_and_download -FROM system.query_log -WHERE query_id='$query_id' -AND type = 'QueryFinish' -AND current_database = currentDatabase() -ORDER BY query_start_time DESC -LIMIT 1; -""" - -clickhouse client --multiquery --multiline --query """ -set remote_filesystem_read_method = 'read'; -set local_filesystem_read_method = 'pread'; -""" - -query_id=$(clickhouse client --query "select queryID() from ($query) limit 1" 2>&1) - -clickhouse client --multiquery --multiline --query """ -SYSTEM FLUSH LOGS; -SELECT ProfileEvents['CachedReadBufferReadFromSourceBytes'] > 0 as remote_fs_read, - ProfileEvents['CachedReadBufferReadFromCacheBytes'] > 0 as remote_fs_cache_read, - ProfileEvents['CachedReadBufferCacheWriteBytes'] > 0 as remote_fs_read_and_download -FROM system.query_log -WHERE query_id='$query_id' -AND type = 'QueryFinish' -AND current_database = currentDatabase() -ORDER BY query_start_time DESC -LIMIT 1; -""" - - -clickhouse client --multiquery --multiline --query """ -set remote_filesystem_read_method='threadpool'; -""" - -clickhouse client --multiquery --multiline --query """ -SELECT * FROM test WHERE value LIKE '%abc%' ORDER BY value LIMIT 10 FORMAT Null; - -SET enable_filesystem_cache_on_write_operations = 1; - -TRUNCATE TABLE test; -SELECT count() FROM test; - -SYSTEM DROP FILESYSTEM CACHE; - -INSERT INTO test SELECT * FROM generateRandom('key UInt32, value String') LIMIT 10000; -""" - -query_id=$(clickhouse client --query "select queryID() from ($query) limit 1") - -clickhouse client --multiquery --multiline --query """ -SYSTEM FLUSH LOGS; -SELECT ProfileEvents['CachedReadBufferReadFromSourceBytes'] > 0 as remote_fs_read, - ProfileEvents['CachedReadBufferReadFromCacheBytes'] > 0 as remote_fs_cache_read, - ProfileEvents['CachedReadBufferCacheWriteBytes'] > 0 as remote_fs_read_and_download -FROM system.query_log -WHERE query_id='$query_id' -AND type = 'QueryFinish' -AND current_database = currentDatabase() -ORDER BY query_start_time DESC -LIMIT 1; - -DROP TABLE test; -""" diff --git a/tests/queries/0_stateless/filesystem_cache_queries/02240_system_filesystem_cache_table.queries b/tests/queries/0_stateless/filesystem_cache_queries/02240_system_filesystem_cache_table.queries deleted file mode 100644 index 228dccfcb5b..00000000000 --- a/tests/queries/0_stateless/filesystem_cache_queries/02240_system_filesystem_cache_table.queries +++ /dev/null @@ -1,33 +0,0 @@ --- { echo } - -SYSTEM DROP FILESYSTEM CACHE; -SET enable_filesystem_cache_on_write_operations=0; -DROP TABLE IF EXISTS test; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='_storagePolicy', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; -SYSTEM STOP MERGES test; -INSERT INTO test SELECT number, toString(number) FROM numbers(100); - -SELECT * FROM test FORMAT Null; -SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size; - -SYSTEM DROP FILESYSTEM CACHE; -SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; -SELECT * FROM test FORMAT Null; -SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; -SYSTEM DROP FILESYSTEM CACHE; -SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; - -DROP TABLE IF EXISTS test; -CREATE TABLE test (key UInt32, value String) 
Engine=MergeTree() ORDER BY key SETTINGS storage_policy='_storagePolicy_3', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; -INSERT INTO test SELECT number, toString(number) FROM numbers(100); -SELECT * FROM test FORMAT Null; -SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size; -SELECT * FROM test FORMAT Null; -SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size; - -SYSTEM DROP FILESYSTEM CACHE; -SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; -SELECT * FROM test FORMAT Null; -SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; -SYSTEM DROP FILESYSTEM CACHE; -SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; diff --git a/tests/queries/0_stateless/filesystem_cache_queries/02241_filesystem_cache_on_write_operations.queries b/tests/queries/0_stateless/filesystem_cache_queries/02241_filesystem_cache_on_write_operations.queries deleted file mode 100644 index bd185942e6c..00000000000 --- a/tests/queries/0_stateless/filesystem_cache_queries/02241_filesystem_cache_on_write_operations.queries +++ /dev/null @@ -1,115 +0,0 @@ --- { echo } - -SET enable_filesystem_cache_on_write_operations=1; - -DROP TABLE IF EXISTS test; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='_storagePolicy', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; -SYSTEM STOP MERGES test; -SYSTEM DROP FILESYSTEM CACHE; -SELECT file_segment_range_begin, file_segment_range_end, size, state -FROM -( - SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path - FROM - ( - SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path - FROM system.remote_data_paths - ) AS data_paths - INNER JOIN - system.filesystem_cache AS caches - ON data_paths.cache_path = caches.cache_path -) -WHERE endsWith(local_path, 'data.bin') -FORMAT Vertical; - -SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; -SELECT count() FROM system.filesystem_cache; - -INSERT INTO test SELECT number, toString(number) FROM numbers(100); - -SELECT file_segment_range_begin, file_segment_range_end, size, state -FROM -( - SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path - FROM - ( - SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path - FROM system.remote_data_paths - ) AS data_paths - INNER JOIN - system.filesystem_cache AS caches - ON data_paths.cache_path = caches.cache_path -) -WHERE endsWith(local_path, 'data.bin') -FORMAT Vertical; - -SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; -SELECT count() FROM system.filesystem_cache; - -SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0; - -SELECT * FROM test FORMAT Null; -SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0; - -SELECT * FROM test FORMAT Null; -SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0; - -SELECT count() size FROM 
system.filesystem_cache; - -SYSTEM DROP FILESYSTEM CACHE; - -INSERT INTO test SELECT number, toString(number) FROM numbers(100, 200); - -SELECT file_segment_range_begin, file_segment_range_end, size, state -FROM -( - SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path - FROM - ( - SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path - FROM system.remote_data_paths - ) AS data_paths - INNER JOIN - system.filesystem_cache AS caches - ON data_paths.cache_path = caches.cache_path -) -WHERE endsWith(local_path, 'data.bin') -FORMAT Vertical; - -SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; -SELECT count() FROM system.filesystem_cache; - -SELECT count() FROM system.filesystem_cache; -INSERT INTO test SELECT number, toString(number) FROM numbers(100) SETTINGS enable_filesystem_cache_on_write_operations=0; -SELECT count() FROM system.filesystem_cache; - -INSERT INTO test SELECT number, toString(number) FROM numbers(100); -INSERT INTO test SELECT number, toString(number) FROM numbers(300, 10000); -SELECT count() FROM system.filesystem_cache; - -SYSTEM START MERGES test; - -OPTIMIZE TABLE test FINAL; -SELECT count() FROM system.filesystem_cache; - -SET mutations_sync=2; -ALTER TABLE test UPDATE value = 'kek' WHERE key = 100; -SELECT count() FROM system.filesystem_cache; - -INSERT INTO test SELECT number, toString(number) FROM numbers(5000000); -SYSTEM FLUSH LOGS; -SELECT - query, ProfileEvents['RemoteFSReadBytes'] > 0 as remote_fs_read -FROM - system.query_log -WHERE - query LIKE 'SELECT number, toString(number) FROM numbers(5000000)%' - AND type = 'QueryFinish' - AND current_database = currentDatabase() -ORDER BY - query_start_time - DESC -LIMIT 1; - -SELECT count() FROM test; -SELECT count() FROM test WHERE value LIKE '%010%'; diff --git a/tests/queries/0_stateless/filesystem_cache_queries/02242_system_filesystem_cache_log_table.queries b/tests/queries/0_stateless/filesystem_cache_queries/02242_system_filesystem_cache_log_table.queries deleted file mode 100644 index 56a8710cc93..00000000000 --- a/tests/queries/0_stateless/filesystem_cache_queries/02242_system_filesystem_cache_log_table.queries +++ /dev/null @@ -1,19 +0,0 @@ --- { echo } - -SYSTEM DROP FILESYSTEM CACHE; -SET enable_filesystem_cache_log=1; -SET enable_filesystem_cache_on_write_operations=0; - -DROP TABLE IF EXISTS test; -DROP TABLE IF EXISTS system.filesystem_cache_log; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='_storagePolicy', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; -SYSTEM STOP MERGES test; -INSERT INTO test SELECT number, toString(number) FROM numbers(100000); - -SELECT 2240, '_storagePolicy', * FROM test FORMAT Null; -SYSTEM FLUSH LOGS; -SELECT file_segment_range, read_type FROM system.filesystem_cache_log WHERE query_id = (SELECT query_id from system.query_log where query LIKE '%SELECT 2240%_storagePolicy%' AND current_database = currentDatabase() AND type = 'QueryFinish' ORDER BY event_time desc LIMIT 1) ORDER BY file_segment_range, read_type; - -SELECT 2241, '_storagePolicy', * FROM test FORMAT Null; -SYSTEM FLUSH LOGS; -SELECT file_segment_range, read_type FROM system.filesystem_cache_log WHERE query_id = (SELECT query_id from system.query_log where query LIKE '%SELECT 2241%_storagePolicy%' AND 
current_database = currentDatabase() AND type = 'QueryFinish' ORDER BY event_time desc LIMIT 1) ORDER BY file_segment_range, read_type; diff --git a/tests/queries/0_stateless/filesystem_cache_queries/02286_drop_filesystem_cache.queries b/tests/queries/0_stateless/filesystem_cache_queries/02286_drop_filesystem_cache.queries deleted file mode 100644 index 96774db32ed..00000000000 --- a/tests/queries/0_stateless/filesystem_cache_queries/02286_drop_filesystem_cache.queries +++ /dev/null @@ -1,71 +0,0 @@ --- { echo } - -SET enable_filesystem_cache_on_write_operations=0; - -DROP TABLE IF EXISTS test; - -CREATE TABLE test (key UInt32, value String) -Engine=MergeTree() -ORDER BY key -SETTINGS storage_policy='_storagePolicy', min_bytes_for_wide_part = 10485760; - -SYSTEM STOP MERGES; -SYSTEM DROP FILESYSTEM CACHE; - -SELECT count() FROM system.filesystem_cache; -INSERT INTO test SELECT number, toString(number) FROM numbers(100); - -SELECT * FROM test FORMAT Null; -SELECT count() FROM system.filesystem_cache; - -SYSTEM DROP FILESYSTEM CACHE; -SELECT count() FROM system.filesystem_cache; - -SELECT * FROM test FORMAT Null; -SELECT count() FROM system.filesystem_cache; - -SYSTEM DROP FILESYSTEM CACHE './data'; -- { serverError 36 } -SELECT count() FROM system.filesystem_cache; - -SELECT * FROM test FORMAT Null; -SELECT count() FROM system.filesystem_cache; -SELECT count() -FROM ( - SELECT - arrayJoin(cache_paths) AS cache_path, - local_path, - remote_path - FROM - system.remote_data_paths - ) AS data_paths -INNER JOIN system.filesystem_cache AS caches -ON data_paths.cache_path = caches.cache_path; - -DROP TABLE test NO DELAY; -SELECT count() FROM system.filesystem_cache; -SELECT cache_path FROM system.filesystem_cache; -SELECT cache_path, local_path -FROM ( - SELECT - arrayJoin(cache_paths) AS cache_path, - local_path, - remote_path - FROM - system.remote_data_paths - ) AS data_paths -INNER JOIN system.filesystem_cache AS caches -ON data_paths.cache_path = caches.cache_path; - -DROP TABLE IF EXISTS test2; - -CREATE TABLE test2 (key UInt32, value String) -Engine=MergeTree() -ORDER BY key -SETTINGS storage_policy='_storagePolicy_2', min_bytes_for_wide_part = 10485760; - -INSERT INTO test2 SELECT number, toString(number) FROM numbers(100); -SELECT * FROM test2 FORMAT Null; -SELECT count() FROM system.filesystem_cache; - -SYSTEM DROP FILESYSTEM CACHE '_storagePolicy_2/'; -SELECT count() FROM system.filesystem_cache; diff --git a/tests/queries/0_stateless/filesystem_cache_queries/02313_filesystem_cache_seeks.queries b/tests/queries/0_stateless/filesystem_cache_queries/02313_filesystem_cache_seeks.queries deleted file mode 100644 index 7f343fb83bd..00000000000 --- a/tests/queries/0_stateless/filesystem_cache_queries/02313_filesystem_cache_seeks.queries +++ /dev/null @@ -1,24 +0,0 @@ -SYSTEM DROP FILESYSTEM CACHE; -SET send_logs_level = 'fatal'; -- Ignore retriable errors like "AWSClient: Failed to make request" - -DROP TABLE IF EXISTS test_02313; -CREATE TABLE test_02313 (id Int32, val String) -ENGINE = MergeTree() -ORDER BY tuple() -SETTINGS storage_policy = '_storagePolicy'; - -INSERT INTO test_02313 - SELECT * FROM - generateRandom('id Int32, val String') - LIMIT 100000 -SETTINGS enable_filesystem_cache_on_write_operations = 0; - -SELECT * FROM test_02313 WHERE val LIKE concat('%', randomPrintableASCII(3), '%') FORMAT Null; -SELECT * FROM test_02313 WHERE val LIKE concat('%', randomPrintableASCII(3), '%') FORMAT Null; -SELECT * FROM test_02313 WHERE val LIKE concat('%', randomPrintableASCII(3), '%') 
FORMAT Null; -SELECT * FROM test_02313 WHERE val LIKE concat('%', randomPrintableASCII(3), '%') FORMAT Null; -SELECT * FROM test_02313 WHERE val LIKE concat('%', randomPrintableASCII(3), '%') FORMAT Null; -SELECT * FROM test_02313 WHERE val LIKE concat('%', randomPrintableASCII(3), '%') FORMAT Null; -SELECT * FROM test_02313 WHERE val LIKE concat('%', randomPrintableASCII(3), '%') FORMAT Null; - -DROP TABLE test_02313; diff --git a/tests/queries/0_stateless/format_schemas/02482_list_of_structs.capnp b/tests/queries/0_stateless/format_schemas/02482_list_of_structs.capnp new file mode 100644 index 00000000000..b203b5b1bdf --- /dev/null +++ b/tests/queries/0_stateless/format_schemas/02482_list_of_structs.capnp @@ -0,0 +1,11 @@ +@0xb6ecde1cd54a101d; + +struct Nested { + nested @0 :List(MyField); +} + +struct MyField { + x @0 :Int64; + y @1 :Int64; +} + diff --git a/tests/queries/0_stateless/format_schemas/02483_decimals.capnp b/tests/queries/0_stateless/format_schemas/02483_decimals.capnp new file mode 100644 index 00000000000..eff4d488420 --- /dev/null +++ b/tests/queries/0_stateless/format_schemas/02483_decimals.capnp @@ -0,0 +1,7 @@ +@0xb6acde1cd54a101d; + +struct Message { + decimal64 @0 :Int64; + decimal32 @1 :Int32; +} + diff --git a/tests/queries/1_stateful/00047_bar.reference b/tests/queries/1_stateful/00047_bar.reference index c038f59946e..86f7ca3a9b6 100644 --- a/tests/queries/1_stateful/00047_bar.reference +++ b/tests/queries/1_stateful/00047_bar.reference @@ -2,21 +2,21 @@ 732797 475698 ████████████████████████████████████████████████████████████████████████▋ 598875 337212 ███████████████████████████████████████████████████▌ 792887 252197 ██████████████████████████████████████▌ -3807842 196036 █████████████████████████████▊ +3807842 196036 █████████████████████████████▉ 25703952 147211 ██████████████████████▌ -716829 90109 █████████████▋ +716829 90109 █████████████▊ 59183 85379 █████████████ -33010362 77807 ███████████▊ -800784 77492 ███████████▋ +33010362 77807 ███████████▉ +800784 77492 ███████████▊ 20810645 73213 ███████████▏ 25843850 68945 ██████████▌ 23447120 67570 ██████████▎ -14739804 64174 █████████▋ +14739804 64174 █████████▊ 32077710 60456 █████████▏ -22446879 58389 ████████▊ +22446879 58389 ████████▉ 170282 57017 ████████▋ 11482817 52345 ████████ -63469 52142 ███████▊ +63469 52142 ███████▉ 29103473 47758 ███████▎ 10136747 44080 ██████▋ 27528801 43395 ██████▋ @@ -27,12 +27,12 @@ 28600281 32776 █████ 32046685 28788 ████▍ 10130880 26603 ████ -8676831 25733 ███▊ -53230 25595 ███▊ -20271226 25585 ███▊ -17420663 25496 ███▊ -631207 25270 ███▋ -633130 24744 ███▋ +8676831 25733 ███▉ +53230 25595 ███▉ +20271226 25585 ███▉ +17420663 25496 ███▉ +631207 25270 ███▊ +633130 24744 ███▊ 14324015 23349 ███▌ 8537965 21270 ███▎ 11285298 20825 ███▏ @@ -41,9 +41,9 @@ 16368233 19897 ███ 81602 19724 ███ 62896 19717 ███ -12967664 19402 ██▊ -15996597 18557 ██▋ -4379238 18370 ██▋ +12967664 19402 ██▉ +15996597 18557 ██▊ +4379238 18370 ██▊ 90982 17443 ██▋ 18211045 17390 ██▋ 14625884 17302 ██▋ @@ -68,19 +68,19 @@ 125776 13308 ██ 11312316 13181 ██ 32667326 13181 ██ -28628973 12922 █▊ -122804 12520 █▊ -12322758 12352 █▊ -1301819 12283 █▊ -10769545 12183 █▋ -21566939 12170 █▋ -28905364 12158 █▋ -4250765 12049 █▋ -15009727 11818 █▋ -12761932 11733 █▋ -26995888 11658 █▋ -12759346 11514 █▋ -1507911 11452 █▋ +28628973 12922 █▉ +122804 12520 █▉ +12322758 12352 █▉ +1301819 12283 █▉ +10769545 12183 █▊ +21566939 12170 █▊ +28905364 12158 █▊ +4250765 12049 █▊ +15009727 11818 █▊ +12761932 11733 █▊ +26995888 11658 
█▊ +12759346 11514 █▊ +1507911 11452 █▊ 968488 11444 █▋ 15736172 11358 █▋ 54310 11193 █▋ @@ -102,21 +102,21 @@ 732797 475698 ████████████████████████████████████████████████████████████████████████▋ 598875 337212 ███████████████████████████████████████████████████▌ 792887 252197 ██████████████████████████████████████▌ -3807842 196036 █████████████████████████████▊ +3807842 196036 █████████████████████████████▉ 25703952 147211 ██████████████████████▌ -716829 90109 █████████████▋ +716829 90109 █████████████▊ 59183 85379 █████████████ -33010362 77807 ███████████▊ -800784 77492 ███████████▋ +33010362 77807 ███████████▉ +800784 77492 ███████████▊ 20810645 73213 ███████████▏ 25843850 68945 ██████████▌ 23447120 67570 ██████████▎ -14739804 64174 █████████▋ +14739804 64174 █████████▊ 32077710 60456 █████████▏ -22446879 58389 ████████▊ +22446879 58389 ████████▉ 170282 57017 ████████▋ 11482817 52345 ████████ -63469 52142 ███████▊ +63469 52142 ███████▉ 29103473 47758 ███████▎ 10136747 44080 ██████▋ 27528801 43395 ██████▋ @@ -127,12 +127,12 @@ 28600281 32776 █████ 32046685 28788 ████▍ 10130880 26603 ████ -8676831 25733 ███▊ -53230 25595 ███▊ -20271226 25585 ███▊ -17420663 25496 ███▊ -631207 25270 ███▋ -633130 24744 ███▋ +8676831 25733 ███▉ +53230 25595 ███▉ +20271226 25585 ███▉ +17420663 25496 ███▉ +631207 25270 ███▊ +633130 24744 ███▊ 14324015 23349 ███▌ 8537965 21270 ███▎ 11285298 20825 ███▏ @@ -141,9 +141,9 @@ 16368233 19897 ███ 81602 19724 ███ 62896 19717 ███ -12967664 19402 ██▊ -15996597 18557 ██▋ -4379238 18370 ██▋ +12967664 19402 ██▉ +15996597 18557 ██▊ +4379238 18370 ██▊ 90982 17443 ██▋ 18211045 17390 ██▋ 14625884 17302 ██▋ @@ -168,19 +168,19 @@ 125776 13308 ██ 11312316 13181 ██ 32667326 13181 ██ -28628973 12922 █▊ -122804 12520 █▊ -12322758 12352 █▊ -1301819 12283 █▊ -10769545 12183 █▋ -21566939 12170 █▋ -28905364 12158 █▋ -4250765 12049 █▋ -15009727 11818 █▋ -12761932 11733 █▋ -26995888 11658 █▋ -12759346 11514 █▋ -1507911 11452 █▋ +28628973 12922 █▉ +122804 12520 █▉ +12322758 12352 █▉ +1301819 12283 █▉ +10769545 12183 █▊ +21566939 12170 █▊ +28905364 12158 █▊ +4250765 12049 █▊ +15009727 11818 █▊ +12761932 11733 █▊ +26995888 11658 █▊ +12759346 11514 █▊ +1507911 11452 █▊ 968488 11444 █▋ 15736172 11358 █▋ 54310 11193 █▋ diff --git a/tests/queries/1_stateful/00062_loyalty.reference b/tests/queries/1_stateful/00062_loyalty.reference index 605e4881dd4..f6451faa815 100644 --- a/tests/queries/1_stateful/00062_loyalty.reference +++ b/tests/queries/1_stateful/00062_loyalty.reference @@ -1,12 +1,12 @@ -10 5604 ███████████████████████████████████████████████████████████████████████████████▎ --9 603 ██████████████████████████████████████████████████████████▊ +-9 603 ██████████████████████████████████████████████████████████▉ -8 236 ██████████████████████████████████████████████████▎ -7 133 █████████████████████████████████████████████ -6 123 ████████████████████████████████████████████▎ --5 105 ██████████████████████████████████████████▊ +-5 105 ██████████████████████████████████████████▉ 5 82 ████████████████████████████████████████▋ 6 91 █████████████████████████████████████████▌ 7 102 ██████████████████████████████████████████▌ 8 156 ██████████████████████████████████████████████▍ 9 222 █████████████████████████████████████████████████▋ -10 4291 ████████████████████████████████████████████████████████████████████████████▊ +10 4291 ████████████████████████████████████████████████████████████████████████████▉ diff --git a/tests/queries/1_stateful/00063_loyalty_joins.reference 
b/tests/queries/1_stateful/00063_loyalty_joins.reference index e4c3619bf5a..f925b457c6a 100644 --- a/tests/queries/1_stateful/00063_loyalty_joins.reference +++ b/tests/queries/1_stateful/00063_loyalty_joins.reference @@ -37,15 +37,15 @@ 8 74083 9 145771 10 1244506 --10 2932018 ███████████████████████████████████████████████████████████████████████████████▊ +-10 2932018 ███████████████████████████████████████████████████████████████████████████████▉ -9 472052 ██████████████████████████████████████████████████████████████████████ -8 136048 ███████████████████████████████████████████████████████████████▍ -7 73688 ████████████████████████████████████████████████████████████ -6 56766 ██████████████████████████████████████████████████████████▋ -5 55691 ██████████████████████████████████████████████████████████▌ 5 47082 █████████████████████████████████████████████████████████▋ -6 32860 ███████████████████████████████████████████████████████▋ +6 32860 ███████████████████████████████████████████████████████▊ 7 52819 ██████████████████████████████████████████████████████████▎ 8 74083 ████████████████████████████████████████████████████████████▏ -9 145771 ███████████████████████████████████████████████████████████████▋ +9 145771 ███████████████████████████████████████████████████████████████▊ 10 1244506 ███████████████████████████████████████████████████████████████████████████▎ diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt index 70c32c67063..bb9d4c88fa1 100644 --- a/utils/CMakeLists.txt +++ b/utils/CMakeLists.txt @@ -2,11 +2,7 @@ if (USE_CLANG_TIDY) set (CMAKE_CXX_CLANG_TIDY "${CLANG_TIDY_PATH}") endif () -if(USE_STATIC_LIBRARIES) - set(MAX_LINKER_MEMORY 3500) -else() - set(MAX_LINKER_MEMORY 2500) -endif() +set(MAX_LINKER_MEMORY 3500) include(../cmake/limit_jobs.cmake) if (ENABLE_CLICKHOUSE_SELF_EXTRACTING) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 40a8af9b5b6..0bf8023d698 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -33,6 +33,7 @@ CustomSeparatedWithNames CustomSeparatedWithNamesAndTypes DBMSs DateTime +DateTimes DockerHub Doxygen Encodings @@ -55,6 +56,7 @@ IPv IntN Integrations JSONAsString +JSONAsObject JSONColumns JSONColumnsWithMetadata JSONCompact @@ -171,6 +173,7 @@ Werror Woboq WriteBuffer WriteBuffers +WithNamesAndTypes XCode YAML YYYY @@ -247,6 +250,7 @@ datafiles dataset datasets datetime +datetimes dbms ddl deallocation @@ -361,6 +365,7 @@ mysqldump mysqljs noop nullable +nullability num obfuscator odbc diff --git a/utils/check-style/check-style b/utils/check-style/check-style index 5e5631e7e58..ad34f5e82e1 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -279,6 +279,8 @@ std_cerr_cout_excludes=( /examples/ /tests/ _fuzzer + # DUMP() + base/base/iostream_debug_helpers.h # OK src/Common/ProgressIndication.cpp # only under #ifdef DBMS_HASH_MAP_DEBUG_RESIZES, that is used only in tests @@ -292,6 +294,7 @@ std_cerr_cout_excludes=( # IProcessor::dump() src/Processors/IProcessor.cpp src/Client/ClientBase.cpp + src/Client/LineReader.cpp src/Client/QueryFuzzer.cpp src/Client/Suggest.cpp src/Bridge/IBridge.cpp @@ -299,7 +302,7 @@ std_cerr_cout_excludes=( src/Loggers/Loggers.cpp ) sources_with_std_cerr_cout=( $( - find $ROOT_PATH/src -name '*.h' -or -name '*.cpp' | \ + find $ROOT_PATH/{src,base} -name '*.h' -or -name '*.cpp' | \ grep -vP $EXCLUDE_DIRS | \ grep -F -v $(printf -- "-e %s " 
"${std_cerr_cout_excludes[@]}") | \ xargs grep -F --with-filename -e std::cerr -e std::cout | cut -d: -f1 | sort -u diff --git a/utils/check-style/check-submodules b/utils/check-style/check-submodules index 815e6c13c0f..1d15bac9d69 100755 --- a/utils/check-style/check-submodules +++ b/utils/check-style/check-submodules @@ -12,9 +12,9 @@ cd "$GIT_ROOT" # Remove keys for submodule.*.path parameters, the values are separated by \0 # and check if the directory exists git config --file .gitmodules --null --get-regexp path | sed -z 's|.*\n||' | \ - xargs -P100 -0 --no-run-if-empty -I{} bash -c 'if ! test -d {}; then echo Directory for submodule {} is not found; exit 1; fi' 2>&1 + xargs -P100 -0 --no-run-if-empty -I{} bash -c 'if ! test -d '"'{}'"'; then echo Directory for submodule {} is not found; exit 1; fi' 2>&1 # And check that the submodule is fine git config --file .gitmodules --null --get-regexp path | sed -z 's|.*\n||' | \ - xargs -P100 -0 --no-run-if-empty -I{} git submodule status -q {} 2>&1 + xargs -P100 -0 --no-run-if-empty -I{} git submodule status -q '{}' 2>&1 diff --git a/utils/zookeeper-cli/CMakeLists.txt b/utils/zookeeper-cli/CMakeLists.txt index edccb69755e..be8cf81320c 100644 --- a/utils/zookeeper-cli/CMakeLists.txt +++ b/utils/zookeeper-cli/CMakeLists.txt @@ -1,2 +1,4 @@ -clickhouse_add_executable(clickhouse-zookeeper-cli zookeeper-cli.cpp) +clickhouse_add_executable(clickhouse-zookeeper-cli + zookeeper-cli.cpp + ${ClickHouse_SOURCE_DIR}/src/Client/LineReader.cpp) target_link_libraries(clickhouse-zookeeper-cli PRIVATE clickhouse_common_zookeeper_no_log) diff --git a/utils/zookeeper-cli/zookeeper-cli.cpp b/utils/zookeeper-cli/zookeeper-cli.cpp index bfcdb0a90de..fe11c66ea9c 100644 --- a/utils/zookeeper-cli/zookeeper-cli.cpp +++ b/utils/zookeeper-cli/zookeeper-cli.cpp @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include #include @@ -70,7 +70,7 @@ int main(int argc, char ** argv) Poco::Logger::root().setLevel("trace"); zkutil::ZooKeeper zk{zkutil::ZooKeeperArgs(argv[1])}; - LineReader lr({}, false, {"\\"}, {}); + DB::LineReader lr({}, false, {"\\"}, {}); do {