diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml
index c90df6e57b7..cbd3bd7bec4 100644
--- a/.github/workflows/backport_branches.yml
+++ b/.github/workflows/backport_branches.yml
@@ -12,11 +12,10 @@ jobs:
   PythonUnitTests:
     runs-on: [self-hosted, style-checker]
     steps:
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Python unit tests
         run: |
           cd "$GITHUB_WORKSPACE/tests/ci"
@@ -24,34 +23,32 @@ jobs:
   DockerHubPushAarch64:
     runs-on: [self-hosted, style-checker-aarch64]
     steps:
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Images check
         run: |
           cd "$GITHUB_WORKSPACE/tests/ci"
           python3 docker_images_check.py --suffix aarch64
       - name: Upload images files to artifacts
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v3
         with:
           name: changed_images_aarch64
           path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json
   DockerHubPushAmd64:
     runs-on: [self-hosted, style-checker]
     steps:
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Images check
         run: |
           cd "$GITHUB_WORKSPACE/tests/ci"
           python3 docker_images_check.py --suffix amd64
       - name: Upload images files to artifacts
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v3
         with:
           name: changed_images_amd64
           path: ${{ runner.temp }}/docker_images_check/changed_images_amd64.json
@@ -59,18 +56,17 @@ jobs:
     needs: [DockerHubPushAmd64, DockerHubPushAarch64]
     runs-on: [self-hosted, style-checker]
     steps:
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Download changed aarch64 images
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           name: changed_images_aarch64
           path: ${{ runner.temp }}
       - name: Download changed amd64 images
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           name: changed_images_amd64
           path: ${{ runner.temp }}
@@ -79,7 +75,7 @@ jobs:
           cd "$GITHUB_WORKSPACE/tests/ci"
           python3 docker_manifests_merge.py --suffix amd64 --suffix aarch64
       - name: Upload images files to artifacts
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v3
         with:
           name: changed_images
           path: ${{ runner.temp }}/changed_images.json
@@ -94,13 +90,12 @@ jobs:
           REPO_COPY=${{runner.temp}}/compatibility_check/ClickHouse
           REPORTS_PATH=${{runner.temp}}/reports_dir
           EOF
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
       - name: CompatibilityCheck
@@ -132,28 +127,25 @@ jobs:
           BUILD_NAME=package_release
           EOF
       - name: Download changed images
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
          name: changed_images
          path: ${{ env.IMAGES_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
         with:
+          clear-repository: true
+          submodules: true
           fetch-depth: 0 # For a proper version and performance artifacts
       - name: Build
         run: |
-          git -C "$GITHUB_WORKSPACE" submodule sync
-          git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10
           sudo rm -fr "$TEMP_PATH"
           mkdir -p "$TEMP_PATH"
           cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
           cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
       - name: Upload build URLs to artifacts
         if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v3
         with:
           name: ${{ env.BUILD_URLS }}
           path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
@@ -177,28 +169,25 @@ jobs:
           BUILD_NAME=package_aarch64
           EOF
       - name: Download changed images
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
          name: changed_images
          path: ${{ env.IMAGES_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
         with:
+          clear-repository: true
+          submodules: true
           fetch-depth: 0 # For a proper version and performance artifacts
       - name: Build
         run: |
-          git -C "$GITHUB_WORKSPACE" submodule sync
-          git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10
           sudo rm -fr "$TEMP_PATH"
           mkdir -p "$TEMP_PATH"
           cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
           cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
       - name: Upload build URLs to artifacts
         if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v3
         with:
           name: ${{ env.BUILD_URLS }}
           path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
@@ -222,26 +211,24 @@ jobs:
           BUILD_NAME=package_asan
           EOF
       - name: Download changed images
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           name: changed_images
           path: ${{ env.IMAGES_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
+          submodules: true
       - name: Build
         run: |
-          git -C "$GITHUB_WORKSPACE" submodule sync
-          git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10
           sudo rm -fr "$TEMP_PATH"
           mkdir -p "$TEMP_PATH"
           cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
           cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
       - name: Upload build URLs to artifacts
         if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v3
         with:
           name: ${{ env.BUILD_URLS }}
           path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
@@ -265,26 +252,24 @@ jobs:
           BUILD_NAME=package_tsan
           EOF
       - name: Download changed images
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
        with:
          name: changed_images
          path: ${{ env.IMAGES_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
+          submodules: true
       - name: Build
         run: |
-          git -C "$GITHUB_WORKSPACE" submodule sync
-          git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10
           sudo rm -fr "$TEMP_PATH"
           mkdir -p "$TEMP_PATH"
           cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
           cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
       - name: Upload build URLs to artifacts
         if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v3
         with:
           name: ${{ env.BUILD_URLS }}
           path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
@@ -308,26 +293,24 @@ jobs:
           BUILD_NAME=package_debug
           EOF
       - name: Download changed images
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           name: changed_images
           path: ${{ env.IMAGES_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
+          submodules: true
       - name: Build
         run: |
-          git -C "$GITHUB_WORKSPACE" submodule sync
-          git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10
           sudo rm -fr "$TEMP_PATH"
           mkdir -p "$TEMP_PATH"
           cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
           cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
       - name: Upload build URLs to artifacts
         if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v3
         with:
           name: ${{ env.BUILD_URLS }}
           path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
@@ -351,28 +334,25 @@ jobs:
           BUILD_NAME=binary_darwin
           EOF
       - name: Download changed images
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           name: changed_images
           path: ${{ env.IMAGES_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
         with:
+          clear-repository: true
+          submodules: true
           fetch-depth: 0 # otherwise we will have no info about contributors
       - name: Build
         run: |
-          git -C "$GITHUB_WORKSPACE" submodule sync
-          git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10
           sudo rm -fr "$TEMP_PATH"
           mkdir -p "$TEMP_PATH"
           cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
           cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
       - name: Upload build URLs to artifacts
         if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v3
         with:
           name: ${{ env.BUILD_URLS }}
           path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
@@ -396,28 +376,25 @@ jobs:
           BUILD_NAME=binary_darwin_aarch64
           EOF
       - name: Download changed images
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           name: changed_images
           path: ${{ env.IMAGES_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
         with:
+          clear-repository: true
+          submodules: true
           fetch-depth: 0 # otherwise we will have no info about contributors
       - name: Build
         run: |
-          git -C "$GITHUB_WORKSPACE" submodule sync
-          git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10
           sudo rm -fr "$TEMP_PATH"
           mkdir -p "$TEMP_PATH"
           cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
           cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
       - name: Upload build URLs to artifacts
         if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v3
         with:
           name: ${{ env.BUILD_URLS }}
           path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
@@ -436,12 +413,10 @@ jobs:
       - BuilderDebAarch64
     runs-on: [self-hosted, style-checker]
     steps:
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
         with:
+          clear-repository: true
           fetch-depth: 0 # It MUST BE THE SAME for all dependencies and the job itself
       - name: Check docker clickhouse/clickhouse-server building
         run: |
@@ -477,14 +452,13 @@ jobs:
           NEEDS_DATA_PATH=${{runner.temp}}/needs.json
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
      - name: Report Builder
        run: |
          sudo rm -fr "$TEMP_PATH"
@@ -516,14 +490,13 @@ jobs:
           NEEDS_DATA_PATH=${{runner.temp}}/needs.json
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Report Builder
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -556,14 +529,13 @@ jobs:
           KILL_TIMEOUT=10800
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Functional test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -594,14 +566,13 @@ jobs:
           KILL_TIMEOUT=3600
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Functional test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -635,14 +606,13 @@ jobs:
           REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Stress test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -672,14 +642,13 @@ jobs:
           REPO_COPY=${{runner.temp}}/integration_tests_release/ClickHouse
           EOF
       - name: Download json reports
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           path: ${{ env.REPORTS_PATH }}
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Integration test
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -706,11 +675,10 @@ jobs:
       - CompatibilityCheck
     runs-on: [self-hosted, style-checker]
     steps:
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Finish label
         run: |
           cd "$GITHUB_WORKSPACE/tests/ci"
diff --git a/.github/workflows/cherry_pick.yml b/.github/workflows/cherry_pick.yml
index 3e6f9e76c56..065e584182b 100644
--- a/.github/workflows/cherry_pick.yml
+++ b/.github/workflows/cherry_pick.yml
@@ -28,8 +28,9 @@ jobs:
           REPO_TEAM=core
           EOF
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
         with:
+          clear-repository: true
           token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
           fetch-depth: 0
       - name: Cherry pick
diff --git a/.github/workflows/docs_check.yml b/.github/workflows/docs_check.yml
index 7a15e77becb..a513eb9216d 100644
--- a/.github/workflows/docs_check.yml
+++ b/.github/workflows/docs_check.yml
@@ -16,15 +16,15 @@ on:  # yamllint disable-line rule:truthy
     - 'docker/docs/**'
     - 'docs/**'
     - 'website/**'
+    - 'utils/check-style/aspell-ignore/**'
 jobs:
   CheckLabels:
     runs-on: [self-hosted, style-checker]
     steps:
-      - name: Clear repository
-        run: |
-          sudo rm -rf "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Labels check
         run: |
           cd "$GITHUB_WORKSPACE/tests/ci"
@@ -33,17 +33,16 @@ jobs:
     needs: CheckLabels
     runs-on: [self-hosted, style-checker-aarch64]
     steps:
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Images check
         run: |
           cd "$GITHUB_WORKSPACE/tests/ci"
           python3 docker_images_check.py --suffix aarch64
       - name: Upload images files to artifacts
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v3
         with:
           name: changed_images_aarch64
           path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json
@@ -51,17 +50,16 @@ jobs:
     needs: CheckLabels
     runs-on: [self-hosted, style-checker]
     steps:
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Images check
         run: |
           cd "$GITHUB_WORKSPACE/tests/ci"
           python3 docker_images_check.py --suffix amd64
       - name: Upload images files to artifacts
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v3
         with:
           name: changed_images_amd64
           path: ${{ runner.temp }}/docker_images_check/changed_images_amd64.json
@@ -69,18 +67,17 @@ jobs:
     needs: [DockerHubPushAmd64, DockerHubPushAarch64]
     runs-on: [self-hosted, style-checker]
     steps:
-      - name: Clear repository
-        run: |
-          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
       - name: Check out repository code
-        uses: actions/checkout@v2
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
       - name: Download changed aarch64 images
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           name: changed_images_aarch64
           path: ${{ runner.temp }}
       - name: Download changed amd64 images
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v3
         with:
           name: changed_images_amd64
           path: ${{ runner.temp }}
@@ -89,7 +86,7 @@ jobs:
           cd "$GITHUB_WORKSPACE/tests/ci"
           python3 docker_manifests_merge.py --suffix amd64 --suffix aarch64
       - name: Upload images files to artifacts
-        uses: actions/upload-artifact@v2
+ uses: actions/upload-artifact@v3 with: name: changed_images path: ${{ runner.temp }}/changed_images.json @@ -109,15 +106,14 @@ jobs: - name: Download changed images # even if artifact does not exist, e.g. on `do not test` label or failed Docker job continue-on-error: true - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.TEMP_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Style Check run: | cd "$GITHUB_WORKSPACE/tests/ci" @@ -139,15 +135,14 @@ jobs: REPO_COPY=${{runner.temp}}/docs_check/ClickHouse EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.TEMP_PATH }} - - name: Clear repository - run: | - sudo rm -rf "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Docs Check run: | cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" @@ -166,11 +161,10 @@ jobs: - DocsCheck runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Finish label run: | cd "$GITHUB_WORKSPACE/tests/ci" diff --git a/.github/workflows/docs_release.yml b/.github/workflows/docs_release.yml index da67edd4aa1..fc4b9d88c3e 100644 --- a/.github/workflows/docs_release.yml +++ b/.github/workflows/docs_release.yml @@ -17,39 +17,38 @@ concurrency: - 'docs/**' - 'utils/list-versions/version_date.tsv' - 'website/**' + - 'utils/check-style/aspell-ignore/**' workflow_dispatch: jobs: DockerHubPushAarch64: runs-on: [self-hosted, style-checker-aarch64] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Images check run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_images_check.py --suffix aarch64 - name: Upload images files to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: changed_images_aarch64 path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json DockerHubPushAmd64: runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Images check run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_images_check.py --suffix amd64 - name: Upload images files to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: changed_images_amd64 path: ${{ runner.temp }}/docker_images_check/changed_images_amd64.json @@ -57,18 +56,17 @@ jobs: needs: [DockerHubPushAmd64, DockerHubPushAarch64] runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: 
Download changed aarch64 images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images_aarch64 path: ${{ runner.temp }} - name: Download changed amd64 images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images_amd64 path: ${{ runner.temp }} @@ -77,7 +75,7 @@ jobs: cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_manifests_merge.py --suffix amd64 --suffix aarch64 - name: Upload images files to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: changed_images path: ${{ runner.temp }}/changed_images.json @@ -96,13 +94,12 @@ jobs: ${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}} RCSK EOF - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.TEMP_PATH }} diff --git a/.github/workflows/jepsen.yml b/.github/workflows/jepsen.yml index 5afc066065e..e67df15c4d3 100644 --- a/.github/workflows/jepsen.yml +++ b/.github/workflows/jepsen.yml @@ -19,12 +19,10 @@ jobs: TEMP_PATH=${{runner.temp}}/keeper_jepsen REPO_COPY=${{runner.temp}}/keeper_jepsen/ClickHouse EOF - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true fetch-depth: 0 - name: Jepsen Test run: | @@ -50,12 +48,10 @@ jobs: # TEMP_PATH=${{runner.temp}}/server_jepsen # REPO_COPY=${{runner.temp}}/server_jepsen/ClickHouse # EOF - # - name: Clear repository - # run: | - # sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" # - name: Check out repository code - # uses: actions/checkout@v2 + # uses: ClickHouse/checkout@v1 # with: + # clear-repository: true # fetch-depth: 0 # - name: Jepsen Test # run: | diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index f3d672136ef..dc81755ec7b 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -12,11 +12,10 @@ jobs: PythonUnitTests: runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Python unit tests run: | cd "$GITHUB_WORKSPACE/tests/ci" @@ -24,34 +23,32 @@ jobs: DockerHubPushAarch64: runs-on: [self-hosted, style-checker-aarch64] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Images check run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_images_check.py --suffix aarch64 - name: Upload images files to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: changed_images_aarch64 path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json DockerHubPushAmd64: runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + 
with: + clear-repository: true - name: Images check run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_images_check.py --suffix amd64 - name: Upload images files to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: changed_images_amd64 path: ${{ runner.temp }}/docker_images_check/changed_images_amd64.json @@ -59,18 +56,17 @@ jobs: needs: [DockerHubPushAmd64, DockerHubPushAarch64, PythonUnitTests] runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Download changed aarch64 images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images_aarch64 path: ${{ runner.temp }} - name: Download changed amd64 images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images_amd64 path: ${{ runner.temp }} @@ -79,7 +75,7 @@ jobs: cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_manifests_merge.py --suffix amd64 --suffix aarch64 - name: Upload images files to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: changed_images path: ${{ runner.temp }}/changed_images.json @@ -96,15 +92,14 @@ jobs: - name: Download changed images # even if artifact does not exist, e.g. on `do not test` label or failed Docker job continue-on-error: true - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.TEMP_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Style Check run: | cd "$GITHUB_WORKSPACE/tests/ci" @@ -126,13 +121,12 @@ jobs: REPO_COPY=${{runner.temp}}/compatibility_check/ClickHouse REPORTS_PATH=${{runner.temp}}/reports_dir EOF - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - name: CompatibilityCheck @@ -158,13 +152,12 @@ jobs: REPO_COPY=${{runner.temp}}/split_build_check/ClickHouse REPORTS_PATH=${{runner.temp}}/reports_dir EOF - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - name: Shared build check @@ -196,28 +189,25 @@ jobs: BUILD_NAME=package_release EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true + submodules: true fetch-depth: 0 # For a proper version and performance artifacts - name: Build run: | - git -C 
"$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -241,24 +231,24 @@ jobs: BUILD_NAME=package_aarch64 EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ runner.temp }}/images_path - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true + submodules: true fetch-depth: 0 # For a proper version and performance artifacts - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ runner.temp }}/build_check/${{ env.BUILD_URLS }}.json @@ -282,28 +272,25 @@ jobs: BUILD_NAME=binary_release EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true + submodules: true fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -327,26 +314,24 @@ jobs: BUILD_NAME=package_asan EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -370,26 +355,24 @@ jobs: 
BUILD_NAME=package_ubsan EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -413,26 +396,24 @@ jobs: BUILD_NAME=package_tsan EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -456,26 +437,24 @@ jobs: BUILD_NAME=package_msan EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -499,26 +478,24 @@ jobs: BUILD_NAME=package_debug EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update 
--single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -545,26 +522,24 @@ jobs: BUILD_NAME=binary_shared EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -588,26 +563,24 @@ jobs: BUILD_NAME=binary_tidy EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -631,28 +604,25 @@ jobs: BUILD_NAME=binary_darwin EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true + submodules: true fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -676,28 +646,25 @@ jobs: BUILD_NAME=binary_aarch64 EOF - name: 
Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true + submodules: true fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -721,28 +688,25 @@ jobs: BUILD_NAME=binary_freebsd EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true + submodules: true fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -766,28 +730,25 @@ jobs: BUILD_NAME=binary_darwin_aarch64 EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true + submodules: true fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -811,28 +772,25 @@ jobs: BUILD_NAME=binary_ppc64le EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: 
+ clear-repository: true + submodules: true fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -842,7 +800,7 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" - BuilderBinAmd64SSE2: + BuilderBinAmd64Compat: needs: [DockerHubPush] runs-on: [self-hosted, builder] steps: @@ -853,31 +811,28 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - BUILD_NAME=binary_amd64sse2 + BUILD_NAME=binary_amd64_compat EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true + submodules: true fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -901,28 +856,25 @@ jobs: BUILD_NAME=binary_aarch64_v80compat EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true + submodules: true fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -941,12 +893,10 @@ jobs: - BuilderDebAarch64 runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + 
clear-repository: true fetch-depth: 0 # It MUST BE THE SAME for all dependencies and the job itself - name: Check docker clickhouse/clickhouse-server building run: | @@ -986,14 +936,13 @@ jobs: NEEDS_DATA_PATH=${{runner.temp}}/needs.json EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Report Builder run: | sudo rm -fr "$TEMP_PATH" @@ -1015,9 +964,8 @@ jobs: - BuilderBinDarwin - BuilderBinDarwinAarch64 - BuilderBinFreeBSD - # - BuilderBinGCC - BuilderBinPPC64 - - BuilderBinAmd64SSE2 + - BuilderBinAmd64Compat - BuilderBinAarch64V80Compat - BuilderBinClangTidy - BuilderDebShared @@ -1033,14 +981,13 @@ jobs: NEEDS_DATA_PATH=${{runner.temp}}/needs.json EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Report Builder run: | sudo rm -fr "$TEMP_PATH" @@ -1064,11 +1011,10 @@ jobs: - BuilderDebAarch64 runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Mark Commit Release Ready run: | cd "$GITHUB_WORKSPACE/tests/ci" @@ -1090,14 +1036,13 @@ jobs: KILL_TIMEOUT=10800 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1125,14 +1070,13 @@ jobs: KILL_TIMEOUT=10800 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1162,14 +1106,13 @@ jobs: RUN_BY_HASH_TOTAL=2 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1199,14 +1142,13 @@ jobs: RUN_BY_HASH_TOTAL=2 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - 
uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1234,14 +1176,13 @@ jobs: KILL_TIMEOUT=10800 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1269,14 +1210,13 @@ jobs: KILL_TIMEOUT=10800 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1306,14 +1246,13 @@ jobs: RUN_BY_HASH_TOTAL=2 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1343,14 +1282,13 @@ jobs: RUN_BY_HASH_TOTAL=2 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1380,14 +1318,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1417,14 +1354,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1454,14 +1390,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1489,14 +1424,13 @@ jobs: KILL_TIMEOUT=10800 EOF - name: Download json reports - 
uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1526,14 +1460,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1563,14 +1496,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1600,14 +1532,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1637,14 +1568,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1674,14 +1604,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1711,14 +1640,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1749,14 +1677,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out 
repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1784,14 +1711,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1819,14 +1745,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1854,14 +1779,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1889,14 +1813,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1924,14 +1847,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1959,14 +1881,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1996,14 +1917,13 @@ jobs: REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Stress test run: | sudo rm -fr "$TEMP_PATH" @@ -2034,14 +1954,13 @@ jobs: 
REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Stress test run: | sudo rm -fr "$TEMP_PATH" @@ -2068,14 +1987,13 @@ jobs: REPO_COPY=${{runner.temp}}/stress_memory/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Stress test run: | sudo rm -fr "$TEMP_PATH" @@ -2102,14 +2020,13 @@ jobs: REPO_COPY=${{runner.temp}}/stress_undefined/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Stress test run: | sudo rm -fr "$TEMP_PATH" @@ -2136,14 +2053,13 @@ jobs: REPO_COPY=${{runner.temp}}/stress_debug/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Stress test run: | sudo rm -fr "$TEMP_PATH" @@ -2175,14 +2091,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -2211,14 +2126,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -2247,14 +2161,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -2283,14 +2196,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: 
actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -2319,14 +2231,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -2355,14 +2266,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -2391,14 +2301,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -2427,14 +2336,13 @@ jobs: RUN_BY_HASH_TOTAL=2 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -2463,14 +2371,13 @@ jobs: RUN_BY_HASH_TOTAL=2 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -2500,14 +2407,13 @@ jobs: REPO_COPY=${{runner.temp}}/ast_fuzzer_asan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Fuzzer run: | sudo rm -fr "$TEMP_PATH" @@ -2534,14 +2440,13 @@ jobs: REPO_COPY=${{runner.temp}}/ast_fuzzer_tsan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" 
- name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Fuzzer run: | sudo rm -fr "$TEMP_PATH" @@ -2568,14 +2473,13 @@ jobs: REPO_COPY=${{runner.temp}}/ast_fuzzer_ubsan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Fuzzer run: | sudo rm -fr "$TEMP_PATH" @@ -2602,14 +2506,13 @@ jobs: REPO_COPY=${{runner.temp}}/ast_fuzzer_msan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Fuzzer run: | sudo rm -fr "$TEMP_PATH" @@ -2636,14 +2539,13 @@ jobs: REPO_COPY=${{runner.temp}}/ast_fuzzer_debug/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Fuzzer run: | sudo rm -fr "$TEMP_PATH" @@ -2673,14 +2575,13 @@ jobs: REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Unit test run: | sudo rm -fr "$TEMP_PATH" @@ -2707,14 +2608,13 @@ jobs: REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Unit test run: | sudo rm -fr "$TEMP_PATH" @@ -2728,40 +2628,6 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" - # UnitTestsReleaseGCC: - # needs: [BuilderBinGCC] - # runs-on: [self-hosted, fuzzer-unit-tester] - # steps: - # - name: Set envs - # run: | - # cat >> "$GITHUB_ENV" << 'EOF' - # TEMP_PATH=${{runner.temp}}/unit_tests_asan - # REPORTS_PATH=${{runner.temp}}/reports_dir - # CHECK_NAME=Unit tests (release-gcc) - # REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse - # EOF - # - name: Download json reports - # uses: actions/download-artifact@v2 - # with: - # path: ${{ env.REPORTS_PATH }} - # - name: Clear repository - # run: | - # sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - # - name: Check out repository code - # uses: actions/checkout@v2 - # - name: Unit test - # 
run: | - # sudo rm -fr "$TEMP_PATH" - # mkdir -p "$TEMP_PATH" - # cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - # cd "$REPO_COPY/tests/ci" - # python3 unit_tests_check.py "$CHECK_NAME" - # - name: Cleanup - # if: always() - # run: | - # docker ps --quiet | xargs --no-run-if-empty docker kill ||: - # docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: - # sudo rm -fr "$TEMP_PATH" UnitTestsTsan: needs: [BuilderDebTsan] runs-on: [self-hosted, fuzzer-unit-tester] @@ -2775,14 +2641,13 @@ jobs: REPO_COPY=${{runner.temp}}/unit_tests_tsan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Unit test run: | sudo rm -fr "$TEMP_PATH" @@ -2809,14 +2674,13 @@ jobs: REPO_COPY=${{runner.temp}}/unit_tests_msan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Unit test run: | sudo rm -fr "$TEMP_PATH" @@ -2843,14 +2707,13 @@ jobs: REPO_COPY=${{runner.temp}}/unit_tests_ubsan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Unit test run: | sudo rm -fr "$TEMP_PATH" @@ -2882,14 +2745,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Performance Comparison run: | sudo rm -fr "$TEMP_PATH" @@ -2918,14 +2780,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Performance Comparison run: | sudo rm -fr "$TEMP_PATH" @@ -2954,14 +2815,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Performance Comparison run: | sudo rm -fr "$TEMP_PATH" @@ -2990,14 +2850,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + 
uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Performance Comparison run: | sudo rm -fr "$TEMP_PATH" @@ -3026,14 +2885,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Performance Comparison run: | sudo rm -fr "$TEMP_PATH" @@ -3062,14 +2920,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Performance Comparison run: | sudo rm -fr "$TEMP_PATH" @@ -3098,14 +2955,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Performance Comparison run: | sudo rm -fr "$TEMP_PATH" @@ -3134,14 +2990,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Performance Comparison run: | sudo rm -fr "$TEMP_PATH" @@ -3171,14 +3026,13 @@ jobs: REPO_COPY=${{runner.temp}}/sqlancer_release/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: SQLancer run: | sudo rm -fr "$TEMP_PATH" @@ -3205,14 +3059,13 @@ jobs: REPO_COPY=${{runner.temp}}/sqlancer_debug/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: SQLancer run: | sudo rm -fr "$TEMP_PATH" @@ -3291,11 +3144,10 @@ jobs: - SQLancerTestDebug runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: 
ClickHouse/checkout@v1 + with: + clear-repository: true - name: Finish label run: | cd "$GITHUB_WORKSPACE/tests/ci" diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 9ebbe4e090d..415d1b8fdc4 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -16,34 +16,32 @@ jobs: DockerHubPushAarch64: runs-on: [self-hosted, style-checker-aarch64] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Images check run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_images_check.py --suffix aarch64 --all - name: Upload images files to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: changed_images_aarch64 path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json DockerHubPushAmd64: runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Images check run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_images_check.py --suffix amd64 --all - name: Upload images files to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: changed_images_amd64 path: ${{ runner.temp }}/docker_images_check/changed_images_amd64.json @@ -51,18 +49,17 @@ jobs: needs: [DockerHubPushAmd64, DockerHubPushAarch64] runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Download changed aarch64 images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images_aarch64 path: ${{ runner.temp }} - name: Download changed amd64 images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images_amd64 path: ${{ runner.temp }} @@ -71,7 +68,7 @@ jobs: cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_manifests_merge.py --suffix amd64 --suffix aarch64 - name: Upload images files to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: changed_images path: ${{ runner.temp }}/changed_images.json @@ -90,22 +87,17 @@ jobs: EOF echo "COVERITY_TOKEN=${{ secrets.COVERITY_TOKEN }}" >> "$GITHUB_ENV" - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - id: coverity-checkout - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: - fetch-depth: 0 # otherwise we will have no info about contributors + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" @@ -134,8 +126,10 @@ jobs: CC: clang-15 CXX: clang++-15 steps: - - uses: actions/checkout@v2 + - name: Check out repository code + uses: 
ClickHouse/checkout@v1 with: + clear-repository: true fetch-depth: 0 # Shallow clones should be disabled for a better relevancy of analysis submodules: true - name: Set up JDK 11 diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 857e2c7f604..efb7d50dd28 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -16,6 +16,7 @@ on: # yamllint disable-line rule:truthy - 'docker/docs/**' - 'docs/**' - 'website/**' + - 'utils/check-style/aspell-ignore/**' ########################################################################################## ##################################### SMALL CHECKS ####################################### ########################################################################################## @@ -25,11 +26,10 @@ jobs: # Run the first check always, even if the CI is cancelled if: ${{ always() }} steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Labels check run: | cd "$GITHUB_WORKSPACE/tests/ci" @@ -38,11 +38,10 @@ jobs: needs: CheckLabels runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Python unit tests run: | cd "$GITHUB_WORKSPACE/tests/ci" @@ -51,17 +50,16 @@ jobs: needs: CheckLabels runs-on: [self-hosted, style-checker-aarch64] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Images check run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_images_check.py --suffix aarch64 - name: Upload images files to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: changed_images_aarch64 path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json @@ -69,17 +67,16 @@ jobs: needs: CheckLabels runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Images check run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_images_check.py --suffix amd64 - name: Upload images files to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: changed_images_amd64 path: ${{ runner.temp }}/docker_images_check/changed_images_amd64.json @@ -87,18 +84,17 @@ jobs: needs: [DockerHubPushAmd64, DockerHubPushAarch64, PythonUnitTests] runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Download changed aarch64 images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images_aarch64 path: ${{ runner.temp }} - name: Download changed amd64 images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images_amd64 path: 
${{ runner.temp }} @@ -107,7 +103,7 @@ jobs: cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_manifests_merge.py --suffix amd64 --suffix aarch64 - name: Upload images files to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: changed_images path: ${{ runner.temp }}/changed_images.json @@ -128,15 +124,14 @@ jobs: - name: Download changed images # even if artifact does not exist, e.g. on `do not test` label or failed Docker job continue-on-error: true - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.TEMP_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Style Check run: | cd "$GITHUB_WORKSPACE/tests/ci" @@ -158,16 +153,12 @@ jobs: REPO_COPY=${{runner.temp}}/fasttest/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches EOF - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" - mkdir "$GITHUB_WORKSPACE" - sudo rm -fr "$TEMP_PATH" - mkdir -p "$TEMP_PATH" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.TEMP_PATH }} @@ -192,13 +183,12 @@ jobs: REPO_COPY=${{runner.temp}}/compatibility_check/ClickHouse REPORTS_PATH=${{runner.temp}}/reports_dir EOF - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - name: CompatibilityCheck @@ -224,13 +214,12 @@ jobs: REPO_COPY=${{runner.temp}}/split_build_check/ClickHouse REPORTS_PATH=${{runner.temp}}/reports_dir EOF - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - name: Shared build check @@ -262,28 +251,25 @@ jobs: BUILD_NAME=package_release EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true fetch-depth: 0 # for performance artifact + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ 
-307,26 +293,24 @@ jobs: BUILD_NAME=binary_release EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -350,28 +334,25 @@ jobs: BUILD_NAME=package_aarch64 EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ runner.temp }}/images_path - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true + submodules: true fetch-depth: 0 # for performance artifact - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -395,26 +376,24 @@ jobs: BUILD_NAME=package_asan EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -438,26 +417,24 @@ jobs: BUILD_NAME=package_ubsan EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C 
"$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -481,26 +458,24 @@ jobs: BUILD_NAME=package_tsan EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -524,26 +499,24 @@ jobs: BUILD_NAME=package_msan EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -567,26 +540,24 @@ jobs: BUILD_NAME=package_debug EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -613,26 +584,24 @@ jobs: BUILD_NAME=binary_shared 
EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -656,26 +625,24 @@ jobs: BUILD_NAME=binary_tidy EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -699,26 +666,24 @@ jobs: BUILD_NAME=binary_darwin EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -742,26 +707,24 @@ jobs: BUILD_NAME=binary_aarch64 EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init 
--jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -785,26 +748,24 @@ jobs: BUILD_NAME=binary_freebsd EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -828,26 +789,24 @@ jobs: BUILD_NAME=binary_darwin_aarch64 EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -871,26 +830,24 @@ jobs: BUILD_NAME=binary_ppc64le EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -900,7 +857,7 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: 
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" - BuilderBinAmd64SSE2: + BuilderBinAmd64Compat: needs: [DockerHubPush, FastTest, StyleCheck] runs-on: [self-hosted, builder] steps: @@ -911,29 +868,27 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - BUILD_NAME=binary_amd64sse2 + BUILD_NAME=binary_amd64_compat EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -957,26 +912,24 @@ jobs: BUILD_NAME=binary_aarch64_v80compat EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -995,12 +948,10 @@ jobs: - BuilderDebAarch64 runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true fetch-depth: 0 # It MUST BE THE SAME for all dependencies and the job itself - name: Check docker clickhouse/clickhouse-server building run: | @@ -1039,14 +990,13 @@ jobs: NEEDS_DATA_PATH=${{runner.temp}}/needs.json EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Report Builder run: | sudo rm -fr "$TEMP_PATH" @@ -1070,7 +1020,7 @@ jobs: - BuilderBinFreeBSD # - BuilderBinGCC - BuilderBinPPC64 - - BuilderBinAmd64SSE2 + - BuilderBinAmd64Compat - BuilderBinAarch64V80Compat - BuilderBinClangTidy - BuilderDebShared @@ -1086,14 +1036,13 @@ jobs: 
NEEDS_DATA_PATH=${{runner.temp}}/needs.json EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Report Builder run: | sudo rm -fr "$TEMP_PATH" @@ -1126,14 +1075,13 @@ jobs: KILL_TIMEOUT=10800 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1163,14 +1111,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1200,14 +1147,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1237,14 +1183,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1274,14 +1219,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1309,14 +1253,13 @@ jobs: KILL_TIMEOUT=10800 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1346,14 +1289,13 @@ jobs: RUN_BY_HASH_TOTAL=2 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo 
rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1383,14 +1325,13 @@ jobs: RUN_BY_HASH_TOTAL=2 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1420,14 +1361,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1457,14 +1397,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1494,14 +1433,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1531,14 +1469,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1568,14 +1505,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1605,14 +1541,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr 
"$TEMP_PATH" @@ -1642,14 +1577,13 @@ jobs: RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1679,14 +1613,13 @@ jobs: RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1716,14 +1649,13 @@ jobs: RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1753,14 +1685,13 @@ jobs: RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1790,14 +1721,13 @@ jobs: RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1825,14 +1755,13 @@ jobs: KILL_TIMEOUT=10800 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1862,14 +1791,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1899,14 +1827,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear 
repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1936,14 +1863,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1973,14 +1899,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2010,14 +1935,13 @@ jobs: RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2047,14 +1971,13 @@ jobs: RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2084,14 +2007,13 @@ jobs: RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2121,14 +2043,13 @@ jobs: RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2158,14 +2079,13 @@ jobs: RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional 
test run: | sudo rm -fr "$TEMP_PATH" @@ -2195,14 +2115,13 @@ jobs: RUN_BY_HASH_TOTAL=2 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2232,14 +2151,13 @@ jobs: RUN_BY_HASH_TOTAL=2 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2269,14 +2187,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2306,14 +2223,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2343,14 +2259,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2380,14 +2295,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2417,14 +2331,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2454,14 +2367,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH 
}} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2491,14 +2403,13 @@ jobs: RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2528,14 +2439,13 @@ jobs: RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2565,14 +2475,13 @@ jobs: RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2602,14 +2511,13 @@ jobs: RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2639,14 +2547,13 @@ jobs: RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2674,14 +2581,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2709,14 +2615,13 @@ jobs: REPO_COPY=${{runner.temp}}/tests_bugfix_check/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 
+ with: + clear-repository: true - name: Bugfix test run: | sudo rm -fr "$TEMP_PATH" @@ -2758,14 +2663,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2793,14 +2697,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2828,14 +2731,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2863,14 +2765,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2898,14 +2799,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2933,14 +2833,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -2968,14 +2867,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -3005,14 +2903,13 @@ jobs: REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + 
uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Stress test run: | sudo rm -fr "$TEMP_PATH" @@ -3043,14 +2940,13 @@ jobs: REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Stress test run: | sudo rm -fr "$TEMP_PATH" @@ -3077,14 +2973,13 @@ jobs: REPO_COPY=${{runner.temp}}/stress_memory/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Stress test run: | sudo rm -fr "$TEMP_PATH" @@ -3111,14 +3006,13 @@ jobs: REPO_COPY=${{runner.temp}}/stress_undefined/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Stress test run: | sudo rm -fr "$TEMP_PATH" @@ -3145,14 +3039,13 @@ jobs: REPO_COPY=${{runner.temp}}/stress_debug/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Stress test run: | sudo rm -fr "$TEMP_PATH" @@ -3182,14 +3075,13 @@ jobs: REPO_COPY=${{runner.temp}}/ast_fuzzer_asan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Fuzzer run: | sudo rm -fr "$TEMP_PATH" @@ -3216,14 +3108,13 @@ jobs: REPO_COPY=${{runner.temp}}/ast_fuzzer_tsan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Fuzzer run: | sudo rm -fr "$TEMP_PATH" @@ -3250,14 +3141,13 @@ jobs: REPO_COPY=${{runner.temp}}/ast_fuzzer_ubsan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 
with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Fuzzer run: | sudo rm -fr "$TEMP_PATH" @@ -3284,14 +3174,13 @@ jobs: REPO_COPY=${{runner.temp}}/ast_fuzzer_msan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Fuzzer run: | sudo rm -fr "$TEMP_PATH" @@ -3318,14 +3207,13 @@ jobs: REPO_COPY=${{runner.temp}}/ast_fuzzer_debug/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Fuzzer run: | sudo rm -fr "$TEMP_PATH" @@ -3357,14 +3245,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3393,14 +3280,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3429,14 +3315,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3465,14 +3350,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3501,14 +3385,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: 
actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3537,14 +3420,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3573,14 +3455,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3609,14 +3490,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3645,14 +3525,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3681,14 +3560,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3717,14 +3595,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3753,14 +3630,13 @@ jobs: RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3789,14 +3665,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json 
reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3825,14 +3700,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3861,14 +3735,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3897,14 +3770,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3931,14 +3803,13 @@ jobs: REPO_COPY=${{runner.temp}}/integration_tests_asan_flaky_check/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -3968,14 +3839,13 @@ jobs: REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Unit test run: | sudo rm -fr "$TEMP_PATH" @@ -4002,14 +3872,13 @@ jobs: REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Unit test run: | sudo rm -fr "$TEMP_PATH" @@ -4036,14 +3905,13 @@ jobs: REPO_COPY=${{runner.temp}}/unit_tests_tsan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 
with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Unit test run: | sudo rm -fr "$TEMP_PATH" @@ -4070,14 +3938,13 @@ jobs: REPO_COPY=${{runner.temp}}/unit_tests_msan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Unit test run: | sudo rm -fr "$TEMP_PATH" @@ -4104,14 +3971,13 @@ jobs: REPO_COPY=${{runner.temp}}/unit_tests_ubsan/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Unit test run: | sudo rm -fr "$TEMP_PATH" @@ -4143,14 +4009,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Performance Comparison run: | sudo rm -fr "$TEMP_PATH" @@ -4179,14 +4044,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Performance Comparison run: | sudo rm -fr "$TEMP_PATH" @@ -4215,14 +4079,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Performance Comparison run: | sudo rm -fr "$TEMP_PATH" @@ -4251,14 +4114,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Performance Comparison run: | sudo rm -fr "$TEMP_PATH" @@ -4287,14 +4149,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check 
out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Performance Comparison run: | sudo rm -fr "$TEMP_PATH" @@ -4323,14 +4184,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Performance Comparison run: | sudo rm -fr "$TEMP_PATH" @@ -4359,14 +4219,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Performance Comparison run: | sudo rm -fr "$TEMP_PATH" @@ -4395,14 +4254,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Performance Comparison run: | sudo rm -fr "$TEMP_PATH" @@ -4432,14 +4290,13 @@ jobs: REPO_COPY=${{runner.temp}}/sqlancer_release/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: SQLancer run: | sudo rm -fr "$TEMP_PATH" @@ -4466,14 +4323,13 @@ jobs: REPO_COPY=${{runner.temp}}/sqlancer_debug/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: SQLancer run: | sudo rm -fr "$TEMP_PATH" @@ -4599,11 +4455,10 @@ jobs: - SQLancerTestDebug runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Finish label run: | cd "$GITHUB_WORKSPACE/tests/ci" diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 0b0f125d641..9200e5e87b8 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -20,7 +20,7 @@ jobs: REPO_COPY=${{runner.temp}}/release_packages/ClickHouse EOF - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: # Always use the most recent script version ref: master @@ -50,12 +50,10 @@ jobs: DockerServerImages: runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | 
- sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true fetch-depth: 0 # otherwise we will have no version info - name: Check docker clickhouse/clickhouse-server building run: | diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index bf35ca76fc6..251087f33a5 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -15,34 +15,32 @@ jobs: DockerHubPushAarch64: runs-on: [self-hosted, style-checker-aarch64] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Images check run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_images_check.py --suffix aarch64 - name: Upload images files to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: changed_images_aarch64 path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json DockerHubPushAmd64: runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Images check run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_images_check.py --suffix amd64 - name: Upload images files to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: changed_images_amd64 path: ${{ runner.temp }}/docker_images_check/changed_images_amd64.json @@ -50,18 +48,17 @@ jobs: needs: [DockerHubPushAmd64, DockerHubPushAarch64] runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Download changed aarch64 images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images_aarch64 path: ${{ runner.temp }} - name: Download changed amd64 images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images_amd64 path: ${{ runner.temp }} @@ -70,7 +67,7 @@ jobs: cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_manifests_merge.py --suffix amd64 --suffix aarch64 - name: Upload images files to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: changed_images path: ${{ runner.temp }}/changed_images.json @@ -85,13 +82,12 @@ jobs: REPO_COPY=${{runner.temp}}/compatibility_check/ClickHouse REPORTS_PATH=${{runner.temp}}/reports_dir EOF - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - name: CompatibilityCheck @@ -123,28 +119,25 @@ jobs: BUILD_NAME=package_release EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear 
repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true + submodules: true fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -168,24 +161,24 @@ jobs: BUILD_NAME=package_aarch64 EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ runner.temp }}/images_path - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: - fetch-depth: 0 # otherwise we will have no info about contributors + clear-repository: true + submodules: true + fetch-depth: 0 # For a proper version and performance artifacts - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ runner.temp }}/build_check/${{ env.BUILD_URLS }}.json @@ -209,26 +202,24 @@ jobs: BUILD_NAME=package_asan EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -252,26 +243,24 @@ jobs: BUILD_NAME=package_ubsan EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd 
"$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -295,26 +284,24 @@ jobs: BUILD_NAME=package_tsan EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -338,26 +325,24 @@ jobs: BUILD_NAME=package_msan EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -381,26 +366,24 @@ jobs: BUILD_NAME=package_debug EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -424,28 +407,25 @@ jobs: BUILD_NAME=binary_darwin EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm 
-fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true + submodules: true fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -469,28 +449,25 @@ jobs: BUILD_NAME=binary_darwin_aarch64 EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true + submodules: true fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync - git -C "$GITHUB_WORKSPACE" submodule update --single-branch --depth=1 --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json @@ -509,12 +486,10 @@ jobs: - BuilderDebAarch64 runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true fetch-depth: 0 # It MUST BE THE SAME for all dependencies and the job itself - name: Check docker clickhouse/clickhouse-server building run: | @@ -553,14 +528,13 @@ jobs: NEEDS_DATA_PATH=${{runner.temp}}/needs.json EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Report Builder run: | sudo rm -fr "$TEMP_PATH" @@ -592,14 +566,13 @@ jobs: NEEDS_DATA_PATH=${{runner.temp}}/needs.json EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Report Builder run: | sudo rm -fr "$TEMP_PATH" @@ -623,11 +596,10 @@ jobs: - BuilderDebAarch64 runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - 
uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Mark Commit Release Ready run: | cd "$GITHUB_WORKSPACE/tests/ci" @@ -649,14 +621,13 @@ jobs: KILL_TIMEOUT=10800 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -684,14 +655,13 @@ jobs: KILL_TIMEOUT=10800 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -721,14 +691,13 @@ jobs: RUN_BY_HASH_TOTAL=2 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -758,14 +727,13 @@ jobs: RUN_BY_HASH_TOTAL=2 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -795,14 +763,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -832,14 +799,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -869,14 +835,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -904,14 +869,13 @@ jobs: KILL_TIMEOUT=10800 EOF - name: Download json reports - 
uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -941,14 +905,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -978,14 +941,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1015,14 +977,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1052,14 +1013,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1089,14 +1049,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1126,14 +1085,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1164,14 +1122,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository 
code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1199,14 +1156,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1234,14 +1190,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1269,14 +1224,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1304,14 +1258,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1339,14 +1292,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1374,14 +1326,13 @@ jobs: KILL_TIMEOUT=3600 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Functional test run: | sudo rm -fr "$TEMP_PATH" @@ -1411,14 +1362,13 @@ jobs: REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Stress test run: | sudo rm -fr "$TEMP_PATH" @@ -1449,14 +1399,13 @@ jobs: 
REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Stress test run: | sudo rm -fr "$TEMP_PATH" @@ -1483,14 +1432,13 @@ jobs: REPO_COPY=${{runner.temp}}/stress_memory/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Stress test run: | sudo rm -fr "$TEMP_PATH" @@ -1517,14 +1465,13 @@ jobs: REPO_COPY=${{runner.temp}}/stress_undefined/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Stress test run: | sudo rm -fr "$TEMP_PATH" @@ -1551,14 +1498,13 @@ jobs: REPO_COPY=${{runner.temp}}/stress_debug/ClickHouse EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Stress test run: | sudo rm -fr "$TEMP_PATH" @@ -1590,14 +1536,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -1626,14 +1571,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -1662,14 +1606,13 @@ jobs: RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -1698,14 +1641,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: 
actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -1734,14 +1676,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -1770,14 +1711,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -1806,14 +1746,13 @@ jobs: RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -1842,14 +1781,13 @@ jobs: RUN_BY_HASH_TOTAL=2 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -1878,14 +1816,13 @@ jobs: RUN_BY_HASH_TOTAL=2 EOF - name: Download json reports - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Integration test run: | sudo rm -fr "$TEMP_PATH" @@ -1944,11 +1881,10 @@ jobs: - CompatibilityCheck runs-on: [self-hosted, style-checker] steps: - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true - name: Finish label run: | cd "$GITHUB_WORKSPACE/tests/ci" diff --git a/.github/workflows/tags_stable.yml b/.github/workflows/tags_stable.yml index f8cfa1137cc..f5b42e9c882 100644 --- a/.github/workflows/tags_stable.yml +++ b/.github/workflows/tags_stable.yml @@ -34,7 +34,7 @@ jobs: run: | echo "GITHUB_TAG=${GITHUB_REF#refs/tags/}" >> "$GITHUB_ENV" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: ref: master fetch-depth: 0 diff --git 
a/.github/workflows/woboq.yml b/.github/workflows/woboq.yml index b928a4a8d3d..363652c9f33 100644 --- a/.github/workflows/woboq.yml +++ b/.github/workflows/woboq.yml @@ -21,12 +21,10 @@ jobs: REPO_COPY=${{runner.temp}}/codebrowser/ClickHouse IMAGES_PATH=${{runner.temp}}/images_path EOF - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true submodules: 'true' - name: Codebrowser run: | diff --git a/.gitmodules b/.gitmodules index d8a9f0de4dd..0805b6d5492 100644 --- a/.gitmodules +++ b/.gitmodules @@ -269,9 +269,6 @@ [submodule "contrib/vectorscan"] path = contrib/vectorscan url = https://github.com/VectorCamp/vectorscan.git -[submodule "contrib/liburing"] - path = contrib/liburing - url = https://github.com/axboe/liburing.git [submodule "contrib/c-ares"] path = contrib/c-ares url = https://github.com/ClickHouse/c-ares diff --git a/CHANGELOG.md b/CHANGELOG.md index ddba799dbd0..ddc10c1eb2f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,9 @@ ### ClickHouse release 22.12, 2022-12-15 +#### Backward Incompatible Change +* Add `GROUP BY ALL` syntax: [#37631](https://github.com/ClickHouse/ClickHouse/issues/37631). [#42265](https://github.com/ClickHouse/ClickHouse/pull/42265) ([刘陶峰](https://github.com/taofengliu)). If you have a column or an alias named `all` and doing `GROUP BY all` without the intention to group by all the columns, the query will have a different semantic. To keep the old semantic, put `all` into backticks or double quotes `"all"` to make it an identifier instead of a keyword. + #### Upgrade Notes * Fixed backward incompatibility in (de)serialization of states of `min`, `max`, `any*`, `argMin`, `argMax` aggregate functions with `String` argument. The incompatibility affects 22.9, 22.10 and 22.11 branches (fixed since 22.9.6, 22.10.4 and 22.11.2 correspondingly). Some minor releases of 22.3, 22.7 and 22.8 branches are also affected: 22.3.13...22.3.14 (fixed since 22.3.15), 22.8.6...22.8.9 (fixed since 22.8.10), 22.7.6 and newer (will not be fixed in 22.7, we recommend upgrading from 22.7.* to 22.8.10 or newer). This release note does not concern users that have never used affected versions. Incompatible versions append an extra `'\0'` to strings when reading states of the aggregate functions mentioned above. For example, if an older version saved state of `anyState('foobar')` to `state_column` then the incompatible version will print `'foobar\0'` on `anyMerge(state_column)`. Also incompatible versions write states of the aggregate functions without trailing `'\0'`. Newer versions (that have the fix) can correctly read data written by all versions including incompatible versions, except one corner case. If an incompatible version saved a state with a string that actually ends with null character, then newer version will trim trailing `'\0'` when reading state of affected aggregate function. For example, if an incompatible version saved state of `anyState('abrac\0dabra\0')` to `state_column` then newer versions will print `'abrac\0dabra'` on `anyMerge(state_column)`. The issue also affects distributed queries when an incompatible version works in a cluster together with older or newer versions. [#43038](https://github.com/ClickHouse/ClickHouse/pull/43038) ([Alexander Tokmakov](https://github.com/tavplubix), [Raúl Marín](https://github.com/Algunenano)). 
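As a quick illustration of the `GROUP BY ALL` entry above, a minimal sketch using `clickhouse-local`; the `numbers()`-based expressions are invented for illustration, and only the quoting rule itself comes from the changelog entry.

```bash
# New keyword semantics (22.12+): GROUP BY ALL groups by every non-aggregated
# SELECT expression, so the key columns do not have to be repeated.
clickhouse-local --query "SELECT number % 3 AS k, number % 2 AS parity, count() FROM numbers(12) GROUP BY ALL"

# If an existing query has a column or alias literally named `all`, keep the old
# meaning by quoting it so it is parsed as an identifier rather than the keyword.
clickhouse-local --query "SELECT \`all\`, count() FROM (SELECT number % 3 AS \`all\` FROM numbers(12)) GROUP BY \`all\`"
```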
Note: all the official ClickHouse builds already include the patches. This is not necessarily true for unofficial third-party builds that should be avoided. diff --git a/CMakeLists.txt b/CMakeLists.txt index 99997db96a1..66fdaa6a765 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -377,15 +377,15 @@ set (DEBUG_INFO_FLAGS "-g -gdwarf-4") set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${COMPILER_FLAGS}") set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -O3 ${DEBUG_INFO_FLAGS} ${CMAKE_CXX_FLAGS_ADD}") -set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 ${DEBUG_INFO_FLAGS} -fno-inline ${CMAKE_CXX_FLAGS_ADD}") +set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 ${DEBUG_INFO_FLAGS} ${CMAKE_CXX_FLAGS_ADD}") set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${COMPILER_FLAGS} ${CMAKE_C_FLAGS_ADD}") set (CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -O3 ${DEBUG_INFO_FLAGS} ${CMAKE_C_FLAGS_ADD}") -set (CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -O0 ${DEBUG_INFO_FLAGS} -fno-inline ${CMAKE_C_FLAGS_ADD}") +set (CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -O0 ${DEBUG_INFO_FLAGS} ${CMAKE_C_FLAGS_ADD}") set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} ${COMPILER_FLAGS} ${CMAKE_ASM_FLAGS_ADD}") set (CMAKE_ASM_FLAGS_RELWITHDEBINFO "${CMAKE_ASM_FLAGS_RELWITHDEBINFO} -O3 ${DEBUG_INFO_FLAGS} ${CMAKE_ASM_FLAGS_ADD}") -set (CMAKE_ASM_FLAGS_DEBUG "${CMAKE_ASM_FLAGS_DEBUG} -O0 ${DEBUG_INFO_FLAGS} -fno-inline ${CMAKE_ASM_FLAGS_ADD}") +set (CMAKE_ASM_FLAGS_DEBUG "${CMAKE_ASM_FLAGS_DEBUG} -O0 ${DEBUG_INFO_FLAGS} ${CMAKE_ASM_FLAGS_ADD}") if (COMPILER_CLANG) if (OS_DARWIN) diff --git a/LICENSE b/LICENSE index 8b0ac080f01..65c5df824c6 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,4 @@ -Copyright 2016-2022 ClickHouse, Inc. +Copyright 2016-2023 ClickHouse, Inc. Apache License Version 2.0, January 2004 @@ -188,7 +188,7 @@ Copyright 2016-2022 ClickHouse, Inc. same "printed page" as the copyright notice for easier identification within third-party archives. - Copyright 2016-2022 ClickHouse, Inc. + Copyright 2016-2023 ClickHouse, Inc. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/README.md b/README.md index 35580369fd0..951dbf67160 100644 --- a/README.md +++ b/README.md @@ -16,6 +16,6 @@ ClickHouse® is an open-source column-oriented database management system that a * [Contacts](https://clickhouse.com/company/contact) can help to get your questions answered if there are any. ## Upcoming events -* [**v22.12 Release Webinar**](https://clickhouse.com/company/events/v22-12-release-webinar) 22.12 is the ClickHouse Christmas release. There are plenty of gifts (a new JOIN algorithm among them) and we adopted something from MongoDB. Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release. +* **Recording available**: [**v22.12 Release Webinar**](https://www.youtube.com/watch?v=sREupr6uc2k) 22.12 is the ClickHouse Christmas release. There are plenty of gifts (a new JOIN algorithm among them) and we adopted something from MongoDB. Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release. * [**ClickHouse Meetup at the CHEQ office in Tel Aviv**](https://www.meetup.com/clickhouse-tel-aviv-user-group/events/289599423/) - Jan 16 - We are very excited to be holding our next in-person ClickHouse meetup at the CHEQ office in Tel Aviv! 
Hear from CHEQ, ServiceNow and Contentsquare, as well as a deep dive presentation from ClickHouse CTO Alexey Milovidov. Join us for a fun evening of talks, food and discussion! * [**ClickHouse Meetup at Microsoft Office in Seattle**](https://www.meetup.com/clickhouse-seattle-user-group/events/290310025/) - Jan 18 - Keep an eye on this space as we will be announcing speakers soon! diff --git a/base/base/BorrowedObjectPool.h b/base/base/BorrowedObjectPool.h index bb4c9cd1c21..05a23d5835e 100644 --- a/base/base/BorrowedObjectPool.h +++ b/base/base/BorrowedObjectPool.h @@ -10,7 +10,7 @@ #include /** Pool for limited size objects that cannot be used from different threads simultaneously. - * The main use case is to have fixed size of objects that can be reused in difference threads during their lifetime + * The main use case is to have fixed size of objects that can be reused in different threads during their lifetime * and have to be initialized on demand. * Two main properties of pool are allocated objects size and borrowed objects size. * Allocated objects size is size of objects that are currently allocated by the pool. diff --git a/base/base/CMakeLists.txt b/base/base/CMakeLists.txt index d788bd6f092..79e62586ad4 100644 --- a/base/base/CMakeLists.txt +++ b/base/base/CMakeLists.txt @@ -8,16 +8,13 @@ set (SRCS getPageSize.cpp getThreadId.cpp JSON.cpp - LineReader.cpp mremap.cpp phdr_cache.cpp preciseExp10.cpp - setTerminalEcho.cpp shift10.cpp sleep.cpp terminalColors.cpp errnoToString.cpp - ReplxxLineReader.cpp StringRef.cpp safeExit.cpp throwError.cpp @@ -40,11 +37,6 @@ else () target_compile_definitions(common PUBLIC WITH_COVERAGE=0) endif () -# FIXME: move libraries for line reading out from base -if (TARGET ch_rust::skim) - target_link_libraries(common PUBLIC ch_rust::skim) -endif() - target_include_directories(common PUBLIC .. "${CMAKE_CURRENT_BINARY_DIR}/..") if (OS_DARWIN AND NOT USE_STATIC_LIBRARIES) diff --git a/base/base/setTerminalEcho.cpp b/base/base/setTerminalEcho.cpp deleted file mode 100644 index 759dca19119..00000000000 --- a/base/base/setTerminalEcho.cpp +++ /dev/null @@ -1,28 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include - - -void setTerminalEcho(bool enable) -{ - /// Obtain terminal attributes, - /// toggle the ECHO flag - /// and set them back. - - struct termios tty{}; - - if (0 != tcgetattr(STDIN_FILENO, &tty)) - throw std::runtime_error(std::string("setTerminalEcho failed get: ") + errnoToString()); - - if (enable) - tty.c_lflag |= ECHO; - else - tty.c_lflag &= ~ECHO; - - if (0 != tcsetattr(STDIN_FILENO, TCSANOW, &tty)) - throw std::runtime_error(std::string("setTerminalEcho failed set: ") + errnoToString()); -} diff --git a/base/base/setTerminalEcho.h b/base/base/setTerminalEcho.h deleted file mode 100644 index 98e8f5a87e3..00000000000 --- a/base/base/setTerminalEcho.h +++ /dev/null @@ -1,4 +0,0 @@ -#pragma once - -/// Enable or disable echoing of typed characters. Throws std::runtime_error on error. 
-void setTerminalEcho(bool enable); diff --git a/contrib/cctz b/contrib/cctz index 5c8528fb35e..7c78edd52b4 160000 --- a/contrib/cctz +++ b/contrib/cctz @@ -1 +1 @@ -Subproject commit 5c8528fb35e89ee0b3a7157490423fba0d4dd7b5 +Subproject commit 7c78edd52b4d65acc103c2f195818ffcabe6fe0d diff --git a/contrib/libunwind-cmake/CMakeLists.txt b/contrib/libunwind-cmake/CMakeLists.txt index 155853a0bca..733f99d07f5 100644 --- a/contrib/libunwind-cmake/CMakeLists.txt +++ b/contrib/libunwind-cmake/CMakeLists.txt @@ -43,7 +43,10 @@ set_target_properties(unwind PROPERTIES FOLDER "contrib/libunwind-cmake") target_include_directories(unwind SYSTEM BEFORE PUBLIC $) target_compile_definitions(unwind PRIVATE -D_LIBUNWIND_NO_HEAP=1 -D_DEBUG -D_LIBUNWIND_IS_NATIVE_ONLY) -target_compile_options(unwind PRIVATE -fno-exceptions -funwind-tables -fno-sanitize=all $<$:-nostdinc++ -fno-rtti>) + +# We should enable optimizations (otherwise it will be too slow in debug) +# and disable sanitizers (otherwise infinite loop may happen) +target_compile_options(unwind PRIVATE -O3 -fno-exceptions -funwind-tables -fno-sanitize=all $<$:-nostdinc++ -fno-rtti>) check_c_compiler_flag(-Wunused-but-set-variable HAVE_WARNING_UNUSED_BUT_SET_VARIABLE) if (HAVE_WARNING_UNUSED_BUT_SET_VARIABLE) diff --git a/contrib/sysroot b/contrib/sysroot index e9fb375d0a1..0f41651860f 160000 --- a/contrib/sysroot +++ b/contrib/sysroot @@ -1 +1 @@ -Subproject commit e9fb375d0a1e5ebfd74c043f088f2342552103f8 +Subproject commit 0f41651860fa4a530ecd68b93a15b8fd77397adf diff --git a/docker/packager/packager b/docker/packager/packager index 7f6bd8818fb..716071fcac6 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -131,7 +131,7 @@ def parse_env_variables( ARM_V80COMPAT_SUFFIX = "-aarch64-v80compat" FREEBSD_SUFFIX = "-freebsd" PPC_SUFFIX = "-ppc64le" - AMD64_SSE2_SUFFIX = "-amd64sse2" + AMD64_COMPAT_SUFFIX = "-amd64-compat" result = [] result.append("OUTPUT_DIR=/output") @@ -144,7 +144,7 @@ def parse_env_variables( is_cross_arm_v80compat = compiler.endswith(ARM_V80COMPAT_SUFFIX) is_cross_ppc = compiler.endswith(PPC_SUFFIX) is_cross_freebsd = compiler.endswith(FREEBSD_SUFFIX) - is_amd64_sse2 = compiler.endswith(AMD64_SSE2_SUFFIX) + is_amd64_compat = compiler.endswith(AMD64_COMPAT_SUFFIX) if is_cross_darwin: cc = compiler[: -len(DARWIN_SUFFIX)] @@ -197,8 +197,8 @@ def parse_env_variables( cmake_flags.append( "-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-ppc64le.cmake" ) - elif is_amd64_sse2: - cc = compiler[: -len(AMD64_SSE2_SUFFIX)] + elif is_amd64_compat: + cc = compiler[: -len(AMD64_COMPAT_SUFFIX)] result.append("DEB_ARCH=amd64") cmake_flags.append("-DNO_SSE3_OR_HIGHER=1") else: @@ -358,7 +358,7 @@ if __name__ == "__main__": "clang-15-aarch64", "clang-15-aarch64-v80compat", "clang-15-ppc64le", - "clang-15-amd64sse2", + "clang-15-amd64-compat", "clang-15-freebsd", "gcc-11", ), diff --git a/docker/server/entrypoint.sh b/docker/server/entrypoint.sh index 52f4f67281e..dfd8f2e3e54 100755 --- a/docker/server/entrypoint.sh +++ b/docker/server/entrypoint.sh @@ -80,7 +80,7 @@ do done # if clickhouse user is defined - create it (user "default" already exists out of box) -if [ -n "$CLICKHOUSE_USER" ] && [ "$CLICKHOUSE_USER" != "default" ] || [ -n "$CLICKHOUSE_PASSWORD" ]; then +if [ -n "$CLICKHOUSE_USER" ] && [ "$CLICKHOUSE_USER" != "default" ] || [ -n "$CLICKHOUSE_PASSWORD" ] || [ "$CLICKHOUSE_ACCESS_MANAGEMENT" != "0" ]; then echo "$0: create new user '$CLICKHOUSE_USER' instead 'default'" cat < 
/etc/clickhouse-server/users.d/default-user.xml @@ -120,8 +120,8 @@ if [ -n "$(ls /docker-entrypoint-initdb.d/)" ] || [ -n "$CLICKHOUSE_DB" ]; then pid="$!" # check if clickhouse is ready to accept connections - # will try to send ping clickhouse via http_port (max 12 retries by default, with 1 sec timeout and 1 sec delay between retries) - tries=${CLICKHOUSE_INIT_TIMEOUT:-12} + # will try to send ping clickhouse via http_port (max 1000 retries by default, with 1 sec timeout and 1 sec delay between retries) + tries=${CLICKHOUSE_INIT_TIMEOUT:-1000} while ! wget --spider --no-check-certificate -T 1 -q "$URL" 2>/dev/null; do if [ "$tries" -le "0" ]; then echo >&2 'ClickHouse init process failed.' diff --git a/docker/test/fuzzer/query-fuzzer-tweaks-users.xml b/docker/test/fuzzer/query-fuzzer-tweaks-users.xml index 2f09573f942..1771a03f8a9 100644 --- a/docker/test/fuzzer/query-fuzzer-tweaks-users.xml +++ b/docker/test/fuzzer/query-fuzzer-tweaks-users.xml @@ -2,6 +2,7 @@ 10 + + + + diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh index bd539ca978b..67b1b957585 100755 --- a/docker/test/fuzzer/run-fuzzer.sh +++ b/docker/test/fuzzer/run-fuzzer.sh @@ -51,7 +51,6 @@ function clone ) ls -lath ||: - } function wget_with_retry @@ -75,6 +74,7 @@ function download ./clickhouse ||: ln -s ./clickhouse ./clickhouse-server ln -s ./clickhouse ./clickhouse-client + ln -s ./clickhouse ./clickhouse-local # clickhouse-server is in the current dir export PATH="$PWD:$PATH" @@ -91,6 +91,12 @@ function configure cp -av --dereference "$script_dir"/query-fuzzer-tweaks-users.xml db/users.d cp -av --dereference "$script_dir"/allow-nullable-key.xml db/config.d + cat > db/config.d/max_server_memory_usage_to_ram_ratio.xml < + 0.75 + +EOL + cat > db/config.d/core.xml < @@ -151,7 +157,7 @@ function fuzz mkdir -p /var/run/clickhouse-server # NOTE: we use process substitution here to preserve keep $! as a pid of clickhouse-server - clickhouse-server --config-file db/config.xml --pid-file /var/run/clickhouse-server/clickhouse-server.pid -- --path db 2>&1 | pigz > server.log.gz & + clickhouse-server --config-file db/config.xml --pid-file /var/run/clickhouse-server/clickhouse-server.pid -- --path db > server.log 2>&1 & server_pid=$! kill -0 $server_pid @@ -256,12 +262,21 @@ quit if [ "$server_died" == 1 ] then # The server has died. - task_exit_code=210 - echo "failure" > status.txt - if ! zgrep --text -ao "Received signal.*\|Logical error.*\|Assertion.*failed\|Failed assertion.*\|.*runtime error: .*\|.*is located.*\|SUMMARY: AddressSanitizer:.*\|SUMMARY: MemorySanitizer:.*\|SUMMARY: ThreadSanitizer:.*\|.*_LIBCPP_ASSERT.*" server.log.gz > description.txt + if ! grep --text -ao "Received signal.*\|Logical error.*\|Assertion.*failed\|Failed assertion.*\|.*runtime error: .*\|.*is located.*\|SUMMARY: AddressSanitizer:.*\|SUMMARY: MemorySanitizer:.*\|SUMMARY: ThreadSanitizer:.*\|.*_LIBCPP_ASSERT.*" server.log > description.txt then echo "Lost connection to server. See the logs." > description.txt fi + + if grep -F --text 'Sanitizer: out-of-memory' description.txt + then + # OOM of sanitizer is not a problem we can handle - treat it as success, but preserve the description. 
+ task_exit_code=0 + echo "success" > status.txt + else + task_exit_code=210 + echo "failure" > status.txt + fi + elif [ "$fuzzer_exit_code" == "143" ] || [ "$fuzzer_exit_code" == "0" ] then # Variants of a normal run: @@ -327,24 +342,28 @@ case "$stage" in time fuzz ;& "report") + CORE_LINK='' if [ -f core.gz ]; then CORE_LINK='core.gz' fi + +grep --text -F '' server.log > fatal.log ||: + +pigz server.log + cat > report.html < AST Fuzzer for PR #${PR_TO_TEST} @ ${SHA_TO_TEST} @@ -352,17 +371,32 @@ th { cursor: pointer; }
[report.html heredoc body: the surrounding HTML markup was lost when this hunk was extracted, so only the text content is recoverable. The page heading is "AST Fuzzer for PR #${PR_TO_TEST} @ ${SHA_TO_TEST}" and the report contains a results table with the columns "Test name", "Test status", "Description". The removed version filled the single "AST Fuzzer" row with $(cat status.txt) and $(cat description.txt); the added version renders the description through
    clickhouse-local --input-format RawBLOB --output-format RawBLOB --query "SELECT encodeXMLComponent(*) FROM table" < description.txt || cat description.txt
and adds a further entry that embeds the collected fatal messages:
    clickhouse-local --input-format RawBLOB --output-format RawBLOB --query "SELECT encodeXMLComponent(*) FROM table" < fatal.log || cat fatal.log
]
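The `report.html` change above runs the description and the collected fatal messages through `clickhouse-local` so that raw log text is XML-escaped before being embedded in the HTML report, with a plain `cat` as fallback. A standalone sketch of the same escaping trick, assuming only that a text file exists at the (made-up) path used below:

```bash
# Produce a small file with characters that are unsafe inside HTML/XML.
printf '<Fatal> example & "log" line\n' > /tmp/fatal_example.log

# Same pattern as in run-fuzzer.sh: read the whole file as one blob, escape the
# XML special characters, and fall back to the raw content if the escaping fails.
clickhouse-local --input-format RawBLOB --output-format RawBLOB \
    --query "SELECT encodeXMLComponent(*) FROM table" < /tmp/fatal_example.log \
    || cat /tmp/fatal_example.log
```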
diff --git a/docker/test/stateful/Dockerfile b/docker/test/stateful/Dockerfile index 234d0861f8b..b67a638188c 100644 --- a/docker/test/stateful/Dockerfile +++ b/docker/test/stateful/Dockerfile @@ -17,6 +17,7 @@ ENV S3_URL="https://clickhouse-datasets.s3.amazonaws.com" ENV DATASETS="hits visits" RUN npm install -g azurite +RUN npm install tslib COPY run.sh / CMD ["/bin/bash", "/run.sh"] diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index a497d3443b0..40109255a7e 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -80,6 +80,7 @@ ENV MINIO_ROOT_PASSWORD="clickhouse" ENV EXPORT_S3_STORAGE_POLICIES=1 RUN npm install -g azurite +RUN npm install tslib COPY run.sh / COPY setup_minio.sh / diff --git a/docker/test/stateless/setup_minio.sh b/docker/test/stateless/setup_minio.sh index a1de7f2d6ed..66f9625f6e8 100755 --- a/docker/test/stateless/setup_minio.sh +++ b/docker/test/stateless/setup_minio.sh @@ -75,7 +75,7 @@ fi TEST_PATH=${1:-/usr/share/clickhouse-test} MINIO_DATA_PATH=${TEST_PATH}/queries/${QUERY_DIR}/data_minio -# Iterating over globs will cause redudant FILE variale to be a path to a file, not a filename +# Iterating over globs will cause redundant FILE variable to be a path to a file, not a filename # shellcheck disable=SC2045 for FILE in $(ls "${MINIO_DATA_PATH}"); do echo "$FILE"; diff --git a/docker/test/stress/README.md b/docker/test/stress/README.md index 96807b9f9a6..c22721fd7da 100644 --- a/docker/test/stress/README.md +++ b/docker/test/stress/README.md @@ -1,6 +1,6 @@ -Allow to run simple ClickHouse stress test in Docker from debian packages. +Allows to run simple ClickHouse stress test in Docker from debian packages. Actually it runs multiple copies of clickhouse-test (functional tests). -This allows to find problems like segmentation fault which cause shutdown of server. +This allows to find problems like failed assertions and memory safety issues. Usage: ``` diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 01e0f5b4897..41245013a4a 100644 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -11,31 +11,6 @@ set -x # core.COMM.PID-TID sysctl kernel.core_pattern='core.%e.%p-%P' -# Thread Fuzzer allows to check more permutations of possible thread scheduling -# and find more potential issues. 
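The stress README above describes the harness as several concurrent copies of `clickhouse-test`. A rough sketch of that idea follows; the worker count and log file names are invented, it assumes a server is already running and the test queries are installed, and the real `./stress` driver adds many more options.

```bash
# Launch a few clickhouse-test workers in parallel, each with a randomized test
# order, and keep a separate log per worker.
WORKERS=4
for i in $(seq 1 "$WORKERS"); do
    clickhouse-test --order=random > "stress_worker_${i}.log" 2>&1 &
done
wait  # block until every worker has finished
```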
-# Temporarily disable ThreadFuzzer with tsan because of https://github.com/google/sanitizers/issues/1540 -is_tsan_build=$(clickhouse local -q "select value like '% -fsanitize=thread %' from system.build_options where name='CXX_FLAGS'") -if [ "$is_tsan_build" -eq "0" ]; then - export THREAD_FUZZER_CPU_TIME_PERIOD_US=1000 - export THREAD_FUZZER_SLEEP_PROBABILITY=0.1 - export THREAD_FUZZER_SLEEP_TIME_US=100000 - - export THREAD_FUZZER_pthread_mutex_lock_BEFORE_MIGRATE_PROBABILITY=1 - export THREAD_FUZZER_pthread_mutex_lock_AFTER_MIGRATE_PROBABILITY=1 - export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_MIGRATE_PROBABILITY=1 - export THREAD_FUZZER_pthread_mutex_unlock_AFTER_MIGRATE_PROBABILITY=1 - - export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_PROBABILITY=0.001 - export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_PROBABILITY=0.001 - export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_PROBABILITY=0.001 - export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_PROBABILITY=0.001 - export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_TIME_US=10000 - - export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_TIME_US=10000 - export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_TIME_US=10000 - export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_TIME_US=10000 -fi - function install_packages() { @@ -54,7 +29,7 @@ function configure() # we mount tests folder from repo to /usr/share ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test - ln -s /usr/share/clickhouse-test/ci/download_release_packets.py /usr/bin/download_release_packets + ln -s /usr/share/clickhouse-test/ci/download_release_packages.py /usr/bin/download_release_packages ln -s /usr/share/clickhouse-test/ci/get_previous_release_tag.py /usr/bin/get_previous_release_tag # avoid too slow startup @@ -123,6 +98,22 @@ EOL $PWD EOL + + # Analyzer is not yet ready for testing + cat > /etc/clickhouse-server/users.d/no_analyzer.xml < + + + + + + + + + + +EOL + } function stop() @@ -210,6 +201,31 @@ quit install_packages package_folder +# Thread Fuzzer allows to check more permutations of possible thread scheduling +# and find more potential issues. 
+# Temporarily disable ThreadFuzzer with tsan because of https://github.com/google/sanitizers/issues/1540 +is_tsan_build=$(clickhouse local -q "select value like '% -fsanitize=thread %' from system.build_options where name='CXX_FLAGS'") +if [ "$is_tsan_build" -eq "0" ]; then + export THREAD_FUZZER_CPU_TIME_PERIOD_US=1000 + export THREAD_FUZZER_SLEEP_PROBABILITY=0.1 + export THREAD_FUZZER_SLEEP_TIME_US=100000 + + export THREAD_FUZZER_pthread_mutex_lock_BEFORE_MIGRATE_PROBABILITY=1 + export THREAD_FUZZER_pthread_mutex_lock_AFTER_MIGRATE_PROBABILITY=1 + export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_MIGRATE_PROBABILITY=1 + export THREAD_FUZZER_pthread_mutex_unlock_AFTER_MIGRATE_PROBABILITY=1 + + export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_PROBABILITY=0.001 + export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_PROBABILITY=0.001 + export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_PROBABILITY=0.001 + export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_PROBABILITY=0.001 + export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_TIME_US=10000 + + export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_TIME_US=10000 + export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_TIME_US=10000 + export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_TIME_US=10000 +fi + export ZOOKEEPER_FAULT_INJECTION=1 configure @@ -334,219 +350,228 @@ zgrep -Fa "########################################" /test_output/* > /dev/null zgrep -Fa " received signal " /test_output/gdb.log > /dev/null \ && echo -e 'Found signal in gdb.log\tFAIL' >> /test_output/test_results.tsv -echo -e "Backward compatibility check\n" +if [ "$DISABLE_BC_CHECK" -ne "1" ]; then + echo -e "Backward compatibility check\n" -echo "Get previous release tag" -previous_release_tag=$(clickhouse-client --version | grep -o "[0-9]*\.[0-9]*\.[0-9]*\.[0-9]*" | get_previous_release_tag) -echo $previous_release_tag + echo "Get previous release tag" + previous_release_tag=$(clickhouse-client --version | grep -o "[0-9]*\.[0-9]*\.[0-9]*\.[0-9]*" | get_previous_release_tag) + echo $previous_release_tag -echo "Clone previous release repository" -git clone https://github.com/ClickHouse/ClickHouse.git --no-tags --progress --branch=$previous_release_tag --no-recurse-submodules --depth=1 previous_release_repository + echo "Clone previous release repository" + git clone https://github.com/ClickHouse/ClickHouse.git --no-tags --progress --branch=$previous_release_tag --no-recurse-submodules --depth=1 previous_release_repository -echo "Download previous release server" -mkdir previous_release_package_folder + echo "Download clickhouse-server from the previous release" + mkdir previous_release_package_folder -echo $previous_release_tag | download_release_packets && echo -e 'Download script exit code\tOK' >> /test_output/test_results.tsv \ - || echo -e 'Download script failed\tFAIL' >> /test_output/test_results.tsv + echo $previous_release_tag | download_release_packages && echo -e 'Download script exit code\tOK' >> /test_output/test_results.tsv \ + || echo -e 'Download script failed\tFAIL' >> /test_output/test_results.tsv -mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.clean.log -for table in query_log trace_log -do - clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | pigz > /test_output/$table.tsv.gz ||: -done - -tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||: - -# Check if we cloned previous release repository 
successfully -if ! [ "$(ls -A previous_release_repository/tests/queries)" ] -then - echo -e "Backward compatibility check: Failed to clone previous release tests\tFAIL" >> /test_output/test_results.tsv -elif ! [ "$(ls -A previous_release_package_folder/clickhouse-common-static_*.deb && ls -A previous_release_package_folder/clickhouse-server_*.deb)" ] -then - echo -e "Backward compatibility check: Failed to download previous release packets\tFAIL" >> /test_output/test_results.tsv -else - echo -e "Successfully cloned previous release tests\tOK" >> /test_output/test_results.tsv - echo -e "Successfully downloaded previous release packets\tOK" >> /test_output/test_results.tsv - - # Uninstall current packages - dpkg --remove clickhouse-client - dpkg --remove clickhouse-server - dpkg --remove clickhouse-common-static-dbg - dpkg --remove clickhouse-common-static - - rm -rf /var/lib/clickhouse/* - - # Make BC check more funny by forcing Ordinary engine for system database - mkdir /var/lib/clickhouse/metadata - echo "ATTACH DATABASE system ENGINE=Ordinary" > /var/lib/clickhouse/metadata/system.sql - - # Install previous release packages - install_packages previous_release_package_folder - - # Start server from previous release - # Previous version may not be ready for fault injections - export ZOOKEEPER_FAULT_INJECTION=0 - configure - - # Avoid "Setting s3_check_objects_after_upload is neither a builtin setting..." - rm -f /etc/clickhouse-server/users.d/enable_blobs_check.xml ||: - rm -f /etc/clickhouse-server/users.d/marks.xml ||: - - # Remove s3 related configs to avoid "there is no disk type `cache`" - rm -f /etc/clickhouse-server/config.d/storage_conf.xml ||: - rm -f /etc/clickhouse-server/config.d/azure_storage_conf.xml ||: - - # Turn on after 22.12 - rm -f /etc/clickhouse-server/config.d/compressed_marks_and_index.xml ||: - # it uses recently introduced settings which previous versions may not have - rm -f /etc/clickhouse-server/users.d/insert_keeper_retries.xml ||: - - start - - clickhouse-client --query="SELECT 'Server version: ', version()" - - # Install new package before running stress test because we should use new - # clickhouse-client and new clickhouse-test. - # - # But we should leave old binary in /usr/bin/ and debug symbols in - # /usr/lib/debug/usr/bin (if any) for gdb and internal DWARF parser, so it - # will print sane stacktraces and also to avoid possible crashes. - # - # FIXME: those files can be extracted directly from debian package, but - # actually better solution will be to use different PATH instead of playing - # games with files from packages. 
- mv /usr/bin/clickhouse previous_release_package_folder/ - mv /usr/lib/debug/usr/bin/clickhouse.debug previous_release_package_folder/ - install_packages package_folder - mv /usr/bin/clickhouse package_folder/ - mv /usr/lib/debug/usr/bin/clickhouse.debug package_folder/ - mv previous_release_package_folder/clickhouse /usr/bin/ - mv previous_release_package_folder/clickhouse.debug /usr/lib/debug/usr/bin/clickhouse.debug - - mkdir tmp_stress_output - - ./stress --test-cmd="/usr/bin/clickhouse-test --queries=\"previous_release_repository/tests/queries\"" --backward-compatibility-check --output-folder tmp_stress_output --global-time-limit=1200 \ - && echo -e 'Backward compatibility check: Test script exit code\tOK' >> /test_output/test_results.tsv \ - || echo -e 'Backward compatibility check: Test script failed\tFAIL' >> /test_output/test_results.tsv - rm -rf tmp_stress_output - - clickhouse-client --query="SELECT 'Tables count:', count() FROM system.tables" - - stop 1 - mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.backward.stress.log - - # Start new server - mv package_folder/clickhouse /usr/bin/ - mv package_folder/clickhouse.debug /usr/lib/debug/usr/bin/clickhouse.debug - export ZOOKEEPER_FAULT_INJECTION=1 - configure - start 500 - clickhouse-client --query "SELECT 'Backward compatibility check: Server successfully started', 'OK'" >> /test_output/test_results.tsv \ - || (echo -e 'Backward compatibility check: Server failed to start\tFAIL' >> /test_output/test_results.tsv \ - && grep -a ".*Application" /var/log/clickhouse-server/clickhouse-server.log >> /test_output/bc_check_application_errors.txt) - - clickhouse-client --query="SELECT 'Server version: ', version()" - - # Let the server run for a while before checking log. - sleep 60 - - stop - mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.backward.clean.log - - # Error messages (we should ignore some errors) - # FIXME https://github.com/ClickHouse/ClickHouse/issues/38643 ("Unknown index: idx.") - # FIXME https://github.com/ClickHouse/ClickHouse/issues/39174 ("Cannot parse string 'Hello' as UInt64") - # FIXME Not sure if it's expected, but some tests from BC check may not be finished yet when we restarting server. - # Let's just ignore all errors from queries ("} TCPHandler: Code:", "} executeQuery: Code:") - # FIXME https://github.com/ClickHouse/ClickHouse/issues/39197 ("Missing columns: 'v3' while processing query: 'v3, k, v1, v2, p'") - # NOTE Incompatibility was introduced in https://github.com/ClickHouse/ClickHouse/pull/39263, it's expected - # ("This engine is deprecated and is not supported in transactions", "[Queue = DB::MergeMutateRuntimeQueue]: Code: 235. DB::Exception: Part") - # FIXME https://github.com/ClickHouse/ClickHouse/issues/39174 - bad mutation does not indicate backward incompatibility - echo "Check for Error messages in server log:" - zgrep -Fav -e "Code: 236. DB::Exception: Cancelled merging parts" \ - -e "Code: 236. 
DB::Exception: Cancelled mutating parts" \ - -e "REPLICA_IS_ALREADY_ACTIVE" \ - -e "REPLICA_ALREADY_EXISTS" \ - -e "ALL_REPLICAS_LOST" \ - -e "DDLWorker: Cannot parse DDL task query" \ - -e "RaftInstance: failed to accept a rpc connection due to error 125" \ - -e "UNKNOWN_DATABASE" \ - -e "NETWORK_ERROR" \ - -e "UNKNOWN_TABLE" \ - -e "ZooKeeperClient" \ - -e "KEEPER_EXCEPTION" \ - -e "DirectoryMonitor" \ - -e "TABLE_IS_READ_ONLY" \ - -e "Code: 1000, e.code() = 111, Connection refused" \ - -e "UNFINISHED" \ - -e "NETLINK_ERROR" \ - -e "Renaming unexpected part" \ - -e "PART_IS_TEMPORARILY_LOCKED" \ - -e "and a merge is impossible: we didn't find" \ - -e "found in queue and some source parts for it was lost" \ - -e "is lost forever." \ - -e "Unknown index: idx." \ - -e "Cannot parse string 'Hello' as UInt64" \ - -e "} TCPHandler: Code:" \ - -e "} executeQuery: Code:" \ - -e "Missing columns: 'v3' while processing query: 'v3, k, v1, v2, p'" \ - -e "This engine is deprecated and is not supported in transactions" \ - -e "[Queue = DB::MergeMutateRuntimeQueue]: Code: 235. DB::Exception: Part" \ - -e "The set of parts restored in place of" \ - -e "(ReplicatedMergeTreeAttachThread): Initialization failed. Error" \ - -e "Code: 269. DB::Exception: Destination table is myself" \ - -e "Coordination::Exception: Connection loss" \ - -e "MutateFromLogEntryTask" \ - -e "No connection to ZooKeeper, cannot get shared table ID" \ - -e "Session expired" \ - /var/log/clickhouse-server/clickhouse-server.backward.clean.log | zgrep -Fa "" > /test_output/bc_check_error_messages.txt \ - && echo -e 'Backward compatibility check: Error message in clickhouse-server.log (see bc_check_error_messages.txt)\tFAIL' >> /test_output/test_results.tsv \ - || echo -e 'Backward compatibility check: No Error messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv - - # Remove file bc_check_error_messages.txt if it's empty - [ -s /test_output/bc_check_error_messages.txt ] || rm /test_output/bc_check_error_messages.txt - - # Sanitizer asserts - zgrep -Fa "==================" /var/log/clickhouse-server/stderr.log >> /test_output/tmp - zgrep -Fa "WARNING" /var/log/clickhouse-server/stderr.log >> /test_output/tmp - zgrep -Fav -e "ASan doesn't fully support makecontext/swapcontext functions" -e "DB::Exception" /test_output/tmp > /dev/null \ - && echo -e 'Backward compatibility check: Sanitizer assert (in stderr.log)\tFAIL' >> /test_output/test_results.tsv \ - || echo -e 'Backward compatibility check: No sanitizer asserts\tOK' >> /test_output/test_results.tsv - rm -f /test_output/tmp - - # OOM - zgrep -Fa " Application: Child process was terminated by signal 9" /var/log/clickhouse-server/clickhouse-server.backward.*.log > /dev/null \ - && echo -e 'Backward compatibility check: OOM killer (or signal 9) in clickhouse-server.log\tFAIL' >> /test_output/test_results.tsv \ - || echo -e 'Backward compatibility check: No OOM messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv - - # Logical errors - echo "Check for Logical errors in server log:" - zgrep -Fa -A20 "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-server/clickhouse-server.backward.*.log > /test_output/bc_check_logical_errors.txt \ - && echo -e 'Backward compatibility check: Logical error thrown (see clickhouse-server.log or bc_check_logical_errors.txt)\tFAIL' >> /test_output/test_results.tsv \ - || echo -e 'Backward compatibility check: No logical errors\tOK' >> /test_output/test_results.tsv - - # Remove file 
bc_check_logical_errors.txt if it's empty - [ -s /test_output/bc_check_logical_errors.txt ] || rm /test_output/bc_check_logical_errors.txt - - # Crash - zgrep -Fa "########################################" /var/log/clickhouse-server/clickhouse-server.backward.*.log > /dev/null \ - && echo -e 'Backward compatibility check: Killed by signal (in clickhouse-server.log)\tFAIL' >> /test_output/test_results.tsv \ - || echo -e 'Backward compatibility check: Not crashed\tOK' >> /test_output/test_results.tsv - - # It also checks for crash without stacktrace (printed by watchdog) - echo "Check for Fatal message in server log:" - zgrep -Fa " " /var/log/clickhouse-server/clickhouse-server.backward.*.log > /test_output/bc_check_fatal_messages.txt \ - && echo -e 'Backward compatibility check: Fatal message in clickhouse-server.log (see bc_check_fatal_messages.txt)\tFAIL' >> /test_output/test_results.tsv \ - || echo -e 'Backward compatibility check: No fatal messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv - - # Remove file bc_check_fatal_messages.txt if it's empty - [ -s /test_output/bc_check_fatal_messages.txt ] || rm /test_output/bc_check_fatal_messages.txt - - tar -chf /test_output/coordination.backward.tar /var/lib/clickhouse/coordination ||: + mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.clean.log for table in query_log trace_log do - clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | pigz > /test_output/$table.backward.tsv.gz ||: + clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | pigz > /test_output/$table.tsv.gz ||: done + + tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||: + + # Check if we cloned previous release repository successfully + if ! [ "$(ls -A previous_release_repository/tests/queries)" ] + then + echo -e "Backward compatibility check: Failed to clone previous release tests\tFAIL" >> /test_output/test_results.tsv + elif ! [ "$(ls -A previous_release_package_folder/clickhouse-common-static_*.deb && ls -A previous_release_package_folder/clickhouse-server_*.deb)" ] + then + echo -e "Backward compatibility check: Failed to download previous release packages\tFAIL" >> /test_output/test_results.tsv + else + echo -e "Successfully cloned previous release tests\tOK" >> /test_output/test_results.tsv + echo -e "Successfully downloaded previous release packages\tOK" >> /test_output/test_results.tsv + + # Uninstall current packages + dpkg --remove clickhouse-client + dpkg --remove clickhouse-server + dpkg --remove clickhouse-common-static-dbg + dpkg --remove clickhouse-common-static + + rm -rf /var/lib/clickhouse/* + + # Make BC check more funny by forcing Ordinary engine for system database + mkdir /var/lib/clickhouse/metadata + echo "ATTACH DATABASE system ENGINE=Ordinary" > /var/lib/clickhouse/metadata/system.sql + + # Install previous release packages + install_packages previous_release_package_folder + + # Start server from previous release + # Previous version may not be ready for fault injections + export ZOOKEEPER_FAULT_INJECTION=0 + configure + + # Avoid "Setting s3_check_objects_after_upload is neither a builtin setting..." 
+ rm -f /etc/clickhouse-server/users.d/enable_blobs_check.xml ||: + rm -f /etc/clickhouse-server/users.d/marks.xml ||: + + # Remove s3 related configs to avoid "there is no disk type `cache`" + rm -f /etc/clickhouse-server/config.d/storage_conf.xml ||: + rm -f /etc/clickhouse-server/config.d/azure_storage_conf.xml ||: + + # Turn on after 22.12 + rm -f /etc/clickhouse-server/config.d/compressed_marks_and_index.xml ||: + # it uses recently introduced settings which previous versions may not have + rm -f /etc/clickhouse-server/users.d/insert_keeper_retries.xml ||: + + start + + clickhouse-client --query="SELECT 'Server version: ', version()" + + # Install new package before running stress test because we should use new + # clickhouse-client and new clickhouse-test. + # + # But we should leave old binary in /usr/bin/ and debug symbols in + # /usr/lib/debug/usr/bin (if any) for gdb and internal DWARF parser, so it + # will print sane stacktraces and also to avoid possible crashes. + # + # FIXME: those files can be extracted directly from debian package, but + # actually better solution will be to use different PATH instead of playing + # games with files from packages. + mv /usr/bin/clickhouse previous_release_package_folder/ + mv /usr/lib/debug/usr/bin/clickhouse.debug previous_release_package_folder/ + install_packages package_folder + mv /usr/bin/clickhouse package_folder/ + mv /usr/lib/debug/usr/bin/clickhouse.debug package_folder/ + mv previous_release_package_folder/clickhouse /usr/bin/ + mv previous_release_package_folder/clickhouse.debug /usr/lib/debug/usr/bin/clickhouse.debug + + mkdir tmp_stress_output + + ./stress --test-cmd="/usr/bin/clickhouse-test --queries=\"previous_release_repository/tests/queries\"" --backward-compatibility-check --output-folder tmp_stress_output --global-time-limit=1200 \ + && echo -e 'Backward compatibility check: Test script exit code\tOK' >> /test_output/test_results.tsv \ + || echo -e 'Backward compatibility check: Test script failed\tFAIL' >> /test_output/test_results.tsv + rm -rf tmp_stress_output + + # We experienced deadlocks in this command in very rare cases. Let's debug it: + timeout 10m clickhouse-client --query="SELECT 'Tables count:', count() FROM system.tables" || + ( + echo "thread apply all backtrace (on select tables count)" >> /test_output/gdb.log + timeout 30m gdb -batch -ex 'thread apply all backtrace' -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log + clickhouse stop --force + ) + + stop 1 + mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.backward.stress.log + + # Start new server + mv package_folder/clickhouse /usr/bin/ + mv package_folder/clickhouse.debug /usr/lib/debug/usr/bin/clickhouse.debug + # Disable fault injections on start (we don't test them here, and it can lead to tons of requests in case of huge number of tables). + export ZOOKEEPER_FAULT_INJECTION=0 + configure + start 500 + clickhouse-client --query "SELECT 'Backward compatibility check: Server successfully started', 'OK'" >> /test_output/test_results.tsv \ + || (echo -e 'Backward compatibility check: Server failed to start\tFAIL' >> /test_output/test_results.tsv \ + && grep -a ".*Application" /var/log/clickhouse-server/clickhouse-server.log >> /test_output/bc_check_application_errors.txt) + + clickhouse-client --query="SELECT 'Server version: ', version()" + + # Let the server run for a while before checking log. 
+ sleep 60 + + stop + mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.backward.dirty.log + + # Error messages (we should ignore some errors) + # FIXME https://github.com/ClickHouse/ClickHouse/issues/38643 ("Unknown index: idx.") + # FIXME https://github.com/ClickHouse/ClickHouse/issues/39174 ("Cannot parse string 'Hello' as UInt64") + # FIXME Not sure if it's expected, but some tests from BC check may not be finished yet when we restarting server. + # Let's just ignore all errors from queries ("} TCPHandler: Code:", "} executeQuery: Code:") + # FIXME https://github.com/ClickHouse/ClickHouse/issues/39197 ("Missing columns: 'v3' while processing query: 'v3, k, v1, v2, p'") + # NOTE Incompatibility was introduced in https://github.com/ClickHouse/ClickHouse/pull/39263, it's expected + # ("This engine is deprecated and is not supported in transactions", "[Queue = DB::MergeMutateRuntimeQueue]: Code: 235. DB::Exception: Part") + # FIXME https://github.com/ClickHouse/ClickHouse/issues/39174 - bad mutation does not indicate backward incompatibility + echo "Check for Error messages in server log:" + zgrep -Fav -e "Code: 236. DB::Exception: Cancelled merging parts" \ + -e "Code: 236. DB::Exception: Cancelled mutating parts" \ + -e "REPLICA_IS_ALREADY_ACTIVE" \ + -e "REPLICA_ALREADY_EXISTS" \ + -e "ALL_REPLICAS_LOST" \ + -e "DDLWorker: Cannot parse DDL task query" \ + -e "RaftInstance: failed to accept a rpc connection due to error 125" \ + -e "UNKNOWN_DATABASE" \ + -e "NETWORK_ERROR" \ + -e "UNKNOWN_TABLE" \ + -e "ZooKeeperClient" \ + -e "KEEPER_EXCEPTION" \ + -e "DirectoryMonitor" \ + -e "TABLE_IS_READ_ONLY" \ + -e "Code: 1000, e.code() = 111, Connection refused" \ + -e "UNFINISHED" \ + -e "NETLINK_ERROR" \ + -e "Renaming unexpected part" \ + -e "PART_IS_TEMPORARILY_LOCKED" \ + -e "and a merge is impossible: we didn't find" \ + -e "found in queue and some source parts for it was lost" \ + -e "is lost forever." \ + -e "Unknown index: idx." \ + -e "Cannot parse string 'Hello' as UInt64" \ + -e "} TCPHandler: Code:" \ + -e "} executeQuery: Code:" \ + -e "Missing columns: 'v3' while processing query: 'v3, k, v1, v2, p'" \ + -e "This engine is deprecated and is not supported in transactions" \ + -e "[Queue = DB::MergeMutateRuntimeQueue]: Code: 235. DB::Exception: Part" \ + -e "The set of parts restored in place of" \ + -e "(ReplicatedMergeTreeAttachThread): Initialization failed. Error" \ + -e "Code: 269. 
DB::Exception: Destination table is myself" \ + -e "Coordination::Exception: Connection loss" \ + -e "MutateFromLogEntryTask" \ + -e "No connection to ZooKeeper, cannot get shared table ID" \ + -e "Session expired" \ + /var/log/clickhouse-server/clickhouse-server.backward.dirty.log | zgrep -Fa "" > /test_output/bc_check_error_messages.txt \ + && echo -e 'Backward compatibility check: Error message in clickhouse-server.log (see bc_check_error_messages.txt)\tFAIL' >> /test_output/test_results.tsv \ + || echo -e 'Backward compatibility check: No Error messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv + + # Remove file bc_check_error_messages.txt if it's empty + [ -s /test_output/bc_check_error_messages.txt ] || rm /test_output/bc_check_error_messages.txt + + # Sanitizer asserts + zgrep -Fa "==================" /var/log/clickhouse-server/stderr.log >> /test_output/tmp + zgrep -Fa "WARNING" /var/log/clickhouse-server/stderr.log >> /test_output/tmp + zgrep -Fav -e "ASan doesn't fully support makecontext/swapcontext functions" -e "DB::Exception" /test_output/tmp > /dev/null \ + && echo -e 'Backward compatibility check: Sanitizer assert (in stderr.log)\tFAIL' >> /test_output/test_results.tsv \ + || echo -e 'Backward compatibility check: No sanitizer asserts\tOK' >> /test_output/test_results.tsv + rm -f /test_output/tmp + + # OOM + zgrep -Fa " Application: Child process was terminated by signal 9" /var/log/clickhouse-server/clickhouse-server.backward.*.log > /dev/null \ + && echo -e 'Backward compatibility check: OOM killer (or signal 9) in clickhouse-server.log\tFAIL' >> /test_output/test_results.tsv \ + || echo -e 'Backward compatibility check: No OOM messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv + + # Logical errors + echo "Check for Logical errors in server log:" + zgrep -Fa -A20 "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-server/clickhouse-server.backward.*.log > /test_output/bc_check_logical_errors.txt \ + && echo -e 'Backward compatibility check: Logical error thrown (see clickhouse-server.log or bc_check_logical_errors.txt)\tFAIL' >> /test_output/test_results.tsv \ + || echo -e 'Backward compatibility check: No logical errors\tOK' >> /test_output/test_results.tsv + + # Remove file bc_check_logical_errors.txt if it's empty + [ -s /test_output/bc_check_logical_errors.txt ] || rm /test_output/bc_check_logical_errors.txt + + # Crash + zgrep -Fa "########################################" /var/log/clickhouse-server/clickhouse-server.backward.*.log > /dev/null \ + && echo -e 'Backward compatibility check: Killed by signal (in clickhouse-server.log)\tFAIL' >> /test_output/test_results.tsv \ + || echo -e 'Backward compatibility check: Not crashed\tOK' >> /test_output/test_results.tsv + + # It also checks for crash without stacktrace (printed by watchdog) + echo "Check for Fatal message in server log:" + zgrep -Fa " " /var/log/clickhouse-server/clickhouse-server.backward.*.log > /test_output/bc_check_fatal_messages.txt \ + && echo -e 'Backward compatibility check: Fatal message in clickhouse-server.log (see bc_check_fatal_messages.txt)\tFAIL' >> /test_output/test_results.tsv \ + || echo -e 'Backward compatibility check: No fatal messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv + + # Remove file bc_check_fatal_messages.txt if it's empty + [ -s /test_output/bc_check_fatal_messages.txt ] || rm /test_output/bc_check_fatal_messages.txt + + tar -chf /test_output/coordination.backward.tar 
/var/lib/clickhouse/coordination ||: + for table in query_log trace_log + do + clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | pigz > /test_output/$table.backward.tsv.gz ||: + done + fi fi dmesg -T > /test_output/dmesg.log diff --git a/docker/test/stress/stress b/docker/test/stress/stress index a0ec86f7fbe..d1860e9e14b 100755 --- a/docker/test/stress/stress +++ b/docker/test/stress/stress @@ -14,9 +14,6 @@ def get_options(i, backward_compatibility_check): if 0 < i: options.append("--order=random") - if i % 3 == 1: - options.append("--db-engine=Ordinary") - if i % 3 == 2 and not backward_compatibility_check: options.append( '''--db-engine="Replicated('/test/db/test_{}', 's1', 'r1')"'''.format(i) diff --git a/docker/test/style/process_style_check_result.py b/docker/test/style/process_style_check_result.py index 6dc3d05d051..2edf6ba3591 100755 --- a/docker/test/style/process_style_check_result.py +++ b/docker/test/style/process_style_check_result.py @@ -19,6 +19,7 @@ def process_result(result_folder): "typos", "whitespaces", "workflows", + "submodules", "docs spelling", ) diff --git a/docker/test/style/run.sh b/docker/test/style/run.sh index 80911bf8627..315efb9e6c4 100755 --- a/docker/test/style/run.sh +++ b/docker/test/style/run.sh @@ -10,7 +10,7 @@ echo "Check style" | ts echo "Check python formatting with black" | ts ./check-black -n |& tee /test_output/black_output.txt echo "Check python type hinting with mypy" | ts -./check-mypy -n |& tee /test_output/mypy_output.txt +./check-mypy -n |& tee /test_output/mypy_output.txt echo "Check typos" | ts ./check-typos |& tee /test_output/typos_output.txt echo "Check docs spelling" | ts @@ -19,6 +19,8 @@ echo "Check whitespaces" | ts ./check-whitespaces -n |& tee /test_output/whitespaces_output.txt echo "Check workflows" | ts ./check-workflows |& tee /test_output/workflows_output.txt +echo "Check submodules" | ts +./check-submodules |& tee /test_output/submodules_output.txt echo "Check shell scripts with shellcheck" | ts ./shellcheck-run.sh |& tee /test_output/shellcheck_output.txt /process_style_check_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv diff --git a/docker/test/testflows/runner/Dockerfile b/docker/test/testflows/runner/Dockerfile deleted file mode 100644 index bfc3ed5e39f..00000000000 --- a/docker/test/testflows/runner/Dockerfile +++ /dev/null @@ -1,82 +0,0 @@ -# docker build -t clickhouse/testflows-runner . 
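Among the backward-compatibility changes above, the table-count query is now wrapped in a watchdog that captures backtraces if it hangs. For reference, the same pattern in isolation; the timeouts and output paths mirror the script, and it assumes `gdb`, `ts` (from moreutils) and the server pid file are available.

```bash
# Run a query under a hard time limit; on timeout, dump backtraces of the running
# server for post-mortem analysis and then force-stop it.
timeout 10m clickhouse-client --query "SELECT 'Tables count:', count() FROM system.tables" || (
    echo "thread apply all backtrace (on select tables count)" >> /test_output/gdb.log
    timeout 30m gdb -batch -ex 'thread apply all backtrace' \
        -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" \
        | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log
    clickhouse stop --force
)
```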
-FROM ubuntu:20.04 - -# ARG for quick switch to a given ubuntu mirror -ARG apt_archive="http://archive.ubuntu.com" -RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list - -RUN apt-get update \ - && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \ - ca-certificates \ - bash \ - btrfs-progs \ - e2fsprogs \ - iptables \ - xfsprogs \ - tar \ - pigz \ - wget \ - git \ - iproute2 \ - cgroupfs-mount \ - python3-pip \ - tzdata \ - libicu-dev \ - bsdutils \ - curl \ - liblua5.1-dev \ - luajit \ - libssl-dev \ - libcurl4-openssl-dev \ - gdb \ - && rm -rf \ - /var/lib/apt/lists/* \ - /var/cache/debconf \ - /tmp/* \ - && apt-get clean - -ENV TZ=Europe/Moscow -RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone - -RUN pip3 install urllib3 testflows==1.7.20 docker-compose==1.29.2 docker==5.0.0 dicttoxml kazoo tzlocal==2.1 pytz python-dateutil numpy - -ENV DOCKER_CHANNEL stable -ENV DOCKER_VERSION 20.10.6 - -# Architecture of the image when BuildKit/buildx is used -ARG TARGETARCH - -# Install docker -RUN arch=${TARGETARCH:-amd64} \ - && case $arch in \ - amd64) rarch=x86_64 ;; \ - arm64) rarch=aarch64 ;; \ - esac \ - && set -eux \ - && if ! wget -nv -O docker.tgz "https://download.docker.com/linux/static/${DOCKER_CHANNEL}/${rarch}/docker-${DOCKER_VERSION}.tgz"; then \ - echo >&2 "error: failed to download 'docker-${DOCKER_VERSION}' from '${DOCKER_CHANNEL}' for '${rarch}'" \ - && exit 1; \ - fi \ - && tar --extract \ - --file docker.tgz \ - --strip-components 1 \ - --directory /usr/local/bin/ \ - && rm docker.tgz \ - && dockerd --version \ - && docker --version - -COPY modprobe.sh /usr/local/bin/modprobe -COPY dockerd-entrypoint.sh /usr/local/bin/ -COPY process_testflows_result.py /usr/local/bin/ - -RUN set -x \ - && addgroup --system dockremap \ - && adduser --system dockremap \ - && adduser dockremap dockremap \ - && echo 'dockremap:165536:65536' >> /etc/subuid \ - && echo 'dockremap:165536:65536' >> /etc/subgid - -VOLUME /var/lib/docker -EXPOSE 2375 -ENTRYPOINT ["dockerd-entrypoint.sh"] -CMD ["sh", "-c", "python3 regression.py --no-color -o new-fails --local --clickhouse-binary-path ${CLICKHOUSE_TESTS_SERVER_BIN_PATH} --log test.log ${TESTFLOWS_OPTS}; cat test.log | tfs report results --format json > results.json; /usr/local/bin/process_testflows_result.py || echo -e 'failure\tCannot parse results' > check_status.tsv; find * -type f | grep _instances | grep clickhouse-server | xargs -n1 tar -rvf clickhouse_logs.tar; gzip -9 clickhouse_logs.tar"] diff --git a/docker/test/testflows/runner/dockerd-entrypoint.sh b/docker/test/testflows/runner/dockerd-entrypoint.sh deleted file mode 100755 index d310ee583bf..00000000000 --- a/docker/test/testflows/runner/dockerd-entrypoint.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash -set -e - -echo "Configure to use Yandex dockerhub-proxy" -mkdir -p /etc/docker/ -cat > /etc/docker/daemon.json << EOF -{ - "insecure-registries" : ["dockerhub-proxy.dockerhub-proxy-zone:5000"], - "registry-mirrors" : ["http://dockerhub-proxy.dockerhub-proxy-zone:5000"] -} -EOF - -# In case of test hung it is convenient to use pytest --pdb to debug it, -# and on hung you can simply press Ctrl-C and it will spawn a python pdb, -# but on SIGINT dockerd will exit, so ignore it to preserve the daemon. 
-trap '' INT -dockerd --host=unix:///var/run/docker.sock --host=tcp://0.0.0.0:2375 &>/var/log/somefile & - -set +e -reties=0 -while true; do - docker info &>/dev/null && break - reties=$((reties+1)) - if [[ $reties -ge 100 ]]; then # 10 sec max - echo "Can't start docker daemon, timeout exceeded." >&2 - exit 1; - fi - sleep 0.1 -done -set -e - -echo "Start tests" -export CLICKHOUSE_TESTS_SERVER_BIN_PATH=/clickhouse -export CLICKHOUSE_TESTS_CLIENT_BIN_PATH=/clickhouse -export CLICKHOUSE_TESTS_BASE_CONFIG_DIR=/clickhouse-config -export CLICKHOUSE_ODBC_BRIDGE_BINARY_PATH=/clickhouse-odbc-bridge - -cd /ClickHouse/tests/testflows -exec "$@" diff --git a/docker/test/testflows/runner/modprobe.sh b/docker/test/testflows/runner/modprobe.sh deleted file mode 100755 index cb6a527736b..00000000000 --- a/docker/test/testflows/runner/modprobe.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/sh -set -eu - -# "modprobe" without modprobe -# https://twitter.com/lucabruno/status/902934379835662336 - -# this isn't 100% fool-proof, but it'll have a much higher success rate than simply using the "real" modprobe - -# Docker often uses "modprobe -va foo bar baz" -# so we ignore modules that start with "-" -for module; do - if [ "${module#-}" = "$module" ]; then - ip link show "$module" || true - lsmod | grep "$module" || true - fi -done - -# remove /usr/local/... from PATH so we can exec the real modprobe as a last resort -export PATH='/usr/sbin:/usr/bin:/sbin:/bin' -exec modprobe "$@" diff --git a/docker/test/testflows/runner/process_testflows_result.py b/docker/test/testflows/runner/process_testflows_result.py deleted file mode 100755 index 8bfc4ac0b0f..00000000000 --- a/docker/test/testflows/runner/process_testflows_result.py +++ /dev/null @@ -1,71 +0,0 @@ -#!/usr/bin/env python3 - -import os -import logging -import argparse -import csv -import json - - -def process_result(result_folder): - json_path = os.path.join(result_folder, "results.json") - if not os.path.exists(json_path): - return "success", "No testflows in branch", None, [] - - test_binary_log = os.path.join(result_folder, "test.log") - with open(json_path) as source: - results = json.loads(source.read()) - - total_tests = 0 - total_ok = 0 - total_fail = 0 - total_other = 0 - test_results = [] - for test in results["tests"]: - test_name = test["test"]["test_name"] - test_result = test["result"]["result_type"].upper() - test_time = str(test["result"]["message_rtime"]) - total_tests += 1 - if test_result == "OK": - total_ok += 1 - elif test_result == "FAIL" or test_result == "ERROR": - total_fail += 1 - else: - total_other += 1 - - test_results.append((test_name, test_result, test_time)) - if total_fail != 0: - status = "failure" - else: - status = "success" - - description = "failed: {}, passed: {}, other: {}".format( - total_fail, total_ok, total_other - ) - return status, description, test_results, [json_path, test_binary_log] - - -def write_results(results_file, status_file, results, status): - with open(results_file, "w") as f: - out = csv.writer(f, delimiter="\t") - out.writerows(results) - with open(status_file, "w") as f: - out = csv.writer(f, delimiter="\t") - out.writerow(status) - - -if __name__ == "__main__": - logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s") - parser = argparse.ArgumentParser( - description="ClickHouse script for parsing results of Testflows tests" - ) - parser.add_argument("--in-results-dir", default="./") - parser.add_argument("--out-results-file", default="./test_results.tsv") - 
parser.add_argument("--out-status-file", default="./check_status.tsv") - args = parser.parse_args() - - state, description, test_results, logs = process_result(args.in_results_dir) - logging.info("Result parsed") - status = (state, description) - write_results(args.out_results_file, args.out_status_file, test_results, status) - logging.info("Result written") diff --git a/docs/_includes/install/universal.sh b/docs/_includes/install/universal.sh index 30766cb6052..de34897a6f6 100755 --- a/docs/_includes/install/universal.sh +++ b/docs/_includes/install/universal.sh @@ -9,14 +9,22 @@ if [ "${OS}" = "Linux" ] then if [ "${ARCH}" = "x86_64" -o "${ARCH}" = "amd64" ] then - DIR="amd64" + # Require at least x86-64 + SSE4.2 (introduced in 2006). On older hardware fall back to plain x86-64 (introduced in 1999) which + # guarantees at least SSE2. The caveat is that plain x86-64 builds are much less tested than SSE 4.2 builds. + HAS_SSE42=$(grep sse4_2 /proc/cpuinfo) + if [ "${HAS_SSE42}" ] + then + DIR="amd64" + else + DIR="amd64compat" + fi elif [ "${ARCH}" = "aarch64" -o "${ARCH}" = "arm64" ] then # If the system has >=ARMv8.2 (https://en.wikipedia.org/wiki/AArch64), choose the corresponding build, else fall back to a v8.0 # compat build. Unfortunately, the ARM ISA level cannot be read directly, we need to guess from the "features" in /proc/cpuinfo. # Also, the flags in /proc/cpuinfo are named differently than the flags passed to the compiler (cmake/cpu_features.cmake). - ARMV82=$(grep -m 1 'Features' /proc/cpuinfo | awk '/asimd/ && /sha1/ && /aes/ && /atomics/ && /lrcpc/') - if [ "${ARMV82}" ] + HAS_ARMV82=$(grep -m 1 'Features' /proc/cpuinfo | awk '/asimd/ && /sha1/ && /aes/ && /atomics/ && /lrcpc/') + if [ "${HAS_ARMV82}" ] then DIR="aarch64" else diff --git a/docs/en/development/build-cross-osx.md b/docs/en/development/build-cross-osx.md index 7b151d087df..1df88dbb235 100644 --- a/docs/en/development/build-cross-osx.md +++ b/docs/en/development/build-cross-osx.md @@ -1,15 +1,15 @@ --- slug: /en/development/build-cross-osx sidebar_position: 66 -title: How to Build ClickHouse on Linux for Mac OS X -sidebar_label: Build on Linux for Mac OS X +title: How to Build ClickHouse on Linux for macOS +sidebar_label: Build on Linux for macOS --- This is for the case when you have a Linux machine and want to use it to build `clickhouse` binary that will run on OS X. -This is intended for continuous integration checks that run on Linux servers. If you want to build ClickHouse directly on Mac OS X, then proceed with [another instruction](../development/build-osx.md). +This is intended for continuous integration checks that run on Linux servers. If you want to build ClickHouse directly on macOS, then proceed with [another instruction](../development/build-osx.md). -The cross-build for Mac OS X is based on the [Build instructions](../development/build.md), follow them first. +The cross-build for macOS is based on the [Build instructions](../development/build.md), follow them first. 
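The `docs/_includes/install/universal.sh` change above selects a binary flavor by probing `/proc/cpuinfo`. For reference, a minimal standalone sketch of the same probes, assuming a POSIX shell on Linux; it only reports what it detects and deliberately does not name the AArch64 compat directory, since that value is not shown in the hunk:

```bash
#!/bin/sh
# Sketch of the CPU-capability probes used by the installer script above (Linux only).
ARCH=$(uname -m)
if [ "${ARCH}" = "x86_64" ] || [ "${ARCH}" = "amd64" ]; then
    # SSE 4.2 decides between the default amd64 build and the amd64compat fallback.
    HAS_SSE42=$(grep sse4_2 /proc/cpuinfo)
    if [ "${HAS_SSE42}" ]; then echo "amd64 (SSE 4.2 available)"; else echo "amd64compat (no SSE 4.2)"; fi
elif [ "${ARCH}" = "aarch64" ] || [ "${ARCH}" = "arm64" ]; then
    # ARMv8.2 support is guessed from the feature flags, as in the script above.
    HAS_ARMV82=$(grep -m 1 'Features' /proc/cpuinfo | awk '/asimd/ && /sha1/ && /aes/ && /atomics/ && /lrcpc/')
    if [ "${HAS_ARMV82}" ]; then echo "aarch64 (ARMv8.2 features present)"; else echo "ARMv8.0 compat build"; fi
fi
```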
## Install Clang-14 diff --git a/docs/en/development/build-osx.md b/docs/en/development/build-osx.md index 12f74feb272..656462eeb16 100644 --- a/docs/en/development/build-osx.md +++ b/docs/en/development/build-osx.md @@ -1,9 +1,9 @@ --- slug: /en/development/build-osx sidebar_position: 65 -sidebar_label: Build on Mac OS X -title: How to Build ClickHouse on Mac OS X -description: How to build ClickHouse on Mac OS X +sidebar_label: Build on macOS +title: How to Build ClickHouse on macOS +description: How to build ClickHouse on macOS --- :::info You don't have to build ClickHouse yourself! diff --git a/docs/en/development/developer-instruction.md b/docs/en/development/developer-instruction.md index 69afb31e214..526400e9cce 100644 --- a/docs/en/development/developer-instruction.md +++ b/docs/en/development/developer-instruction.md @@ -7,7 +7,7 @@ description: Prerequisites and an overview of how to build ClickHouse # Getting Started Guide for Building ClickHouse -The building of ClickHouse is supported on Linux, FreeBSD and Mac OS X. +The building of ClickHouse is supported on Linux, FreeBSD and macOS. If you use Windows, you need to create a virtual machine with Ubuntu. To start working with a virtual machine please install VirtualBox. You can download Ubuntu from the website: https://www.ubuntu.com/#download. Please create a virtual machine from the downloaded image (you should reserve at least 4GB of RAM for it). To run a command-line terminal in Ubuntu, please locate a program containing the word “terminal” in its name (gnome-terminal, konsole etc.) or just press Ctrl+Alt+T. @@ -194,7 +194,7 @@ In this case, ClickHouse will use config files located in the current directory. To connect to ClickHouse with clickhouse-client in another terminal navigate to `ClickHouse/build/programs/` and run `./clickhouse client`. -If you get `Connection refused` message on Mac OS X or FreeBSD, try specifying host address 127.0.0.1: +If you get `Connection refused` message on macOS or FreeBSD, try specifying host address 127.0.0.1: clickhouse client --host 127.0.0.1 @@ -213,7 +213,7 @@ You can also run your custom-built ClickHouse binary with the config file from t ## IDE (Integrated Development Environment) {#ide-integrated-development-environment} -If you do not know which IDE to use, we recommend that you use CLion. CLion is commercial software, but it offers 30 days free trial period. It is also free of charge for students. CLion can be used both on Linux and on Mac OS X. +If you do not know which IDE to use, we recommend that you use CLion. CLion is commercial software, but it offers 30 days free trial period. It is also free of charge for students. CLion can be used both on Linux and on macOS. KDevelop and QTCreator are other great alternatives of an IDE for developing ClickHouse. KDevelop comes in as a very handy IDE although unstable. If KDevelop crashes after a while upon opening project, you should click “Stop All” button as soon as it has opened the list of project’s files. After doing so KDevelop should be fine to work with. diff --git a/docs/en/development/tests.md b/docs/en/development/tests.md index e6d5cf66de9..729c3c9fb58 100644 --- a/docs/en/development/tests.md +++ b/docs/en/development/tests.md @@ -139,7 +139,7 @@ If the system clickhouse-server is already running and you do not want to stop i Build tests allow to check that build is not broken on various alternative configurations and on some foreign systems. These tests are automated as well. 
Examples: -- cross-compile for Darwin x86_64 (Mac OS X) +- cross-compile for Darwin x86_64 (macOS) - cross-compile for FreeBSD x86_64 - cross-compile for Linux AArch64 - build on Ubuntu with libraries from system packages (discouraged) diff --git a/docs/en/getting-started/install.md b/docs/en/getting-started/install.md index 7fb665c1671..e7dada5cb9a 100644 --- a/docs/en/getting-started/install.md +++ b/docs/en/getting-started/install.md @@ -6,10 +6,11 @@ slug: /en/install # Installing ClickHouse -You have two options for getting up and running with ClickHouse: +You have three options for getting up and running with ClickHouse: -- **[ClickHouse Cloud](https://clickhouse.com/cloud/):** the official ClickHouse as a service, - built by, maintained, and supported by the creators of ClickHouse -- **[Self-managed ClickHouse](#self-managed-install):** ClickHouse can run on any Linux, FreeBSD, or Mac OS X with x86_64, AArch64, or PowerPC64LE CPU architecture +- **[ClickHouse Cloud](https://clickhouse.com/cloud/):** The official ClickHouse as a service, built by, maintained, and supported by the creators of ClickHouse +- **[Self-managed ClickHouse](#self-managed-install):** ClickHouse can run on any Linux, FreeBSD, or macOS with x86-64, ARM, or PowerPC64LE CPU architecture +- **[Docker Image](https://hub.docker.com/r/clickhouse/clickhouse-server/):** Read the guide for the official image on Docker Hub ## ClickHouse Cloud @@ -22,73 +23,49 @@ The quickest and easiest way to get up and running with ClickHouse is to create Once your Cloud service is provisioned, you will be able to [connect to it](/docs/en/integrations/connect-a-client.md) and start [inserting data](/docs/en/integrations/data-ingestion.md). -:::note -The [Quick Start](/docs/en/quick-start.mdx) walks through the steps to get a ClickHouse Cloud service up and running, connecting to it, and inserting data. -::: - -## Self-Managed Requirements - -### CPU Architecture - -ClickHouse can run on any Linux, FreeBSD, or Mac OS X with x86_64, AArch64, or PowerPC64LE CPU architecture. - -Official pre-built binaries are typically compiled for x86_64 and leverage SSE 4.2 instruction set, so unless otherwise stated usage of CPU that supports it becomes an additional system requirement. Here’s the command to check if current CPU has support for SSE 4.2: - -``` bash -$ grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not supported" -``` - -To run ClickHouse on processors that do not support SSE 4.2 or have AArch64 or PowerPC64LE architecture, you should [build ClickHouse from sources](#from-sources) with proper configuration adjustments. - -ClickHouse implements parallel data processing and uses all the hardware resources available. When choosing a processor, take into account that ClickHouse works more efficiently at configurations with a large number of cores but a lower clock rate than at configurations with fewer cores and a higher clock rate. For example, 16 cores with 2600 MHz is preferable to 8 cores with 3600 MHz. - -It is recommended to use **Turbo Boost** and **hyper-threading** technologies. It significantly improves performance with a typical workload. - -### RAM {#ram} - -We recommend using a minimum of 4GB of RAM to perform non-trivial queries. The ClickHouse server can run with a much smaller amount of RAM, but it requires memory for processing queries. - -The required volume of RAM depends on: - -- The complexity of queries. -- The amount of data that is processed in queries.
- -To calculate the required volume of RAM, you should estimate the size of temporary data for [GROUP BY](/docs/en/sql-reference/statements/select/group-by.md#select-group-by-clause), [DISTINCT](/docs/en/sql-reference/statements/select/distinct.md#select-distinct), [JOIN](/docs/en/sql-reference/statements/select/join.md#select-join) and other operations you use. - -ClickHouse can use external memory for temporary data. See [GROUP BY in External Memory](/docs/en/sql-reference/statements/select/group-by.md#select-group-by-in-external-memory) for details. - -### Swap File {#swap-file} - -Disable the swap file for production environments. - -### Storage Subsystem {#storage-subsystem} - -You need to have 2GB of free disk space to install ClickHouse. - -The volume of storage required for your data should be calculated separately. Assessment should include: - -- Estimation of the data volume. - - You can take a sample of the data and get the average size of a row from it. Then multiply the value by the number of rows you plan to store. - -- The data compression coefficient. - - To estimate the data compression coefficient, load a sample of your data into ClickHouse, and compare the actual size of the data with the size of the table stored. For example, clickstream data is usually compressed by 6-10 times. - -To calculate the final volume of data to be stored, apply the compression coefficient to the estimated data volume. If you plan to store data in several replicas, then multiply the estimated volume by the number of replicas. - -### Network {#network} - -If possible, use networks of 10G or higher class. - -The network bandwidth is critical for processing distributed queries with a large amount of intermediate data. Besides, network speed affects replication processes. - -### Software {#software} - -ClickHouse is developed primarily for the Linux family of operating systems. The recommended Linux distribution is Ubuntu. The `tzdata` package should be installed in the system. ## Self-Managed Install +1. The simplest way to download ClickHouse locally is to run the following command. If your operating system is supported, an appropriate ClickHouse binary will be downloaded and made runnable: + ```bash + curl https://clickhouse.com/ | sh + ``` + +1. Run the `install` command, which defines a collection of useful symlinks along with the files and folders used by ClickHouse - all of which you can see in the output of the install script: + ```bash + sudo ./clickhouse install + ``` + +1. At the end of the install script, you are prompted for a password for the `default` user. Feel free to enter a password, or you can optionally leave it blank: + ```response + Creating log directory /var/log/clickhouse-server. + Creating data directory /var/lib/clickhouse. + Creating pid directory /var/run/clickhouse-server. + chown -R clickhouse:clickhouse '/var/log/clickhouse-server' + chown -R clickhouse:clickhouse '/var/run/clickhouse-server' + chown clickhouse:clickhouse '/var/lib/clickhouse' + Enter password for default user: + ``` + You should see the following output: + ```response + ClickHouse has been successfully installed. + + Start clickhouse-server with: + sudo clickhouse start + + Start clickhouse-client with: + clickhouse-client + ``` + +1. Run the following command to start the ClickHouse server: + ```bash + sudo clickhouse start + ``` + +:::tip +The [Quick Start](/docs/en/quick-start.mdx/#step-1-get-clickhouse) walks through the steps to download and run ClickHouse, connect to it, and insert data. 
+::: + ## Available Installation Options {#available-installation-options} ### From DEB Packages {#install-from-deb-packages} @@ -278,50 +255,16 @@ For production environments, it’s recommended to use the latest `stable`-versi To run ClickHouse inside Docker follow the guide on [Docker Hub](https://hub.docker.com/r/clickhouse/clickhouse-server/). Those images use official `deb` packages inside. -### Single Binary {#from-single-binary} - -You can install ClickHouse on Linux using a single portable binary from the latest commit of the `master` branch: [https://builds.clickhouse.com/master/amd64/clickhouse]. - -``` bash -curl -O 'https://builds.clickhouse.com/master/amd64/clickhouse' && chmod a+x clickhouse -sudo ./clickhouse install -``` - -### From Precompiled Binaries for Non-Standard Environments {#from-binaries-non-linux} - -For non-Linux operating systems and for AArch64 CPU architecture, ClickHouse builds are provided as a cross-compiled binary from the latest commit of the `master` branch (with a few hours delay). - -- [MacOS x86_64](https://builds.clickhouse.com/master/macos/clickhouse) - ```bash - curl -O 'https://builds.clickhouse.com/master/macos/clickhouse' && chmod a+x ./clickhouse - ``` -- [MacOS Aarch64 (Apple Silicon)](https://builds.clickhouse.com/master/macos-aarch64/clickhouse) - ```bash - curl -O 'https://builds.clickhouse.com/master/macos-aarch64/clickhouse' && chmod a+x ./clickhouse - ``` -- [FreeBSD x86_64](https://builds.clickhouse.com/master/freebsd/clickhouse) - ```bash - curl -O 'https://builds.clickhouse.com/master/freebsd/clickhouse' && chmod a+x ./clickhouse - ``` -- [Linux AArch64](https://builds.clickhouse.com/master/aarch64/clickhouse) - ```bash - curl -O 'https://builds.clickhouse.com/master/aarch64/clickhouse' && chmod a+x ./clickhouse - ``` - -Run `sudo ./clickhouse install` to install ClickHouse system-wide (also with needed configuration files, configuring users etc.). Then run `sudo clickhouse start` commands to start the clickhouse-server and `clickhouse-client` to connect to it. - -Use the `clickhouse client` to connect to the server, or `clickhouse local` to process local data. - ### From Sources {#from-sources} -To manually compile ClickHouse, follow the instructions for [Linux](/docs/en/development/build.md) or [Mac OS X](/docs/en/development/build-osx.md). +To manually compile ClickHouse, follow the instructions for [Linux](/docs/en/development/build.md) or [macOS](/docs/en/development/build-osx.md). -You can compile packages and install them or use programs without installing packages. Also by building manually you can disable SSE 4.2 requirement or build for AArch64 CPUs. +You can compile packages and install them or use programs without installing packages. - Client: programs/clickhouse-client - Server: programs/clickhouse-server + Client: /programs/clickhouse-client + Server: /programs/clickhouse-server -You’ll need to create a data and metadata folders and `chown` them for the desired user. Their paths can be changed in server config (src/programs/server/config.xml), by default they are: +You’ll need to create data and metadata folders manually and `chown` them for the desired user. Their paths can be changed in server config (src/programs/server/config.xml), by default they are: /var/lib/clickhouse/data/default/ /var/lib/clickhouse/metadata/default/ @@ -406,3 +349,42 @@ SELECT 1 **Congratulations, the system works!** To continue experimenting, you can download one of the test data sets or go through [tutorial](/docs/en/tutorial.md). 
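The install steps added above can be exercised end to end with the commands the document already shows; a minimal sketch, assuming a supported host with `curl` and `sudo` available:

```bash
# Condensed version of the self-managed install flow described above.
curl https://clickhouse.com/ | sh        # download a runnable ./clickhouse binary
sudo ./clickhouse install                # create symlinks, config, data and log directories
sudo clickhouse start                    # start clickhouse-server
# Verify the server answers (add --password if you set one during install):
clickhouse-client --query "SELECT 1"
```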
+ +## Recommendations for Self-Managed ClickHouse + +ClickHouse can run on any Linux, FreeBSD, or macOS with x86-64, ARM, or PowerPC64LE CPU architecture. + +ClickHouse uses all hardware resources available to process data. + +ClickHouse tends to work more efficiently with a large number of cores at a lower clock rate than with fewer cores at a higher clock rate. + +We recommend using a minimum of 4GB of RAM to perform non-trivial queries. The ClickHouse server can run with a much smaller amount of RAM, but queries will then frequently abort. + +The required volume of RAM generally depends on: + +- The complexity of queries. +- The amount of data that is processed in queries. + +To calculate the required volume of RAM, you may estimate the size of temporary data for [GROUP BY](/docs/en/sql-reference/statements/select/group-by.md#select-group-by-clause), [DISTINCT](/docs/en/sql-reference/statements/select/distinct.md#select-distinct), [JOIN](/docs/en/sql-reference/statements/select/join.md#select-join) and other operations you use. + +To reduce memory consumption, ClickHouse can swap temporary data to external storage. See [GROUP BY in External Memory](/docs/en/sql-reference/statements/select/group-by.md#select-group-by-in-external-memory) for details. + +We recommend to disable the operating system's swap file in production environments. + +The ClickHouse binary requires at least 2.5 GB of disk space for installation. + +The volume of storage required for your data may be calculated separately based on + +- an estimation of the data volume. + + You can take a sample of the data and get the average size of a row from it. Then multiply the value by the number of rows you plan to store. + +- The data compression coefficient. + + To estimate the data compression coefficient, load a sample of your data into ClickHouse, and compare the actual size of the data with the size of the table stored. For example, clickstream data is usually compressed by 6-10 times. + +To calculate the final volume of data to be stored, apply the compression coefficient to the estimated data volume. If you plan to store data in several replicas, then multiply the estimated volume by the number of replicas. + +For distributed ClickHouse deployments (clustering), we recommend at least 10G class network connectivity. + +Network bandwidth is critical for processing distributed queries with a large amount of intermediate data. Besides, network speed affects replication processes. diff --git a/docs/en/interfaces/cli.md b/docs/en/interfaces/cli.md index 4f07f99fb26..e3b40d83efe 100644 --- a/docs/en/interfaces/cli.md +++ b/docs/en/interfaces/cli.md @@ -2,11 +2,10 @@ slug: /en/interfaces/cli sidebar_position: 17 sidebar_label: Command-Line Client +title: Command-Line Client --- import ConnectionDetails from '@site/docs/en/_snippets/_gather_your_details_native.md'; -# Command-line Client - ## clickhouse-client ClickHouse provides a native command-line client: `clickhouse-client`. The client supports command-line options and configuration files. For more information, see [Configuring](#interfaces_cli_configuration). diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index b7ef859f974..3fe26fa8eff 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -85,7 +85,7 @@ The supported formats are: | [MySQLDump](#mysqldump) | ✔ | ✗ | -You can control some format processing parameters with the ClickHouse settings. 
For more information read the [Settings](../operations/settings/settings.md) section. +You can control some format processing parameters with the ClickHouse settings. For more information read the [Settings](/docs/en/operations/settings/settings.md) section. ## TabSeparated {#tabseparated} @@ -148,12 +148,12 @@ Only a small set of symbols are escaped. You can easily stumble onto a string va Arrays are written as a list of comma-separated values in square brackets. Number items in the array are formatted as normally. `Date` and `DateTime` types are written in single quotes. Strings are written in single quotes with the same escaping rules as above. -[NULL](../sql-reference/syntax.md) is formatted according to setting [format_tsv_null_representation](../operations/settings/settings.md#format_tsv_null_representation) (default value is `\N`). +[NULL](/docs/en/sql-reference/syntax.md) is formatted according to setting [format_tsv_null_representation](/docs/en/operations/settings/settings.md/#format_tsv_null_representation) (default value is `\N`). In input data, ENUM values can be represented as names or as ids. First, we try to match the input value to the ENUM name. If we fail and the input value is a number, we try to match this number to ENUM id. -If input data contains only ENUM ids, it's recommended to enable the setting [input_format_tsv_enum_as_number](../operations/settings/settings.md#input_format_tsv_enum_as_number) to optimize ENUM parsing. +If input data contains only ENUM ids, it's recommended to enable the setting [input_format_tsv_enum_as_number](/docs/en/operations/settings/settings.md/#input_format_tsv_enum_as_number) to optimize ENUM parsing. -Each element of [Nested](../sql-reference/data-types/nested-data-structures/nested.md) structures is represented as array. +Each element of [Nested](/docs/en/sql-reference/data-types/nested-data-structures/nested.md) structures is represented as an array. For example: @@ -183,12 +183,12 @@ SELECT * FROM nestedt FORMAT TSV ### TabSeparated format settings {#tabseparated-format-settings} -- [format_tsv_null_representation](../operations/settings/settings.md#format_tsv_null_representation) - custom NULL representation in TSV format. Default value - `\N`. -- [input_format_tsv_empty_as_default](../operations/settings/settings.md#input_format_tsv_empty_as_default) - treat empty fields in TSV input as default values. Default value - `false`. For complex default expressions [input_format_defaults_for_omitted_fields](../operations/settings/settings.md#input_format_defaults_for_omitted_fields) must be enabled too. -- [input_format_tsv_enum_as_number](../operations/settings/settings.md#input_format_tsv_enum_as_number) - treat inserted enum values in TSV formats as enum indices. Default value - `false`. -- [input_format_tsv_use_best_effort_in_schema_inference](../operations/settings/settings.md#input_format_tsv_use_best_effort_in_schema_inference) - use some tweaks and heuristics to infer schema in TSV format. If disabled, all fields will be inferred as Strings. Default value - `true`. -- [output_format_tsv_crlf_end_of_line](../operations/settings/settings.md#output_format_tsv_crlf_end_of_line) - if it is set true, end of line in TSV output format will be `\r\n` instead of `\n`. Default value - `false`. -- [input_format_tsv_skip_first_lines](../operations/settings/settings.md#input_format_tsv_skip_first_lines) - skip specified number of lines at the beginning of data. Default value - `0`. 
+- [format_tsv_null_representation](/docs/en/operations/settings/settings.md/#format_tsv_null_representation) - custom NULL representation in TSV format. Default value - `\N`. +- [input_format_tsv_empty_as_default](/docs/en/operations/settings/settings.md/#input_format_tsv_empty_as_default) - treat empty fields in TSV input as default values. Default value - `false`. For complex default expressions [input_format_defaults_for_omitted_fields](/docs/en/operations/settings/settings.md/#input_format_defaults_for_omitted_fields) must be enabled too. +- [input_format_tsv_enum_as_number](/docs/en/operations/settings/settings.md/#input_format_tsv_enum_as_number) - treat inserted enum values in TSV formats as enum indices. Default value - `false`. +- [input_format_tsv_use_best_effort_in_schema_inference](/docs/en/operations/settings/settings.md/#input_format_tsv_use_best_effort_in_schema_inference) - use some tweaks and heuristics to infer schema in TSV format. If disabled, all fields will be inferred as Strings. Default value - `true`. +- [output_format_tsv_crlf_end_of_line](/docs/en/operations/settings/settings.md/#output_format_tsv_crlf_end_of_line) - if it is set true, end of line in TSV output format will be `\r\n` instead of `\n`. Default value - `false`. +- [input_format_tsv_skip_first_lines](/docs/en/operations/settings/settings.md/#input_format_tsv_skip_first_lines) - skip specified number of lines at the beginning of data. Default value - `0`. ## TabSeparatedRaw {#tabseparatedraw} @@ -204,8 +204,8 @@ Differs from the `TabSeparated` format in that the column names are written in t During parsing, the first row is expected to contain the column names. You can use column names to determine their position and to check their correctness. :::warning -If setting [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) is set to 1, -the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1. +If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings.md/#input_format_with_names_use_header) is set to 1, +the columns from the input data will be mapped to the columns of the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. ::: @@ -216,10 +216,10 @@ This format is also available under the name `TSVWithNames`. Differs from the `TabSeparated` format in that the column names are written to the first row, while the column types are in the second row. :::warning -If setting [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) is set to 1, -the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1. 
+If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings.md/#input_format_with_names_use_header) is set to 1, +the columns from the input data will be mapped to the columns in the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. -If setting [input_format_with_types_use_header](../operations/settings/settings.md#input_format_with_types_use_header) is set to 1, +If setting [input_format_with_types_use_header](/docs/en/operations/settings/settings.md/#input_format_with_types_use_header) is set to 1, the types from input data will be compared with the types of the corresponding columns from the table. Otherwise, the second row will be skipped. ::: @@ -245,7 +245,7 @@ This format allows specifying a custom format string with placeholders for value It uses settings `format_template_resultset`, `format_template_row`, `format_template_rows_between_delimiter` and some settings of other formats (e.g. `output_format_json_quote_64bit_integers` when using `JSON` escaping, see further) -Setting `format_template_row` specifies path to file, which contains format string for rows with the following syntax: +Setting `format_template_row` specifies the path to the file containing format strings for rows with the following syntax: `delimiter_1${column_1:serializeAs_1}delimiter_2${column_2:serializeAs_2} ... delimiter_N`, @@ -253,10 +253,10 @@ where `delimiter_i` is a delimiter between values (`$` symbol can be escaped as `column_i` is a name or index of a column whose values are to be selected or inserted (if empty, then column will be skipped), `serializeAs_i` is an escaping rule for the column values. The following escaping rules are supported: -- `CSV`, `JSON`, `XML` (similarly to the formats of the same names) -- `Escaped` (similarly to `TSV`) -- `Quoted` (similarly to `Values`) -- `Raw` (without escaping, similarly to `TSVRaw`) +- `CSV`, `JSON`, `XML` (similar to the formats of the same names) +- `Escaped` (similar to `TSV`) +- `Quoted` (similar to `Values`) +- `Raw` (without escaping, similar to `TSVRaw`) - `None` (no escaping rule, see further) If an escaping rule is omitted, then `None` will be used. `XML` is suitable only for output. @@ -269,9 +269,9 @@ the values of `SearchPhrase`, `c` and `price` columns, which are escaped as `Quo `Search phrase: 'bathroom interior design', count: 2166, ad price: $3;` -The `format_template_rows_between_delimiter` setting specifies delimiter between rows, which is printed (or expected) after every row except the last one (`\n` by default) +The `format_template_rows_between_delimiter` setting specifies the delimiter between rows, which is printed (or expected) after every row except the last one (`\n` by default) -Setting `format_template_resultset` specifies the path to file, which contains a format string for resultset. Format string for resultset has the same syntax as a format string for row and allows to specify a prefix, a suffix and a way to print some additional information. It contains the following placeholders instead of column names: +Setting `format_template_resultset` specifies the path to the file, which contains a format string for resultset. Format string for resultset has the same syntax as a format string for row and allows to specify a prefix, a suffix and a way to print some additional information. 
It contains the following placeholders instead of column names: - `data` is the rows with data in `format_template_row` format, separated by `format_template_rows_between_delimiter`. This placeholder must be the first placeholder in the format string. - `totals` is the row with total values in `format_template_row` format (when using WITH TOTALS) @@ -284,8 +284,8 @@ Setting `format_template_resultset` specifies the path to file, which contains a - `bytes_read` is the number of bytes (uncompressed) has been read The placeholders `data`, `totals`, `min` and `max` must not have escaping rule specified (or `None` must be specified explicitly). The remaining placeholders may have any escaping rule specified. -If the `format_template_resultset` setting is an empty string, `${data}` is used as default value. -For insert queries format allows skipping some columns or some fields if prefix or suffix (see example). +If the `format_template_resultset` setting is an empty string, `${data}` is used as the default value. +For insert queries, the format allows skipping some columns or fields if a prefix or suffix is specified (see example). Select example: @@ -373,8 +373,8 @@ All delimiters in the input data must be strictly equal to delimiters in specifi ## TemplateIgnoreSpaces {#templateignorespaces} This format is suitable only for input. -Similar to `Template`, but skips whitespace characters between delimiters and values in the input stream. However, if format strings contain whitespace characters, these characters will be expected in the input stream. Also allows to specify empty placeholders (`${}` or `${:None}`) to split some delimiter into separate parts to ignore spaces between them. Such placeholders are used only for skipping whitespace characters. -It’s possible to read `JSON` using this format, if values of columns have the same order in all rows. For example, the following request can be used for inserting data from output example of format [JSON](#json): +Similar to `Template`, but skips whitespace characters between delimiters and values in the input stream. However, if format strings contain whitespace characters, these characters will be expected in the input stream. Also allows specifying empty placeholders (`${}` or `${:None}`) to split some delimiter into separate parts to ignore spaces between them. Such placeholders are used only for skipping whitespace characters. +It’s possible to read `JSON` using this format if the values of columns have the same order in all rows. For example, the following request can be used for inserting data from the output example of the [JSON](#json) format: ``` sql INSERT INTO table_name SETTINGS @@ -411,7 +411,7 @@ SearchPhrase=curtain designs count()=1064 SearchPhrase=baku count()=1000 ``` -[NULL](../sql-reference/syntax.md) is formatted as `\N`. +[NULL](/docs/en/sql-reference/syntax.md) is formatted as `\N`. ``` sql SELECT * FROM t_null FORMAT TSKV @@ -427,49 +427,49 @@ Both data output and parsing are supported in this format. For parsing, any orde Parsing allows the presence of the additional field `tskv` without the equal sign or a value. This field is ignored. -During import, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1. +During import, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings.md/#input_format_skip_unknown_fields) is set to 1.
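For the `Template` format described above, a short usage sketch may help. It passes the template settings as command-line options, in the same way the CSV section below passes `--format_csv_delimiter`; the file path is hypothetical, and the table and columns are borrowed from the documentation's other examples:

```bash
# Write a row template using the delimiter${column:serializeAs} syntax described above.
printf 'Search phrase: ${SearchPhrase:Quoted}, count: ${c:Escaped}' > /tmp/row.format
# format_template_resultset is left empty, so ${data} is used as the result set template.
clickhouse-client --format_template_row=/tmp/row.format \
    --query "SELECT SearchPhrase, count() AS c FROM test.hits GROUP BY SearchPhrase ORDER BY c DESC LIMIT 3 FORMAT Template"
```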
## CSV {#csv} Comma Separated Values format ([RFC](https://tools.ietf.org/html/rfc4180)). -When formatting, rows are enclosed in double-quotes. A double quote inside a string is output as two double quotes in a row. There are no other rules for escaping characters. Date and date-time are enclosed in double-quotes. Numbers are output without quotes. Values are separated by a delimiter character, which is `,` by default. The delimiter character is defined in the setting [format_csv_delimiter](../operations/settings/settings.md#format_csv_delimiter). Rows are separated using the Unix line feed (LF). Arrays are serialized in CSV as follows: first, the array is serialized to a string as in TabSeparated format, and then the resulting string is output to CSV in double-quotes. Tuples in CSV format are serialized as separate columns (that is, their nesting in the tuple is lost). +When formatting, rows are enclosed in double quotes. A double quote inside a string is output as two double quotes in a row. There are no other rules for escaping characters. Date and date-time are enclosed in double quotes. Numbers are output without quotes. Values are separated by a delimiter character, which is `,` by default. The delimiter character is defined in the setting [format_csv_delimiter](/docs/en/operations/settings/settings.md/#format_csv_delimiter). Rows are separated using the Unix line feed (LF). Arrays are serialized in CSV as follows: first, the array is serialized to a string as in TabSeparated format, and then the resulting string is output to CSV in double quotes. Tuples in CSV format are serialized as separate columns (that is, their nesting in the tuple is lost). ``` bash $ clickhouse-client --format_csv_delimiter="|" --query="INSERT INTO test.csv FORMAT CSV" < data.csv ``` -\*By default, the delimiter is `,`. See the [format_csv_delimiter](../operations/settings/settings.md#format_csv_delimiter) setting for more information. +\*By default, the delimiter is `,`. See the [format_csv_delimiter](/docs/en/operations/settings/settings.md/#format_csv_delimiter) setting for more information. When parsing, all values can be parsed either with or without quotes. Both double and single quotes are supported. Rows can also be arranged without quotes. In this case, they are parsed up to the delimiter character or line feed (CR or LF). In violation of the RFC, when parsing rows without quotes, the leading and trailing spaces and tabs are ignored. For the line feed, Unix (LF), Windows (CR LF) and Mac OS Classic (CR LF) types are all supported. -`NULL` is formatted according to setting [format_csv_null_representation](../operations/settings/settings.md#format_csv_null_representation) (default value is `\N`). +`NULL` is formatted according to setting [format_csv_null_representation](/docs/en/operations/settings/settings.md/#format_csv_null_representation) (default value is `\N`). -In input data, ENUM values can be represented as names or as ids. First, we try to match the input value to the ENUM name. If we fail and the input value is a number, we try to match this number to ENUM id. -If input data contains only ENUM ids, it's recommended to enable the setting [input_format_csv_enum_as_number](../operations/settings/settings.md#input_format_csv_enum_as_number) to optimize ENUM parsing. +In input data, ENUM values can be represented as names or as ids. First, we try to match the input value to the ENUM name. If we fail and the input value is a number, we try to match this number to the ENUM id. 
+If input data contains only ENUM ids, it's recommended to enable the setting [input_format_csv_enum_as_number](/docs/en/operations/settings/settings.md/#input_format_csv_enum_as_number) to optimize ENUM parsing. The CSV format supports the output of totals and extremes the same way as `TabSeparated`. ### CSV format settings {#csv-format-settings} -- [format_csv_delimiter](../operations/settings/settings.md#format_csv_delimiter) - the character to be considered as a delimiter in CSV data. Default value - `,`. -- [format_csv_allow_single_quotes](../operations/settings/settings.md#format_csv_allow_single_quotes) - allow strings in single quotes. Default value - `true`. -- [format_csv_allow_double_quotes](../operations/settings/settings.md#format_csv_allow_double_quotes) - allow strings in double quotes. Default value - `true`. -- [format_csv_null_representation](../operations/settings/settings.md#format_tsv_null_representation) - custom NULL representation in CSV format. Default value - `\N`. -- [input_format_csv_empty_as_default](../operations/settings/settings.md#input_format_csv_empty_as_default) - treat empty fields in CSV input as default values. Default value - `true`. For complex default expressions [input_format_defaults_for_omitted_fields](../operations/settings/settings.md#input_format_defaults_for_omitted_fields) must be enabled too. -- [input_format_csv_enum_as_number](../operations/settings/settings.md#input_format_csv_enum_as_number) - treat inserted enum values in CSV formats as enum indices. Default value - `false`. -- [input_format_csv_use_best_effort_in_schema_inference](../operations/settings/settings.md#input_format_csv_use_best_effort_in_schema_inference) - use some tweaks and heuristics to infer schema in CSV format. If disabled, all fields will be inferred as Strings. Default value - `true`. -- [input_format_csv_arrays_as_nested_csv](../operations/settings/settings.md#input_format_csv_arrays_as_nested_csv) - when reading Array from CSV, expect that its elements were serialized in nested CSV and then put into string. Default value - `false`. -- [output_format_csv_crlf_end_of_line](../operations/settings/settings.md#output_format_csv_crlf_end_of_line) - if it is set true, end of line in CSV output format will be `\r\n` instead of `\n`. Default value - `false`. -- [input_format_csv_skip_first_lines](../operations/settings/settings.md#input_format_csv_skip_first_lines) - skip specified number of lines at the beginning of data. Default value - `0`. +- [format_csv_delimiter](/docs/en/operations/settings/settings.md/#format_csv_delimiter) - the character to be considered as a delimiter in CSV data. Default value - `,`. +- [format_csv_allow_single_quotes](/docs/en/operations/settings/settings.md/#format_csv_allow_single_quotes) - allow strings in single quotes. Default value - `true`. +- [format_csv_allow_double_quotes](/docs/en/operations/settings/settings.md/#format_csv_allow_double_quotes) - allow strings in double quotes. Default value - `true`. +- [format_csv_null_representation](/docs/en/operations/settings/settings.md/#format_tsv_null_representation) - custom NULL representation in CSV format. Default value - `\N`. +- [input_format_csv_empty_as_default](/docs/en/operations/settings/settings.md/#input_format_csv_empty_as_default) - treat empty fields in CSV input as default values. Default value - `true`. 
For complex default expressions, [input_format_defaults_for_omitted_fields](/docs/en/operations/settings/settings.md/#input_format_defaults_for_omitted_fields) must be enabled too. +- [input_format_csv_enum_as_number](/docs/en/operations/settings/settings.md/#input_format_csv_enum_as_number) - treat inserted enum values in CSV formats as enum indices. Default value - `false`. +- [input_format_csv_use_best_effort_in_schema_inference](/docs/en/operations/settings/settings.md/#input_format_csv_use_best_effort_in_schema_inference) - use some tweaks and heuristics to infer schema in CSV format. If disabled, all fields will be inferred as Strings. Default value - `true`. +- [input_format_csv_arrays_as_nested_csv](/docs/en/operations/settings/settings.md/#input_format_csv_arrays_as_nested_csv) - when reading Array from CSV, expect that its elements were serialized in nested CSV and then put into string. Default value - `false`. +- [output_format_csv_crlf_end_of_line](/docs/en/operations/settings/settings.md/#output_format_csv_crlf_end_of_line) - if it is set to true, end of line in CSV output format will be `\r\n` instead of `\n`. Default value - `false`. +- [input_format_csv_skip_first_lines](/docs/en/operations/settings/settings.md/#input_format_csv_skip_first_lines) - skip the specified number of lines at the beginning of data. Default value - `0`. ## CSVWithNames {#csvwithnames} Also prints the header row with column names, similar to [TabSeparatedWithNames](#tabseparatedwithnames). :::warning -If setting [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) is set to 1, -the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1. +If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings.md/#input_format_with_names_use_header) is set to 1, +the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. ::: @@ -478,16 +478,16 @@ Otherwise, the first row will be skipped. Also prints two header rows with column names and types, similar to [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes). :::warning -If setting [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) is set to 1, -the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1. +If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings.md/#input_format_with_names_use_header) is set to 1, +the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. 
-If setting [input_format_with_types_use_header](../operations/settings/settings.md#input_format_with_types_use_header) is set to 1, +If setting [input_format_with_types_use_header](/docs/en/operations/settings/settings.md/#input_format_with_types_use_header) is set to 1, the types from input data will be compared with the types of the corresponding columns from the table. Otherwise, the second row will be skipped. ::: ## CustomSeparated {#format-customseparated} -Similar to [Template](#format-template), but it prints or reads all names and types of columns and uses escaping rule from [format_custom_escaping_rule](../operations/settings/settings.md#format_custom_escaping_rule) setting and delimiters from [format_custom_field_delimiter](../operations/settings/settings.md#format_custom_field_delimiter), [format_custom_row_before_delimiter](../operations/settings/settings.md#format_custom_row_before_delimiter), [format_custom_row_after_delimiter](../operations/settings/settings.md#format_custom_row_after_delimiter), [format_custom_row_between_delimiter](../operations/settings/settings.md#format_custom_row_between_delimiter), [format_custom_result_before_delimiter](../operations/settings/settings.md#format_custom_result_before_delimiter) and [format_custom_result_after_delimiter](../operations/settings/settings.md#format_custom_result_after_delimiter) settings, not from format strings. +Similar to [Template](#format-template), but it prints or reads all names and types of columns and uses escaping rule from [format_custom_escaping_rule](/docs/en/operations/settings/settings.md/#format_custom_escaping_rule) setting and delimiters from [format_custom_field_delimiter](/docs/en/operations/settings/settings.md/#format_custom_field_delimiter), [format_custom_row_before_delimiter](/docs/en/operations/settings/settings.md/#format_custom_row_before_delimiter), [format_custom_row_after_delimiter](/docs/en/operations/settings/settings.md/#format_custom_row_after_delimiter), [format_custom_row_between_delimiter](/docs/en/operations/settings/settings.md/#format_custom_row_between_delimiter), [format_custom_result_before_delimiter](/docs/en/operations/settings/settings.md/#format_custom_result_before_delimiter) and [format_custom_result_after_delimiter](/docs/en/operations/settings/settings.md/#format_custom_result_after_delimiter) settings, not from format strings. There is also `CustomSeparatedIgnoreSpaces` format, which is similar to [TemplateIgnoreSpaces](#templateignorespaces). @@ -496,8 +496,8 @@ There is also `CustomSeparatedIgnoreSpaces` format, which is similar to [Templat Also prints the header row with column names, similar to [TabSeparatedWithNames](#tabseparatedwithnames). :::warning -If setting [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) is set to 1, -the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1. +If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings.md/#input_format_with_names_use_header) is set to 1, +the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings.md/#input_format_skip_unknown_fields) is set to 1. 
Otherwise, the first row will be skipped. ::: @@ -506,10 +506,10 @@ Otherwise, the first row will be skipped. Also prints two header rows with column names and types, similar to [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes). :::warning -If setting [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) is set to 1, -the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1. +If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings.md/#input_format_with_names_use_header) is set to 1, +the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. -If setting [input_format_with_types_use_header](../operations/settings/settings.md#input_format_with_types_use_header) is set to 1, +If setting [input_format_with_types_use_header](/docs/en/operations/settings/settings.md/#input_format_with_types_use_header) is set to 1, the types from input data will be compared with the types of the corresponding columns from the table. Otherwise, the second row will be skipped. ::: @@ -531,15 +531,15 @@ INSERT INTO table (x, y, z) VALUES (6, 7, 'Hello'), (7, 8, 'Hello'); INSERT INTO table (x, y, z) VALUES (8, 9, 'Hello'), (9, 10, 'Hello'); ``` -To read data output by this format ypu can use [MySQLDump](#mysqldump) input format. +To read data output by this format you can use [MySQLDump](#mysqldump) input format. ### SQLInsert format settings {#sqlinsert-format-settings} -- [output_format_sql_insert_max_batch_size](../operations/settings/settings.md#output_format_sql_insert_max_batch_size) - The maximum number of rows in one INSERT statement. Default value - `65505`. -- [output_format_sql_insert_table_name](../operations/settings/settings.md#output_format_sql_insert_table_name) - The name of table in the output INSERT query. Default value - `'table'`. -- [output_format_sql_insert_include_column_names](../operations/settings/settings.md#output_format_sql_insert_include_column_names) - Include column names in INSERT query. Default value - `true`. -- [output_format_sql_insert_use_replace](../operations/settings/settings.md#output_format_sql_insert_use_replace) - Use REPLACE statement instead of INSERT. Default value - `false`. -- [output_format_sql_insert_quote_names](../operations/settings/settings.md#output_format_sql_insert_quote_names) - Quote column names with "\`" characters . Default value - `true`. +- [output_format_sql_insert_max_batch_size](/docs/en/operations/settings/settings.md/#output_format_sql_insert_max_batch_size) - The maximum number of rows in one INSERT statement. Default value - `65505`. +- [output_format_sql_insert_table_name](/docs/en/operations/settings/settings.md/#output_format_sql_insert_table_name) - The name of the table in the output INSERT query. Default value - `'table'`. +- [output_format_sql_insert_include_column_names](/docs/en/operations/settings/settings.md/#output_format_sql_insert_include_column_names) - Include column names in INSERT query. Default value - `true`. 
+- [output_format_sql_insert_use_replace](/docs/en/operations/settings/settings.md/#output_format_sql_insert_use_replace) - Use REPLACE statement instead of INSERT. Default value - `false`. +- [output_format_sql_insert_quote_names](/docs/en/operations/settings/settings.md/#output_format_sql_insert_quote_names) - Quote column names with "\`" characters. Default value - `true`. ## JSON {#json} @@ -599,7 +599,7 @@ SELECT SearchPhrase, count() AS c FROM test.hits GROUP BY SearchPhrase WITH TOTA } ``` -The JSON is compatible with JavaScript. To ensure this, some characters are additionally escaped: the slash `/` is escaped as `\/`; alternative line breaks `U+2028` and `U+2029`, which break some browsers, are escaped as `\uXXXX`. ASCII control characters are escaped: backspace, form feed, line feed, carriage return, and horizontal tab are replaced with `\b`, `\f`, `\n`, `\r`, `\t` , as well as the remaining bytes in the 00-1F range using `\uXXXX` sequences. Invalid UTF-8 sequences are changed to the replacement character � so the output text will consist of valid UTF-8 sequences. For compatibility with JavaScript, Int64 and UInt64 integers are enclosed in double-quotes by default. To remove the quotes, you can set the configuration parameter [output_format_json_quote_64bit_integers](../operations/settings/settings.md#output_format_json_quote_64bit_integers) to 0. +The JSON is compatible with JavaScript. To ensure this, some characters are additionally escaped: the slash `/` is escaped as `\/`; alternative line breaks `U+2028` and `U+2029`, which break some browsers, are escaped as `\uXXXX`. ASCII control characters are escaped: backspace, form feed, line feed, carriage return, and horizontal tab are replaced with `\b`, `\f`, `\n`, `\r`, `\t` , as well as the remaining bytes in the 00-1F range using `\uXXXX` sequences. Invalid UTF-8 sequences are changed to the replacement character � so the output text will consist of valid UTF-8 sequences. For compatibility with JavaScript, Int64 and UInt64 integers are enclosed in double quotes by default. To remove the quotes, you can set the configuration parameter [output_format_json_quote_64bit_integers](/docs/en/operations/settings/settings.md/#output_format_json_quote_64bit_integers) to 0. `rows` – The total number of output rows. @@ -610,14 +610,14 @@ If the query contains GROUP BY, rows_before_limit_at_least is the exact number o `extremes` – Extreme values (when extremes are set to 1). -ClickHouse supports [NULL](../sql-reference/syntax.md), which is displayed as `null` in the JSON output. To enable `+nan`, `-nan`, `+inf`, `-inf` values in output, set the [output_format_json_quote_denormals](../operations/settings/settings.md#output_format_json_quote_denormals) to 1. +ClickHouse supports [NULL](/docs/en/sql-reference/syntax.md), which is displayed as `null` in the JSON output. To enable `+nan`, `-nan`, `+inf`, `-inf` values in output, set the [output_format_json_quote_denormals](/docs/en/operations/settings/settings.md/#output_format_json_quote_denormals) to 1. 
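Since the paragraph above notes that Int64 and UInt64 values are quoted in `JSON` output by default, a small sketch of toggling `output_format_json_quote_64bit_integers` may be useful; it passes the setting as a command-line option and uses a constant query, so no table is required:

```bash
# With the default setting the 64-bit value is emitted as a quoted string;
# with the setting disabled it is emitted as a bare JSON number.
clickhouse-client --query "SELECT toUInt64(9007199254740993) AS big FORMAT JSON"
clickhouse-client --output_format_json_quote_64bit_integers=0 \
    --query "SELECT toUInt64(9007199254740993) AS big FORMAT JSON"
```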
**See Also** - [JSONEachRow](#jsoneachrow) format -- [output_format_json_array_of_rows](../operations/settings/settings.md#output_format_json_array_of_rows) setting +- [output_format_json_array_of_rows](/docs/en/operations/settings/settings.md/#output_format_json_array_of_rows) setting -For JSON input format, if setting [input_format_json_validate_types_from_metadata](../operations/settings/settings.md#input_format_json_validate_types_from_metadata) is set to 1, +For JSON input format, if setting [input_format_json_validate_types_from_metadata](/docs/en/operations/settings/settings.md/#input_format_json_validate_types_from_metadata) is set to 1, the types from metadata in input data will be compared with the types of the corresponding columns from the table. ## JSONStrings {#jsonstrings} @@ -690,8 +690,8 @@ Example: } ``` -During import, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1. -Columns that are not present in the block will be filled with default values (you can use [input_format_defaults_for_omitted_fields](../operations/settings/settings.md#input_format_defaults_for_omitted_fields) setting here) +During import, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings.md/#input_format_skip_unknown_fields) is set to 1. +Columns that are not present in the block will be filled with default values (you can use the [input_format_defaults_for_omitted_fields](/docs/en/operations/settings/settings.md/#input_format_defaults_for_omitted_fields) setting here) ## JSONColumnsWithMetadata {#jsoncolumnsmonoblock} @@ -739,14 +739,14 @@ Example: } ``` -For JSONColumnsWithMetadata input format, if setting [input_format_json_validate_types_from_metadata](../operations/settings/settings.md#input_format_json_validate_types_from_metadata) is set to 1, +For JSONColumnsWithMetadata input format, if setting [input_format_json_validate_types_from_metadata](/docs/en/operations/settings/settings.md/#input_format_json_validate_types_from_metadata) is set to 1, the types from metadata in input data will be compared with the types of the corresponding columns from the table. ## JSONAsString {#jsonasstring} In this format, a single JSON object is interpreted as a single value. If the input has several JSON objects (comma separated), they are interpreted as separate rows. If the input data is enclosed in square brackets, it is interpreted as an array of JSONs. -This format can only be parsed for table with a single field of type [String](../sql-reference/data-types/string.md). The remaining columns must be set to [DEFAULT](../sql-reference/statements/create/table.md#default) or [MATERIALIZED](../sql-reference/statements/create/table.md#materialized), or omitted. Once you collect whole JSON object to string you can use [JSON functions](../sql-reference/functions/json-functions.md) to process it. +This format can only be parsed for a table with a single field of type [String](/docs/en/sql-reference/data-types/string.md). The remaining columns must be set to [DEFAULT](/docs/en/sql-reference/statements/create/table.md/#default) or [MATERIALIZED](/docs/en/sql-reference/statements/create/table.md/#materialized), or omitted. Once you collect the whole JSON object to string you can use [JSON functions](/docs/en/sql-reference/functions/json-functions.md) to process it. 
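As an illustrative sketch (the table name `json_as_string` and the JSON key `name` are placeholders rather than part of the examples that follow), strings collected this way can later be processed with JSON functions:

```sql
-- Assumes a table such as: CREATE TABLE json_as_string (json String) ENGINE = Memory
-- that was filled with INSERT INTO json_as_string FORMAT JSONAsString ...
SELECT JSONExtractString(json, 'name') AS name
FROM json_as_string;
```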
**Examples** @@ -891,7 +891,7 @@ Example: ] ``` -Columns that are not present in the block will be filled with default values (you can use [input_format_defaults_for_omitted_fields](../operations/settings/settings.md#input_format_defaults_for_omitted_fields) setting here) +Columns that are not present in the block will be filled with default values (you can use [input_format_defaults_for_omitted_fields](/docs/en/operations/settings/settings.md/#input_format_defaults_for_omitted_fields) setting here) ## JSONEachRow {#jsoneachrow} @@ -905,7 +905,7 @@ Example: {"num":44,"str":"hello","arr":[0,1,2,3]} ``` -While importing data columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1. +While importing data columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings.md/#input_format_skip_unknown_fields) is set to 1. ## JSONStringsEachRow {#jsonstringseachrow} @@ -960,8 +960,8 @@ Differs from `JSONEachRow`/`JSONStringsEachRow` in that ClickHouse will also yie Differs from `JSONCompactEachRow` format in that it also prints the header row with column names, similar to [TabSeparatedWithNames](#tabseparatedwithnames). :::warning -If setting [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) is set to 1, -the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1. +If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings.md/#input_format_with_names_use_header) is set to 1, +the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. ::: @@ -970,10 +970,10 @@ Otherwise, the first row will be skipped. Differs from `JSONCompactEachRow` format in that it also prints two header rows with column names and types, similar to [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes). :::warning -If setting [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) is set to 1, -the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1. +If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings.md/#input_format_with_names_use_header) is set to 1, +the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. 
-If setting [input_format_with_types_use_header](../operations/settings/settings.md#input_format_with_types_use_header) is set to 1, +If setting [input_format_with_types_use_header](/docs/en/operations/settings/settings.md/#input_format_with_types_use_header) is set to 1, the types from input data will be compared with the types of the corresponding columns from the table. Otherwise, the second row will be skipped. ::: @@ -982,8 +982,8 @@ the types from input data will be compared with the types of the corresponding c Differs from `JSONCompactStringsEachRow` in that in that it also prints the header row with column names, similar to [TabSeparatedWithNames](#tabseparatedwithnames). :::warning -If setting [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) is set to 1, -the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1. +If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings.md/#input_format_with_names_use_header) is set to 1, +the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. ::: @@ -992,10 +992,10 @@ Otherwise, the first row will be skipped. Differs from `JSONCompactStringsEachRow` in that it also prints two header rows with column names and types, similar to [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes). :::warning -If setting [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) is set to 1, -the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1. +If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings.md/#input_format_with_names_use_header) is set to 1, +the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. -If setting [input_format_with_types_use_header](../operations/settings/settings.md#input_format_with_types_use_header) is set to 1, +If setting [input_format_with_types_use_header](/docs/en/operations/settings/settings.md/#input_format_with_types_use_header) is set to 1, the types from input data will be compared with the types of the corresponding columns from the table. Otherwise, the second row will be skipped. ::: @@ -1009,7 +1009,7 @@ the types from input data will be compared with the types of the corresponding c ## JSONObjectEachRow {#jsonobjecteachrow} -In this format, all data is represented as a single JSON Object, each row is represented as separate field of this object similar to JSONEachRow format. +In this format, all data is represented as a single JSON Object, each row is represented as a separate field of this object similar to JSONEachRow format. 
Example: @@ -1021,12 +1021,12 @@ Example: } ``` -To use object name as column value you can use special setting [format_json_object_each_row_column_for_object_name](../operations/settings/settings.md#format_json_object_each_row_column_for_object_name). Value of this setting is set to the name of a column, that is used as JSON key for a row in resulting object. +To use an object name as a column value you can use the special setting [format_json_object_each_row_column_for_object_name](/docs/en/operations/settings/settings.md/#format_json_object_each_row_column_for_object_name). The value of this setting is set to the name of a column, that is used as JSON key for a row in the resulting object. Examples: For output: -Let's say we have table `test` with two columns: +Let's say we have the table `test` with two columns: ``` ┌─object_name─┬─number─┐ │ first_obj │ 1 │ @@ -1051,7 +1051,7 @@ The output: For input: -Let's say we stored output from previous example in a file with name `data.json`: +Let's say we stored output from the previous example in a file named `data.json`: ```sql select * from file('data.json', JSONObjectEachRow, 'object_name String, number UInt64') settings format_json_object_each_row_column_for_object_name='object_name' ``` @@ -1093,9 +1093,9 @@ ClickHouse ignores spaces between elements and commas after the objects. You can **Omitted values processing** -ClickHouse substitutes omitted values with the default values for the corresponding [data types](../sql-reference/data-types/index.md). +ClickHouse substitutes omitted values with the default values for the corresponding [data types](/docs/en/sql-reference/data-types/index.md). -If `DEFAULT expr` is specified, ClickHouse uses different substitution rules depending on the [input_format_defaults_for_omitted_fields](../operations/settings/settings.md#input_format_defaults_for_omitted_fields) setting. +If `DEFAULT expr` is specified, ClickHouse uses different substitution rules depending on the [input_format_defaults_for_omitted_fields](/docs/en/operations/settings/settings.md/#input_format_defaults_for_omitted_fields) setting. Consider the following table: @@ -1140,7 +1140,7 @@ Any set of bytes can be output in the strings. Use the `JSONEachRow` format if y ### Usage of Nested Structures {#jsoneachrow-nested} -If you have a table with [Nested](../sql-reference/data-types/nested-data-structures/nested.md) data type columns, you can insert JSON data with the same structure. Enable this feature with the [input_format_import_nested_json](../operations/settings/settings.md#input_format_import_nested_json) setting. +If you have a table with [Nested](/docs/en/sql-reference/data-types/nested-data-structures/nested.md) data type columns, you can insert JSON data with the same structure. Enable this feature with the [input_format_import_nested_json](/docs/en/operations/settings/settings.md/#input_format_import_nested_json) setting. For example, consider the following table: @@ -1154,7 +1154,7 @@ As you can see in the `Nested` data type description, ClickHouse treats each com INSERT INTO json_each_row_nested FORMAT JSONEachRow {"n.s": ["abc", "def"], "n.i": [1, 23]} ``` -To insert data as a hierarchical JSON object, set [input_format_import_nested_json=1](../operations/settings/settings.md#input_format_import_nested_json). +To insert data as a hierarchical JSON object, set [input_format_import_nested_json=1](/docs/en/operations/settings/settings.md/#input_format_import_nested_json). 
``` json { @@ -1199,18 +1199,18 @@ SELECT * FROM json_each_row_nested ### JSON formats settings {#json-formats-settings} -- [input_format_import_nested_json](../operations/settings/settings.md#input_format_import_nested_json) - map nested JSON data to nested tables (it works for JSONEachRow format). Default value - `false`. -- [input_format_json_read_bools_as_numbers](../operations/settings/settings.md#input_format_json_read_bools_as_numbers) - allow to parse bools as numbers in JSON input formats. Default value - `true`. -- [input_format_json_read_numbers_as_strings](../operations/settings/settings.md#input_format_json_read_numbers_as_strings) - allow to parse numbers as strings in JSON input formats. Default value - `false`. -- [input_format_json_read_objects_as_strings](../operations/settings/settings.md#input_format_json_read_objects_as_strings) - allow to parse JSON objects as strings in JSON input formats. Default value - `false`. -- [output_format_json_quote_64bit_integers](../operations/settings/settings.md#output_format_json_quote_64bit_integers) - controls quoting of 64-bit integers in JSON output format. Default value - `true`. -- [output_format_json_quote_64bit_floats](../operations/settings/settings.md#output_format_json_quote_64bit_floats) - controls quoting of 64-bit floats in JSON output format. Default value - `false`. -- [output_format_json_quote_denormals](../operations/settings/settings.md#output_format_json_quote_denormals) - enables '+nan', '-nan', '+inf', '-inf' outputs in JSON output format. Default value - `false`. -- [output_format_json_quote_decimals](../operations/settings/settings.md#output_format_json_quote_decimals) - controls quoting of decimals in JSON output format. Default value - `false`. -- [output_format_json_escape_forward_slashes](../operations/settings/settings.md#output_format_json_escape_forward_slashes) - controls escaping forward slashes for string outputs in JSON output format. Default value - `true`. -- [output_format_json_named_tuples_as_objects](../operations/settings/settings.md#output_format_json_named_tuples_as_objects) - serialize named tuple columns as JSON objects. Default value - `false`. -- [output_format_json_array_of_rows](../operations/settings/settings.md#output_format_json_array_of_rows) - output a JSON array of all rows in JSONEachRow(Compact) format. Default value - `false`. -- [output_format_json_validate_utf8](../operations/settings/settings.md#output_format_json_validate_utf8) - enables validation of UTF-8 sequences in JSON output formats (note that it doesn't impact formats JSON/JSONCompact/JSONColumnsWithMetadata, they always validate utf8). Default value - `false`. +- [input_format_import_nested_json](/docs/en/operations/settings/settings.md/#input_format_import_nested_json) - map nested JSON data to nested tables (it works for JSONEachRow format). Default value - `false`. +- [input_format_json_read_bools_as_numbers](/docs/en/operations/settings/settings.md/#input_format_json_read_bools_as_numbers) - allow to parse bools as numbers in JSON input formats. Default value - `true`. +- [input_format_json_read_numbers_as_strings](/docs/en/operations/settings/settings.md/#input_format_json_read_numbers_as_strings) - allow to parse numbers as strings in JSON input formats. Default value - `false`. +- [input_format_json_read_objects_as_strings](/docs/en/operations/settings/settings.md/#input_format_json_read_objects_as_strings) - allow to parse JSON objects as strings in JSON input formats. Default value - `false`. 
+- [output_format_json_quote_64bit_integers](/docs/en/operations/settings/settings.md/#output_format_json_quote_64bit_integers) - controls quoting of 64-bit integers in JSON output format. Default value - `true`. +- [output_format_json_quote_64bit_floats](/docs/en/operations/settings/settings.md/#output_format_json_quote_64bit_floats) - controls quoting of 64-bit floats in JSON output format. Default value - `false`. +- [output_format_json_quote_denormals](/docs/en/operations/settings/settings.md/#output_format_json_quote_denormals) - enables '+nan', '-nan', '+inf', '-inf' outputs in JSON output format. Default value - `false`. +- [output_format_json_quote_decimals](/docs/en/operations/settings/settings.md/#output_format_json_quote_decimals) - controls quoting of decimals in JSON output format. Default value - `false`. +- [output_format_json_escape_forward_slashes](/docs/en/operations/settings/settings.md/#output_format_json_escape_forward_slashes) - controls escaping forward slashes for string outputs in JSON output format. Default value - `true`. +- [output_format_json_named_tuples_as_objects](/docs/en/operations/settings/settings.md/#output_format_json_named_tuples_as_objects) - serialize named tuple columns as JSON objects. Default value - `false`. +- [output_format_json_array_of_rows](/docs/en/operations/settings/settings.md/#output_format_json_array_of_rows) - output a JSON array of all rows in JSONEachRow(Compact) format. Default value - `false`. +- [output_format_json_validate_utf8](/docs/en/operations/settings/settings.md/#output_format_json_validate_utf8) - enables validation of UTF-8 sequences in JSON output formats (note that it doesn't impact formats JSON/JSONCompact/JSONColumnsWithMetadata, they always validate utf8). Default value - `false`. 
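As a minimal sketch of how a couple of the settings listed above interact (the tuple and the row count are arbitrary):

```sql
-- output_format_json_named_tuples_as_objects = 1 writes the named tuple as an object
-- such as {"a":1,"b":"x"} instead of an array, and output_format_json_array_of_rows = 1
-- wraps all JSONEachRow rows into a single JSON array.
SELECT CAST((1, 'x'), 'Tuple(a UInt32, b String)') AS t
FROM numbers(2)
FORMAT JSONEachRow
SETTINGS output_format_json_named_tuples_as_objects = 1, output_format_json_array_of_rows = 1;
```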
## BSONEachRow {#bsoneachrow} @@ -1221,49 +1221,49 @@ For output it uses the following correspondence between ClickHouse types and BSO | ClickHouse type | BSON Type | |-----------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------| -| [Bool](../sql-reference/data-types/boolean.md) | `\x08` boolean | -| [Int8/UInt8](../sql-reference/data-types/int-uint.md) | `\x10` int32 | -| [Int16UInt16](../sql-reference/data-types/int-uint.md) | `\x10` int32 | -| [Int32](../sql-reference/data-types/int-uint.md) | `\x10` int32 | -| [UInt32](../sql-reference/data-types/int-uint.md) | `\x12` int64 | -| [Int64/UInt64](../sql-reference/data-types/int-uint.md) | `\x12` int64 | -| [Float32/Float64](../sql-reference/data-types/float.md) | `\x01` double | -| [Date](../sql-reference/data-types/date.md)/[Date32](../sql-reference/data-types/date32.md) | `\x10` int32 | -| [DateTime](../sql-reference/data-types/datetime.md) | `\x12` int64 | -| [DateTime64](../sql-reference/data-types/datetime64.md) | `\x09` datetime | -| [Decimal32](../sql-reference/data-types/decimal.md) | `\x10` int32 | -| [Decimal64](../sql-reference/data-types/decimal.md) | `\x12` int64 | -| [Decimal128](../sql-reference/data-types/decimal.md) | `\x05` binary, `\x00` binary subtype, size = 16 | -| [Decimal256](../sql-reference/data-types/decimal.md) | `\x05` binary, `\x00` binary subtype, size = 32 | -| [Int128/UInt128](../sql-reference/data-types/int-uint.md) | `\x05` binary, `\x00` binary subtype, size = 16 | -| [Int256/UInt256](../sql-reference/data-types/int-uint.md) | `\x05` binary, `\x00` binary subtype, size = 32 | -| [String](../sql-reference/data-types/string.md)/[FixedString](../sql-reference/data-types/fixedstring.md) | `\x05` binary, `\x00` binary subtype or \x02 string if setting output_format_bson_string_as_string is enabled | -| [UUID](../sql-reference/data-types/uuid.md) | `\x05` binary, `\x04` uuid subtype, size = 16 | -| [Array](../sql-reference/data-types/array.md) | `\x04` array | -| [Tuple](../sql-reference/data-types/tuple.md) | `\x04` array | -| [Named Tuple](../sql-reference/data-types/tuple.md) | `\x03` document | -| [Map](../sql-reference/data-types/map.md) (with String keys) | `\x03` document | +| [Bool](/docs/en/sql-reference/data-types/boolean.md) | `\x08` boolean | +| [Int8/UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `\x10` int32 | +| [Int16UInt16](/docs/en/sql-reference/data-types/int-uint.md) | `\x10` int32 | +| [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `\x10` int32 | +| [UInt32](/docs/en/sql-reference/data-types/int-uint.md) | `\x12` int64 | +| [Int64/UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `\x12` int64 | +| [Float32/Float64](/docs/en/sql-reference/data-types/float.md) | `\x01` double | +| [Date](/docs/en/sql-reference/data-types/date.md)/[Date32](/docs/en/sql-reference/data-types/date32.md) | `\x10` int32 | +| [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `\x12` int64 | +| [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | `\x09` datetime | +| [Decimal32](/docs/en/sql-reference/data-types/decimal.md) | `\x10` int32 | +| [Decimal64](/docs/en/sql-reference/data-types/decimal.md) | `\x12` int64 | +| [Decimal128](/docs/en/sql-reference/data-types/decimal.md) | `\x05` binary, `\x00` binary subtype, size = 16 | +| [Decimal256](/docs/en/sql-reference/data-types/decimal.md) | `\x05` binary, `\x00` 
binary subtype, size = 32 | +| [Int128/UInt128](/docs/en/sql-reference/data-types/int-uint.md) | `\x05` binary, `\x00` binary subtype, size = 16 | +| [Int256/UInt256](/docs/en/sql-reference/data-types/int-uint.md) | `\x05` binary, `\x00` binary subtype, size = 32 | +| [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `\x05` binary, `\x00` binary subtype or \x02 string if setting output_format_bson_string_as_string is enabled | +| [UUID](/docs/en/sql-reference/data-types/uuid.md) | `\x05` binary, `\x04` uuid subtype, size = 16 | +| [Array](/docs/en/sql-reference/data-types/array.md) | `\x04` array | +| [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `\x04` array | +| [Named Tuple](/docs/en/sql-reference/data-types/tuple.md) | `\x03` document | +| [Map](/docs/en/sql-reference/data-types/map.md) (with String keys) | `\x03` document | For input it uses the following correspondence between BSON types and ClickHouse types: | BSON Type | ClickHouse Type | |------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `\x01` double | [Float32/Float64](../sql-reference/data-types/float.md) | -| `\x02` string | [String](../sql-reference/data-types/string.md)/[FixedString](../sql-reference/data-types/fixedstring.md) | -| `\x03` document | [Map](../sql-reference/data-types/map.md)/[Named Tuple](../sql-reference/data-types/tuple.md) | -| `\x04` array | [Array](../sql-reference/data-types/array.md)/[Tuple](../sql-reference/data-types/tuple.md) | -| `\x05` binary, `\x00` binary subtype | [String](../sql-reference/data-types/string.md)/[FixedString](../sql-reference/data-types/fixedstring.md) | -| `\x05` binary, `\x02` old binary subtype | [String](../sql-reference/data-types/string.md)/[FixedString](../sql-reference/data-types/fixedstring.md) | -| `\x05` binary, `\x03` old uuid subtype | [UUID](../sql-reference/data-types/uuid.md) | -| `\x05` binary, `\x04` uuid subtype | [UUID](../sql-reference/data-types/uuid.md) | -| `\x07` ObjectId | [String](../sql-reference/data-types/string.md)/[FixedString](../sql-reference/data-types/fixedstring.md) | -| `\x08` boolean | [Bool](../sql-reference/data-types/boolean.md) | -| `\x09` datetime | [DateTime64](../sql-reference/data-types/datetime64.md) | -| `\x0A` null value | [NULL](../sql-reference/data-types/nullable.md) | -| `\x0D` JavaScript code | [String](../sql-reference/data-types/string.md)/[FixedString](../sql-reference/data-types/fixedstring.md) | -| `\x0E` symbol | [String](../sql-reference/data-types/string.md)/[FixedString](../sql-reference/data-types/fixedstring.md) | -| `\x10` int32 | [Int32/UInt32](../sql-reference/data-types/int-uint.md)/[Decimal32](../sql-reference/data-types/decimal.md) | -| `\x12` int64 | [Int64/UInt64](../sql-reference/data-types/int-uint.md)/[Decimal64](../sql-reference/data-types/decimal.md)/[DateTime64](../sql-reference/data-types/datetime64.md) | +| `\x01` double | [Float32/Float64](/docs/en/sql-reference/data-types/float.md) | +| `\x02` string | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | +| `\x03` document | [Map](/docs/en/sql-reference/data-types/map.md)/[Named Tuple](/docs/en/sql-reference/data-types/tuple.md) | +| `\x04` array | [Array](/docs/en/sql-reference/data-types/array.md)/[Tuple](/docs/en/sql-reference/data-types/tuple.md) 
| +| `\x05` binary, `\x00` binary subtype | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | +| `\x05` binary, `\x02` old binary subtype | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | +| `\x05` binary, `\x03` old uuid subtype | [UUID](/docs/en/sql-reference/data-types/uuid.md) | +| `\x05` binary, `\x04` uuid subtype | [UUID](/docs/en/sql-reference/data-types/uuid.md) | +| `\x07` ObjectId | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | +| `\x08` boolean | [Bool](/docs/en/sql-reference/data-types/boolean.md) | +| `\x09` datetime | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | +| `\x0A` null value | [NULL](/docs/en/sql-reference/data-types/nullable.md) | +| `\x0D` JavaScript code | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | +| `\x0E` symbol | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | +| `\x10` int32 | [Int32/UInt32](/docs/en/sql-reference/data-types/int-uint.md)/[Decimal32](/docs/en/sql-reference/data-types/decimal.md) | +| `\x12` int64 | [Int64/UInt64](/docs/en/sql-reference/data-types/int-uint.md)/[Decimal64](/docs/en/sql-reference/data-types/decimal.md)/[DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | Other BSON types are not supported. Also, it performs conversion between different integer types (for example, you can insert BSON int32 value into ClickHouse UInt8). Big integers and decimals (Int128/UInt128/Int256/UInt256/Decimal128/Decimal256) can be parsed from BSON Binary value with `\x00` binary subtype. In this case this format will validate that the size of binary data equals the size of expected value. @@ -1272,8 +1272,8 @@ Note: this format don't work properly on Big-Endian platforms. ### BSON format settings {#bson-format-settings} -- [output_format_bson_string_as_string](../operations/settings/settings.md#output_format_bson_string_as_string) - use BSON String type instead of Binary for String columns. Default value - `false`. -- [input_format_bson_skip_fields_with_unsupported_types_in_schema_inference](../operations/settings/settings.md#input_format_bson_skip_fields_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for format BSONEachRow. Default value - `false`. +- [output_format_bson_string_as_string](/docs/en/operations/settings/settings.md/#output_format_bson_string_as_string) - use BSON String type instead of Binary for String columns. Default value - `false`. +- [input_format_bson_skip_fields_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings.md/#input_format_bson_skip_fields_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for format BSONEachRow. Default value - `false`. ## Native {#native} @@ -1292,7 +1292,7 @@ Outputs data as Unicode-art tables, also using ANSI-escape sequences for setting A full grid of the table is drawn, and each row occupies two lines in the terminal. Each result block is output as a separate table. This is necessary so that blocks can be output without buffering results (buffering would be necessary in order to pre-calculate the visible width of all the values). 
-[NULL](../sql-reference/syntax.md) is output as `ᴺᵁᴸᴸ`. +[NULL](/docs/en/sql-reference/syntax.md) is output as `ᴺᵁᴸᴸ`. Example (shown for the [PrettyCompact](#prettycompact) format): @@ -1406,12 +1406,12 @@ Differs from [PrettySpaceNoEscapes](#prettyspacenoescapes) in that up to 10,000 ## Pretty formats settings {#pretty-formats-settings} -- [output_format_pretty_max_rows](../operations/settings/settings.md#output_format_pretty_max_rows) - rows limit for Pretty formats. Default value - `10000`. -- [output_format_pretty_max_column_pad_width](../operations/settings/settings.md#output_format_pretty_max_column_pad_width) - maximum width to pad all values in a column in Pretty formats. Default value - `250`. -- [output_format_pretty_max_value_width](../operations/settings/settings.md#output_format_pretty_max_value_width) - Maximum width of value to display in Pretty formats. If greater - it will be cut. Default value - `10000`. -- [output_format_pretty_color](../operations/settings/settings.md#output_format_pretty_color) - use ANSI escape sequences to paint colors in Pretty formats. Default value - `true`. -- [output_format_pretty_grid_charset](../operations/settings/settings.md#output_format_pretty_grid_charset) - Charset for printing grid borders. Available charsets: ASCII, UTF-8. Default value - `UTF-8`. -- [output_format_pretty_row_numbers](../operations/settings/settings.md#output_format_pretty_row_numbers) - Add row numbers before each row for pretty output format. Default value - `false`. +- [output_format_pretty_max_rows](/docs/en/operations/settings/settings.md/#output_format_pretty_max_rows) - rows limit for Pretty formats. Default value - `10000`. +- [output_format_pretty_max_column_pad_width](/docs/en/operations/settings/settings.md/#output_format_pretty_max_column_pad_width) - maximum width to pad all values in a column in Pretty formats. Default value - `250`. +- [output_format_pretty_max_value_width](/docs/en/operations/settings/settings.md/#output_format_pretty_max_value_width) - Maximum width of value to display in Pretty formats. If greater - it will be cut. Default value - `10000`. +- [output_format_pretty_color](/docs/en/operations/settings/settings.md/#output_format_pretty_color) - use ANSI escape sequences to paint colors in Pretty formats. Default value - `true`. +- [output_format_pretty_grid_charset](/docs/en/operations/settings/settings.md/#output_format_pretty_grid_charset) - Charset for printing grid borders. Available charsets: ASCII, UTF-8. Default value - `UTF-8`. +- [output_format_pretty_row_numbers](/docs/en/operations/settings/settings.md/#output_format_pretty_row_numbers) - Add row numbers before each row for pretty output format. Default value - `false`. ## RowBinary {#rowbinary} @@ -1426,7 +1426,7 @@ FixedString is represented simply as a sequence of bytes. Array is represented as a varint length (unsigned [LEB128](https://en.wikipedia.org/wiki/LEB128)), followed by successive elements of the array. -For [NULL](../sql-reference/syntax.md#null-literal) support, an additional byte containing 1 or 0 is added before each [Nullable](../sql-reference/data-types/nullable.md) value. If 1, then the value is `NULL` and this byte is interpreted as a separate value. If 0, the value after the byte is not `NULL`. +For [NULL](/docs/en/sql-reference/syntax.md/#null-literal) support, an additional byte containing 1 or 0 is added before each [Nullable](/docs/en/sql-reference/data-types/nullable.md) value. 
If 1, then the value is `NULL` and this byte is interpreted as a separate value. If 0, the value after the byte is not `NULL`. ## RowBinaryWithNames {#rowbinarywithnames} @@ -1436,8 +1436,8 @@ Similar to [RowBinary](#rowbinary), but with added header: - N `String`s specifying column names :::warning -If setting [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) is set to 1, -the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1. +If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings.md/#input_format_with_names_use_header) is set to 1, +the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. ::: @@ -1450,20 +1450,20 @@ Similar to [RowBinary](#rowbinary), but with added header: - N `String`s specifying column types :::warning -If setting [input_format_with_names_use_header](../operations/settings/settings.md#input_format_with_names_use_header) is set to 1, -the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1. +If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings.md/#input_format_with_names_use_header) is set to 1, +the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. -If setting [input_format_with_types_use_header](../operations/settings/settings.md#input_format_with_types_use_header) is set to 1, +If setting [input_format_with_types_use_header](/docs/en/operations/settings/settings.md/#input_format_with_types_use_header) is set to 1, the types from input data will be compared with the types of the corresponding columns from the table. Otherwise, the second row will be skipped. ::: ## RowBinary format settings {#row-binary-format-settings} -- [format_binary_max_string_size](../operations/settings/settings.md#format_binary_max_string_size) - The maximum allowed size for String in RowBinary format. Default value - `1GiB`. +- [format_binary_max_string_size](/docs/en/operations/settings/settings.md/#format_binary_max_string_size) - The maximum allowed size for String in RowBinary format. Default value - `1GiB`. ## Values {#data-format-values} -Prints every row in brackets. Rows are separated by commas. There is no comma after the last row. The values inside the brackets are also comma-separated. Numbers are output in a decimal format without quotes. Arrays are output in square brackets. Strings, dates, and dates with times are output in quotes. Escaping rules and parsing are similar to the [TabSeparated](#tabseparated) format. 
During formatting, extra spaces aren’t inserted, but during parsing, they are allowed and skipped (except for spaces inside array values, which are not allowed). [NULL](../sql-reference/syntax.md) is represented as `NULL`. +Prints every row in brackets. Rows are separated by commas. There is no comma after the last row. The values inside the brackets are also comma-separated. Numbers are output in a decimal format without quotes. Arrays are output in square brackets. Strings, dates, and dates with times are output in quotes. Escaping rules and parsing are similar to the [TabSeparated](#tabseparated) format. During formatting, extra spaces aren’t inserted, but during parsing, they are allowed and skipped (except for spaces inside array values, which are not allowed). [NULL](/docs/en/sql-reference/syntax.md) is represented as `NULL`. The minimum set of characters that you need to escape when passing data in Values format: single quotes and backslashes. @@ -1471,16 +1471,16 @@ This is the format that is used in `INSERT INTO t VALUES ...`, but you can also ## Values format settings {#values-format-settings} -- [input_format_values_interpret_expressions](../operations/settings/settings.md#input_format_values_interpret_expressions) - if the field could not be parsed by streaming parser, run SQL parser and try to interpret it as SQL expression. Default value - `true`. -- [input_format_values_deduce_templates_of_expressions](../operations/settings/settings.md#input_format_values_deduce_templates_of_expressions) -if the field could not be parsed by streaming parser, run SQL parser, deduce template of the SQL expression, try to parse all rows using template and then interpret expression for all rows. Default value - `true`. -- [input_format_values_accurate_types_of_literals](../operations/settings/settings.md#input_format_values_accurate_types_of_literals) - when parsing and interpreting expressions using template, check actual type of literal to avoid possible overflow and precision issues. Default value - `true`. +- [input_format_values_interpret_expressions](/docs/en/operations/settings/settings.md/#input_format_values_interpret_expressions) - if the field could not be parsed by the streaming parser, run the SQL parser and try to interpret it as an SQL expression. Default value - `true`. +- [input_format_values_deduce_templates_of_expressions](/docs/en/operations/settings/settings.md/#input_format_values_deduce_templates_of_expressions) - if the field could not be parsed by the streaming parser, run the SQL parser, deduce a template of the SQL expression, try to parse all rows using the template, and then interpret the expression for all rows. Default value - `true`. +- [input_format_values_accurate_types_of_literals](/docs/en/operations/settings/settings.md/#input_format_values_accurate_types_of_literals) - when parsing and interpreting expressions using the template, check the actual type of the literal to avoid possible overflow and precision issues. Default value - `true`. ## Vertical {#vertical} Prints each value on a separate line with the column name specified. This format is convenient for printing just one or a few rows if each row consists of a large number of columns. -[NULL](../sql-reference/syntax.md) is output as `ᴺᵁᴸᴸ`. +[NULL](/docs/en/sql-reference/syntax.md) is output as `ᴺᵁᴸᴸ`. Example: @@ -1593,27 +1593,27 @@ See also [Format Schema](#formatschema).
### Data Types Matching {#data_types-matching-capnproto} -The table below shows supported data types and how they match ClickHouse [data types](../sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries. +The table below shows supported data types and how they match ClickHouse [data types](/docs/en/sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries. | CapnProto data type (`INSERT`) | ClickHouse data type | CapnProto data type (`SELECT`) | |--------------------------------|-----------------------------------------------------------|--------------------------------| -| `UINT8`, `BOOL` | [UInt8](../sql-reference/data-types/int-uint.md) | `UINT8` | -| `INT8` | [Int8](../sql-reference/data-types/int-uint.md) | `INT8` | -| `UINT16` | [UInt16](../sql-reference/data-types/int-uint.md), [Date](../sql-reference/data-types/date.md) | `UINT16` | -| `INT16` | [Int16](../sql-reference/data-types/int-uint.md) | `INT16` | -| `UINT32` | [UInt32](../sql-reference/data-types/int-uint.md), [DateTime](../sql-reference/data-types/datetime.md) | `UINT32` | -| `INT32` | [Int32](../sql-reference/data-types/int-uint.md) | `INT32` | -| `UINT64` | [UInt64](../sql-reference/data-types/int-uint.md) | `UINT64` | -| `INT64` | [Int64](../sql-reference/data-types/int-uint.md), [DateTime64](../sql-reference/data-types/datetime.md) | `INT64` | -| `FLOAT32` | [Float32](../sql-reference/data-types/float.md) | `FLOAT32` | -| `FLOAT64` | [Float64](../sql-reference/data-types/float.md) | `FLOAT64` | -| `TEXT, DATA` | [String](../sql-reference/data-types/string.md), [FixedString](../sql-reference/data-types/fixedstring.md) | `TEXT, DATA` | -| `union(T, Void), union(Void, T)` | [Nullable(T)](../sql-reference/data-types/date.md) | `union(T, Void), union(Void, T)` | -| `ENUM` | [Enum(8\|16)](../sql-reference/data-types/enum.md) | `ENUM` | -| `LIST` | [Array](../sql-reference/data-types/array.md) | `LIST` | -| `STRUCT` | [Tuple](../sql-reference/data-types/tuple.md) | `STRUCT` | +| `UINT8`, `BOOL` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `UINT8` | +| `INT8` | [Int8](/docs/en/sql-reference/data-types/int-uint.md) | `INT8` | +| `UINT16` | [UInt16](/docs/en/sql-reference/data-types/int-uint.md), [Date](/docs/en/sql-reference/data-types/date.md) | `UINT16` | +| `INT16` | [Int16](/docs/en/sql-reference/data-types/int-uint.md) | `INT16` | +| `UINT32` | [UInt32](/docs/en/sql-reference/data-types/int-uint.md), [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `UINT32` | +| `INT32` | [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `INT32` | +| `UINT64` | [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `UINT64` | +| `INT64` | [Int64](/docs/en/sql-reference/data-types/int-uint.md), [DateTime64](/docs/en/sql-reference/data-types/datetime.md) | `INT64` | +| `FLOAT32` | [Float32](/docs/en/sql-reference/data-types/float.md) | `FLOAT32` | +| `FLOAT64` | [Float64](/docs/en/sql-reference/data-types/float.md) | `FLOAT64` | +| `TEXT, DATA` | [String](/docs/en/sql-reference/data-types/string.md), [FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `TEXT, DATA` | +| `union(T, Void), union(Void, T)` | [Nullable(T)](/docs/en/sql-reference/data-types/date.md) | `union(T, Void), union(Void, T)` | +| `ENUM` | [Enum(8\|16)](/docs/en/sql-reference/data-types/enum.md) | `ENUM` | +| `LIST` | [Array](/docs/en/sql-reference/data-types/array.md) | `LIST` | +| `STRUCT` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `STRUCT` | -For working with `Enum` in CapnProto format 
use the [format_capn_proto_enum_comparising_mode](../operations/settings/settings.md#format_capn_proto_enum_comparising_mode) setting. +For working with `Enum` in CapnProto format, use the [format_capn_proto_enum_comparising_mode](/docs/en/operations/settings/settings.md/#format_capn_proto_enum_comparising_mode) setting. Arrays can be nested and can have a value of the `Nullable` type as an argument. `Tuple` type also can be nested. @@ -1644,10 +1644,10 @@ $ clickhouse-client --query = "SELECT * FROM test.hits FORMAT CapnProto SETTINGS Expose metrics in [Prometheus text-based exposition format](https://prometheus.io/docs/instrumenting/exposition_formats/#text-based-format). The output table should have a proper structure. -Columns `name` ([String](../sql-reference/data-types/string.md)) and `value` (number) are required. -Rows may optionally contain `help` ([String](../sql-reference/data-types/string.md)) and `timestamp` (number). -Column `type` ([String](../sql-reference/data-types/string.md)) is either `counter`, `gauge`, `histogram`, `summary`, `untyped` or empty. -Each metric value may also have some `labels` ([Map(String, String)](../sql-reference/data-types/map.md)). +Columns `name` ([String](/docs/en/sql-reference/data-types/string.md)) and `value` (number) are required. +Rows may optionally contain `help` ([String](/docs/en/sql-reference/data-types/string.md)) and `timestamp` (number). +Column `type` ([String](/docs/en/sql-reference/data-types/string.md)) is either `counter`, `gauge`, `histogram`, `summary`, `untyped` or empty. +Each metric value may also have some `labels` ([Map(String, String)](/docs/en/sql-reference/data-types/map.md)). Several consequent rows may refer to the one metric with different labels. The table should be sorted by metric name (e.g., with `ORDER BY name`). There's special requirements for labels for `histogram` and `summary`, see [Prometheus doc](https://prometheus.io/docs/instrumenting/exposition_formats/#histograms-and-summaries) for the details. Special rules applied to row with labels `{'count':''}` and `{'sum':''}`, they'll be converted to `_count` and `_sum` respectively. @@ -1759,7 +1759,7 @@ message MessageType { ``` ClickHouse tries to find a column named `x.y.z` (or `x_y_z` or `X.y_Z` and so on). -Nested messages are suitable to input or output a [nested data structures](../sql-reference/data-types/nested-data-structures/nested.md). +Nested messages are suitable for input or output of [nested data structures](/docs/en/sql-reference/data-types/nested-data-structures/nested.md). Default values defined in a protobuf schema like this @@ -1771,7 +1771,7 @@ message MessageType { } ``` -are not applied; the [table defaults](../sql-reference/statements/create/table.md#create-default-values) are used instead of them. +are not applied; the [table defaults](/docs/en/sql-reference/statements/create/table.md/#create-default-values) are used instead of them. ClickHouse inputs and outputs protobuf messages in the `length-delimited` format. It means before every message should be written its length as a [varint](https://developers.google.com/protocol-buffers/docs/encoding#varints). @@ -1789,25 +1789,25 @@ ClickHouse Avro format supports reading and writing [Avro data files](https://av ### Data Types Matching {#data_types-matching} -The table below shows supported data types and how they match ClickHouse [data types](../sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries.
+The table below shows supported data types and how they match ClickHouse [data types](/docs/en/sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries. -| Avro data type `INSERT` | ClickHouse data type | Avro data type `SELECT` | -|---------------------------------------------|-----------------------------------------------------------------------------------------------------------------------|------------------------------| -| `boolean`, `int`, `long`, `float`, `double` | [Int(8\|16\|32)](../sql-reference/data-types/int-uint.md), [UInt(8\|16\|32)](../sql-reference/data-types/int-uint.md) | `int` | -| `boolean`, `int`, `long`, `float`, `double` | [Int64](../sql-reference/data-types/int-uint.md), [UInt64](../sql-reference/data-types/int-uint.md) | `long` | -| `boolean`, `int`, `long`, `float`, `double` | [Float32](../sql-reference/data-types/float.md) | `float` | -| `boolean`, `int`, `long`, `float`, `double` | [Float64](../sql-reference/data-types/float.md) | `double` | -| `bytes`, `string`, `fixed`, `enum` | [String](../sql-reference/data-types/string.md) | `bytes` or `string` \* | -| `bytes`, `string`, `fixed` | [FixedString(N)](../sql-reference/data-types/fixedstring.md) | `fixed(N)` | -| `enum` | [Enum(8\|16)](../sql-reference/data-types/enum.md) | `enum` | -| `array(T)` | [Array(T)](../sql-reference/data-types/array.md) | `array(T)` | -| `union(null, T)`, `union(T, null)` | [Nullable(T)](../sql-reference/data-types/date.md) | `union(null, T)` | -| `null` | [Nullable(Nothing)](../sql-reference/data-types/special-data-types/nothing.md) | `null` | -| `int (date)` \** | [Date](../sql-reference/data-types/date.md) | `int (date)` \** | -| `long (timestamp-millis)` \** | [DateTime64(3)](../sql-reference/data-types/datetime.md) | `long (timestamp-millis)` \* | -| `long (timestamp-micros)` \** | [DateTime64(6)](../sql-reference/data-types/datetime.md) | `long (timestamp-micros)` \* | +| Avro data type `INSERT` | ClickHouse data type | Avro data type `SELECT` | +|---------------------------------------------|----------------------------------------------------------------------------------------------------|------------------------------| +| `boolean`, `int`, `long`, `float`, `double` | [Int(8\|16\|32)](/docs/en/sql-reference/data-types/int-uint.md), [UInt(8\|16\|32)](/docs/en/sql-reference/data-types/int-uint.md) | `int` | +| `boolean`, `int`, `long`, `float`, `double` | [Int64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `long` | +| `boolean`, `int`, `long`, `float`, `double` | [Float32](/docs/en/sql-reference/data-types/float.md) | `float` | +| `boolean`, `int`, `long`, `float`, `double` | [Float64](/docs/en/sql-reference/data-types/float.md) | `double` | +| `bytes`, `string`, `fixed`, `enum` | [String](/docs/en/sql-reference/data-types/string.md) | `bytes` or `string` \* | +| `bytes`, `string`, `fixed` | [FixedString(N)](/docs/en/sql-reference/data-types/fixedstring.md) | `fixed(N)` | +| `enum` | [Enum(8\|16)](/docs/en/sql-reference/data-types/enum.md) | `enum` | +| `array(T)` | [Array(T)](/docs/en/sql-reference/data-types/array.md) | `array(T)` | +| `union(null, T)`, `union(T, null)` | [Nullable(T)](/docs/en/sql-reference/data-types/date.md) | `union(null, T)` | +| `null` | [Nullable(Nothing)](/docs/en/sql-reference/data-types/special-data-types/nothing.md) | `null` | +| `int (date)` \** | [Date](/docs/en/sql-reference/data-types/date.md) | `int (date)` \** | +| `long (timestamp-millis)` \** | 
[DateTime64(3)](/docs/en/sql-reference/data-types/datetime.md) | `long (timestamp-millis)` \* | +| `long (timestamp-micros)` \** | [DateTime64(6)](/docs/en/sql-reference/data-types/datetime.md) | `long (timestamp-micros)` \* | -\* `bytes` is default, controlled by [output_format_avro_string_column_pattern](../operations/settings/settings.md#output_format_avro_string_column_pattern) +\* `bytes` is default, controlled by [output_format_avro_string_column_pattern](/docs/en/operations/settings/settings.md/#output_format_avro_string_column_pattern) \** [Avro logical types](https://avro.apache.org/docs/current/spec.html#Logical+Types) Unsupported Avro data types: `record` (non-root), `map` @@ -1827,9 +1827,9 @@ The root schema of input Avro file must be of `record` type. To find the correspondence between table columns and fields of Avro schema ClickHouse compares their names. This comparison is case-sensitive. Unused fields are skipped. -Data types of ClickHouse table columns can differ from the corresponding fields of the Avro data inserted. When inserting data, ClickHouse interprets data types according to the table above and then [casts](../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) the data to corresponding column type. +Data types of ClickHouse table columns can differ from the corresponding fields of the Avro data inserted. When inserting data, ClickHouse interprets data types according to the table above and then [casts](/docs/en/sql-reference/functions/type-conversion-functions.md/#type_conversion_function-cast) the data to the corresponding column type. -While importing data, when field is not found in schema and setting [input_format_avro_allow_missing_fields](../operations/settings/settings.md#input_format_avro_allow_missing_fields) is enabled, default value will be used instead of error. +While importing data, when a field is not found in the schema and the setting [input_format_avro_allow_missing_fields](/docs/en/operations/settings/settings.md/#input_format_avro_allow_missing_fields) is enabled, the default value will be used instead of an error. ### Selecting Data {#selecting-data-1} @@ -1844,7 +1844,7 @@ Column names must: - start with `[A-Za-z_]` - subsequently contain only `[A-Za-z0-9_]` -Output Avro file compression and sync interval can be configured with [output_format_avro_codec](../operations/settings/settings.md#output_format_avro_codec) and [output_format_avro_sync_interval](../operations/settings/settings.md#output_format_avro_sync_interval) respectively. +Output Avro file compression and sync interval can be configured with [output_format_avro_codec](/docs/en/operations/settings/settings.md/#output_format_avro_codec) and [output_format_avro_sync_interval](/docs/en/operations/settings/settings.md/#output_format_avro_sync_interval) respectively. ## AvroConfluent {#data-format-avro-confluent} @@ -1854,7 +1854,7 @@ Each Avro message embeds a schema id that can be resolved to the actual schema w Schemas are cached once resolved. -Schema Registry URL is configured with [format_avro_schema_registry_url](../operations/settings/settings.md#format_avro_schema_registry_url). +Schema Registry URL is configured with [format_avro_schema_registry_url](/docs/en/operations/settings/settings.md/#format_avro_schema_registry_url). ### Data Types Matching {#data_types-matching-1} Same as [Avro](#data-format-avro).
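As a sketch of the Avro import behaviour described above (the file name `events.avro` and the column names are hypothetical, not from this document):

```sql
-- If events.avro contains no `comment` field, enabling
-- input_format_avro_allow_missing_fields fills the column with its default value
-- instead of returning an error.
SELECT *
FROM file('events.avro', 'Avro', 'id UInt64, comment String')
SETTINGS input_format_avro_allow_missing_fields = 1;
```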
### Usage {#usage} -To quickly verify schema resolution you can use [kafkacat](https://github.com/edenhill/kafkacat) with [clickhouse-local](../operations/utilities/clickhouse-local.md): +To quickly verify schema resolution you can use [kafkacat](https://github.com/edenhill/kafkacat) with [clickhouse-local](/docs/en/operations/utilities/clickhouse-local.md): ``` bash $ kafkacat -b kafka-broker -C -t topic1 -o beginning -f '%s' -c 3 | clickhouse-local --input-format AvroConfluent --format_avro_schema_registry_url 'http://schema-registry' -S "field1 Int64, field2 String" -q 'select * from table' @@ -1871,7 +1871,7 @@ $ kafkacat -b kafka-broker -C -t topic1 -o beginning -f '%s' -c 3 | clickhouse- 3 c ``` -To use `AvroConfluent` with [Kafka](../engines/table-engines/integrations/kafka.md): +To use `AvroConfluent` with [Kafka](/docs/en/engines/table-engines/integrations/kafka.md): ``` sql CREATE TABLE topic1_stream @@ -1903,36 +1903,36 @@ Setting `format_avro_schema_registry_url` needs to be configured in `users.xml` ### Data Types Matching {#data-types-matching-parquet} -The table below shows supported data types and how they match ClickHouse [data types](../sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries. +The table below shows supported data types and how they match ClickHouse [data types](/docs/en/sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries. | Parquet data type (`INSERT`) | ClickHouse data type | Parquet data type (`SELECT`) | -|------------------------------|-----------------------------------------------------------|------------------------------| -| `UINT8`, `BOOL` | [UInt8](../sql-reference/data-types/int-uint.md) | `UINT8` | -| `INT8` | [Int8](../sql-reference/data-types/int-uint.md) | `INT8` | -| `UINT16` | [UInt16](../sql-reference/data-types/int-uint.md) | `UINT16` | -| `INT16` | [Int16](../sql-reference/data-types/int-uint.md) | `INT16` | -| `UINT32` | [UInt32](../sql-reference/data-types/int-uint.md) | `UINT32` | -| `INT32` | [Int32](../sql-reference/data-types/int-uint.md) | `INT32` | -| `UINT64` | [UInt64](../sql-reference/data-types/int-uint.md) | `UINT64` | -| `INT64` | [Int64](../sql-reference/data-types/int-uint.md) | `INT64` | -| `FLOAT`, `HALF_FLOAT` | [Float32](../sql-reference/data-types/float.md) | `FLOAT` | -| `DOUBLE` | [Float64](../sql-reference/data-types/float.md) | `DOUBLE` | -| `DATE32` | [Date](../sql-reference/data-types/date.md) | `UINT16` | -| `DATE64`, `TIMESTAMP` | [DateTime](../sql-reference/data-types/datetime.md) | `UINT32` | -| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `BINARY` | -| — | [FixedString](../sql-reference/data-types/fixedstring.md) | `BINARY` | -| `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | `DECIMAL` | -| `LIST` | [Array](../sql-reference/data-types/array.md) | `LIST` | -| `STRUCT` | [Tuple](../sql-reference/data-types/tuple.md) | `STRUCT` | -| `MAP` | [Map](../sql-reference/data-types/map.md) | `MAP` | +|------------------------------|-----------------------------------------------------------|----------------------------| +| `BOOL` | [Bool](/docs/en/sql-reference/data-types/boolean.md) | `BOOL` | +| `UINT8`, `BOOL` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `UINT8` | +| `INT8` | [Int8](/docs/en/sql-reference/data-types/int-uint.md) | `INT8` | +| `UINT16` | [UInt16](/docs/en/sql-reference/data-types/int-uint.md) | `UINT16` | +| `INT16` | [Int16](/docs/en/sql-reference/data-types/int-uint.md) | `INT16` | +| `UINT32` | 
[UInt32](/docs/en/sql-reference/data-types/int-uint.md) | `UINT32` | +| `INT32` | [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `INT32` | +| `UINT64` | [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `UINT64` | +| `INT64` | [Int64](/docs/en/sql-reference/data-types/int-uint.md) | `INT64` | +| `FLOAT` | [Float32](/docs/en/sql-reference/data-types/float.md) | `FLOAT` | +| `DOUBLE` | [Float64](/docs/en/sql-reference/data-types/float.md) | `DOUBLE` | +| `DATE` | [Date32](/docs/en/sql-reference/data-types/date.md) | `DATE` | +| `TIME (ms)` | [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `UINT32` | +| `TIMESTAMP`, `TIME (us, ns)` | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | `TIMESTAMP` | +| `STRING`, `BINARY` | [String](/docs/en/sql-reference/data-types/string.md) | `BINARY` | +| — | [FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `BINARY` | +| `DECIMAL` | [Decimal](/docs/en/sql-reference/data-types/decimal.md) | `DECIMAL` | +| `LIST` | [Array](/docs/en/sql-reference/data-types/array.md) | `LIST` | +| `STRUCT` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `STRUCT` | +| `MAP` | [Map](/docs/en/sql-reference/data-types/map.md) | `MAP` | Arrays can be nested and can have a value of the `Nullable` type as an argument. `Tuple` and `Map` types also can be nested. -ClickHouse supports configurable precision of `Decimal` type. The `INSERT` query treats the Parquet `DECIMAL` type as the ClickHouse `Decimal128` type. +Unsupported Parquet data types: `FIXED_SIZE_BINARY`, `JSON`, `UUID`, `ENUM`. -Unsupported Parquet data types: `TIME32`, `FIXED_SIZE_BINARY`, `JSON`, `UUID`, `ENUM`. - -Data types of ClickHouse table columns can differ from the corresponding fields of the Parquet data inserted. When inserting data, ClickHouse interprets data types according to the table above and then [cast](../sql-reference/functions/type-conversion-functions/#type_conversion_function-cast) the data to that data type which is set for the ClickHouse table column. +Data types of ClickHouse table columns can differ from the corresponding fields of the Parquet data inserted. When inserting data, ClickHouse interprets data types according to the table above and then [cast](/docs/en/sql-reference/functions/type-conversion-functions/#type_conversion_function-cast) the data to that data type which is set for the ClickHouse table column. ### Inserting and Selecting Data {#inserting-and-selecting-data-parquet} @@ -1948,16 +1948,16 @@ You can select data from a ClickHouse table and save them into some file in the $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Parquet" > {some_file.pq} ``` -To exchange data with Hadoop, you can use [HDFS table engine](../engines/table-engines/integrations/hdfs.md). +To exchange data with Hadoop, you can use [HDFS table engine](/docs/en/engines/table-engines/integrations/hdfs.md). ### Parquet format settings {#parquet-format-settings} -- [output_format_parquet_row_group_size](../operations/settings/settings.md#output_format_parquet_row_group_size) - row group size in rows while data output. Default value - `1000000`. -- [output_format_parquet_string_as_string](../operations/settings/settings.md#output_format_parquet_string_as_string) - use Parquet String type instead of Binary for String columns. Default value - `false`. 
-- [input_format_parquet_import_nested](../operations/settings/settings.md#input_format_parquet_import_nested) - allow inserting array of structs into [Nested](../sql-reference/data-types/nested-data-structures/nested.md) table in Parquet input format. Default value - `false`. -- [input_format_parquet_case_insensitive_column_matching](../operations/settings/settings.md#input_format_parquet_case_insensitive_column_matching) - ignore case when matching Parquet columns with ClickHouse columns. Default value - `false`. -- [input_format_parquet_allow_missing_columns](../operations/settings/settings.md#input_format_parquet_allow_missing_columns) - allow missing columns while reading Parquet data. Default value - `false`. -- [input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference](../operations/settings/settings.md#input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Parquet format. Default value - `false`. +- [output_format_parquet_row_group_size](/docs/en/operations/settings/settings.md/#output_format_parquet_row_group_size) - row group size in rows while data output. Default value - `1000000`. +- [output_format_parquet_string_as_string](/docs/en/operations/settings/settings.md/#output_format_parquet_string_as_string) - use Parquet String type instead of Binary for String columns. Default value - `false`. +- [input_format_parquet_import_nested](/docs/en/operations/settings/settings.md/#input_format_parquet_import_nested) - allow inserting array of structs into [Nested](/docs/en/sql-reference/data-types/nested-data-structures/nested.md) table in Parquet input format. Default value - `false`. +- [input_format_parquet_case_insensitive_column_matching](/docs/en/operations/settings/settings.md/#input_format_parquet_case_insensitive_column_matching) - ignore case when matching Parquet columns with ClickHouse columns. Default value - `false`. +- [input_format_parquet_allow_missing_columns](/docs/en/operations/settings/settings.md/#input_format_parquet_allow_missing_columns) - allow missing columns while reading Parquet data. Default value - `false`. +- [input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings.md/#input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Parquet format. Default value - `false`. ## Arrow {#data-format-arrow} @@ -1967,39 +1967,39 @@ To exchange data with Hadoop, you can use [HDFS table engine](../engines/table-e ### Data Types Matching {#data-types-matching-arrow} -The table below shows supported data types and how they match ClickHouse [data types](../sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries. +The table below shows supported data types and how they match ClickHouse [data types](/docs/en/sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries. 
-| Arrow data type (`INSERT`) | ClickHouse data type | Arrow data type (`SELECT`) | -|----------------------------|-----------------------------------------------------|----------------------------| -| `UINT8`, `BOOL` | [UInt8](../sql-reference/data-types/int-uint.md) | `UINT8` | -| `INT8` | [Int8](../sql-reference/data-types/int-uint.md) | `INT8` | -| `UINT16` | [UInt16](../sql-reference/data-types/int-uint.md) | `UINT16` | -| `INT16` | [Int16](../sql-reference/data-types/int-uint.md) | `INT16` | -| `UINT32` | [UInt32](../sql-reference/data-types/int-uint.md) | `UINT32` | -| `INT32` | [Int32](../sql-reference/data-types/int-uint.md) | `INT32` | -| `UINT64` | [UInt64](../sql-reference/data-types/int-uint.md) | `UINT64` | -| `INT64` | [Int64](../sql-reference/data-types/int-uint.md) | `INT64` | -| `FLOAT`, `HALF_FLOAT` | [Float32](../sql-reference/data-types/float.md) | `FLOAT32` | -| `DOUBLE` | [Float64](../sql-reference/data-types/float.md) | `FLOAT64` | -| `DATE32` | [Date](../sql-reference/data-types/date.md) | `UINT16` | -| `DATE64`, `TIMESTAMP` | [DateTime](../sql-reference/data-types/datetime.md) | `UINT32` | -| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `BINARY` | -| `STRING`, `BINARY` | [FixedString](../sql-reference/data-types/fixedstring.md) | `BINARY` | -| `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | `DECIMAL` | -| `DECIMAL256` | [Decimal256](../sql-reference/data-types/decimal.md)| `DECIMAL256` | -| `LIST` | [Array](../sql-reference/data-types/array.md) | `LIST` | -| `STRUCT` | [Tuple](../sql-reference/data-types/tuple.md) | `STRUCT` | -| `MAP` | [Map](../sql-reference/data-types/map.md) | `MAP` | +| Arrow data type (`INSERT`) | ClickHouse data type | Arrow data type (`SELECT`) | +|---------------------------------|-----------------------------------------------------------|----------------------------| +| `BOOL` | [Bool](/docs/en/sql-reference/data-types/boolean.md) | `BOOL` | +| `UINT8`, `BOOL` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `UINT8` | +| `INT8` | [Int8](/docs/en/sql-reference/data-types/int-uint.md) | `INT8` | +| `UINT16` | [UInt16](/docs/en/sql-reference/data-types/int-uint.md) | `UINT16` | +| `INT16` | [Int16](/docs/en/sql-reference/data-types/int-uint.md) | `INT16` | +| `UINT32` | [UInt32](/docs/en/sql-reference/data-types/int-uint.md) | `UINT32` | +| `INT32` | [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `INT32` | +| `UINT64` | [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `UINT64` | +| `INT64` | [Int64](/docs/en/sql-reference/data-types/int-uint.md) | `INT64` | +| `FLOAT`, `HALF_FLOAT` | [Float32](/docs/en/sql-reference/data-types/float.md) | `FLOAT32` | +| `DOUBLE` | [Float64](/docs/en/sql-reference/data-types/float.md) | `FLOAT64` | +| `DATE32` | [Date32](/docs/en/sql-reference/data-types/date32.md) | `UINT16` | +| `DATE64` | [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `UINT32` | +| `TIMESTAMP`, `TIME32`, `TIME64` | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | `UINT32` | +| `STRING`, `BINARY` | [String](/docs/en/sql-reference/data-types/string.md) | `BINARY` | +| `STRING`, `BINARY` | [FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `BINARY` | +| `DECIMAL` | [Decimal](/docs/en/sql-reference/data-types/decimal.md) | `DECIMAL` | +| `DECIMAL256` | [Decimal256](/docs/en/sql-reference/data-types/decimal.md) | `DECIMAL256` | +| `LIST` | [Array](/docs/en/sql-reference/data-types/array.md) | `LIST` | +| `STRUCT` | 
[Tuple](/docs/en/sql-reference/data-types/tuple.md) | `STRUCT` | +| `MAP` | [Map](/docs/en/sql-reference/data-types/map.md) | `MAP` | Arrays can be nested and can have a value of the `Nullable` type as an argument. `Tuple` and `Map` types also can be nested. -The `DICTIONARY` type is supported for `INSERT` queries, and for `SELECT` queries there is an [output_format_arrow_low_cardinality_as_dictionary](../operations/settings/settings.md#output-format-arrow-low-cardinality-as-dictionary) setting that allows to output [LowCardinality](../sql-reference/data-types/lowcardinality.md) type as a `DICTIONARY` type. +The `DICTIONARY` type is supported for `INSERT` queries, and for `SELECT` queries there is an [output_format_arrow_low_cardinality_as_dictionary](/docs/en/operations/settings/settings.md/#output-format-arrow-low-cardinality-as-dictionary) setting that allows to output [LowCardinality](/docs/en/sql-reference/data-types/lowcardinality.md) type as a `DICTIONARY` type. -ClickHouse supports configurable precision of the `Decimal` type. The `INSERT` query treats the Arrow `DECIMAL` type as the ClickHouse `Decimal128` type. +Unsupported Arrow data types: `FIXED_SIZE_BINARY`, `JSON`, `UUID`, `ENUM`. -Unsupported Arrow data types: `TIME32`, `FIXED_SIZE_BINARY`, `JSON`, `UUID`, `ENUM`. - -The data types of ClickHouse table columns do not have to match the corresponding Arrow data fields. When inserting data, ClickHouse interprets data types according to the table above and then [casts](../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) the data to the data type set for the ClickHouse table column. +The data types of ClickHouse table columns do not have to match the corresponding Arrow data fields. When inserting data, ClickHouse interprets data types according to the table above and then [casts](/docs/en/sql-reference/functions/type-conversion-functions.md/#type_conversion_function-cast) the data to the data type set for the ClickHouse table column. ### Inserting Data {#inserting-data-arrow} @@ -2019,12 +2019,12 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Arrow" > {filenam ### Arrow format settings {#parquet-format-settings} -- [output_format_arrow_low_cardinality_as_dictionary](../operations/settings/settings.md#output_format_arrow_low_cardinality_as_dictionary) - enable output ClickHouse LowCardinality type as Dictionary Arrow type. Default value - `false`. -- [output_format_arrow_string_as_string](../operations/settings/settings.md#output_format_arrow_string_as_string) - use Arrow String type instead of Binary for String columns. Default value - `false`. -- [input_format_arrow_import_nested](../operations/settings/settings.md#input_format_arrow_import_nested) - allow inserting array of structs into Nested table in Arrow input format. Default value - `false`. -- [input_format_arrow_case_insensitive_column_matching](../operations/settings/settings.md#input_format_arrow_case_insensitive_column_matching) - ignore case when matching Arrow columns with ClickHouse columns. Default value - `false`. -- [input_format_arrow_allow_missing_columns](../operations/settings/settings.md#input_format_arrow_allow_missing_columns) - allow missing columns while reading Arrow data. Default value - `false`. 
-- [input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference](../operations/settings/settings.md#input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Arrow format. Default value - `false`. +- [output_format_arrow_low_cardinality_as_dictionary](/docs/en/operations/settings/settings.md/#output_format_arrow_low_cardinality_as_dictionary) - enable output ClickHouse LowCardinality type as Dictionary Arrow type. Default value - `false`. +- [output_format_arrow_string_as_string](/docs/en/operations/settings/settings.md/#output_format_arrow_string_as_string) - use Arrow String type instead of Binary for String columns. Default value - `false`. +- [input_format_arrow_import_nested](/docs/en/operations/settings/settings.md/#input_format_arrow_import_nested) - allow inserting array of structs into Nested table in Arrow input format. Default value - `false`. +- [input_format_arrow_case_insensitive_column_matching](/docs/en/operations/settings/settings.md/#input_format_arrow_case_insensitive_column_matching) - ignore case when matching Arrow columns with ClickHouse columns. Default value - `false`. +- [input_format_arrow_allow_missing_columns](/docs/en/operations/settings/settings.md/#input_format_arrow_allow_missing_columns) - allow missing columns while reading Arrow data. Default value - `false`. +- [input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings.md/#input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Arrow format. Default value - `false`. ## ArrowStream {#data-format-arrow-stream} @@ -2036,35 +2036,30 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Arrow" > {filenam ### Data Types Matching {#data-types-matching-orc} -The table below shows supported data types and how they match ClickHouse [data types](../sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries. +The table below shows supported data types and how they match ClickHouse [data types](/docs/en/sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries. 
-| ORC data type (`INSERT`) | ClickHouse data type | ORC data type (`SELECT`) | -|--------------------------|-----------------------------------------------------|--------------------------| -| `UINT8`, `BOOL` | [UInt8](../sql-reference/data-types/int-uint.md) | `UINT8` | -| `INT8` | [Int8](../sql-reference/data-types/int-uint.md) | `INT8` | -| `UINT16` | [UInt16](../sql-reference/data-types/int-uint.md) | `UINT16` | -| `INT16` | [Int16](../sql-reference/data-types/int-uint.md) | `INT16` | -| `UINT32` | [UInt32](../sql-reference/data-types/int-uint.md) | `UINT32` | -| `INT32` | [Int32](../sql-reference/data-types/int-uint.md) | `INT32` | -| `UINT64` | [UInt64](../sql-reference/data-types/int-uint.md) | `UINT64` | -| `INT64` | [Int64](../sql-reference/data-types/int-uint.md) | `INT64` | -| `FLOAT`, `HALF_FLOAT` | [Float32](../sql-reference/data-types/float.md) | `FLOAT` | -| `DOUBLE` | [Float64](../sql-reference/data-types/float.md) | `DOUBLE` | -| `DATE32` | [Date](../sql-reference/data-types/date.md) | `DATE32` | -| `DATE64`, `TIMESTAMP` | [DateTime](../sql-reference/data-types/datetime.md) | `TIMESTAMP` | -| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `BINARY` | -| `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | `DECIMAL` | -| `LIST` | [Array](../sql-reference/data-types/array.md) | `LIST` | -| `STRUCT` | [Tuple](../sql-reference/data-types/tuple.md) | `STRUCT` | -| `MAP` | [Map](../sql-reference/data-types/map.md) | `MAP` | +| ORC data type (`INSERT`) | ClickHouse data type | ORC data type (`SELECT`) | +|---------------------------------------|---------------------------------------------------------|--------------------------| +| `Boolean` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `Boolean` | +| `Tinyint` | [Int8](/docs/en/sql-reference/data-types/int-uint.md) | `Tinyint` | +| `Smallint` | [Int16](/docs/en/sql-reference/data-types/int-uint.md) | `Smallint` | +| `Int` | [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `Int` | +| `Bigint` | [Int64](/docs/en/sql-reference/data-types/int-uint.md) | `Bigint` | +| `Float` | [Float32](/docs/en/sql-reference/data-types/float.md) | `Float` | +| `Double` | [Float64](/docs/en/sql-reference/data-types/float.md) | `Double` | +| `Decimal` | [Decimal](/docs/en/sql-reference/data-types/decimal.md) | `Decimal` | +| `Date` | [Date32](/docs/en/sql-reference/data-types/date32.md) | `Date` | +| `Timestamp` | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | `Timestamp` | +| `String`, `Char`, `Varchar`, `Binary` | [String](/docs/en/sql-reference/data-types/string.md) | `Binary` | +| `List` | [Array](/docs/en/sql-reference/data-types/array.md) | `List` | +| `Struct` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `Struct` | +| `Map` | [Map](/docs/en/sql-reference/data-types/map.md) | `Map` | + +Other types are not supported. Arrays can be nested and can have a value of the `Nullable` type as an argument. `Tuple` and `Map` types also can be nested. -ClickHouse supports configurable precision of the `Decimal` type. The `INSERT` query treats the ORC `DECIMAL` type as the ClickHouse `Decimal128` type. - -Unsupported ORC data types: `TIME32`, `FIXED_SIZE_BINARY`, `JSON`, `UUID`, `ENUM`. - -The data types of ClickHouse table columns do not have to match the corresponding ORC data fields. 
When inserting data, ClickHouse interprets data types according to the table above and then [casts](../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) the data to the data type set for the ClickHouse table column. +The data types of ClickHouse table columns do not have to match the corresponding ORC data fields. When inserting data, ClickHouse interprets data types according to the table above and then [casts](/docs/en/sql-reference/functions/type-conversion-functions.md/#type_conversion_function-cast) the data to the data type set for the ClickHouse table column. ### Inserting Data {#inserting-data-orc} @@ -2084,18 +2079,18 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT ORC" > {filename. ### Arrow format settings {#parquet-format-settings} -- [output_format_arrow_string_as_string](../operations/settings/settings.md#output_format_arrow_string_as_string) - use Arrow String type instead of Binary for String columns. Default value - `false`. -- [input_format_arrow_import_nested](../operations/settings/settings.md#input_format_arrow_import_nested) - allow inserting array of structs into Nested table in Arrow input format. Default value - `false`. -- [input_format_arrow_case_insensitive_column_matching](../operations/settings/settings.md#input_format_arrow_case_insensitive_column_matching) - ignore case when matching Arrow columns with ClickHouse columns. Default value - `false`. -- [input_format_arrow_allow_missing_columns](../operations/settings/settings.md#input_format_arrow_allow_missing_columns) - allow missing columns while reading Arrow data. Default value - `false`. -- [input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference](../operations/settings/settings.md#input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Arrow format. Default value - `false`. +- [output_format_arrow_string_as_string](/docs/en/operations/settings/settings.md/#output_format_arrow_string_as_string) - use Arrow String type instead of Binary for String columns. Default value - `false`. +- [input_format_arrow_import_nested](/docs/en/operations/settings/settings.md/#input_format_arrow_import_nested) - allow inserting array of structs into Nested table in Arrow input format. Default value - `false`. +- [input_format_arrow_case_insensitive_column_matching](/docs/en/operations/settings/settings.md/#input_format_arrow_case_insensitive_column_matching) - ignore case when matching Arrow columns with ClickHouse columns. Default value - `false`. +- [input_format_arrow_allow_missing_columns](/docs/en/operations/settings/settings.md/#input_format_arrow_allow_missing_columns) - allow missing columns while reading Arrow data. Default value - `false`. +- [input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings.md/#input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Arrow format. Default value - `false`. -To exchange data with Hadoop, you can use [HDFS table engine](../engines/table-engines/integrations/hdfs.md). +To exchange data with Hadoop, you can use [HDFS table engine](/docs/en/engines/table-engines/integrations/hdfs.md). ## LineAsString {#lineasstring} -In this format, every line of input data is interpreted as a single string value. 
This format can only be parsed for table with a single field of type [String](../sql-reference/data-types/string.md). The remaining columns must be set to [DEFAULT](../sql-reference/statements/create/table.md#default) or [MATERIALIZED](../sql-reference/statements/create/table.md#materialized), or omitted. +In this format, every line of input data is interpreted as a single string value. This format can only be parsed for table with a single field of type [String](/docs/en/sql-reference/data-types/string.md). The remaining columns must be set to [DEFAULT](/docs/en/sql-reference/statements/create/table.md/#default) or [MATERIALIZED](/docs/en/sql-reference/statements/create/table.md/#materialized), or omitted. **Example** @@ -2122,9 +2117,9 @@ Each line of imported data is parsed according to the regular expression. When working with the `Regexp` format, you can use the following settings: -- `format_regexp` — [String](../sql-reference/data-types/string.md). Contains regular expression in the [re2](https://github.com/google/re2/wiki/Syntax) format. +- `format_regexp` — [String](/docs/en/sql-reference/data-types/string.md). Contains regular expression in the [re2](https://github.com/google/re2/wiki/Syntax) format. -- `format_regexp_escaping_rule` — [String](../sql-reference/data-types/string.md). The following escaping rules are supported: +- `format_regexp_escaping_rule` — [String](/docs/en/sql-reference/data-types/string.md). The following escaping rules are supported: - CSV (similarly to [CSV](#csv)) - JSON (similarly to [JSONEachRow](#jsoneachrow)) @@ -2132,17 +2127,17 @@ When working with the `Regexp` format, you can use the following settings: - Quoted (similarly to [Values](#data-format-values)) - Raw (extracts subpatterns as a whole, no escaping rules, similarly to [TSVRaw](#tabseparatedraw)) -- `format_regexp_skip_unmatched` — [UInt8](../sql-reference/data-types/int-uint.md). Defines the need to throw an exception in case the `format_regexp` expression does not match the imported data. Can be set to `0` or `1`. +- `format_regexp_skip_unmatched` — [UInt8](/docs/en/sql-reference/data-types/int-uint.md). Defines the need to throw an exception in case the `format_regexp` expression does not match the imported data. Can be set to `0` or `1`. **Usage** -The regular expression from [format_regexp](../operations/settings/settings.md#format_regexp) setting is applied to every line of imported data. The number of subpatterns in the regular expression must be equal to the number of columns in imported dataset. +The regular expression from [format_regexp](/docs/en/operations/settings/settings.md/#format_regexp) setting is applied to every line of imported data. The number of subpatterns in the regular expression must be equal to the number of columns in imported dataset. Lines of the imported data must be separated by newline character `'\n'` or DOS-style newline `"\r\n"`. -The content of every matched subpattern is parsed with the method of corresponding data type, according to [format_regexp_escaping_rule](../operations/settings/settings.md#format_regexp_escaping_rule) setting. +The content of every matched subpattern is parsed with the method of corresponding data type, according to [format_regexp_escaping_rule](/docs/en/operations/settings/settings.md/#format_regexp_escaping_rule) setting. -If the regular expression does not match the line and [format_regexp_skip_unmatched](../operations/settings/settings.md#format_regexp_escaping_rule) is set to 1, the line is silently skipped. 
Otherwise, exception is thrown. +If the regular expression does not match the line and [format_regexp_skip_unmatched](/docs/en/operations/settings/settings.md/#format_regexp_skip_unmatched) is set to 1, the line is silently skipped. Otherwise, an exception is thrown. **Example** @@ -2190,25 +2185,25 @@ e.g. `schemafile.proto:MessageType`. If the file has the standard extension for the format (for example, `.proto` for `Protobuf`), it can be omitted and in this case, the format schema looks like `schemafile:MessageType`. -If you input or output data via the [client](../interfaces/cli.md) in the [interactive mode](../interfaces/cli.md#cli_usage), the file name specified in the format schema +If you input or output data via the [client](/docs/en/interfaces/cli.md) in the [interactive mode](/docs/en/interfaces/cli.md/#cli_usage), the file name specified in the format schema can contain an absolute path or a path relative to the current directory on the client. -If you use the client in the [batch mode](../interfaces/cli.md#cli_usage), the path to the schema must be relative due to security reasons. +If you use the client in the [batch mode](/docs/en/interfaces/cli.md/#cli_usage), the path to the schema must be relative for security reasons. -If you input or output data via the [HTTP interface](../interfaces/http.md) the file name specified in the format schema -should be located in the directory specified in [format_schema_path](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-format_schema_path) +If you input or output data via the [HTTP interface](/docs/en/interfaces/http.md), the file name specified in the format schema +should be located in the directory specified in [format_schema_path](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-format_schema_path) +in the server configuration. ## Skipping Errors {#skippingerrors} -Some formats such as `CSV`, `TabSeparated`, `TSKV`, `JSONEachRow`, `Template`, `CustomSeparated` and `Protobuf` can skip broken row if parsing error occurred and continue parsing from the beginning of next row. See [input_format_allow_errors_num](../operations/settings/settings.md#input_format_allow_errors_num) and -[input_format_allow_errors_ratio](../operations/settings/settings.md#input_format_allow_errors_ratio) settings. +Some formats such as `CSV`, `TabSeparated`, `TSKV`, `JSONEachRow`, `Template`, `CustomSeparated` and `Protobuf` can skip a broken row if a parsing error occurred and continue parsing from the beginning of the next row. See the [input_format_allow_errors_num](/docs/en/operations/settings/settings.md/#input_format_allow_errors_num) and +[input_format_allow_errors_ratio](/docs/en/operations/settings/settings.md/#input_format_allow_errors_ratio) settings. Limitations: - In case of parsing error `JSONEachRow` skips all data until the new line (or EOF), so rows must be delimited by `\n` to count errors correctly. - `Template` and `CustomSeparated` use delimiter after the last column and delimiter between rows to find the beginning of next row, so skipping errors works only if at least one of them is not empty. ## RawBLOB {#rawblob} -In this format, all input data is read to a single value. It is possible to parse only a table with a single field of type [String](../sql-reference/data-types/string.md) or similar. +In this format, all input data is read to a single value.
It is possible to parse only a table with a single field of type [String](/docs/en/sql-reference/data-types/string.md) or similar. The result is output in binary format without delimiters and escaping. If more than one value is output, the format is ambiguous, and it will be impossible to read the data back. Below is a comparison of the formats `RawBLOB` and [TabSeparatedRaw](#tabseparatedraw). @@ -2255,18 +2250,18 @@ ClickHouse supports reading and writing [MessagePack](https://msgpack.org/) data | MessagePack data type (`INSERT`) | ClickHouse data type | MessagePack data type (`SELECT`) | |--------------------------------------------------------------------|-----------------------------------------------------------|------------------------------------| -| `uint N`, `positive fixint` | [UIntN](../sql-reference/data-types/int-uint.md) | `uint N` | -| `int N` | [IntN](../sql-reference/data-types/int-uint.md) | `int N` | -| `bool` | [UInt8](../sql-reference/data-types/int-uint.md) | `uint 8` | -| `fixstr`, `str 8`, `str 16`, `str 32`, `bin 8`, `bin 16`, `bin 32` | [String](../sql-reference/data-types/string.md) | `bin 8`, `bin 16`, `bin 32` | -| `fixstr`, `str 8`, `str 16`, `str 32`, `bin 8`, `bin 16`, `bin 32` | [FixedString](../sql-reference/data-types/fixedstring.md) | `bin 8`, `bin 16`, `bin 32` | -| `float 32` | [Float32](../sql-reference/data-types/float.md) | `float 32` | -| `float 64` | [Float64](../sql-reference/data-types/float.md) | `float 64` | -| `uint 16` | [Date](../sql-reference/data-types/date.md) | `uint 16` | -| `uint 32` | [DateTime](../sql-reference/data-types/datetime.md) | `uint 32` | -| `uint 64` | [DateTime64](../sql-reference/data-types/datetime.md) | `uint 64` | -| `fixarray`, `array 16`, `array 32` | [Array](../sql-reference/data-types/array.md) | `fixarray`, `array 16`, `array 32` | -| `fixmap`, `map 16`, `map 32` | [Map](../sql-reference/data-types/map.md) | `fixmap`, `map 16`, `map 32` | +| `uint N`, `positive fixint` | [UIntN](/docs/en/sql-reference/data-types/int-uint.md) | `uint N` | +| `int N`, `negative fixint` | [IntN](/docs/en/sql-reference/data-types/int-uint.md) | `int N` | +| `bool` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `uint 8` | +| `fixstr`, `str 8`, `str 16`, `str 32`, `bin 8`, `bin 16`, `bin 32` | [String](/docs/en/sql-reference/data-types/string.md) | `bin 8`, `bin 16`, `bin 32` | +| `fixstr`, `str 8`, `str 16`, `str 32`, `bin 8`, `bin 16`, `bin 32` | [FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `bin 8`, `bin 16`, `bin 32` | +| `float 32` | [Float32](/docs/en/sql-reference/data-types/float.md) | `float 32` | +| `float 64` | [Float64](/docs/en/sql-reference/data-types/float.md) | `float 64` | +| `uint 16` | [Date](/docs/en/sql-reference/data-types/date.md) | `uint 16` | +| `uint 32` | [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `uint 32` | +| `uint 64` | [DateTime64](/docs/en/sql-reference/data-types/datetime.md) | `uint 64` | +| `fixarray`, `array 16`, `array 32` | [Array](/docs/en/sql-reference/data-types/array.md) | `fixarray`, `array 16`, `array 32` | +| `fixmap`, `map 16`, `map 32` | [Map](/docs/en/sql-reference/data-types/map.md) | `fixmap`, `map 16`, `map 32` | Example: @@ -2280,17 +2275,17 @@ $ clickhouse-client --query="SELECT * FROM msgpack FORMAT MsgPack" > tmp_msgpack ### MsgPack format settings {#msgpack-format-settings} -- [input_format_msgpack_number_of_columns](../operations/settings/settings.md#input_format_msgpack_number_of_columns) - the number of columns in inserted 
MsgPack data. Used for automatic schema inference from data. Default value - `0`. -- [output_format_msgpack_uuid_representation](../operations/settings/settings.md#output_format_msgpack_uuid_representation) - the way how to output UUID in MsgPack format. Default value - `EXT`. +- [input_format_msgpack_number_of_columns](/docs/en/operations/settings/settings.md/#input_format_msgpack_number_of_columns) - the number of columns in inserted MsgPack data. Used for automatic schema inference from data. Default value - `0`. +- [output_format_msgpack_uuid_representation](/docs/en/operations/settings/settings.md/#output_format_msgpack_uuid_representation) - how to output UUID in MsgPack format. Default value - `EXT`. ## MySQLDump {#mysqldump} ClickHouse supports reading MySQL [dumps](https://dev.mysql.com/doc/refman/8.0/en/mysqldump.html). It reads all data from INSERT queries belonging to one table in dump. If there are more than one table, by default it reads data from the first one. -You can specify the name of the table from which to read data from using [input_format_mysql_dump_table_name](../operations/settings/settings.md#input_format_mysql_dump_table_name) settings. -If setting [input_format_mysql_dump_map_columns](../operations/settings/settings.md#input_format_mysql_dump_map_columns) is set to 1 and +You can specify the name of the table to read data from using the [input_format_mysql_dump_table_name](/docs/en/operations/settings/settings.md/#input_format_mysql_dump_table_name) setting. +If the setting [input_format_mysql_dump_map_columns](/docs/en/operations/settings/settings.md/#input_format_mysql_dump_map_columns) is set to 1 and dump contains CREATE query for specified table or column names in INSERT query the columns from input data will be mapped to the columns from the table by their names, -columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields) is set to 1. +columns with unknown names will be skipped if the setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings.md/#input_format_skip_unknown_fields) is set to 1. This format supports schema inference: if the dump contains CREATE query for the specified table, the structure is extracted from it, otherwise schema is inferred from the data of INSERT queries. Examples: @@ -2349,3 +2344,5 @@ Query id: 17d59664-ebce-4053-bb79-d46a516fb590 │ 3 │ └───┘ ``` + +[Original article](https://clickhouse.com/docs/en/interfaces/formats) diff --git a/docs/en/interfaces/schema-inference.md b/docs/en/interfaces/schema-inference.md new file mode 100644 index 00000000000..394e6906a23 --- /dev/null +++ b/docs/en/interfaces/schema-inference.md @@ -0,0 +1,1573 @@ +--- +slug: /en/interfaces/schema-inference +sidebar_position: 21 +sidebar_label: Schema inference +title: Automatic schema inference from input data +--- + +ClickHouse can automatically determine the structure of input data in almost all supported [Input formats](formats.md). +This document describes when schema inference is used, how it works with different input formats, and which settings +can control it. + +## Usage {#usage} + +Schema inference is used when ClickHouse needs to read the data in a specific data format and the structure is unknown. + +## Table functions [file](../sql-reference/table-functions/file.md), [s3](../sql-reference/table-functions/s3.md), [url](../sql-reference/table-functions/url.md), [hdfs](../sql-reference/table-functions/hdfs.md).
+ +These table functions have the optional argument `structure` with the structure of input data. If this argument is not specified or set to `auto`, the structure will be inferred from the data. + +**Example:** + +Let's say we have a file `hobbies.jsonl` in JSONEachRow format in the `user_files` directory with this content: +```json +{"id" : 1, "age" : 25, "name" : "Josh", "hobbies" : ["football", "cooking", "music"]} +{"id" : 2, "age" : 19, "name" : "Alan", "hobbies" : ["tennis", "art"]} +{"id" : 3, "age" : 32, "name" : "Lana", "hobbies" : ["fitness", "reading", "shopping"]} +{"id" : 4, "age" : 47, "name" : "Brayan", "hobbies" : ["movies", "skydiving"]} +``` + +ClickHouse can read this data without you specifying its structure: +```sql +SELECT * FROM file('hobbies.jsonl') +``` +```response +┌─id─┬─age─┬─name───┬─hobbies──────────────────────────┐ +│ 1 │ 25 │ Josh │ ['football','cooking','music'] │ +│ 2 │ 19 │ Alan │ ['tennis','art'] │ +│ 3 │ 32 │ Lana │ ['fitness','reading','shopping'] │ +│ 4 │ 47 │ Brayan │ ['movies','skydiving'] │ +└────┴─────┴────────┴──────────────────────────────────┘ +``` + +Note: the format `JSONEachRow` was automatically determined by the file extension `.jsonl`. + +You can see an automatically determined structure using the `DESCRIBE` query: +```sql +DESCRIBE file('hobbies.jsonl') +``` +```response +┌─name────┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ id │ Nullable(Int64) │ │ │ │ │ │ +│ age │ Nullable(Int64) │ │ │ │ │ │ +│ name │ Nullable(String) │ │ │ │ │ │ +│ hobbies │ Array(Nullable(String)) │ │ │ │ │ │ +└─────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +## Table engines [File](../engines/table-engines/special/file.md), [S3](../engines/table-engines/integrations/s3.md), [URL](../engines/table-engines/special/url.md), [HDFS](../engines/table-engines/integrations/hdfs.md) + +If the list of columns is not specified in `CREATE TABLE` query, the structure of the table will be inferred automatically from the data. + +**Example:** + +Let's use the file `hobbies.jsonl`. We can create a table with engine `File` with the data from this file: +```sql +CREATE TABLE hobbies ENGINE=File(JSONEachRow, 'hobbies.jsonl') +``` +```response +Ok. +``` +```sql +SELECT * FROM hobbies +``` +```response +┌─id─┬─age─┬─name───┬─hobbies──────────────────────────┐ +│ 1 │ 25 │ Josh │ ['football','cooking','music'] │ +│ 2 │ 19 │ Alan │ ['tennis','art'] │ +│ 3 │ 32 │ Lana │ ['fitness','reading','shopping'] │ +│ 4 │ 47 │ Brayan │ ['movies','skydiving'] │ +└────┴─────┴────────┴──────────────────────────────────┘ +``` +```sql +DESCRIBE TABLE hobbies +``` +```response +┌─name────┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ id │ Nullable(Int64) │ │ │ │ │ │ +│ age │ Nullable(Int64) │ │ │ │ │ │ +│ name │ Nullable(String) │ │ │ │ │ │ +│ hobbies │ Array(Nullable(String)) │ │ │ │ │ │ +└─────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +## clickhouse-local + +`clickhouse-local` has an optional parameter `-S/--structure` with the structure of input data. If this parameter is not specified or set to `auto`, the structure will be inferred from the data. + +**Example:** + +Let's use the file `hobbies.jsonl`. 
We can query the data from this file using `clickhouse-local`: +```shell +clickhouse-local --file='hobbies.jsonl' --table='hobbies' --query='DESCRIBE TABLE hobbies' +``` +```response +id Nullable(Int64) +age Nullable(Int64) +name Nullable(String) +hobbies Array(Nullable(String)) +``` +```shell +clickhouse-local --file='hobbies.jsonl' --table='hobbies' --query='SELECT * FROM hobbies' +``` +```response +1 25 Josh ['football','cooking','music'] +2 19 Alan ['tennis','art'] +3 32 Lana ['fitness','reading','shopping'] +4 47 Brayan ['movies','skydiving'] +``` + +# Using structure from insertion table {#using-structure-from-insertion-table} + +When table functions `file/s3/url/hdfs` are used to insert data into a table, +there is an option to use the structure from the insertion table instead of extracting it from the data. +It can improve insertion performance because schema inference can take some time. Also, it will be helpful when the table has an optimized schema, so +no conversions between types will be performed. + +There is a special setting [use_structure_from_insertion_table_in_table_functions](/docs/en/operations/settings/settings.md/#use_structure_from_insertion_table_in_table_functions) +that controls this behaviour. It has 3 possible values: +- 0 - table function will extract the structure from the data. +- 1 - table function will use the structure from the insertion table. +- 2 - ClickHouse will automatically determine if it's possible to use the structure from the insertion table or use schema inference. Default value. + +**Example 1:** + +Let's create table `hobbies1` with the next structure: +```sql +CREATE TABLE hobbies1 +( + `id` UInt64, + `age` LowCardinality(UInt8), + `name` String, + `hobbies` Array(String) +) +ENGINE = MergeTree +ORDER BY id; +``` + +And insert data from the file `hobbies.jsonl`: + +```sql +INSERT INTO hobbies1 SELECT * FROM file(hobbies.jsonl) +``` + +In this case, all columns from the file are inserted into the table without changes, so ClickHouse will use the structure from the insertion table instead of schema inference. + +**Example 2:** + +Let's create table `hobbies2` with the next structure: +```sql +CREATE TABLE hobbies2 +( + `id` UInt64, + `age` LowCardinality(UInt8), + `hobbies` Array(String) +) + ENGINE = MergeTree +ORDER BY id; +``` + +And insert data from the file `hobbies.jsonl`: + +```sql +INSERT INTO hobbies2 SELECT id, age, hobbies FROM file(hobbies.jsonl) +``` + +In this case, all columns in the `SELECT` query are present in the table, so ClickHouse will use the structure from the insertion table. +Note that it will work only for input formats that support reading a subset of columns like JSONEachRow, TSKV, Parquet, etc. (so it won't work for example for TSV format). + +**Example 3:** + +Let's create table `hobbies3` with the next structure: + +```sql +CREATE TABLE hobbies3 +( + `identifier` UInt64, + `age` LowCardinality(UInt8), + `hobbies` Array(String) +) + ENGINE = MergeTree +ORDER BY identifier; +``` + +And insert data from the file `hobbies.jsonl`: + +```sql +INSERT INTO hobbies3 SELECT id, age, hobbies FROM file(hobbies.jsonl) +``` + +In this case, column `id` is used in the `SELECT` query, but the table doesn't have this column (it has a column with the name `identifier`), +so ClickHouse cannot use the structure from the insertion table, and schema inference will be used. 
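+
+One way to avoid schema inference in such cases is to specify the structure explicitly in the table function itself. A brief sketch (the column types here are assumptions based on the sample `hobbies.jsonl` data above):
+
+```sql
+-- Passing an explicit structure to file() so no inference is needed.
+INSERT INTO hobbies3 SELECT id, age, hobbies
+FROM file('hobbies.jsonl', 'JSONEachRow', 'id UInt64, age UInt8, name String, hobbies Array(String)')
+```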
+ +**Example 4:** + +Let's create table `hobbies4` with the next structure: + +```sql +CREATE TABLE hobbies4 +( + `id` UInt64, + `any_hobby` Nullable(String) +) + ENGINE = MergeTree +ORDER BY id; +``` + +And insert data from the file `hobbies.jsonl`: + +```sql +INSERT INTO hobbies4 SELECT id, empty(hobbies) ? NULL : hobbies[1] FROM file(hobbies.jsonl) +``` + +In this case, there are some operations performed on the column `hobbies` in the `SELECT` query to insert it into the table, so ClickHouse cannot use the structure from the insertion table, and schema inference will be used. + +# Schema inference cache {#schema-inference-cache} + +For most input formats schema inference reads some data to determine its structure and this process can take some time. +To prevent inferring the same schema every time ClickHouse read the data from the same file, the inferred schema is cached and when accessing the same file again, ClickHouse will use the schema from the cache. + +There are special settings that control this cache: +- `schema_inference_cache_max_elements_for_{file/s3/hdfs/url}` - the maximum number of cached schemas for the corresponding table function. The default value is `4096`. These settings should be set in the server config. +- `use_cache_for_{file,s3,hdfs,url}_schema_inference` - allows turning on/off using cache for schema inference. These settings can be used in queries. + +The schema of the file can be changed by modifying the data or by changing format settings. +For this reason, the schema inference cache identifies the schema by file source, format name, used format settings, and the last modification time of the file. + +Note: some files accessed by url in `url` table function may not contain information about the last modification time; for this case, there is a special setting +`schema_inference_cache_require_modification_time_for_url`. Disabling this setting allows the use of the schema from cache without the last modification time for such files. + +There is also a system table [schema_inference_cache](../operations/system-tables/schema_inference_cache.md) with all current schemas in cache and system query `SYSTEM DROP SCHEMA CACHE [FOR File/S3/URL/HDFS]` +that allows cleaning the schema cache for all sources, or for a specific source. + +**Examples:** + +Let's try to infer the structure of a sample dataset from s3 `github-2022.ndjson.gz` and see how the schema inference cache works: + +```sql +DESCRIBE TABLE s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/github/github-2022.ndjson.gz') +SETTINGS allow_experimental_object_type = 1 +``` +```response +┌─name───────┬─type─────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ type │ Nullable(String) │ │ │ │ │ │ +│ actor │ Object(Nullable('json')) │ │ │ │ │ │ +│ repo │ Object(Nullable('json')) │ │ │ │ │ │ +│ created_at │ Nullable(String) │ │ │ │ │ │ +│ payload │ Object(Nullable('json')) │ │ │ │ │ │ +└────────────┴──────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ + +5 rows in set. Elapsed: 0.601 sec. 
+``` +```sql +DESCRIBE TABLE s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/github/github-2022.ndjson.gz') +SETTINGS allow_experimental_object_type = 1 +``` +```response +┌─name───────┬─type─────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ type │ Nullable(String) │ │ │ │ │ │ +│ actor │ Object(Nullable('json')) │ │ │ │ │ │ +│ repo │ Object(Nullable('json')) │ │ │ │ │ │ +│ created_at │ Nullable(String) │ │ │ │ │ │ +│ payload │ Object(Nullable('json')) │ │ │ │ │ │ +└────────────┴──────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ + +5 rows in set. Elapsed: 0.059 sec. +``` + +As you can see, the second query succeeded almost instantly. + +Let's try to change some settings that can affect inferred schema: + +```sql +DESCRIBE TABLE s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/github/github-2022.ndjson.gz') +SETTINGS input_format_json_read_objects_as_strings = 1 + +┌─name───────┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ type │ Nullable(String) │ │ │ │ │ │ +│ actor │ Nullable(String) │ │ │ │ │ │ +│ repo │ Nullable(String) │ │ │ │ │ │ +│ created_at │ Nullable(String) │ │ │ │ │ │ +│ payload │ Nullable(String) │ │ │ │ │ │ +└────────────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ + +5 rows in set. Elapsed: 0.611 sec +``` + +As you can see, the schema from the cache was not used for the same file, because the setting that can affect inferred schema was changed. + +Let's check the content of `system.schema_inference_cache` table: + +```sql +SELECT schema, format, source FROM system.schema_inference_cache WHERE storage='S3' +``` +```response +┌─schema──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─format─┬─source───────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ type Nullable(String), actor Object(Nullable('json')), repo Object(Nullable('json')), created_at Nullable(String), payload Object(Nullable('json')) │ NDJSON │ datasets-documentation.s3.eu-west-3.amazonaws.com443/datasets-documentation/github/github-2022.ndjson.gz │ +│ type Nullable(String), actor Nullable(String), repo Nullable(String), created_at Nullable(String), payload Nullable(String) │ NDJSON │ datasets-documentation.s3.eu-west-3.amazonaws.com443/datasets-documentation/github/github-2022.ndjson.gz │ +└─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴────────┴──────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +``` + +As you can see, there are two different schemas for the same file. + +We can clear the schema cache using a system query: +```sql +SYSTEM DROP SCHEMA CACHE FOR S3 +``` +```response +Ok. +``` +```sql +SELECT count() FROM system.schema_inference_cache WHERE storage='S3' +``` +```response +┌─count()─┐ +│ 0 │ +└─────────┘ +``` + +# Text formats {#text-formats} + +For text formats, ClickHouse reads the data row by row, extracts column values according to the format, +and then uses some recursive parsers and heuristics to determine the type for each value. 
The maximum number of rows read from the data in schema inference +is controlled by the setting `input_format_max_rows_to_read_for_schema_inference` with default value 25000. +By default, all inferred types are [Nullable](../sql-reference/data-types/nullable.md), but you can change this by setting `schema_inference_make_columns_nullable` (see examples in the [settings](#settings-for-text-formats) section). + +## JSON formats {#json-formats} + +In JSON formats ClickHouse parses values according to the JSON specification and then tries to find the most appropriate data type for them. + +Let's see how it works, what types can be inferred and what specific settings can be used in JSON formats. + +**Examples** + +Here and further, the [format](../sql-reference/table-functions/format.md) table function will be used in examples. + +Integers, Floats, Bools, Strings: +```sql +DESC format(JSONEachRow, '{"int" : 42, "float" : 42.42, "string" : "Hello, World!"}'); +``` +```response +┌─name───┬─type──────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ int │ Nullable(Int64) │ │ │ │ │ │ +│ float │ Nullable(Float64) │ │ │ │ │ │ +│ bool │ Nullable(Bool) │ │ │ │ │ │ +│ string │ Nullable(String) │ │ │ │ │ │ +└────────┴───────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +Dates, DateTimes: + +```sql +DESC format(JSONEachRow, '{"date" : "2022-01-01", "datetime" : "2022-01-01 00:00:00"}') +``` +```response +┌─name─────┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ date │ Nullable(Date) │ │ │ │ │ │ +│ datetime │ Nullable(DateTime64(9)) │ │ │ │ │ │ +└──────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +Arrays: +```sql +DESC format(JSONEachRow, '{"arr" : [1, 2, 3], "nested_arrays" : [[1, 2, 3], [4, 5, 6], []]}') +``` +```response +┌─name──────────┬─type──────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ arr │ Array(Nullable(Int64)) │ │ │ │ │ │ +│ nested_arrays │ Array(Array(Nullable(Int64))) │ │ │ │ │ │ +└───────────────┴───────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +If an array contains `null`, ClickHouse will use types from the other array elements: +```sql +DESC format(JSONEachRow, '{"arr" : [null, 42, null]}') +``` +```response +┌─name─┬─type───────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ arr │ Array(Nullable(Int64)) │ │ │ │ │ │ +└──────┴────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +Tuples: + +In JSON formats we treat Arrays with elements of different types as Tuples. 
+```sql +DESC format(JSONEachRow, '{"tuple" : [1, "Hello, World!", [1, 2, 3]]}') +``` +```response +┌─name──┬─type─────────────────────────────────────────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ tuple │ Tuple(Nullable(Int64), Nullable(String), Array(Nullable(Int64))) │ │ │ │ │ │ +└───────┴──────────────────────────────────────────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +If some values are `null` or empty, we use types of corresponding values from the other rows: +```sql +DESC format(JSONEachRow, $$ + {"tuple" : [1, null, null]} + {"tuple" : [null, "Hello, World!", []]} + {"tuple" : [null, null, [1, 2, 3]]} + $$) +``` +```response +┌─name──┬─type─────────────────────────────────────────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ tuple │ Tuple(Nullable(Int64), Nullable(String), Array(Nullable(Int64))) │ │ │ │ │ │ +└───────┴──────────────────────────────────────────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +Maps: + +In JSON we can read objects with values of the same type as Map type. +```sql +DESC format(JSONEachRow, '{"map" : {"key1" : 42, "key2" : 24, "key3" : 4}}') +``` +```response +┌─name─┬─type─────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ map │ Map(String, Nullable(Int64)) │ │ │ │ │ │ +└──────┴──────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +JSON Object type (if setting `allow_experimental_object_type` is enabled): + +```sql +SET allow_experimental_object_type = 1 +DESC format(JSONEachRow, $$ + {"obj" : {"key1" : 42}} + {"obj" : {"key2" : "Hello, World!"}} + {"obj" : {"key1" : 24, "key3" : {"a" : 42, "b" : null}}} + $$) +``` +```response +┌─name─┬─type─────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ obj │ Object(Nullable('json')) │ │ │ │ │ │ +└──────┴──────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +Nested complex types: +```sql +DESC format(JSONEachRow, '{"value" : [[[42, 24], []], {"key1" : 42, "key2" : 24}]}') +``` +```response +┌─name──┬─type───────────────────────────────────────────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ value │ Tuple(Array(Array(Nullable(Int64))), Map(String, Nullable(Int64))) │ │ │ │ │ │ +└───────┴────────────────────────────────────────────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +If ClickHouse cannot determine the type, because the data contains only nulls, an exception will be thrown: +```sql +DESC format(JSONEachRow, '{"arr" : [null, null]}') +``` +```response +Code: 652. DB::Exception: Received from localhost:9000. DB::Exception: +Cannot determine type for column 'arr' by first 1 rows of data, +most likely this column contains only Nulls or empty Arrays/Maps. +... +``` + +### JSON settings {#json-settings} + +#### input_format_json_read_objects_as_strings + +Enabling this setting allows reading nested JSON objects as strings. +This setting can be used to read nested JSON objects without using JSON object type. + +This setting is enabled by default. 
+ +```sql +SET input_format_json_read_objects_as_strings = 1; +DESC format(JSONEachRow, $$ + {"obj" : {"key1" : 42, "key2" : [1,2,3,4]}} + {"obj" : {"key3" : {"nested_key" : 1}}} + $$) +``` +```response +┌─name─┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ obj │ Nullable(String) │ │ │ │ │ │ +└──────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +#### input_format_json_try_infer_numbers_from_strings + +Enabling this setting allows inferring numbers from string values. + +This setting is enabled by default. + +**Example:** + +```sql +SET input_format_json_try_infer_numbers_from_strings = 1; +DESC format(JSONEachRow, $$ + {"value" : "42"} + {"value" : "424242424242"} + $$) +``` +```reponse +┌─name──┬─type────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ value │ Nullable(Int64) │ │ │ │ │ │ +└───────┴─────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +#### input_format_json_read_numbers_as_strings + +Enabling this setting allows reading numeric values as strings. + +This setting is disabled by default. + +**Example** + +```sql +SET input_format_json_read_numbers_as_strings = 1; +DESC format(JSONEachRow, $$ + {"value" : 1055} + {"value" : "unknown"} + $$) +``` +```response +┌─name──┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ value │ Nullable(String) │ │ │ │ │ │ +└───────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +#### input_format_json_read_bools_as_numbers + +Enabling this setting allows reading Bool values as numbers. + +This setting is enabled by default. + +**Example:** + +```sql +SET input_format_json_read_bools_as_numbers = 1; +DESC format(JSONEachRow, $$ + {"value" : true} + {"value" : 42} + $$) +``` +```response +┌─name──┬─type────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ value │ Nullable(Int64) │ │ │ │ │ │ +└───────┴─────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +## CSV {#csv} + +In CSV format ClickHouse extracts column values from the row according to delimiters. ClickHouse expects all types except numbers and strings to be enclosed in double quotes. If the value is in double quotes, ClickHouse tries to parse +the data inside quotes using the recursive parser and then tries to find the most appropriate data type for it. If the value is not in double quotes, ClickHouse tries to parse it as a number, +and if the value is not a number, ClickHouse treats it as a string. + +If you don't want ClickHouse to try to determine complex types using some parsers and heuristics, you can disable setting `input_format_csv_use_best_effort_in_schema_inference` +and ClickHouse will treat all columns as Strings. 
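+
+The examples below use the `format` table function with inline data; the same checks can be run against a real CSV file with the `file` table function. A rough sketch (the file name `data.csv` is an assumption for illustration):
+
+```sql
+-- Assuming data.csv contains rows like: 42,42.42,true,"Hello,World!"
+DESC file('data.csv', 'CSV')
+```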
+ +**Examples:** + +Integers, Floats, Bools, Strings: +```sql +DESC format(CSV, '42,42.42,true,"Hello,World!"') +``` +```response +┌─name─┬─type──────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ c1 │ Nullable(Int64) │ │ │ │ │ │ +│ c2 │ Nullable(Float64) │ │ │ │ │ │ +│ c3 │ Nullable(Bool) │ │ │ │ │ │ +│ c4 │ Nullable(String) │ │ │ │ │ │ +└──────┴───────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +Strings without quotes: +```sql +DESC format(CSV, 'Hello world!,World hello!') +``` +```response +┌─name─┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ c1 │ Nullable(String) │ │ │ │ │ │ +│ c2 │ Nullable(String) │ │ │ │ │ │ +└──────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +Dates, DateTimes: + +```sql +DESC format(CSV, '"2020-01-01","2020-01-01 00:00:00"') +``` +```response +┌─name─┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ c1 │ Nullable(Date) │ │ │ │ │ │ +│ c2 │ Nullable(DateTime64(9)) │ │ │ │ │ │ +└──────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +Arrays: +```sql +DESC format(CSV, '"[1,2,3]","[[1, 2], [], [3, 4]]"') +``` +```response +┌─name─┬─type──────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ c1 │ Array(Nullable(Int64)) │ │ │ │ │ │ +│ c2 │ Array(Array(Nullable(Int64))) │ │ │ │ │ │ +└──────┴───────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` +```sql +DESC format(CSV, $$"['Hello', 'world']","[['Abc', 'Def'], []]"$$) +``` +```response +┌─name─┬─type───────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ c1 │ Array(Nullable(String)) │ │ │ │ │ │ +│ c2 │ Array(Array(Nullable(String))) │ │ │ │ │ │ +└──────┴────────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +If an array contains null, ClickHouse will use types from the other array elements: +```sql +DESC format(CSV, '"[NULL, 42, NULL]"') +``` +```response +┌─name─┬─type───────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ c1 │ Array(Nullable(Int64)) │ │ │ │ │ │ +└──────┴────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +Maps: +```sql +DESC format(CSV, $$"{'key1' : 42, 'key2' : 24}"$$) +``` +```response +┌─name─┬─type─────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ c1 │ Map(String, Nullable(Int64)) │ │ │ │ │ │ +└──────┴──────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +Nested Arrays and Maps: +```sql +DESC format(CSV, $$"[{'key1' : [[42, 42], []], 'key2' : [[null], [42]]}]"$$) +``` +```response +┌─name─┬─type──────────────────────────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ c1 │ Array(Map(String, Array(Array(Nullable(Int64))))) │ │ │ │ │ │ +└──────┴───────────────────────────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +If ClickHouse cannot 
determine the type inside quotes, because the data contains only nulls, ClickHouse will treat it as String: +```sql +DESC format(CSV, '"[NULL, NULL]"') +``` +```response +┌─name─┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ c1 │ Nullable(String) │ │ │ │ │ │ +└──────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +Example with disabled setting `input_format_csv_use_best_effort_in_schema_inference`: +```sql +SET input_format_csv_use_best_effort_in_schema_inference = 0 +DESC format(CSV, '"[1,2,3]",42.42,Hello World!') +``` +```response +┌─name─┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ c1 │ Nullable(String) │ │ │ │ │ │ +│ c2 │ Nullable(String) │ │ │ │ │ │ +│ c3 │ Nullable(String) │ │ │ │ │ │ +└──────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +## TSV/TSKV {#tsv-tskv} + +In TSV/TSKV formats ClickHouse extracts column value from the row according to tabular delimiters and then parses extracted value using +the recursive parser to determine the most appropriate type. If the type cannot be determined, ClickHouse treats this value as String. + +If you don't want ClickHouse to try to determine complex types using some parsers and heuristics, you can disable setting `input_format_tsv_use_best_effort_in_schema_inference` +and ClickHouse will treat all columns as Strings. + + +**Examples:** + +Integers, Floats, Bools, Strings: +```sql +DESC format(TSV, '42 42.42 true Hello,World!') +``` +```response +┌─name─┬─type──────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ c1 │ Nullable(Int64) │ │ │ │ │ │ +│ c2 │ Nullable(Float64) │ │ │ │ │ │ +│ c3 │ Nullable(Bool) │ │ │ │ │ │ +│ c4 │ Nullable(String) │ │ │ │ │ │ +└──────┴───────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` +```sql +DESC format(TSKV, 'int=42 float=42.42 bool=true string=Hello,World!\n') +``` +```response +┌─name───┬─type──────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ int │ Nullable(Int64) │ │ │ │ │ │ +│ float │ Nullable(Float64) │ │ │ │ │ │ +│ bool │ Nullable(Bool) │ │ │ │ │ │ +│ string │ Nullable(String) │ │ │ │ │ │ +└────────┴───────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +Dates, DateTimes: + +```sql +DESC format(TSV, '2020-01-01 2020-01-01 00:00:00') +``` +```response +┌─name─┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ c1 │ Nullable(Date) │ │ │ │ │ │ +│ c2 │ Nullable(DateTime64(9)) │ │ │ │ │ │ +└──────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +Arrays: +```sql +DESC format(TSV, '[1,2,3] [[1, 2], [], [3, 4]]') +``` +```response +┌─name─┬─type──────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ c1 │ Array(Nullable(Int64)) │ │ │ │ │ │ +│ c2 │ Array(Array(Nullable(Int64))) │ │ │ │ │ │ +└──────┴───────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` +```sql +DESC format(TSV, '[''Hello'', ''world''] [[''Abc'', ''Def''], []]') +``` +```response 
+┌─name─┬─type───────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ c1 │ Array(Nullable(String)) │ │ │ │ │ │ +│ c2 │ Array(Array(Nullable(String))) │ │ │ │ │ │ +└──────┴────────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +If an array contains null, ClickHouse will use types from the other array elements: +```sql +DESC format(TSV, '[NULL, 42, NULL]') +``` +```response +┌─name─┬─type───────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ c1 │ Array(Nullable(Int64)) │ │ │ │ │ │ +└──────┴────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +Tuples: +```sql +DESC format(TSV, $$(42, 'Hello, world!')$$) +``` +```response +┌─name─┬─type─────────────────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ c1 │ Tuple(Nullable(Int64), Nullable(String)) │ │ │ │ │ │ +└──────┴──────────────────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +Maps: +```sql +DESC format(TSV, $${'key1' : 42, 'key2' : 24}$$) +``` +```response +┌─name─┬─type─────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ c1 │ Map(String, Nullable(Int64)) │ │ │ │ │ │ +└──────┴──────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +Nested Arrays, Tuples and Maps: +```sql +DESC format(TSV, $$[{'key1' : [(42, 'Hello'), (24, NULL)], 'key2' : [(NULL, ','), (42, 'world!')]}]$$) +``` +```response +┌─name─┬─type────────────────────────────────────────────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ c1 │ Array(Map(String, Array(Tuple(Nullable(Int64), Nullable(String))))) │ │ │ │ │ │ +└──────┴─────────────────────────────────────────────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +If ClickHouse cannot determine the type, because the data contains only nulls, ClickHouse will treat it as String: +```sql +DESC format(TSV, '[NULL, NULL]') +``` +```response +┌─name─┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ c1 │ Nullable(String) │ │ │ │ │ │ +└──────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +Example with disabled setting `input_format_tsv_use_best_effort_in_schema_inference`: +```sql +SET input_format_tsv_use_best_effort_in_schema_inference = 0 +DESC format(TSV, '[1,2,3] 42.42 Hello World!') +``` +```response +┌─name─┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ c1 │ Nullable(String) │ │ │ │ │ │ +│ c2 │ Nullable(String) │ │ │ │ │ │ +│ c3 │ Nullable(String) │ │ │ │ │ │ +└──────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +## Values {#values} + +In Values format ClickHouse extracts column value from the row and then parses it using +the recursive parser similar to how literals are parsed. 
+ +**Examples:** + +Integers, Floats, Bools, Strings: +```sql +DESC format(Values, $$(42, 42.42, true, 'Hello,World!')$$) +``` +```response +┌─name─┬─type──────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ c1 │ Nullable(Int64) │ │ │ │ │ │ +│ c2 │ Nullable(Float64) │ │ │ │ │ │ +│ c3 │ Nullable(Bool) │ │ │ │ │ │ +│ c4 │ Nullable(String) │ │ │ │ │ │ +└──────┴───────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +Dates, DateTimes: + +```sql +DESC format(Values, $$('2020-01-01', '2020-01-01 00:00:00')$$) +``` +```response +┌─name─┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ c1 │ Nullable(Date) │ │ │ │ │ │ +│ c2 │ Nullable(DateTime64(9)) │ │ │ │ │ │ +└──────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +Arrays: +```sql +DESC format(Values, '([1,2,3], [[1, 2], [], [3, 4]])') +``` +```response +┌─name─┬─type──────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ c1 │ Array(Nullable(Int64)) │ │ │ │ │ │ +│ c2 │ Array(Array(Nullable(Int64))) │ │ │ │ │ │ +└──────┴───────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +If an array contains null, ClickHouse will use types from the other array elements: +```sql +DESC format(Values, '([NULL, 42, NULL])') +``` +```response +┌─name─┬─type───────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ c1 │ Array(Nullable(Int64)) │ │ │ │ │ │ +└──────┴────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +Tuples: +```sql +DESC format(Values, $$((42, 'Hello, world!'))$$) +``` +```response +┌─name─┬─type─────────────────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ c1 │ Tuple(Nullable(Int64), Nullable(String)) │ │ │ │ │ │ +└──────┴──────────────────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +Maps: +```sql +DESC format(Values, $$({'key1' : 42, 'key2' : 24})$$) +``` +```response +┌─name─┬─type─────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ c1 │ Map(String, Nullable(Int64)) │ │ │ │ │ │ +└──────┴──────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +Nested Arrays, Tuples and Maps: +```sql +DESC format(Values, $$([{'key1' : [(42, 'Hello'), (24, NULL)], 'key2' : [(NULL, ','), (42, 'world!')]}])$$) +``` +```response +┌─name─┬─type────────────────────────────────────────────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ c1 │ Array(Map(String, Array(Tuple(Nullable(Int64), Nullable(String))))) │ │ │ │ │ │ +└──────┴─────────────────────────────────────────────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +If ClickHouse cannot determine the type, because the data contains only nulls, an exception will be thrown: +```sql +DESC format(Values, '([NULL, NULL])') +``` +```response +Code: 652. DB::Exception: Received from localhost:9000. 
DB::Exception:
+Cannot determine type for column 'c1' by first 1 rows of data,
+most likely this column contains only Nulls or empty Arrays/Maps.
+...
+```
+
+## CustomSeparated {#custom-separated}
+
+In CustomSeparated format ClickHouse first extracts all column values from the row according to the specified delimiters and then tries to infer
+the data type for each value according to the escaping rule.
+
+**Example**
+
+```sql
+SET format_custom_row_before_delimiter = '',
+    format_custom_row_after_delimiter = '\n',
+    format_custom_row_between_delimiter = '\n',
+    format_custom_result_before_delimiter = '\n',
+    format_custom_result_after_delimiter = '\n',
+    format_custom_field_delimiter = '',
+    format_custom_escaping_rule = 'Quoted'
+
+DESC format(CustomSeparated, $$
+42.42'Some string 1'[1, NULL, 3]
+
+NULL'Some string 3'[1, 2, NULL]
+
+$$)
+```
+```response
+┌─name─┬─type───────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
+│ c1   │ Nullable(Float64)      │              │                    │         │                  │                │
+│ c2   │ Nullable(String)       │              │                    │         │                  │                │
+│ c3   │ Array(Nullable(Int64)) │              │                    │         │                  │                │
+└──────┴────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
+```
+
+## Template {#template}
+
+In Template format ClickHouse first extracts all column values from the row according to the specified template and then tries to infer the
+data type for each value according to its escaping rule.
+
+**Example**
+
+Let's say we have a file `resultset_format` with the following content:
+```
+
+${data}
+```
+
+And a file `row_format` with the following content:
+```
+${column_1:CSV}${column_2:Quoted}${column_3:JSON}
+```
+
+Then we can run the following query:
+
+```sql
+SET format_template_rows_between_delimiter = '\n',
+    format_template_row = 'row_format',
+    format_template_resultset = 'resultset_format'
+
+DESC format(Template, $$
+42.42'Some string 1'[1, null, 2]
+
+\N'Some string 3'[1, 2, null]
+
+$$)
+```
+```response
+┌─name─────┬─type───────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
+│ column_1 │ Nullable(Float64)      │              │                    │         │                  │                │
+│ column_2 │ Nullable(String)       │              │                    │         │                  │                │
+│ column_3 │ Array(Nullable(Int64)) │              │                    │         │                  │                │
+└──────────┴────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
+```
+
+## Regexp {#regexp}
+
+Similar to Template, in Regexp format ClickHouse first extracts all column values from the row according to the specified regular expression and then tries to infer
+the data type for each value according to the specified escaping rule.
+
+**Example**
+
+```sql
+SET format_regexp = '^Line: value_1=(.+?), value_2=(.+?), value_3=(.+?)',
+    format_regexp_escaping_rule = 'CSV'
+
+DESC format(Regexp, $$Line: value_1=42, value_2="Some string 1", value_3="[1, NULL, 3]"
+Line: value_1=2, value_2="Some string 2", value_3="[4, 5, NULL]"$$)
+```
+```response
+┌─name─┬─type───────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
+│ c1   │ Nullable(Int64)        │              │                    │         │                  │                │
+│ c2   │ Nullable(String)       │              │                    │         │                  │                │
+│ c3   │ Array(Nullable(Int64)) │              │                    │         │                  │                │
+└──────┴────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
+```
+
+## Settings for text formats {#settings-for-text-formats}
+
+### input_format_max_rows_to_read_for_schema_inference
+
+This setting controls the maximum number of rows to be read during schema inference.
+The more rows are read, the more time is spent on schema inference, but the greater the chance to
+correctly determine the types (especially when the data contains a lot of nulls).
+
+Default value: `25000`.
+
+### column_names_for_schema_inference
+
+The list of column names to use in schema inference for formats without explicit column names. Specified names will be used instead of default `c1,c2,c3,...`. The format: `column1,column2,column3,...`.
+
+**Example**
+
+```sql
+DESC format(TSV, 'Hello, World!	42	[1, 2, 3]') settings column_names_for_schema_inference = 'str,int,arr'
+```
+```response
+┌─name─┬─type───────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
+│ str  │ Nullable(String)       │              │                    │         │                  │                │
+│ int  │ Nullable(Int64)        │              │                    │         │                  │                │
+│ arr  │ Array(Nullable(Int64)) │              │                    │         │                  │                │
+└──────┴────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
+```
+
+### schema_inference_hints
+
+The list of column names and types to use in schema inference instead of automatically determined types. The format: 'column_name1 column_type1, column_name2 column_type2, ...'.
+This setting can be used to specify the types of columns that could not be determined automatically or for optimizing the schema.
+
+**Example**
+
+```sql
+DESC format(JSONEachRow, '{"id" : 1, "age" : 25, "name" : "Josh", "status" : null, "hobbies" : ["football", "cooking"]}')
+SETTINGS schema_inference_hints = 'age LowCardinality(UInt8), status Nullable(String)'
+```
+```response
+┌─name────┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
+│ id      │ Nullable(Int64)         │              │                    │         │                  │                │
+│ age     │ LowCardinality(UInt8)   │              │                    │         │                  │                │
+│ name    │ Nullable(String)        │              │                    │         │                  │                │
+│ status  │ Nullable(String)        │              │                    │         │                  │                │
+│ hobbies │ Array(Nullable(String)) │              │                    │         │                  │                │
+└─────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
+```
+
+### schema_inference_make_columns_nullable
+
+Controls making inferred types `Nullable` in schema inference for formats without information about nullability.
+If the setting is enabled, all inferred types will be `Nullable`; if disabled, the inferred type will be `Nullable` only if the column contains `NULL` in a sample that is parsed during schema inference.
+
+Enabled by default.
+ +**Examples** + +```sql +SET schema_inference_make_columns_nullable = 1 +DESC format(JSONEachRow, $$ + {"id" : 1, "age" : 25, "name" : "Josh", "status" : null, "hobbies" : ["football", "cooking"]} + {"id" : 2, "age" : 19, "name" : "Alan", "status" : "married", "hobbies" : ["tennis", "art"]} + $$) +``` +```response +┌─name────┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ id │ Nullable(Int64) │ │ │ │ │ │ +│ age │ Nullable(Int64) │ │ │ │ │ │ +│ name │ Nullable(String) │ │ │ │ │ │ +│ status │ Nullable(String) │ │ │ │ │ │ +│ hobbies │ Array(Nullable(String)) │ │ │ │ │ │ +└─────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` +```sql +SET schema_inference_make_columns_nullable = 0 +DESC format(JSONEachRow, $$ + {"id" : 1, "age" : 25, "name" : "Josh", "status" : null, "hobbies" : ["football", "cooking"]} + {"id" : 2, "age" : 19, "name" : "Alan", "status" : "married", "hobbies" : ["tennis", "art"]} + $$) +``` +```response + +┌─name────┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ id │ Int64 │ │ │ │ │ │ +│ age │ Int64 │ │ │ │ │ │ +│ name │ String │ │ │ │ │ │ +│ status │ Nullable(String) │ │ │ │ │ │ +│ hobbies │ Array(String) │ │ │ │ │ │ +└─────────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +### input_format_try_infer_integers + +If enabled, ClickHouse will try to infer integers instead of floats in schema inference for text formats. +If all numbers in the column from sample data are integers, the result type will be `Int64`, if at least one number is float, the result type will be `Float64`. +If the sample data contains only integers and at least one integer is positive and overflows `Int64`, ClickHouse will infer `UInt64`. + +Enabled by default. 
+ +**Examples** + +```sql +SET input_format_try_infer_integers = 0 +DESC format(JSONEachRow, $$ + {"number" : 1} + {"number" : 2} + $$) +``` +```response +┌─name───┬─type──────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ number │ Nullable(Float64) │ │ │ │ │ │ +└────────┴───────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` +```sql +SET input_format_try_infer_integers = 1 +DESC format(JSONEachRow, $$ + {"number" : 1} + {"number" : 2} + $$) +``` +```response +┌─name───┬─type────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ number │ Nullable(Int64) │ │ │ │ │ │ +└────────┴─────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` +```sql +DESC format(JSONEachRow, $$ + {"number" : 1} + {"number" : 18446744073709551615} + $$) +``` +```response +┌─name───┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ number │ Nullable(UInt64) │ │ │ │ │ │ +└────────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` +```sql +DESC format(JSONEachRow, $$ + {"number" : 1} + {"number" : 2.2} + $$) +``` +```response +┌─name───┬─type──────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ number │ Nullable(Float64) │ │ │ │ │ │ +└────────┴───────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +### input_format_try_infer_datetimes + +If enabled, ClickHouse will try to infer type `DateTime64` from string fields in schema inference for text formats. +If all fields from a column in sample data were successfully parsed as datetimes, the result type will be `DateTime64(9)`, +if at least one field was not parsed as datetime, the result type will be `String`. + +Enabled by default. 
+ +**Examples** + +```sql +SET input_format_try_infer_datetimes = 0 +DESC format(JSONEachRow, $$ + {"datetime" : "2021-01-01 00:00:00.000"} + {"datetime" : "2022-01-01 00:00:00.000"} + $$) +``` +```response +┌─name─────┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ datetime │ Nullable(String) │ │ │ │ │ │ +└──────────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` +```sql +SET input_format_try_infer_datetimes = 1 +DESC format(JSONEachRow, $$ + {"datetime" : "2021-01-01 00:00:00.000"} + {"datetime" : "2022-01-01 00:00:00.000"} + $$) +``` +```response +┌─name─────┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ datetime │ Nullable(DateTime64(9)) │ │ │ │ │ │ +└──────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` +```sql +DESC format(JSONEachRow, $$ + {"datetime" : "2021-01-01 00:00:00.000"} + {"datetime" : "unknown"} + $$) +``` +```response +┌─name─────┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ datetime │ Nullable(String) │ │ │ │ │ │ +└──────────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +Note: Parsing datetimes during schema inference respect setting [date_time_input_format](/docs/en/operations/settings/settings.md#date_time_input_format) + +### input_format_try_infer_dates + +If enabled, ClickHouse will try to infer type `Date` from string fields in schema inference for text formats. +If all fields from a column in sample data were successfully parsed as dates, the result type will be `Date`, +if at least one field was not parsed as date, the result type will be `String`. + +Enabled by default. + +**Examples** + +```sql +SET input_format_try_infer_datetimes = 0, input_format_try_infer_dates = 0 +DESC format(JSONEachRow, $$ + {"date" : "2021-01-01"} + {"date" : "2022-01-01"} + $$) +``` +```response +┌─name─┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ date │ Nullable(String) │ │ │ │ │ │ +└──────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` +```sql +SET input_format_try_infer_dates = 1 +DESC format(JSONEachRow, $$ + {"date" : "2021-01-01"} + {"date" : "2022-01-01"} + $$) +``` +```response +┌─name─┬─type───────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ date │ Nullable(Date) │ │ │ │ │ │ +└──────┴────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` +```sql +DESC format(JSONEachRow, $$ + {"date" : "2021-01-01"} + {"date" : "unknown"} + $$) +``` +```response +┌─name─┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ date │ Nullable(String) │ │ │ │ │ │ +└──────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +# Self describing formats {#self-describing-formats} + +Self-describing formats contain information about the structure of the data in the data itself, +it can be some header with a description, a binary type tree, or some kind of table. 
+To automatically infer a schema from files in such formats, ClickHouse reads a part of the data containing
+information about the types and converts it into a schema of the ClickHouse table.
+
+## Formats with -WithNamesAndTypes suffix {#formats-with-names-and-types}
+
+ClickHouse supports some text formats with the suffix -WithNamesAndTypes. This suffix means that the data contains two additional rows with column names and types before the actual data.
+During schema inference for such formats, ClickHouse reads the first two rows and extracts column names and types.
+
+**Example**
+
+```sql
+DESC format(TSVWithNamesAndTypes,
+$$num	str	arr
+UInt8	String	Array(UInt8)
+42	Hello, World!	[1,2,3]
+$$)
+```
+```response
+┌─name─┬─type─────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
+│ num  │ UInt8        │              │                    │         │                  │                │
+│ str  │ String       │              │                    │         │                  │                │
+│ arr  │ Array(UInt8) │              │                    │         │                  │                │
+└──────┴──────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
+```
+
+## JSON formats with metadata {#json-with-metadata}
+
+Some JSON input formats ([JSON](formats.md#json), [JSONCompact](formats.md#json-compact), [JSONColumnsWithMetadata](formats.md#jsoncolumnswithmetadata)) contain metadata with column names and types.
+In schema inference for such formats, ClickHouse reads this metadata.
+
+**Example**
+```sql
+DESC format(JSON, $$
+{
+	"meta":
+	[
+		{
+			"name": "num",
+			"type": "UInt8"
+		},
+		{
+			"name": "str",
+			"type": "String"
+		},
+		{
+			"name": "arr",
+			"type": "Array(UInt8)"
+		}
+	],
+
+	"data":
+	[
+		{
+			"num": 42,
+			"str": "Hello, World",
+			"arr": [1,2,3]
+		}
+	],
+
+	"rows": 1,
+
+	"statistics":
+	{
+		"elapsed": 0.005723915,
+		"rows_read": 1,
+		"bytes_read": 1
+	}
+}
+$$)
+```
+```response
+┌─name─┬─type─────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
+│ num  │ UInt8        │              │                    │         │                  │                │
+│ str  │ String       │              │                    │         │                  │                │
+│ arr  │ Array(UInt8) │              │                    │         │                  │                │
+└──────┴──────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
+```
+
+## Avro {#avro}
+
+In Avro format ClickHouse reads its schema from the data and converts it to ClickHouse schema using the following type matches:
+
+| Avro data type                     | ClickHouse data type                                                            |
+|------------------------------------|---------------------------------------------------------------------------------|
+| `boolean`                          | [Bool](../sql-reference/data-types/boolean.md)                                  |
+| `int`                              | [Int32](../sql-reference/data-types/int-uint.md)                                |
+| `long`                             | [Int64](../sql-reference/data-types/int-uint.md)                                |
+| `float`                            | [Float32](../sql-reference/data-types/float.md)                                 |
+| `double`                           | [Float64](../sql-reference/data-types/float.md)                                 |
+| `bytes`, `string`                  | [String](../sql-reference/data-types/string.md)                                 |
+| `fixed`                            | [FixedString(N)](../sql-reference/data-types/fixedstring.md)                    |
+| `enum`                             | [Enum](../sql-reference/data-types/enum.md)                                     |
+| `array(T)`                         | [Array(T)](../sql-reference/data-types/array.md)                                |
+| `union(null, T)`, `union(T, null)` | [Nullable(T)](../sql-reference/data-types/nullable.md)                          |
+| `null`                             | [Nullable(Nothing)](../sql-reference/data-types/special-data-types/nothing.md)  |
+
+Other Avro types are not supported.
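+
+As an illustrative sketch (the file name `data.avro` is hypothetical), you can inspect the schema ClickHouse infers for an Avro file with the `file` table function; the resulting column types follow the mapping table above:
+
+```sql
+DESC file('data.avro', 'Avro')
+```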
+ +## Parquet {#parquet} + +In Parquet format ClickHouse reads its schema from the data and converts it to ClickHouse schema using the following type matches: + +| Parquet data type | ClickHouse data type | +|------------------------------|---------------------------------------------------------| +| `BOOL` | [Bool](../sql-reference/data-types/boolean.md) | +| `UINT8` | [UInt8](../sql-reference/data-types/int-uint.md) | +| `INT8` | [Int8](../sql-reference/data-types/int-uint.md) | +| `UINT16` | [UInt16](../sql-reference/data-types/int-uint.md) | +| `INT16` | [Int16](../sql-reference/data-types/int-uint.md) | +| `UINT32` | [UInt32](../sql-reference/data-types/int-uint.md) | +| `INT32` | [Int32](../sql-reference/data-types/int-uint.md) | +| `UINT64` | [UInt64](../sql-reference/data-types/int-uint.md) | +| `INT64` | [Int64](../sql-reference/data-types/int-uint.md) | +| `FLOAT` | [Float32](../sql-reference/data-types/float.md) | +| `DOUBLE` | [Float64](../sql-reference/data-types/float.md) | +| `DATE` | [Date32](../sql-reference/data-types/date32.md) | +| `TIME (ms)` | [DateTime](../sql-reference/data-types/datetime.md) | +| `TIMESTAMP`, `TIME (us, ns)` | [DateTime64](../sql-reference/data-types/datetime64.md) | +| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | +| `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | +| `LIST` | [Array](../sql-reference/data-types/array.md) | +| `STRUCT` | [Tuple](../sql-reference/data-types/tuple.md) | +| `MAP` | [Map](../sql-reference/data-types/map.md) | + +Other Parquet types are not supported. By default, all inferred types are inside `Nullable`, but it can be changed using the setting `schema_inference_make_columns_nullable`. + +## Arrow {#arrow} + +In Arrow format ClickHouse reads its schema from the data and converts it to ClickHouse schema using the following type matches: + +| Arrow data type | ClickHouse data type | +|---------------------------------|---------------------------------------------------------| +| `BOOL` | [Bool](../sql-reference/data-types/boolean.md) | +| `UINT8` | [UInt8](../sql-reference/data-types/int-uint.md) | +| `INT8` | [Int8](../sql-reference/data-types/int-uint.md) | +| `UINT16` | [UInt16](../sql-reference/data-types/int-uint.md) | +| `INT16` | [Int16](../sql-reference/data-types/int-uint.md) | +| `UINT32` | [UInt32](../sql-reference/data-types/int-uint.md) | +| `INT32` | [Int32](../sql-reference/data-types/int-uint.md) | +| `UINT64` | [UInt64](../sql-reference/data-types/int-uint.md) | +| `INT64` | [Int64](../sql-reference/data-types/int-uint.md) | +| `FLOAT`, `HALF_FLOAT` | [Float32](../sql-reference/data-types/float.md) | +| `DOUBLE` | [Float64](../sql-reference/data-types/float.md) | +| `DATE32` | [Date32](../sql-reference/data-types/date32.md) | +| `DATE64` | [DateTime](../sql-reference/data-types/datetime.md) | +| `TIMESTAMP`, `TIME32`, `TIME64` | [DateTime64](../sql-reference/data-types/datetime64.md) | +| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | +| `DECIMAL128`, `DECIMAL256` | [Decimal](../sql-reference/data-types/decimal.md) | +| `LIST` | [Array](../sql-reference/data-types/array.md) | +| `STRUCT` | [Tuple](../sql-reference/data-types/tuple.md) | +| `MAP` | [Map](../sql-reference/data-types/map.md) | + +Other Arrow types are not supported. By default, all inferred types are inside `Nullable`, but it can be changed using the setting `schema_inference_make_columns_nullable`. 
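+
+For example, a minimal sketch (the file name `data.arrow` is hypothetical) of inferring the schema of an Arrow file while turning off the `Nullable` wrapping mentioned above:
+
+```sql
+DESC file('data.arrow', 'Arrow') SETTINGS schema_inference_make_columns_nullable = 0
+```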
+ +## ORC {#orc} + +In ORC format ClickHouse reads its schema from the data and converts it to ClickHouse schema using the following type matches: + +| ORC data type | ClickHouse data type | +|--------------------------------------|---------------------------------------------------------| +| `Boolean` | [Bool](../sql-reference/data-types/boolean.md) | +| `Tinyint` | [Int8](../sql-reference/data-types/int-uint.md) | +| `Smallint` | [Int16](../sql-reference/data-types/int-uint.md) | +| `Int` | [Int32](../sql-reference/data-types/int-uint.md) | +| `Bigint` | [Int64](../sql-reference/data-types/int-uint.md) | +| `Float` | [Float32](../sql-reference/data-types/float.md) | +| `Double` | [Float64](../sql-reference/data-types/float.md) | +| `Date` | [Date32](../sql-reference/data-types/date32.md) | +| `Timestamp` | [DateTime64](../sql-reference/data-types/datetime64.md) | +| `String`, `Char`, `Varchar`,`BINARY` | [String](../sql-reference/data-types/string.md) | +| `Decimal` | [Decimal](../sql-reference/data-types/decimal.md) | +| `List` | [Array](../sql-reference/data-types/array.md) | +| `Struct` | [Tuple](../sql-reference/data-types/tuple.md) | +| `Map` | [Map](../sql-reference/data-types/map.md) | + +Other ORC types are not supported. By default, all inferred types are inside `Nullable`, but it can be changed using the setting `schema_inference_make_columns_nullable`. + +## Native {#native} + +Native format is used inside ClickHouse and contains the schema in the data. +In schema inference, ClickHouse reads the schema from the data without any transformations. + +# Formats with external schema {#formats-with-external-schema} + +Such formats require a schema describing the data in a separate file in a specific schema language. +To automatically infer a schema from files in such formats, ClickHouse reads external schema from a separate file and transforms it to a ClickHouse table schema. 
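+
+As an illustrative sketch of how the external schema is supplied, using the Protobuf format described below (the data file `data.bin`, the schema file `schemafile.proto`, and the message type `MessageType` are hypothetical; the schema file is looked up in the directory configured by the `format_schema_path` server setting):
+
+```sql
+DESC file('data.bin', 'Protobuf') SETTINGS format_schema = 'schemafile:MessageType'
+```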
+
+## Protobuf {#protobuf}
+
+In schema inference for Protobuf format ClickHouse uses the following type matches:
+
+| Protobuf data type            | ClickHouse data type                               |
+|-------------------------------|----------------------------------------------------|
+| `bool`                        | [UInt8](../sql-reference/data-types/int-uint.md)   |
+| `float`                       | [Float32](../sql-reference/data-types/float.md)    |
+| `double`                      | [Float64](../sql-reference/data-types/float.md)    |
+| `int32`, `sint32`, `sfixed32` | [Int32](../sql-reference/data-types/int-uint.md)   |
+| `int64`, `sint64`, `sfixed64` | [Int64](../sql-reference/data-types/int-uint.md)   |
+| `uint32`, `fixed32`           | [UInt32](../sql-reference/data-types/int-uint.md)  |
+| `uint64`, `fixed64`           | [UInt64](../sql-reference/data-types/int-uint.md)  |
+| `string`, `bytes`             | [String](../sql-reference/data-types/string.md)    |
+| `enum`                        | [Enum](../sql-reference/data-types/enum.md)        |
+| `repeated T`                  | [Array(T)](../sql-reference/data-types/array.md)   |
+| `message`, `group`            | [Tuple](../sql-reference/data-types/tuple.md)      |
+
+## CapnProto {#capnproto}
+
+In schema inference for CapnProto format ClickHouse uses the following type matches:
+
+| CapnProto data type                | ClickHouse data type                                   |
+|------------------------------------|--------------------------------------------------------|
+| `Bool`                             | [UInt8](../sql-reference/data-types/int-uint.md)       |
+| `Int8`                             | [Int8](../sql-reference/data-types/int-uint.md)        |
+| `UInt8`                            | [UInt8](../sql-reference/data-types/int-uint.md)       |
+| `Int16`                            | [Int16](../sql-reference/data-types/int-uint.md)       |
+| `UInt16`                           | [UInt16](../sql-reference/data-types/int-uint.md)      |
+| `Int32`                            | [Int32](../sql-reference/data-types/int-uint.md)       |
+| `UInt32`                           | [UInt32](../sql-reference/data-types/int-uint.md)      |
+| `Int64`                            | [Int64](../sql-reference/data-types/int-uint.md)       |
+| `UInt64`                           | [UInt64](../sql-reference/data-types/int-uint.md)      |
+| `Float32`                          | [Float32](../sql-reference/data-types/float.md)        |
+| `Float64`                          | [Float64](../sql-reference/data-types/float.md)        |
+| `Text`, `Data`                     | [String](../sql-reference/data-types/string.md)        |
+| `enum`                             | [Enum](../sql-reference/data-types/enum.md)            |
+| `List`                             | [Array](../sql-reference/data-types/array.md)          |
+| `struct`                           | [Tuple](../sql-reference/data-types/tuple.md)          |
+| `union(T, Void)`, `union(Void, T)` | [Nullable(T)](../sql-reference/data-types/nullable.md) |
+
+# Strong-typed binary formats {#strong-typed-binary-formats}
+
+In such formats, each serialized value contains information about its type (and possibly about its name), but there is no information about the whole table.
+In schema inference for such formats, ClickHouse reads data row by row (up to `input_format_max_rows_to_read_for_schema_inference` rows) and extracts
+the type (and possibly name) for each value from the data and then converts these types to ClickHouse types.
+
+## MsgPack {#msgpack}
+
+In MsgPack format there is no delimiter between rows. To use schema inference for this format, specify the number of columns in the table
+using the setting `input_format_msgpack_number_of_columns`. ClickHouse uses the following type matches:
+
+| MessagePack data type (`INSERT`)                                   | ClickHouse data type                                      |
+|--------------------------------------------------------------------|-----------------------------------------------------------|
+| `int N`, `uint N`, `negative fixint`, `positive fixint`            | [Int64](../sql-reference/data-types/int-uint.md)          |
+| `bool`                                                             | [UInt8](../sql-reference/data-types/int-uint.md)          |
+| `fixstr`, `str 8`, `str 16`, `str 32`, `bin 8`, `bin 16`, `bin 32` | [String](../sql-reference/data-types/string.md)           |
+| `float 32`                                                         | [Float32](../sql-reference/data-types/float.md)           |
+| `float 64`                                                         | [Float64](../sql-reference/data-types/float.md)           |
+| `uint 16`                                                          | [Date](../sql-reference/data-types/date.md)               |
+| `uint 32`                                                          | [DateTime](../sql-reference/data-types/datetime.md)       |
+| `uint 64`                                                          | [DateTime64](../sql-reference/data-types/datetime64.md)   |
+| `fixarray`, `array 16`, `array 32`                                 | [Array](../sql-reference/data-types/array.md)             |
+| `fixmap`, `map 16`, `map 32`                                       | [Map](../sql-reference/data-types/map.md)                 |
+
+By default, all inferred types are inside `Nullable`, but it can be changed using the setting `schema_inference_make_columns_nullable`.
+
+## BSONEachRow {#bsoneachrow}
+
+In BSONEachRow each row of data is presented as a BSON document. During schema inference, ClickHouse reads BSON documents one by one and extracts
+values, names, and types from the data, and then transforms these types to ClickHouse types using the following type matches:
+
+| BSON Type                                                                                      | ClickHouse type                                                                                                              |
+|------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------|
+| `\x08` boolean                                                                                   | [Bool](../sql-reference/data-types/boolean.md)                                                                               |
+| `\x10` int32                                                                                     | [Int32](../sql-reference/data-types/int-uint.md)                                                                             |
+| `\x12` int64                                                                                     | [Int64](../sql-reference/data-types/int-uint.md)                                                                             |
+| `\x01` double                                                                                    | [Float64](../sql-reference/data-types/float.md)                                                                              |
+| `\x09` datetime                                                                                  | [DateTime64](../sql-reference/data-types/datetime64.md)                                                                      |
+| `\x05` binary with `\x00` binary subtype, `\x02` string, `\x0E` symbol, `\x0D` JavaScript code   | [String](../sql-reference/data-types/string.md)                                                                              |
+| `\x07` ObjectId                                                                                  | [FixedString(12)](../sql-reference/data-types/fixedstring.md)                                                                |
+| `\x05` binary with `\x04` uuid subtype, size = 16                                                | [UUID](../sql-reference/data-types/uuid.md)                                                                                  |
+| `\x04` array                                                                                     | [Array](../sql-reference/data-types/array.md)/[Tuple](../sql-reference/data-types/tuple.md) (if nested types are different)  |
+| `\x03` document                                                                                  | [Named Tuple](../sql-reference/data-types/tuple.md)/[Map](../sql-reference/data-types/map.md) (with String keys)             |
+
+By default, all inferred types are inside `Nullable`, but it can be changed using the setting `schema_inference_make_columns_nullable`.
+
+# Formats with constant schema {#formats-with-constant-schema}
+
+Data in such formats always has the same schema.
+
+## LineAsString {#line-as-string}
+
+In this format, ClickHouse reads the whole line from the data into a single column with `String` data type. The inferred type for this format is always `String` and the column name is `line`.
+
+**Example**
+
+```sql
+DESC format(LineAsString, 'Hello\nworld!')
+```
+```response
+┌─name─┬─type───┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
+│ line │ String │              │                    │         │                  │                │
+└──────┴────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
+```
+
+## JSONAsString {#json-as-string}
+
+In this format, ClickHouse reads the whole JSON object from the data into a single column with `String` data type. The inferred type for this format is always `String` and the column name is `json`.
+
+**Example**
+
+```sql
+DESC format(JSONAsString, '{"x" : 42, "y" : "Hello, World!"}')
+```
+```response
+┌─name─┬─type───┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
+│ json │ String │              │                    │         │                  │                │
+└──────┴────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
+```
+
+## JSONAsObject {#json-as-object}
+
+In this format, ClickHouse reads the whole JSON object from the data into a single column with `Object('json')` data type. The inferred type for this format is always `Object('json')` and the column name is `json`.
+
+Note: This format works only if `allow_experimental_object_type` is enabled.
+
+**Example**
+
+```sql
+DESC format(JSONAsObject, '{"x" : 42, "y" : "Hello, World!"}') SETTINGS allow_experimental_object_type=1
+```
+```response
+┌─name─┬─type───────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
+│ json │ Object('json') │              │                    │         │                  │                │
+└──────┴────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
+```
+
+[Original article](https://clickhouse.com/docs/en/interfaces/schema-inference)
diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md
index e16785e643d..cf4dd042c34 100644
--- a/docs/en/operations/server-configuration-parameters/settings.md
+++ b/docs/en/operations/server-configuration-parameters/settings.md
@@ -890,7 +890,7 @@ The maximum number of open files.

By default: `maximum`.

-We recommend using this option in Mac OS X since the `getrlimit()` function returns an incorrect value.
+We recommend using this option in macOS since the `getrlimit()` function returns an incorrect value.

**Example**

diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index a0b7a3de27d..8437713afdc 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -3531,13 +3531,45 @@ Default value: 2.

## compatibility {#compatibility}

-This setting changes other settings according to provided ClickHouse version.
-If a behaviour in ClickHouse was changed by using a different default value for some setting, this compatibility setting allows you to use default values from previous versions for all the settings that were not set by the user.
+The `compatibility` setting causes ClickHouse to use the default settings of a previous version of ClickHouse, where the previous version is provided as the setting.

-This setting takes ClickHouse version number as a string, like `21.3`, `21.8`. Empty value means that this setting is disabled.
+If settings are set to non-default values, then those settings are honored (only settings that have not been modified are affected by the `compatibility` setting).
+
+This setting takes a ClickHouse version number as a string, like `22.3`, `22.8`. An empty value means that this setting is disabled.
Disabled by default.
+
+:::note
+In ClickHouse Cloud the compatibility setting must be set by ClickHouse Cloud support. Please [open a case](https://clickhouse.cloud/support) to have it set.
+:::
+
+## allow_settings_after_format_in_insert {#allow_settings_after_format_in_insert}
+
+Controls whether `SETTINGS` after `FORMAT` in `INSERT` queries is allowed. It is not recommended to use this, since part of `SETTINGS` may be interpreted as values.
+
+Example:
+
+```sql
+INSERT INTO FUNCTION null('foo String') SETTINGS max_threads=1 VALUES ('bar');
+```
+
+But the following query will work only with `allow_settings_after_format_in_insert`:
+
+```sql
+SET allow_settings_after_format_in_insert=1;
+INSERT INTO FUNCTION null('foo String') VALUES ('bar') SETTINGS max_threads=1;
+```
+
+Possible values:
+
+- 0 — Disallow.
+- 1 — Allow.
+
+Default value: `0`.
+
+!!! note "Warning"
+    Use this setting only for backward compatibility if your use cases depend on old syntax.
+
# Format settings {#format-settings}

## input_format_skip_unknown_fields {#input_format_skip_unknown_fields}
@@ -3672,6 +3704,13 @@ y Nullable(String)
z IPv4
```

+## schema_inference_make_columns_nullable {#schema_inference_make_columns_nullable}
+
+Controls making inferred types `Nullable` in schema inference for formats without information about nullability.
+If the setting is enabled, all inferred types will be `Nullable`; if disabled, the inferred type will be `Nullable` only if the column contains `NULL` in a sample that is parsed during schema inference.
+
+Default value: `true`.
+
## input_format_try_infer_integers {#input_format_try_infer_integers}

If enabled, ClickHouse will try to infer integers instead of floats in schema inference for text formats. If all numbers in the column from input data are integers, the result type will be `Int64`, if at least one number is float, the result type will be `Float64`.
diff --git a/docs/en/operations/system-tables/schema_inference_cache.md b/docs/en/operations/system-tables/schema_inference_cache.md
new file mode 100644
index 00000000000..31b0671dc34
--- /dev/null
+++ b/docs/en/operations/system-tables/schema_inference_cache.md
@@ -0,0 +1,70 @@
+---
+slug: /en/operations/system-tables/schema_inference_cache
+---
+# Schema inference cache
+
+Contains information about all cached file schemas.
+
+Columns:
+- `storage` ([String](/docs/en/sql-reference/data-types/string.md)) — Storage name: File, URL, S3 or HDFS.
+- `source` ([String](/docs/en/sql-reference/data-types/string.md)) — File source.
+- `format` ([String](/docs/en/sql-reference/data-types/string.md)) — Format name.
+- `additional_format_info` ([String](/docs/en/sql-reference/data-types/string.md)) — Additional information required to identify the schema. For example, format-specific settings.
+- `registration_time` ([DateTime](/docs/en/sql-reference/data-types/datetime.md)) — Timestamp when the schema was added to the cache.
+- `schema` ([String](/docs/en/sql-reference/data-types/string.md)) — Cached schema.
+
+**Example**
+
+Let's say we have a file `data.jsonl` with this content:
+```json
+{"id" : 1, "age" : 25, "name" : "Josh", "hobbies" : ["football", "cooking", "music"]}
+{"id" : 2, "age" : 19, "name" : "Alan", "hobbies" : ["tennis", "art"]}
+{"id" : 3, "age" : 32, "name" : "Lana", "hobbies" : ["fitness", "reading", "shopping"]}
+{"id" : 4, "age" : 47, "name" : "Brayan", "hobbies" : ["movies", "skydiving"]}
+```
+
+:::tip
+Place `data.jsonl` in the `user_files_path` directory. You can find this by looking
+in your ClickHouse configuration files. 
The default is: +``` +/var/lib/clickhouse/user_files/ +``` +::: + +Open `clickhouse-client` and run the `DESCRIBE` query: + +```sql +DESCRIBE file('data.jsonl') SETTINGS input_format_try_infer_integers=0; +``` + +```response +┌─name────┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ id │ Nullable(Float64) │ │ │ │ │ │ +│ age │ Nullable(Float64) │ │ │ │ │ │ +│ name │ Nullable(String) │ │ │ │ │ │ +│ hobbies │ Array(Nullable(String)) │ │ │ │ │ │ +└─────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +Let's see the content of the `system.schema_inference_cache` table: + +```sql +SELECT * +FROM system.schema_inference_cache +FORMAT Vertical +``` +```response +Row 1: +────── +storage: File +source: /home/droscigno/user_files/data.jsonl +format: JSONEachRow +additional_format_info: schema_inference_hints=, max_rows_to_read_for_schema_inference=25000, schema_inference_make_columns_nullable=true, try_infer_integers=false, try_infer_dates=true, try_infer_datetimes=true, try_infer_numbers_from_strings=true, read_bools_as_numbers=true, try_infer_objects=false +registration_time: 2022-12-29 17:49:52 +schema: id Nullable(Float64), age Nullable(Float64), name Nullable(String), hobbies Array(Nullable(String)) +``` + + +**See also** +- [Automatic schema inference from input data](/docs/en/interfaces/schema-inference.md) + diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md index c5d48945649..897945a6d9d 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md @@ -825,6 +825,23 @@ Setting fields: The `table` or `where` fields cannot be used together with the `query` field. And either one of the `table` or `query` fields must be declared. ::: +## Null + +A special source that can be used to create dummy (empty) dictionaries. Such dictionaries can useful for tests or with setups with separated data and query nodes at nodes with Distributed tables. + +``` sql +CREATE DICTIONARY null_dict ( + id UInt64, + val UInt8, + default_val UInt8 DEFAULT 123, + nullable_val Nullable(UInt8) +) +PRIMARY KEY id +SOURCE(NULL()) +LAYOUT(FLAT()) +LIFETIME(0); +``` + ## Related Content -- [Using dictionaries to accelerate queries](https://clickhouse.com/blog/faster-queries-dictionaries-clickhouse) \ No newline at end of file +- [Using dictionaries to accelerate queries](https://clickhouse.com/blog/faster-queries-dictionaries-clickhouse) diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 6156a823d58..be8e26daa87 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -1104,6 +1104,7 @@ Using replacement fields, you can define a pattern for the resulting string. 
“ | %d | day of the month, zero-padded (01-31) | 02 | | %D | Short MM/DD/YY date, equivalent to %m/%d/%y | 01/02/18 | | %e | day of the month, space-padded ( 1-31) |   2 | +| %f | fractional second from the fractional part of DateTime64 | 1234560 | | %F | short YYYY-MM-DD date, equivalent to %Y-%m-%d | 2018-01-02 | | %G | four-digit year format for ISO week number, calculated from the week-based year [defined by the ISO 8601](https://en.wikipedia.org/wiki/ISO_8601#Week_dates) standard, normally useful only with %V | 2018 | | %g | two-digit year format, aligned to ISO 8601, abbreviated from four-digit notation | 18 | @@ -1143,6 +1144,20 @@ Result: └────────────────────────────────────────────┘ ``` +Query: + +``` sql +SELECT formatDateTime(toDateTime64('2010-01-04 12:34:56.123456', 7), '%f') +``` + +Result: + +``` +┌─formatDateTime(toDateTime64('2010-01-04 12:34:56.123456', 7), '%f')─┐ +│ 1234560 │ +└─────────────────────────────────────────────────────────────────────┘ +``` + ## dateName Returns specified part of date. diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md index cc66f62f714..936c20c6a77 100644 --- a/docs/en/sql-reference/functions/hash-functions.md +++ b/docs/en/sql-reference/functions/hash-functions.md @@ -595,9 +595,9 @@ SELECT xxHash64('') **Returned value** -A `Uint32` or `Uint64` data type hash value. +A `UInt32` or `UInt64` data type hash value. -Type: `xxHash`. +Type: `UInt32` for `xxHash32` and `UInt64` for `xxHash64`. **Example** diff --git a/docs/en/sql-reference/functions/random-functions.md b/docs/en/sql-reference/functions/random-functions.md index 4efa2131eb6..f0c0d3e4802 100644 --- a/docs/en/sql-reference/functions/random-functions.md +++ b/docs/en/sql-reference/functions/random-functions.md @@ -68,6 +68,483 @@ Result: └────────────┴────────────┴──────────────┴────────────────┴─────────────────┴──────────────────────┘ ``` +# Functions for Generating Random Numbers based on Distributions + +:::note +These functions are available starting from 22.10. +::: + + + +## randUniform + +Return random number based on [continuous uniform distribution](https://en.wikipedia.org/wiki/Continuous_uniform_distribution) in a specified range from `min` to `max`. + +**Syntax** + +``` sql +randUniform(min, max) +``` + +**Arguments** + +- `min` - `Float64` - min value of the range, +- `max` - `Float64` - max value of the range. + +**Returned value** + +- Pseudo-random number. + +Type: [Float64](/docs/en/sql-reference/data-types/float.md). + +**Example** + +Query: + +``` sql +SELECT randUniform(5.5, 10) FROM numbers(5) +``` + +Result: + +``` text +┌─randUniform(5.5, 10)─┐ +│ 8.094978491443102 │ +│ 7.3181248914450885 │ +│ 7.177741903868262 │ +│ 6.483347380953762 │ +│ 6.122286382885112 │ +└──────────────────────┘ +``` + + + +## randNormal + +Return random number based on [normal distribution](https://en.wikipedia.org/wiki/Normal_distribution). + +**Syntax** + +``` sql +randNormal(meam, variance) +``` + +**Arguments** + +- `meam` - `Float64` mean value of distribution, +- `variance` - `Float64` - [variance](https://en.wikipedia.org/wiki/Variance). + +**Returned value** + +- Pseudo-random number. + +Type: [Float64](/docs/en/sql-reference/data-types/float.md). 
+ +**Example** + +Query: + +``` sql +SELECT randNormal(10, 2) FROM numbers(5) +``` + +Result: + +``` text +┌──randNormal(10, 2)─┐ +│ 13.389228911709653 │ +│ 8.622949707401295 │ +│ 10.801887062682981 │ +│ 4.5220192605895315 │ +│ 10.901239123982567 │ +└────────────────────┘ +``` + + + +## randLogNormal + +Return random number based on [log-normal distribution](https://en.wikipedia.org/wiki/Log-normal_distribution). + +**Syntax** + +``` sql +randLogNormal(meam, variance) +``` + +**Arguments** + +- `meam` - `Float64` mean value of distribution, +- `variance` - `Float64` - [variance](https://en.wikipedia.org/wiki/Variance). + +**Returned value** + +- Pseudo-random number. + +Type: [Float64](/docs/en/sql-reference/data-types/float.md). + +**Example** + +Query: + +``` sql +SELECT randLogNormal(100, 5) FROM numbers(5) +``` + +Result: + +``` text +┌─randLogNormal(100, 5)─┐ +│ 1.295699673937363e48 │ +│ 9.719869109186684e39 │ +│ 6.110868203189557e42 │ +│ 9.912675872925529e39 │ +│ 2.3564708490552458e42 │ +└───────────────────────┘ +``` + + + +## randBinomial + +Return random number based on [binomial distribution](https://en.wikipedia.org/wiki/Binomial_distribution). + +**Syntax** + +``` sql +randBinomial(experiments, probability) +``` + +**Arguments** + +- `experiments` - `UInt64` number of experiments, +- `probability` - `Float64` - probability of success in each experiment (values in `0...1` range only). + +**Returned value** + +- Pseudo-random number. + +Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). + +**Example** + +Query: + +``` sql +SELECT randBinomial(100, .75) FROM numbers(5) +``` + +Result: + +``` text +┌─randBinomial(100, 0.75)─┐ +│ 74 │ +│ 78 │ +│ 76 │ +│ 77 │ +│ 80 │ +└─────────────────────────┘ +``` + + + +## randNegativeBinomial + +Return random number based on [negative binomial distribution](https://en.wikipedia.org/wiki/Negative_binomial_distribution). + +**Syntax** + +``` sql +randNegativeBinomial(experiments, probability) +``` + +**Arguments** + +- `experiments` - `UInt64` number of experiments, +- `probability` - `Float64` - probability of failure in each experiment (values in `0...1` range only). + +**Returned value** + +- Pseudo-random number. + +Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). + +**Example** + +Query: + +``` sql +SELECT randNegativeBinomial(100, .75) FROM numbers(5) +``` + +Result: + +``` text +┌─randNegativeBinomial(100, 0.75)─┐ +│ 33 │ +│ 32 │ +│ 39 │ +│ 40 │ +│ 50 │ +└─────────────────────────────────┘ +``` + + + +## randPoisson + +Return random number based on [Poisson distribution](https://en.wikipedia.org/wiki/Poisson_distribution). + +**Syntax** + +``` sql +randPoisson(n) +``` + +**Arguments** + +- `n` - `UInt64` mean number of occurrences. + +**Returned value** + +- Pseudo-random number. + +Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). + +**Example** + +Query: + +``` sql +SELECT randPoisson(10) FROM numbers(5) +``` + +Result: + +``` text +┌─randPoisson(10)─┐ +│ 8 │ +│ 8 │ +│ 7 │ +│ 10 │ +│ 6 │ +└─────────────────┘ +``` + + + +## randBernoulli + +Return random number based on [Bernoulli distribution](https://en.wikipedia.org/wiki/Bernoulli_distribution). + +**Syntax** + +``` sql +randBernoulli(probability) +``` + +**Arguments** + +- `probability` - `Float64` - probability of success (values in `0...1` range only). + +**Returned value** + +- Pseudo-random number. + +Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). 
+ +**Example** + +Query: + +``` sql +SELECT randBernoulli(.75) FROM numbers(5) +``` + +Result: + +``` text +┌─randBernoulli(0.75)─┐ +│ 1 │ +│ 1 │ +│ 0 │ +│ 1 │ +│ 1 │ +└─────────────────────┘ +``` + + + +## randExponential + +Return random number based on [exponential distribution](https://en.wikipedia.org/wiki/Exponential_distribution). + +**Syntax** + +``` sql +randExponential(lambda) +``` + +**Arguments** + +- `lambda` - `Float64` lambda value. + +**Returned value** + +- Pseudo-random number. + +Type: [Float64](/docs/en/sql-reference/data-types/float.md). + +**Example** + +Query: + +``` sql +SELECT randExponential(1/10) FROM numbers(5) +``` + +Result: + +``` text +┌─randExponential(divide(1, 10))─┐ +│ 44.71628934340778 │ +│ 4.211013337903262 │ +│ 10.809402553207766 │ +│ 15.63959406553284 │ +│ 1.8148392319860158 │ +└────────────────────────────────┘ +``` + + + +## randChiSquared + +Return random number based on [Chi-square distribution](https://en.wikipedia.org/wiki/Chi-squared_distribution) - a distribution of a sum of the squares of k independent standard normal random variables. + +**Syntax** + +``` sql +randChiSquared(degree_of_freedom) +``` + +**Arguments** + +- `degree_of_freedom` - `Float64` degree of freedom. + +**Returned value** + +- Pseudo-random number. + +Type: [Float64](/docs/en/sql-reference/data-types/float.md). + +**Example** + +Query: + +``` sql +SELECT randChiSquared(10) FROM numbers(5) +``` + +Result: + +``` text +┌─randChiSquared(10)─┐ +│ 10.015463656521543 │ +│ 9.621799919882768 │ +│ 2.71785015634699 │ +│ 11.128188665931908 │ +│ 4.902063104425469 │ +└────────────────────┘ +``` + + + +## randStudentT + +Return random number based on [Student's t-distribution](https://en.wikipedia.org/wiki/Student%27s_t-distribution). + +**Syntax** + +``` sql +randStudentT(degree_of_freedom) +``` + +**Arguments** + +- `degree_of_freedom` - `Float64` degree of freedom. + +**Returned value** + +- Pseudo-random number. + +Type: [Float64](/docs/en/sql-reference/data-types/float.md). + +**Example** + +Query: + +``` sql +SELECT randStudentT(10) FROM numbers(5) +``` + +Result: + +``` text +┌─────randStudentT(10)─┐ +│ 1.2217309938538725 │ +│ 1.7941971681200541 │ +│ -0.28192176076784664 │ +│ 0.2508897721303792 │ +│ -2.7858432909761186 │ +└──────────────────────┘ +``` + + + +## randFisherF + +Return random number based on [F-distribution](https://en.wikipedia.org/wiki/F-distribution). + +**Syntax** + +``` sql +randFisherF(d1, d2) +``` + +**Arguments** + +- `d1` - `Float64` d1 degree of freedom in `X = (S1 / d1) / (S2 / d2)`, +- `d2` - `Float64` d2 degree of freedom in `X = (S1 / d1) / (S2 / d2)`, + +**Returned value** + +- Pseudo-random number. + +Type: [Float64](/docs/en/sql-reference/data-types/float.md). 
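+
+Here `S1` and `S2` are independent chi-squared random variables with `d1` and `d2` degrees of freedom, so the result is the ratio of two chi-squared variables, each scaled by its own degrees of freedom (compare `randChiSquared`).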
+ +**Example** + +Query: + +``` sql +SELECT randFisherF(10, 3) FROM numbers(5) +``` + +Result: + +``` text +┌──randFisherF(10, 3)─┐ +│ 7.286287504216609 │ +│ 0.26590779413050386 │ +│ 0.22207610901168987 │ +│ 0.7953362728449572 │ +│ 0.19278885985221572 │ +└─────────────────────┘ +``` + + + + # Random Functions for Working with Strings ## randomString diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index d82728b9721..9101a99ed88 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -14,7 +14,7 @@ ClickHouse has the [same behavior as C++ programs](https://en.cppreference.com/w ## toInt(8\|16\|32\|64\|128\|256) -Converts an input value to the [Int](../../sql-reference/data-types/int-uint.md) data type. This function family includes: +Converts an input value to the [Int](/docs/en/sql-reference/data-types/int-uint.md) data type. This function family includes: - `toInt8(expr)` — Results in the `Int8` data type. - `toInt16(expr)` — Results in the `Int16` data type. @@ -25,7 +25,7 @@ Converts an input value to the [Int](../../sql-reference/data-types/int-uint.md) **Arguments** -- `expr` — [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a number or a string with the decimal representation of a number. Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. +- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions) returning a number or a string with the decimal representation of a number. Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. **Returned value** @@ -33,7 +33,7 @@ Integer value in the `Int8`, `Int16`, `Int32`, `Int64`, `Int128` or `Int256` dat Functions use [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning they truncate fractional digits of numbers. -The behavior of functions for the [NaN and Inf](../../sql-reference/data-types/float.md#data_type-float-nan-inf) arguments is undefined. Remember about [numeric conversions issues](#numeric-conversion-issues), when using the functions. +The behavior of functions for the [NaN and Inf](/docs/en/sql-reference/data-types/float.md/#data_type-float-nan-inf) arguments is undefined. Remember about [numeric conversions issues](#numeric-conversion-issues), when using the functions. **Example** @@ -114,7 +114,7 @@ Result: ## toUInt(8\|16\|32\|64\|256) -Converts an input value to the [UInt](../../sql-reference/data-types/int-uint.md) data type. This function family includes: +Converts an input value to the [UInt](/docs/en/sql-reference/data-types/int-uint.md) data type. This function family includes: - `toUInt8(expr)` — Results in the `UInt8` data type. - `toUInt16(expr)` — Results in the `UInt16` data type. @@ -124,7 +124,7 @@ Converts an input value to the [UInt](../../sql-reference/data-types/int-uint.md **Arguments** -- `expr` — [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a number or a string with the decimal representation of a number. Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. +- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions) returning a number or a string with the decimal representation of a number. 
Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. **Returned value** @@ -132,7 +132,7 @@ Integer value in the `UInt8`, `UInt16`, `UInt32`, `UInt64` or `UInt256` data typ Functions use [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning they truncate fractional digits of numbers. -The behavior of functions for negative arguments and for the [NaN and Inf](../../sql-reference/data-types/float.md#data_type-float-nan-inf) arguments is undefined. If you pass a string with a negative number, for example `'-32'`, ClickHouse raises an exception. Remember about [numeric conversions issues](#numeric-conversion-issues), when using the functions. +The behavior of functions for negative arguments and for the [NaN and Inf](/docs/en/sql-reference/data-types/float.md/#data_type-float-nan-inf) arguments is undefined. If you pass a string with a negative number, for example `'-32'`, ClickHouse raises an exception. Remember about [numeric conversions issues](#numeric-conversion-issues), when using the functions. **Example** @@ -166,7 +166,111 @@ Result: ## toDate -Alias: `DATE`. +Converts the argument to `Date` data type. + +If the argument is `DateTime` or `DateTime64`, it truncates it, leaving the date component of the DateTime: +```sql +SELECT + now() AS x, + toDate(x) +``` +```response +┌───────────────────x─┬─toDate(now())─┐ +│ 2022-12-30 13:44:17 │ 2022-12-30 │ +└─────────────────────┴───────────────┘ +``` + +If the argument is a string, it is parsed as Date or DateTime. If it was parsed as DateTime, the date component is being used: +```sql +SELECT + toDate('2022-12-30') AS x, + toTypeName(x) +``` +```response +┌──────────x─┬─toTypeName(toDate('2022-12-30'))─┐ +│ 2022-12-30 │ Date │ +└────────────┴──────────────────────────────────┘ + +1 row in set. Elapsed: 0.001 sec. +``` +```sql +SELECT + toDate('2022-12-30 01:02:03') AS x, + toTypeName(x) +``` +```response +┌──────────x─┬─toTypeName(toDate('2022-12-30 01:02:03'))─┐ +│ 2022-12-30 │ Date │ +└────────────┴───────────────────────────────────────────┘ +``` + +If the argument is a number and it looks like a UNIX timestamp (is greater than 65535), it is interpreted as a DateTime, then truncated to Date in the current timezone. The timezone argument can be specified as a second argument of the function. The truncation to Date depends on the timezone: + +```sql +SELECT + now() AS current_time, + toUnixTimestamp(current_time) AS ts, + toDateTime(ts) AS time_Amsterdam, + toDateTime(ts, 'Pacific/Apia') AS time_Samoa, + toDate(time_Amsterdam) AS date_Amsterdam, + toDate(time_Samoa) AS date_Samoa, + toDate(ts) AS date_Amsterdam_2, + toDate(ts, 'Pacific/Apia') AS date_Samoa_2 +``` +```response +Row 1: +────── +current_time: 2022-12-30 13:51:54 +ts: 1672404714 +time_Amsterdam: 2022-12-30 13:51:54 +time_Samoa: 2022-12-31 01:51:54 +date_Amsterdam: 2022-12-30 +date_Samoa: 2022-12-31 +date_Amsterdam_2: 2022-12-30 +date_Samoa_2: 2022-12-31 +``` + +The example above demonstrates how the same UNIX timestamp can be interpreted as different dates in different time zones. + +If the argument is a number and it is smaller than 65536, it is interpreted as the number of days since 1970-01-01 (a UNIX day) and converted to Date. It corresponds to the internal numeric representation of the `Date` data type. Example: + +```sql +SELECT toDate(12345) +``` +```response +┌─toDate(12345)─┐ +│ 2003-10-20 │ +└───────────────┘ +``` + +This conversion does not depend on timezones. 
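+
+The boundary between the two numeric interpretations can be checked by comparing values on either side of 65536 (the second column depends on the server timezone, so no sample output is shown here):
+
+```sql
+SELECT
+    toDate(65535) AS interpreted_as_days,
+    toDate(65536) AS interpreted_as_unix_timestamp
+```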
+ +If the argument does not fit in the range of the Date type, it results in an implementation-defined behavior, that can saturate to the maximum supported date or overflow: +```sql +SELECT toDate(10000000000.) +``` +```response +┌─toDate(10000000000.)─┐ +│ 2106-02-07 │ +└──────────────────────┘ +``` + +The function `toDate` can be also written in alternative forms: + +```sql +SELECT + now() AS time, + toDate(time), + DATE(time), + CAST(time, 'Date') +``` +```response +┌────────────────time─┬─toDate(now())─┬─DATE(now())─┬─CAST(now(), 'Date')─┐ +│ 2022-12-30 13:54:58 │ 2022-12-30 │ 2022-12-30 │ 2022-12-30 │ +└─────────────────────┴───────────────┴─────────────┴─────────────────────┘ +``` + +Have a nice day working with dates and times. ## toDateOrZero @@ -184,7 +288,7 @@ Alias: `DATE`. ## toDate32 -Converts the argument to the [Date32](../../sql-reference/data-types/date32.md) data type. If the value is outside the range returns the border values supported by `Date32`. If the argument has [Date](../../sql-reference/data-types/date.md) type, borders of `Date` are taken into account. +Converts the argument to the [Date32](/docs/en/sql-reference/data-types/date32.md) data type. If the value is outside the range, `toDate32` returns the border values supported by `Date32`. If the argument has [Date](/docs/en/sql-reference/data-types/date.md) type, borders of `Date` are taken into account. **Syntax** @@ -194,13 +298,13 @@ toDate32(expr) **Arguments** -- `expr` — The value. [String](../../sql-reference/data-types/string.md), [UInt32](../../sql-reference/data-types/int-uint.md) or [Date](../../sql-reference/data-types/date.md). +- `expr` — The value. [String](/docs/en/sql-reference/data-types/string.md), [UInt32](/docs/en/sql-reference/data-types/int-uint.md) or [Date](/docs/en/sql-reference/data-types/date.md). **Returned value** - A calendar date. -Type: [Date32](../../sql-reference/data-types/date32.md). +Type: [Date32](/docs/en/sql-reference/data-types/date32.md). **Example** @@ -242,7 +346,7 @@ SELECT toDate32(toDate('1899-01-01')) AS value, toTypeName(value); ## toDate32OrZero -The same as [toDate32](#todate32) but returns the min value of [Date32](../../sql-reference/data-types/date32.md) if invalid argument is received. +The same as [toDate32](#todate32) but returns the min value of [Date32](/docs/en/sql-reference/data-types/date32.md) if an invalid argument is received. **Example** @@ -262,7 +366,7 @@ Result: ## toDate32OrNull -The same as [toDate32](#todate32) but returns `NULL` if invalid argument is received. +The same as [toDate32](#todate32) but returns `NULL` if an invalid argument is received. **Example** @@ -282,7 +386,7 @@ Result: ## toDate32OrDefault -Converts the argument to the [Date32](../../sql-reference/data-types/date32.md) data type. If the value is outside the range returns the lower border value supported by `Date32`. If the argument has [Date](../../sql-reference/data-types/date.md) type, borders of `Date` are taken into account. Returns default value if invalid argument is received. +Converts the argument to the [Date32](/docs/en/sql-reference/data-types/date32.md) data type. If the value is outside the range, `toDate32OrDefault` returns the lower border value supported by `Date32`. If the argument has [Date](/docs/en/sql-reference/data-types/date.md) type, borders of `Date` are taken into account. Returns default value if an invalid argument is received. 
**Example** @@ -304,7 +408,7 @@ Result: ## toDateTime64 -Converts the argument to the [DateTime64](../../sql-reference/data-types/datetime64.md) data type. +Converts the argument to the [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) data type. **Syntax** @@ -314,7 +418,7 @@ toDateTime64(expr, scale, [timezone]) **Arguments** -- `expr` — The value. [String](../../sql-reference/data-types/string.md), [UInt32](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [DateTime](../../sql-reference/data-types/datetime.md). +- `expr` — The value. [String](/docs/en/sql-reference/data-types/string.md), [UInt32](/docs/en/sql-reference/data-types/int-uint.md), [Float](/docs/en/sql-reference/data-types/float.md) or [DateTime](/docs/en/sql-reference/data-types/datetime.md). - `scale` - Tick size (precision): 10-precision seconds. Valid range: [ 0 : 9 ]. - `timezone` - Time zone of the specified datetime64 object. @@ -322,7 +426,7 @@ toDateTime64(expr, scale, [timezone]) - A calendar date and time of day, with sub-second precision. -Type: [DateTime64](../../sql-reference/data-types/datetime64.md). +Type: [DateTime64](/docs/en/sql-reference/data-types/datetime64.md). **Example** @@ -378,7 +482,7 @@ SELECT toDateTime64('2019-01-01 00:00:00', 3, 'Asia/Istanbul') AS value, toTypeN ## toDecimal(32\|64\|128\|256) -Converts `value` to the [Decimal](../../sql-reference/data-types/decimal.md) data type with precision of `S`. The `value` can be a number or a string. The `S` (scale) parameter specifies the number of decimal places. +Converts `value` to the [Decimal](/docs/en/sql-reference/data-types/decimal.md) data type with precision of `S`. The `value` can be a number or a string. The `S` (scale) parameter specifies the number of decimal places. - `toDecimal32(value, S)` - `toDecimal64(value, S)` @@ -387,7 +491,7 @@ Converts `value` to the [Decimal](../../sql-reference/data-types/decimal.md) dat ## toDecimal(32\|64\|128\|256)OrNull -Converts an input string to a [Nullable(Decimal(P,S))](../../sql-reference/data-types/decimal.md) data type value. This family of functions include: +Converts an input string to a [Nullable(Decimal(P,S))](/docs/en/sql-reference/data-types/decimal.md) data type value. This family of functions includes: - `toDecimal32OrNull(expr, S)` — Results in `Nullable(Decimal32(S))` data type. - `toDecimal64OrNull(expr, S)` — Results in `Nullable(Decimal64(S))` data type. @@ -398,7 +502,7 @@ These functions should be used instead of `toDecimal*()` functions, if you prefe **Arguments** -- `expr` — [Expression](../../sql-reference/syntax.md#syntax-expressions), returns a value in the [String](../../sql-reference/data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. +- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions), returns a value in the [String](/docs/en/sql-reference/data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. - `S` — Scale, the number of decimal places in the resulting value. **Returned value** @@ -441,7 +545,7 @@ Result: ## toDecimal(32\|64\|128\|256)OrDefault -Converts an input string to a [Decimal(P,S)](../../sql-reference/data-types/decimal.md) data type value. This family of functions include: +Converts an input string to a [Decimal(P,S)](/docs/en/sql-reference/data-types/decimal.md) data type value. 
This family of functions includes: - `toDecimal32OrDefault(expr, S)` — Results in `Decimal32(S)` data type. - `toDecimal64OrDefault(expr, S)` — Results in `Decimal64(S)` data type. @@ -452,7 +556,7 @@ These functions should be used instead of `toDecimal*()` functions, if you prefe **Arguments** -- `expr` — [Expression](../../sql-reference/syntax.md#syntax-expressions), returns a value in the [String](../../sql-reference/data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. +- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions), returns a value in the [String](/docs/en/sql-reference/data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. - `S` — Scale, the number of decimal places in the resulting value. **Returned value** @@ -494,7 +598,7 @@ Result: ## toDecimal(32\|64\|128\|256)OrZero -Converts an input value to the [Decimal(P,S)](../../sql-reference/data-types/decimal.md) data type. This family of functions include: +Converts an input value to the [Decimal(P,S)](/docs/en/sql-reference/data-types/decimal.md) data type. This family of functions includes: - `toDecimal32OrZero( expr, S)` — Results in `Decimal32(S)` data type. - `toDecimal64OrZero( expr, S)` — Results in `Decimal64(S)` data type. @@ -505,7 +609,7 @@ These functions should be used instead of `toDecimal*()` functions, if you prefe **Arguments** -- `expr` — [Expression](../../sql-reference/syntax.md#syntax-expressions), returns a value in the [String](../../sql-reference/data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. +- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions), returns a value in the [String](/docs/en/sql-reference/data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. - `S` — Scale, the number of decimal places in the resulting value. **Returned value** @@ -564,7 +668,7 @@ YYYY-MM-DD hh:mm:ss As an exception, if converting from UInt32, Int32, UInt64, or Int64 numeric types to Date, and if the number is greater than or equal to 65536, the number is interpreted as a Unix timestamp (and not as the number of days) and is rounded to the date. This allows support for the common occurrence of writing ‘toDate(unix_timestamp)’, which otherwise would be an error and would require writing the more cumbersome ‘toDate(toDateTime(unix_timestamp))’. -Conversion between a date and date with time is performed the natural way: by adding a null time or dropping the time. +Conversion between a date and a date with time is performed the natural way: by adding a null time or dropping the time. Conversion between numeric types uses the same rules as assignments between different numeric types in C++. @@ -643,15 +747,15 @@ These functions accept a string and interpret the bytes placed at the beginning ## reinterpretAsString -This function accepts a number or date or date with time, and returns a string containing bytes representing the corresponding value in host order (little endian). Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a string that is one byte long. +This function accepts a number or date or date with time and returns a string containing bytes representing the corresponding value in host order (little endian). Null bytes are dropped from the end. 
For example, a UInt32 type value of 255 is a string that is one byte long. ## reinterpretAsFixedString -This function accepts a number or date or date with time, and returns a FixedString containing bytes representing the corresponding value in host order (little endian). Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a FixedString that is one byte long. +This function accepts a number or date or date with time and returns a FixedString containing bytes representing the corresponding value in host order (little endian). Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a FixedString that is one byte long. ## reinterpretAsUUID -Accepts 16 bytes string and returns UUID containing bytes representing the corresponding value in network byte order (big-endian). If the string isn't long enough, the function works as if the string is padded with the necessary number of null bytes to the end. If the string longer than 16 bytes, the extra bytes at the end are ignored. +Accepts 16 bytes string and returns UUID containing bytes representing the corresponding value in network byte order (big-endian). If the string isn't long enough, the function works as if the string is padded with the necessary number of null bytes to the end. If the string is longer than 16 bytes, the extra bytes at the end are ignored. **Syntax** @@ -661,11 +765,11 @@ reinterpretAsUUID(fixed_string) **Arguments** -- `fixed_string` — Big-endian byte string. [FixedString](../../sql-reference/data-types/fixedstring.md#fixedstring). +- `fixed_string` — Big-endian byte string. [FixedString](/docs/en/sql-reference/data-types/fixedstring.md/#fixedstring). **Returned value** -- The UUID type value. [UUID](../../sql-reference/data-types/uuid.md#uuid-data-type). +- The UUID type value. [UUID](/docs/en/sql-reference/data-types/uuid.md/#uuid-data-type). **Examples** @@ -718,7 +822,7 @@ reinterpret(x, type) **Arguments** - `x` — Any type. -- `type` — Destination type. [String](../../sql-reference/data-types/string.md). +- `type` — Destination type. [String](/docs/en/sql-reference/data-types/string.md). **Returned value** @@ -757,7 +861,7 @@ x::t **Arguments** - `x` — A value to convert. May be of any type. -- `T` — The name of the target data type. [String](../../sql-reference/data-types/string.md). +- `T` — The name of the target data type. [String](/docs/en/sql-reference/data-types/string.md). - `t` — The target data type. **Returned value** @@ -806,9 +910,9 @@ Result: └─────────────────────┴─────────────────────┴────────────┴─────────────────────┴───────────────────────────┘ ``` -Conversion to FixedString(N) only works for arguments of type [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). +Conversion to FixedString(N) only works for arguments of type [String](/docs/en/sql-reference/data-types/string.md) or [FixedString](/docs/en/sql-reference/data-types/fixedstring.md). -Type conversion to [Nullable](../../sql-reference/data-types/nullable.md) and back is supported. +Type conversion to [Nullable](/docs/en/sql-reference/data-types/nullable.md) and back is supported. **Example** @@ -844,7 +948,7 @@ Result: **See also** -- [cast_keep_nullable](../../operations/settings/settings.md#cast_keep_nullable) setting +- [cast_keep_nullable](/docs/en/operations/settings/settings.md/#cast_keep_nullable) setting ## accurateCast(x, T) @@ -882,7 +986,7 @@ Code: 70. DB::Exception: Received from localhost:9000. 
DB::Exception: Value in c ## accurateCastOrNull(x, T) -Converts input value `x` to the specified data type `T`. Always returns [Nullable](../../sql-reference/data-types/nullable.md) type and returns [NULL](../../sql-reference/syntax.md#null-literal) if the casted value is not representable in the target type. +Converts input value `x` to the specified data type `T`. Always returns [Nullable](/docs/en/sql-reference/data-types/nullable.md) type and returns [NULL](/docs/en/sql-reference/syntax.md/#null-literal) if the casted value is not representable in the target type. **Syntax** @@ -991,7 +1095,7 @@ Result: ## toInterval(Year\|Quarter\|Month\|Week\|Day\|Hour\|Minute\|Second) -Converts a Number type argument to an [Interval](../../sql-reference/data-types/special-data-types/interval.md) data type. +Converts a Number type argument to an [Interval](/docs/en/sql-reference/data-types/special-data-types/interval.md) data type. **Syntax** @@ -1039,7 +1143,7 @@ Result: ## parseDateTimeBestEffort ## parseDateTime32BestEffort -Converts a date and time in the [String](../../sql-reference/data-types/string.md) representation to [DateTime](../../sql-reference/data-types/datetime.md#data_type-datetime) data type. +Converts a date and time in the [String](/docs/en/sql-reference/data-types/string.md) representation to [DateTime](/docs/en/sql-reference/data-types/datetime.md/#data_type-datetime) data type. The function parses [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601), [RFC 1123 - 5.2.14 RFC-822 Date and Time Specification](https://tools.ietf.org/html/rfc1123#page-55), ClickHouse’s and some other date and time formats. @@ -1051,8 +1155,8 @@ parseDateTimeBestEffort(time_string [, time_zone]) **Arguments** -- `time_string` — String containing a date and time to convert. [String](../../sql-reference/data-types/string.md). -- `time_zone` — Time zone. The function parses `time_string` according to the time zone. [String](../../sql-reference/data-types/string.md). +- `time_string` — String containing a date and time to convert. [String](/docs/en/sql-reference/data-types/string.md). +- `time_zone` — Time zone. The function parses `time_string` according to the time zone. [String](/docs/en/sql-reference/data-types/string.md). **Supported non-standard formats** @@ -1175,7 +1279,7 @@ Same as [parseDateTimeBestEffortUS](#parsedatetimebesteffortUS) function except ## parseDateTime64BestEffort -Same as [parseDateTimeBestEffort](#parsedatetimebesteffort) function but also parse milliseconds and microseconds and returns [DateTime](../../sql-reference/functions/type-conversion-functions.md#data_type-datetime) data type. +Same as [parseDateTimeBestEffort](#parsedatetimebesteffort) function but also parse milliseconds and microseconds and returns [DateTime](/docs/en/sql-reference/functions/type-conversion-functions.md/#data_type-datetime) data type. **Syntax** @@ -1185,13 +1289,13 @@ parseDateTime64BestEffort(time_string [, precision [, time_zone]]) **Parameters** -- `time_string` — String containing a date or date with time to convert. [String](../../sql-reference/data-types/string.md). -- `precision` — Required precision. `3` — for milliseconds, `6` — for microseconds. Default — `3`. Optional. [UInt8](../../sql-reference/data-types/int-uint.md). -- `time_zone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](../../sql-reference/data-types/string.md). 
+- `time_string` — String containing a date or date with time to convert. [String](/docs/en/sql-reference/data-types/string.md). +- `precision` — Required precision. `3` — for milliseconds, `6` — for microseconds. Default — `3`. Optional. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](/docs/en/sql-reference/data-types/string.md). **Returned value** -- `time_string` converted to the [DateTime](../../sql-reference/data-types/datetime.md) data type. +- `time_string` converted to the [DateTime](/docs/en/sql-reference/data-types/datetime.md) data type. **Examples** @@ -1242,7 +1346,7 @@ Same as for [parseDateTime64BestEffort](#parsedatetime64besteffort), except that ## toLowCardinality -Converts input parameter to the [LowCardinality](../../sql-reference/data-types/lowcardinality.md) version of same data type. +Converts input parameter to the [LowCardinality](/docs/en/sql-reference/data-types/lowcardinality.md) version of same data type. To convert data from the `LowCardinality` data type use the [CAST](#type_conversion_function-cast) function. For example, `CAST(x as String)`. @@ -1254,7 +1358,7 @@ toLowCardinality(expr) **Arguments** -- `expr` — [Expression](../../sql-reference/syntax.md#syntax-expressions) resulting in one of the [supported data types](../../sql-reference/data-types/index.md#data_types). +- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions) resulting in one of the [supported data types](/docs/en/sql-reference/data-types/index.md/#data_types). **Returned values** @@ -1388,7 +1492,7 @@ formatRow(format, x, y, ...) **Arguments** -- `format` — Text format. For example, [CSV](../../interfaces/formats.md#csv), [TSV](../../interfaces/formats.md#tabseparated). +- `format` — Text format. For example, [CSV](/docs/en/interfaces/formats.md/#csv), [TSV](/docs/en/interfaces/formats.md/#tabseparated). - `x`,`y`, ... — Expressions. **Returned value** @@ -1429,7 +1533,7 @@ formatRowNoNewline(format, x, y, ...) **Arguments** -- `format` — Text format. For example, [CSV](../../interfaces/formats.md#csv), [TSV](../../interfaces/formats.md#tabseparated). +- `format` — Text format. For example, [CSV](/docs/en/interfaces/formats.md/#csv), [TSV](/docs/en/interfaces/formats.md/#tabseparated). - `x`,`y`, ... — Expressions. **Returned value** @@ -1457,7 +1561,7 @@ Result: ## snowflakeToDateTime -Extracts time from [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) as [DateTime](../data-types/datetime.md) format. +Extracts time from [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) as [DateTime](/docs/en/sql-reference/data-types/datetime.md) format. **Syntax** @@ -1467,12 +1571,12 @@ snowflakeToDateTime(value [, time_zone]) **Parameters** -- `value` — Snowflake ID. [Int64](../data-types/int-uint.md). -- `time_zone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](../../sql-reference/data-types/string.md). +- `value` — Snowflake ID. [Int64](/docs/en/sql-reference/data-types/int-uint.md). +- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. 
[String](/docs/en/sql-reference/data-types/string.md). **Returned value** -- Input value converted to the [DateTime](../data-types/datetime.md) data type. +- Input value converted to the [DateTime](/docs/en/sql-reference/data-types/datetime.md) data type. **Example** @@ -1493,7 +1597,7 @@ Result: ## snowflakeToDateTime64 -Extracts time from [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) as [DateTime64](../data-types/datetime64.md) format. +Extracts time from [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) as [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) format. **Syntax** @@ -1503,12 +1607,12 @@ snowflakeToDateTime64(value [, time_zone]) **Parameters** -- `value` — Snowflake ID. [Int64](../data-types/int-uint.md). -- `time_zone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](../../sql-reference/data-types/string.md). +- `value` — Snowflake ID. [Int64](/docs/en/sql-reference/data-types/int-uint.md). +- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](/docs/en/sql-reference/data-types/string.md). **Returned value** -- Input value converted to the [DateTime64](../data-types/datetime64.md) data type. +- Input value converted to the [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) data type. **Example** @@ -1529,7 +1633,7 @@ Result: ## dateTimeToSnowflake -Converts [DateTime](../data-types/datetime.md) value to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the giving time. +Converts [DateTime](/docs/en/sql-reference/data-types/datetime.md) value to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the giving time. **Syntax** @@ -1539,11 +1643,11 @@ dateTimeToSnowflake(value) **Parameters** -- `value` — Date and time. [DateTime](../../sql-reference/data-types/datetime.md). +- `value` — Date and time. [DateTime](/docs/en/sql-reference/data-types/datetime.md). **Returned value** -- Input value converted to the [Int64](../data-types/int-uint.md) data type as the first Snowflake ID at that time. +- Input value converted to the [Int64](/docs/en/sql-reference/data-types/int-uint.md) data type as the first Snowflake ID at that time. **Example** @@ -1563,7 +1667,7 @@ Result: ## dateTime64ToSnowflake -Convert [DateTime64](../data-types/datetime64.md) to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the giving time. +Convert [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the giving time. **Syntax** @@ -1573,11 +1677,11 @@ dateTime64ToSnowflake(value) **Parameters** -- `value` — Date and time. [DateTime64](../../sql-reference/data-types/datetime64.md). +- `value` — Date and time. [DateTime64](/docs/en/sql-reference/data-types/datetime64.md). **Returned value** -- Input value converted to the [Int64](../data-types/int-uint.md) data type as the first Snowflake ID at that time. +- Input value converted to the [Int64](/docs/en/sql-reference/data-types/int-uint.md) data type as the first Snowflake ID at that time. 
**Example** diff --git a/docs/en/sql-reference/statements/select/from.md b/docs/en/sql-reference/statements/select/from.md index 3013a173c16..b751384cb72 100644 --- a/docs/en/sql-reference/statements/select/from.md +++ b/docs/en/sql-reference/statements/select/from.md @@ -21,12 +21,11 @@ Subquery is another `SELECT` query that may be specified in parenthesis inside ` When `FINAL` is specified, ClickHouse fully merges the data before returning the result and thus performs all data transformations that happen during merges for the given table engine. -It is applicable when selecting data from tables that use the [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md)-engine family. Also supported for: +It is applicable when selecting data from ReplacingMergeTree, SummingMergeTree, AggregatingMergeTree, CollapsingMergeTree and VersionedCollapsingMergeTree tables. -- [Replicated](../../../engines/table-engines/mergetree-family/replication.md) versions of `MergeTree` engines. -- [View](../../../engines/table-engines/special/view.md), [Buffer](../../../engines/table-engines/special/buffer.md), [Distributed](../../../engines/table-engines/special/distributed.md), and [MaterializedView](../../../engines/table-engines/special/materializedview.md) engines that operate over other engines, provided they were created over `MergeTree`-engine tables. +`SELECT` queries with `FINAL` are executed in parallel. The [max_final_threads](../../../operations/settings/settings.md#max-final-threads) setting limits the number of threads used. -Now `SELECT` queries with `FINAL` are executed in parallel and slightly faster. But there are drawbacks (see below). The [max_final_threads](../../../operations/settings/settings.md#max-final-threads) setting limits the number of threads used. +There are drawbacks to using `FINAL` (see below). 
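+
+For illustration, `FINAL` follows the table name and `max_final_threads` can be set per query (the table and column names below are placeholders):
+
+``` sql
+SELECT key, value
+FROM example_table FINAL
+SETTINGS max_final_threads = 2
+```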
### Drawbacks diff --git a/docs/zh/engines/table-engines/log-family/index.md b/docs/zh/engines/table-engines/log-family/index.md index 56776522445..1b24984f75f 100644 --- a/docs/zh/engines/table-engines/log-family/index.md +++ b/docs/zh/engines/table-engines/log-family/index.md @@ -11,7 +11,7 @@ sidebar_position: 29 这系列的引擎有: - [StripeLog](stripelog.md) -- [日志](log.md) +- [Log](log.md) - [TinyLog](tinylog.md) ## 共同属性 {#table_engines-log-engine-family-common-properties} diff --git a/programs/benchmark/Benchmark.cpp b/programs/benchmark/Benchmark.cpp index 017b28fe082..26099b352a3 100644 --- a/programs/benchmark/Benchmark.cpp +++ b/programs/benchmark/Benchmark.cpp @@ -683,7 +683,7 @@ int mainEntryClickHouseBenchmark(int argc, char ** argv) ("confidence", value()->default_value(5), "set the level of confidence for T-test [0=80%, 1=90%, 2=95%, 3=98%, 4=99%, 5=99.5%(default)") ("query_id", value()->default_value(""), "") ("max-consecutive-errors", value()->default_value(0), "set number of allowed consecutive errors") - ("continue_on_errors", "continue testing even if a query fails") + ("ignore-error,continue_on_errors", "continue testing even if a query fails") ("reconnect", "establish new connection for every query") ("client-side-time", "display the time including network communication instead of server-side time; note that for server versions before 22.8 we always display client-side time") ; @@ -738,7 +738,7 @@ int mainEntryClickHouseBenchmark(int argc, char ** argv) options["query_id"].as(), options["query"].as(), options["max-consecutive-errors"].as(), - options.count("continue_on_errors"), + options.count("ignore-error"), options.count("reconnect"), options.count("client-side-time"), print_stacktrace, diff --git a/programs/client/CMakeLists.txt b/programs/client/CMakeLists.txt index d212da59908..e160355ef7b 100644 --- a/programs/client/CMakeLists.txt +++ b/programs/client/CMakeLists.txt @@ -13,6 +13,10 @@ set (CLICKHOUSE_CLIENT_LINK string_utils ) +if (TARGET ch_rust::skim) + list(APPEND CLICKHOUSE_CLIENT_LINK PRIVATE ch_rust::skim) +endif() + # Always use internal readpassphrase list(APPEND CLICKHOUSE_CLIENT_LINK PRIVATE readpassphrase) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 9923b8b365a..af1a019e1f8 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -30,9 +30,10 @@ #include #include -#include -#include #include +#include +#include +#include #include #include @@ -41,6 +42,8 @@ #include #include +#include + #include #include @@ -827,6 +830,20 @@ bool Client::processWithFuzzing(const String & full_query) WriteBufferFromOStream ast_buf(std::cout, 4096); formatAST(*query, ast_buf, false /*highlight*/); ast_buf.next(); + if (const auto * insert = query->as()) + { + /// For inserts with data it's really useful to have the data itself available in the logs, as formatAST doesn't print it + if (insert->hasInlinedData()) + { + String bytes; + { + auto read_buf = getReadBufferFromASTInsertQuery(query); + WriteBufferFromString write_buf(bytes); + copyData(*read_buf, write_buf); + } + std::cout << std::endl << bytes; + } + } std::cout << std::endl << std::endl; try diff --git a/programs/install/Install.cpp b/programs/install/Install.cpp index de85572d5c6..d568012bb26 100644 --- a/programs/install/Install.cpp +++ b/programs/install/Install.cpp @@ -1051,18 +1051,12 @@ namespace return pid; } - int stop(const fs::path & pid_file, bool force, bool do_not_kill, unsigned max_tries) + bool sendSignalAndWaitForStop(const fs::path & pid_file, int 
signal, unsigned max_tries, unsigned wait_ms, const char * signal_name) { - if (force && do_not_kill) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Specified flags are incompatible"); - int pid = isRunning(pid_file); if (!pid) - return 0; - - int signal = force ? SIGKILL : SIGTERM; - const char * signal_name = force ? "kill" : "terminate"; + return true; if (0 == kill(pid, signal)) fmt::print("Sent {} signal to process with pid {}.\n", signal_name, pid); @@ -1078,46 +1072,51 @@ namespace fmt::print("Server stopped\n"); break; } - sleepForSeconds(1); + sleepForMilliseconds(wait_ms); } - if (try_num == max_tries) + return try_num < max_tries; + } + + int stop(const fs::path & pid_file, bool force, bool do_not_kill, unsigned max_tries) + { + if (force && do_not_kill) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Specified flags are incompatible"); + + int signal = force ? SIGKILL : SIGTERM; + const char * signal_name = force ? "kill" : "terminate"; + + if (sendSignalAndWaitForStop(pid_file, signal, max_tries, 1000, signal_name)) + return 0; + + int pid = isRunning(pid_file); + if (!pid) + return 0; + + if (do_not_kill) { - if (do_not_kill) - { - fmt::print("Process (pid = {}) is still running. Will not try to kill it.\n", pid); - return 1; - } - - fmt::print("Will terminate forcefully (pid = {}).\n", pid); - if (0 == kill(pid, 9)) - fmt::print("Sent kill signal (pid = {}).\n", pid); - else - throwFromErrno("Cannot send kill signal", ErrorCodes::SYSTEM_ERROR); - - /// Wait for the process (100 seconds). - constexpr size_t num_kill_check_tries = 1000; - constexpr size_t kill_check_delay_ms = 100; - for (size_t i = 0; i < num_kill_check_tries; ++i) - { - fmt::print("Waiting for server to be killed\n"); - if (!isRunning(pid_file)) - { - fmt::print("Server exited\n"); - break; - } - sleepForMilliseconds(kill_check_delay_ms); - } - - if (isRunning(pid_file)) - { - throw Exception(ErrorCodes::CANNOT_KILL, - "The server process still exists after {} tries (delay: {} ms)", - num_kill_check_tries, kill_check_delay_ms); - } + fmt::print("Process (pid = {}) is still running. Will not try to kill it.\n", pid); + return 1; } - return 0; + /// Send termination signal again, the server will receive it and immediately terminate. + fmt::print("Will send the termination signal again to force the termination (pid = {}).\n", pid); + if (sendSignalAndWaitForStop(pid_file, signal, std::min(10U, max_tries), 1000, signal_name)) + return 0; + + /// Send kill signal. Total wait is 100 seconds. 
+ constexpr size_t num_kill_check_tries = 1000; + constexpr size_t kill_check_delay_ms = 100; + fmt::print("Will terminate forcefully (pid = {}).\n", pid); + if (sendSignalAndWaitForStop(pid_file, SIGKILL, num_kill_check_tries, kill_check_delay_ms, signal_name)) + return 0; + + if (!isRunning(pid_file)) + return 0; + + throw Exception(ErrorCodes::CANNOT_KILL, + "The server process still exists after {} tries (delay: {} ms)", + num_kill_check_tries, kill_check_delay_ms); } } diff --git a/programs/local/CMakeLists.txt b/programs/local/CMakeLists.txt index ad4406156c4..7366f8685a4 100644 --- a/programs/local/CMakeLists.txt +++ b/programs/local/CMakeLists.txt @@ -18,6 +18,10 @@ if(NOT CLICKHOUSE_ONE_SHARED) target_link_libraries(clickhouse-local-lib PRIVATE clickhouse-server-lib) endif() +if (TARGET ch_rust::skim) + target_link_libraries(clickhouse-local-lib PRIVATE ch_rust::skim) +endif() + # Always use internal readpassphrase target_link_libraries(clickhouse-local-lib PRIVATE readpassphrase) diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 1614fb1a8b4..8ce87558630 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -207,7 +207,7 @@ void LocalServer::tryInitPath() global_context->setPath(path); - global_context->setTemporaryStorage(path + "tmp", "", 0); + global_context->setTemporaryStoragePath(path + "tmp/", 0); global_context->setFlagsPath(path + "flags"); global_context->setUserFilesPath(""); // user's files are everywhere diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 08452c0553f..429947c587e 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -70,6 +70,8 @@ #include #include #include +#include +#include #include #include #include "MetricsTransmitter.h" @@ -203,46 +205,6 @@ int mainEntryClickHouseServer(int argc, char ** argv) namespace { -void setupTmpPath(Poco::Logger * log, const std::string & path) -try -{ - LOG_DEBUG(log, "Setting up {} to store temporary data in it", path); - - fs::create_directories(path); - - /// Clearing old temporary files. - fs::directory_iterator dir_end; - size_t unknown_files = 0; - for (fs::directory_iterator it(path); it != dir_end; ++it) - { - if (it->is_regular_file() && startsWith(it->path().filename(), "tmp")) - { - LOG_DEBUG(log, "Removing old temporary file {}", it->path().string()); - fs::remove(it->path()); - } - else - { - unknown_files++; - if (unknown_files < 100) - LOG_DEBUG(log, "Found unknown {} {} in temporary path", - it->is_regular_file() ? "file" : (it->is_directory() ? "directory" : "element"), - it->path().string()); - } - } - - if (unknown_files) - LOG_DEBUG(log, "Found {} unknown files in temporary path", unknown_files); -} -catch (...) -{ - DB::tryLogCurrentException( - log, - fmt::format( - "Caught exception while setup temporary path: {}. 
It is ok to skip this exception as cleaning old temporary files is not " - "necessary", - path)); -} - size_t waitServersToFinish(std::vector & servers, size_t seconds_to_wait) { const size_t sleep_max_ms = 1000 * seconds_to_wait; @@ -715,6 +677,8 @@ try registerDisks(/* global_skip_access_check= */ false); registerFormats(); registerRemoteFileMetadatas(); + registerSchedulerNodes(); + registerResourceManagers(); CurrentMetrics::set(CurrentMetrics::Revision, ClickHouseRevision::getVersionRevision()); CurrentMetrics::set(CurrentMetrics::VersionInteger, ClickHouseRevision::getVersionInteger()); @@ -739,6 +703,13 @@ try global_context->addWarningMessage("Server was built with sanitizer. It will work slowly."); #endif + const auto memory_amount = getMemoryAmount(); + + LOG_INFO(log, "Available RAM: {}; physical cores: {}; logical cores: {}.", + formatReadableSizeWithBinarySuffix(memory_amount), + getNumberOfPhysicalCPUCores(), // on ARM processors it can show only enabled at current moment cores + std::thread::hardware_concurrency()); + sanityChecks(*this); // Initialize global thread pool. Do it before we fetch configs from zookeeper @@ -812,8 +783,6 @@ try Settings::checkNoSettingNamesAtTopLevel(config(), config_path); - const auto memory_amount = getMemoryAmount(); - #if defined(OS_LINUX) std::string executable_path = getExecutablePath(); @@ -1009,13 +978,21 @@ try LOG_TRACE(log, "Initialized DateLUT with time zone '{}'.", DateLUT::instance().getTimeZone()); /// Storage with temporary data for processing of heavy queries. + if (auto temporary_policy = config().getString("tmp_policy", ""); !temporary_policy.empty()) + { + size_t max_size = config().getUInt64("max_temporary_data_on_disk_size", 0); + global_context->setTemporaryStoragePolicy(temporary_policy, max_size); + } + else if (auto temporary_cache = config().getString("temporary_data_in_cache", ""); !temporary_cache.empty()) + { + size_t max_size = config().getUInt64("max_temporary_data_on_disk_size", 0); + global_context->setTemporaryStorageInCache(temporary_cache, max_size); + } + else { std::string temporary_path = config().getString("tmp_path", path / "tmp/"); - std::string temporary_policy = config().getString("tmp_policy", ""); size_t max_size = config().getUInt64("max_temporary_data_on_disk_size", 0); - const VolumePtr & volume = global_context->setTemporaryStorage(temporary_path, temporary_policy, max_size); - for (const DiskPtr & disk : volume->getDisks()) - setupTmpPath(log, disk->getPath()); + global_context->setTemporaryStoragePath(temporary_path, max_size); } /** Directory with 'flags': files indicating temporary settings for the server set by system administrator. @@ -1072,8 +1049,8 @@ try bool continue_if_corrupted = config().getBool("merge_tree_metadata_cache.continue_if_corrupted", false); try { - LOG_DEBUG( - log, "Initializing merge tree metadata cache lru_cache_size:{} continue_if_corrupted:{}", size, continue_if_corrupted); + LOG_DEBUG(log, "Initializing MergeTree metadata cache, lru_cache_size: {} continue_if_corrupted: {}", + ReadableSize(size), continue_if_corrupted); global_context->initializeMergeTreeMetadataCache(path_str + "/" + "rocksdb", size); } catch (...) 
@@ -1289,6 +1266,11 @@ try global_context->getDistributedSchedulePool().increaseThreadsCount(new_pool_size); } + if (config->has("resources")) + { + global_context->getResourceManager()->updateConfiguration(*config); + } + if (!initial_loading) { /// We do not load ZooKeeper configuration on the first config loading @@ -1417,7 +1399,7 @@ try } catch (...) { - tryLogCurrentException(log); + tryLogCurrentException(log, "Caught exception while setting up access control."); throw; } @@ -1750,13 +1732,6 @@ try main_config_reloader->start(); access_control.startPeriodicReloading(); - { - LOG_INFO(log, "Available RAM: {}; physical cores: {}; logical cores: {}.", - formatReadableSizeWithBinarySuffix(memory_amount), - getNumberOfPhysicalCPUCores(), // on ARM processors it can show only enabled at current moment cores - std::thread::hardware_concurrency()); - } - /// try to load dictionaries immediately, throw on error and die try { diff --git a/programs/server/config.d/graphite.xml b/programs/server/config.d/graphite.xml new file mode 120000 index 00000000000..69a0411e243 --- /dev/null +++ b/programs/server/config.d/graphite.xml @@ -0,0 +1 @@ +../../../tests/config/config.d/graphite.xml \ No newline at end of file diff --git a/rust/CMakeLists.txt b/rust/CMakeLists.txt index bf62fcbb151..1f11423a557 100644 --- a/rust/CMakeLists.txt +++ b/rust/CMakeLists.txt @@ -39,5 +39,21 @@ function(clickhouse_import_crate) corrosion_import_crate(NO_STD ${ARGN}) endfunction() -add_subdirectory (BLAKE3) -add_subdirectory (skim) +# Add crate from the build directory. +# +# Our crates has configuration files: +# - config for cargo (see config.toml.in) +# - and possibly config for build (build.rs.in) +# +# And to avoid overlaps different builds for one source directory, crate will +# be copied from source directory to the binary directory. +file(COPY ".cargo" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}") +function(add_rust_subdirectory src) + set(dst "${CMAKE_CURRENT_BINARY_DIR}/${src}") + message(STATUS "Copy ${src} to ${dst}") + file(COPY "${src}" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}") + add_subdirectory("${dst}" "${dst}") +endfunction() + +add_rust_subdirectory (BLAKE3) +add_rust_subdirectory (skim) diff --git a/rust/skim/build.rs.in b/rust/skim/build.rs.in index 0135c93222f..f0dd49e4130 100644 --- a/rust/skim/build.rs.in +++ b/rust/skim/build.rs.in @@ -5,4 +5,5 @@ fn main() { } build.compile("skim"); println!("cargo:rerun-if-changed=src/lib.rs"); + println!("cargo:rerun-if-changed=.cargo/config.toml"); } diff --git a/rust/skim/include/skim.h b/rust/skim/include/skim.h index 12cd257567b..8148474eba3 100644 --- a/rust/skim/include/skim.h +++ b/rust/skim/include/skim.h @@ -87,4 +87,4 @@ private: } // namespace cxxbridge1 } // namespace rust -::rust::String skim(::std::vector<::std::string> const &words); +::rust::String skim(::std::string const &prefix, ::std::vector<::std::string> const &words); diff --git a/rust/skim/src/lib.rs b/rust/skim/src/lib.rs index 29160329287..90f39cc8382 100644 --- a/rust/skim/src/lib.rs +++ b/rust/skim/src/lib.rs @@ -5,7 +5,7 @@ use cxx::{CxxString, CxxVector}; #[cxx::bridge] mod ffi { extern "Rust" { - fn skim(words: &CxxVector) -> Result; + fn skim(prefix: &CxxString, words: &CxxVector) -> Result; } } @@ -18,7 +18,7 @@ impl SkimItem for Item { } } -fn skim(words: &CxxVector) -> Result { +fn skim(prefix: &CxxString, words: &CxxVector) -> Result { // Let's check is terminal available. To avoid panic. 
if let Err(err) = TermInfo::from_env() { return Err(format!("{}", err)); @@ -26,6 +26,7 @@ fn skim(words: &CxxVector) -> Result { let options = SkimOptionsBuilder::default() .height(Some("30%")) + .query(Some(prefix.to_str().unwrap())) .tac(true) .tiebreak(Some("-score".to_string())) .build() diff --git a/src/Access/SettingsProfileElement.cpp b/src/Access/SettingsProfileElement.cpp index a4f3e81ab30..4eb5c0b4152 100644 --- a/src/Access/SettingsProfileElement.cpp +++ b/src/Access/SettingsProfileElement.cpp @@ -75,6 +75,10 @@ void SettingsProfileElement::init(const ASTSettingsProfileElement & ast, const A } } +bool SettingsProfileElement::isConstraint() const +{ + return this->writability || !this->min_value.isNull() || !this->max_value.isNull(); +} std::shared_ptr SettingsProfileElement::toAST() const { @@ -213,7 +217,7 @@ SettingsConstraints SettingsProfileElements::toSettingsConstraints(const AccessC { SettingsConstraints res{access_control}; for (const auto & elem : *this) - if (!elem.setting_name.empty() && elem.setting_name != ALLOW_BACKUP_SETTING_NAME) + if (!elem.setting_name.empty() && elem.isConstraint() && elem.setting_name != ALLOW_BACKUP_SETTING_NAME) res.set( elem.setting_name, elem.min_value, diff --git a/src/Access/SettingsProfileElement.h b/src/Access/SettingsProfileElement.h index c4dcf4d83a7..7f9379c1e47 100644 --- a/src/Access/SettingsProfileElement.h +++ b/src/Access/SettingsProfileElement.h @@ -44,6 +44,8 @@ struct SettingsProfileElement std::shared_ptr toAST() const; std::shared_ptr toASTWithNames(const AccessControl & access_control) const; + bool isConstraint() const; + private: void init(const ASTSettingsProfileElement & ast, const AccessControl * access_control); }; diff --git a/src/AggregateFunctions/AggregateFunctionAggThrow.cpp b/src/AggregateFunctions/AggregateFunctionAggThrow.cpp index 432b1f39f84..359c6051abb 100644 --- a/src/AggregateFunctions/AggregateFunctionAggThrow.cpp +++ b/src/AggregateFunctions/AggregateFunctionAggThrow.cpp @@ -49,14 +49,16 @@ private: public: AggregateFunctionThrow(const DataTypes & argument_types_, const Array & parameters_, Float64 throw_probability_) - : IAggregateFunctionDataHelper(argument_types_, parameters_), throw_probability(throw_probability_) {} + : IAggregateFunctionDataHelper(argument_types_, parameters_, createResultType()) + , throw_probability(throw_probability_) + {} String getName() const override { return "aggThrow"; } - DataTypePtr getReturnType() const override + static DataTypePtr createResultType() { return std::make_shared(); } diff --git a/src/AggregateFunctions/AggregateFunctionAnalysisOfVariance.h b/src/AggregateFunctions/AggregateFunctionAnalysisOfVariance.h index e891fb191f6..da060ceb18e 100644 --- a/src/AggregateFunctions/AggregateFunctionAnalysisOfVariance.h +++ b/src/AggregateFunctions/AggregateFunctionAnalysisOfVariance.h @@ -37,10 +37,10 @@ class AggregateFunctionAnalysisOfVariance final : public IAggregateFunctionDataH { public: explicit AggregateFunctionAnalysisOfVariance(const DataTypes & arguments, const Array & params) - : IAggregateFunctionDataHelper(arguments, params) + : IAggregateFunctionDataHelper(arguments, params, createResultType()) {} - DataTypePtr getReturnType() const override + DataTypePtr createResultType() const { DataTypes types {std::make_shared>(), std::make_shared>() }; Strings names {"f_statistic", "p_value"}; diff --git a/src/AggregateFunctions/AggregateFunctionArgMinMax.h b/src/AggregateFunctions/AggregateFunctionArgMinMax.h index decb572b019..568b70fe77e 100644 --- 
a/src/AggregateFunctions/AggregateFunctionArgMinMax.h +++ b/src/AggregateFunctions/AggregateFunctionArgMinMax.h @@ -38,7 +38,6 @@ template class AggregateFunctionArgMinMax final : public IAggregateFunctionDataHelper> { private: - const DataTypePtr & type_res; const DataTypePtr & type_val; const SerializationPtr serialization_res; const SerializationPtr serialization_val; @@ -47,10 +46,9 @@ private: public: AggregateFunctionArgMinMax(const DataTypePtr & type_res_, const DataTypePtr & type_val_) - : Base({type_res_, type_val_}, {}) - , type_res(this->argument_types[0]) + : Base({type_res_, type_val_}, {}, type_res_) , type_val(this->argument_types[1]) - , serialization_res(type_res->getDefaultSerialization()) + , serialization_res(type_res_->getDefaultSerialization()) , serialization_val(type_val->getDefaultSerialization()) { if (!type_val->isComparable()) @@ -63,11 +61,6 @@ public: return StringRef(Data::ValueData_t::name()) == StringRef("min") ? "argMin" : "argMax"; } - DataTypePtr getReturnType() const override - { - return type_res; - } - void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override { if (this->data(place).value.changeIfBetter(*columns[1], row_num, arena)) diff --git a/src/AggregateFunctions/AggregateFunctionArray.h b/src/AggregateFunctions/AggregateFunctionArray.h index c6e29e77318..c0e676c33e7 100644 --- a/src/AggregateFunctions/AggregateFunctionArray.h +++ b/src/AggregateFunctions/AggregateFunctionArray.h @@ -30,7 +30,7 @@ private: public: AggregateFunctionArray(AggregateFunctionPtr nested_, const DataTypes & arguments, const Array & params_) - : IAggregateFunctionHelper(arguments, params_) + : IAggregateFunctionHelper(arguments, params_, createResultType(nested_)) , nested_func(nested_), num_arguments(arguments.size()) { assert(parameters == nested_func->getParameters()); @@ -44,9 +44,9 @@ public: return nested_func->getName() + "Array"; } - DataTypePtr getReturnType() const override + static DataTypePtr createResultType(const AggregateFunctionPtr & nested_) { - return nested_func->getReturnType(); + return nested_->getResultType(); } const IAggregateFunction & getBaseAggregateFunctionWithSameStateRepresentation() const override diff --git a/src/AggregateFunctions/AggregateFunctionAvg.h b/src/AggregateFunctions/AggregateFunctionAvg.h index ee46a40023d..a86c7d042fc 100644 --- a/src/AggregateFunctions/AggregateFunctionAvg.h +++ b/src/AggregateFunctions/AggregateFunctionAvg.h @@ -10,6 +10,7 @@ #include #include #include +#include #include "config.h" @@ -83,10 +84,20 @@ public: using Fraction = AvgFraction; explicit AggregateFunctionAvgBase(const DataTypes & argument_types_, - UInt32 num_scale_ = 0, UInt32 denom_scale_ = 0) - : Base(argument_types_, {}), num_scale(num_scale_), denom_scale(denom_scale_) {} + UInt32 num_scale_ = 0, UInt32 denom_scale_ = 0) + : Base(argument_types_, {}, createResultType()) + , num_scale(num_scale_) + , denom_scale(denom_scale_) + {} - DataTypePtr getReturnType() const override { return std::make_shared>(); } + AggregateFunctionAvgBase(const DataTypes & argument_types_, const DataTypePtr & result_type_, + UInt32 num_scale_ = 0, UInt32 denom_scale_ = 0) + : Base(argument_types_, {}, result_type_) + , num_scale(num_scale_) + , denom_scale(denom_scale_) + {} + + DataTypePtr createResultType() const { return std::make_shared>(); } bool allocatesMemoryInArena() const override { return false; } @@ -135,7 +146,7 @@ public: for (const auto & argument : this->argument_types) can_be_compiled &= 
canBeNativeType(*argument); - auto return_type = getReturnType(); + auto return_type = this->getResultType(); can_be_compiled &= canBeNativeType(*return_type); return can_be_compiled; diff --git a/src/AggregateFunctions/AggregateFunctionBitwise.h b/src/AggregateFunctions/AggregateFunctionBitwise.h index b8d3bc79007..6c94a72bf32 100644 --- a/src/AggregateFunctions/AggregateFunctionBitwise.h +++ b/src/AggregateFunctions/AggregateFunctionBitwise.h @@ -97,11 +97,12 @@ class AggregateFunctionBitwise final : public IAggregateFunctionDataHelper>({type}, {}) {} + : IAggregateFunctionDataHelper>({type}, {}, createResultType()) + {} String getName() const override { return Data::name(); } - DataTypePtr getReturnType() const override + static DataTypePtr createResultType() { return std::make_shared>(); } @@ -137,7 +138,7 @@ public: bool isCompilable() const override { - auto return_type = getReturnType(); + auto return_type = this->getResultType(); return canBeNativeType(*return_type); } @@ -151,7 +152,7 @@ public: { llvm::IRBuilder<> & b = static_cast &>(builder); - auto * return_type = toNativeType(b, getReturnType()); + auto * return_type = toNativeType(b, this->getResultType()); auto * value_ptr = aggregate_data_ptr; auto * value = b.CreateLoad(return_type, value_ptr); @@ -166,7 +167,7 @@ public: { llvm::IRBuilder<> & b = static_cast &>(builder); - auto * return_type = toNativeType(b, getReturnType()); + auto * return_type = toNativeType(b, this->getResultType()); auto * value_dst_ptr = aggregate_data_dst_ptr; auto * value_dst = b.CreateLoad(return_type, value_dst_ptr); @@ -183,7 +184,7 @@ public: { llvm::IRBuilder<> & b = static_cast &>(builder); - auto * return_type = toNativeType(b, getReturnType()); + auto * return_type = toNativeType(b, this->getResultType()); auto * value_ptr = aggregate_data_ptr; return b.CreateLoad(return_type, value_ptr); diff --git a/src/AggregateFunctions/AggregateFunctionBoundingRatio.h b/src/AggregateFunctions/AggregateFunctionBoundingRatio.h index 34e3fa2f747..8fca88889b8 100644 --- a/src/AggregateFunctions/AggregateFunctionBoundingRatio.h +++ b/src/AggregateFunctions/AggregateFunctionBoundingRatio.h @@ -112,7 +112,7 @@ public: } explicit AggregateFunctionBoundingRatio(const DataTypes & arguments) - : IAggregateFunctionDataHelper(arguments, {}) + : IAggregateFunctionDataHelper(arguments, {}, std::make_shared()) { const auto * x_arg = arguments.at(0).get(); const auto * y_arg = arguments.at(1).get(); @@ -122,11 +122,6 @@ public: ErrorCodes::BAD_ARGUMENTS); } - DataTypePtr getReturnType() const override - { - return std::make_shared(); - } - bool allocatesMemoryInArena() const override { return false; } void add(AggregateDataPtr __restrict place, const IColumn ** columns, const size_t row_num, Arena *) const override diff --git a/src/AggregateFunctions/AggregateFunctionCategoricalInformationValue.cpp b/src/AggregateFunctions/AggregateFunctionCategoricalInformationValue.cpp index 93b5de0c5ab..65dce832789 100644 --- a/src/AggregateFunctions/AggregateFunctionCategoricalInformationValue.cpp +++ b/src/AggregateFunctions/AggregateFunctionCategoricalInformationValue.cpp @@ -46,9 +46,9 @@ private: } public: - AggregateFunctionCategoricalIV(const DataTypes & arguments_, const Array & params_) : - IAggregateFunctionHelper{arguments_, params_}, - category_count{arguments_.size() - 1} + AggregateFunctionCategoricalIV(const DataTypes & arguments_, const Array & params_) + : IAggregateFunctionHelper{arguments_, params_, createResultType()} + , category_count{arguments_.size() - 1} 
{ // notice: argument types has been checked before } @@ -121,7 +121,7 @@ public: buf.readStrict(place, sizeOfData()); } - DataTypePtr getReturnType() const override + static DataTypePtr createResultType() { return std::make_shared( std::make_shared>()); diff --git a/src/AggregateFunctions/AggregateFunctionCount.h b/src/AggregateFunctions/AggregateFunctionCount.h index 6e2c86f065b..91409463409 100644 --- a/src/AggregateFunctions/AggregateFunctionCount.h +++ b/src/AggregateFunctions/AggregateFunctionCount.h @@ -39,11 +39,13 @@ namespace ErrorCodes class AggregateFunctionCount final : public IAggregateFunctionDataHelper { public: - explicit AggregateFunctionCount(const DataTypes & argument_types_) : IAggregateFunctionDataHelper(argument_types_, {}) {} + explicit AggregateFunctionCount(const DataTypes & argument_types_) + : IAggregateFunctionDataHelper(argument_types_, {}, createResultType()) + {} String getName() const override { return "count"; } - DataTypePtr getReturnType() const override + static DataTypePtr createResultType() { return std::make_shared(); } @@ -167,7 +169,7 @@ public: { llvm::IRBuilder<> & b = static_cast &>(builder); - auto * return_type = toNativeType(b, getReturnType()); + auto * return_type = toNativeType(b, this->getResultType()); auto * count_value_ptr = aggregate_data_ptr; auto * count_value = b.CreateLoad(return_type, count_value_ptr); @@ -180,7 +182,7 @@ public: { llvm::IRBuilder<> & b = static_cast &>(builder); - auto * return_type = toNativeType(b, getReturnType()); + auto * return_type = toNativeType(b, this->getResultType()); auto * count_value_dst_ptr = aggregate_data_dst_ptr; auto * count_value_dst = b.CreateLoad(return_type, count_value_dst_ptr); @@ -197,7 +199,7 @@ public: { llvm::IRBuilder<> & b = static_cast &>(builder); - auto * return_type = toNativeType(b, getReturnType()); + auto * return_type = toNativeType(b, this->getResultType()); auto * count_value_ptr = aggregate_data_ptr; return b.CreateLoad(return_type, count_value_ptr); @@ -214,7 +216,7 @@ class AggregateFunctionCountNotNullUnary final { public: AggregateFunctionCountNotNullUnary(const DataTypePtr & argument, const Array & params) - : IAggregateFunctionDataHelper({argument}, params) + : IAggregateFunctionDataHelper({argument}, params, createResultType()) { if (!argument->isNullable()) throw Exception("Logical error: not Nullable data type passed to AggregateFunctionCountNotNullUnary", ErrorCodes::LOGICAL_ERROR); @@ -222,7 +224,7 @@ public: String getName() const override { return "count"; } - DataTypePtr getReturnType() const override + static DataTypePtr createResultType() { return std::make_shared(); } @@ -311,7 +313,7 @@ public: { llvm::IRBuilder<> & b = static_cast &>(builder); - auto * return_type = toNativeType(b, getReturnType()); + auto * return_type = toNativeType(b, this->getResultType()); auto * is_null_value = b.CreateExtractValue(values[0], {1}); auto * increment_value = b.CreateSelect(is_null_value, llvm::ConstantInt::get(return_type, 0), llvm::ConstantInt::get(return_type, 1)); @@ -327,7 +329,7 @@ public: { llvm::IRBuilder<> & b = static_cast &>(builder); - auto * return_type = toNativeType(b, getReturnType()); + auto * return_type = toNativeType(b, this->getResultType()); auto * count_value_dst_ptr = aggregate_data_dst_ptr; auto * count_value_dst = b.CreateLoad(return_type, count_value_dst_ptr); @@ -344,7 +346,7 @@ public: { llvm::IRBuilder<> & b = static_cast &>(builder); - auto * return_type = toNativeType(b, getReturnType()); + auto * return_type = toNativeType(b, 
this->getResultType()); auto * count_value_ptr = aggregate_data_ptr; return b.CreateLoad(return_type, count_value_ptr); diff --git a/src/AggregateFunctions/AggregateFunctionDeltaSum.h b/src/AggregateFunctions/AggregateFunctionDeltaSum.h index 36d0ef55346..199d2706d3a 100644 --- a/src/AggregateFunctions/AggregateFunctionDeltaSum.h +++ b/src/AggregateFunctions/AggregateFunctionDeltaSum.h @@ -31,7 +31,7 @@ class AggregationFunctionDeltaSum final { public: AggregationFunctionDeltaSum(const DataTypes & arguments, const Array & params) - : IAggregateFunctionDataHelper, AggregationFunctionDeltaSum>{arguments, params} + : IAggregateFunctionDataHelper, AggregationFunctionDeltaSum>{arguments, params, createResultType()} {} AggregationFunctionDeltaSum() @@ -40,7 +40,7 @@ public: String getName() const override { return "deltaSum"; } - DataTypePtr getReturnType() const override { return std::make_shared>(); } + static DataTypePtr createResultType() { return std::make_shared>(); } bool allocatesMemoryInArena() const override { return false; } diff --git a/src/AggregateFunctions/AggregateFunctionDeltaSumTimestamp.h b/src/AggregateFunctions/AggregateFunctionDeltaSumTimestamp.h index a311910de7f..5ca07bb0bdf 100644 --- a/src/AggregateFunctions/AggregateFunctionDeltaSumTimestamp.h +++ b/src/AggregateFunctions/AggregateFunctionDeltaSumTimestamp.h @@ -38,7 +38,7 @@ public: : IAggregateFunctionDataHelper< AggregationFunctionDeltaSumTimestampData, AggregationFunctionDeltaSumTimestamp - >{arguments, params} + >{arguments, params, createResultType()} {} AggregationFunctionDeltaSumTimestamp() @@ -52,7 +52,7 @@ public: String getName() const override { return "deltaSumTimestamp"; } - DataTypePtr getReturnType() const override { return std::make_shared>(); } + static DataTypePtr createResultType() { return std::make_shared>(); } void NO_SANITIZE_UNDEFINED ALWAYS_INLINE add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override { diff --git a/src/AggregateFunctions/AggregateFunctionDistinct.h b/src/AggregateFunctions/AggregateFunctionDistinct.h index 2d7362ba4cc..e09e0ef621d 100644 --- a/src/AggregateFunctions/AggregateFunctionDistinct.h +++ b/src/AggregateFunctions/AggregateFunctionDistinct.h @@ -168,7 +168,7 @@ private: public: AggregateFunctionDistinct(AggregateFunctionPtr nested_func_, const DataTypes & arguments, const Array & params_) - : IAggregateFunctionDataHelper(arguments, params_) + : IAggregateFunctionDataHelper(arguments, params_, nested_func_->getResultType()) , nested_func(nested_func_) , arguments_num(arguments.size()) { @@ -255,11 +255,6 @@ public: return nested_func->getName() + "Distinct"; } - DataTypePtr getReturnType() const override - { - return nested_func->getReturnType(); - } - bool allocatesMemoryInArena() const override { return true; diff --git a/src/AggregateFunctions/AggregateFunctionEntropy.h b/src/AggregateFunctions/AggregateFunctionEntropy.h index a51dd0537bf..9321b5c5825 100644 --- a/src/AggregateFunctions/AggregateFunctionEntropy.h +++ b/src/AggregateFunctions/AggregateFunctionEntropy.h @@ -92,14 +92,14 @@ private: public: explicit AggregateFunctionEntropy(const DataTypes & argument_types_) - : IAggregateFunctionDataHelper, AggregateFunctionEntropy>(argument_types_, {}) + : IAggregateFunctionDataHelper, AggregateFunctionEntropy>(argument_types_, {}, createResultType()) , num_args(argument_types_.size()) { } String getName() const override { return "entropy"; } - DataTypePtr getReturnType() const override + static DataTypePtr 
createResultType() { return std::make_shared>(); } diff --git a/src/AggregateFunctions/AggregateFunctionExponentialMovingAverage.cpp b/src/AggregateFunctions/AggregateFunctionExponentialMovingAverage.cpp index 2c055c37cca..bb48b3416be 100644 --- a/src/AggregateFunctions/AggregateFunctionExponentialMovingAverage.cpp +++ b/src/AggregateFunctions/AggregateFunctionExponentialMovingAverage.cpp @@ -29,7 +29,7 @@ private: public: AggregateFunctionExponentialMovingAverage(const DataTypes & argument_types_, const Array & params) - : IAggregateFunctionDataHelper(argument_types_, params) + : IAggregateFunctionDataHelper(argument_types_, params, createResultType()) { if (params.size() != 1) throw Exception{"Aggregate function " + getName() + " requires exactly one parameter: half decay time.", @@ -43,7 +43,7 @@ public: return "exponentialMovingAverage"; } - DataTypePtr getReturnType() const override + static DataTypePtr createResultType() { return std::make_shared>(); } diff --git a/src/AggregateFunctions/AggregateFunctionFactory.cpp b/src/AggregateFunctions/AggregateFunctionFactory.cpp index a8385ad8b59..38cc355b857 100644 --- a/src/AggregateFunctions/AggregateFunctionFactory.cpp +++ b/src/AggregateFunctions/AggregateFunctionFactory.cpp @@ -72,9 +72,12 @@ AggregateFunctionPtr AggregateFunctionFactory::get( { auto types_without_low_cardinality = convertLowCardinalityTypesToNested(argument_types); - /// If one of the types is Nullable, we apply aggregate function combinator "Null". - - if (std::any_of(types_without_low_cardinality.begin(), types_without_low_cardinality.end(), + /// If one of the types is Nullable, we apply aggregate function combinator "Null" if it's not window function. + /// Window functions are not real aggregate functions. Applying combinators doesn't make sense for them, + /// they must handle the nullability themselves + auto properties = tryGetPropertiesImpl(name); + bool is_window_function = properties.has_value() && properties->is_window_function; + if (!is_window_function && std::any_of(types_without_low_cardinality.begin(), types_without_low_cardinality.end(), [](const auto & type) { return type->isNullable(); })) { AggregateFunctionCombinatorPtr combinator = AggregateFunctionCombinatorFactory::instance().tryFindSuffix("Null"); diff --git a/src/AggregateFunctions/AggregateFunctionFlameGraph.cpp b/src/AggregateFunctions/AggregateFunctionFlameGraph.cpp deleted file mode 100644 index 5fc6b21926e..00000000000 --- a/src/AggregateFunctions/AggregateFunctionFlameGraph.cpp +++ /dev/null @@ -1,647 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace DB -{ -namespace ErrorCodes -{ - extern const int FUNCTION_NOT_ALLOWED; - extern const int NOT_IMPLEMENTED; - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int ILLEGAL_TYPE_OF_ARGUMENT; -} - -struct AggregateFunctionFlameGraphTree -{ - struct ListNode; - - struct TreeNode - { - TreeNode * parent = nullptr; - ListNode * children = nullptr; - UInt64 ptr = 0; - size_t allocated = 0; - }; - - struct ListNode - { - ListNode * next = nullptr; - TreeNode * child = nullptr; - }; - - TreeNode root; - - static ListNode * createChild(TreeNode * parent, UInt64 ptr, Arena * arena) - { - - ListNode * list_node = reinterpret_cast(arena->alloc(sizeof(ListNode))); - TreeNode * tree_node = reinterpret_cast(arena->alloc(sizeof(TreeNode))); - - list_node->child = tree_node; - list_node->next = nullptr; - - 
tree_node->parent =parent; - tree_node->children = nullptr; - tree_node->ptr = ptr; - tree_node->allocated = 0; - - return list_node; - } - - TreeNode * find(const UInt64 * stack, size_t stack_size, Arena * arena) - { - TreeNode * node = &root; - for (size_t i = 0; i < stack_size; ++i) - { - UInt64 ptr = stack[i]; - if (ptr == 0) - break; - - if (!node->children) - { - node->children = createChild(node, ptr, arena); - node = node->children->child; - } - else - { - ListNode * list = node->children; - while (list->child->ptr != ptr && list->next) - list = list->next; - - if (list->child->ptr != ptr) - { - list->next = createChild(node, ptr, arena); - list = list->next; - } - - node = list->child; - } - } - - return node; - } - - static void append(DB::PaddedPODArray & values, DB::PaddedPODArray & offsets, std::vector & frame) - { - UInt64 prev = offsets.empty() ? 0 : offsets.back(); - offsets.push_back(prev + frame.size()); - for (UInt64 val : frame) - values.push_back(val); - } - - struct Trace - { - using Frames = std::vector; - - Frames frames; - - /// The total number of bytes allocated for traces with the same prefix. - size_t allocated_total = 0; - /// This counter is relevant in case we want to filter some traces with small amount of bytes. - /// It shows the total number of bytes for *filtered* traces with the same prefix. - /// This is the value which is used in flamegraph. - size_t allocated_self = 0; - }; - - using Traces = std::vector; - - Traces dump(size_t max_depth, size_t min_bytes) const - { - Traces traces; - Trace::Frames frames; - std::vector allocated_total; - std::vector allocated_self; - std::vector nodes; - - nodes.push_back(root.children); - allocated_total.push_back(root.allocated); - allocated_self.push_back(root.allocated); - - while (!nodes.empty()) - { - if (nodes.back() == nullptr) - { - traces.push_back({frames, allocated_total.back(), allocated_self.back()}); - - nodes.pop_back(); - allocated_total.pop_back(); - allocated_self.pop_back(); - - /// We don't have root's frame so framers are empty in the end. - if (!frames.empty()) - frames.pop_back(); - - continue; - } - - TreeNode * current = nodes.back()->child; - nodes.back() = nodes.back()->next; - - bool enough_bytes = current->allocated >= min_bytes; - bool enough_depth = max_depth == 0 || nodes.size() < max_depth; - - if (enough_bytes) - { - frames.push_back(current->ptr); - allocated_self.back() -= current->allocated; - - if (enough_depth) - { - allocated_total.push_back(current->allocated); - allocated_self.push_back(current->allocated); - nodes.push_back(current->children); - } - else - { - traces.push_back({frames, current->allocated, current->allocated}); - frames.pop_back(); - } - } - } - - return traces; - } -}; - -static void insertData(DB::PaddedPODArray & chars, DB::PaddedPODArray & offsets, const char * pos, size_t length) -{ - const size_t old_size = chars.size(); - const size_t new_size = old_size + length + 1; - - chars.resize(new_size); - if (length) - memcpy(chars.data() + old_size, pos, length); - chars[old_size + length] = 0; - offsets.push_back(new_size); -} - -/// Split str by line feed and write as separate row to ColumnString. 
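For reference, the insertData/fillColumn helpers around this point pack rows into ClickHouse's ColumnString layout: one flat character buffer in which every row is zero-terminated, plus an offsets array whose i-th entry points just past the terminator of row i. A minimal standalone sketch of that scheme, using std::vector in place of PaddedPODArray (names here are illustrative, not ClickHouse API):

#include <cstring>
#include <iostream>
#include <string>
#include <vector>

// Append one row to a ColumnString-like layout: all bytes live in `chars`,
// each row is zero-terminated, and `offsets[i]` is the position just past
// the terminator of row i.
static void insertRow(std::vector<char> & chars, std::vector<size_t> & offsets,
                      const char * pos, size_t length)
{
    const size_t old_size = chars.size();
    chars.resize(old_size + length + 1);
    if (length)
        std::memcpy(chars.data() + old_size, pos, length);
    chars[old_size + length] = 0;
    offsets.push_back(chars.size());
}

// Split `str` by line feed and store every line as a separate row.
static void fillRows(std::vector<char> & chars, std::vector<size_t> & offsets,
                     const std::string & str)
{
    size_t start = 0;
    for (size_t end = 0; end < str.size(); ++end)
    {
        if (str[end] == '\n')
        {
            insertRow(chars, offsets, str.data() + start, end - start);
            start = end + 1;
        }
    }
    if (start < str.size())
        insertRow(chars, offsets, str.data() + start, str.size() - start);
}

int main()
{
    std::vector<char> chars;
    std::vector<size_t> offsets;
    fillRows(chars, offsets, "frame_a;frame_b 16\nframe_a;frame_c 32\n");

    size_t prev = 0;
    for (size_t off : offsets)
    {
        std::cout << std::string(chars.data() + prev) << '\n';  // each row is zero-terminated
        prev = off;
    }
}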
-static void fillColumn(DB::PaddedPODArray & chars, DB::PaddedPODArray & offsets, const std::string & str) -{ - size_t start = 0; - size_t end = 0; - size_t size = str.size(); - - while (end < size) - { - if (str[end] == '\n') - { - insertData(chars, offsets, str.data() + start, end - start); - start = end + 1; - } - - ++end; - } - - if (start < end) - insertData(chars, offsets, str.data() + start, end - start); -} - -void dumpFlameGraph( - const AggregateFunctionFlameGraphTree::Traces & traces, - DB::PaddedPODArray & chars, - DB::PaddedPODArray & offsets) -{ - DB::WriteBufferFromOwnString out; - - std::unordered_map mapping; - -#if defined(__ELF__) && !defined(OS_FREEBSD) - auto symbol_index_ptr = DB::SymbolIndex::instance(); - const DB::SymbolIndex & symbol_index = *symbol_index_ptr; -#endif - - for (const auto & trace : traces) - { - if (trace.allocated_self == 0) - continue; - - for (size_t i = 0; i < trace.frames.size(); ++i) - { - if (i) - out << ";"; - - const void * ptr = reinterpret_cast(trace.frames[i]); - -#if defined(__ELF__) && !defined(OS_FREEBSD) - if (const auto * symbol = symbol_index.findSymbol(ptr)) - writeString(demangle(symbol->name), out); - else - DB::writePointerHex(ptr, out); -#else - DB::writePointerHex(ptr, out); -#endif - } - - out << ' ' << trace.allocated_self << "\n"; - } - - fillColumn(chars, offsets, out.str()); -} - -struct AggregateFunctionFlameGraphData -{ - struct Entry - { - AggregateFunctionFlameGraphTree::TreeNode * trace; - UInt64 size; - Entry * next = nullptr; - }; - - struct Pair - { - Entry * allocation = nullptr; - Entry * deallocation = nullptr; - }; - - using Entries = HashMap; - - AggregateFunctionFlameGraphTree tree; - Entries entries; - Entry * free_list = nullptr; - - Entry * alloc(Arena * arena) - { - if (free_list) - { - auto * res = free_list; - free_list = free_list->next; - return res; - } - - return reinterpret_cast(arena->alloc(sizeof(Entry))); - } - - void release(Entry * entry) - { - entry->next = free_list; - free_list = entry; - } - - static void track(Entry * allocation) - { - auto * node = allocation->trace; - while (node) - { - node->allocated += allocation->size; - node = node->parent; - } - } - - static void untrack(Entry * allocation) - { - auto * node = allocation->trace; - while (node) - { - node->allocated -= allocation->size; - node = node->parent; - } - } - - static Entry * tryFindMatchAndRemove(Entry *& list, UInt64 size) - { - if (!list) - return nullptr; - - if (list->size == size) - { - Entry * entry = list; - list = list->next; - return entry; - } - else - { - Entry * parent = list; - while (parent->next && parent->next->size != size) - parent = parent->next; - - if (parent->next && parent->next->size == size) - { - Entry * entry = parent->next; - parent->next = entry->next; - return entry; - } - - return nullptr; - } - } - - void add(UInt64 ptr, Int64 size, const UInt64 * stack, size_t stack_size, Arena * arena) - { - /// In case if argument is nullptr, only track allocations. 
- if (ptr == 0) - { - if (size > 0) - { - auto * node = tree.find(stack, stack_size, arena); - Entry entry{.trace = node, .size = UInt64(size)}; - track(&entry); - } - - return; - } - - auto & place = entries[ptr]; - if (size > 0) - { - if (auto * deallocation = tryFindMatchAndRemove(place.deallocation, size)) - { - release(deallocation); - } - else - { - auto * node = tree.find(stack, stack_size, arena); - - auto * allocation = alloc(arena); - allocation->size = UInt64(size); - allocation->trace = node; - - track(allocation); - - allocation->next = place.allocation; - place.allocation = allocation; - } - } - else if (size < 0) - { - UInt64 abs_size = -size; - if (auto * allocation = tryFindMatchAndRemove(place.allocation, abs_size)) - { - untrack(allocation); - release(allocation); - } - else - { - auto * deallocation = alloc(arena); - deallocation->size = abs_size; - - deallocation->next = place.deallocation; - place.deallocation = deallocation; - } - } - } - - void merge(const AggregateFunctionFlameGraphTree & other_tree, Arena * arena) - { - AggregateFunctionFlameGraphTree::Trace::Frames frames; - std::vector nodes; - - nodes.push_back(other_tree.root.children); - - while (!nodes.empty()) - { - if (nodes.back() == nullptr) - { - nodes.pop_back(); - - /// We don't have root's frame so framers are empty in the end. - if (!frames.empty()) - frames.pop_back(); - - continue; - } - - AggregateFunctionFlameGraphTree::TreeNode * current = nodes.back()->child; - nodes.back() = nodes.back()->next; - - frames.push_back(current->ptr); - - if (current->children) - nodes.push_back(current->children); - else - { - if (current->allocated) - add(0, current->allocated, frames.data(), frames.size(), arena); - - frames.pop_back(); - } - } - } - - void merge(const AggregateFunctionFlameGraphData & other, Arena * arena) - { - AggregateFunctionFlameGraphTree::Trace::Frames frames; - for (const auto & entry : other.entries) - { - for (auto * allocation = entry.value.second.allocation; allocation; allocation = allocation->next) - { - frames.clear(); - const auto * node = allocation->trace; - while (node->ptr) - { - frames.push_back(node->ptr); - node = node->parent; - } - - std::reverse(frames.begin(), frames.end()); - add(entry.value.first, allocation->size, frames.data(), frames.size(), arena); - untrack(allocation); - } - - for (auto * deallocation = entry.value.second.deallocation; deallocation; deallocation = deallocation->next) - { - add(entry.value.first, -Int64(deallocation->size), nullptr, 0, arena); - } - } - - merge(other.tree, arena); - } - - void dumpFlameGraph( - DB::PaddedPODArray & chars, - DB::PaddedPODArray & offsets, - size_t max_depth, size_t min_bytes) const - { - DB::dumpFlameGraph(tree.dump(max_depth, min_bytes), chars, offsets); - } -}; - -/// Aggregate function which builds a flamegraph using the list of stacktraces. -/// The output is an array of strings which can be used by flamegraph.pl util. -/// See https://github.com/brendangregg/FlameGraph -/// -/// Syntax: flameGraph(traces, [size = 1], [ptr = 0]) -/// - trace : Array(UInt64), a stacktrace -/// - size : Int64, an allocation size (for memory profiling) -/// - ptr : UInt64, an allocation address -/// In case if ptr != 0, a flameGraph will map allocations (size > 0) and deallocations (size < 0) with the same size and ptr. -/// Only allocations which were not freed are shown. Not mapped deallocations are ignored. 
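The mapping rule just described (an allocation with size > 0 is cancelled by a later deallocation with the same ptr and |size|; unmatched deallocations are dropped) can be reduced to a self-contained sketch. This is only a model of the bookkeeping, not the removed implementation, which also attaches stack traces and merges states across aggregation places:

#include <cstdint>
#include <iostream>
#include <unordered_map>
#include <vector>

// Minimal model of the (ptr, size) matching rule: a positive size records an
// allocation, a negative size cancels one previously recorded allocation with
// the same ptr and |size|. Deallocations that match nothing are ignored.
struct LiveAllocations
{
    std::unordered_map<uint64_t, std::vector<uint64_t>> outstanding;

    void add(uint64_t ptr, int64_t size)
    {
        auto & sizes = outstanding[ptr];
        if (size > 0)
        {
            sizes.push_back(static_cast<uint64_t>(size));
            return;
        }
        const uint64_t abs_size = static_cast<uint64_t>(-size);
        for (size_t i = 0; i < sizes.size(); ++i)
        {
            if (sizes[i] == abs_size)
            {
                sizes.erase(sizes.begin() + i);  // the allocation was freed: drop it
                return;
            }
        }
        // unmatched deallocation: ignored
    }

    uint64_t liveBytes() const
    {
        uint64_t total = 0;
        for (const auto & [address, sizes] : outstanding)
            for (uint64_t s : sizes)
                total += s;
        return total;
    }
};

int main()
{
    LiveAllocations live;
    live.add(0x1000, 64);    // allocation
    live.add(0x2000, 128);   // allocation
    live.add(0x1000, -64);   // matches and cancels the first allocation
    live.add(0x3000, -32);   // no matching allocation: ignored
    std::cout << live.liveBytes() << '\n';  // prints 128
}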
-/// -/// Usage: -/// -/// * Build a flamegraph based on CPU query profiler -/// set query_profiler_cpu_time_period_ns=10000000; -/// SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; -/// clickhouse client --allow_introspection_functions=1 -/// -q "select arrayJoin(flameGraph(arrayReverse(trace))) from system.trace_log where trace_type = 'CPU' and query_id = 'xxx'" -/// | ~/dev/FlameGraph/flamegraph.pl > flame_cpu.svg -/// -/// * Build a flamegraph based on memory query profiler, showing all allocations -/// set memory_profiler_sample_probability=1, max_untracked_memory=1; -/// SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; -/// clickhouse client --allow_introspection_functions=1 -/// -q "select arrayJoin(flameGraph(trace, size)) from system.trace_log where trace_type = 'MemorySample' and query_id = 'xxx'" -/// | ~/dev/FlameGraph/flamegraph.pl --countname=bytes --color=mem > flame_mem.svg -/// -/// * Build a flamegraph based on memory query profiler, showing allocations which were not deallocated in query context -/// set memory_profiler_sample_probability=1, max_untracked_memory=1, use_uncompressed_cache=1, merge_tree_max_rows_to_use_cache=100000000000, merge_tree_max_bytes_to_use_cache=1000000000000; -/// SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; -/// clickhouse client --allow_introspection_functions=1 -/// -q "select arrayJoin(flameGraph(trace, size, ptr)) from system.trace_log where trace_type = 'MemorySample' and query_id = 'xxx'" -/// | ~/dev/FlameGraph/flamegraph.pl --countname=bytes --color=mem > flame_mem_untracked.svg -/// -/// * Build a flamegraph based on memory query profiler, showing active allocations at the fixed point of time -/// set memory_profiler_sample_probability=1, max_untracked_memory=1; -/// SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; -/// 1. Memory usage per second -/// select event_time, m, formatReadableSize(max(s) as m) from (select event_time, sum(size) over (order by event_time) as s from system.trace_log where query_id = 'xxx' and trace_type = 'MemorySample') group by event_time order by event_time; -/// 2. Find a time point with maximal memory usage -/// select argMax(event_time, s), max(s) from (select event_time, sum(size) over (order by event_time) as s from system.trace_log where query_id = 'xxx' and trace_type = 'MemorySample'); -/// 3. Fix active allocations at fixed point of time -/// clickhouse client --allow_introspection_functions=1 -/// -q "select arrayJoin(flameGraph(trace, size, ptr)) from (select * from system.trace_log where trace_type = 'MemorySample' and query_id = 'xxx' and event_time <= 'yyy' order by event_time)" -/// | ~/dev/FlameGraph/flamegraph.pl --countname=bytes --color=mem > flame_mem_time_point_pos.svg -/// 4. 
Find deallocations at fixed point of time -/// clickhouse client --allow_introspection_functions=1 -/// -q "select arrayJoin(flameGraph(trace, -size, ptr)) from (select * from system.trace_log where trace_type = 'MemorySample' and query_id = 'xxx' and event_time > 'yyy' order by event_time desc)" -/// | ~/dev/FlameGraph/flamegraph.pl --countname=bytes --color=mem > flame_mem_time_point_neg.svg -class AggregateFunctionFlameGraph final : public IAggregateFunctionDataHelper -{ -public: - explicit AggregateFunctionFlameGraph(const DataTypes & argument_types_) - : IAggregateFunctionDataHelper(argument_types_, {}) - {} - - String getName() const override { return "flameGraph"; } - - DataTypePtr getReturnType() const override - { - return std::make_shared(std::make_shared()); - } - - bool allocatesMemoryInArena() const override { return true; } - - void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override - { - const auto * trace = typeid_cast(columns[0]); - - const auto & trace_offsets = trace->getOffsets(); - const auto & trace_values = typeid_cast(&trace->getData())->getData(); - UInt64 prev_offset = 0; - if (row_num) - prev_offset = trace_offsets[row_num - 1]; - UInt64 trace_size = trace_offsets[row_num] - prev_offset; - - Int64 allocated = 1; - if (argument_types.size() >= 2) - { - const auto & sizes = typeid_cast(columns[1])->getData(); - allocated = sizes[row_num]; - } - - UInt64 ptr = 0; - if (argument_types.size() >= 3) - { - const auto & ptrs = typeid_cast(columns[2])->getData(); - ptr = ptrs[row_num]; - } - - this->data(place).add(ptr, allocated, trace_values.data() + prev_offset, trace_size, arena); - } - - void addManyDefaults( - AggregateDataPtr __restrict /*place*/, - const IColumn ** /*columns*/, - size_t /*length*/, - Arena * /*arena*/) const override - { - } - - void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override - { - this->data(place).merge(this->data(rhs), arena); - } - - void serialize(ConstAggregateDataPtr __restrict, WriteBuffer &, std::optional /* version */) const override - { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Serialization for function flameGraph is not implemented."); - } - - void deserialize(AggregateDataPtr __restrict, ReadBuffer &, std::optional /* version */, Arena *) const override - { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Deserialization for function flameGraph is not implemented."); - } - - void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override - { - auto & array = assert_cast(to); - auto & str = assert_cast(array.getData()); - - this->data(place).dumpFlameGraph(str.getChars(), str.getOffsets(), 0, 0); - - array.getOffsets().push_back(str.size()); - } -}; - -static void check(const std::string & name, const DataTypes & argument_types, const Array & params) -{ - assertNoParameters(name, params); - - if (argument_types.empty() || argument_types.size() > 3) - throw Exception( - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Aggregate function {} requires 1 to 3 arguments : trace, [size = 1], [ptr = 0]", - name); - - auto ptr_type = std::make_shared(); - auto trace_type = std::make_shared(ptr_type); - auto size_type = std::make_shared(); - - if (!argument_types[0]->equals(*trace_type)) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "First argument (trace) for function {} must be Array(UInt64), but it has type {}", - name, argument_types[0]->getName()); - - if (argument_types.size() 
>= 2 && !argument_types[1]->equals(*size_type)) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Second argument (size) for function {} must be Int64, but it has type {}", - name, argument_types[1]->getName()); - - if (argument_types.size() >= 3 && !argument_types[2]->equals(*ptr_type)) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Third argument (ptr) for function {} must be UInt64, but it has type {}", - name, argument_types[2]->getName()); -} - -AggregateFunctionPtr createAggregateFunctionFlameGraph(const std::string & name, const DataTypes & argument_types, const Array & params, const Settings * settings) -{ - if (!settings->allow_introspection_functions) - throw Exception(ErrorCodes::FUNCTION_NOT_ALLOWED, - "Introspection functions are disabled, because setting 'allow_introspection_functions' is set to 0"); - - check(name, argument_types, params); - return std::make_shared(argument_types); -} - -void registerAggregateFunctionFlameGraph(AggregateFunctionFactory & factory) -{ - AggregateFunctionProperties properties = { .returns_default_when_only_null = true, .is_order_dependent = true }; - - factory.registerFunction("flameGraph", { createAggregateFunctionFlameGraph, properties }); -} - -} diff --git a/src/AggregateFunctions/AggregateFunctionForEach.h b/src/AggregateFunctions/AggregateFunctionForEach.h index c91c4dd7c86..69102424bf7 100644 --- a/src/AggregateFunctions/AggregateFunctionForEach.h +++ b/src/AggregateFunctions/AggregateFunctionForEach.h @@ -107,7 +107,7 @@ private: public: AggregateFunctionForEach(AggregateFunctionPtr nested_, const DataTypes & arguments, const Array & params_) - : IAggregateFunctionDataHelper(arguments, params_) + : IAggregateFunctionDataHelper(arguments, params_, createResultType(nested_)) , nested_func(nested_), num_arguments(arguments.size()) { nested_size_of_data = nested_func->sizeOfData(); @@ -125,9 +125,9 @@ public: return nested_func->getName() + "ForEach"; } - DataTypePtr getReturnType() const override + static DataTypePtr createResultType(AggregateFunctionPtr nested_) { - return std::make_shared(nested_func->getReturnType()); + return std::make_shared(nested_->getResultType()); } bool isVersioned() const override diff --git a/src/AggregateFunctions/AggregateFunctionGroupArray.h b/src/AggregateFunctions/AggregateFunctionGroupArray.h index 89b382de819..f902cabb99a 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArray.h +++ b/src/AggregateFunctions/AggregateFunctionGroupArray.h @@ -121,7 +121,7 @@ public: explicit GroupArrayNumericImpl( const DataTypePtr & data_type_, const Array & parameters_, UInt64 max_elems_ = std::numeric_limits::max(), UInt64 seed_ = 123456) : IAggregateFunctionDataHelper, GroupArrayNumericImpl>( - {data_type_}, parameters_) + {data_type_}, parameters_, std::make_shared(data_type_)) , max_elems(max_elems_) , seed(seed_) { @@ -129,8 +129,6 @@ public: String getName() const override { return getNameByTrait(); } - DataTypePtr getReturnType() const override { return std::make_shared(this->argument_types[0]); } - void insert(Data & a, const T & v, Arena * arena) const { ++a.total_values; @@ -423,7 +421,7 @@ class GroupArrayGeneralImpl final public: GroupArrayGeneralImpl(const DataTypePtr & data_type_, const Array & parameters_, UInt64 max_elems_ = std::numeric_limits::max(), UInt64 seed_ = 123456) : IAggregateFunctionDataHelper, GroupArrayGeneralImpl>( - {data_type_}, parameters_) + {data_type_}, parameters_, std::make_shared(data_type_)) , data_type(this->argument_types[0]) , max_elems(max_elems_) 
, seed(seed_) @@ -432,8 +430,6 @@ public: String getName() const override { return getNameByTrait(); } - DataTypePtr getReturnType() const override { return std::make_shared(data_type); } - void insert(Data & a, const Node * v, Arena * arena) const { ++a.total_values; @@ -697,7 +693,7 @@ class GroupArrayGeneralListImpl final public: GroupArrayGeneralListImpl(const DataTypePtr & data_type_, const Array & parameters_, UInt64 max_elems_ = std::numeric_limits::max()) - : IAggregateFunctionDataHelper, GroupArrayGeneralListImpl>({data_type_}, parameters_) + : IAggregateFunctionDataHelper, GroupArrayGeneralListImpl>({data_type_}, parameters_, std::make_shared(data_type_)) , data_type(this->argument_types[0]) , max_elems(max_elems_) { @@ -705,8 +701,6 @@ public: String getName() const override { return getNameByTrait(); } - DataTypePtr getReturnType() const override { return std::make_shared(data_type); } - void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override { if (limit_num_elems && data(place).elems >= max_elems) diff --git a/src/AggregateFunctions/AggregateFunctionGroupArrayInsertAt.h b/src/AggregateFunctions/AggregateFunctionGroupArrayInsertAt.h index a1a2ce2669b..42fe4083de1 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArrayInsertAt.h +++ b/src/AggregateFunctions/AggregateFunctionGroupArrayInsertAt.h @@ -64,7 +64,7 @@ private: public: AggregateFunctionGroupArrayInsertAtGeneric(const DataTypes & arguments, const Array & params) - : IAggregateFunctionDataHelper(arguments, params) + : IAggregateFunctionDataHelper(arguments, params, std::make_shared(arguments[0])) , type(argument_types[0]) , serialization(type->getDefaultSerialization()) { @@ -101,11 +101,6 @@ public: String getName() const override { return "groupArrayInsertAt"; } - DataTypePtr getReturnType() const override - { - return std::make_shared(type); - } - bool allocatesMemoryInArena() const override { return false; } void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override diff --git a/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.h b/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.h index 40867b1949a..4444de793b4 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.h +++ b/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.h @@ -93,12 +93,15 @@ public: using ColumnResult = ColumnVectorOrDecimal; explicit MovingImpl(const DataTypePtr & data_type_, UInt64 window_size_ = std::numeric_limits::max()) - : IAggregateFunctionDataHelper>({data_type_}, {}) + : IAggregateFunctionDataHelper>({data_type_}, {}, createResultType(data_type_)) , window_size(window_size_) {} String getName() const override { return Data::name; } - DataTypePtr getReturnType() const override { return std::make_shared(getReturnTypeElement()); } + static DataTypePtr createResultType(const DataTypePtr & argument) + { + return std::make_shared(getReturnTypeElement(argument)); + } void NO_SANITIZE_UNDEFINED add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override { @@ -183,14 +186,14 @@ public: } private: - auto getReturnTypeElement() const + static auto getReturnTypeElement(const DataTypePtr & argument) { if constexpr (!is_decimal) return std::make_shared>(); else { using Res = DataTypeDecimal; - return std::make_shared(Res::maxPrecision(), getDecimalScale(*this->argument_types.at(0))); + return std::make_shared(Res::maxPrecision(), 
getDecimalScale(*argument)); } } }; diff --git a/src/AggregateFunctions/AggregateFunctionGroupBitmap.h b/src/AggregateFunctions/AggregateFunctionGroupBitmap.h index dacde67f3ca..5fe3128fa20 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupBitmap.h +++ b/src/AggregateFunctions/AggregateFunctionGroupBitmap.h @@ -19,13 +19,13 @@ class AggregateFunctionBitmap final : public IAggregateFunctionDataHelper>({type}, {}) + : IAggregateFunctionDataHelper>({type}, {}, createResultType()) { } String getName() const override { return Data::name(); } - DataTypePtr getReturnType() const override { return std::make_shared>(); } + static DataTypePtr createResultType() { return std::make_shared>(); } bool allocatesMemoryInArena() const override { return false; } @@ -59,13 +59,13 @@ private: static constexpr size_t STATE_VERSION_1_MIN_REVISION = 54455; public: explicit AggregateFunctionBitmapL2(const DataTypePtr & type) - : IAggregateFunctionDataHelper>({type}, {}) + : IAggregateFunctionDataHelper>({type}, {}, createResultType()) { } String getName() const override { return Policy::name; } - DataTypePtr getReturnType() const override { return std::make_shared>(); } + static DataTypePtr createResultType() { return std::make_shared>(); } bool allocatesMemoryInArena() const override { return false; } diff --git a/src/AggregateFunctions/AggregateFunctionGroupUniqArray.cpp b/src/AggregateFunctions/AggregateFunctionGroupUniqArray.cpp index da934531f96..4589f68280f 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupUniqArray.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupUniqArray.cpp @@ -26,8 +26,8 @@ class AggregateFunctionGroupUniqArrayDate : public AggregateFunctionGroupUniqArr { public: explicit AggregateFunctionGroupUniqArrayDate(const DataTypePtr & argument_type, const Array & parameters_, UInt64 max_elems_ = std::numeric_limits::max()) - : AggregateFunctionGroupUniqArray(argument_type, parameters_, max_elems_) {} - DataTypePtr getReturnType() const override { return std::make_shared(std::make_shared()); } + : AggregateFunctionGroupUniqArray(argument_type, parameters_, createResultType(), max_elems_) {} + static DataTypePtr createResultType() { return std::make_shared(std::make_shared()); } }; template @@ -35,8 +35,8 @@ class AggregateFunctionGroupUniqArrayDateTime : public AggregateFunctionGroupUni { public: explicit AggregateFunctionGroupUniqArrayDateTime(const DataTypePtr & argument_type, const Array & parameters_, UInt64 max_elems_ = std::numeric_limits::max()) - : AggregateFunctionGroupUniqArray(argument_type, parameters_, max_elems_) {} - DataTypePtr getReturnType() const override { return std::make_shared(std::make_shared()); } + : AggregateFunctionGroupUniqArray(argument_type, parameters_, createResultType(), max_elems_) {} + static DataTypePtr createResultType() { return std::make_shared(std::make_shared()); } }; template diff --git a/src/AggregateFunctions/AggregateFunctionGroupUniqArray.h b/src/AggregateFunctions/AggregateFunctionGroupUniqArray.h index 93db1644bd4..f8e426363d8 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupUniqArray.h +++ b/src/AggregateFunctions/AggregateFunctionGroupUniqArray.h @@ -50,15 +50,16 @@ private: public: AggregateFunctionGroupUniqArray(const DataTypePtr & argument_type, const Array & parameters_, UInt64 max_elems_ = std::numeric_limits::max()) : IAggregateFunctionDataHelper, - AggregateFunctionGroupUniqArray>({argument_type}, parameters_), + AggregateFunctionGroupUniqArray>({argument_type}, parameters_, 
std::make_shared(argument_type)), max_elems(max_elems_) {} - String getName() const override { return "groupUniqArray"; } + AggregateFunctionGroupUniqArray(const DataTypePtr & argument_type, const Array & parameters_, const DataTypePtr & result_type_, UInt64 max_elems_ = std::numeric_limits::max()) + : IAggregateFunctionDataHelper, + AggregateFunctionGroupUniqArray>({argument_type}, parameters_, result_type_), + max_elems(max_elems_) {} - DataTypePtr getReturnType() const override - { - return std::make_shared(this->argument_types[0]); - } + + String getName() const override { return "groupUniqArray"; } bool allocatesMemoryInArena() const override { return false; } @@ -153,17 +154,12 @@ class AggregateFunctionGroupUniqArrayGeneric public: AggregateFunctionGroupUniqArrayGeneric(const DataTypePtr & input_data_type_, const Array & parameters_, UInt64 max_elems_ = std::numeric_limits::max()) - : IAggregateFunctionDataHelper>({input_data_type_}, parameters_) + : IAggregateFunctionDataHelper>({input_data_type_}, parameters_, std::make_shared(input_data_type_)) , input_data_type(this->argument_types[0]) , max_elems(max_elems_) {} String getName() const override { return "groupUniqArray"; } - DataTypePtr getReturnType() const override - { - return std::make_shared(input_data_type); - } - bool allocatesMemoryInArena() const override { return true; diff --git a/src/AggregateFunctions/AggregateFunctionHistogram.h b/src/AggregateFunctions/AggregateFunctionHistogram.h index fbd92aa8220..c559b3f115f 100644 --- a/src/AggregateFunctions/AggregateFunctionHistogram.h +++ b/src/AggregateFunctions/AggregateFunctionHistogram.h @@ -307,7 +307,7 @@ private: public: AggregateFunctionHistogram(UInt32 max_bins_, const DataTypes & arguments, const Array & params) - : IAggregateFunctionDataHelper>(arguments, params) + : IAggregateFunctionDataHelper>(arguments, params, createResultType()) , max_bins(max_bins_) { } @@ -316,7 +316,7 @@ public: { return Data::structSize(max_bins); } - DataTypePtr getReturnType() const override + static DataTypePtr createResultType() { DataTypes types; auto mean = std::make_shared>(); diff --git a/src/AggregateFunctions/AggregateFunctionIf.cpp b/src/AggregateFunctions/AggregateFunctionIf.cpp index c32454b10e4..ba4faec3aa1 100644 --- a/src/AggregateFunctions/AggregateFunctionIf.cpp +++ b/src/AggregateFunctions/AggregateFunctionIf.cpp @@ -23,7 +23,7 @@ public: throw Exception("Incorrect number of arguments for aggregate function with " + getName() + " suffix", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - if (!isUInt8(arguments.back())) + if (!isUInt8(arguments.back()) && !arguments.back()->onlyNull()) throw Exception("Illegal type " + arguments.back()->getName() + " of last argument for aggregate function with " + getName() + " suffix", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); @@ -52,6 +52,7 @@ class AggregateFunctionIfNullUnary final private: size_t num_arguments; bool filter_is_nullable = false; + bool filter_is_only_null = false; /// The name of the nested function, including combinators (i.e. 
*If) /// @@ -84,10 +85,8 @@ private: return assert_cast(*filter_column).getData()[row_num] && !filter_null_map[row_num]; } - else - { - return assert_cast(*filter_column).getData()[row_num]; - } + + return assert_cast(*filter_column).getData()[row_num]; } public: @@ -106,10 +105,14 @@ public: "Aggregate function {} require at least one argument", getName()); filter_is_nullable = arguments[num_arguments - 1]->isNullable(); + filter_is_only_null = arguments[num_arguments - 1]->onlyNull(); } void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override { + if (filter_is_only_null) + return; + const ColumnNullable * column = assert_cast(columns[0]); const IColumn * nested_column = &column->getNestedColumn(); if (!column->isNullAt(row_num) && singleFilter(columns, row_num)) @@ -127,6 +130,9 @@ public: Arena * arena, ssize_t) const override { + if (filter_is_only_null) + return; + const ColumnNullable * column = assert_cast(columns[0]); const UInt8 * null_map = column->getNullMapData().data(); const IColumn * columns_param[] = {&column->getNestedColumn()}; @@ -177,6 +183,11 @@ public: #if USE_EMBEDDED_COMPILER + bool isCompilable() const override + { + return canBeNativeType(*this->argument_types.back()) && this->nested_function->isCompilable(); + } + void compileAdd(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, const DataTypes & arguments_types, const std::vector & argument_values) const override { llvm::IRBuilder<> & b = static_cast &>(builder); @@ -224,6 +235,9 @@ class AggregateFunctionIfNullVariadic final : public AggregateFunctionNullBase< serialize_flag, AggregateFunctionIfNullVariadic> { +private: + bool filter_is_only_null = false; + public: String getName() const override @@ -243,6 +257,8 @@ public: for (size_t i = 0; i < number_of_arguments; ++i) is_nullable[i] = arguments[i]->isNullable(); + + filter_is_only_null = arguments.back()->onlyNull(); } static inline bool singleFilter(const IColumn ** columns, size_t row_num, size_t num_arguments) @@ -282,6 +298,9 @@ public: void addBatchSinglePlace( size_t row_begin, size_t row_end, AggregateDataPtr __restrict place, const IColumn ** columns, Arena * arena, ssize_t) const final { + if (filter_is_only_null) + return; + std::unique_ptr final_null_flags = std::make_unique(row_end); const size_t filter_column_num = number_of_arguments - 1; @@ -346,6 +365,11 @@ public: #if USE_EMBEDDED_COMPILER + bool isCompilable() const override + { + return canBeNativeType(*this->argument_types.back()) && this->nested_function->isCompilable(); + } + void compileAdd(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, const DataTypes & arguments_types, const std::vector & argument_values) const override { /// TODO: Check @@ -448,7 +472,7 @@ AggregateFunctionPtr AggregateFunctionIf::getOwnNullAdapter( /// Nullability of the last argument (condition) does not affect the nullability of the result (NULL is processed as false). /// For other arguments it is as usual (at least one is NULL then the result is NULL if possible). 
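A compact illustration of that rule, with hypothetical helper names (the real adapter additionally consults returns_default_when_only_null and canBeInsideNullable()): a NULL condition never selects a row, and only the non-condition arguments decide whether the result may be Nullable.

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

// NULL in the condition column is processed as false: a row passes the -If
// filter only when the condition is non-NULL and non-zero.
static bool passesIfFilter(uint8_t condition_value, bool condition_is_null)
{
    return !condition_is_null && condition_value != 0;
}

// Result nullability is decided by every argument except the last one
// (the condition): if any of them is Nullable, the result may be Nullable.
static bool resultIsNullable(const std::vector<bool> & argument_is_nullable)
{
    if (argument_is_nullable.size() < 2)
        return false;
    return std::any_of(argument_is_nullable.begin(), argument_is_nullable.end() - 1,
                       [](bool is_nullable) { return is_nullable; });
}

int main()
{
    std::cout << passesIfFilter(1, false) << '\n';  // 1: condition is true
    std::cout << passesIfFilter(1, true)  << '\n';  // 0: NULL condition acts as false

    // e.g. sumIf(Nullable(UInt64), Nullable(UInt8)): value argument is Nullable -> result may be Nullable
    std::cout << resultIsNullable({true, true}) << '\n';   // 1
    // e.g. sumIf(UInt64, Nullable(UInt8)): only the condition is Nullable -> result stays non-Nullable
    std::cout << resultIsNullable({false, true}) << '\n';  // 0
}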
- bool return_type_is_nullable = !properties.returns_default_when_only_null && getReturnType()->canBeInsideNullable() + bool return_type_is_nullable = !properties.returns_default_when_only_null && getResultType()->canBeInsideNullable() && std::any_of(arguments.begin(), arguments.end() - 1, [](const auto & element) { return element->isNullable(); }); bool need_to_serialize_flag = return_type_is_nullable || properties.returns_default_when_only_null; diff --git a/src/AggregateFunctions/AggregateFunctionIf.h b/src/AggregateFunctions/AggregateFunctionIf.h index ccc4809dd06..b5199a40aeb 100644 --- a/src/AggregateFunctions/AggregateFunctionIf.h +++ b/src/AggregateFunctions/AggregateFunctionIf.h @@ -36,13 +36,13 @@ private: public: AggregateFunctionIf(AggregateFunctionPtr nested, const DataTypes & types, const Array & params_) - : IAggregateFunctionHelper(types, params_) + : IAggregateFunctionHelper(types, params_, nested->getResultType()) , nested_func(nested), num_arguments(types.size()) { if (num_arguments == 0) throw Exception("Aggregate function " + getName() + " require at least one argument", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - if (!isUInt8(types.back())) + if (!isUInt8(types.back()) && !types.back()->onlyNull()) throw Exception("Last argument for aggregate function " + getName() + " must be UInt8", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } @@ -51,11 +51,6 @@ public: return nested_func->getName() + "If"; } - DataTypePtr getReturnType() const override - { - return nested_func->getReturnType(); - } - const IAggregateFunction & getBaseAggregateFunctionWithSameStateRepresentation() const override { return nested_func->getBaseAggregateFunctionWithSameStateRepresentation(); @@ -204,12 +199,16 @@ public: AggregateFunctionPtr getNestedFunction() const override { return nested_func; } + std::unordered_set getArgumentsThatCanBeOnlyNull() const override + { + return {num_arguments - 1}; + } #if USE_EMBEDDED_COMPILER bool isCompilable() const override { - return nested_func->isCompilable(); + return canBeNativeType(*this->argument_types.back()) && nested_func->isCompilable(); } void compileCreate(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr) const override diff --git a/src/AggregateFunctions/AggregateFunctionIntervalLengthSum.h b/src/AggregateFunctions/AggregateFunctionIntervalLengthSum.h index fdde50074aa..5b01da66364 100644 --- a/src/AggregateFunctions/AggregateFunctionIntervalLengthSum.h +++ b/src/AggregateFunctions/AggregateFunctionIntervalLengthSum.h @@ -177,11 +177,11 @@ public: String getName() const override { return "intervalLengthSum"; } explicit AggregateFunctionIntervalLengthSum(const DataTypes & arguments) - : IAggregateFunctionDataHelper>(arguments, {}) + : IAggregateFunctionDataHelper>(arguments, {}, createResultType()) { } - DataTypePtr getReturnType() const override + static DataTypePtr createResultType() { if constexpr (std::is_floating_point_v) return std::make_shared(); diff --git a/src/AggregateFunctions/AggregateFunctionMLMethod.h b/src/AggregateFunctions/AggregateFunctionMLMethod.h index b9d5d835f57..6545ee4fd53 100644 --- a/src/AggregateFunctions/AggregateFunctionMLMethod.h +++ b/src/AggregateFunctions/AggregateFunctionMLMethod.h @@ -309,7 +309,7 @@ public: UInt64 batch_size_, const DataTypes & arguments_types, const Array & params) - : IAggregateFunctionDataHelper>(arguments_types, params) + : IAggregateFunctionDataHelper>(arguments_types, params, createResultType()) , param_num(param_num_) , learning_rate(learning_rate_) , 
l2_reg_coef(l2_reg_coef_) @@ -319,8 +319,7 @@ public: { } - /// This function is called when SELECT linearRegression(...) is called - DataTypePtr getReturnType() const override + static DataTypePtr createResultType() { return std::make_shared(std::make_shared()); } diff --git a/src/AggregateFunctions/AggregateFunctionMannWhitney.h b/src/AggregateFunctions/AggregateFunctionMannWhitney.h index d861eef10ab..6176d6854fc 100644 --- a/src/AggregateFunctions/AggregateFunctionMannWhitney.h +++ b/src/AggregateFunctions/AggregateFunctionMannWhitney.h @@ -133,7 +133,7 @@ private: public: explicit AggregateFunctionMannWhitney(const DataTypes & arguments, const Array & params) - :IAggregateFunctionDataHelper ({arguments}, {}) + : IAggregateFunctionDataHelper ({arguments}, {}, createResultType()) { if (params.size() > 2) throw Exception("Aggregate function " + getName() + " require two parameter or less", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); @@ -174,7 +174,7 @@ public: bool allocatesMemoryInArena() const override { return true; } - DataTypePtr getReturnType() const override + static DataTypePtr createResultType() { DataTypes types { diff --git a/src/AggregateFunctions/AggregateFunctionMap.h b/src/AggregateFunctions/AggregateFunctionMap.h index f60cc71e78e..dc19bf3f71c 100644 --- a/src/AggregateFunctions/AggregateFunctionMap.h +++ b/src/AggregateFunctions/AggregateFunctionMap.h @@ -18,6 +18,7 @@ #include #include #include +#include "DataTypes/Serializations/ISerialization.h" #include "base/types.h" #include #include "AggregateFunctions/AggregateFunctionFactory.h" @@ -104,26 +105,32 @@ public: return nested_func->getDefaultVersion(); } - AggregateFunctionMap(AggregateFunctionPtr nested, const DataTypes & types) : Base(types, nested->getParameters()), nested_func(nested) + AggregateFunctionMap(AggregateFunctionPtr nested, const DataTypes & types) + : Base(types, nested->getParameters(), std::make_shared(DataTypes{getKeyType(types, nested), nested->getResultType()})) + , nested_func(nested) { - if (types.empty()) - throw Exception( - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function " + getName() + " requires at least one argument"); - - if (types.size() > 1) - throw Exception( - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function " + getName() + " requires only one map argument"); - - const auto * map_type = checkAndGetDataType(types[0].get()); - if (!map_type) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function " + getName() + " requires map as argument"); - - key_type = map_type->getKeyType(); + key_type = getKeyType(types, nested_func); } String getName() const override { return nested_func->getName() + "Map"; } - DataTypePtr getReturnType() const override { return std::make_shared(DataTypes{key_type, nested_func->getReturnType()}); } + static DataTypePtr getKeyType(const DataTypes & types, const AggregateFunctionPtr & nested) + { + if (types.empty()) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Aggregate function {}Map requires at least one argument", nested->getName()); + + if (types.size() > 1) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Aggregate function {}Map requires only one map argument", nested->getName()); + + const auto * map_type = checkAndGetDataType(types[0].get()); + if (!map_type) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Aggregate function {}Map requires map as argument", nested->getName()); + + return map_type->getKeyType(); + } void add(AggregateDataPtr 
__restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override { diff --git a/src/AggregateFunctions/AggregateFunctionMaxIntersections.h b/src/AggregateFunctions/AggregateFunctionMaxIntersections.h index d2f553172c9..e78684c9491 100644 --- a/src/AggregateFunctions/AggregateFunctionMaxIntersections.h +++ b/src/AggregateFunctions/AggregateFunctionMaxIntersections.h @@ -62,7 +62,8 @@ private: public: AggregateFunctionIntersectionsMax(AggregateFunctionIntersectionsKind kind_, const DataTypes & arguments) - : IAggregateFunctionDataHelper, AggregateFunctionIntersectionsMax>(arguments, {}), kind(kind_) + : IAggregateFunctionDataHelper, AggregateFunctionIntersectionsMax>(arguments, {}, createResultType(kind_)) + , kind(kind_) { if (!isNativeNumber(arguments[0])) throw Exception{getName() + ": first argument must be represented by integer", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; @@ -81,9 +82,9 @@ public: : "maxIntersectionsPosition"; } - DataTypePtr getReturnType() const override + static DataTypePtr createResultType(AggregateFunctionIntersectionsKind kind_) { - if (kind == AggregateFunctionIntersectionsKind::Count) + if (kind_ == AggregateFunctionIntersectionsKind::Count) return std::make_shared(); else return std::make_shared>(); diff --git a/src/AggregateFunctions/AggregateFunctionMeanZTest.h b/src/AggregateFunctions/AggregateFunctionMeanZTest.h index 7fecff591e6..97925d4e07c 100644 --- a/src/AggregateFunctions/AggregateFunctionMeanZTest.h +++ b/src/AggregateFunctions/AggregateFunctionMeanZTest.h @@ -36,7 +36,7 @@ private: public: AggregateFunctionMeanZTest(const DataTypes & arguments, const Array & params) - : IAggregateFunctionDataHelper>({arguments}, params) + : IAggregateFunctionDataHelper>({arguments}, params, createResultType()) { pop_var_x = params.at(0).safeGet(); pop_var_y = params.at(1).safeGet(); @@ -63,7 +63,7 @@ public: return Data::name; } - DataTypePtr getReturnType() const override + static DataTypePtr createResultType() { DataTypes types { diff --git a/src/AggregateFunctions/AggregateFunctionMerge.h b/src/AggregateFunctions/AggregateFunctionMerge.h index bb2d36eeed1..0cb44259816 100644 --- a/src/AggregateFunctions/AggregateFunctionMerge.h +++ b/src/AggregateFunctions/AggregateFunctionMerge.h @@ -30,7 +30,7 @@ private: public: AggregateFunctionMerge(const AggregateFunctionPtr & nested_, const DataTypePtr & argument, const Array & params_) - : IAggregateFunctionHelper({argument}, params_) + : IAggregateFunctionHelper({argument}, params_, createResultType(nested_)) , nested_func(nested_) { const DataTypeAggregateFunction * data_type = typeid_cast(argument.get()); @@ -45,9 +45,9 @@ public: return nested_func->getName() + "Merge"; } - DataTypePtr getReturnType() const override + static DataTypePtr createResultType(const AggregateFunctionPtr & nested_) { - return nested_func->getReturnType(); + return nested_->getResultType(); } const IAggregateFunction & getBaseAggregateFunctionWithSameStateRepresentation() const override diff --git a/src/AggregateFunctions/AggregateFunctionMinMaxAny.h b/src/AggregateFunctions/AggregateFunctionMinMaxAny.h index a6013f37b9d..314e68f83d9 100644 --- a/src/AggregateFunctions/AggregateFunctionMinMaxAny.h +++ b/src/AggregateFunctions/AggregateFunctionMinMaxAny.h @@ -1222,7 +1222,7 @@ private: public: explicit AggregateFunctionsSingleValue(const DataTypePtr & type) - : IAggregateFunctionDataHelper>({type}, {}) + : IAggregateFunctionDataHelper>({type}, {}, createResultType(type)) , serialization(type->getDefaultSerialization()) { 
if (StringRef(Data::name()) == StringRef("min") @@ -1236,12 +1236,11 @@ public: String getName() const override { return Data::name(); } - DataTypePtr getReturnType() const override + static DataTypePtr createResultType(const DataTypePtr & type_) { - auto result_type = this->argument_types.at(0); if constexpr (Data::is_nullable) - return makeNullable(result_type); - return result_type; + return makeNullable(type_); + return type_; } void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override diff --git a/src/AggregateFunctions/AggregateFunctionNothing.h b/src/AggregateFunctions/AggregateFunctionNothing.h index 13ef407be8b..de8a5868e04 100644 --- a/src/AggregateFunctions/AggregateFunctionNothing.h +++ b/src/AggregateFunctions/AggregateFunctionNothing.h @@ -6,6 +6,7 @@ #include #include #include +#include "DataTypes/IDataType.h" namespace DB @@ -19,16 +20,16 @@ class AggregateFunctionNothing final : public IAggregateFunctionHelper(arguments, params) {} + : IAggregateFunctionHelper(arguments, params, createResultType(arguments)) {} String getName() const override { return "nothing"; } - DataTypePtr getReturnType() const override + static DataTypePtr createResultType(const DataTypes & arguments) { - return argument_types.empty() ? std::make_shared(std::make_shared()) : argument_types.front(); + return arguments.empty() ? std::make_shared(std::make_shared()) : arguments.front(); } bool allocatesMemoryInArena() const override { return false; } diff --git a/src/AggregateFunctions/AggregateFunctionNull.cpp b/src/AggregateFunctions/AggregateFunctionNull.cpp index 01558b56667..f02c97b08b7 100644 --- a/src/AggregateFunctions/AggregateFunctionNull.cpp +++ b/src/AggregateFunctions/AggregateFunctionNull.cpp @@ -29,7 +29,13 @@ public: size_t size = arguments.size(); DataTypes res(size); for (size_t i = 0; i < size; ++i) - res[i] = removeNullable(arguments[i]); + { + /// Nullable(Nothing) is processed separately, don't convert it to Nothing. 
+ if (arguments[i]->onlyNull()) + res[i] = arguments[i]; + else + res[i] = removeNullable(arguments[i]); + } return res; } @@ -41,12 +47,16 @@ public: { bool has_nullable_types = false; bool has_null_types = false; - for (const auto & arg_type : arguments) + std::unordered_set arguments_that_can_be_only_null; + if (nested_function) + arguments_that_can_be_only_null = nested_function->getArgumentsThatCanBeOnlyNull(); + + for (size_t i = 0; i < arguments.size(); ++i) { - if (arg_type->isNullable()) + if (arguments[i]->isNullable()) { has_nullable_types = true; - if (arg_type->onlyNull()) + if (arguments[i]->onlyNull() && !arguments_that_can_be_only_null.contains(i)) { has_null_types = true; break; @@ -87,7 +97,7 @@ public: transformed_nested_function->getParameters()); } - bool return_type_is_nullable = !properties.returns_default_when_only_null && nested_function->getReturnType()->canBeInsideNullable(); + bool return_type_is_nullable = !properties.returns_default_when_only_null && nested_function->getResultType()->canBeInsideNullable(); bool serialize_flag = return_type_is_nullable || properties.returns_default_when_only_null; if (arguments.size() == 1) diff --git a/src/AggregateFunctions/AggregateFunctionNull.h b/src/AggregateFunctions/AggregateFunctionNull.h index 26d36b84860..ae5573a5351 100644 --- a/src/AggregateFunctions/AggregateFunctionNull.h +++ b/src/AggregateFunctions/AggregateFunctionNull.h @@ -85,7 +85,8 @@ protected: public: AggregateFunctionNullBase(AggregateFunctionPtr nested_function_, const DataTypes & arguments, const Array & params) - : IAggregateFunctionHelper(arguments, params), nested_function{nested_function_} + : IAggregateFunctionHelper(arguments, params, createResultType(nested_function_)) + , nested_function{nested_function_} { if constexpr (result_is_nullable) prefix_size = nested_function->alignOfData(); @@ -99,12 +100,12 @@ public: return nested_function->getName(); } - DataTypePtr getReturnType() const override + static DataTypePtr createResultType(const AggregateFunctionPtr & nested_function_) { if constexpr (result_is_nullable) - return makeNullable(nested_function->getReturnType()); + return makeNullable(nested_function_->getResultType()); else - return nested_function->getReturnType(); + return nested_function_->getResultType(); } void create(AggregateDataPtr __restrict place) const override @@ -275,7 +276,7 @@ public: { llvm::IRBuilder<> & b = static_cast &>(builder); - auto * return_type = toNativeType(b, this->getReturnType()); + auto * return_type = toNativeType(b, this->getResultType()); llvm::Value * result = nullptr; diff --git a/src/AggregateFunctions/AggregateFunctionOrFill.h b/src/AggregateFunctions/AggregateFunctionOrFill.h index eff4fb2bdc0..eeec630be9a 100644 --- a/src/AggregateFunctions/AggregateFunctionOrFill.h +++ b/src/AggregateFunctions/AggregateFunctionOrFill.h @@ -30,16 +30,14 @@ private: AggregateFunctionPtr nested_function; size_t size_of_data; - DataTypePtr inner_type; bool inner_nullable; public: AggregateFunctionOrFill(AggregateFunctionPtr nested_function_, const DataTypes & arguments, const Array & params) - : IAggregateFunctionHelper{arguments, params} + : IAggregateFunctionHelper{arguments, params, createResultType(nested_function_->getResultType())} , nested_function{nested_function_} , size_of_data {nested_function->sizeOfData()} - , inner_type {nested_function->getReturnType()} - , inner_nullable {inner_type->isNullable()} + , inner_nullable {nested_function->getResultType()->isNullable()} { // nothing } @@ -246,22 +244,22 @@ 
public: readChar(place[size_of_data], buf); } - DataTypePtr getReturnType() const override + static DataTypePtr createResultType(const DataTypePtr & inner_type_) { if constexpr (UseNull) { // -OrNull - if (inner_nullable) - return inner_type; + if (inner_type_->isNullable()) + return inner_type_; - return std::make_shared(inner_type); + return std::make_shared(inner_type_); } else { // -OrDefault - return inner_type; + return inner_type_; } } diff --git a/src/AggregateFunctions/AggregateFunctionQuantile.h b/src/AggregateFunctions/AggregateFunctionQuantile.h index 39a9e09dc64..6427d03f089 100644 --- a/src/AggregateFunctions/AggregateFunctionQuantile.h +++ b/src/AggregateFunctions/AggregateFunctionQuantile.h @@ -72,7 +72,7 @@ private: public: AggregateFunctionQuantile(const DataTypes & argument_types_, const Array & params) : IAggregateFunctionDataHelper>( - argument_types_, params) + argument_types_, params, createResultType(argument_types_)) , levels(params, returns_many) , level(levels.levels[0]) , argument_type(this->argument_types[0]) @@ -83,14 +83,14 @@ public: String getName() const override { return Name::name; } - DataTypePtr getReturnType() const override + static DataTypePtr createResultType(const DataTypes & argument_types_) { DataTypePtr res; if constexpr (returns_float) res = std::make_shared>(); else - res = argument_type; + res = argument_types_[0]; if constexpr (returns_many) return std::make_shared(res); diff --git a/src/AggregateFunctions/AggregateFunctionRankCorrelation.h b/src/AggregateFunctions/AggregateFunctionRankCorrelation.h index 4a81c6cda82..4f9ca55f9f5 100644 --- a/src/AggregateFunctions/AggregateFunctionRankCorrelation.h +++ b/src/AggregateFunctions/AggregateFunctionRankCorrelation.h @@ -51,7 +51,7 @@ class AggregateFunctionRankCorrelation : { public: explicit AggregateFunctionRankCorrelation(const DataTypes & arguments) - :IAggregateFunctionDataHelper ({arguments}, {}) + :IAggregateFunctionDataHelper ({arguments}, {}, std::make_shared>()) {} String getName() const override @@ -61,11 +61,6 @@ public: bool allocatesMemoryInArena() const override { return true; } - DataTypePtr getReturnType() const override - { - return std::make_shared>(); - } - void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override { Float64 new_x = columns[0]->getFloat64(row_num); diff --git a/src/AggregateFunctions/AggregateFunctionResample.h b/src/AggregateFunctions/AggregateFunctionResample.h index fe04ada1a77..32458557ac5 100644 --- a/src/AggregateFunctions/AggregateFunctionResample.h +++ b/src/AggregateFunctions/AggregateFunctionResample.h @@ -43,7 +43,7 @@ public: size_t step_, const DataTypes & arguments, const Array & params) - : IAggregateFunctionHelper>{arguments, params} + : IAggregateFunctionHelper>{arguments, params, createResultType(nested_function_)} , nested_function{nested_function_} , last_col{arguments.size() - 1} , begin{begin_} @@ -190,9 +190,9 @@ public: nested_function->deserialize(place + i * size_of_data, buf, version, arena); } - DataTypePtr getReturnType() const override + static DataTypePtr createResultType(const AggregateFunctionPtr & nested_function_) { - return std::make_shared(nested_function->getReturnType()); + return std::make_shared(nested_function_->getResultType()); } template diff --git a/src/AggregateFunctions/AggregateFunctionRetention.h b/src/AggregateFunctions/AggregateFunctionRetention.h index 18d04fb1ea4..744b6d18f97 100644 --- a/src/AggregateFunctions/AggregateFunctionRetention.h +++ 
b/src/AggregateFunctions/AggregateFunctionRetention.h @@ -76,7 +76,7 @@ public: } explicit AggregateFunctionRetention(const DataTypes & arguments) - : IAggregateFunctionDataHelper(arguments, {}) + : IAggregateFunctionDataHelper(arguments, {}, std::make_shared(std::make_shared())) { for (const auto i : collections::range(0, arguments.size())) { @@ -90,12 +90,6 @@ public: events_size = static_cast(arguments.size()); } - - DataTypePtr getReturnType() const override - { - return std::make_shared(std::make_shared()); - } - bool allocatesMemoryInArena() const override { return false; } void add(AggregateDataPtr __restrict place, const IColumn ** columns, const size_t row_num, Arena *) const override diff --git a/src/AggregateFunctions/AggregateFunctionSequenceMatch.h b/src/AggregateFunctions/AggregateFunctionSequenceMatch.h index bcea408d26b..b4889a06e53 100644 --- a/src/AggregateFunctions/AggregateFunctionSequenceMatch.h +++ b/src/AggregateFunctions/AggregateFunctionSequenceMatch.h @@ -126,8 +126,8 @@ template class AggregateFunctionSequenceBase : public IAggregateFunctionDataHelper { public: - AggregateFunctionSequenceBase(const DataTypes & arguments, const Array & params, const String & pattern_) - : IAggregateFunctionDataHelper(arguments, params) + AggregateFunctionSequenceBase(const DataTypes & arguments, const Array & params, const String & pattern_, const DataTypePtr & result_type_) + : IAggregateFunctionDataHelper(arguments, params, result_type_) , pattern(pattern_) { arg_count = arguments.size(); @@ -617,14 +617,12 @@ class AggregateFunctionSequenceMatch final : public AggregateFunctionSequenceBas { public: AggregateFunctionSequenceMatch(const DataTypes & arguments, const Array & params, const String & pattern_) - : AggregateFunctionSequenceBase>(arguments, params, pattern_) {} + : AggregateFunctionSequenceBase>(arguments, params, pattern_, std::make_shared()) {} using AggregateFunctionSequenceBase>::AggregateFunctionSequenceBase; String getName() const override { return "sequenceMatch"; } - DataTypePtr getReturnType() const override { return std::make_shared(); } - bool allocatesMemoryInArena() const override { return false; } void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override @@ -655,14 +653,12 @@ class AggregateFunctionSequenceCount final : public AggregateFunctionSequenceBas { public: AggregateFunctionSequenceCount(const DataTypes & arguments, const Array & params, const String & pattern_) - : AggregateFunctionSequenceBase>(arguments, params, pattern_) {} + : AggregateFunctionSequenceBase>(arguments, params, pattern_, std::make_shared()) {} using AggregateFunctionSequenceBase>::AggregateFunctionSequenceBase; String getName() const override { return "sequenceCount"; } - DataTypePtr getReturnType() const override { return std::make_shared(); } - bool allocatesMemoryInArena() const override { return false; } void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override diff --git a/src/AggregateFunctions/AggregateFunctionSequenceNextNode.h b/src/AggregateFunctions/AggregateFunctionSequenceNextNode.h index 90caaee4d94..487889a0ca4 100644 --- a/src/AggregateFunctions/AggregateFunctionSequenceNextNode.h +++ b/src/AggregateFunctions/AggregateFunctionSequenceNextNode.h @@ -190,7 +190,7 @@ public: SequenceDirection seq_direction_, size_t min_required_args_, UInt64 max_elems_ = std::numeric_limits::max()) - : IAggregateFunctionDataHelper, Self>({data_type_}, parameters_) + : IAggregateFunctionDataHelper, 
Self>({data_type_}, parameters_, data_type_) , seq_base_kind(seq_base_kind_) , seq_direction(seq_direction_) , min_required_args(min_required_args_) @@ -202,8 +202,6 @@ public: String getName() const override { return "sequenceNextNode"; } - DataTypePtr getReturnType() const override { return data_type; } - bool haveSameStateRepresentationImpl(const IAggregateFunction & rhs) const override { return this->getName() == rhs.getName() && this->haveEqualArgumentTypes(rhs); diff --git a/src/AggregateFunctions/AggregateFunctionSimpleLinearRegression.h b/src/AggregateFunctions/AggregateFunctionSimpleLinearRegression.h index 06cdfc5e582..b0d448afb55 100644 --- a/src/AggregateFunctions/AggregateFunctionSimpleLinearRegression.h +++ b/src/AggregateFunctions/AggregateFunctionSimpleLinearRegression.h @@ -99,7 +99,7 @@ public: IAggregateFunctionDataHelper< AggregateFunctionSimpleLinearRegressionData, AggregateFunctionSimpleLinearRegression - > {arguments, params} + > {arguments, params, createResultType()} { // notice: arguments has been checked before } @@ -140,7 +140,7 @@ public: this->data(place).deserialize(buf); } - DataTypePtr getReturnType() const override + static DataTypePtr createResultType() { DataTypes types { diff --git a/src/AggregateFunctions/AggregateFunctionSimpleState.h b/src/AggregateFunctions/AggregateFunctionSimpleState.h index f50c86c684e..3af7d71395a 100644 --- a/src/AggregateFunctions/AggregateFunctionSimpleState.h +++ b/src/AggregateFunctions/AggregateFunctionSimpleState.h @@ -20,28 +20,28 @@ private: public: AggregateFunctionSimpleState(AggregateFunctionPtr nested_, const DataTypes & arguments_, const Array & params_) - : IAggregateFunctionHelper(arguments_, params_) + : IAggregateFunctionHelper(arguments_, params_, createResultType(nested_, params_)) , nested_func(nested_) { } String getName() const override { return nested_func->getName() + "SimpleState"; } - DataTypePtr getReturnType() const override + static DataTypePtr createResultType(const AggregateFunctionPtr & nested_, const Array & params_) { - DataTypeCustomSimpleAggregateFunction::checkSupportedFunctions(nested_func); + DataTypeCustomSimpleAggregateFunction::checkSupportedFunctions(nested_); // Need to make a clone to avoid recursive reference. - auto storage_type_out = DataTypeFactory::instance().get(nested_func->getReturnType()->getName()); + auto storage_type_out = DataTypeFactory::instance().get(nested_->getResultType()->getName()); // Need to make a new function with promoted argument types because SimpleAggregates requires arg_type = return_type. AggregateFunctionProperties properties; auto function - = AggregateFunctionFactory::instance().get(nested_func->getName(), {storage_type_out}, nested_func->getParameters(), properties); + = AggregateFunctionFactory::instance().get(nested_->getName(), {storage_type_out}, nested_->getParameters(), properties); // Need to make a clone because it'll be customized. 
- auto storage_type_arg = DataTypeFactory::instance().get(nested_func->getReturnType()->getName()); + auto storage_type_arg = DataTypeFactory::instance().get(nested_->getResultType()->getName()); DataTypeCustomNamePtr custom_name - = std::make_unique(function, DataTypes{nested_func->getReturnType()}, parameters); + = std::make_unique(function, DataTypes{nested_->getResultType()}, params_); storage_type_arg->setCustomization(std::make_unique(std::move(custom_name), nullptr)); return storage_type_arg; } diff --git a/src/AggregateFunctions/AggregateFunctionSparkbar.h b/src/AggregateFunctions/AggregateFunctionSparkbar.h index f0fbdd2f2e4..882575e2005 100644 --- a/src/AggregateFunctions/AggregateFunctionSparkbar.h +++ b/src/AggregateFunctions/AggregateFunctionSparkbar.h @@ -261,7 +261,7 @@ private: public: AggregateFunctionSparkbar(const DataTypes & arguments, const Array & params) : IAggregateFunctionDataHelper, AggregateFunctionSparkbar>( - arguments, params) + arguments, params, std::make_shared()) { width = params.at(0).safeGet(); if (params.size() == 3) @@ -283,11 +283,6 @@ public: return "sparkbar"; } - DataTypePtr getReturnType() const override - { - return std::make_shared(); - } - void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * /*arena*/) const override { X x = assert_cast *>(columns[0])->getData()[row_num]; diff --git a/src/AggregateFunctions/AggregateFunctionState.h b/src/AggregateFunctions/AggregateFunctionState.h index 20ccb2e543c..625fe1f36bc 100644 --- a/src/AggregateFunctions/AggregateFunctionState.h +++ b/src/AggregateFunctions/AggregateFunctionState.h @@ -23,7 +23,7 @@ private: public: AggregateFunctionState(AggregateFunctionPtr nested_, const DataTypes & arguments_, const Array & params_) - : IAggregateFunctionHelper(arguments_, params_) + : IAggregateFunctionHelper(arguments_, params_, nested_->getStateType()) , nested_func(nested_) {} @@ -32,11 +32,6 @@ public: return nested_func->getName() + "State"; } - DataTypePtr getReturnType() const override - { - return getStateType(); - } - const IAggregateFunction & getBaseAggregateFunctionWithSameStateRepresentation() const override { return nested_func->getBaseAggregateFunctionWithSameStateRepresentation(); diff --git a/src/AggregateFunctions/AggregateFunctionStatistics.h b/src/AggregateFunctions/AggregateFunctionStatistics.h index ad7177a32fa..eb2d66b7e94 100644 --- a/src/AggregateFunctions/AggregateFunctionStatistics.h +++ b/src/AggregateFunctions/AggregateFunctionStatistics.h @@ -115,15 +115,11 @@ class AggregateFunctionVariance final { public: explicit AggregateFunctionVariance(const DataTypePtr & arg) - : IAggregateFunctionDataHelper, AggregateFunctionVariance>({arg}, {}) {} + : IAggregateFunctionDataHelper, AggregateFunctionVariance>({arg}, {}, std::make_shared()) + {} String getName() const override { return Op::name; } - DataTypePtr getReturnType() const override - { - return std::make_shared(); - } - bool allocatesMemoryInArena() const override { return false; } void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override @@ -368,15 +364,11 @@ class AggregateFunctionCovariance final public: explicit AggregateFunctionCovariance(const DataTypes & args) : IAggregateFunctionDataHelper< CovarianceData, - AggregateFunctionCovariance>(args, {}) {} + AggregateFunctionCovariance>(args, {}, std::make_shared()) + {} String getName() const override { return Op::name; } - DataTypePtr getReturnType() const override - { - return 
std::make_shared(); - } - bool allocatesMemoryInArena() const override { return false; } void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override diff --git a/src/AggregateFunctions/AggregateFunctionStatisticsSimple.h b/src/AggregateFunctions/AggregateFunctionStatisticsSimple.h index d57b043b491..9ef62363a75 100644 --- a/src/AggregateFunctions/AggregateFunctionStatisticsSimple.h +++ b/src/AggregateFunctions/AggregateFunctionStatisticsSimple.h @@ -81,12 +81,12 @@ public: using ColVecResult = ColumnVector; explicit AggregateFunctionVarianceSimple(const DataTypes & argument_types_) - : IAggregateFunctionDataHelper>(argument_types_, {}) + : IAggregateFunctionDataHelper>(argument_types_, {}, std::make_shared>()) , src_scale(0) {} AggregateFunctionVarianceSimple(const IDataType & data_type, const DataTypes & argument_types_) - : IAggregateFunctionDataHelper>(argument_types_, {}) + : IAggregateFunctionDataHelper>(argument_types_, {}, std::make_shared>()) , src_scale(getDecimalScale(data_type)) {} @@ -117,11 +117,6 @@ public: UNREACHABLE(); } - DataTypePtr getReturnType() const override - { - return std::make_shared>(); - } - bool allocatesMemoryInArena() const override { return false; } void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override diff --git a/src/AggregateFunctions/AggregateFunctionSum.h b/src/AggregateFunctions/AggregateFunctionSum.h index 4cd0afc8760..14c2838c30d 100644 --- a/src/AggregateFunctions/AggregateFunctionSum.h +++ b/src/AggregateFunctions/AggregateFunctionSum.h @@ -411,23 +411,21 @@ public: } explicit AggregateFunctionSum(const DataTypes & argument_types_) - : IAggregateFunctionDataHelper>(argument_types_, {}) - , scale(0) + : IAggregateFunctionDataHelper>(argument_types_, {}, createResultType(0)) {} AggregateFunctionSum(const IDataType & data_type, const DataTypes & argument_types_) - : IAggregateFunctionDataHelper>(argument_types_, {}) - , scale(getDecimalScale(data_type)) + : IAggregateFunctionDataHelper>(argument_types_, {}, createResultType(getDecimalScale(data_type))) {} - DataTypePtr getReturnType() const override + static DataTypePtr createResultType(UInt32 scale_) { if constexpr (!is_decimal) return std::make_shared>(); else { using DataType = DataTypeDecimal; - return std::make_shared(DataType::maxPrecision(), scale); + return std::make_shared(DataType::maxPrecision(), scale_); } } @@ -548,7 +546,7 @@ public: for (const auto & argument_type : this->argument_types) can_be_compiled &= canBeNativeType(*argument_type); - auto return_type = getReturnType(); + auto return_type = this->getResultType(); can_be_compiled &= canBeNativeType(*return_type); return can_be_compiled; @@ -558,7 +556,7 @@ public: { llvm::IRBuilder<> & b = static_cast &>(builder); - auto * return_type = toNativeType(b, getReturnType()); + auto * return_type = toNativeType(b, this->getResultType()); auto * aggregate_sum_ptr = aggregate_data_ptr; b.CreateStore(llvm::Constant::getNullValue(return_type), aggregate_sum_ptr); @@ -568,7 +566,7 @@ public: { llvm::IRBuilder<> & b = static_cast &>(builder); - auto * return_type = toNativeType(b, getReturnType()); + auto * return_type = toNativeType(b, this->getResultType()); auto * sum_value_ptr = aggregate_data_ptr; auto * sum_value = b.CreateLoad(return_type, sum_value_ptr); @@ -586,7 +584,7 @@ public: { llvm::IRBuilder<> & b = static_cast &>(builder); - auto * return_type = toNativeType(b, getReturnType()); + auto * return_type = toNativeType(b, 
this->getResultType()); auto * sum_value_dst_ptr = aggregate_data_dst_ptr; auto * sum_value_dst = b.CreateLoad(return_type, sum_value_dst_ptr); @@ -602,7 +600,7 @@ public: { llvm::IRBuilder<> & b = static_cast &>(builder); - auto * return_type = toNativeType(b, getReturnType()); + auto * return_type = toNativeType(b, this->getResultType()); auto * sum_value_ptr = aggregate_data_ptr; return b.CreateLoad(return_type, sum_value_ptr); @@ -611,8 +609,6 @@ public: #endif private: - UInt32 scale; - static constexpr auto & castColumnToResult(IColumn & to) { if constexpr (is_decimal) diff --git a/src/AggregateFunctions/AggregateFunctionSumCount.h b/src/AggregateFunctions/AggregateFunctionSumCount.h index f1a5d85bb6c..7058204ed74 100644 --- a/src/AggregateFunctions/AggregateFunctionSumCount.h +++ b/src/AggregateFunctions/AggregateFunctionSumCount.h @@ -14,12 +14,13 @@ public: using Base = AggregateFunctionAvg; explicit AggregateFunctionSumCount(const DataTypes & argument_types_, UInt32 num_scale_ = 0) - : Base(argument_types_, num_scale_), scale(num_scale_) {} + : Base(argument_types_, createResultType(num_scale_), num_scale_) + {} - DataTypePtr getReturnType() const override + static DataTypePtr createResultType(UInt32 num_scale_) { auto second_elem = std::make_shared(); - return std::make_shared(DataTypes{getReturnTypeFirstElement(), std::move(second_elem)}); + return std::make_shared(DataTypes{getReturnTypeFirstElement(num_scale_), std::move(second_elem)}); } void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const final @@ -43,9 +44,7 @@ public: #endif private: - UInt32 scale; - - auto getReturnTypeFirstElement() const + static auto getReturnTypeFirstElement(UInt32 num_scale_) { using FieldType = AvgFieldType; @@ -54,7 +53,7 @@ private: else { using DataType = DataTypeDecimal; - return std::make_shared(DataType::maxPrecision(), scale); + return std::make_shared(DataType::maxPrecision(), num_scale_); } } }; diff --git a/src/AggregateFunctions/AggregateFunctionSumMap.h b/src/AggregateFunctions/AggregateFunctionSumMap.h index 1e32be987ff..4a1088a87bd 100644 --- a/src/AggregateFunctions/AggregateFunctionSumMap.h +++ b/src/AggregateFunctions/AggregateFunctionSumMap.h @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -80,7 +81,7 @@ public: AggregateFunctionMapBase(const DataTypePtr & keys_type_, const DataTypes & values_types_, const DataTypes & argument_types_) - : Base(argument_types_, {} /* parameters */) + : Base(argument_types_, {} /* parameters */, createResultType(keys_type_, values_types_, getName())) , keys_type(keys_type_) , keys_serialization(keys_type->getDefaultSerialization()) , values_types(values_types_) @@ -117,19 +118,22 @@ public: return 0; } - DataTypePtr getReturnType() const override + static DataTypePtr createResultType( + const DataTypePtr & keys_type_, + const DataTypes & values_types_, + const String & name_) { DataTypes types; - types.emplace_back(std::make_shared(keys_type)); + types.emplace_back(std::make_shared(keys_type_)); - for (const auto & value_type : values_types) + for (const auto & value_type : values_types_) { if constexpr (std::is_same_v) { if (!value_type->isSummable()) throw Exception{ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Values for {} cannot be summed, passed type {}", - getName(), value_type->getName()}; + name_, value_type->getName()}; } DataTypePtr result_type; @@ -139,7 +143,7 @@ public: if (value_type->onlyNull()) throw Exception{ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot calculate {} 
of type {}", - getName(), value_type->getName()}; + name_, value_type->getName()}; // Overflow, meaning that the returned type is the same as // the input type. Nulls are skipped. @@ -153,7 +157,7 @@ public: if (!value_type_without_nullable->canBePromoted()) throw Exception{ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Values for {} are expected to be Numeric, Float or Decimal, passed type {}", - getName(), value_type->getName()}; + name_, value_type->getName()}; WhichDataType value_type_to_check(value_type_without_nullable); @@ -424,7 +428,10 @@ public: } bool keepKey(const T & key) const { return static_cast(*this).keepKey(key); } - String getName() const override { return static_cast(*this).getName(); } + String getName() const override { return getNameImpl(); } + +private: + static String getNameImpl() { return Derived::getNameImpl(); } }; template @@ -443,10 +450,10 @@ public: { // The constructor accepts parameters to have a uniform interface with // sumMapFiltered, but this function doesn't have any parameters. - assertNoParameters(getName(), params_); + assertNoParameters(getNameImpl(), params_); } - String getName() const override + static String getNameImpl() { if constexpr (overflow) { @@ -461,6 +468,7 @@ public: bool keepKey(const T &) const { return true; } }; + template class AggregateFunctionSumMapFiltered final : public AggregateFunctionMapBase(keys_to_keep_values)) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} requires an Array as a parameter", - getName()); + getNameImpl()); + + this->parameters = params_; keys_to_keep.reserve(keys_to_keep_values.size()); @@ -501,8 +511,17 @@ public: keys_to_keep.emplace(f.safeGet()); } - String getName() const override - { return overflow ? "sumMapFilteredWithOverflow" : "sumMapFiltered"; } + static String getNameImpl() + { + if constexpr (overflow) + { + return "sumMapFilteredWithOverflow"; + } + else + { + return "sumMapFiltered"; + } + } bool keepKey(const T & key) const { return keys_to_keep.count(key); } }; @@ -606,10 +625,10 @@ public: { // The constructor accepts parameters to have a uniform interface with // sumMapFiltered, but this function doesn't have any parameters. - assertNoParameters(getName(), params_); + assertNoParameters(getNameImpl(), params_); } - String getName() const override { return "minMap"; } + static String getNameImpl() { return "minMap"; } bool keepKey(const T &) const { return true; } }; @@ -630,10 +649,10 @@ public: { // The constructor accepts parameters to have a uniform interface with // sumMapFiltered, but this function doesn't have any parameters. 
- assertNoParameters(getName(), params_); + assertNoParameters(getNameImpl(), params_); } - String getName() const override { return "maxMap"; } + static String getNameImpl() { return "maxMap"; } bool keepKey(const T &) const { return true; } }; diff --git a/src/AggregateFunctions/AggregateFunctionTTest.h b/src/AggregateFunctions/AggregateFunctionTTest.h index b72e7a3cdcb..749e711d4f7 100644 --- a/src/AggregateFunctions/AggregateFunctionTTest.h +++ b/src/AggregateFunctions/AggregateFunctionTTest.h @@ -46,7 +46,7 @@ private: Float64 confidence_level; public: AggregateFunctionTTest(const DataTypes & arguments, const Array & params) - : IAggregateFunctionDataHelper>({arguments}, params) + : IAggregateFunctionDataHelper>({arguments}, params, createResultType(!params.empty())) { if (!params.empty()) { @@ -71,9 +71,9 @@ public: return Data::name; } - DataTypePtr getReturnType() const override + static DataTypePtr createResultType(bool need_confidence_interval_) { - if (need_confidence_interval) + if (need_confidence_interval_) { DataTypes types { diff --git a/src/AggregateFunctions/AggregateFunctionTopK.cpp b/src/AggregateFunctions/AggregateFunctionTopK.cpp index 4ebc80aceb5..b93aa703503 100644 --- a/src/AggregateFunctions/AggregateFunctionTopK.cpp +++ b/src/AggregateFunctions/AggregateFunctionTopK.cpp @@ -31,15 +31,33 @@ namespace template class AggregateFunctionTopKDate : public AggregateFunctionTopK { +public: using AggregateFunctionTopK::AggregateFunctionTopK; - DataTypePtr getReturnType() const override { return std::make_shared(std::make_shared()); } + + AggregateFunctionTopKDate(UInt64 threshold_, UInt64 load_factor, const DataTypes & argument_types_, const Array & params) + : AggregateFunctionTopK( + threshold_, + load_factor, + argument_types_, + params, + std::make_shared(std::make_shared())) + {} }; template class AggregateFunctionTopKDateTime : public AggregateFunctionTopK { +public: using AggregateFunctionTopK::AggregateFunctionTopK; - DataTypePtr getReturnType() const override { return std::make_shared(std::make_shared()); } + + AggregateFunctionTopKDateTime(UInt64 threshold_, UInt64 load_factor, const DataTypes & argument_types_, const Array & params) + : AggregateFunctionTopK( + threshold_, + load_factor, + argument_types_, + params, + std::make_shared(std::make_shared())) + {} }; diff --git a/src/AggregateFunctions/AggregateFunctionTopK.h b/src/AggregateFunctions/AggregateFunctionTopK.h index 98774254695..f1e57608195 100644 --- a/src/AggregateFunctions/AggregateFunctionTopK.h +++ b/src/AggregateFunctions/AggregateFunctionTopK.h @@ -40,14 +40,20 @@ protected: public: AggregateFunctionTopK(UInt64 threshold_, UInt64 load_factor, const DataTypes & argument_types_, const Array & params) - : IAggregateFunctionDataHelper, AggregateFunctionTopK>(argument_types_, params) - , threshold(threshold_), reserved(load_factor * threshold) {} + : IAggregateFunctionDataHelper, AggregateFunctionTopK>(argument_types_, params, createResultType(argument_types_)) + , threshold(threshold_), reserved(load_factor * threshold) + {} + + AggregateFunctionTopK(UInt64 threshold_, UInt64 load_factor, const DataTypes & argument_types_, const Array & params, const DataTypePtr & result_type_) + : IAggregateFunctionDataHelper, AggregateFunctionTopK>(argument_types_, params, result_type_) + , threshold(threshold_), reserved(load_factor * threshold) + {} String getName() const override { return is_weighted ? 
"topKWeighted" : "topK"; } - DataTypePtr getReturnType() const override + static DataTypePtr createResultType(const DataTypes & argument_types_) { - return std::make_shared(this->argument_types[0]); + return std::make_shared(argument_types_[0]); } bool allocatesMemoryInArena() const override { return false; } @@ -126,21 +132,20 @@ private: UInt64 threshold; UInt64 reserved; - DataTypePtr & input_data_type; static void deserializeAndInsert(StringRef str, IColumn & data_to); public: AggregateFunctionTopKGeneric( UInt64 threshold_, UInt64 load_factor, const DataTypes & argument_types_, const Array & params) - : IAggregateFunctionDataHelper>(argument_types_, params) - , threshold(threshold_), reserved(load_factor * threshold), input_data_type(this->argument_types[0]) {} + : IAggregateFunctionDataHelper>(argument_types_, params, createResultType(argument_types_)) + , threshold(threshold_), reserved(load_factor * threshold) {} String getName() const override { return is_weighted ? "topKWeighted" : "topK"; } - DataTypePtr getReturnType() const override + static DataTypePtr createResultType(const DataTypes & argument_types_) { - return std::make_shared(input_data_type); + return std::make_shared(argument_types_[0]); } bool allocatesMemoryInArena() const override diff --git a/src/AggregateFunctions/AggregateFunctionUniq.h b/src/AggregateFunctions/AggregateFunctionUniq.h index 1a98bfc8456..c782b9314fd 100644 --- a/src/AggregateFunctions/AggregateFunctionUniq.h +++ b/src/AggregateFunctions/AggregateFunctionUniq.h @@ -358,17 +358,12 @@ private: public: explicit AggregateFunctionUniq(const DataTypes & argument_types_) - : IAggregateFunctionDataHelper>(argument_types_, {}) + : IAggregateFunctionDataHelper>(argument_types_, {}, std::make_shared()) { } String getName() const override { return Data::getName(); } - DataTypePtr getReturnType() const override - { - return std::make_shared(); - } - bool allocatesMemoryInArena() const override { return false; } /// ALWAYS_INLINE is required to have better code layout for uniqHLL12 function @@ -462,7 +457,7 @@ private: public: explicit AggregateFunctionUniqVariadic(const DataTypes & arguments) - : IAggregateFunctionDataHelper>(arguments, {}) + : IAggregateFunctionDataHelper>(arguments, {}, std::make_shared()) { if (argument_is_tuple) num_args = typeid_cast(*arguments[0]).getElements().size(); @@ -472,11 +467,6 @@ public: String getName() const override { return Data::getName(); } - DataTypePtr getReturnType() const override - { - return std::make_shared(); - } - bool allocatesMemoryInArena() const override { return false; } void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override diff --git a/src/AggregateFunctions/AggregateFunctionUniqCombined.h b/src/AggregateFunctions/AggregateFunctionUniqCombined.h index 47b3081225b..d879e3b3dde 100644 --- a/src/AggregateFunctions/AggregateFunctionUniqCombined.h +++ b/src/AggregateFunctions/AggregateFunctionUniqCombined.h @@ -126,7 +126,8 @@ class AggregateFunctionUniqCombined final { public: AggregateFunctionUniqCombined(const DataTypes & argument_types_, const Array & params_) - : IAggregateFunctionDataHelper, AggregateFunctionUniqCombined>(argument_types_, params_) {} + : IAggregateFunctionDataHelper, AggregateFunctionUniqCombined>(argument_types_, params_, std::make_shared()) + {} String getName() const override { @@ -136,11 +137,6 @@ public: return "uniqCombined"; } - DataTypePtr getReturnType() const override - { - return std::make_shared(); - } - bool 
allocatesMemoryInArena() const override { return false; } void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override @@ -192,7 +188,7 @@ private: public: explicit AggregateFunctionUniqCombinedVariadic(const DataTypes & arguments, const Array & params) : IAggregateFunctionDataHelper, - AggregateFunctionUniqCombinedVariadic>(arguments, params) + AggregateFunctionUniqCombinedVariadic>(arguments, params, std::make_shared()) { if (argument_is_tuple) num_args = typeid_cast(*arguments[0]).getElements().size(); @@ -208,11 +204,6 @@ public: return "uniqCombined"; } - DataTypePtr getReturnType() const override - { - return std::make_shared(); - } - bool allocatesMemoryInArena() const override { return false; } void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override diff --git a/src/AggregateFunctions/AggregateFunctionUniqUpTo.h b/src/AggregateFunctions/AggregateFunctionUniqUpTo.h index 99f36b664d7..377f2580070 100644 --- a/src/AggregateFunctions/AggregateFunctionUniqUpTo.h +++ b/src/AggregateFunctions/AggregateFunctionUniqUpTo.h @@ -174,7 +174,7 @@ private: public: AggregateFunctionUniqUpTo(UInt8 threshold_, const DataTypes & argument_types_, const Array & params_) - : IAggregateFunctionDataHelper, AggregateFunctionUniqUpTo>(argument_types_, params_) + : IAggregateFunctionDataHelper, AggregateFunctionUniqUpTo>(argument_types_, params_, std::make_shared()) , threshold(threshold_) { } @@ -186,11 +186,6 @@ public: String getName() const override { return "uniqUpTo"; } - DataTypePtr getReturnType() const override - { - return std::make_shared(); - } - bool allocatesMemoryInArena() const override { return false; } /// ALWAYS_INLINE is required to have better code layout for uniqUpTo function @@ -235,7 +230,7 @@ private: public: AggregateFunctionUniqUpToVariadic(const DataTypes & arguments, const Array & params, UInt8 threshold_) - : IAggregateFunctionDataHelper, AggregateFunctionUniqUpToVariadic>(arguments, params) + : IAggregateFunctionDataHelper, AggregateFunctionUniqUpToVariadic>(arguments, params, std::make_shared()) , threshold(threshold_) { if (argument_is_tuple) @@ -251,11 +246,6 @@ public: String getName() const override { return "uniqUpTo"; } - DataTypePtr getReturnType() const override - { - return std::make_shared(); - } - bool allocatesMemoryInArena() const override { return false; } void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override diff --git a/src/AggregateFunctions/AggregateFunctionWindowFunnel.h b/src/AggregateFunctions/AggregateFunctionWindowFunnel.h index 8dad9643da5..472f230a24c 100644 --- a/src/AggregateFunctions/AggregateFunctionWindowFunnel.h +++ b/src/AggregateFunctions/AggregateFunctionWindowFunnel.h @@ -221,7 +221,7 @@ public: } AggregateFunctionWindowFunnel(const DataTypes & arguments, const Array & params) - : IAggregateFunctionDataHelper>(arguments, params) + : IAggregateFunctionDataHelper>(arguments, params, std::make_shared()) { events_size = arguments.size() - 1; window = params.at(0).safeGet(); @@ -245,11 +245,6 @@ public: } } - DataTypePtr getReturnType() const override - { - return std::make_shared(); - } - bool allocatesMemoryInArena() const override { return false; } void add(AggregateDataPtr __restrict place, const IColumn ** columns, const size_t row_num, Arena *) const override diff --git a/src/AggregateFunctions/CrossTab.h b/src/AggregateFunctions/CrossTab.h index 1284c210886..5868292c83f 100644 --- 
a/src/AggregateFunctions/CrossTab.h +++ b/src/AggregateFunctions/CrossTab.h @@ -118,7 +118,7 @@ class AggregateFunctionCrossTab : public IAggregateFunctionDataHelper>({arguments}, {}) + : IAggregateFunctionDataHelper>({arguments}, {}, createResultType()) { } @@ -132,7 +132,7 @@ public: return false; } - DataTypePtr getReturnType() const override + static DataTypePtr createResultType() { return std::make_shared>(); } diff --git a/src/AggregateFunctions/IAggregateFunction.h b/src/AggregateFunctions/IAggregateFunction.h index ada00791e69..a5d1887f85e 100644 --- a/src/AggregateFunctions/IAggregateFunction.h +++ b/src/AggregateFunctions/IAggregateFunction.h @@ -10,6 +10,7 @@ #include #include #include +#include #include "config.h" @@ -49,6 +50,7 @@ using ConstAggregateDataPtr = const char *; class IAggregateFunction; using AggregateFunctionPtr = std::shared_ptr; + struct AggregateFunctionProperties; /** Aggregate functions interface. @@ -59,18 +61,18 @@ struct AggregateFunctionProperties; * (which can be created in some memory pool), * and IAggregateFunction is the external interface for manipulating them. */ -class IAggregateFunction : public std::enable_shared_from_this +class IAggregateFunction : public std::enable_shared_from_this, public IResolvedFunction { public: - IAggregateFunction(const DataTypes & argument_types_, const Array & parameters_) - : argument_types(argument_types_), parameters(parameters_) {} + IAggregateFunction(const DataTypes & argument_types_, const Array & parameters_, const DataTypePtr & result_type_) + : result_type(result_type_) + , argument_types(argument_types_) + , parameters(parameters_) + {} /// Get main function name. virtual String getName() const = 0; - /// Get the result type. - virtual DataTypePtr getReturnType() const = 0; - /// Get the data type of internal state. By default it is AggregateFunction(name(params), argument_types...). virtual DataTypePtr getStateType() const; @@ -102,7 +104,7 @@ public: virtual size_t getDefaultVersion() const { return 0; } - virtual ~IAggregateFunction() = default; + ~IAggregateFunction() override = default; /** Data manipulating functions. */ @@ -343,13 +345,22 @@ public: return nullptr; } + /// For most functions if one of arguments is always NULL, we return NULL (it's implemented in combinator Null), + /// but in some functions we can want to process this argument somehow (for example condition argument in If combinator). + /// This method returns the set of argument indexes that can be always NULL, they will be skipped in combinator Null. + virtual std::unordered_set getArgumentsThatCanBeOnlyNull() const + { + return {}; + } + /** Return the nested function if this is an Aggregate Function Combinator. * Otherwise return nullptr. */ virtual AggregateFunctionPtr getNestedFunction() const { return {}; } - const DataTypes & getArgumentTypes() const { return argument_types; } - const Array & getParameters() const { return parameters; } + const DataTypePtr & getResultType() const override { return result_type; } + const DataTypes & getArgumentTypes() const override { return argument_types; } + const Array & getParameters() const override { return parameters; } // Any aggregate function can be calculated over a window, but there are some // window functions such as rank() that require a different interface, e.g. 
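
The mechanical change repeated across these files is easiest to see in isolation. Below is a minimal sketch (not part of the patch) of what an aggregate function looks like after this refactoring: the virtual getReturnType() is gone, a static createResultType() computes the type once, and that type is handed to the IAggregateFunctionDataHelper constructor, which stores it in IAggregateFunction::result_type and serves it through getResultType() (now an IResolvedFunction override). The function name toySum, its ToySumData state and the Float64 result type are illustrative assumptions, not code taken from the patch.

    #include <AggregateFunctions/IAggregateFunction.h>
    #include <Columns/ColumnsNumber.h>
    #include <DataTypes/DataTypesNumber.h>
    #include <Common/assert_cast.h>
    #include <IO/ReadHelpers.h>
    #include <IO/WriteHelpers.h>

    namespace DB
    {

    /// Toy aggregation state; real functions in this patch keep their existing Data structs.
    struct ToySumData
    {
        Float64 value = 0;
    };

    class AggregateFunctionToySum final
        : public IAggregateFunctionDataHelper<ToySumData, AggregateFunctionToySum>
    {
    public:
        explicit AggregateFunctionToySum(const DataTypes & argument_types_)
            /// The result type is computed up front and passed as the new third
            /// constructor argument instead of being reported later by a virtual call.
            : IAggregateFunctionDataHelper<ToySumData, AggregateFunctionToySum>(
                argument_types_, {} /* parameters */, createResultType())
        {}

        /// Static replacement for the old getReturnType(): it must be callable
        /// before the object exists, so the base class can store the type.
        static DataTypePtr createResultType() { return std::make_shared<DataTypeFloat64>(); }

        String getName() const override { return "toySum"; }
        bool allocatesMemoryInArena() const override { return false; }

        void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
        {
            this->data(place).value += columns[0]->getFloat64(row_num);
        }

        void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
        {
            this->data(place).value += this->data(rhs).value;
        }

        void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
        {
            writeBinary(this->data(place).value, buf);
        }

        void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
        {
            readBinary(this->data(place).value, buf);
        }

        void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
        {
            assert_cast<ColumnFloat64 &>(to).getData().push_back(this->data(place).value);
        }
    };

    }

Callers that used to ask the function for its return type now read the stored value, e.g. nested_function->getResultType() in the combinators above, which is why constructors such as AggregateFunctionMerge and AggregateFunctionNullBase compute the type from the nested function before delegating to the base constructor.
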
@@ -398,6 +409,7 @@ public: #endif protected: + DataTypePtr result_type; DataTypes argument_types; Array parameters; }; @@ -414,8 +426,8 @@ private: } public: - IAggregateFunctionHelper(const DataTypes & argument_types_, const Array & parameters_) - : IAggregateFunction(argument_types_, parameters_) {} + IAggregateFunctionHelper(const DataTypes & argument_types_, const Array & parameters_, const DataTypePtr & result_type_) + : IAggregateFunction(argument_types_, parameters_, result_type_) {} AddFunc getAddressOfAddFunction() const override { return &addFree; } @@ -695,15 +707,15 @@ public: // Derived class can `override` this to flag that DateTime64 is not supported. static constexpr bool DateTime64Supported = true; - IAggregateFunctionDataHelper(const DataTypes & argument_types_, const Array & parameters_) - : IAggregateFunctionHelper(argument_types_, parameters_) + IAggregateFunctionDataHelper(const DataTypes & argument_types_, const Array & parameters_, const DataTypePtr & result_type_) + : IAggregateFunctionHelper(argument_types_, parameters_, result_type_) { /// To prevent derived classes changing the destroy() without updating hasTrivialDestructor() to match it /// Enforce that either both of them are changed or none are - constexpr bool declares_destroy_and_hasTrivialDestructor = + constexpr bool declares_destroy_and_has_trivial_destructor = std::is_same_v == std::is_same_v; - static_assert(declares_destroy_and_hasTrivialDestructor, + static_assert(declares_destroy_and_has_trivial_destructor, "destroy() and hasTrivialDestructor() methods of an aggregate function must be either both overridden or not"); } @@ -824,6 +836,9 @@ struct AggregateFunctionProperties * Some may also name this property as "non-commutative". */ bool is_order_dependent = false; + + /// Indicates if it's actually window function. 
+ bool is_window_function = false; }; diff --git a/src/AggregateFunctions/registerAggregateFunctions.cpp b/src/AggregateFunctions/registerAggregateFunctions.cpp index 08524cc9f97..ecf6ab51367 100644 --- a/src/AggregateFunctions/registerAggregateFunctions.cpp +++ b/src/AggregateFunctions/registerAggregateFunctions.cpp @@ -73,7 +73,6 @@ void registerAggregateFunctionExponentialMovingAverage(AggregateFunctionFactory void registerAggregateFunctionSparkbar(AggregateFunctionFactory &); void registerAggregateFunctionIntervalLengthSum(AggregateFunctionFactory &); void registerAggregateFunctionAnalysisOfVariance(AggregateFunctionFactory &); -void registerAggregateFunctionFlameGraph(AggregateFunctionFactory &); class AggregateFunctionCombinatorFactory; void registerAggregateFunctionCombinatorIf(AggregateFunctionCombinatorFactory &); @@ -159,7 +158,6 @@ void registerAggregateFunctions() registerAggregateFunctionExponentialMovingAverage(factory); registerAggregateFunctionSparkbar(factory); registerAggregateFunctionAnalysisOfVariance(factory); - registerAggregateFunctionFlameGraph(factory); registerWindowFunctions(factory); } diff --git a/src/Analyzer/FunctionNode.cpp b/src/Analyzer/FunctionNode.cpp index ad3959dfe9c..1b32cd5436d 100644 --- a/src/Analyzer/FunctionNode.cpp +++ b/src/Analyzer/FunctionNode.cpp @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -17,6 +18,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + FunctionNode::FunctionNode(String function_name_) : IQueryTreeNode(children_size) , function_name(function_name_) @@ -25,25 +31,41 @@ FunctionNode::FunctionNode(String function_name_) children[arguments_child_index] = std::make_shared(); } -void FunctionNode::resolveAsFunction(FunctionOverloadResolverPtr function_value, DataTypePtr result_type_value) +ColumnsWithTypeAndName FunctionNode::getArgumentTypes() const { - aggregate_function = nullptr; + ColumnsWithTypeAndName argument_types; + for (const auto & arg : getArguments().getNodes()) + { + ColumnWithTypeAndName argument; + argument.type = arg->getResultType(); + if (auto * constant = arg->as()) + argument.column = argument.type->createColumnConst(1, constant->getValue()); + argument_types.push_back(argument); + } + return argument_types; +} + +void FunctionNode::resolveAsFunction(FunctionBasePtr function_value) +{ + function_name = function_value->getName(); function = std::move(function_value); - result_type = std::move(result_type_value); - function_name = function->getName(); + kind = FunctionKind::ORDINARY; } -void FunctionNode::resolveAsAggregateFunction(AggregateFunctionPtr aggregate_function_value, DataTypePtr result_type_value) +void FunctionNode::resolveAsAggregateFunction(AggregateFunctionPtr aggregate_function_value) { - function = nullptr; - aggregate_function = std::move(aggregate_function_value); - result_type = std::move(result_type_value); - function_name = aggregate_function->getName(); + function_name = aggregate_function_value->getName(); + function = std::move(aggregate_function_value); + kind = FunctionKind::AGGREGATE; } -void FunctionNode::resolveAsWindowFunction(AggregateFunctionPtr window_function_value, DataTypePtr result_type_value) +void FunctionNode::resolveAsWindowFunction(AggregateFunctionPtr window_function_value) { - resolveAsAggregateFunction(window_function_value, result_type_value); + if (!hasWindow()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Trying to resolve FunctionNode without window definition as a window function {}", 
window_function_value->getName()); + resolveAsAggregateFunction(window_function_value); + kind = FunctionKind::WINDOW; } void FunctionNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const @@ -63,8 +85,8 @@ void FunctionNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state buffer << ", function_type: " << function_type; - if (result_type) - buffer << ", result_type: " + result_type->getName(); + if (function) + buffer << ", result_type: " + function->getResultType()->getName(); const auto & parameters = getParameters(); if (!parameters.getNodes().empty()) @@ -96,11 +118,19 @@ bool FunctionNode::isEqualImpl(const IQueryTreeNode & rhs) const isWindowFunction() != rhs_typed.isWindowFunction()) return false; - if (result_type && rhs_typed.result_type && !result_type->equals(*rhs_typed.getResultType())) + if (isResolved() != rhs_typed.isResolved()) return false; - else if (result_type && !rhs_typed.result_type) + if (!isResolved()) + return true; + + auto lhs_result_type = getResultType(); + auto rhs_result_type = rhs.getResultType(); + + if (lhs_result_type && rhs_result_type && !lhs_result_type->equals(*rhs_result_type)) return false; - else if (!result_type && rhs_typed.result_type) + else if (lhs_result_type && !rhs_result_type) + return false; + else if (!lhs_result_type && rhs_result_type) return false; return true; @@ -114,7 +144,10 @@ void FunctionNode::updateTreeHashImpl(HashState & hash_state) const hash_state.update(isAggregateFunction()); hash_state.update(isWindowFunction()); - if (result_type) + if (!isResolved()) + return; + + if (auto result_type = getResultType()) { auto result_type_name = result_type->getName(); hash_state.update(result_type_name.size()); @@ -130,8 +163,7 @@ QueryTreeNodePtr FunctionNode::cloneImpl() const * because ordinary functions or aggregate functions must be stateless. */ result_function->function = function; - result_function->aggregate_function = aggregate_function; - result_function->result_type = result_type; + result_function->kind = kind; return result_function; } diff --git a/src/Analyzer/FunctionNode.h b/src/Analyzer/FunctionNode.h index e746cf48581..501d439e55e 100644 --- a/src/Analyzer/FunctionNode.h +++ b/src/Analyzer/FunctionNode.h @@ -1,8 +1,12 @@ #pragma once +#include +#include #include #include #include +#include +#include namespace DB { @@ -15,6 +19,9 @@ namespace ErrorCodes class IFunctionOverloadResolver; using FunctionOverloadResolverPtr = std::shared_ptr; +class IFunctionBase; +using FunctionBasePtr = std::shared_ptr; + class IAggregateFunction; using AggregateFunctionPtr = std::shared_ptr; @@ -39,6 +46,14 @@ using AggregateFunctionPtr = std::shared_ptr; class FunctionNode; using FunctionNodePtr = std::shared_ptr; +enum class FunctionKind +{ + UNKNOWN, + ORDINARY, + AGGREGATE, + WINDOW, +}; + class FunctionNode final : public IQueryTreeNode { public: @@ -101,6 +116,8 @@ public: return children[arguments_child_index]; } + ColumnsWithTypeAndName getArgumentTypes() const; + /// Returns true if function node has window, false otherwise bool hasWindow() const { @@ -129,42 +146,46 @@ public: /** Get non aggregate function. * If function is not resolved nullptr returned. */ - const FunctionOverloadResolverPtr & getFunction() const + FunctionBasePtr getFunction() const { - return function; + if (kind != FunctionKind::ORDINARY) + return {}; + return std::reinterpret_pointer_cast(function); } /** Get aggregate function. * If function is not resolved nullptr returned. 
* If function is resolved as non aggregate function nullptr returned. */ - const AggregateFunctionPtr & getAggregateFunction() const + AggregateFunctionPtr getAggregateFunction() const { - return aggregate_function; + if (kind == FunctionKind::UNKNOWN || kind == FunctionKind::ORDINARY) + return {}; + return std::reinterpret_pointer_cast(function); } /// Is function node resolved bool isResolved() const { - return result_type != nullptr && (function != nullptr || aggregate_function != nullptr); + return function != nullptr; } /// Is function node window function bool isWindowFunction() const { - return getWindowNode() != nullptr; + return hasWindow(); } /// Is function node aggregate function bool isAggregateFunction() const { - return aggregate_function != nullptr && !isWindowFunction(); + return kind == FunctionKind::AGGREGATE; } /// Is function node ordinary function bool isOrdinaryFunction() const { - return function != nullptr; + return kind == FunctionKind::ORDINARY; } /** Resolve function node as non aggregate function. @@ -173,19 +194,19 @@ public: * Assume we have `multiIf` function with single condition, it can be converted to `if` function. * Function name must be updated accordingly. */ - void resolveAsFunction(FunctionOverloadResolverPtr function_value, DataTypePtr result_type_value); + void resolveAsFunction(FunctionBasePtr function_value); /** Resolve function node as aggregate function. * It is important that function name is updated with resolved function name. * Main motivation for this is query tree optimizations. */ - void resolveAsAggregateFunction(AggregateFunctionPtr aggregate_function_value, DataTypePtr result_type_value); + void resolveAsAggregateFunction(AggregateFunctionPtr aggregate_function_value); /** Resolve function node as window function. * It is important that function name is updated with resolved function name. * Main motivation for this is query tree optimizations. 
*/ - void resolveAsWindowFunction(AggregateFunctionPtr window_function_value, DataTypePtr result_type_value); + void resolveAsWindowFunction(AggregateFunctionPtr window_function_value); QueryTreeNodeType getNodeType() const override { @@ -194,12 +215,11 @@ public: DataTypePtr getResultType() const override { - if (!result_type) + if (!function) throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Function node with name '{}' is not resolved", function_name); - - return result_type; + return function->getResultType(); } void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; @@ -215,9 +235,8 @@ protected: private: String function_name; - FunctionOverloadResolverPtr function; - AggregateFunctionPtr aggregate_function; - DataTypePtr result_type; + FunctionKind kind = FunctionKind::UNKNOWN; + IResolvedFunctionPtr function; static constexpr size_t parameters_child_index = 0; static constexpr size_t arguments_child_index = 1; diff --git a/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp index 9b59faacfe0..e4e99c6e947 100644 --- a/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp +++ b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp @@ -147,7 +147,6 @@ public: private: static inline void resolveAggregateFunctionNode(FunctionNode & function_node, const String & aggregate_function_name) { - auto function_result_type = function_node.getResultType(); auto function_aggregate_function = function_node.getAggregateFunction(); AggregateFunctionProperties properties; @@ -156,7 +155,7 @@ private: function_aggregate_function->getParameters(), properties); - function_node.resolveAsAggregateFunction(std::move(aggregate_function), std::move(function_result_type)); + function_node.resolveAsAggregateFunction(std::move(aggregate_function)); } }; diff --git a/src/Analyzer/Passes/CountDistinctPass.cpp b/src/Analyzer/Passes/CountDistinctPass.cpp index 05c31ec28ba..0384055e484 100644 --- a/src/Analyzer/Passes/CountDistinctPass.cpp +++ b/src/Analyzer/Passes/CountDistinctPass.cpp @@ -71,7 +71,7 @@ public: auto result_type = function_node->getResultType(); AggregateFunctionProperties properties; auto aggregate_function = AggregateFunctionFactory::instance().get("count", {}, {}, properties); - function_node->resolveAsAggregateFunction(std::move(aggregate_function), std::move(result_type)); + function_node->resolveAsAggregateFunction(std::move(aggregate_function)); function_node->getArguments().getNodes().clear(); } }; diff --git a/src/Analyzer/Passes/CustomizeFunctionsPass.cpp b/src/Analyzer/Passes/CustomizeFunctionsPass.cpp index 629ab411a55..7eb4a040970 100644 --- a/src/Analyzer/Passes/CustomizeFunctionsPass.cpp +++ b/src/Analyzer/Passes/CustomizeFunctionsPass.cpp @@ -138,7 +138,6 @@ public: static inline void resolveAggregateOrWindowFunctionNode(FunctionNode & function_node, const String & aggregate_function_name) { - auto function_result_type = function_node.getResultType(); auto function_aggregate_function = function_node.getAggregateFunction(); AggregateFunctionProperties properties; @@ -148,16 +147,15 @@ public: properties); if (function_node.isAggregateFunction()) - function_node.resolveAsAggregateFunction(std::move(aggregate_function), std::move(function_result_type)); + function_node.resolveAsAggregateFunction(std::move(aggregate_function)); else if (function_node.isWindowFunction()) - function_node.resolveAsWindowFunction(std::move(aggregate_function), 
std::move(function_result_type)); + function_node.resolveAsWindowFunction(std::move(aggregate_function)); } inline void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const { - auto function_result_type = function_node.getResultType(); auto function = FunctionFactory::instance().get(function_name, context); - function_node.resolveAsFunction(function, std::move(function_result_type)); + function_node.resolveAsFunction(function->build(function_node.getArgumentTypes())); } private: diff --git a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp index b1ecfe2d8fc..0c5a450135f 100644 --- a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp +++ b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp @@ -78,11 +78,11 @@ public: column.name += ".size0"; column.type = std::make_shared(); - resolveOrdinaryFunctionNode(*function_node, "equals"); - function_arguments_nodes.clear(); function_arguments_nodes.push_back(std::make_shared(column, column_source)); function_arguments_nodes.push_back(std::make_shared(static_cast(0))); + + resolveOrdinaryFunctionNode(*function_node, "equals"); } else if (function_name == "notEmpty") { @@ -90,11 +90,11 @@ public: column.name += ".size0"; column.type = std::make_shared(); - resolveOrdinaryFunctionNode(*function_node, "notEquals"); - function_arguments_nodes.clear(); function_arguments_nodes.push_back(std::make_shared(column, column_source)); function_arguments_nodes.push_back(std::make_shared(static_cast(0))); + + resolveOrdinaryFunctionNode(*function_node, "notEquals"); } } else if (column_type.isNullable()) @@ -112,9 +112,9 @@ public: column.name += ".null"; column.type = std::make_shared(); - resolveOrdinaryFunctionNode(*function_node, "not"); - function_arguments_nodes = {std::make_shared(column, column_source)}; + + resolveOrdinaryFunctionNode(*function_node, "not"); } } else if (column_type.isMap()) @@ -182,9 +182,9 @@ public: column.type = data_type_map.getKeyType(); auto has_function_argument = std::make_shared(column, column_source); - resolveOrdinaryFunctionNode(*function_node, "has"); - function_arguments_nodes[0] = std::move(has_function_argument); + + resolveOrdinaryFunctionNode(*function_node, "has"); } } } @@ -192,9 +192,8 @@ public: private: inline void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const { - auto function_result_type = function_node.getResultType(); auto function = FunctionFactory::instance().get(function_name, context); - function_node.resolveAsFunction(function, std::move(function_result_type)); + function_node.resolveAsFunction(function->build(function_node.getArgumentTypes())); } ContextPtr & context; diff --git a/src/Analyzer/Passes/FuseFunctionsPass.cpp b/src/Analyzer/Passes/FuseFunctionsPass.cpp index f7e703cdaa4..f354a7b1ec3 100644 --- a/src/Analyzer/Passes/FuseFunctionsPass.cpp +++ b/src/Analyzer/Passes/FuseFunctionsPass.cpp @@ -59,14 +59,13 @@ private: std::unordered_set names_to_collect; }; -QueryTreeNodePtr createResolvedFunction(const ContextPtr & context, const String & name, const DataTypePtr & result_type, QueryTreeNodes arguments) +QueryTreeNodePtr createResolvedFunction(const ContextPtr & context, const String & name, QueryTreeNodes arguments) { auto function_node = std::make_shared(name); auto function = FunctionFactory::instance().get(name, context); - function_node->resolveAsFunction(std::move(function), result_type); function_node->getArguments().getNodes() = std::move(arguments); - + 
function_node->resolveAsFunction(function->build(function_node->getArgumentTypes())); return function_node; } @@ -74,11 +73,6 @@ FunctionNodePtr createResolvedAggregateFunction(const String & name, const Query { auto function_node = std::make_shared(name); - AggregateFunctionProperties properties; - auto aggregate_function = AggregateFunctionFactory::instance().get(name, {argument->getResultType()}, parameters, properties); - function_node->resolveAsAggregateFunction(aggregate_function, aggregate_function->getReturnType()); - function_node->getArguments().getNodes() = { argument }; - if (!parameters.empty()) { QueryTreeNodes parameter_nodes; @@ -86,18 +80,27 @@ FunctionNodePtr createResolvedAggregateFunction(const String & name, const Query parameter_nodes.emplace_back(std::make_shared(param)); function_node->getParameters().getNodes() = std::move(parameter_nodes); } + function_node->getArguments().getNodes() = { argument }; + + AggregateFunctionProperties properties; + auto aggregate_function = AggregateFunctionFactory::instance().get( + name, + { argument->getResultType() }, + parameters, + properties); + function_node->resolveAsAggregateFunction(aggregate_function); return function_node; } -QueryTreeNodePtr createTupleElementFunction(const ContextPtr & context, const DataTypePtr & result_type, QueryTreeNodePtr argument, UInt64 index) +QueryTreeNodePtr createTupleElementFunction(const ContextPtr & context, QueryTreeNodePtr argument, UInt64 index) { - return createResolvedFunction(context, "tupleElement", result_type, {std::move(argument), std::make_shared(index)}); + return createResolvedFunction(context, "tupleElement", {argument, std::make_shared(index)}); } -QueryTreeNodePtr createArrayElementFunction(const ContextPtr & context, const DataTypePtr & result_type, QueryTreeNodePtr argument, UInt64 index) +QueryTreeNodePtr createArrayElementFunction(const ContextPtr & context, QueryTreeNodePtr argument, UInt64 index) { - return createResolvedFunction(context, "arrayElement", result_type, {std::move(argument), std::make_shared(index)}); + return createResolvedFunction(context, "arrayElement", {argument, std::make_shared(index)}); } void replaceWithSumCount(QueryTreeNodePtr & node, const FunctionNodePtr & sum_count_node, ContextPtr context) @@ -115,20 +118,20 @@ void replaceWithSumCount(QueryTreeNodePtr & node, const FunctionNodePtr & sum_co if (function_name == "sum") { assert(node->getResultType()->equals(*sum_count_result_type->getElement(0))); - node = createTupleElementFunction(context, node->getResultType(), sum_count_node, 1); + node = createTupleElementFunction(context, sum_count_node, 1); } else if (function_name == "count") { assert(node->getResultType()->equals(*sum_count_result_type->getElement(1))); - node = createTupleElementFunction(context, node->getResultType(), sum_count_node, 2); + node = createTupleElementFunction(context, sum_count_node, 2); } else if (function_name == "avg") { - auto sum_result = createTupleElementFunction(context, sum_count_result_type->getElement(0), sum_count_node, 1); - auto count_result = createTupleElementFunction(context, sum_count_result_type->getElement(1), sum_count_node, 2); + auto sum_result = createTupleElementFunction(context, sum_count_node, 1); + auto count_result = createTupleElementFunction(context, sum_count_node, 2); /// To avoid integer division by zero - auto count_float_result = createResolvedFunction(context, "toFloat64", std::make_shared(), {count_result}); - node = createResolvedFunction(context, "divide", 
node->getResultType(), {sum_result, count_float_result}); + auto count_float_result = createResolvedFunction(context, "toFloat64", {count_result}); + node = createResolvedFunction(context, "divide", {sum_result, count_float_result}); } else { @@ -238,7 +241,7 @@ void tryFuseQuantiles(QueryTreeNodePtr query_tree_node, ContextPtr context) for (size_t i = 0; i < nodes_set.size(); ++i) { size_t array_index = i + 1; - *nodes[i] = createArrayElementFunction(context, result_array_type->getNestedType(), quantiles_node, array_index); + *nodes[i] = createArrayElementFunction(context, quantiles_node, array_index); } } } diff --git a/src/Analyzer/Passes/IfChainToMultiIfPass.cpp b/src/Analyzer/Passes/IfChainToMultiIfPass.cpp index f400b11765e..020edfe4820 100644 --- a/src/Analyzer/Passes/IfChainToMultiIfPass.cpp +++ b/src/Analyzer/Passes/IfChainToMultiIfPass.cpp @@ -55,8 +55,8 @@ public: return; auto multi_if_function = std::make_shared("multiIf"); - multi_if_function->resolveAsFunction(multi_if_function_ptr, std::make_shared()); multi_if_function->getArguments().getNodes() = std::move(multi_if_arguments); + multi_if_function->resolveAsFunction(multi_if_function_ptr->build(multi_if_function->getArgumentTypes())); node = std::move(multi_if_function); } diff --git a/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp b/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp index 65120632c0c..776fe63c803 100644 --- a/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp +++ b/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp @@ -47,49 +47,64 @@ QueryTreeNodePtr createCastFunction(QueryTreeNodePtr from, DataTypePtr result_ty auto enum_literal_node = std::make_shared(std::move(enum_literal)); auto cast_function = FunctionFactory::instance().get("_CAST", std::move(context)); - QueryTreeNodes arguments{std::move(from), std::move(enum_literal_node)}; + QueryTreeNodes arguments{ std::move(from), std::move(enum_literal_node) }; auto function_node = std::make_shared("_CAST"); - function_node->resolveAsFunction(std::move(cast_function), std::move(result_type)); function_node->getArguments().getNodes() = std::move(arguments); + function_node->resolveAsFunction(cast_function->build(function_node->getArgumentTypes())); + return function_node; } /// if(arg1, arg2, arg3) will be transformed to if(arg1, _CAST(arg2, Enum...), _CAST(arg3, Enum...)) /// where Enum is generated based on the possible values stored in string_values void changeIfArguments( - QueryTreeNodePtr & first, QueryTreeNodePtr & second, const std::set & string_values, const ContextPtr & context) + FunctionNode & if_node, const std::set & string_values, const ContextPtr & context) { auto result_type = getEnumType(string_values); - first = createCastFunction(first, result_type, context); - second = createCastFunction(second, result_type, context); + auto & argument_nodes = if_node.getArguments().getNodes(); + + argument_nodes[1] = createCastFunction(argument_nodes[1], result_type, context); + argument_nodes[2] = createCastFunction(argument_nodes[2], result_type, context); + + auto if_resolver = FunctionFactory::instance().get("if", context); + + if_node.resolveAsFunction(if_resolver->build(if_node.getArgumentTypes())); } /// transform(value, array_from, array_to, default_value) will be transformed to transform(value, array_from, _CAST(array_to, Array(Enum...)), _CAST(default_value, Enum...)) /// where Enum is generated based on the possible values stored in string_values void changeTransformArguments( - QueryTreeNodePtr & array_to, - QueryTreeNodePtr & 
default_value, + FunctionNode & transform_node, const std::set & string_values, const ContextPtr & context) { auto result_type = getEnumType(string_values); + auto & arguments = transform_node.getArguments().getNodes(); + + auto & array_to = arguments[2]; + auto & default_value = arguments[3]; + array_to = createCastFunction(array_to, std::make_shared(result_type), context); default_value = createCastFunction(default_value, std::move(result_type), context); + + auto transform_resolver = FunctionFactory::instance().get("transform", context); + + transform_node.resolveAsFunction(transform_resolver->build(transform_node.getArgumentTypes())); } void wrapIntoToString(FunctionNode & function_node, QueryTreeNodePtr arg, ContextPtr context) { - assert(isString(function_node.getResultType())); - auto to_string_function = FunctionFactory::instance().get("toString", std::move(context)); - QueryTreeNodes arguments{std::move(arg)}; - - function_node.resolveAsFunction(std::move(to_string_function), std::make_shared()); + QueryTreeNodes arguments{ std::move(arg) }; function_node.getArguments().getNodes() = std::move(arguments); + + function_node.resolveAsFunction(to_string_function->build(function_node.getArgumentTypes())); + + assert(isString(function_node.getResultType())); } class ConvertStringsToEnumVisitor : public InDepthQueryTreeVisitor @@ -117,7 +132,8 @@ public: return; auto modified_if_node = function_node->clone(); - auto & argument_nodes = modified_if_node->as()->getArguments().getNodes(); + auto * function_if_node = modified_if_node->as(); + auto & argument_nodes = function_if_node->getArguments().getNodes(); const auto * first_literal = argument_nodes[1]->as(); const auto * second_literal = argument_nodes[2]->as(); @@ -132,7 +148,7 @@ public: string_values.insert(first_literal->getValue().get()); string_values.insert(second_literal->getValue().get()); - changeIfArguments(argument_nodes[1], argument_nodes[2], string_values, context); + changeIfArguments(*function_if_node, string_values, context); wrapIntoToString(*function_node, std::move(modified_if_node), context); return; } @@ -143,7 +159,8 @@ public: return; auto modified_transform_node = function_node->clone(); - auto & argument_nodes = modified_transform_node->as()->getArguments().getNodes(); + auto * function_modified_transform_node = modified_transform_node->as(); + auto & argument_nodes = function_modified_transform_node->getArguments().getNodes(); if (!isString(function_node->getResultType())) return; @@ -176,7 +193,7 @@ public: string_values.insert(literal_default->getValue().get()); - changeTransformArguments(argument_nodes[2], argument_nodes[3], string_values, context); + changeTransformArguments(*function_modified_transform_node, string_values, context); wrapIntoToString(*function_node, std::move(modified_transform_node), context); return; } diff --git a/src/Analyzer/Passes/MultiIfToIfPass.cpp b/src/Analyzer/Passes/MultiIfToIfPass.cpp index 6d2ebac33e6..7e13675bf98 100644 --- a/src/Analyzer/Passes/MultiIfToIfPass.cpp +++ b/src/Analyzer/Passes/MultiIfToIfPass.cpp @@ -27,7 +27,7 @@ public: return; auto result_type = function_node->getResultType(); - function_node->resolveAsFunction(if_function_ptr, std::move(result_type)); + function_node->resolveAsFunction(if_function_ptr->build(function_node->getArgumentTypes())); } private: diff --git a/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp b/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp index cd6aa4d76f4..3580b64497d 100644 --- 
a/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp +++ b/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp @@ -53,12 +53,10 @@ private: static inline void resolveAsCountAggregateFunction(FunctionNode & function_node) { - auto function_result_type = function_node.getResultType(); - AggregateFunctionProperties properties; auto aggregate_function = AggregateFunctionFactory::instance().get("count", {}, {}, properties); - function_node.resolveAsAggregateFunction(std::move(aggregate_function), std::move(function_result_type)); + function_node.resolveAsAggregateFunction(std::move(aggregate_function)); } }; diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 6f56d6fca8e..8d923d2a69d 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -4302,11 +4302,13 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi bool force_grouping_standard_compatibility = scope.context->getSettingsRef().force_grouping_standard_compatibility; auto grouping_function = std::make_shared(force_grouping_standard_compatibility); auto grouping_function_adaptor = std::make_shared(std::move(grouping_function)); - function_node.resolveAsFunction(std::move(grouping_function_adaptor), std::make_shared()); + function_node.resolveAsFunction(grouping_function_adaptor->build({})); return result_projection_names; } } + const auto & settings = scope.context->getSettingsRef(); + if (function_node.isWindowFunction()) { if (!AggregateFunctionFactory::instance().isAggregateFunctionName(function_name)) @@ -4324,10 +4326,14 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi "Window function '{}' does not support lambda arguments", function_name); - AggregateFunctionProperties properties; - auto aggregate_function = AggregateFunctionFactory::instance().get(function_name, argument_types, parameters, properties); + bool need_add_or_null = settings.aggregate_functions_null_for_empty && !function_name.ends_with("OrNull"); - function_node.resolveAsWindowFunction(aggregate_function, aggregate_function->getReturnType()); + AggregateFunctionProperties properties; + auto aggregate_function = need_add_or_null + ? AggregateFunctionFactory::instance().get(function_name + "OrNull", argument_types, parameters, properties) + : AggregateFunctionFactory::instance().get(function_name, argument_types, parameters, properties); + + function_node.resolveAsWindowFunction(aggregate_function); bool window_node_is_identifier = function_node.getWindowNode()->getNodeType() == QueryTreeNodeType::IDENTIFIER; ProjectionName window_projection_name = resolveWindow(function_node.getWindowNode(), scope); @@ -4384,9 +4390,13 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi "Aggregate function '{}' does not support lambda arguments", function_name); + bool need_add_or_null = settings.aggregate_functions_null_for_empty && !function_name.ends_with("OrNull"); + AggregateFunctionProperties properties; - auto aggregate_function = AggregateFunctionFactory::instance().get(function_name, argument_types, parameters, properties); - function_node.resolveAsAggregateFunction(aggregate_function, aggregate_function->getReturnType()); + auto aggregate_function = need_add_or_null + ? 
AggregateFunctionFactory::instance().get(function_name + "OrNull", argument_types, parameters, properties) + : AggregateFunctionFactory::instance().get(function_name, argument_types, parameters, properties); + function_node.resolveAsAggregateFunction(aggregate_function); return result_projection_names; } @@ -4563,6 +4573,8 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi constant_value = std::make_shared(std::move(column_constant_value), result_type); } } + + function_node.resolveAsFunction(std::move(function_base)); } catch (Exception & e) { @@ -4570,8 +4582,6 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi throw; } - function_node.resolveAsFunction(std::move(function), std::move(result_type)); - if (constant_value) node = std::make_shared(std::move(constant_value), node); diff --git a/src/Analyzer/Passes/SumIfToCountIfPass.cpp b/src/Analyzer/Passes/SumIfToCountIfPass.cpp index 91c277d35b3..7e120b6828d 100644 --- a/src/Analyzer/Passes/SumIfToCountIfPass.cpp +++ b/src/Analyzer/Passes/SumIfToCountIfPass.cpp @@ -81,6 +81,7 @@ public: if (nested_if_function_arguments_nodes.size() != 3) return; + auto & cond_argument = nested_if_function_arguments_nodes[0]; const auto * if_true_condition_constant_node = nested_if_function_arguments_nodes[1]->as(); const auto * if_false_condition_constant_node = nested_if_function_arguments_nodes[2]->as(); @@ -107,8 +108,8 @@ public: return; } - /// Rewrite `sum(if(cond, 0, 1))` into `countIf(not(cond))`. - if (if_true_condition_value == 0 && if_false_condition_value == 1) + /// Rewrite `sum(if(cond, 0, 1))` into `countIf(not(cond))` if condition is not Nullable (otherwise the result can be different). + if (if_true_condition_value == 0 && if_false_condition_value == 1 && !cond_argument->getResultType()->isNullable()) { DataTypePtr not_function_result_type = std::make_shared(); @@ -117,11 +118,12 @@ public: not_function_result_type = makeNullable(not_function_result_type); auto not_function = std::make_shared("not"); - not_function->resolveAsFunction(FunctionFactory::instance().get("not", context), std::move(not_function_result_type)); auto & not_function_arguments = not_function->getArguments().getNodes(); not_function_arguments.push_back(std::move(nested_if_function_arguments_nodes[0])); + not_function->resolveAsFunction(FunctionFactory::instance().get("not", context)->build(not_function->getArgumentTypes())); + function_node_arguments_nodes[0] = std::move(not_function); function_node_arguments_nodes.resize(1); @@ -139,8 +141,7 @@ private: function_node.getAggregateFunction()->getParameters(), properties); - auto function_result_type = function_node.getResultType(); - function_node.resolveAsAggregateFunction(std::move(aggregate_function), std::move(function_result_type)); + function_node.resolveAsAggregateFunction(std::move(aggregate_function)); } ContextPtr & context; diff --git a/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp b/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp index 1716c37228a..37bad70da57 100644 --- a/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp +++ b/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp @@ -76,7 +76,7 @@ public: properties); auto function_result_type = function_node->getResultType(); - function_node->resolveAsAggregateFunction(std::move(aggregate_function), std::move(function_result_type)); + function_node->resolveAsAggregateFunction(std::move(aggregate_function)); } }; diff --git 
a/src/Analyzer/QueryTreePassManager.cpp b/src/Analyzer/QueryTreePassManager.cpp index ca9d4e3d1e3..06a1fec4698 100644 --- a/src/Analyzer/QueryTreePassManager.cpp +++ b/src/Analyzer/QueryTreePassManager.cpp @@ -21,6 +21,7 @@ #include #include +#include #include #include @@ -44,6 +45,23 @@ namespace class ValidationChecker : public InDepthQueryTreeVisitor { String pass_name; + + void visitColumn(ColumnNode * column) const + { + if (column->getColumnSourceOrNull() == nullptr) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Column {} {} query tree node does not have valid source node after running {} pass", + column->getColumnName(), column->getColumnType(), pass_name); + } + + void visitFunction(FunctionNode * function) const + { + if (!function->isResolved()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Function {} is not resolved after running {} pass", + function->dumpTree(), pass_name); + } + public: explicit ValidationChecker(String pass_name_) : pass_name(std::move(pass_name_)) @@ -51,13 +69,10 @@ public: void visitImpl(QueryTreeNodePtr & node) const { - auto * column = node->as(); - if (!column) - return; - if (column->getColumnSourceOrNull() == nullptr) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Column {} {} query tree node does not have valid source node after running {} pass", - column->getColumnName(), column->getColumnType(), pass_name); + if (auto * column = node->as()) + return visitColumn(column); + else if (auto * function = node->as()) + return visitFunction(function); } }; #endif diff --git a/src/Analyzer/SortNode.cpp b/src/Analyzer/SortNode.cpp index 3f91724e9b7..da1c52ff0ef 100644 --- a/src/Analyzer/SortNode.cpp +++ b/src/Analyzer/SortNode.cpp @@ -91,7 +91,8 @@ bool SortNode::isEqualImpl(const IQueryTreeNode & rhs) const void SortNode::updateTreeHashImpl(HashState & hash_state) const { hash_state.update(sort_direction); - hash_state.update(nulls_sort_direction); + /// use some determined value if `nulls_sort_direction` is `nullopt` + hash_state.update(nulls_sort_direction.value_or(sort_direction)); hash_state.update(with_fill); if (collator) diff --git a/src/Backups/BackupCoordinationLocal.cpp b/src/Backups/BackupCoordinationLocal.cpp index d4064902a40..e199e43fe01 100644 --- a/src/Backups/BackupCoordinationLocal.cpp +++ b/src/Backups/BackupCoordinationLocal.cpp @@ -188,15 +188,6 @@ std::optional BackupCoordinationLocal::getFileInfo(const SizeAndChecks return it->second; } -std::optional BackupCoordinationLocal::getFileSizeAndChecksum(const String & file_name) const -{ - std::lock_guard lock{mutex}; - auto it = file_names.find(file_name); - if (it == file_names.end()) - return std::nullopt; - return it->second; -} - String BackupCoordinationLocal::getNextArchiveSuffix() { std::lock_guard lock{mutex}; diff --git a/src/Backups/BackupCoordinationLocal.h b/src/Backups/BackupCoordinationLocal.h index aca7f71545b..43145a42bf6 100644 --- a/src/Backups/BackupCoordinationLocal.h +++ b/src/Backups/BackupCoordinationLocal.h @@ -48,7 +48,6 @@ public: std::optional getFileInfo(const String & file_name) const override; std::optional getFileInfo(const SizeAndChecksum & size_and_checksum) const override; - std::optional getFileSizeAndChecksum(const String & file_name) const override; String getNextArchiveSuffix() override; Strings getAllArchiveSuffixes() const override; diff --git a/src/Backups/BackupCoordinationRemote.cpp b/src/Backups/BackupCoordinationRemote.cpp index 8d8cfc4225e..18789802769 100644 --- a/src/Backups/BackupCoordinationRemote.cpp +++ 
b/src/Backups/BackupCoordinationRemote.cpp @@ -575,15 +575,6 @@ std::optional BackupCoordinationRemote::getFileInfo(const SizeAndCheck return deserializeFileInfo(file_info_str); } -std::optional BackupCoordinationRemote::getFileSizeAndChecksum(const String & file_name) const -{ - auto zk = getZooKeeper(); - String size_and_checksum; - if (!zk->tryGet(zookeeper_path + "/file_names/" + escapeForFileName(file_name), size_and_checksum)) - return std::nullopt; - return deserializeSizeAndChecksum(size_and_checksum); -} - String BackupCoordinationRemote::getNextArchiveSuffix() { auto zk = getZooKeeper(); diff --git a/src/Backups/BackupCoordinationRemote.h b/src/Backups/BackupCoordinationRemote.h index 83ddd7b16dc..711fadb539e 100644 --- a/src/Backups/BackupCoordinationRemote.h +++ b/src/Backups/BackupCoordinationRemote.h @@ -51,7 +51,6 @@ public: bool hasFiles(const String & directory) const override; std::optional getFileInfo(const String & file_name) const override; std::optional getFileInfo(const SizeAndChecksum & size_and_checksum) const override; - std::optional getFileSizeAndChecksum(const String & file_name) const override; String getNextArchiveSuffix() override; Strings getAllArchiveSuffixes() const override; diff --git a/src/Backups/BackupFactory.h b/src/Backups/BackupFactory.h index 9057d2cbfae..92a5e16533c 100644 --- a/src/Backups/BackupFactory.h +++ b/src/Backups/BackupFactory.h @@ -34,6 +34,7 @@ public: bool is_internal_backup = false; std::shared_ptr backup_coordination; std::optional backup_uuid; + bool deduplicate_files = true; }; static BackupFactory & instance(); diff --git a/src/Backups/BackupIO_S3.cpp b/src/Backups/BackupIO_S3.cpp index 6bc3c86edf0..f2f0a2ef5e3 100644 --- a/src/Backups/BackupIO_S3.cpp +++ b/src/Backups/BackupIO_S3.cpp @@ -80,6 +80,12 @@ namespace throw Exception(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR); return outcome.GetResult().GetContents(); } + + bool isNotFoundError(Aws::S3::S3Errors error) + { + return error == Aws::S3::S3Errors::RESOURCE_NOT_FOUND + || error == Aws::S3::S3Errors::NO_SUCH_KEY; + } } @@ -129,7 +135,7 @@ BackupWriterS3::BackupWriterS3( , request_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString()).request_settings) , log(&Poco::Logger::get("BackupWriterS3")) { - request_settings.updateFromSettingsIfEmpty(context_->getSettingsRef()); + request_settings.updateFromSettings(context_->getSettingsRef()); request_settings.max_single_read_retries = context_->getSettingsRef().s3_max_single_read_retries; // FIXME: Avoid taking value for endpoint } @@ -210,20 +216,21 @@ void BackupWriterS3::copyObjectMultipartImpl( std::vector part_tags; size_t position = 0; - size_t upload_part_size = request_settings.min_upload_part_size; + const auto & settings = request_settings.getUploadSettings(); + size_t upload_part_size = settings.min_upload_part_size; for (size_t part_number = 1; position < size; ++part_number) { /// Check that part number is not too big. - if (part_number > request_settings.max_part_number) + if (part_number > settings.max_part_number) { throw Exception( ErrorCodes::INVALID_CONFIG_PARAMETER, "Part number exceeded {} while writing {} bytes to S3. 
Check min_upload_part_size = {}, max_upload_part_size = {}, " "upload_part_size_multiply_factor = {}, upload_part_size_multiply_parts_count_threshold = {}, max_single_operation_copy_size = {}", - request_settings.max_part_number, size, request_settings.min_upload_part_size, request_settings.max_upload_part_size, - request_settings.upload_part_size_multiply_factor, request_settings.upload_part_size_multiply_parts_count_threshold, - request_settings.max_single_operation_copy_size); + settings.max_part_number, size, settings.min_upload_part_size, settings.max_upload_part_size, + settings.upload_part_size_multiply_factor, settings.upload_part_size_multiply_parts_count_threshold, + settings.max_single_operation_copy_size); } size_t next_position = std::min(position + upload_part_size, size); @@ -256,10 +263,10 @@ void BackupWriterS3::copyObjectMultipartImpl( position = next_position; /// Maybe increase `upload_part_size` (we need to increase it sometimes to keep `part_number` less or equal than `max_part_number`). - if (part_number % request_settings.upload_part_size_multiply_parts_count_threshold == 0) + if (part_number % settings.upload_part_size_multiply_parts_count_threshold == 0) { - upload_part_size *= request_settings.upload_part_size_multiply_factor; - upload_part_size = std::min(upload_part_size, request_settings.max_upload_part_size); + upload_part_size *= settings.upload_part_size_multiply_factor; + upload_part_size = std::min(upload_part_size, settings.max_upload_part_size); } } @@ -302,7 +309,7 @@ void BackupWriterS3::copyFileNative(DiskPtr from_disk, const String & file_name_ auto file_path = fs::path(s3_uri.key) / file_name_to; auto head = S3::headObject(*client, source_bucket, objects[0].absolute_path).GetResult(); - if (static_cast(head.GetContentLength()) < request_settings.max_single_operation_copy_size) + if (static_cast(head.GetContentLength()) < request_settings.getUploadSettings().max_single_operation_copy_size) { copyObjectImpl( source_bucket, objects[0].absolute_path, s3_uri.bucket, file_path, head); @@ -369,7 +376,7 @@ void BackupWriterS3::removeFile(const String & file_name) request.SetBucket(s3_uri.bucket); request.SetKey(fs::path(s3_uri.key) / file_name); auto outcome = client->DeleteObject(request); - if (!outcome.IsSuccess()) + if (!outcome.IsSuccess() && !isNotFoundError(outcome.GetError().GetErrorType())) throw Exception(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR); } @@ -427,7 +434,7 @@ void BackupWriterS3::removeFilesBatch(const Strings & file_names) request.SetDelete(delkeys); auto outcome = client->DeleteObjects(request); - if (!outcome.IsSuccess()) + if (!outcome.IsSuccess() && !isNotFoundError(outcome.GetError().GetErrorType())) throw Exception(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR); } } diff --git a/src/Backups/BackupImpl.cpp b/src/Backups/BackupImpl.cpp index 6d9ac0bbdad..ec35b8ed07a 100644 --- a/src/Backups/BackupImpl.cpp +++ b/src/Backups/BackupImpl.cpp @@ -167,17 +167,19 @@ BackupImpl::BackupImpl( const ContextPtr & context_, bool is_internal_backup_, const std::shared_ptr & coordination_, - const std::optional & backup_uuid_) + const std::optional & backup_uuid_, + bool deduplicate_files_) : backup_name_for_logging(backup_name_for_logging_) , archive_params(archive_params_) , use_archives(!archive_params.archive_name.empty()) , open_mode(OpenMode::WRITE) , writer(std::move(writer_)) , is_internal_backup(is_internal_backup_) - , coordination(coordination_ ? 
coordination_ : std::make_shared()) + , coordination(coordination_) , uuid(backup_uuid_) , version(CURRENT_BACKUP_VERSION) , base_backup_info(base_backup_info_) + , deduplicate_files(deduplicate_files_) , log(&Poco::Logger::get("BackupImpl")) { open(context_); @@ -287,6 +289,7 @@ void BackupImpl::writeBackupMetadata() Poco::AutoPtr config{new Poco::Util::XMLConfiguration()}; config->setInt("version", CURRENT_BACKUP_VERSION); + config->setBool("deduplicate_files", deduplicate_files); config->setString("timestamp", toString(LocalDateTime{timestamp})); config->setString("uuid", toString(*uuid)); @@ -759,7 +762,7 @@ void BackupImpl::writeFile(const String & file_name, BackupEntryPtr entry) }; /// Empty file, nothing to backup - if (info.size == 0) + if (info.size == 0 && deduplicate_files) { coordination->addFileInfo(info); return; @@ -828,7 +831,7 @@ void BackupImpl::writeFile(const String & file_name, BackupEntryPtr entry) } /// Maybe we have a copy of this file in the backup already. - if (coordination->getFileInfo(std::pair{info.size, info.checksum})) + if (coordination->getFileInfo(std::pair{info.size, info.checksum}) && deduplicate_files) { LOG_TRACE(log, "File {} already exist in current backup, adding reference", adjusted_path); coordination->addFileInfo(info); @@ -861,7 +864,7 @@ void BackupImpl::writeFile(const String & file_name, BackupEntryPtr entry) bool is_data_file_required; coordination->addFileInfo(info, is_data_file_required); - if (!is_data_file_required) + if (!is_data_file_required && deduplicate_files) { LOG_TRACE(log, "File {} doesn't exist in current backup, but we have file with same size and checksum", adjusted_path); return; /// We copy data only if it's a new combination of size & checksum. diff --git a/src/Backups/BackupImpl.h b/src/Backups/BackupImpl.h index 7df4638affa..9fc881bf680 100644 --- a/src/Backups/BackupImpl.h +++ b/src/Backups/BackupImpl.h @@ -47,9 +47,10 @@ public: const std::optional & base_backup_info_, std::shared_ptr writer_, const ContextPtr & context_, - bool is_internal_backup_ = false, - const std::shared_ptr & coordination_ = {}, - const std::optional & backup_uuid_ = {}); + bool is_internal_backup_, + const std::shared_ptr & coordination_, + const std::optional & backup_uuid_, + bool deduplicate_files_); ~BackupImpl() override; @@ -132,6 +133,7 @@ private: String lock_file_name; std::atomic num_files_written = 0; bool writing_finalized = false; + bool deduplicate_files = true; const Poco::Logger * log; }; diff --git a/src/Backups/BackupSettings.cpp b/src/Backups/BackupSettings.cpp index 5266296c248..8c54b29141a 100644 --- a/src/Backups/BackupSettings.cpp +++ b/src/Backups/BackupSettings.cpp @@ -65,6 +65,7 @@ namespace M(String, password) \ M(Bool, structure_only) \ M(Bool, async) \ + M(Bool, deduplicate_files) \ M(UInt64, shard_num) \ M(UInt64, replica_num) \ M(Bool, internal) \ diff --git a/src/Backups/BackupSettings.h b/src/Backups/BackupSettings.h index 5c5f336aa45..2e7717c3afe 100644 --- a/src/Backups/BackupSettings.h +++ b/src/Backups/BackupSettings.h @@ -32,6 +32,9 @@ struct BackupSettings /// Whether the BACKUP command must return immediately without waiting until the backup has completed. bool async = false; + /// Whether the BACKUP will omit similar files (within one backup only). + bool deduplicate_files = true; + /// 1-based shard index to store in the backup. 0 means all shards. /// Can only be used with BACKUP ON CLUSTER. 
size_t shard_num = 0; diff --git a/src/Backups/BackupsWorker.cpp b/src/Backups/BackupsWorker.cpp index affcea94c57..267400ce66d 100644 --- a/src/Backups/BackupsWorker.cpp +++ b/src/Backups/BackupsWorker.cpp @@ -286,6 +286,7 @@ void BackupsWorker::doBackup( backup_create_params.is_internal_backup = backup_settings.internal; backup_create_params.backup_coordination = backup_coordination; backup_create_params.backup_uuid = backup_settings.backup_uuid; + backup_create_params.deduplicate_files = backup_settings.deduplicate_files; BackupMutablePtr backup = BackupFactory::instance().createBackup(backup_create_params); /// Write the backup. diff --git a/src/Backups/IBackupCoordination.h b/src/Backups/IBackupCoordination.h index 5e120218544..7ff911488aa 100644 --- a/src/Backups/IBackupCoordination.h +++ b/src/Backups/IBackupCoordination.h @@ -108,7 +108,6 @@ public: virtual std::optional getFileInfo(const String & file_name) const = 0; virtual std::optional getFileInfo(const SizeAndChecksum & size_and_checksum) const = 0; - virtual std::optional getFileSizeAndChecksum(const String & file_name) const = 0; /// Generates a new archive suffix, e.g. "001", "002", "003", ... virtual String getNextArchiveSuffix() = 0; diff --git a/src/Backups/registerBackupEngineS3.cpp b/src/Backups/registerBackupEngineS3.cpp index 33b0049dc4d..ad190b4f4a0 100644 --- a/src/Backups/registerBackupEngineS3.cpp +++ b/src/Backups/registerBackupEngineS3.cpp @@ -116,7 +116,16 @@ void registerBackupEngineS3(BackupFactory & factory) else { auto writer = std::make_shared(S3::URI{s3_uri}, access_key_id, secret_access_key, params.context); - return std::make_unique(backup_name_for_logging, archive_params, params.base_backup_info, writer, params.context, params.is_internal_backup, params.backup_coordination, params.backup_uuid); + return std::make_unique( + backup_name_for_logging, + archive_params, + params.base_backup_info, + writer, + params.context, + params.is_internal_backup, + params.backup_coordination, + params.backup_uuid, + params.deduplicate_files); } #else throw Exception("S3 support is disabled", ErrorCodes::SUPPORT_IS_DISABLED); diff --git a/src/Backups/registerBackupEnginesFileAndDisk.cpp b/src/Backups/registerBackupEnginesFileAndDisk.cpp index def9c5ba188..49ad51534eb 100644 --- a/src/Backups/registerBackupEnginesFileAndDisk.cpp +++ b/src/Backups/registerBackupEnginesFileAndDisk.cpp @@ -181,7 +181,16 @@ void registerBackupEnginesFileAndDisk(BackupFactory & factory) writer = std::make_shared(path); else writer = std::make_shared(disk, path); - return std::make_unique(backup_name_for_logging, archive_params, params.base_backup_info, writer, params.context, params.is_internal_backup, params.backup_coordination, params.backup_uuid); + return std::make_unique( + backup_name_for_logging, + archive_params, + params.base_backup_info, + writer, + params.context, + params.is_internal_backup, + params.backup_coordination, + params.backup_uuid, + params.deduplicate_files); } }; diff --git a/src/Bridge/IBridge.cpp b/src/Bridge/IBridge.cpp index afaaf11b26a..2d97bba6287 100644 --- a/src/Bridge/IBridge.cpp +++ b/src/Bridge/IBridge.cpp @@ -61,14 +61,8 @@ namespace Poco::Net::SocketAddress socketBindListen(Poco::Net::ServerSocket & socket, const std::string & host, UInt16 port, Poco::Logger * log) { auto address = makeSocketAddress(host, port, log); -#if POCO_VERSION < 0x01080000 - socket.bind(address, /* reuseAddress = */ true); -#else socket.bind(address, /* reuseAddress = */ true, /* reusePort = */ false); -#endif - 
socket.listen(/* backlog = */ 64); - return address; } } diff --git a/src/BridgeHelper/IBridgeHelper.cpp b/src/BridgeHelper/IBridgeHelper.cpp index 7d6ce74c698..3445b655784 100644 --- a/src/BridgeHelper/IBridgeHelper.cpp +++ b/src/BridgeHelper/IBridgeHelper.cpp @@ -2,11 +2,10 @@ #include #include -#include -#include #include #include + namespace fs = std::filesystem; namespace DB @@ -97,9 +96,13 @@ std::unique_ptr IBridgeHelper::startBridgeCommand() LOG_TRACE(getLog(), "Starting {}", serviceAlias()); + /// We will terminate it with the KILL signal instead of the TERM signal, + /// because it's more reliable for arbitrary third-party ODBC drivers. + /// The drivers can spawn threads, install their own signal handlers... we don't care. + ShellCommand::Config command_config(path.string()); command_config.arguments = cmd_args; - command_config.terminate_in_destructor_strategy = ShellCommand::DestructorStrategy(true); + command_config.terminate_in_destructor_strategy = ShellCommand::DestructorStrategy(true, SIGKILL); return ShellCommand::executeDirect(command_config); } diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 99c3c0c3fa2..5e8fe368dfa 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -88,6 +88,7 @@ add_headers_and_sources(clickhouse_common_io Common) add_headers_and_sources(clickhouse_common_io Common/HashTable) add_headers_and_sources(clickhouse_common_io IO) add_headers_and_sources(clickhouse_common_io IO/Archives) +add_headers_and_sources(clickhouse_common_io IO/Resource) add_headers_and_sources(clickhouse_common_io IO/S3) list (REMOVE_ITEM clickhouse_common_io_sources Common/malloc.cpp Common/new_delete.cpp) @@ -331,6 +332,12 @@ macro (dbms_target_link_libraries) endforeach () endmacro () +macro (dbms_target_include_directories) + foreach (module ${all_modules}) + target_include_directories (${module} ${ARGN}) + endforeach () +endmacro () + dbms_target_include_directories (PUBLIC "${ClickHouse_SOURCE_DIR}/src" "${ClickHouse_BINARY_DIR}/src") target_include_directories (clickhouse_common_io PUBLIC "${ClickHouse_SOURCE_DIR}/src" "${ClickHouse_BINARY_DIR}/src") @@ -343,7 +350,7 @@ set_source_files_properties( Common/Elf.cpp Common/Dwarf.cpp Common/SymbolIndex.cpp - PROPERTIES COMPILE_FLAGS "-O3 ${WITHOUT_COVERAGE}") + PROPERTIES COMPILE_FLAGS "-O2 ${WITHOUT_COVERAGE}") target_link_libraries (clickhouse_common_io PRIVATE @@ -389,6 +396,7 @@ if (TARGET ch_contrib::cpuid) endif() dbms_target_link_libraries(PUBLIC ch_contrib::abseil_swiss_tables) +target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::abseil_swiss_tables) # Make dbms depend on roaring instead of clickhouse_common_io so that roaring itself can depend on clickhouse_common_io # That way we we can redirect malloc/free functions avoiding circular dependencies @@ -588,6 +596,11 @@ if (TARGET ch_contrib::annoy) dbms_target_link_libraries(PUBLIC ch_contrib::annoy) endif() +if (TARGET ch_rust::skim) + # Add only -I, library is needed only for clickhouse-client/clickhouse-local + dbms_target_include_directories(PRIVATE $) +endif() + include ("${ClickHouse_SOURCE_DIR}/cmake/add_check.cmake") if (ENABLE_TESTS) diff --git a/src/Client/CMakeLists.txt b/src/Client/CMakeLists.txt index 119414a8a70..83bbe418246 100644 --- a/src/Client/CMakeLists.txt +++ b/src/Client/CMakeLists.txt @@ -1,3 +1,3 @@ if (ENABLE_EXAMPLES) add_subdirectory(examples) -endif() \ No newline at end of file +endif() diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 09e44a3ac09..9c8700d4142 100644 --- 
a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -1,42 +1,30 @@ #include +#include +#include +#include +#include +#include -#include -#include -#include -#include - -#include "config.h" - +#include +#include +#include +#include #include #include -#include -#include #include -#include #include #include #include #include -#include -#include -#include -#include -#include -#include - -#include "config_version.h" - #include #include #include #include #include #include -#include - -#include -#include -#include "TestTags.h" +#include +#include +#include #include #include @@ -53,26 +41,36 @@ #include #include #include +#include #include #include #include -#include -#include #include #include #include #include #include +#include +#include #include #include #include #include #include -#include #include -#include + +#include +#include + #include +#include +#include +#include +#include + +#include "config_version.h" +#include "config.h" namespace fs = std::filesystem; @@ -1036,7 +1034,13 @@ void ClientBase::onEndOfStream() progress_indication.clearProgressOutput(*tty_buf); if (output_format) + { + /// Do our best to estimate the start of the query so the output format matches the one reported by the server + bool is_running = false; + output_format->setStartTime( + clock_gettime_ns(CLOCK_MONOTONIC) - static_cast(progress_indication.elapsedSeconds() * 1000000000), is_running); output_format->finalize(); + } resetOutput(); diff --git a/src/Client/ClientBaseHelpers.h b/src/Client/ClientBaseHelpers.h index 2a79332eb98..adc1c81b3c5 100644 --- a/src/Client/ClientBaseHelpers.h +++ b/src/Client/ClientBaseHelpers.h @@ -4,7 +4,7 @@ #include "config.h" #if USE_REPLXX -# include +# include #endif diff --git a/src/Client/ConnectionParameters.cpp b/src/Client/ConnectionParameters.cpp index 6d538fee307..1a0922b2133 100644 --- a/src/Client/ConnectionParameters.cpp +++ b/src/Client/ConnectionParameters.cpp @@ -9,7 +9,6 @@ #include #include #include -#include #include #include diff --git a/base/base/LineReader.cpp b/src/Client/LineReader.cpp similarity index 99% rename from base/base/LineReader.cpp rename to src/Client/LineReader.cpp index cc632f79638..f49e48be617 100644 --- a/base/base/LineReader.cpp +++ b/src/Client/LineReader.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include @@ -65,6 +65,9 @@ void addNewWords(Words & to, const Words & from, Compare comp) } +namespace DB +{ + replxx::Replxx::completions_t LineReader::Suggest::getCompletions(const String & prefix, size_t prefix_length) { std::string_view last_word; @@ -202,3 +205,5 @@ LineReader::InputStatus LineReader::readOneLine(const String & prompt) trim(input); return INPUT_LINE; } + +} diff --git a/base/base/LineReader.h b/src/Client/LineReader.h similarity index 99% rename from base/base/LineReader.h rename to src/Client/LineReader.h index d4ab327fe00..321cf41b77e 100644 --- a/base/base/LineReader.h +++ b/src/Client/LineReader.h @@ -9,6 +9,9 @@ #include #include +namespace DB +{ + class LineReader { public: @@ -68,3 +71,5 @@ protected: virtual InputStatus readOneLine(const String & prompt); virtual void addToHistory(const String &) {} }; + +} diff --git a/src/Client/QueryFuzzer.cpp b/src/Client/QueryFuzzer.cpp index d5cd4ef1548..018e0c6f130 100644 --- a/src/Client/QueryFuzzer.cpp +++ b/src/Client/QueryFuzzer.cpp @@ -327,9 +327,7 @@ void QueryFuzzer::fuzzOrderByList(IAST * ast) // Add element if (fuzz_rand() % 50 == 0) { - auto pos = list->children.empty() - ? 
list->children.begin() - : list->children.begin() + fuzz_rand() % list->children.size(); + auto * pos = list->children.empty() ? list->children.begin() : list->children.begin() + fuzz_rand() % list->children.size(); auto col = getRandomColumnLike(); if (col) { @@ -373,9 +371,7 @@ void QueryFuzzer::fuzzColumnLikeExpressionList(IAST * ast) // Add element if (fuzz_rand() % 50 == 0) { - auto pos = impl->children.empty() - ? impl->children.begin() - : impl->children.begin() + fuzz_rand() % impl->children.size(); + auto * pos = impl->children.empty() ? impl->children.begin() : impl->children.begin() + fuzz_rand() % impl->children.size(); auto col = getRandomColumnLike(); if (col) impl->children.insert(pos, col); diff --git a/base/base/ReplxxLineReader.cpp b/src/Client/ReplxxLineReader.cpp similarity index 97% rename from base/base/ReplxxLineReader.cpp rename to src/Client/ReplxxLineReader.cpp index 9e29f7744fa..9fc0fec761d 100644 --- a/base/base/ReplxxLineReader.cpp +++ b/src/Client/ReplxxLineReader.cpp @@ -1,6 +1,10 @@ -#include +#include #include +#include +#include +#include + #include #include #include @@ -108,13 +112,11 @@ void writeRetry(int fd, const std::string & data) } std::string readFile(const std::string & path) { - std::ifstream t(path); - std::string str; - t.seekg(0, std::ios::end); - str.reserve(t.tellg()); - t.seekg(0, std::ios::beg); - str.assign((std::istreambuf_iterator(t)), std::istreambuf_iterator()); - return str; + std::string out; + DB::WriteBufferFromString out_buffer(out); + DB::ReadBufferFromFile in_buffer(path); + DB::copyData(in_buffer, out_buffer); + return out; } /// Simple wrapper for temporary files. @@ -269,6 +271,9 @@ void convertHistoryFile(const std::string & path, replxx::Replxx & rx) } +namespace DB +{ + static bool replxx_last_is_delimiter = false; void ReplxxLineReader::setLastIsDelimiter(bool flag) { @@ -402,10 +407,11 @@ ReplxxLineReader::ReplxxLineReader( words.push_back(hs.get().text()); } + std::string current_query(rx.get_state().text()); std::string new_query; try { - new_query = std::string(skim(words)); + new_query = std::string(skim(current_query, words)); } catch (const std::exception & e) { @@ -507,3 +513,5 @@ void ReplxxLineReader::enableBracketedPaste() bracketed_paste_enabled = true; rx.enable_bracketed_paste(); } + +} diff --git a/base/base/ReplxxLineReader.h b/src/Client/ReplxxLineReader.h similarity index 98% rename from base/base/ReplxxLineReader.h rename to src/Client/ReplxxLineReader.h index 428fbf144c3..d36a1d0f42c 100644 --- a/base/base/ReplxxLineReader.h +++ b/src/Client/ReplxxLineReader.h @@ -1,9 +1,11 @@ #pragma once #include "LineReader.h" - #include +namespace DB +{ + class ReplxxLineReader : public LineReader { public: @@ -36,3 +38,5 @@ private: std::string editor; }; + +} diff --git a/src/Client/Suggest.h b/src/Client/Suggest.h index 25d45f7ffaf..cfe9315879c 100644 --- a/src/Client/Suggest.h +++ b/src/Client/Suggest.h @@ -5,8 +5,8 @@ #include #include #include +#include #include -#include #include diff --git a/src/Columns/ColumnAggregateFunction.cpp b/src/Columns/ColumnAggregateFunction.cpp index f51a0426199..58643f7a9b7 100644 --- a/src/Columns/ColumnAggregateFunction.cpp +++ b/src/Columns/ColumnAggregateFunction.cpp @@ -146,7 +146,7 @@ MutableColumnPtr ColumnAggregateFunction::convertToValues(MutableColumnPtr colum /// insertResultInto may invalidate states, so we must unshare ownership of them column_aggregate_func.ensureOwnership(); - MutableColumnPtr res = func->getReturnType()->createColumn(); + MutableColumnPtr res = 
func->getResultType()->createColumn(); res->reserve(data.size()); /// If there are references to states in final column, we must hold their ownership diff --git a/src/Columns/ColumnFunction.h b/src/Columns/ColumnFunction.h index 4781406c3b9..257bd1146fd 100644 --- a/src/Columns/ColumnFunction.h +++ b/src/Columns/ColumnFunction.h @@ -13,7 +13,7 @@ namespace ErrorCodes } class IFunctionBase; -using FunctionBasePtr = std::shared_ptr; +using FunctionBasePtr = std::shared_ptr; /** A column containing a lambda expression. * Behaves like a constant-column. Contains an expression, but not input or output data. diff --git a/src/Common/AllocationTrace.h b/src/Common/AllocationTrace.h deleted file mode 100644 index 332808c8015..00000000000 --- a/src/Common/AllocationTrace.h +++ /dev/null @@ -1,16 +0,0 @@ -#pragma once -#include - -/// This is a structure which is returned by MemoryTracker. -/// Methods onAlloc/onFree should be called after actual memory allocation if it succeed. -/// For now, it will only collect allocation trace with sample_probability. -struct AllocationTrace -{ - AllocationTrace() = default; - explicit AllocationTrace(double sample_probability_); - - void onAlloc(void * ptr, size_t size) const; - void onFree(void * ptr, size_t size) const; - - double sample_probability = 0; -}; diff --git a/src/Common/Allocator.h b/src/Common/Allocator.h index 8c4f2ef1690..c348eaea006 100644 --- a/src/Common/Allocator.h +++ b/src/Common/Allocator.h @@ -92,10 +92,8 @@ public: void * alloc(size_t size, size_t alignment = 0) { checkSize(size); - auto trace = CurrentMemoryTracker::alloc(size); - void * ptr = allocNoTrack(size, alignment); - trace.onAlloc(ptr, size); - return ptr; + CurrentMemoryTracker::alloc(size); + return allocNoTrack(size, alignment); } /// Free memory range. @@ -105,8 +103,7 @@ public: { checkSize(size); freeNoTrack(buf, size); - auto trace = CurrentMemoryTracker::free(size); - trace.onFree(buf, size); + CurrentMemoryTracker::free(size); } catch (...) { @@ -132,16 +129,13 @@ public: && alignment <= MALLOC_MIN_ALIGNMENT) { /// Resize malloc'd memory region with no special alignment requirement. - auto trace = CurrentMemoryTracker::realloc(old_size, new_size); - trace.onFree(buf, old_size); + CurrentMemoryTracker::realloc(old_size, new_size); void * new_buf = ::realloc(buf, new_size); if (nullptr == new_buf) DB::throwFromErrno(fmt::format("Allocator: Cannot realloc from {} to {}.", ReadableSize(old_size), ReadableSize(new_size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY); buf = new_buf; - trace.onAlloc(buf, new_size); - if constexpr (clear_memory) if (new_size > old_size) memset(reinterpret_cast(buf) + old_size, 0, new_size - old_size); @@ -149,8 +143,7 @@ public: else if (old_size >= MMAP_THRESHOLD && new_size >= MMAP_THRESHOLD) { /// Resize mmap'd memory region. - auto trace = CurrentMemoryTracker::realloc(old_size, new_size); - trace.onFree(buf, old_size); + CurrentMemoryTracker::realloc(old_size, new_size); // On apple and freebsd self-implemented mremap used (common/mremap.h) buf = clickhouse_mremap(buf, old_size, new_size, MREMAP_MAYMOVE, @@ -159,17 +152,14 @@ public: DB::throwFromErrno(fmt::format("Allocator: Cannot mremap memory chunk from {} to {}.", ReadableSize(old_size), ReadableSize(new_size)), DB::ErrorCodes::CANNOT_MREMAP); - trace.onAlloc(buf, new_size); /// No need for zero-fill, because mmap guarantees it. } else if (new_size < MMAP_THRESHOLD) { /// Small allocs that requires a copy. Assume there's enough memory in system. Call CurrentMemoryTracker once. 
- auto trace = CurrentMemoryTracker::realloc(old_size, new_size); - trace.onFree(buf, old_size); + CurrentMemoryTracker::realloc(old_size, new_size); void * new_buf = allocNoTrack(new_size, alignment); - trace.onAlloc(new_buf, new_size); memcpy(new_buf, buf, std::min(old_size, new_size)); freeNoTrack(buf, old_size); buf = new_buf; diff --git a/src/Common/AllocatorWithMemoryTracking.h b/src/Common/AllocatorWithMemoryTracking.h index b43870e05b2..815c326ed62 100644 --- a/src/Common/AllocatorWithMemoryTracking.h +++ b/src/Common/AllocatorWithMemoryTracking.h @@ -30,24 +30,21 @@ struct AllocatorWithMemoryTracking throw std::bad_alloc(); size_t bytes = n * sizeof(T); - auto trace = CurrentMemoryTracker::alloc(bytes); + CurrentMemoryTracker::alloc(bytes); T * p = static_cast(malloc(bytes)); if (!p) throw std::bad_alloc(); - trace.onAlloc(p, bytes); - return p; } void deallocate(T * p, size_t n) noexcept { - size_t bytes = n * sizeof(T); - free(p); - auto trace = CurrentMemoryTracker::free(bytes); - trace.onFree(p, bytes); + + size_t bytes = n * sizeof(T); + CurrentMemoryTracker::free(bytes); } }; diff --git a/src/Common/AsynchronousMetrics.cpp b/src/Common/AsynchronousMetrics.cpp index d4626d317c7..b68fcab2449 100644 --- a/src/Common/AsynchronousMetrics.cpp +++ b/src/Common/AsynchronousMetrics.cpp @@ -2,12 +2,7 @@ #include #include #include -#include #include -#include -#include -#include -#include #include #include #include diff --git a/src/Common/BinStringDecodeHelper.h b/src/Common/BinStringDecodeHelper.h new file mode 100644 index 00000000000..513a4196b6f --- /dev/null +++ b/src/Common/BinStringDecodeHelper.h @@ -0,0 +1,76 @@ +#pragma once + +#include + +namespace DB +{ + +static void inline hexStringDecode(const char * pos, const char * end, char *& out, size_t word_size = 2) +{ + if ((end - pos) & 1) + { + *out = unhex(*pos); + ++out; + ++pos; + } + while (pos < end) + { + *out = unhex2(pos); + pos += word_size; + ++out; + } + *out = '\0'; + ++out; +} + +static void inline binStringDecode(const char * pos, const char * end, char *& out) +{ + if (pos == end) + { + *out = '\0'; + ++out; + return; + } + + UInt8 left = 0; + + /// end - pos is the length of input. + /// (length & 7) to make remain bits length mod 8 is zero to split. + /// e.g. the length is 9 and the input is "101000001", + /// first left_cnt is 1, left is 0, right shift, pos is 1, left = 1 + /// then, left_cnt is 0, remain input is '01000001'. 
+ for (UInt8 left_cnt = (end - pos) & 7; left_cnt > 0; --left_cnt) + { + left = left << 1; + if (*pos != '0') + left += 1; + ++pos; + } + + if (left != 0 || end - pos == 0) + { + *out = left; + ++out; + } + + assert((end - pos) % 8 == 0); + + while (end - pos != 0) + { + UInt8 c = 0; + for (UInt8 i = 0; i < 8; ++i) + { + c = c << 1; + if (*pos != '0') + c += 1; + ++pos; + } + *out = c; + ++out; + } + + *out = '\0'; + ++out; +} + +} diff --git a/src/Common/CurrentMemoryTracker.cpp b/src/Common/CurrentMemoryTracker.cpp index 0147a095185..720df07efb9 100644 --- a/src/Common/CurrentMemoryTracker.cpp +++ b/src/Common/CurrentMemoryTracker.cpp @@ -37,7 +37,7 @@ MemoryTracker * getMemoryTracker() using DB::current_thread; -AllocationTrace CurrentMemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded) +void CurrentMemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded) { #ifdef MEMORY_TRACKER_DEBUG_CHECKS if (unlikely(memory_tracker_always_throw_logical_error_on_allocation)) @@ -55,9 +55,8 @@ AllocationTrace CurrentMemoryTracker::allocImpl(Int64 size, bool throw_if_memory if (will_be > current_thread->untracked_memory_limit) { - auto res = memory_tracker->allocImpl(will_be, throw_if_memory_exceeded); + memory_tracker->allocImpl(will_be, throw_if_memory_exceeded); current_thread->untracked_memory = 0; - return res; } else { @@ -69,40 +68,36 @@ AllocationTrace CurrentMemoryTracker::allocImpl(Int64 size, bool throw_if_memory /// total_memory_tracker only, ignore untracked_memory else { - return memory_tracker->allocImpl(size, throw_if_memory_exceeded); + memory_tracker->allocImpl(size, throw_if_memory_exceeded); } - - return AllocationTrace(memory_tracker->getSampleProbability()); } - - return AllocationTrace(0); } void CurrentMemoryTracker::check() { if (auto * memory_tracker = getMemoryTracker()) - std::ignore = memory_tracker->allocImpl(0, true); + memory_tracker->allocImpl(0, true); } -AllocationTrace CurrentMemoryTracker::alloc(Int64 size) +void CurrentMemoryTracker::alloc(Int64 size) { bool throw_if_memory_exceeded = true; - return allocImpl(size, throw_if_memory_exceeded); + allocImpl(size, throw_if_memory_exceeded); } -AllocationTrace CurrentMemoryTracker::allocNoThrow(Int64 size) +void CurrentMemoryTracker::allocNoThrow(Int64 size) { bool throw_if_memory_exceeded = false; - return allocImpl(size, throw_if_memory_exceeded); + allocImpl(size, throw_if_memory_exceeded); } -AllocationTrace CurrentMemoryTracker::realloc(Int64 old_size, Int64 new_size) +void CurrentMemoryTracker::realloc(Int64 old_size, Int64 new_size) { Int64 addition = new_size - old_size; - return addition > 0 ? alloc(addition) : free(-addition); + addition > 0 ? 
alloc(addition) : free(-addition); } -AllocationTrace CurrentMemoryTracker::free(Int64 size) +void CurrentMemoryTracker::free(Int64 size) { if (auto * memory_tracker = getMemoryTracker()) { @@ -111,20 +106,15 @@ AllocationTrace CurrentMemoryTracker::free(Int64 size) current_thread->untracked_memory -= size; if (current_thread->untracked_memory < -current_thread->untracked_memory_limit) { - Int64 untracked_memory = current_thread->untracked_memory; + memory_tracker->free(-current_thread->untracked_memory); current_thread->untracked_memory = 0; - return memory_tracker->free(-untracked_memory); } } /// total_memory_tracker only, ignore untracked_memory else { - return memory_tracker->free(size); + memory_tracker->free(size); } - - return AllocationTrace(memory_tracker->getSampleProbability()); } - - return AllocationTrace(0); } diff --git a/src/Common/CurrentMemoryTracker.h b/src/Common/CurrentMemoryTracker.h index ba46f458e4a..e125e4cbe4a 100644 --- a/src/Common/CurrentMemoryTracker.h +++ b/src/Common/CurrentMemoryTracker.h @@ -1,20 +1,19 @@ #pragma once #include -#include /// Convenience methods, that use current thread's memory_tracker if it is available. struct CurrentMemoryTracker { /// Call the following functions before calling of corresponding operations with memory allocators. - [[nodiscard]] static AllocationTrace alloc(Int64 size); - [[nodiscard]] static AllocationTrace allocNoThrow(Int64 size); - [[nodiscard]] static AllocationTrace realloc(Int64 old_size, Int64 new_size); + static void alloc(Int64 size); + static void allocNoThrow(Int64 size); + static void realloc(Int64 old_size, Int64 new_size); /// This function should be called after memory deallocation. - [[nodiscard]] static AllocationTrace free(Int64 size); + static void free(Int64 size); static void check(); private: - [[nodiscard]] static AllocationTrace allocImpl(Int64 size, bool throw_if_memory_exceeded); + static void allocImpl(Int64 size, bool throw_if_memory_exceeded); }; diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index 1b76fef1db4..95333eccbcd 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -640,6 +640,9 @@ M(669, NAMED_COLLECTION_DOESNT_EXIST) \ M(670, NAMED_COLLECTION_ALREADY_EXISTS) \ M(671, NAMED_COLLECTION_IS_IMMUTABLE) \ + M(672, INVALID_SCHEDULER_NODE) \ + M(673, RESOURCE_ACCESS_DENIED) \ + M(674, RESOURCE_NOT_FOUND) \ \ M(999, KEEPER_EXCEPTION) \ M(1000, POCO_EXCEPTION) \ diff --git a/src/Common/FiberStack.h b/src/Common/FiberStack.h index 91bb632d807..c55608311d0 100644 --- a/src/Common/FiberStack.h +++ b/src/Common/FiberStack.h @@ -57,8 +57,7 @@ public: } /// Do not count guard page in memory usage. - auto trace = CurrentMemoryTracker::alloc(num_pages * page_size); - trace.onAlloc(vp, num_pages * page_size); + CurrentMemoryTracker::alloc(num_pages * page_size); boost::context::stack_context sctx; sctx.size = num_bytes; @@ -78,7 +77,6 @@ public: ::munmap(vp, sctx.size); /// Do not count guard page in memory usage. 
- auto trace = CurrentMemoryTracker::free(sctx.size - page_size); - trace.onFree(vp, sctx.size - page_size); + CurrentMemoryTracker::free(sctx.size - page_size); } }; diff --git a/src/Common/MemoryTracker.cpp b/src/Common/MemoryTracker.cpp index d0d0d6b8686..27d0adcf24f 100644 --- a/src/Common/MemoryTracker.cpp +++ b/src/Common/MemoryTracker.cpp @@ -1,7 +1,6 @@ #include "MemoryTracker.h" #include -#include #include #include #include @@ -83,53 +82,6 @@ inline std::string_view toDescription(OvercommitResult result) } } -bool shouldTrackAllocation(DB::Float64 probability, void * ptr) -{ - return sipHash64(uintptr_t(ptr)) < std::numeric_limits::max() * probability; -} - -AllocationTrace updateAllocationTrace(AllocationTrace trace, const std::optional & sample_probability) -{ - if (unlikely(sample_probability)) - return AllocationTrace(*sample_probability); - - return trace; -} - -AllocationTrace getAllocationTrace(std::optional & sample_probability) -{ - if (unlikely(sample_probability)) - return AllocationTrace(*sample_probability); - - return AllocationTrace(0); -} - -} - -AllocationTrace::AllocationTrace(double sample_probability_) : sample_probability(sample_probability_) {} - -void AllocationTrace::onAlloc(void * ptr, size_t size) const -{ - if (likely(sample_probability == 0)) - return; - - if (sample_probability < 1 && !shouldTrackAllocation(sample_probability, ptr)) - return; - - MemoryTrackerBlockerInThread untrack_lock(VariableContext::Global); - DB::TraceSender::send(DB::TraceType::MemorySample, StackTrace(), {.size = Int64(size), .ptr = ptr}); -} - -void AllocationTrace::onFree(void * ptr, size_t size) const -{ - if (likely(sample_probability == 0)) - return; - - if (sample_probability < 1 && !shouldTrackAllocation(sample_probability, ptr)) - return; - - MemoryTrackerBlockerInThread untrack_lock(VariableContext::Global); - DB::TraceSender::send(DB::TraceType::MemorySample, StackTrace(), {.size = -Int64(size), .ptr = ptr}); } namespace ProfileEvents @@ -183,7 +135,7 @@ void MemoryTracker::logMemoryUsage(Int64 current) const } -AllocationTrace MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryTracker * query_tracker) +void MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryTracker * query_tracker) { if (size < 0) throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Negative size ({}) is passed to MemoryTracker. It is a bug.", size); @@ -202,14 +154,9 @@ AllocationTrace MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceed /// Since the MemoryTrackerBlockerInThread should respect the level, we should go to the next parent. if (auto * loaded_next = parent.load(std::memory_order_relaxed)) - { - MemoryTracker * tracker = level == VariableContext::Process ? this : query_tracker; - return updateAllocationTrace( - loaded_next->allocImpl(size, throw_if_memory_exceeded, tracker), - sample_probability); - } - - return getAllocationTrace(sample_probability); + loaded_next->allocImpl(size, throw_if_memory_exceeded, + level == VariableContext::Process ? 
this : query_tracker); + return; } /** Using memory_order_relaxed means that if allocations are done simultaneously, @@ -236,6 +183,14 @@ AllocationTrace MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceed allocation_traced = true; } + std::bernoulli_distribution sample(sample_probability); + if (unlikely(sample_probability > 0.0 && sample(thread_local_rng))) + { + MemoryTrackerBlockerInThread untrack_lock(VariableContext::Global); + DB::TraceSender::send(DB::TraceType::MemorySample, StackTrace(), {.size = size}); + allocation_traced = true; + } + std::bernoulli_distribution fault(fault_probability); if (unlikely(fault_probability > 0.0 && fault(thread_local_rng))) { @@ -354,22 +309,16 @@ AllocationTrace MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceed } if (auto * loaded_next = parent.load(std::memory_order_relaxed)) - { - MemoryTracker * tracker = level == VariableContext::Process ? this : query_tracker; - return updateAllocationTrace( - loaded_next->allocImpl(size, throw_if_memory_exceeded, tracker), - sample_probability); - } - - return getAllocationTrace(sample_probability); + loaded_next->allocImpl(size, throw_if_memory_exceeded, + level == VariableContext::Process ? this : query_tracker); } void MemoryTracker::adjustWithUntrackedMemory(Int64 untracked_memory) { if (untracked_memory > 0) - std::ignore = allocImpl(untracked_memory, /*throw_if_memory_exceeded*/ false); + allocImpl(untracked_memory, /*throw_if_memory_exceeded*/ false); else - std::ignore = free(-untracked_memory); + free(-untracked_memory); } bool MemoryTracker::updatePeak(Int64 will_be, bool log_memory_usage) @@ -388,7 +337,8 @@ bool MemoryTracker::updatePeak(Int64 will_be, bool log_memory_usage) return false; } -AllocationTrace MemoryTracker::free(Int64 size) + +void MemoryTracker::free(Int64 size) { if (MemoryTrackerBlockerInThread::isBlocked(level)) { @@ -403,9 +353,15 @@ AllocationTrace MemoryTracker::free(Int64 size) /// Since the MemoryTrackerBlockerInThread should respect the level, we should go to the next parent. 
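[Editor's illustration] The hunk above restores per-call Bernoulli sampling for the memory profiler in place of the removed per-pointer hashing in AllocationTrace. A self-contained sketch of that sampling pattern, assuming a simplified shouldSample helper and a local RNG rather than ClickHouse's thread_local_rng and TraceSender:

#include <cstdio>
#include <random>

thread_local std::mt19937_64 sampling_rng{std::random_device{}()};

/// Decide, independently for each tracked allocation, whether a MemorySample-style trace would be emitted.
bool shouldSample(double sample_probability)
{
    if (sample_probability <= 0.0)
        return false;
    std::bernoulli_distribution sample(sample_probability);
    return sample(sampling_rng);
}

int main()
{
    int sampled = 0;
    for (int i = 0; i < 100000; ++i)
        sampled += shouldSample(0.01) ? 1 : 0;
    std::printf("sampled %d of 100000 simulated allocations (expected around 1000)\n", sampled);
}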
if (auto * loaded_next = parent.load(std::memory_order_relaxed)) - return updateAllocationTrace(loaded_next->free(size), sample_probability); + loaded_next->free(size); + return; + } - return getAllocationTrace(sample_probability); + std::bernoulli_distribution sample(sample_probability); + if (unlikely(sample_probability > 0.0 && sample(thread_local_rng))) + { + MemoryTrackerBlockerInThread untrack_lock(VariableContext::Global); + DB::TraceSender::send(DB::TraceType::MemorySample, StackTrace(), {.size = -size}); } Int64 accounted_size = size; @@ -433,15 +389,12 @@ AllocationTrace MemoryTracker::free(Int64 size) if (auto * overcommit_tracker_ptr = overcommit_tracker.load(std::memory_order_relaxed)) overcommit_tracker_ptr->tryContinueQueryExecutionAfterFree(accounted_size); - AllocationTrace res = getAllocationTrace(sample_probability); if (auto * loaded_next = parent.load(std::memory_order_relaxed)) - res = updateAllocationTrace(loaded_next->free(size), sample_probability); + loaded_next->free(size); auto metric_loaded = metric.load(std::memory_order_relaxed); if (metric_loaded != CurrentMetrics::end()) CurrentMetrics::sub(metric_loaded, accounted_size); - - return res; } @@ -525,14 +478,3 @@ void MemoryTracker::setOrRaiseProfilerLimit(Int64 value) while ((value == 0 || old_value < value) && !profiler_limit.compare_exchange_weak(old_value, value)) ; } - -double MemoryTracker::getSampleProbability() -{ - if (sample_probability) - return *sample_probability; - - if (auto * loaded_next = parent.load(std::memory_order_relaxed)) - return loaded_next->getSampleProbability(); - - return 0; -} diff --git a/src/Common/MemoryTracker.h b/src/Common/MemoryTracker.h index e1f61b1585a..f6113d31423 100644 --- a/src/Common/MemoryTracker.h +++ b/src/Common/MemoryTracker.h @@ -2,11 +2,9 @@ #include #include -#include #include #include #include -#include #if !defined(NDEBUG) #define MEMORY_TRACKER_DEBUG_CHECKS @@ -67,7 +65,7 @@ private: double fault_probability = 0; /// To randomly sample allocations and deallocations in trace_log. - std::optional sample_probability; + double sample_probability = 0; /// Singly-linked list. All information will be passed to subsequent memory trackers also (it allows to implement trackers hierarchy). /// In terms of tree nodes it is the list of parents. Lifetime of these trackers should "include" lifetime of current tracker. @@ -92,8 +90,8 @@ private: /// allocImpl(...) and free(...) 
should not be used directly friend struct CurrentMemoryTracker; - [[nodiscard]] AllocationTrace allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryTracker * query_tracker = nullptr); - [[nodiscard]] AllocationTrace free(Int64 size); + void allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryTracker * query_tracker = nullptr); + void free(Int64 size); public: static constexpr auto USAGE_EVENT_NAME = "MemoryTrackerUsage"; @@ -148,8 +146,6 @@ public: sample_probability = value; } - double getSampleProbability(); - void setProfilerStep(Int64 value) { profiler_step = value; diff --git a/src/Common/MemoryTrackerBlockerInThread.h b/src/Common/MemoryTrackerBlockerInThread.h index 73794049007..d3882056f54 100644 --- a/src/Common/MemoryTrackerBlockerInThread.h +++ b/src/Common/MemoryTrackerBlockerInThread.h @@ -28,5 +28,4 @@ public: } friend class MemoryTracker; - friend struct AllocationTrace; }; diff --git a/src/Common/ShellCommand.cpp b/src/Common/ShellCommand.cpp index 17ee1b880b3..b9f90c8cbb1 100644 --- a/src/Common/ShellCommand.cpp +++ b/src/Common/ShellCommand.cpp @@ -72,11 +72,11 @@ ShellCommand::~ShellCommand() if (process_terminated_normally) return; - LOG_TRACE(getLogger(), "Will kill shell command pid {} with SIGTERM", pid); + LOG_TRACE(getLogger(), "Will kill shell command pid {} with signal {}", pid, config.terminate_in_destructor_strategy.termination_signal); - int retcode = kill(pid, SIGTERM); + int retcode = kill(pid, config.terminate_in_destructor_strategy.termination_signal); if (retcode != 0) - LOG_WARNING(getLogger(), "Cannot kill shell command pid {} errno '{}'", pid, errnoToString()); + LOG_WARNING(getLogger(), "Cannot kill shell command pid {}, error: '{}'", pid, errnoToString()); } else { diff --git a/src/Common/ShellCommand.h b/src/Common/ShellCommand.h index dfc4a826f62..da65d2ae494 100644 --- a/src/Common/ShellCommand.h +++ b/src/Common/ShellCommand.h @@ -27,18 +27,18 @@ namespace DB class ShellCommand final { public: - ~ShellCommand(); struct DestructorStrategy final { - explicit DestructorStrategy(bool terminate_in_destructor_, size_t wait_for_normal_exit_before_termination_seconds_ = 0) - : terminate_in_destructor(terminate_in_destructor_) + explicit DestructorStrategy(bool terminate_in_destructor_, int termination_signal_, size_t wait_for_normal_exit_before_termination_seconds_ = 0) + : terminate_in_destructor(terminate_in_destructor_), termination_signal(termination_signal_) , wait_for_normal_exit_before_termination_seconds(wait_for_normal_exit_before_termination_seconds_) { } bool terminate_in_destructor; + int termination_signal; /// If terminate in destructor is true, command will wait until send SIGTERM signal to created process size_t wait_for_normal_exit_before_termination_seconds = 0; @@ -64,7 +64,7 @@ public: bool pipe_stdin_only = false; - DestructorStrategy terminate_in_destructor_strategy = DestructorStrategy(false); + DestructorStrategy terminate_in_destructor_strategy = DestructorStrategy(false, 0); }; /// Run the command using /bin/sh -c. diff --git a/src/Common/Stopwatch.h b/src/Common/Stopwatch.h index 32d1fca337d..79f650179e2 100644 --- a/src/Common/Stopwatch.h +++ b/src/Common/Stopwatch.h @@ -40,6 +40,10 @@ public: * Pass CLOCK_MONOTONIC_COARSE, if you need better performance with acceptable cost of several milliseconds of inaccuracy. 
*/ explicit Stopwatch(clockid_t clock_type_ = CLOCK_MONOTONIC) : clock_type(clock_type_) { start(); } + explicit Stopwatch(clockid_t clock_type_, UInt64 start_nanoseconds, bool is_running_) + : start_ns(start_nanoseconds), clock_type(clock_type_), is_running(is_running_) + { + } void start() { start_ns = nanoseconds(); is_running = true; } void stop() { stop_ns = nanoseconds(); is_running = false; } @@ -51,6 +55,8 @@ public: UInt64 elapsedMilliseconds() const { return elapsedNanoseconds() / 1000000UL; } double elapsedSeconds() const { return static_cast(elapsedNanoseconds()) / 1000000000ULL; } + UInt64 getStart() { return start_ns; } + private: UInt64 start_ns = 0; UInt64 stop_ns = 0; diff --git a/src/Common/ThreadPool.cpp b/src/Common/ThreadPool.cpp index b70b1fc5e60..e65b5511e05 100644 --- a/src/Common/ThreadPool.cpp +++ b/src/Common/ThreadPool.cpp @@ -156,9 +156,10 @@ ReturnType ThreadPoolImpl::scheduleImpl(Job job, ssize_t priority, std:: propagate_opentelemetry_tracing_context ? DB::OpenTelemetry::CurrentContext() : DB::OpenTelemetry::TracingContextOnThread()); ++scheduled_jobs; - new_job_or_shutdown.notify_one(); } + new_job_or_shutdown.notify_one(); + return static_cast(true); } diff --git a/src/Common/ThreadStatus.cpp b/src/Common/ThreadStatus.cpp index b62a7af6c71..81650f107a4 100644 --- a/src/Common/ThreadStatus.cpp +++ b/src/Common/ThreadStatus.cpp @@ -188,13 +188,10 @@ void ThreadStatus::updatePerformanceCounters() } } -void ThreadStatus::assertState(const std::initializer_list & permitted_states, const char * description) const +void ThreadStatus::assertState(ThreadState permitted_state, const char * description) const { - for (auto permitted_state : permitted_states) - { - if (getCurrentState() == permitted_state) - return; - } + if (getCurrentState() == permitted_state) + return; if (description) throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected thread state {}: {}", getCurrentState(), description); diff --git a/src/Common/ThreadStatus.h b/src/Common/ThreadStatus.h index 6ec46e3e9dc..cbcd8c3c30a 100644 --- a/src/Common/ThreadStatus.h +++ b/src/Common/ThreadStatus.h @@ -87,10 +87,6 @@ public: LogsLevel client_logs_level = LogsLevel::none; String query; - /// Query without new lines (see toOneLineQuery()) - /// Used to print in case of fatal error - /// (to avoid calling extra code in the fatal error handler) - String one_line_query; UInt64 normalized_query_hash = 0; std::vector finished_threads_counters_memory; @@ -296,7 +292,7 @@ protected: void logToQueryThreadLog(QueryThreadLog & thread_log, const String & current_database, std::chrono::time_point now); - void assertState(const std::initializer_list & permitted_states, const char * description = nullptr) const; + void assertState(ThreadState permitted_state, const char * description = nullptr) const; private: diff --git a/src/Common/TraceSender.cpp b/src/Common/TraceSender.cpp index 91d07367a82..64d7b2b0eaf 100644 --- a/src/Common/TraceSender.cpp +++ b/src/Common/TraceSender.cpp @@ -33,7 +33,6 @@ void TraceSender::send(TraceType trace_type, const StackTrace & stack_trace, Ext + sizeof(TraceType) /// trace type + sizeof(UInt64) /// thread_id + sizeof(Int64) /// size - + sizeof(void *) /// ptr + sizeof(ProfileEvents::Event) /// event + sizeof(ProfileEvents::Count); /// increment @@ -75,7 +74,6 @@ void TraceSender::send(TraceType trace_type, const StackTrace & stack_trace, Ext writePODBinary(trace_type, out); writePODBinary(thread_id, out); writePODBinary(extras.size, out); - writePODBinary(UInt64(extras.ptr), out); 
writePODBinary(extras.event, out); writePODBinary(extras.increment, out); diff --git a/src/Common/TraceSender.h b/src/Common/TraceSender.h index 68ba15ee400..21b44b651dd 100644 --- a/src/Common/TraceSender.h +++ b/src/Common/TraceSender.h @@ -28,9 +28,8 @@ class TraceSender public: struct Extras { - /// size, ptr - for memory tracing is the amount of memory allocated; for other trace types it is 0. + /// size - for memory tracing is the amount of memory allocated; for other trace types it is 0. Int64 size{}; - void * ptr = nullptr; /// Event type and increment for 'ProfileEvent' trace type; for other trace types defaults. ProfileEvents::Event event{ProfileEvents::end()}; ProfileEvents::Count increment{}; diff --git a/src/Common/UnicodeBar.cpp b/src/Common/UnicodeBar.cpp index efc85ad35e4..bad39d8080c 100644 --- a/src/Common/UnicodeBar.cpp +++ b/src/Common/UnicodeBar.cpp @@ -9,6 +9,13 @@ #include +namespace DB +{ + namespace ErrorCodes + { + extern const int LOGICAL_ERROR; + } +} namespace UnicodeBar { @@ -26,36 +33,64 @@ namespace UnicodeBar return (x - min) / (max - min) * max_width; } - size_t getWidthInBytes(double width) + namespace { - return static_cast(ceil(width - 1.0 / 8) * UNICODE_BAR_CHAR_SIZE); + /// We use the following Unicode characters to draw the bar: + /// U+2588 "█" Full block + /// U+2589 "▉" Left seven eighths block + /// U+258A "▊" Left three quarters block + /// U+258B "▋" Left five eighths block + /// U+258C "▌" Left half block + /// U+258D "▍" Left three eighths block + /// U+258E "▎" Left one quarter block + /// U+258F "▏" Left one eighth block + constexpr size_t GRADES_IN_FULL_BAR = 8; + constexpr char FULL_BAR[] = "█"; + constexpr char FRACTIONAL_BARS[] = "▏▎▍▌▋▊▉"; /// 7 elements: 1/8, 2/8, 3/8, 4/8, 5/8, 6/8, 7/8 } - void render(double width, char * dst) + size_t getWidthInBytes(double width) { - size_t floor_width = static_cast(floor(width)); + Int64 int_width = static_cast(width * GRADES_IN_FULL_BAR); + return (int_width / GRADES_IN_FULL_BAR) * UNICODE_BAR_CHAR_SIZE + (int_width % GRADES_IN_FULL_BAR ? 
UNICODE_BAR_CHAR_SIZE : 0); + } + + static char* checkedCopy(const char * src, size_t src_size, char * dst, const char * dst_end) + { + if (dst + src_size > dst_end) + throw DB::Exception( + DB::ErrorCodes::LOGICAL_ERROR, + "Not enough space in buffer for UnicodeBar::render, required: {}, got: {}", + src_size, dst_end - dst); + + memcpy(dst, src, src_size); + return dst + src_size; + } + + void render(double width, char * dst, const char * dst_end) + { + Int64 int_width = static_cast(width * GRADES_IN_FULL_BAR); + size_t floor_width = (int_width / GRADES_IN_FULL_BAR); for (size_t i = 0; i < floor_width; ++i) { - memcpy(dst, "█", UNICODE_BAR_CHAR_SIZE); - dst += UNICODE_BAR_CHAR_SIZE; + dst = checkedCopy(FULL_BAR, UNICODE_BAR_CHAR_SIZE, dst, dst_end); } - size_t remainder = static_cast(floor((width - floor_width) * 8)); + size_t remainder = int_width % GRADES_IN_FULL_BAR; if (remainder) { - memcpy(dst, &"▏▎▍▌▋▋▊▉"[(remainder - 1) * UNICODE_BAR_CHAR_SIZE], UNICODE_BAR_CHAR_SIZE); - dst += UNICODE_BAR_CHAR_SIZE; + dst = checkedCopy(&FRACTIONAL_BARS[(remainder - 1) * UNICODE_BAR_CHAR_SIZE], UNICODE_BAR_CHAR_SIZE, dst, dst_end); } - *dst = 0; + checkedCopy("\0", 1, dst, dst_end); } std::string render(double width) { - std::string res(getWidthInBytes(width), '\0'); - render(width, res.data()); + std::string res(getWidthInBytes(width) + 1, '\0'); + render(width, res.data(), res.data() + res.size()); return res; } } diff --git a/src/Common/UnicodeBar.h b/src/Common/UnicodeBar.h index 64705aa5022..78e925bdb3c 100644 --- a/src/Common/UnicodeBar.h +++ b/src/Common/UnicodeBar.h @@ -14,6 +14,6 @@ namespace UnicodeBar size_t getWidthInBytes(double width); /// In `dst` there must be a space for barWidthInBytes(width) characters and a trailing zero. - void render(double width, char * dst); + void render(double width, char * dst, const char * dst_end); std::string render(double width); } diff --git a/src/Common/ZooKeeper/TestKeeper.cpp b/src/Common/ZooKeeper/TestKeeper.cpp index 134374f98d0..4f53a8ac307 100644 --- a/src/Common/ZooKeeper/TestKeeper.cpp +++ b/src/Common/ZooKeeper/TestKeeper.cpp @@ -219,6 +219,7 @@ std::pair TestKeeperCreateRequest::process(TestKeeper::Contai created_node.stat.mtime = created_node.stat.ctime; created_node.stat.numChildren = 0; created_node.stat.dataLength = static_cast(data.length()); + created_node.stat.ephemeralOwner = is_ephemeral ? 1 : 0; created_node.data = data; created_node.is_ephemeral = is_ephemeral; created_node.is_sequental = is_sequential; diff --git a/src/Common/ZooKeeper/ZooKeeperCommon.cpp b/src/Common/ZooKeeper/ZooKeeperCommon.cpp index 8976f1098ac..4cc1c24ef8b 100644 --- a/src/Common/ZooKeeper/ZooKeeperCommon.cpp +++ b/src/Common/ZooKeeper/ZooKeeperCommon.cpp @@ -36,7 +36,7 @@ std::string ZooKeeperRequest::toString() const "OpNum = {}\n" "Additional info:\n{}", xid, - getOpNum(), + Coordination::toString(getOpNum()), toStringImpl()); } diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.cpp b/src/Common/ZooKeeper/ZooKeeperImpl.cpp index 7cbe7d7b0f2..251bf023f08 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.cpp +++ b/src/Common/ZooKeeper/ZooKeeperImpl.cpp @@ -342,7 +342,6 @@ ZooKeeper::ZooKeeper( default_acls.emplace_back(std::move(acl)); } - /// It makes sense (especially, for async requests) to inject a fault in two places: /// pushRequest (before request is sent) and receiveEvent (after request was executed). 
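[Editor's illustration] The UnicodeBar rework earlier in this hunk maps a fractional width to whole U+2588 blocks plus one partial block from the left-eighths series, with bounds-checked copies. A rough equivalent using std::string instead of the raw dst/dst_end buffer; renderBar is an illustrative name, not ClickHouse API:

#include <cstdint>
#include <iostream>
#include <string>

std::string renderBar(double width)
{
    static const std::string full = "█";                                          /// U+2588
    static const std::string fractional[] = {"▏", "▎", "▍", "▌", "▋", "▊", "▉"};  /// 1/8 .. 7/8
    const int64_t eighths = static_cast<int64_t>(width * 8);

    std::string out;
    for (int64_t i = 0; i < eighths / 8; ++i)
        out += full;
    if (eighths % 8)
        out += fractional[eighths % 8 - 1];
    return out;
}

int main()
{
    std::cout << renderBar(3.6) << "\n";   /// 28 eighths: three full blocks plus a half block
}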
if (0 < args.send_fault_probability && args.send_fault_probability <= 1) @@ -676,7 +675,7 @@ void ZooKeeper::receiveThread() if (earliest_operation) { throw Exception(Error::ZOPERATIONTIMEOUT, "Operation timeout (no response) for request {} for path: {}", - earliest_operation->request->getOpNum(), earliest_operation->request->getPath()); + toString(earliest_operation->request->getOpNum()), earliest_operation->request->getPath()); } waited_us += max_wait_us; if (waited_us >= args.session_timeout_ms * 1000) @@ -870,7 +869,7 @@ void ZooKeeper::finalize(bool error_send, bool error_receive, const String & rea if (already_started) return; - LOG_INFO(log, "Finalizing session {}: finalization_started={}, queue_finished={}, reason={}", + LOG_INFO(log, "Finalizing session {}. finalization_started: {}, queue_finished: {}, reason: '{}'", session_id, already_started, requests_queue.isFinished(), reason); auto expire_session_if_not_expired = [&] diff --git a/src/Storages/MergeTree/ZooKeeperWithFaultInjection.h b/src/Common/ZooKeeper/ZooKeeperWithFaultInjection.h similarity index 100% rename from src/Storages/MergeTree/ZooKeeperWithFaultInjection.h rename to src/Common/ZooKeeper/ZooKeeperWithFaultInjection.h diff --git a/src/Common/clickhouse_malloc.cpp b/src/Common/clickhouse_malloc.cpp index afdad3c6599..3f69ebdf58d 100644 --- a/src/Common/clickhouse_malloc.cpp +++ b/src/Common/clickhouse_malloc.cpp @@ -9,11 +9,7 @@ extern "C" void * clickhouse_malloc(size_t size) { void * res = malloc(size); if (res) - { - AllocationTrace trace; - size_t actual_size = Memory::trackMemory(size, trace); - trace.onAlloc(res, actual_size); - } + Memory::trackMemory(size); return res; } @@ -21,29 +17,17 @@ extern "C" void * clickhouse_calloc(size_t number_of_members, size_t size) { void * res = calloc(number_of_members, size); if (res) - { - AllocationTrace trace; - size_t actual_size = Memory::trackMemory(number_of_members * size, trace); - trace.onAlloc(res, actual_size); - } + Memory::trackMemory(number_of_members * size); return res; } extern "C" void * clickhouse_realloc(void * ptr, size_t size) { if (ptr) - { - AllocationTrace trace; - size_t actual_size = Memory::untrackMemory(ptr, trace); - trace.onFree(ptr, actual_size); - } + Memory::untrackMemory(ptr); void * res = realloc(ptr, size); if (res) - { - AllocationTrace trace; - size_t actual_size = Memory::trackMemory(size, trace); - trace.onAlloc(res, actual_size); - } + Memory::trackMemory(size); return res; } @@ -58,9 +42,7 @@ extern "C" void * clickhouse_reallocarray(void * ptr, size_t number_of_members, extern "C" void clickhouse_free(void * ptr) { - AllocationTrace trace; - size_t actual_size = Memory::untrackMemory(ptr, trace); - trace.onFree(ptr, actual_size); + Memory::untrackMemory(ptr); free(ptr); } @@ -68,10 +50,6 @@ extern "C" int clickhouse_posix_memalign(void ** memptr, size_t alignment, size_ { int res = posix_memalign(memptr, alignment, size); if (res == 0) - { - AllocationTrace trace; - size_t actual_size = Memory::trackMemory(size, trace); - trace.onAlloc(*memptr, actual_size); - } + Memory::trackMemory(size); return res; } diff --git a/src/Common/filesystemHelpers.cpp b/src/Common/filesystemHelpers.cpp index 07a08dc7fbc..43f88dd7faa 100644 --- a/src/Common/filesystemHelpers.cpp +++ b/src/Common/filesystemHelpers.cpp @@ -64,11 +64,11 @@ bool enoughSpaceInDirectory(const std::string & path, size_t data_size) return data_size <= free_space; } -std::unique_ptr createTemporaryFile(const std::string & path) +std::unique_ptr createTemporaryFile(const 
std::string & folder_path) { ProfileEvents::increment(ProfileEvents::ExternalProcessingFilesTotal); - fs::create_directories(path); - return std::make_unique(path); + fs::create_directories(folder_path); + return std::make_unique(folder_path); } #if !defined(OS_LINUX) diff --git a/src/Common/filesystemHelpers.h b/src/Common/filesystemHelpers.h index 0e6e16941bb..14ee5f54322 100644 --- a/src/Common/filesystemHelpers.h +++ b/src/Common/filesystemHelpers.h @@ -14,10 +14,10 @@ namespace fs = std::filesystem; namespace DB { -using TemporaryFile = Poco::TemporaryFile; +using PocoTemporaryFile = Poco::TemporaryFile; bool enoughSpaceInDirectory(const std::string & path, size_t data_size); -std::unique_ptr createTemporaryFile(const std::string & path); +std::unique_ptr createTemporaryFile(const std::string & folder_path); // Determine what block device is responsible for specified path diff --git a/src/Common/memory.h b/src/Common/memory.h index 87ccdce070a..4cb1c535e56 100644 --- a/src/Common/memory.h +++ b/src/Common/memory.h @@ -112,19 +112,16 @@ inline ALWAYS_INLINE size_t getActualAllocationSize(size_t size, TAlign... align template ... TAlign> requires DB::OptionalArgument -inline ALWAYS_INLINE size_t trackMemory(std::size_t size, AllocationTrace & trace, TAlign... align) +inline ALWAYS_INLINE void trackMemory(std::size_t size, TAlign... align) { std::size_t actual_size = getActualAllocationSize(size, align...); - trace = CurrentMemoryTracker::allocNoThrow(actual_size); - return actual_size; + CurrentMemoryTracker::allocNoThrow(actual_size); } template ... TAlign> requires DB::OptionalArgument -inline ALWAYS_INLINE size_t untrackMemory(void * ptr [[maybe_unused]], AllocationTrace & trace, std::size_t size [[maybe_unused]] = 0, TAlign... align [[maybe_unused]]) noexcept +inline ALWAYS_INLINE void untrackMemory(void * ptr [[maybe_unused]], std::size_t size [[maybe_unused]] = 0, TAlign... align [[maybe_unused]]) noexcept { - std::size_t actual_size = 0; - try { #if USE_JEMALLOC @@ -133,26 +130,23 @@ inline ALWAYS_INLINE size_t untrackMemory(void * ptr [[maybe_unused]], Allocatio if (likely(ptr != nullptr)) { if constexpr (sizeof...(TAlign) == 1) - actual_size = sallocx(ptr, MALLOCX_ALIGN(alignToSizeT(align...))); + CurrentMemoryTracker::free(sallocx(ptr, MALLOCX_ALIGN(alignToSizeT(align...)))); else - actual_size = sallocx(ptr, 0); + CurrentMemoryTracker::free(sallocx(ptr, 0)); } #else if (size) - actual_size = size; + CurrentMemoryTracker::free(size); # if defined(_GNU_SOURCE) /// It's innaccurate resource free for sanitizers. malloc_usable_size() result is greater or equal to allocated size. else - actual_size = malloc_usable_size(ptr); + CurrentMemoryTracker::free(malloc_usable_size(ptr)); # endif #endif - trace = CurrentMemoryTracker::free(actual_size); } catch (...) 
{ } - - return actual_size; } } diff --git a/src/Common/new_delete.cpp b/src/Common/new_delete.cpp index d0170bd820c..871ab750907 100644 --- a/src/Common/new_delete.cpp +++ b/src/Common/new_delete.cpp @@ -50,74 +50,50 @@ static struct InitializeJemallocZoneAllocatorForOSX void * operator new(std::size_t size) { - AllocationTrace trace; - std::size_t actual_size = Memory::trackMemory(size, trace); - void * ptr = Memory::newImpl(size); - trace.onAlloc(ptr, actual_size); - return ptr; + Memory::trackMemory(size); + return Memory::newImpl(size); } void * operator new(std::size_t size, std::align_val_t align) { - AllocationTrace trace; - std::size_t actual_size = Memory::trackMemory(size, trace, align); - void * ptr = Memory::newImpl(size, align); - trace.onAlloc(ptr, actual_size); - return ptr; + Memory::trackMemory(size, align); + return Memory::newImpl(size, align); } void * operator new[](std::size_t size) { - AllocationTrace trace; - std::size_t actual_size = Memory::trackMemory(size, trace); - void * ptr = Memory::newImpl(size); - trace.onAlloc(ptr, actual_size); - return ptr; + Memory::trackMemory(size); + return Memory::newImpl(size); } void * operator new[](std::size_t size, std::align_val_t align) { - AllocationTrace trace; - std::size_t actual_size = Memory::trackMemory(size, trace, align); - void * ptr = Memory::newImpl(size, align); - trace.onAlloc(ptr, actual_size); - return ptr; + Memory::trackMemory(size, align); + return Memory::newImpl(size, align); } void * operator new(std::size_t size, const std::nothrow_t &) noexcept { - AllocationTrace trace; - std::size_t actual_size = Memory::trackMemory(size, trace); - void * ptr = Memory::newNoExept(size); - trace.onAlloc(ptr, actual_size); - return ptr; + Memory::trackMemory(size); + return Memory::newNoExept(size); } void * operator new[](std::size_t size, const std::nothrow_t &) noexcept { - AllocationTrace trace; - std::size_t actual_size = Memory::trackMemory(size, trace); - void * ptr = Memory::newNoExept(size); - trace.onAlloc(ptr, actual_size); - return ptr; + Memory::trackMemory(size); + return Memory::newNoExept(size); } void * operator new(std::size_t size, std::align_val_t align, const std::nothrow_t &) noexcept { - AllocationTrace trace; - std::size_t actual_size = Memory::trackMemory(size, trace, align); - void * ptr = Memory::newNoExept(size, align); - trace.onAlloc(ptr, actual_size); - return ptr; + Memory::trackMemory(size, align); + return Memory::newNoExept(size, align); } void * operator new[](std::size_t size, std::align_val_t align, const std::nothrow_t &) noexcept { - AllocationTrace trace; - std::size_t actual_size = Memory::trackMemory(size, trace, align); - void * ptr = Memory::newNoExept(size, align); - trace.onAlloc(ptr, actual_size); - return ptr; + Memory::trackMemory(size, align); + return Memory::newNoExept(size, align); } /// delete @@ -133,64 +109,48 @@ void * operator new[](std::size_t size, std::align_val_t align, const std::nothr void operator delete(void * ptr) noexcept { - AllocationTrace trace; - std::size_t actual_size = Memory::untrackMemory(ptr, trace); - trace.onFree(ptr, actual_size); + Memory::untrackMemory(ptr); Memory::deleteImpl(ptr); } void operator delete(void * ptr, std::align_val_t align) noexcept { - AllocationTrace trace; - std::size_t actual_size = Memory::untrackMemory(ptr, trace, 0, align); - trace.onFree(ptr, actual_size); + Memory::untrackMemory(ptr, 0, align); Memory::deleteImpl(ptr); } void operator delete[](void * ptr) noexcept { - AllocationTrace trace; - std::size_t 
actual_size = Memory::untrackMemory(ptr, trace); - trace.onFree(ptr, actual_size); + Memory::untrackMemory(ptr); Memory::deleteImpl(ptr); } void operator delete[](void * ptr, std::align_val_t align) noexcept { - AllocationTrace trace; - std::size_t actual_size = Memory::untrackMemory(ptr, trace, 0, align); - trace.onFree(ptr, actual_size); + Memory::untrackMemory(ptr, 0, align); Memory::deleteImpl(ptr); } void operator delete(void * ptr, std::size_t size) noexcept { - AllocationTrace trace; - std::size_t actual_size = Memory::untrackMemory(ptr, trace, size); - trace.onFree(ptr, actual_size); + Memory::untrackMemory(ptr, size); Memory::deleteSized(ptr, size); } void operator delete(void * ptr, std::size_t size, std::align_val_t align) noexcept { - AllocationTrace trace; - std::size_t actual_size = Memory::untrackMemory(ptr, trace, size, align); - trace.onFree(ptr, actual_size); + Memory::untrackMemory(ptr, size, align); Memory::deleteSized(ptr, size, align); } void operator delete[](void * ptr, std::size_t size) noexcept { - AllocationTrace trace; - std::size_t actual_size = Memory::untrackMemory(ptr, trace, size); - trace.onFree(ptr, actual_size); + Memory::untrackMemory(ptr, size); Memory::deleteSized(ptr, size); } void operator delete[](void * ptr, std::size_t size, std::align_val_t align) noexcept { - AllocationTrace trace; - std::size_t actual_size = Memory::untrackMemory(ptr, trace, size, align); - trace.onFree(ptr, actual_size); + Memory::untrackMemory(ptr, size, align); Memory::deleteSized(ptr, size, align); } diff --git a/src/Common/tests/gtest_rw_lock.cpp b/src/Common/tests/gtest_rw_lock.cpp index 6ba67a40445..57f446ca249 100644 --- a/src/Common/tests/gtest_rw_lock.cpp +++ b/src/Common/tests/gtest_rw_lock.cpp @@ -240,24 +240,52 @@ TEST(Common, RWLockPerfTestReaders) for (auto pool_size : pool_sizes) { - Stopwatch watch(CLOCK_MONOTONIC_COARSE); + Stopwatch watch(CLOCK_MONOTONIC_COARSE); - auto func = [&] () + auto func = [&] () + { + for (auto i = 0; i < cycles; ++i) { - for (auto i = 0; i < cycles; ++i) - { - auto lock = fifo_lock->getLock(RWLockImpl::Read, RWLockImpl::NO_QUERY); - } - }; + auto lock = fifo_lock->getLock(RWLockImpl::Read, RWLockImpl::NO_QUERY); + } + }; - std::list threads; - for (size_t thread = 0; thread < pool_size; ++thread) - threads.emplace_back(func); + std::list threads; + for (size_t thread = 0; thread < pool_size; ++thread) + threads.emplace_back(func); - for (auto & thread : threads) - thread.join(); + for (auto & thread : threads) + thread.join(); - auto total_time = watch.elapsedSeconds(); - std::cout << "Threads " << pool_size << ", total_time " << std::setprecision(2) << total_time << "\n"; + auto total_time = watch.elapsedSeconds(); + std::cout << "Threads " << pool_size << ", total_time " << std::setprecision(2) << total_time << "\n"; } } + +TEST(Common, RWLockNotUpgradeableWithNoQuery) +{ + updatePHDRCache(); + + static auto rw_lock = RWLockImpl::create(); + + std::thread read_thread([&] () + { + auto lock = rw_lock->getLock(RWLockImpl::Read, RWLockImpl::NO_QUERY, std::chrono::duration(50000)); + auto sleep_for = std::chrono::duration(5000); + std::this_thread::sleep_for(sleep_for); + }); + + { + auto sleep_for = std::chrono::duration(500); + std::this_thread::sleep_for(sleep_for); + + Stopwatch watch(CLOCK_MONOTONIC_COARSE); + auto get_lock = rw_lock->getLock(RWLockImpl::Write, RWLockImpl::NO_QUERY, std::chrono::duration(50000)); + + EXPECT_NE(get_lock.get(), nullptr); + /// It took some time + EXPECT_GT(watch.elapsedMilliseconds(), 3000); + } + 
+ read_thread.join(); +} diff --git a/src/Coordination/KeeperSnapshotManagerS3.cpp b/src/Coordination/KeeperSnapshotManagerS3.cpp index bcb513157d7..361265e382a 100644 --- a/src/Coordination/KeeperSnapshotManagerS3.cpp +++ b/src/Coordination/KeeperSnapshotManagerS3.cpp @@ -136,7 +136,6 @@ void KeeperSnapshotManagerS3::uploadSnapshotImpl(const std::string & snapshot_pa return; S3Settings::RequestSettings request_settings_1; - request_settings_1.setEmptyFieldsByDefault(); const auto create_writer = [&](const auto & key) { diff --git a/src/Coordination/KeeperStateMachine.cpp b/src/Coordination/KeeperStateMachine.cpp index ee5bfa48357..dde8b30bf79 100644 --- a/src/Coordination/KeeperStateMachine.cpp +++ b/src/Coordination/KeeperStateMachine.cpp @@ -132,7 +132,7 @@ void assertDigest( "Digest for nodes is not matching after {} request of type '{}'.\nExpected digest - {}, actual digest - {} (digest " "{}). Keeper will terminate to avoid inconsistencies.\nExtra information about the request:\n{}", committing ? "committing" : "preprocessing", - request.getOpNum(), + Coordination::toString(request.getOpNum()), first.value, second.value, first.version, diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index fb472201aec..72921c4ac1d 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -1704,7 +1704,7 @@ struct KeeperStorageMultiRequestProcessor final : public KeeperStorageRequestPro break; default: throw DB::Exception( - ErrorCodes::BAD_ARGUMENTS, "Illegal command as part of multi ZooKeeper request {}", sub_zk_request->getOpNum()); + ErrorCodes::BAD_ARGUMENTS, "Illegal command as part of multi ZooKeeper request {}", Coordination::toString(sub_zk_request->getOpNum())); } } diff --git a/src/Core/IResolvedFunction.h b/src/Core/IResolvedFunction.h new file mode 100644 index 00000000000..64c69f597c7 --- /dev/null +++ b/src/Core/IResolvedFunction.h @@ -0,0 +1,29 @@ +#pragma once + +#include +#include + +namespace DB +{ +class IDataType; + +using DataTypePtr = std::shared_ptr; +using DataTypes = std::vector; + +struct Array; + +class IResolvedFunction +{ +public: + virtual const DataTypePtr & getResultType() const = 0; + + virtual const DataTypes & getArgumentTypes() const = 0; + + virtual const Array & getParameters() const = 0; + + virtual ~IResolvedFunction() = default; +}; + +using IResolvedFunctionPtr = std::shared_ptr; + +} diff --git a/src/Core/Settings.cpp b/src/Core/Settings.cpp index 7bac3f04fc6..fa1a10d22f2 100644 --- a/src/Core/Settings.cpp +++ b/src/Core/Settings.cpp @@ -148,31 +148,30 @@ std::vector Settings::getAllRegisteredNames() const void Settings::set(std::string_view name, const Field & value) { - BaseSettings::set(name, value); - if (name == "compatibility") - applyCompatibilitySetting(); + applyCompatibilitySetting(value.get()); /// If we change setting that was changed by compatibility setting before /// we should remove it from settings_changed_by_compatibility_setting, /// otherwise the next time we will change compatibility setting /// this setting will be changed too (and we don't want it). 
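[Editor's illustration] The Settings::set change above applies the compatibility value before the base assignment and remembers which settings it touched, so a later explicit assignment is excluded from the next revert. A toy sketch of that bookkeeping; the map, the version rule, and the name example_setting are made up for illustration and do not reflect real settings history:

#include <iostream>
#include <map>
#include <set>
#include <string>

struct ToySettings
{
    std::map<std::string, std::string> values;
    std::set<std::string> changed_by_compatibility;

    void set(const std::string & name, const std::string & value)
    {
        if (name == "compatibility")
            applyCompatibility(value);
        else
            /// An explicit assignment must not be reverted by the next compatibility change.
            changed_by_compatibility.erase(name);
        values[name] = value;
    }

    void applyCompatibility(const std::string & version)
    {
        /// First revert everything the previous compatibility value changed.
        for (const auto & name : changed_by_compatibility)
            values.erase(name);
        changed_by_compatibility.clear();

        /// Toy stand-in for the settings_changes_history lookup (naive string comparison).
        if (!version.empty() && version < "23.1")
        {
            values["example_setting"] = "old_default";
            changed_by_compatibility.insert("example_setting");
        }
    }
};

int main()
{
    ToySettings s;
    s.set("compatibility", "22.8");          /// example_setting -> old_default
    s.set("example_setting", "user_value");  /// explicit value, now pinned
    s.set("compatibility", "23.3");          /// the revert step no longer touches example_setting
    std::cout << s.values["example_setting"] << "\n";  /// prints user_value
}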
else if (settings_changed_by_compatibility_setting.contains(name)) settings_changed_by_compatibility_setting.erase(name); + + BaseSettings::set(name, value); } -void Settings::applyCompatibilitySetting() +void Settings::applyCompatibilitySetting(const String & compatibility_value) { /// First, revert all changes applied by previous compatibility setting for (const auto & setting_name : settings_changed_by_compatibility_setting) resetToDefault(setting_name); settings_changed_by_compatibility_setting.clear(); - String compatibility = getString("compatibility"); /// If setting value is empty, we don't need to change settings - if (compatibility.empty()) + if (compatibility_value.empty()) return; - ClickHouseVersion version(compatibility); + ClickHouseVersion version(compatibility_value); /// Iterate through ClickHouse version in descending order and apply reversed /// changes for each version that is higher that version from compatibility setting for (auto it = settings_changes_history.rbegin(); it != settings_changes_history.rend(); ++it) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index fc96c7768b8..c5ce6019746 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -83,6 +83,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(UInt64, distributed_connections_pool_size, 1024, "Maximum number of connections with one remote server in the pool.", 0) \ M(UInt64, connections_with_failover_max_tries, DBMS_CONNECTION_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES, "The maximum number of attempts to connect to replicas.", 0) \ M(UInt64, s3_min_upload_part_size, 16*1024*1024, "The minimum size of part to upload during multipart upload to S3.", 0) \ + M(UInt64, s3_max_upload_part_size, 5ull*1024*1024*1024, "The maximum size of part to upload during multipart upload to S3.", 0) \ M(UInt64, s3_upload_part_size_multiply_factor, 2, "Multiply s3_min_upload_part_size by this factor each time s3_multiply_parts_count_threshold parts were uploaded from a single write to S3.", 0) \ M(UInt64, s3_upload_part_size_multiply_parts_count_threshold, 500, "Each time this number of parts was uploaded to S3 s3_min_upload_part_size multiplied by s3_upload_part_size_multiply_factor.", 0) \ M(UInt64, s3_max_single_part_upload_size, 32*1024*1024, "The maximum size of object to upload using singlepart upload to S3.", 0) \ @@ -564,6 +565,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(UInt64, max_distributed_depth, 5, "Maximum distributed query depth", 0) \ M(Bool, database_replicated_always_detach_permanently, false, "Execute DETACH TABLE as DETACH TABLE PERMANENTLY if database engine is Replicated", 0) \ M(Bool, database_replicated_allow_only_replicated_engine, false, "Allow to create only Replicated tables in database with engine Replicated", 0) \ + M(Bool, database_replicated_allow_replicated_engine_arguments, true, "Allow to create only Replicated tables in database with engine Replicated with explicit arguments", 0) \ M(DistributedDDLOutputMode, distributed_ddl_output_mode, DistributedDDLOutputMode::THROW, "Format of distributed DDL query result", 0) \ M(UInt64, distributed_ddl_entry_format_version, 3, "Compatibility version of distributed DDL (ON CLUSTER) queries", 0) \ \ @@ -619,6 +621,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(Bool, enable_filesystem_cache_on_lower_level, true, "If read buffer supports caching inside threadpool, allow it to do it, otherwise cache outside ot threadpool. 
Do not use this setting, it is needed for testing", 0) \ M(Bool, skip_download_if_exceeds_query_cache, true, "Skip download from remote filesystem if exceeds query cache size", 0) \ M(UInt64, max_query_cache_size, (128UL * 1024 * 1024 * 1024), "Max remote filesystem cache size that can be used by a single query", 0) \ + M(Bool, throw_on_error_from_cache_on_write_operations, false, "Ignore error from cache when caching on write operations (INSERT, merges)", 0) \ \ M(Bool, load_marks_asynchronously, false, "Load MergeTree marks asynchronously", 0) \ \ @@ -651,6 +654,8 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(Map, additional_table_filters, "", "Additional filter expression which would be applied after reading from specified table. Syntax: {'table1': 'expression', 'database.table2': 'expression'}", 0) \ M(String, additional_result_filter, "", "Additional filter expression which would be applied to query result", 0) \ \ + M(String, workload, "default", "Name of workload to be used to access resources", 0) \ + \ /** Experimental functions */ \ M(Bool, allow_experimental_funnel_functions, false, "Enable experimental functions for funnel analysis.", 0) \ M(Bool, allow_experimental_nlp_functions, false, "Enable experimental functions for natural language processing.", 0) \ @@ -773,6 +778,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(Bool, input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference, false, "Skip columns with unsupported types while schema inference for format Arrow", 0) \ M(String, column_names_for_schema_inference, "", "The list of column names to use in schema inference for formats without column names. The format: 'column1,column2,column3,...'", 0) \ M(String, schema_inference_hints, "", "The list of column names and types to use in schema inference for formats without column names. 
The format: 'column_name1 column_type1, column_name2 column_type2, ...'", 0) \ + M(Bool, schema_inference_make_columns_nullable, true, "If set to true, all inferred types will be Nullable in schema inference for formats without information about nullability.", 0) \ M(Bool, input_format_json_read_bools_as_numbers, true, "Allow to parse bools as numbers in JSON input formats", 0) \ M(Bool, input_format_json_try_infer_numbers_from_strings, true, "Try to infer numbers from string fields while schema inference", 0) \ M(Bool, input_format_json_validate_types_from_metadata, true, "For JSON/JSONCompact/JSONColumnsWithMetadata input formats this controls whether format parser should check if data types from input metadata match data types of the corresponding columns from the table", 0) \ @@ -934,7 +940,7 @@ struct Settings : public BaseSettings, public IHints<2, Settings void setDefaultValue(const String & name) { resetToDefault(name); } private: - void applyCompatibilitySetting(); + void applyCompatibilitySetting(const String & compatibility); std::unordered_set settings_changed_by_compatibility_setting; }; diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 62b3c1b9c98..0c637c6d345 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -28,7 +29,8 @@ public: for (const auto & split_element : split) { size_t component; - if (!tryParse(component, split_element)) + ReadBufferFromString buf(split_element); + if (!tryReadIntText(component, buf) || !buf.eof()) throw Exception{ErrorCodes::BAD_ARGUMENTS, "Cannot parse ClickHouse version here: {}", version}; components.push_back(component); } diff --git a/src/Core/Types.h b/src/Core/Types.h index 0dfc089f144..1eddd431c86 100644 --- a/src/Core/Types.h +++ b/src/Core/Types.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -93,4 +94,5 @@ using Int256 = ::Int256; /// Not a data type in database, defined just for convenience. using Strings = std::vector; +using TypeIndexesSet = std::unordered_set; } diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index 604a882bccc..5a08c8ffcd0 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -4,6 +4,7 @@ #include #include +#include #include #include @@ -178,12 +179,9 @@ __attribute__((__weak__)) void collectCrashLog( class SignalListener : public Poco::Runnable { public: - enum Signals : int - { - StdTerminate = -1, - StopThread = -2, - SanitizerTrap = -3, - }; + static constexpr int StdTerminate = -1; + static constexpr int StopThread = -2; + static constexpr int SanitizerTrap = -3; explicit SignalListener(BaseDaemon & daemon_) : log(&Poco::Logger::get("BaseDaemon")) @@ -208,7 +206,7 @@ public: // Don't use strsignal here, because it's not thread-safe. 
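[Editor's illustration] The SettingsChangesHistory.h change above tightens ClickHouseVersion parsing: a component is accepted only if the integer read consumes the whole token. The same check sketched with std::from_chars standing in for the tryReadIntText-plus-eof test; parseVersionComponent is an illustrative name:

#include <charconv>
#include <iostream>
#include <string_view>

/// Accept a dotted-version component only if it is digits all the way to the end.
bool parseVersionComponent(std::string_view token, size_t & out)
{
    const char * begin = token.data();
    const char * end = begin + token.size();
    auto [ptr, ec] = std::from_chars(begin, end, out);
    return ec == std::errc() && ptr == end;   /// parsed successfully and nothing left over
}

int main()
{
    size_t v = 0;
    std::cout << parseVersionComponent("22", v)      /// 1: plain number
              << parseVersionComponent("22abc", v)   /// 0: trailing characters are rejected
              << "\n";
}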
LOG_TRACE(log, "Received signal {}", sig); - if (sig == Signals::StopThread) + if (sig == StopThread) { LOG_INFO(log, "Stop SignalListener thread"); break; @@ -219,7 +217,7 @@ public: BaseDaemon::instance().closeLogs(BaseDaemon::instance().logger()); LOG_INFO(log, "Opened new log file after received signal."); } - else if (sig == Signals::StdTerminate) + else if (sig == StdTerminate) { UInt32 thread_num; std::string message; @@ -306,7 +304,7 @@ private: if (auto thread_group = thread_ptr->getThreadGroup()) { - query = thread_group->one_line_query; + query = DB::toOneLineQuery(thread_group->query); } if (auto logs_queue = thread_ptr->getInternalTextLogsQueue()) @@ -909,7 +907,7 @@ void BaseDaemon::initializeTerminationAndSignalProcessing() void BaseDaemon::logRevision() const { - Poco::Logger::root().information("Starting " + std::string{VERSION_FULL} + logger().information("Starting " + std::string{VERSION_FULL} + " (revision: " + std::to_string(ClickHouseRevision::getVersionRevision()) + ", git hash: " + (git_hash.empty() ? "" : git_hash) + ", build id: " + (build_id.empty() ? "" : build_id) + ")" @@ -958,7 +956,6 @@ void BaseDaemon::handleSignal(int signal_id) std::lock_guard lock(signal_handler_mutex); { ++terminate_signals_counter; - sigint_signals_counter += signal_id == SIGINT; signal_event.notify_all(); } @@ -973,9 +970,9 @@ void BaseDaemon::onInterruptSignals(int signal_id) is_cancelled = true; LOG_INFO(&logger(), "Received termination signal ({})", strsignal(signal_id)); // NOLINT(concurrency-mt-unsafe) // it is not thread-safe but ok in this context - if (sigint_signals_counter >= 2) + if (terminate_signals_counter >= 2) { - LOG_INFO(&logger(), "Received second signal Interrupt. Immediately terminate."); + LOG_INFO(&logger(), "This is the second termination signal. Immediately terminate."); call_default_signal_handler(signal_id); /// If the above did not help. 
_exit(128 + signal_id); @@ -1025,9 +1022,6 @@ void BaseDaemon::setupWatchdog() #if defined(OS_LINUX) if (0 != prctl(PR_SET_PDEATHSIG, SIGKILL)) logger().warning("Cannot do prctl to ask termination with parent."); - - if (getppid() == 1) - throw Poco::Exception("Parent watchdog process has exited."); #endif { diff --git a/src/Daemon/BaseDaemon.h b/src/Daemon/BaseDaemon.h index cb4aa0c2da6..d28f9403c16 100644 --- a/src/Daemon/BaseDaemon.h +++ b/src/Daemon/BaseDaemon.h @@ -162,7 +162,6 @@ protected: std::mutex signal_handler_mutex; std::condition_variable signal_event; std::atomic_size_t terminate_signals_counter{0}; - std::atomic_size_t sigint_signals_counter{0}; std::string config_path; DB::ConfigProcessor::LoadedConfig loaded_config; diff --git a/src/Daemon/CMakeLists.txt b/src/Daemon/CMakeLists.txt index e1a9f09003c..316b03dc535 100644 --- a/src/Daemon/CMakeLists.txt +++ b/src/Daemon/CMakeLists.txt @@ -11,7 +11,7 @@ if (OS_DARWIN AND NOT USE_STATIC_LIBRARIES) target_link_libraries (daemon PUBLIC -Wl,-undefined,dynamic_lookup) endif() -target_link_libraries (daemon PUBLIC loggers common PRIVATE clickhouse_common_io clickhouse_common_config) +target_link_libraries (daemon PUBLIC loggers common PRIVATE clickhouse_parsers clickhouse_common_io clickhouse_common_config) if (TARGET ch_contrib::sentry) target_link_libraries (daemon PRIVATE ch_contrib::sentry dbms) diff --git a/src/DataTypes/DataTypeAggregateFunction.cpp b/src/DataTypes/DataTypeAggregateFunction.cpp index 7056fcff42f..ab6d024f5d8 100644 --- a/src/DataTypes/DataTypeAggregateFunction.cpp +++ b/src/DataTypes/DataTypeAggregateFunction.cpp @@ -67,7 +67,7 @@ String DataTypeAggregateFunction::getNameImpl(bool with_version) const if (!parameters.empty()) { stream << '('; - for (size_t i = 0; i < parameters.size(); ++i) + for (size_t i = 0, size = parameters.size(); i < size; ++i) { if (i) stream << ", "; diff --git a/src/DataTypes/DataTypeAggregateFunction.h b/src/DataTypes/DataTypeAggregateFunction.h index 4a92e6c5703..2d712d9c686 100644 --- a/src/DataTypes/DataTypeAggregateFunction.h +++ b/src/DataTypes/DataTypeAggregateFunction.h @@ -30,9 +30,9 @@ private: public: static constexpr bool is_parametric = true; - DataTypeAggregateFunction(const AggregateFunctionPtr & function_, const DataTypes & argument_types_, + DataTypeAggregateFunction(AggregateFunctionPtr function_, const DataTypes & argument_types_, const Array & parameters_, std::optional version_ = std::nullopt) - : function(function_) + : function(std::move(function_)) , argument_types(argument_types_) , parameters(parameters_) , version(version_) @@ -51,7 +51,7 @@ public: bool canBeInsideNullable() const override { return false; } - DataTypePtr getReturnType() const { return function->getReturnType(); } + DataTypePtr getReturnType() const { return function->getResultType(); } DataTypePtr getReturnTypeToPredict() const { return function->getReturnTypeToPredict(); } DataTypes getArgumentsDataTypes() const { return argument_types; } diff --git a/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp b/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp index efcab212094..c12f9de5a95 100644 --- a/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp +++ b/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp @@ -131,9 +131,9 @@ static std::pair create(const ASTPtr & argum DataTypePtr storage_type = DataTypeFactory::instance().get(argument_types[0]->getName()); - if (!function->getReturnType()->equals(*removeLowCardinality(storage_type))) + if 
(!function->getResultType()->equals(*removeLowCardinality(storage_type))) { - throw Exception("Incompatible data types between aggregate function '" + function->getName() + "' which returns " + function->getReturnType()->getName() + " and column storage type " + storage_type->getName(), + throw Exception("Incompatible data types between aggregate function '" + function->getName() + "' which returns " + function->getResultType()->getName() + " and column storage type " + storage_type->getName(), ErrorCodes::BAD_ARGUMENTS); } diff --git a/src/DataTypes/DataTypeObject.h b/src/DataTypes/DataTypeObject.h index 2f6ad961512..04b39f94fd8 100644 --- a/src/DataTypes/DataTypeObject.h +++ b/src/DataTypes/DataTypeObject.h @@ -41,6 +41,8 @@ public: SerializationPtr doGetDefaultSerialization() const override; bool hasNullableSubcolumns() const { return is_nullable; } + + const String & getSchemaFormat() const { return schema_format; } }; } diff --git a/src/DataTypes/Serializations/SerializationBool.cpp b/src/DataTypes/Serializations/SerializationBool.cpp index 1efacaaecc5..2a63b24c837 100644 --- a/src/DataTypes/Serializations/SerializationBool.cpp +++ b/src/DataTypes/Serializations/SerializationBool.cpp @@ -259,7 +259,7 @@ void SerializationBool::deserializeTextCSV(IColumn & column, ReadBuffer & istr, if (istr.eof()) throw Exception("Expected boolean value but get EOF.", ErrorCodes::CANNOT_PARSE_BOOL); - deserializeImpl(column, istr, settings, [&](ReadBuffer & buf){ return buf.eof() || *buf.position() == settings.csv.delimiter || *buf.position() == '\n'; }); + deserializeImpl(column, istr, settings, [&](ReadBuffer & buf){ return buf.eof() || *buf.position() == settings.csv.delimiter || *buf.position() == '\n' || *buf.position() == '\r'; }); } void SerializationBool::serializeTextRaw(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const diff --git a/src/DataTypes/transformTypesRecursively.cpp b/src/DataTypes/transformTypesRecursively.cpp index 57128966565..fd97254c7ef 100644 --- a/src/DataTypes/transformTypesRecursively.cpp +++ b/src/DataTypes/transformTypesRecursively.cpp @@ -8,74 +8,108 @@ namespace DB { -void transformTypesRecursively(DataTypes & types, std::function transform_simple_types, std::function transform_complex_types) +TypeIndexesSet getTypesIndexes(const DataTypes & types) { + TypeIndexesSet type_indexes; + for (const auto & type : types) + type_indexes.insert(type->getTypeId()); + return type_indexes; +} + +void transformTypesRecursively(DataTypes & types, std::function transform_simple_types, std::function transform_complex_types) +{ + TypeIndexesSet type_indexes = getTypesIndexes(types); + + /// Nullable + if (type_indexes.contains(TypeIndex::Nullable)) { - /// Arrays - bool have_array = false; - bool all_arrays = true; + std::vector is_nullable; + is_nullable.reserve(types.size()); DataTypes nested_types; + nested_types.reserve(types.size()); for (const auto & type : types) { - if (const DataTypeArray * type_array = typeid_cast(type.get())) + if (const DataTypeNullable * type_nullable = typeid_cast(type.get())) { - have_array = true; - nested_types.push_back(type_array->getNestedType()); + is_nullable.push_back(1); + nested_types.push_back(type_nullable->getNestedType()); } else - all_arrays = false; - } - - if (have_array) - { - if (all_arrays) { - transformTypesRecursively(nested_types, transform_simple_types, transform_complex_types); - for (size_t i = 0; i != types.size(); ++i) - types[i] = std::make_shared(nested_types[i]); + 
is_nullable.push_back(0); + nested_types.push_back(type); } - - if (transform_complex_types) - transform_complex_types(types); - - return; } + + transformTypesRecursively(nested_types, transform_simple_types, transform_complex_types); + for (size_t i = 0; i != types.size(); ++i) + { + /// Type could be changed so it cannot be inside Nullable anymore. + if (is_nullable[i] && nested_types[i]->canBeInsideNullable()) + types[i] = makeNullable(nested_types[i]); + else + types[i] = nested_types[i]; + } + + if (transform_complex_types) + { + /// Some types could be changed. + type_indexes = getTypesIndexes(types); + transform_complex_types(types, type_indexes); + } + + return; } + /// Arrays + if (type_indexes.contains(TypeIndex::Array)) { - /// Tuples - bool have_tuple = false; - bool all_tuples = true; - size_t tuple_size = 0; - - std::vector nested_types; - - for (const auto & type : types) + /// All types are Array + if (type_indexes.size() == 1) { - if (const DataTypeTuple * type_tuple = typeid_cast(type.get())) - { - if (!have_tuple) - { - tuple_size = type_tuple->getElements().size(); - nested_types.resize(tuple_size); - for (size_t elem_idx = 0; elem_idx < tuple_size; ++elem_idx) - nested_types[elem_idx].reserve(types.size()); - } - else if (tuple_size != type_tuple->getElements().size()) - return; + DataTypes nested_types; + for (const auto & type : types) + nested_types.push_back(typeid_cast(type.get())->getNestedType()); - have_tuple = true; + transformTypesRecursively(nested_types, transform_simple_types, transform_complex_types); + for (size_t i = 0; i != types.size(); ++i) + types[i] = std::make_shared(nested_types[i]); + } + + if (transform_complex_types) + transform_complex_types(types, type_indexes); + + return; + } + + /// Tuples + if (type_indexes.contains(TypeIndex::Tuple)) + { + /// All types are Tuple + if (type_indexes.size() == 1) + { + std::vector nested_types; + const DataTypeTuple * type_tuple = typeid_cast(types[0].get()); + size_t tuple_size = type_tuple->getElements().size(); + nested_types.resize(tuple_size); + for (size_t elem_idx = 0; elem_idx < tuple_size; ++elem_idx) + nested_types[elem_idx].reserve(types.size()); + + bool sizes_are_equal = true; + for (const auto & type : types) + { + type_tuple = typeid_cast(type.get()); + if (type_tuple->getElements().size() != tuple_size) + { + sizes_are_equal = false; + break; + } for (size_t elem_idx = 0; elem_idx < tuple_size; ++elem_idx) nested_types[elem_idx].emplace_back(type_tuple->getElements()[elem_idx]); } - else - all_tuples = false; - } - if (have_tuple) - { - if (all_tuples) + if (sizes_are_equal) { std::vector transposed_nested_types(types.size()); for (size_t elem_idx = 0; elem_idx < tuple_size; ++elem_idx) @@ -88,97 +122,51 @@ void transformTypesRecursively(DataTypes & types, std::function(transposed_nested_types[i]); } - - if (transform_complex_types) - transform_complex_types(types); - - return; } + + if (transform_complex_types) + transform_complex_types(types, type_indexes); + + return; } + /// Maps + if (type_indexes.contains(TypeIndex::Map)) { - /// Maps - bool have_maps = false; - bool all_maps = true; - DataTypes key_types; - DataTypes value_types; - key_types.reserve(types.size()); - value_types.reserve(types.size()); - - for (const auto & type : types) + /// All types are Map + if (type_indexes.size() == 1) { - if (const DataTypeMap * type_map = typeid_cast(type.get())) + DataTypes key_types; + DataTypes value_types; + key_types.reserve(types.size()); + value_types.reserve(types.size()); + for 
(const auto & type : types) { - have_maps = true; + const DataTypeMap * type_map = typeid_cast(type.get()); key_types.emplace_back(type_map->getKeyType()); value_types.emplace_back(type_map->getValueType()); } - else - all_maps = false; - } - if (have_maps) - { - if (all_maps) - { - transformTypesRecursively(key_types, transform_simple_types, transform_complex_types); - transformTypesRecursively(value_types, transform_simple_types, transform_complex_types); + transformTypesRecursively(key_types, transform_simple_types, transform_complex_types); + transformTypesRecursively(value_types, transform_simple_types, transform_complex_types); - for (size_t i = 0; i != types.size(); ++i) - types[i] = std::make_shared(key_types[i], value_types[i]); - } - - if (transform_complex_types) - transform_complex_types(types); - - return; - } - } - - { - /// Nullable - bool have_nullable = false; - std::vector is_nullable; - is_nullable.reserve(types.size()); - DataTypes nested_types; - nested_types.reserve(types.size()); - for (const auto & type : types) - { - if (const DataTypeNullable * type_nullable = typeid_cast(type.get())) - { - have_nullable = true; - is_nullable.push_back(1); - nested_types.push_back(type_nullable->getNestedType()); - } - else - { - is_nullable.push_back(0); - nested_types.push_back(type); - } - } - - if (have_nullable) - { - transformTypesRecursively(nested_types, transform_simple_types, transform_complex_types); for (size_t i = 0; i != types.size(); ++i) - { - if (is_nullable[i]) - types[i] = makeNullable(nested_types[i]); - else - types[i] = nested_types[i]; - } - - return; + types[i] = std::make_shared(key_types[i], value_types[i]); } + + if (transform_complex_types) + transform_complex_types(types, type_indexes); + + return; } - transform_simple_types(types); + transform_simple_types(types, type_indexes); } void callOnNestedSimpleTypes(DataTypePtr & type, std::function callback) { DataTypes types = {type}; - transformTypesRecursively(types, [callback](auto & data_types){ callback(data_types[0]); }, {}); + transformTypesRecursively(types, [callback](auto & data_types, const TypeIndexesSet &){ callback(data_types[0]); }, {}); } } diff --git a/src/DataTypes/transformTypesRecursively.h b/src/DataTypes/transformTypesRecursively.h index 54e6f2102ad..2cf8664f920 100644 --- a/src/DataTypes/transformTypesRecursively.h +++ b/src/DataTypes/transformTypesRecursively.h @@ -12,7 +12,7 @@ namespace DB /// If not all types are the same complex type (Array/Map/Tuple), this function won't be called to nested types. /// Function transform_simple_types will be applied to resulting simple types after all recursive calls. /// Function transform_complex_types will be applied to complex types (Array/Map/Tuple) after recursive call to their nested types. 
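The refactor of transformTypesRecursively above computes the set of distinct top-level type indexes once (getTypesIndexes) and hands it to both callbacks, so they can branch on membership instead of re-scanning the types. The exact callback signature is garbled by the formatting here, so the following is only a minimal self-contained sketch of that dispatch shape; TypeIndex, DataTypes and the names below are toy stand-ins, not the real ClickHouse definitions.

#include <functional>
#include <iostream>
#include <unordered_set>
#include <vector>

enum class TypeIndex { Int64, String, Nullable, Array };   // toy subset for illustration
using TypeIndexesSet = std::unordered_set<TypeIndex>;
using DataTypes = std::vector<TypeIndex>;                   // stand-in for real data type pointers

/// Collect the distinct top-level type ids once, as getTypesIndexes() does in the patch.
static TypeIndexesSet getTypesIndexes(const DataTypes & types)
{
    TypeIndexesSet indexes;
    for (auto t : types)
        indexes.insert(t);
    return indexes;
}

/// Callbacks now receive both the types and the precomputed index set.
static void transform(DataTypes & types,
                      std::function<void(DataTypes &, const TypeIndexesSet &)> transform_simple_types)
{
    TypeIndexesSet indexes = getTypesIndexes(types);
    transform_simple_types(types, indexes);
}

int main()
{
    DataTypes types{TypeIndex::Int64, TypeIndex::String};
    transform(types, [](DataTypes & ts, const TypeIndexesSet & idx)
    {
        /// The callback can check membership cheaply instead of walking the types again.
        if (idx.contains(TypeIndex::String))
            std::cout << "saw a String among " << ts.size() << " types\n";
    });
}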
-void transformTypesRecursively(DataTypes & types, std::function transform_simple_types, std::function transform_complex_types); +void transformTypesRecursively(DataTypes & types, std::function transform_simple_types, std::function transform_complex_types); void callOnNestedSimpleTypes(DataTypePtr & type, std::function callback); diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp index 01c6e5c8d8c..87f91856c1b 100644 --- a/src/Databases/DatabaseOrdinary.cpp +++ b/src/Databases/DatabaseOrdinary.cpp @@ -256,6 +256,9 @@ void DatabaseOrdinary::startupTables(ThreadPool & thread_pool, LoadingStrictness auto startup_one_table = [&](const StoragePtr & table) { + /// Since startup() method can use physical paths on disk we don't allow any exclusive actions (rename, drop so on) + /// until startup finished. + auto table_lock_holder = table->lockForShare(RWLockImpl::NO_QUERY, getContext()->getSettingsRef().lock_acquire_timeout); table->startup(); logAboutProgress(log, ++tables_processed, total_tables, watch); }; diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index 2a9f06e77fc..a152f21ce7b 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -401,7 +401,7 @@ void DatabaseReplicated::createEmptyLogEntry(const ZooKeeperPtr & current_zookee bool DatabaseReplicated::waitForReplicaToProcessAllEntries(UInt64 timeout_ms) { - if (!ddl_worker) + if (!ddl_worker || is_probably_dropped) return false; return ddl_worker->waitForReplicaToProcessAllEntries(timeout_ms); } @@ -473,9 +473,10 @@ void DatabaseReplicated::startupTables(ThreadPool & thread_pool, LoadingStrictne chassert(!TSA_SUPPRESS_WARNING_FOR_READ(tables_metadata_digest)); TSA_SUPPRESS_WARNING_FOR_WRITE(tables_metadata_digest) = digest; - ddl_worker = std::make_unique(this, getContext()); if (is_probably_dropped) return; + + ddl_worker = std::make_unique(this, getContext()); ddl_worker->startup(); } @@ -491,7 +492,7 @@ bool DatabaseReplicated::checkDigestValid(const ContextPtr & local_context, bool LOG_TEST(log, "Current in-memory metadata digest: {}", tables_metadata_digest); /// Database is probably being dropped - if (!local_context->getZooKeeperMetadataTransaction() && !ddl_worker->isCurrentlyActive()) + if (!local_context->getZooKeeperMetadataTransaction() && (!ddl_worker || !ddl_worker->isCurrentlyActive())) return true; UInt64 local_digest = 0; @@ -584,7 +585,14 @@ void DatabaseReplicated::checkQueryValid(const ASTPtr & query, ContextPtr query_ bool enable_functional_tests_helper = getContext()->getConfigRef().has("_functional_tests_helper_database_replicated_replace_args_macros"); if (!enable_functional_tests_helper) - LOG_WARNING(log, "It's not recommended to explicitly specify zookeeper_path and replica_name in ReplicatedMergeTree arguments"); + { + if (query_context->getSettingsRef().database_replicated_allow_replicated_engine_arguments) + LOG_WARNING(log, "It's not recommended to explicitly specify zookeeper_path and replica_name in ReplicatedMergeTree arguments"); + else + throw Exception(ErrorCodes::INCORRECT_QUERY, + "It's not allowed to specify explicit zookeeper_path and replica_name for ReplicatedMergeTree arguments in Replicated database. 
" + "If you really want to specify them explicitly, enable setting database_replicated_allow_replicated_engine_arguments."); + } if (maybe_shard_macros && maybe_replica_macros) return; @@ -1012,8 +1020,51 @@ ASTPtr DatabaseReplicated::parseQueryFromMetadataInZooKeeper(const String & node return ast; } +void DatabaseReplicated::dropReplica( + DatabaseReplicated * database, const String & database_zookeeper_path, const String & full_replica_name) +{ + assert(!database || database_zookeeper_path == database->zookeeper_path); + + if (full_replica_name.find('/') != std::string::npos) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid replica name: {}", full_replica_name); + + auto zookeeper = Context::getGlobalContextInstance()->getZooKeeper(); + + String database_mark = zookeeper->get(database_zookeeper_path); + if (database_mark != REPLICATED_DATABASE_MARK) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Path {} does not look like a path of Replicated database", database_zookeeper_path); + + String database_replica_path = fs::path(database_zookeeper_path) / "replicas" / full_replica_name; + if (!zookeeper->exists(database_replica_path)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Replica {} does not exist (database path: {})", + full_replica_name, database_zookeeper_path); + + if (zookeeper->exists(database_replica_path + "/active")) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Replica {} is active, cannot drop it (database path: {})", + full_replica_name, database_zookeeper_path); + + zookeeper->set(database_replica_path, DROPPED_MARK, -1); + /// Notify other replicas that cluster configuration was changed (if we can) + if (database) + database->createEmptyLogEntry(zookeeper); + + zookeeper->tryRemoveRecursive(database_replica_path); + if (zookeeper->tryRemove(database_zookeeper_path + "/replicas") == Coordination::Error::ZOK) + { + /// It was the last replica, remove all metadata + zookeeper->tryRemoveRecursive(database_zookeeper_path); + } +} + void DatabaseReplicated::drop(ContextPtr context_) { + if (is_probably_dropped) + { + /// Don't need to drop anything from ZooKeeper + DatabaseAtomic::drop(context_); + return; + } + auto current_zookeeper = getZooKeeper(); current_zookeeper->set(replica_path, DROPPED_MARK, -1); createEmptyLogEntry(current_zookeeper); @@ -1031,8 +1082,6 @@ void DatabaseReplicated::drop(ContextPtr context_) void DatabaseReplicated::stopReplication() { - if (is_probably_dropped) - return; if (ddl_worker) ddl_worker->shutdown(); } @@ -1048,7 +1097,7 @@ void DatabaseReplicated::shutdown() void DatabaseReplicated::dropTable(ContextPtr local_context, const String & table_name, bool sync) { auto txn = local_context->getZooKeeperMetadataTransaction(); - assert(!ddl_worker->isCurrentlyActive() || txn || startsWith(table_name, ".inner_id.")); + assert(!ddl_worker || !ddl_worker->isCurrentlyActive() || txn || startsWith(table_name, ".inner_id.")); if (txn && txn->isInitialQuery() && !txn->isCreateOrReplaceQuery()) { String metadata_zk_path = zookeeper_path + "/metadata/" + escapeForFileName(table_name); diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h index 0c9a3b77844..6a897f7322a 100644 --- a/src/Databases/DatabaseReplicated.h +++ b/src/Databases/DatabaseReplicated.h @@ -77,6 +77,8 @@ public: bool shouldReplicateQuery(const ContextPtr & query_context, const ASTPtr & query_ptr) const override; + static void dropReplica(DatabaseReplicated * database, const String & database_zookeeper_path, const String & full_replica_name); + friend 
struct DatabaseReplicatedTask; friend class DatabaseReplicatedDDLWorker; private: diff --git a/src/Databases/DatabaseReplicatedWorker.cpp b/src/Databases/DatabaseReplicatedWorker.cpp index 8c2983e1939..66ae5cd250c 100644 --- a/src/Databases/DatabaseReplicatedWorker.cpp +++ b/src/Databases/DatabaseReplicatedWorker.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include namespace fs = std::filesystem; @@ -36,6 +37,13 @@ bool DatabaseReplicatedDDLWorker::initializeMainThread() auto zookeeper = getAndSetZooKeeper(); if (database->is_readonly) database->tryConnectToZooKeeperAndInitDatabase(LoadingStrictnessLevel::ATTACH); + if (database->is_probably_dropped) + { + /// The flag was set in tryConnectToZooKeeperAndInitDatabase + LOG_WARNING(log, "Exiting main thread, because the database was probably dropped"); + /// NOTE It will not stop cleanup thread until DDLWorker::shutdown() call (cleanup thread will just do nothing) + break; + } initializeReplication(); initialized = true; return true; @@ -62,6 +70,16 @@ void DatabaseReplicatedDDLWorker::initializeReplication() /// Invariant: replica is lost if it's log_ptr value is less then max_log_ptr - logs_to_keep. auto zookeeper = getAndSetZooKeeper(); + + /// Create "active" node (remove previous one if necessary) + String active_path = fs::path(database->replica_path) / "active"; + String active_id = toString(ServerUUID::get()); + zookeeper->handleEphemeralNodeExistence(active_path, active_id); + zookeeper->create(active_path, active_id, zkutil::CreateMode::Ephemeral); + active_node_holder.reset(); + active_node_holder_zookeeper = zookeeper; + active_node_holder = zkutil::EphemeralNodeHolder::existing(active_path, *active_node_holder_zookeeper); + String log_ptr_str = zookeeper->get(database->replica_path + "/log_ptr"); UInt32 our_log_ptr = parse(log_ptr_str); UInt32 max_log_ptr = parse(zookeeper->get(database->zookeeper_path + "/max_log_ptr")); diff --git a/src/Databases/DatabaseReplicatedWorker.h b/src/Databases/DatabaseReplicatedWorker.h index 638b177460a..41edf2221b8 100644 --- a/src/Databases/DatabaseReplicatedWorker.h +++ b/src/Databases/DatabaseReplicatedWorker.h @@ -1,5 +1,6 @@ #pragma once #include +#include namespace DB { @@ -49,6 +50,12 @@ private: String current_task; std::atomic logs_to_keep = std::numeric_limits::max(); + + + /// EphemeralNodeHolder has reference to ZooKeeper, it may become dangling + ZooKeeperPtr active_node_holder_zookeeper; + /// It will remove "active" node when database is detached + zkutil::EphemeralNodeHolderPtr active_node_holder; }; } diff --git a/src/Dictionaries/MongoDBDictionarySource.cpp b/src/Dictionaries/MongoDBDictionarySource.cpp index 9c751d5ce97..c6b6a01d241 100644 --- a/src/Dictionaries/MongoDBDictionarySource.cpp +++ b/src/Dictionaries/MongoDBDictionarySource.cpp @@ -145,13 +145,9 @@ MongoDBDictionarySource::MongoDBDictionarySource( connection->connect(host, port); if (!user.empty()) { -#if POCO_VERSION >= 0x01070800 Poco::MongoDB::Database poco_db(db); if (!poco_db.authenticate(*connection, user, password, method.empty() ? 
Poco::MongoDB::Database::AUTH_SCRAM_SHA1 : method)) throw Exception(ErrorCodes::MONGODB_CANNOT_AUTHENTICATE, "Cannot authenticate in MongoDB, incorrect user or password"); -#else - authenticate(*connection, db, user, password); -#endif } } } diff --git a/src/Dictionaries/NullDictionarySource.cpp b/src/Dictionaries/NullDictionarySource.cpp new file mode 100644 index 00000000000..45dcc77f93d --- /dev/null +++ b/src/Dictionaries/NullDictionarySource.cpp @@ -0,0 +1,48 @@ +#include "NullDictionarySource.h" +#include +#include +#include +#include "DictionarySourceFactory.h" +#include "DictionarySourceHelpers.h" +#include "DictionaryStructure.h" +#include "registerDictionaries.h" + + +namespace DB +{ +NullDictionarySource::NullDictionarySource(Block & sample_block_) : sample_block(sample_block_) +{ +} + +NullDictionarySource::NullDictionarySource(const NullDictionarySource & other) : sample_block(other.sample_block) +{ +} + +QueryPipeline NullDictionarySource::loadAll() +{ + LOG_TRACE(&Poco::Logger::get("NullDictionarySource"), "loadAll {}", toString()); + return QueryPipeline(std::make_shared(sample_block)); +} + + +std::string NullDictionarySource::toString() const +{ + return "Null"; +} + + +void registerDictionarySourceNull(DictionarySourceFactory & factory) +{ + auto create_table_source + = [=](const DictionaryStructure & /* dict_struct */, + const Poco::Util::AbstractConfiguration & /* config */, + const std::string & /* config_prefix */, + Block & sample_block, + ContextPtr /* global_context */, + const std::string & /* default_database */, + bool /* created_from_ddl*/) -> DictionarySourcePtr { return std::make_unique(sample_block); }; + + factory.registerSource("null", create_table_source); +} + +} diff --git a/src/Dictionaries/NullDictionarySource.h b/src/Dictionaries/NullDictionarySource.h new file mode 100644 index 00000000000..7eb02055e3a --- /dev/null +++ b/src/Dictionaries/NullDictionarySource.h @@ -0,0 +1,53 @@ +#pragma once + +#include +#include "IDictionarySource.h" + +namespace DB +{ +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + +/// Allows creating empty dictionary +class NullDictionarySource final : public IDictionarySource +{ +public: + NullDictionarySource(Block & sample_block_); + + NullDictionarySource(const NullDictionarySource & other); + + QueryPipeline loadAll() override; + + QueryPipeline loadUpdatedAll() override + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method loadUpdatedAll is unsupported for NullDictionarySource"); + } + + QueryPipeline loadIds(const std::vector & /*ids*/) override + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method loadIds is unsupported for NullDictionarySource"); + } + + QueryPipeline loadKeys(const Columns & /*key_columns*/, const std::vector & /*requested_rows*/) override + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method loadKeys is unsupported for NullDictionarySource"); + } + + bool isModified() const override { return false; } + + bool supportsSelectiveLoad() const override { return false; } + + ///Not supported for NullDictionarySource + bool hasUpdateField() const override { return false; } + + DictionarySourcePtr clone() const override { return std::make_shared(*this); } + + std::string toString() const override; + +private: + Block sample_block; +}; + +} diff --git a/src/Dictionaries/getDictionaryConfigurationFromAST.cpp b/src/Dictionaries/getDictionaryConfigurationFromAST.cpp index 8fc00fe9345..38b513bfecd 100644 --- a/src/Dictionaries/getDictionaryConfigurationFromAST.cpp +++ 
b/src/Dictionaries/getDictionaryConfigurationFromAST.cpp @@ -344,11 +344,14 @@ void buildPrimaryKeyConfiguration( auto identifier_name = key_names.front(); - auto it = std::find_if(children.begin(), children.end(), [&](const ASTPtr & node) - { - const ASTDictionaryAttributeDeclaration * dict_attr = node->as(); - return dict_attr->name == identifier_name; - }); + const auto * it = std::find_if( + children.begin(), + children.end(), + [&](const ASTPtr & node) + { + const ASTDictionaryAttributeDeclaration * dict_attr = node->as(); + return dict_attr->name == identifier_name; + }); if (it == children.end()) { diff --git a/src/Dictionaries/registerDictionaries.cpp b/src/Dictionaries/registerDictionaries.cpp index 4ade5d88bd4..f0526f4ce37 100644 --- a/src/Dictionaries/registerDictionaries.cpp +++ b/src/Dictionaries/registerDictionaries.cpp @@ -6,6 +6,7 @@ namespace DB class DictionarySourceFactory; +void registerDictionarySourceNull(DictionarySourceFactory & factory); void registerDictionarySourceFile(DictionarySourceFactory & source_factory); void registerDictionarySourceMysql(DictionarySourceFactory & source_factory); void registerDictionarySourceClickHouse(DictionarySourceFactory & source_factory); @@ -36,6 +37,7 @@ void registerDictionaries() { { auto & source_factory = DictionarySourceFactory::instance(); + registerDictionarySourceNull(source_factory); registerDictionarySourceFile(source_factory); registerDictionarySourceMysql(source_factory); registerDictionarySourceClickHouse(source_factory); diff --git a/src/Disks/DiskDecorator.cpp b/src/Disks/DiskDecorator.cpp deleted file mode 100644 index f9017446dda..00000000000 --- a/src/Disks/DiskDecorator.cpp +++ /dev/null @@ -1,259 +0,0 @@ -#include "DiskDecorator.h" -#include -#include - -namespace DB -{ - -DiskDecorator::DiskDecorator(const DiskPtr & delegate_) - : IDisk(/* name_= */ "") - , delegate(delegate_) -{ -} - -DiskTransactionPtr DiskDecorator::createTransaction() -{ - return delegate->createTransaction(); -} - -const String & DiskDecorator::getName() const -{ - return delegate->getName(); -} - -ReservationPtr DiskDecorator::reserve(UInt64 bytes) -{ - return delegate->reserve(bytes); -} - -const String & DiskDecorator::getPath() const -{ - return delegate->getPath(); -} - -UInt64 DiskDecorator::getTotalSpace() const -{ - return delegate->getTotalSpace(); -} - -UInt64 DiskDecorator::getAvailableSpace() const -{ - return delegate->getAvailableSpace(); -} - -UInt64 DiskDecorator::getUnreservedSpace() const -{ - return delegate->getUnreservedSpace(); -} - -UInt64 DiskDecorator::getKeepingFreeSpace() const -{ - return delegate->getKeepingFreeSpace(); -} - -bool DiskDecorator::exists(const String & path) const -{ - return delegate->exists(path); -} - -bool DiskDecorator::isFile(const String & path) const -{ - return delegate->isFile(path); -} - -bool DiskDecorator::isDirectory(const String & path) const -{ - return delegate->isDirectory(path); -} - -size_t DiskDecorator::getFileSize(const String & path) const -{ - return delegate->getFileSize(path); -} - -void DiskDecorator::createDirectory(const String & path) -{ - delegate->createDirectory(path); -} - -void DiskDecorator::createDirectories(const String & path) -{ - delegate->createDirectories(path); -} - -void DiskDecorator::clearDirectory(const String & path) -{ - delegate->clearDirectory(path); -} - -void DiskDecorator::moveDirectory(const String & from_path, const String & to_path) -{ - delegate->moveDirectory(from_path, to_path); -} - -DirectoryIteratorPtr 
DiskDecorator::iterateDirectory(const String & path) const -{ - return delegate->iterateDirectory(path); -} - -void DiskDecorator::createFile(const String & path) -{ - delegate->createFile(path); -} - -void DiskDecorator::moveFile(const String & from_path, const String & to_path) -{ - delegate->moveFile(from_path, to_path); -} - -void DiskDecorator::replaceFile(const String & from_path, const String & to_path) -{ - delegate->replaceFile(from_path, to_path); -} - -void DiskDecorator::copy(const String & from_path, const std::shared_ptr & to_disk, const String & to_path) -{ - delegate->copy(from_path, to_disk, to_path); -} - -void DiskDecorator::copyDirectoryContent(const String & from_dir, const std::shared_ptr & to_disk, const String & to_dir) -{ - delegate->copyDirectoryContent(from_dir, to_disk, to_dir); -} - -void DiskDecorator::listFiles(const String & path, std::vector & file_names) const -{ - delegate->listFiles(path, file_names); -} - -std::unique_ptr -DiskDecorator::readFile( - const String & path, const ReadSettings & settings, std::optional read_hint, std::optional file_size) const -{ - return delegate->readFile(path, settings, read_hint, file_size); -} - -std::unique_ptr -DiskDecorator::writeFile(const String & path, size_t buf_size, WriteMode mode, const WriteSettings & settings) -{ - return delegate->writeFile(path, buf_size, mode, settings); -} - -void DiskDecorator::removeFile(const String & path) -{ - delegate->removeFile(path); -} - -void DiskDecorator::removeFileIfExists(const String & path) -{ - delegate->removeFileIfExists(path); -} - -void DiskDecorator::removeDirectory(const String & path) -{ - delegate->removeDirectory(path); -} - -void DiskDecorator::removeRecursive(const String & path) -{ - delegate->removeRecursive(path); -} - -void DiskDecorator::removeSharedFile(const String & path, bool keep_s3) -{ - delegate->removeSharedFile(path, keep_s3); -} - -void DiskDecorator::removeSharedFileIfExists(const String & path, bool keep_s3) -{ - delegate->removeSharedFileIfExists(path, keep_s3); -} - -void DiskDecorator::removeSharedFiles(const RemoveBatchRequest & files, bool keep_all_batch_data, const NameSet & file_names_remove_metadata_only) -{ - delegate->removeSharedFiles(files, keep_all_batch_data, file_names_remove_metadata_only); -} - -void DiskDecorator::removeSharedRecursive(const String & path, bool keep_all_batch_data, const NameSet & file_names_remove_metadata_only) -{ - delegate->removeSharedRecursive(path, keep_all_batch_data, file_names_remove_metadata_only); -} - -void DiskDecorator::setLastModified(const String & path, const Poco::Timestamp & timestamp) -{ - delegate->setLastModified(path, timestamp); -} - -Poco::Timestamp DiskDecorator::getLastModified(const String & path) const -{ - return delegate->getLastModified(path); -} - -time_t DiskDecorator::getLastChanged(const String & path) const -{ - return delegate->getLastChanged(path); -} - -void DiskDecorator::setReadOnly(const String & path) -{ - delegate->setReadOnly(path); -} - -void DiskDecorator::createHardLink(const String & src_path, const String & dst_path) -{ - delegate->createHardLink(src_path, dst_path); -} - -void DiskDecorator::truncateFile(const String & path, size_t size) -{ - delegate->truncateFile(path, size); -} - -Executor & DiskDecorator::getExecutor() -{ - return delegate->getExecutor(); -} - -SyncGuardPtr DiskDecorator::getDirectorySyncGuard(const String & path) const -{ - return delegate->getDirectorySyncGuard(path); -} - -void DiskDecorator::onFreeze(const String & path) -{ - 
delegate->onFreeze(path); -} - -void DiskDecorator::shutdown() -{ - delegate->shutdown(); -} - -void DiskDecorator::startupImpl(ContextPtr context) -{ - delegate->startupImpl(context); -} - -void DiskDecorator::applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context, const String & config_prefix, const DisksMap & map) -{ - delegate->applyNewSettings(config, context, config_prefix, map); -} - -DiskObjectStoragePtr DiskDecorator::createDiskObjectStorage() -{ - return delegate->createDiskObjectStorage(); -} - -ObjectStoragePtr DiskDecorator::getObjectStorage() -{ - return delegate->getObjectStorage(); -} - -DiskPtr DiskDecorator::getNestedDisk() const -{ - if (const auto * decorator = dynamic_cast(delegate.get())) - return decorator->getNestedDisk(); - return delegate; -} - -} diff --git a/src/Disks/DiskDecorator.h b/src/Disks/DiskDecorator.h deleted file mode 100644 index f7eface8c66..00000000000 --- a/src/Disks/DiskDecorator.h +++ /dev/null @@ -1,139 +0,0 @@ -#pragma once - -#include "Disks/IDisk.h" - -namespace DB -{ - -/** Forwards all methods to another disk. - * Methods can be overridden by descendants. - */ -class DiskDecorator : public IDisk -{ -public: - explicit DiskDecorator(const DiskPtr & delegate_); - - DiskTransactionPtr createTransaction() override; - const String & getName() const override; - ReservationPtr reserve(UInt64 bytes) override; - ~DiskDecorator() override = default; - const String & getPath() const override; - UInt64 getTotalSpace() const override; - UInt64 getAvailableSpace() const override; - UInt64 getUnreservedSpace() const override; - UInt64 getKeepingFreeSpace() const override; - bool exists(const String & path) const override; - bool isFile(const String & path) const override; - bool isDirectory(const String & path) const override; - size_t getFileSize(const String & path) const override; - void createDirectory(const String & path) override; - void createDirectories(const String & path) override; - void clearDirectory(const String & path) override; - void moveDirectory(const String & from_path, const String & to_path) override; - DirectoryIteratorPtr iterateDirectory(const String & path) const override; - void createFile(const String & path) override; - void moveFile(const String & from_path, const String & to_path) override; - void replaceFile(const String & from_path, const String & to_path) override; - void copy(const String & from_path, const std::shared_ptr & to_disk, const String & to_path) override; - void copyDirectoryContent(const String & from_dir, const std::shared_ptr & to_disk, const String & to_dir) override; - void listFiles(const String & path, std::vector & file_names) const override; - - std::unique_ptr readFile( - const String & path, - const ReadSettings & settings, - std::optional read_hint, - std::optional file_size) const override; - - std::unique_ptr writeFile( - const String & path, - size_t buf_size, - WriteMode mode, - const WriteSettings & settings) override; - - void removeFile(const String & path) override; - void removeFileIfExists(const String & path) override; - void removeSharedFileIfExists(const String & path, bool keep_s3) override; - - void removeDirectory(const String & path) override; - void removeRecursive(const String & path) override; - - void removeSharedFile(const String & path, bool keep_s3) override; - void removeSharedRecursive(const String & path, bool keep_all_batch_data, const NameSet & file_names_remove_metadata_only) override; - void removeSharedFiles(const RemoveBatchRequest & 
files, bool keep_all_batch_data, const NameSet & file_names_remove_metadata_only) override; - - void setLastModified(const String & path, const Poco::Timestamp & timestamp) override; - time_t getLastChanged(const String & path) const override; - Poco::Timestamp getLastModified(const String & path) const override; - void setReadOnly(const String & path) override; - void createHardLink(const String & src_path, const String & dst_path) override; - void truncateFile(const String & path, size_t size) override; - int open(const String & path, mode_t mode) const; - void close(int fd) const; - void sync(int fd) const; - String getUniqueId(const String & path) const override { return delegate->getUniqueId(path); } - bool checkUniqueId(const String & id) const override { return delegate->checkUniqueId(id); } - DataSourceDescription getDataSourceDescription() const override { return delegate->getDataSourceDescription(); } - bool isRemote() const override { return delegate->isRemote(); } - bool isReadOnly() const override { return delegate->isReadOnly(); } - bool isWriteOnce() const override { return delegate->isWriteOnce(); } - bool supportZeroCopyReplication() const override { return delegate->supportZeroCopyReplication(); } - bool supportParallelWrite() const override { return delegate->supportParallelWrite(); } - void onFreeze(const String & path) override; - SyncGuardPtr getDirectorySyncGuard(const String & path) const override; - void shutdown() override; - void startupImpl(ContextPtr context) override; - void applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context, const String & config_prefix, const DisksMap & map) override; - - bool supportsCache() const override { return delegate->supportsCache(); } - const String & getCacheBasePath() const override { return delegate->getCacheBasePath(); } - - StoredObjects getStorageObjects(const String & path) const override { return delegate->getStorageObjects(path); } - void getRemotePathsRecursive(const String & path, std::vector & paths_map) override { return delegate->getRemotePathsRecursive(path, paths_map); } - - DiskObjectStoragePtr createDiskObjectStorage() override; - ObjectStoragePtr getObjectStorage() override; - NameSet getCacheLayersNames() const override { return delegate->getCacheLayersNames(); } - - MetadataStoragePtr getMetadataStorage() override { return delegate->getMetadataStorage(); } - - std::unordered_map getSerializedMetadata(const std::vector & file_paths) const override { return delegate->getSerializedMetadata(file_paths); } - - UInt32 getRefCount(const String & path) const override { return delegate->getRefCount(path); } - - void syncRevision(UInt64 revision) override { delegate->syncRevision(revision); } - - UInt64 getRevision() const override { return delegate->getRevision(); } - - bool supportsStat() const override { return delegate->supportsStat(); } - struct stat stat(const String & path) const override { return delegate->stat(path); } - - bool supportsChmod() const override { return delegate->supportsChmod(); } - void chmod(const String & path, mode_t mode) override { delegate->chmod(path, mode); } - - virtual DiskPtr getNestedDisk() const; - -protected: - Executor & getExecutor() override; - - DiskPtr delegate; -}; - -/// TODO: Current reservation mechanism leaks IDisk abstraction details. -/// This hack is needed to return proper disk pointer (wrapper instead of implementation) from reservation object. 
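The deleted DiskDecorator was a plain delegation wrapper: it held a delegate disk and forwarded every IDisk call to it, and ReservationDelegate existed only so that reservations reported the wrapper rather than the wrapped implementation. After this patch, wrappers such as DiskEncrypted derive from IDisk directly and keep their own delegate member. A minimal sketch of that forwarding shape, with toy interfaces rather than the real IDisk API:

#include <cstdint>
#include <iostream>
#include <memory>
#include <string>

/// Toy stand-in for the disk interface (not the real ClickHouse API).
struct IDisk
{
    virtual ~IDisk() = default;
    virtual std::string getName() const = 0;
    virtual uint64_t getTotalSpace() const = 0;
};

struct LocalDisk : IDisk
{
    std::string getName() const override { return "local"; }
    uint64_t getTotalSpace() const override { return uint64_t(1) << 30; }
};

/// The removed decorator was exactly this: hold a delegate and forward each call.
/// A wrapper that needs custom behaviour overrides only the methods it cares about.
struct ForwardingDisk : IDisk
{
    explicit ForwardingDisk(std::shared_ptr<IDisk> delegate_) : delegate(std::move(delegate_)) {}
    std::string getName() const override { return delegate->getName(); }
    uint64_t getTotalSpace() const override { return delegate->getTotalSpace(); }
    std::shared_ptr<IDisk> delegate;
};

int main()
{
    ForwardingDisk disk(std::make_shared<LocalDisk>());
    std::cout << disk.getName() << ' ' << disk.getTotalSpace() << '\n';
}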
-class ReservationDelegate : public IReservation -{ -public: - ReservationDelegate(ReservationPtr delegate_, DiskPtr wrapper_) : delegate(std::move(delegate_)), wrapper(wrapper_) { } - UInt64 getSize() const override { return delegate->getSize(); } - UInt64 getUnreservedSpace() const override { return delegate->getUnreservedSpace(); } - DiskPtr getDisk(size_t) const override { return wrapper; } - Disks getDisks() const override { return {wrapper}; } - void update(UInt64 new_size) override { delegate->update(new_size); } - -private: - ReservationPtr delegate; - DiskPtr wrapper; -}; - - -} diff --git a/src/Disks/DiskEncrypted.cpp b/src/Disks/DiskEncrypted.cpp index 79905283ddb..7c4bee6d861 100644 --- a/src/Disks/DiskEncrypted.cpp +++ b/src/Disks/DiskEncrypted.cpp @@ -209,7 +209,8 @@ DiskEncrypted::DiskEncrypted( } DiskEncrypted::DiskEncrypted(const String & name_, std::unique_ptr settings_) - : DiskDecorator(settings_->wrapped_disk) + : IDisk(name_) + , delegate(settings_->wrapped_disk) , encrypted_name(name_) , disk_path(settings_->disk_path) , disk_absolute_path(settings_->wrapped_disk->getPath() + settings_->disk_path) diff --git a/src/Disks/DiskEncrypted.h b/src/Disks/DiskEncrypted.h index 74da7cfa2c0..d38c916ee6e 100644 --- a/src/Disks/DiskEncrypted.h +++ b/src/Disks/DiskEncrypted.h @@ -4,7 +4,6 @@ #if USE_SSL #include -#include #include #include @@ -27,7 +26,7 @@ struct DiskEncryptedSettings /// Encrypted disk ciphers all written files on the fly and writes the encrypted files to an underlying (normal) disk. /// And when we read files from an encrypted disk it deciphers them automatically, /// so we can work with a encrypted disk like it's a normal disk. -class DiskEncrypted : public DiskDecorator +class DiskEncrypted : public IDisk { public: DiskEncrypted(const String & name_, const Poco::Util::AbstractConfiguration & config_, const String & config_prefix_, const DisksMap & map_); @@ -252,6 +251,32 @@ public: return std::make_shared(*this); } + UInt64 getTotalSpace() const override + { + return delegate->getTotalSpace(); + } + + UInt64 getAvailableSpace() const override + { + return delegate->getAvailableSpace(); + } + + UInt64 getUnreservedSpace() const override + { + return delegate->getUnreservedSpace(); + } + + bool supportZeroCopyReplication() const override + { + return delegate->supportZeroCopyReplication(); + } + + MetadataStoragePtr getMetadataStorage() override + { + return delegate->getMetadataStorage(); + } + + private: String wrappedPath(const String & path) const { @@ -261,6 +286,7 @@ private: return disk_path + path; } + DiskPtr delegate; const String encrypted_name; const String disk_path; const String disk_absolute_path; diff --git a/src/Disks/DiskLocal.cpp b/src/Disks/DiskLocal.cpp index 63ba1a45e34..d2cd30c1cfa 100644 --- a/src/Disks/DiskLocal.cpp +++ b/src/Disks/DiskLocal.cpp @@ -18,7 +18,6 @@ #include #include -#include #include #include #include @@ -775,7 +774,7 @@ void registerDiskLocal(DiskFactory & factory, bool global_skip_access_check) std::shared_ptr disk = std::make_shared(name, path, keep_free_space_bytes, context, config.getUInt("local_disk_check_period_ms", 0)); disk->startup(context, skip_access_check); - return std::make_shared(disk); + return disk; }; factory.registerDiskType("local", creator); } diff --git a/src/Disks/DiskRestartProxy.cpp b/src/Disks/DiskRestartProxy.cpp deleted file mode 100644 index 0b79ee51db9..00000000000 --- a/src/Disks/DiskRestartProxy.cpp +++ /dev/null @@ -1,378 +0,0 @@ -#include "DiskRestartProxy.h" - -#include -#include - 
-namespace DB -{ - -namespace ErrorCodes -{ - extern const int DEADLOCK_AVOIDED; -} - -using Millis = std::chrono::milliseconds; -using Seconds = std::chrono::seconds; - -/// Holds restart read lock till buffer destruction. -class RestartAwareReadBuffer : public ReadBufferFromFileDecorator -{ -public: - RestartAwareReadBuffer(const DiskRestartProxy & disk, std::unique_ptr impl_) - : ReadBufferFromFileDecorator(std::move(impl_)), lock(disk.mutex) { } - - void prefetch() override - { - swap(*impl); - impl->prefetch(); - swap(*impl); - } - - void setReadUntilPosition(size_t position) override - { - swap(*impl); - impl->setReadUntilPosition(position); - swap(*impl); - } - - void setReadUntilEnd() override - { - swap(*impl); - impl->setReadUntilEnd(); - swap(*impl); - } - - String getInfoForLog() override { return impl->getInfoForLog(); } - -private: - ReadLock lock; -}; - -/// Holds restart read lock till buffer finalize. -class RestartAwareWriteBuffer : public WriteBufferFromFileDecorator -{ -public: - RestartAwareWriteBuffer(const DiskRestartProxy & disk, std::unique_ptr impl_) - : WriteBufferFromFileDecorator(std::move(impl_)), lock(disk.mutex) { } - - ~RestartAwareWriteBuffer() override - { - try - { - finalize(); - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - } - } - - void finalizeImpl() override - { - WriteBufferFromFileDecorator::finalizeImpl(); - - lock.unlock(); - } - -private: - ReadLock lock; -}; - -DiskRestartProxy::DiskRestartProxy(DiskPtr & delegate_) - : DiskDecorator(delegate_) -{} - -ReservationPtr DiskRestartProxy::reserve(UInt64 bytes) -{ - ReadLock lock (mutex); - auto ptr = DiskDecorator::reserve(bytes); - if (ptr) - { - auto disk_ptr = std::static_pointer_cast(shared_from_this()); - return std::make_unique(std::move(ptr), disk_ptr); - } - return ptr; -} - -const String & DiskRestartProxy::getPath() const -{ - ReadLock lock (mutex); - return DiskDecorator::getPath(); -} - -UInt64 DiskRestartProxy::getTotalSpace() const -{ - ReadLock lock (mutex); - return DiskDecorator::getTotalSpace(); -} - -UInt64 DiskRestartProxy::getAvailableSpace() const -{ - ReadLock lock (mutex); - return DiskDecorator::getAvailableSpace(); -} - -UInt64 DiskRestartProxy::getUnreservedSpace() const -{ - ReadLock lock (mutex); - return DiskDecorator::getUnreservedSpace(); -} - -UInt64 DiskRestartProxy::getKeepingFreeSpace() const -{ - ReadLock lock (mutex); - return DiskDecorator::getKeepingFreeSpace(); -} - -bool DiskRestartProxy::exists(const String & path) const -{ - ReadLock lock (mutex); - return DiskDecorator::exists(path); -} - -bool DiskRestartProxy::isFile(const String & path) const -{ - ReadLock lock (mutex); - return DiskDecorator::isFile(path); -} - -bool DiskRestartProxy::isDirectory(const String & path) const -{ - ReadLock lock (mutex); - return DiskDecorator::isDirectory(path); -} - -size_t DiskRestartProxy::getFileSize(const String & path) const -{ - ReadLock lock (mutex); - return DiskDecorator::getFileSize(path); -} - -void DiskRestartProxy::createDirectory(const String & path) -{ - ReadLock lock (mutex); - DiskDecorator::createDirectory(path); -} - -void DiskRestartProxy::createDirectories(const String & path) -{ - ReadLock lock (mutex); - DiskDecorator::createDirectories(path); -} - -void DiskRestartProxy::clearDirectory(const String & path) -{ - ReadLock lock (mutex); - DiskDecorator::clearDirectory(path); -} - -void DiskRestartProxy::moveDirectory(const String & from_path, const String & to_path) -{ - ReadLock lock (mutex); - 
DiskDecorator::moveDirectory(from_path, to_path); -} - -DirectoryIteratorPtr DiskRestartProxy::iterateDirectory(const String & path) const -{ - ReadLock lock (mutex); - return DiskDecorator::iterateDirectory(path); -} - -void DiskRestartProxy::createFile(const String & path) -{ - ReadLock lock (mutex); - DiskDecorator::createFile(path); -} - -void DiskRestartProxy::moveFile(const String & from_path, const String & to_path) -{ - ReadLock lock (mutex); - DiskDecorator::moveFile(from_path, to_path); -} - -void DiskRestartProxy::replaceFile(const String & from_path, const String & to_path) -{ - ReadLock lock (mutex); - DiskDecorator::replaceFile(from_path, to_path); -} - -void DiskRestartProxy::copy(const String & from_path, const std::shared_ptr & to_disk, const String & to_path) -{ - ReadLock lock (mutex); - DiskDecorator::copy(from_path, to_disk, to_path); -} - -void DiskRestartProxy::copyDirectoryContent(const String & from_dir, const std::shared_ptr & to_disk, const String & to_dir) -{ - ReadLock lock (mutex); - DiskDecorator::copyDirectoryContent(from_dir, to_disk, to_dir); -} - -void DiskRestartProxy::listFiles(const String & path, std::vector & file_names) const -{ - ReadLock lock (mutex); - DiskDecorator::listFiles(path, file_names); -} - -std::unique_ptr DiskRestartProxy::readFile( - const String & path, const ReadSettings & settings, std::optional read_hint, std::optional file_size) const -{ - ReadLock lock (mutex); - auto impl = DiskDecorator::readFile(path, settings, read_hint, file_size); - return std::make_unique(*this, std::move(impl)); -} - -std::unique_ptr DiskRestartProxy::writeFile(const String & path, size_t buf_size, WriteMode mode, const WriteSettings & settings) -{ - ReadLock lock (mutex); - auto impl = DiskDecorator::writeFile(path, buf_size, mode, settings); - return std::make_unique(*this, std::move(impl)); -} - -void DiskRestartProxy::removeFile(const String & path) -{ - ReadLock lock (mutex); - DiskDecorator::removeFile(path); -} - -void DiskRestartProxy::removeFileIfExists(const String & path) -{ - ReadLock lock (mutex); - DiskDecorator::removeFileIfExists(path); -} - -void DiskRestartProxy::removeDirectory(const String & path) -{ - ReadLock lock (mutex); - DiskDecorator::removeDirectory(path); -} - -void DiskRestartProxy::removeRecursive(const String & path) -{ - ReadLock lock (mutex); - DiskDecorator::removeRecursive(path); -} - -void DiskRestartProxy::removeSharedFile(const String & path, bool keep_s3) -{ - ReadLock lock (mutex); - DiskDecorator::removeSharedFile(path, keep_s3); -} - -void DiskRestartProxy::removeSharedFiles(const RemoveBatchRequest & files, bool keep_all_batch_data, const NameSet & file_names_remove_metadata_only) -{ - ReadLock lock (mutex); - DiskDecorator::removeSharedFiles(files, keep_all_batch_data, file_names_remove_metadata_only); -} - -void DiskRestartProxy::removeSharedRecursive(const String & path, bool keep_all_batch_data, const NameSet & file_names_remove_metadata_only) -{ - ReadLock lock (mutex); - DiskDecorator::removeSharedRecursive(path, keep_all_batch_data, file_names_remove_metadata_only); -} - -void DiskRestartProxy::setLastModified(const String & path, const Poco::Timestamp & timestamp) -{ - ReadLock lock (mutex); - DiskDecorator::setLastModified(path, timestamp); -} - -Poco::Timestamp DiskRestartProxy::getLastModified(const String & path) const -{ - ReadLock lock (mutex); - return DiskDecorator::getLastModified(path); -} - -void DiskRestartProxy::setReadOnly(const String & path) -{ - ReadLock lock (mutex); - 
DiskDecorator::setReadOnly(path); -} - -void DiskRestartProxy::createHardLink(const String & src_path, const String & dst_path) -{ - ReadLock lock (mutex); - DiskDecorator::createHardLink(src_path, dst_path); -} - -void DiskRestartProxy::truncateFile(const String & path, size_t size) -{ - ReadLock lock (mutex); - DiskDecorator::truncateFile(path, size); -} - -String DiskRestartProxy::getUniqueId(const String & path) const -{ - ReadLock lock (mutex); - return DiskDecorator::getUniqueId(path); -} - -bool DiskRestartProxy::checkUniqueId(const String & id) const -{ - ReadLock lock (mutex); - return DiskDecorator::checkUniqueId(id); -} - -const String & DiskRestartProxy::getCacheBasePath() const -{ - ReadLock lock (mutex); - return DiskDecorator::getCacheBasePath(); -} - -StoredObjects DiskRestartProxy::getStorageObjects(const String & path) const -{ - ReadLock lock (mutex); - return DiskDecorator::getStorageObjects(path); -} - -void DiskRestartProxy::getRemotePathsRecursive( - const String & path, std::vector & paths_map) -{ - ReadLock lock (mutex); - return DiskDecorator::getRemotePathsRecursive(path, paths_map); -} - -DiskPtr DiskRestartProxy::getNestedDisk() const -{ - DiskPtr delegate_copy; - - { - ReadLock lock (mutex); - delegate_copy = delegate; - } - - if (const auto * decorator = dynamic_cast(delegate_copy.get())) - return decorator->getNestedDisk(); - return delegate_copy; -} - -void DiskRestartProxy::restart(ContextPtr context) -{ - /// Speed up processing unhealthy requests. - DiskDecorator::shutdown(); - - WriteLock lock (mutex, std::defer_lock); - - LOG_INFO(log, "Acquiring lock to restart disk {}", DiskDecorator::getName()); - - auto start_time = std::chrono::steady_clock::now(); - auto lock_timeout = Seconds(120); - do - { - /// Use a small timeout to not block read operations for a long time. - if (lock.try_lock_for(Millis(10))) - break; - } while (std::chrono::steady_clock::now() - start_time < lock_timeout); - - if (!lock.owns_lock()) - throw Exception("Failed to acquire restart lock within timeout. Client should retry.", ErrorCodes::DEADLOCK_AVOIDED); - - LOG_INFO(log, "Restart lock acquired. Restarting disk {}", DiskDecorator::getName()); - - /// NOTE: access checking will cause deadlock here, so skip it. - DiskDecorator::startup(context, /* skip_access_check= */ true); - - LOG_INFO(log, "Disk restarted {}", DiskDecorator::getName()); -} - -} diff --git a/src/Disks/DiskRestartProxy.h b/src/Disks/DiskRestartProxy.h deleted file mode 100644 index fb4dde3bfa3..00000000000 --- a/src/Disks/DiskRestartProxy.h +++ /dev/null @@ -1,86 +0,0 @@ -#pragma once - -#include "DiskDecorator.h" - -#include -#include - -namespace DB -{ -using ReadLock = std::shared_lock; -using WriteLock = std::unique_lock; - -class RestartAwareReadBuffer; -class RestartAwareWriteBuffer; - -/** - * Gives possibility to change underlying disk settings at runtime calling 'restart' method. - * All disk methods are protected by read-lock. Read/Write buffers produced by disk holds read-lock till buffer is finalized/destructed. - * When 'restart' method is called write-lock is acquired to make sure that no operations are running on that disk. 
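The deleted DiskRestartProxy guarded every disk operation with a shared (read) lock and took the exclusive (write) lock only in restart(), retrying in short slices so readers are never blocked for long and giving up after an overall deadline. A self-contained sketch of that locking pattern with standard library primitives only; the timeouts and names are illustrative:

#include <chrono>
#include <iostream>
#include <mutex>
#include <shared_mutex>
#include <stdexcept>

class RestartableResource
{
public:
    /// Every normal operation holds the shared lock for its duration.
    int read() const
    {
        std::shared_lock lock(mutex);
        return value;
    }

    /// restart() needs exclusivity; retry with a short timeout so readers are not starved,
    /// and fail after an overall deadline instead of blocking forever.
    void restart()
    {
        std::unique_lock lock(mutex, std::defer_lock);
        const auto deadline = std::chrono::steady_clock::now() + std::chrono::seconds(120);
        while (!lock.try_lock_for(std::chrono::milliseconds(10)))
        {
            if (std::chrono::steady_clock::now() >= deadline)
                throw std::runtime_error("Failed to acquire restart lock within timeout");
        }
        ++value;   /// "reinitialize" the resource while holding the exclusive lock
    }

private:
    mutable std::shared_timed_mutex mutex;   /// timed variant, so try_lock_for is available
    int value = 0;
};

int main()
{
    RestartableResource r;
    r.restart();
    std::cout << r.read() << '\n';
}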
- */ -class DiskRestartProxy : public DiskDecorator -{ -public: - explicit DiskRestartProxy(DiskPtr & delegate_); - - ReservationPtr reserve(UInt64 bytes) override; - const String & getPath() const override; - UInt64 getTotalSpace() const override; - UInt64 getAvailableSpace() const override; - UInt64 getUnreservedSpace() const override; - UInt64 getKeepingFreeSpace() const override; - bool exists(const String & path) const override; - bool isFile(const String & path) const override; - bool isDirectory(const String & path) const override; - size_t getFileSize(const String & path) const override; - void createDirectory(const String & path) override; - void createDirectories(const String & path) override; - void clearDirectory(const String & path) override; - void moveDirectory(const String & from_path, const String & to_path) override; - DirectoryIteratorPtr iterateDirectory(const String & path) const override; - void createFile(const String & path) override; - void moveFile(const String & from_path, const String & to_path) override; - void replaceFile(const String & from_path, const String & to_path) override; - void copy(const String & from_path, const DiskPtr & to_disk, const String & to_path) override; - void copyDirectoryContent(const String & from_dir, const std::shared_ptr & to_disk, const String & to_dir) override; - void listFiles(const String & path, std::vector & file_names) const override; - std::unique_ptr readFile( - const String & path, - const ReadSettings & settings, - std::optional read_hint, - std::optional file_size) const override; - std::unique_ptr writeFile(const String & path, size_t buf_size, WriteMode mode, const WriteSettings & settings) override; - void removeFile(const String & path) override; - void removeFileIfExists(const String & path) override; - void removeDirectory(const String & path) override; - void removeRecursive(const String & path) override; - void removeSharedFile(const String & path, bool keep_s3) override; - void removeSharedFiles(const RemoveBatchRequest & files, bool keep_all_batch_data, const NameSet & file_names_remove_metadata_only) override; - void removeSharedRecursive(const String & path, bool keep_all_batch_data, const NameSet & file_names_remove_metadata_only) override; - void setLastModified(const String & path, const Poco::Timestamp & timestamp) override; - Poco::Timestamp getLastModified(const String & path) const override; - void setReadOnly(const String & path) override; - void createHardLink(const String & src_path, const String & dst_path) override; - void truncateFile(const String & path, size_t size) override; - String getUniqueId(const String & path) const override; - bool checkUniqueId(const String & id) const override; - - const String & getCacheBasePath() const override; - StoredObjects getStorageObjects(const String & path) const override; - void getRemotePathsRecursive(const String & path, std::vector & paths_map) override; - - void restart(ContextPtr context); - - DiskPtr getNestedDisk() const override; - -private: - friend class RestartAwareReadBuffer; - friend class RestartAwareWriteBuffer; - - /// Mutex to protect RW access. 
- mutable std::shared_timed_mutex mutex; - - Poco::Logger * log = &Poco::Logger::get("DiskRestartProxy"); -}; - -} diff --git a/src/Disks/FakeDiskTransaction.h b/src/Disks/FakeDiskTransaction.h index e80b45a94ec..46be885739e 100644 --- a/src/Disks/FakeDiskTransaction.h +++ b/src/Disks/FakeDiskTransaction.h @@ -16,6 +16,7 @@ public: {} void commit() override {} + void undo() override {} void createDirectory(const std::string & path) override { diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index c567566a5b3..e85a18c8729 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -256,15 +256,15 @@ public: /// For one local path there might be multiple remote paths in case of Log family engines. struct LocalPathWithObjectStoragePaths - { - std::string local_path; - std::string common_prefix_for_objects; - StoredObjects objects; + { + std::string local_path; + std::string common_prefix_for_objects; + StoredObjects objects; - LocalPathWithObjectStoragePaths( - const std::string & local_path_, const std::string & common_prefix_for_objects_, StoredObjects && objects_) - : local_path(local_path_), common_prefix_for_objects(common_prefix_for_objects_), objects(std::move(objects_)) {} - }; + LocalPathWithObjectStoragePaths( + const std::string & local_path_, const std::string & common_prefix_for_objects_, StoredObjects && objects_) + : local_path(local_path_), common_prefix_for_objects(common_prefix_for_objects_), objects(std::move(objects_)) {} + }; virtual void getRemotePathsRecursive(const String &, std::vector &) { diff --git a/src/Disks/IDiskTransaction.h b/src/Disks/IDiskTransaction.h index 572d86dcfdb..02c8731428d 100644 --- a/src/Disks/IDiskTransaction.h +++ b/src/Disks/IDiskTransaction.h @@ -30,6 +30,8 @@ public: /// If something fails rollback and throw exception. virtual void commit() = 0; + virtual void undo() = 0; + virtual ~IDiskTransaction() = default; /// Create directory. diff --git a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp index 5b5d746ab55..0dd40e7f153 100644 --- a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp +++ b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp @@ -118,10 +118,7 @@ void CachedOnDiskReadBufferFromFile::initialize(size_t offset, size_t size) } else { - CreateFileSegmentSettings create_settings{ - .is_persistent = is_persistent - }; - + CreateFileSegmentSettings create_settings(is_persistent ? 
FileSegmentKind::Persistent : FileSegmentKind::Regular); file_segments_holder.emplace(cache->getOrSet(cache_key, offset, size, create_settings)); } @@ -951,7 +948,7 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep() } else { - LOG_TRACE(log, "No space left in cache, will continue without cache download"); + LOG_TRACE(log, "No space left in cache to reserve {} bytes, will continue without cache download", size); file_segment->completeWithState(FileSegment::State::PARTIALLY_DOWNLOADED_NO_CONTINUATION); } diff --git a/src/Disks/IO/CachedOnDiskWriteBufferFromFile.cpp b/src/Disks/IO/CachedOnDiskWriteBufferFromFile.cpp index 994bb743c5f..a51fe079a0f 100644 --- a/src/Disks/IO/CachedOnDiskWriteBufferFromFile.cpp +++ b/src/Disks/IO/CachedOnDiskWriteBufferFromFile.cpp @@ -44,81 +44,80 @@ FileSegmentRangeWriter::FileSegmentRangeWriter( const String & source_path_) : cache(cache_) , key(key_) + , log(&Poco::Logger::get("FileSegmentRangeWriter")) , cache_log(cache_log_) , query_id(query_id_) , source_path(source_path_) - , current_file_segment_it(file_segments_holder.file_segments.end()) { } -bool FileSegmentRangeWriter::write(const char * data, size_t size, size_t offset, bool is_persistent) +bool FileSegmentRangeWriter::write(const char * data, size_t size, size_t offset, FileSegmentKind segment_kind) { if (finalized) return false; + if (expected_write_offset != offset) + { + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Cannot write file segment at offset {}, because expected write offset is: {}", + offset, expected_write_offset); + } + auto & file_segments = file_segments_holder.file_segments; - if (current_file_segment_it == file_segments.end()) + if (file_segments.empty() || file_segments.back()->isDownloaded()) { - current_file_segment_it = allocateFileSegment(current_file_segment_write_offset, is_persistent); - } - else - { - auto file_segment = *current_file_segment_it; - assert(file_segment->getCurrentWriteOffset() == current_file_segment_write_offset); - - if (current_file_segment_write_offset != offset) - { - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "Cannot write file segment at offset {}, because current write offset is: {}", - offset, current_file_segment_write_offset); - } - - if (file_segment->range().size() == file_segment->getDownloadedSize()) - { - completeFileSegment(*file_segment); - current_file_segment_it = allocateFileSegment(current_file_segment_write_offset, is_persistent); - } + allocateFileSegment(expected_write_offset, segment_kind); } - auto & file_segment = *current_file_segment_it; - - auto downloader = file_segment->getOrSetDownloader(); - if (downloader != FileSegment::getCallerId()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Failed to set a downloader. 
({})", file_segment->getInfoForLog()); + auto & file_segment = file_segments.back(); SCOPE_EXIT({ - if (file_segment->isDownloader()) - file_segment->completePartAndResetDownloader(); + if (file_segments.back()->isDownloader()) + file_segments.back()->completePartAndResetDownloader(); }); - bool reserved = file_segment->reserve(size); - if (!reserved) + while (size > 0) { - file_segment->completeWithState(FileSegment::State::PARTIALLY_DOWNLOADED_NO_CONTINUATION); - appendFilesystemCacheLog(*file_segment); + size_t available_size = file_segment->range().size() - file_segment->getDownloadedSize(); + if (available_size == 0) + { + completeFileSegment(*file_segment); + file_segment = allocateFileSegment(expected_write_offset, segment_kind); + continue; + } - LOG_DEBUG( - &Poco::Logger::get("FileSegmentRangeWriter"), - "Unsuccessful space reservation attempt (size: {}, file segment info: {}", - size, file_segment->getInfoForLog()); + if (!file_segment->isDownloader() + && file_segment->getOrSetDownloader() != FileSegment::getCallerId()) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Failed to set a downloader. ({})", file_segment->getInfoForLog()); + } - return false; - } + size_t size_to_write = std::min(available_size, size); - try - { - file_segment->write(data, size, offset); - } - catch (...) - { + bool reserved = file_segment->reserve(size_to_write); + if (!reserved) + { + file_segment->completeWithState(FileSegment::State::PARTIALLY_DOWNLOADED_NO_CONTINUATION); + appendFilesystemCacheLog(*file_segment); + + LOG_DEBUG( + log, "Failed to reserve space in cache (size: {}, file segment info: {}", + size, file_segment->getInfoForLog()); + + return false; + } + + file_segment->write(data, size_to_write, offset); file_segment->completePartAndResetDownloader(); - throw; - } - file_segment->completePartAndResetDownloader(); - current_file_segment_write_offset += size; + size -= size_to_write; + expected_write_offset += size_to_write; + offset += size_to_write; + data += size_to_write; + } return true; } @@ -129,10 +128,10 @@ void FileSegmentRangeWriter::finalize() return; auto & file_segments = file_segments_holder.file_segments; - if (file_segments.empty() || current_file_segment_it == file_segments.end()) + if (file_segments.empty()) return; - completeFileSegment(**current_file_segment_it); + completeFileSegment(*file_segments.back()); finalized = true; } @@ -149,7 +148,7 @@ FileSegmentRangeWriter::~FileSegmentRangeWriter() } } -FileSegments::iterator FileSegmentRangeWriter::allocateFileSegment(size_t offset, bool is_persistent) +FileSegmentPtr & FileSegmentRangeWriter::allocateFileSegment(size_t offset, FileSegmentKind segment_kind) { /** * Allocate a new file segment starting `offset`. @@ -158,17 +157,15 @@ FileSegments::iterator FileSegmentRangeWriter::allocateFileSegment(size_t offset std::lock_guard cache_lock(cache->mutex); - CreateFileSegmentSettings create_settings - { - .is_persistent = is_persistent, - }; + CreateFileSegmentSettings create_settings(segment_kind); /// We set max_file_segment_size to be downloaded, /// if we have less size to write, file segment will be resized in complete() method. 
auto file_segment = cache->createFileSegmentForDownload( key, offset, cache->max_file_segment_size, create_settings, cache_lock); - return file_segments_holder.add(std::move(file_segment)); + auto & file_segments = file_segments_holder.file_segments; + return *file_segments.insert(file_segments.end(), file_segment); } void FileSegmentRangeWriter::appendFilesystemCacheLog(const FileSegment & file_segment) @@ -199,7 +196,7 @@ void FileSegmentRangeWriter::appendFilesystemCacheLog(const FileSegment & file_s void FileSegmentRangeWriter::completeFileSegment(FileSegment & file_segment) { /// File segment can be detached if space reservation failed. - if (file_segment.isDetached()) + if (file_segment.isDetached() || file_segment.isCompleted()) return; file_segment.completeWithoutState(); @@ -223,6 +220,7 @@ CachedOnDiskWriteBufferFromFile::CachedOnDiskWriteBufferFromFile( , is_persistent_cache_file(is_persistent_cache_file_) , query_id(query_id_) , enable_cache_log(!query_id_.empty() && settings_.enable_filesystem_cache_log) + , throw_on_error_from_cache(settings_.throw_on_error_from_cache) { } @@ -246,11 +244,11 @@ void CachedOnDiskWriteBufferFromFile::nextImpl() } /// Write data to cache. - cacheData(working_buffer.begin(), size); + cacheData(working_buffer.begin(), size, throw_on_error_from_cache); current_download_offset += size; } -void CachedOnDiskWriteBufferFromFile::cacheData(char * data, size_t size) +void CachedOnDiskWriteBufferFromFile::cacheData(char * data, size_t size, bool throw_on_error) { if (cache_in_error_state_or_disabled) return; @@ -270,7 +268,8 @@ void CachedOnDiskWriteBufferFromFile::cacheData(char * data, size_t size) try { - if (!cache_writer->write(data, size, current_download_offset, is_persistent_cache_file)) + auto segment_kind = is_persistent_cache_file ? FileSegmentKind::Persistent : FileSegmentKind::Regular; + if (!cache_writer->write(data, size, current_download_offset, segment_kind)) { LOG_INFO(log, "Write-through cache is stopped as cache limit is reached and nothing can be evicted"); return; @@ -285,11 +284,17 @@ void CachedOnDiskWriteBufferFromFile::cacheData(char * data, size_t size) return; } + if (throw_on_error) + throw; + tryLogCurrentException(__PRETTY_FUNCTION__); return; } catch (...) { + if (throw_on_error) + throw; + tryLogCurrentException(__PRETTY_FUNCTION__); return; } diff --git a/src/Disks/IO/CachedOnDiskWriteBufferFromFile.h b/src/Disks/IO/CachedOnDiskWriteBufferFromFile.h index cec7305ab1b..834e584c8db 100644 --- a/src/Disks/IO/CachedOnDiskWriteBufferFromFile.h +++ b/src/Disks/IO/CachedOnDiskWriteBufferFromFile.h @@ -32,14 +32,14 @@ public: * Write a range of file segments. Allocate file segment of `max_file_segment_size` and write to * it until it is full and then allocate next file segment. 
*/ - bool write(const char * data, size_t size, size_t offset, bool is_persistent); + bool write(const char * data, size_t size, size_t offset, FileSegmentKind segment_kind); void finalize(); ~FileSegmentRangeWriter(); private: - FileSegments::iterator allocateFileSegment(size_t offset, bool is_persistent); + FileSegmentPtr & allocateFileSegment(size_t offset, FileSegmentKind segment_kind); void appendFilesystemCacheLog(const FileSegment & file_segment); @@ -48,14 +48,14 @@ private: FileCache * cache; FileSegment::Key key; + Poco::Logger * log; std::shared_ptr cache_log; String query_id; String source_path; FileSegmentsHolder file_segments_holder{}; - FileSegments::iterator current_file_segment_it; - size_t current_file_segment_write_offset = 0; + size_t expected_write_offset = 0; bool finalized = false; }; @@ -81,7 +81,7 @@ public: void finalizeImpl() override; private: - void cacheData(char * data, size_t size); + void cacheData(char * data, size_t size, bool throw_on_error); Poco::Logger * log; @@ -95,6 +95,7 @@ private: bool enable_cache_log; + bool throw_on_error_from_cache; bool cache_in_error_state_or_disabled = false; std::unique_ptr cache_writer; diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp index 15ddbe551da..664f7b6919e 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp @@ -1,4 +1,5 @@ #include +#include "Common/Exception.h" #if USE_AZURE_BLOB_STORAGE @@ -176,7 +177,9 @@ void AzureObjectStorage::removeObject(const StoredObject & object) auto client_ptr = client.get(); auto delete_info = client_ptr->DeleteBlob(path); if (!delete_info.Value.Deleted) - throw Exception(ErrorCodes::AZURE_BLOB_STORAGE_ERROR, "Failed to delete file in AzureBlob Storage: {}", path); + throw Exception( + ErrorCodes::AZURE_BLOB_STORAGE_ERROR, "Failed to delete file (path: {}) in AzureBlob Storage, reason: {}", + path, delete_info.RawResponse ? delete_info.RawResponse->GetReasonPhrase() : "Unknown"); } void AzureObjectStorage::removeObjects(const StoredObjects & objects) @@ -187,21 +190,49 @@ void AzureObjectStorage::removeObjects(const StoredObjects & objects) LOG_TEST(log, "Removing object: {} (total: {})", object.absolute_path, objects.size()); auto delete_info = client_ptr->DeleteBlob(object.absolute_path); if (!delete_info.Value.Deleted) - throw Exception(ErrorCodes::AZURE_BLOB_STORAGE_ERROR, "Failed to delete file in AzureBlob Storage: {}", object.absolute_path); + throw Exception( + ErrorCodes::AZURE_BLOB_STORAGE_ERROR, "Failed to delete file (path: {}) in AzureBlob Storage, reason: {}", + object.absolute_path, delete_info.RawResponse ? delete_info.RawResponse->GetReasonPhrase() : "Unknown"); } } void AzureObjectStorage::removeObjectIfExists(const StoredObject & object) { auto client_ptr = client.get(); - auto delete_info = client_ptr->DeleteBlob(object.absolute_path); + try + { + LOG_TEST(log, "Removing single object: {}", object.absolute_path); + auto delete_info = client_ptr->DeleteBlob(object.absolute_path); + } + catch (const Azure::Storage::StorageException & e) + { + /// If object doesn't exist... 
+ if (e.StatusCode == Azure::Core::Http::HttpStatusCode::NotFound) + return; + tryLogCurrentException(__PRETTY_FUNCTION__); + throw; + } } void AzureObjectStorage::removeObjectsIfExist(const StoredObjects & objects) { auto client_ptr = client.get(); for (const auto & object : objects) - auto delete_info = client_ptr->DeleteBlob(object.absolute_path); + { + try + { + auto delete_info = client_ptr->DeleteBlob(object.absolute_path); + } + catch (const Azure::Storage::StorageException & e) + { + /// If object doesn't exist... + if (e.StatusCode == Azure::Core::Http::HttpStatusCode::NotFound) + return; + tryLogCurrentException(__PRETTY_FUNCTION__); + throw; + } + } + } diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/registerDiskAzureBlobStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/registerDiskAzureBlobStorage.cpp index df377cdf710..562b2b2fec0 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/registerDiskAzureBlobStorage.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/registerDiskAzureBlobStorage.cpp @@ -4,8 +4,6 @@ #if USE_AZURE_BLOB_STORAGE -#include - #include #include @@ -51,7 +49,7 @@ void registerDiskAzureBlobStorage(DiskFactory & factory, bool global_skip_access bool skip_access_check = global_skip_access_check || config.getBool(config_prefix + ".skip_access_check", false); azure_blob_storage_disk->startup(context, skip_access_check); - return std::make_shared(azure_blob_storage_disk); + return azure_blob_storage_disk; }; factory.registerDiskType("azure_blob_storage", creator); diff --git a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h index 2d67203be0f..119dc25c66b 100644 --- a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h +++ b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h @@ -113,6 +113,8 @@ public: WriteSettings getAdjustedSettingsFromMetadataFile(const WriteSettings & settings, const std::string & path) const override; + FileCachePtr getCache() const { return cache; } + private: FileCache::Key getCacheKey(const std::string & path) const; diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.cpp b/src/Disks/ObjectStorages/DiskObjectStorage.cpp index 263a9a9d0e1..57a7d25fd17 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorage.cpp @@ -26,6 +26,7 @@ namespace ErrorCodes extern const int FILE_DOESNT_EXIST; extern const int ATTEMPT_TO_READ_AFTER_EOF; extern const int CANNOT_READ_ALL_DATA; + extern const int DIRECTORY_DOESNT_EXIST; } namespace @@ -126,6 +127,9 @@ StoredObjects DiskObjectStorage::getStorageObjects(const String & local_path) co void DiskObjectStorage::getRemotePathsRecursive(const String & local_path, std::vector & paths_map) { + if (!metadata_storage->exists(local_path)) + return; + /// Protect against concurrent deletion of files (for example because of a merge).
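Both Azure removal paths above now swallow a 404 from the delete call, which matters because a retried delete can hit a blob that the first (lost) response already removed. A hedged sketch of that pattern in isolation, using only the SDK types already referenced in the hunk (the helper name and the container-client parameter are illustrative, not the ClickHouse wiring):

#include <string>
#include <azure/storage/blobs.hpp>

/// Delete a blob, treating "not found" as success for "if exists" semantics.
bool deleteBlobIfExists(Azure::Storage::Blobs::BlobContainerClient & container, const std::string & path)
{
    try
    {
        auto delete_info = container.DeleteBlob(path);
        return delete_info.Value.Deleted;
    }
    catch (const Azure::Storage::StorageException & e)
    {
        /// A 404 on a (possibly retried) delete means the blob is already gone.
        /// Anything else is still a real error.
        if (e.StatusCode == Azure::Core::Http::HttpStatusCode::NotFound)
            return false;
        throw;
    }
}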
if (metadata_storage->isFile(local_path)) { @@ -138,6 +142,7 @@ void DiskObjectStorage::getRemotePathsRecursive(const String & local_path, std:: /// Unfortunately in rare cases it can happen when files disappear /// or can be empty in case of operation interruption (like cancelled metadata fetch) if (e.code() == ErrorCodes::FILE_DOESNT_EXIST || + e.code() == ErrorCodes::DIRECTORY_DOESNT_EXIST || e.code() == ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF || e.code() == ErrorCodes::CANNOT_READ_ALL_DATA) return; @@ -157,6 +162,7 @@ void DiskObjectStorage::getRemotePathsRecursive(const String & local_path, std:: /// Unfortunately in rare cases it can happen when files disappear /// or can be empty in case of operation interruption (like cancelled metadata fetch) if (e.code() == ErrorCodes::FILE_DOESNT_EXIST || + e.code() == ErrorCodes::DIRECTORY_DOESNT_EXIST || e.code() == ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF || e.code() == ErrorCodes::CANNOT_READ_ALL_DATA) return; @@ -519,6 +525,14 @@ void DiskObjectStorage::wrapWithCache(FileCachePtr cache, const FileCacheSetting object_storage = std::make_shared(object_storage, cache, cache_settings, layer_name); } +FileCachePtr DiskObjectStorage::getCache() const +{ + const auto * cached_object_storage = typeid_cast(object_storage.get()); + if (!cached_object_storage) + return nullptr; + return cached_object_storage->getCache(); +} + NameSet DiskObjectStorage::getCacheLayersNames() const { NameSet cache_layers; diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.h b/src/Disks/ObjectStorages/DiskObjectStorage.h index 00e3cf98142..a24acc270c0 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.h +++ b/src/Disks/ObjectStorages/DiskObjectStorage.h @@ -186,6 +186,7 @@ public: /// There can be any number of cache layers: /// DiskObjectStorage(CachedObjectStorage(...CacheObjectStorage(S3ObjectStorage)...)) void wrapWithCache(FileCachePtr cache, const FileCacheSettings & cache_settings, const String & layer_name); + FileCachePtr getCache() const; /// Get structure of object storage this disk works with. Examples: /// DiskObjectStorage(S3ObjectStorage) diff --git a/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp b/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp index dc4898559c0..c3284b635da 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp @@ -20,7 +20,7 @@ void DiskObjectStorageMetadata::deserialize(ReadBuffer & buf) UInt32 version; readIntText(version, buf); - if (version < VERSION_ABSOLUTE_PATHS || version > VERSION_READ_ONLY_FLAG) + if (version < VERSION_ABSOLUTE_PATHS || version > VERSION_INLINE_DATA) throw Exception( ErrorCodes::UNKNOWN_FORMAT, "Unknown metadata file version. Path: {}. Version: {}. 
Maximum expected version: {}", @@ -65,6 +65,12 @@ void DiskObjectStorageMetadata::deserialize(ReadBuffer & buf) readBoolText(read_only, buf); assertChar('\n', buf); } + + if (version >= VERSION_INLINE_DATA) + { + readEscapedString(inline_data, buf); + assertChar('\n', buf); + } } void DiskObjectStorageMetadata::deserializeFromString(const std::string & data) @@ -75,7 +81,11 @@ void DiskObjectStorageMetadata::deserializeFromString(const std::string & data) void DiskObjectStorageMetadata::serialize(WriteBuffer & buf, bool sync) const { - writeIntText(VERSION_READ_ONLY_FLAG, buf); + if (inline_data.empty()) + writeIntText(VERSION_READ_ONLY_FLAG, buf); + else + writeIntText(VERSION_INLINE_DATA, buf); + writeChar('\n', buf); writeIntText(storage_objects.size(), buf); @@ -97,6 +107,12 @@ void DiskObjectStorageMetadata::serialize(WriteBuffer & buf, bool sync) const writeBoolText(read_only, buf); writeChar('\n', buf); + if (!inline_data.empty()) + { + writeEscapedString(inline_data, buf); + writeChar('\n', buf); + } + buf.finalize(); if (sync) buf.sync(); diff --git a/src/Disks/ObjectStorages/DiskObjectStorageMetadata.h b/src/Disks/ObjectStorages/DiskObjectStorageMetadata.h index d3ea5795dd3..a2d0653e4aa 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageMetadata.h +++ b/src/Disks/ObjectStorages/DiskObjectStorageMetadata.h @@ -16,6 +16,7 @@ private: static constexpr uint32_t VERSION_ABSOLUTE_PATHS = 1; static constexpr uint32_t VERSION_RELATIVE_PATHS = 2; static constexpr uint32_t VERSION_READ_ONLY_FLAG = 3; + static constexpr uint32_t VERSION_INLINE_DATA = 4; const std::string & common_metadata_path; @@ -39,6 +40,9 @@ private: /// Flag indicates that file is read only. bool read_only = false; + /// This data will be stored inline + std::string inline_data; + public: DiskObjectStorageMetadata( @@ -99,6 +103,15 @@ public: read_only = true; } + void setInlineData(const std::string & data) + { + inline_data = data; + } + + const std::string & getInlineData() const + { + return inline_data; + } }; using DiskObjectStorageMetadataPtr = std::unique_ptr; diff --git a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp index b55fb2c4fa5..677debc69e6 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp @@ -133,8 +133,13 @@ struct RemoveObjectStorageOperation final : public IDiskObjectStorageOperation void finalize() override { + /// The client for an object storage may do retries internally + /// and there could be a situation when a query succeeded, but the response is lost + /// due to network error or similar. And when it retries an operation it may receive + /// a 404 HTTP code. We don't want to treat this code as a real error for the deletion process + /// (e.g.
throwing some exceptions) and thus we just use method `removeObjectsIfExists` if (!delete_metadata_only && !objects_to_remove.empty()) - object_storage.removeObjects(objects_to_remove); + object_storage.removeObjectsIfExist(objects_to_remove); } }; @@ -213,8 +218,10 @@ struct RemoveManyObjectStorageOperation final : public IDiskObjectStorageOperati void finalize() override { + /// Read comment inside RemoveObjectStorageOperation class + /// TL;DR Don't pay any attention to 404 status code if (!objects_to_remove.empty()) - object_storage.removeObjects(objects_to_remove); + object_storage.removeObjectsIfExist(objects_to_remove); } }; @@ -307,7 +314,9 @@ struct RemoveRecursiveObjectStorageOperation final : public IDiskObjectStorageOp remove_from_remote.insert(remove_from_remote.end(), remote_paths.begin(), remote_paths.end()); } } - object_storage.removeObjects(remove_from_remote); + /// Read comment inside RemoveObjectStorageOperation class + /// TL;DR Don't pay any attention to 404 status code + object_storage.removeObjectsIfExist(remove_from_remote); } } }; @@ -352,8 +361,10 @@ struct ReplaceFileObjectStorageOperation final : public IDiskObjectStorageOperat void finalize() override { + /// Read comment inside RemoveObjectStorageOperation class + /// TL;DR Don't pay any attention to 404 status code if (!objects_to_remove.empty()) - object_storage.removeObjects(objects_to_remove); + object_storage.removeObjectsIfExist(objects_to_remove); } }; @@ -749,4 +760,10 @@ void DiskObjectStorageTransaction::commit() operation->finalize(); } +void DiskObjectStorageTransaction::undo() +{ + for (const auto & operation : operations_to_execute | std::views::reverse) + operation->undo(); +} + } diff --git a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.h b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.h index 9c42203b613..9e6bd5b6307 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.h +++ b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.h @@ -70,6 +70,7 @@ public: DiskObjectStorageRemoteMetadataRestoreHelper * metadata_helper_); void commit() override; + void undo() override; void createDirectory(const std::string & path) override; diff --git a/src/Disks/ObjectStorages/FakeMetadataStorageFromDisk.cpp b/src/Disks/ObjectStorages/FakeMetadataStorageFromDisk.cpp index 383bbebd880..dbfdb2f7b1a 100644 --- a/src/Disks/ObjectStorages/FakeMetadataStorageFromDisk.cpp +++ b/src/Disks/ObjectStorages/FakeMetadataStorageFromDisk.cpp @@ -23,7 +23,7 @@ FakeMetadataStorageFromDisk::FakeMetadataStorageFromDisk( { } -MetadataTransactionPtr FakeMetadataStorageFromDisk::createTransaction() const +MetadataTransactionPtr FakeMetadataStorageFromDisk::createTransaction() { return std::make_shared(*this, disk); } @@ -66,12 +66,7 @@ uint64_t FakeMetadataStorageFromDisk::getFileSize(const String & path) const std::vector FakeMetadataStorageFromDisk::listDirectory(const std::string & path) const { std::vector result; - auto it = disk->iterateDirectory(path); - while (it->isValid()) - { - result.push_back(it->path()); - it->next(); - } + disk->listFiles(path, result); return result; } @@ -85,6 +80,19 @@ std::string FakeMetadataStorageFromDisk::readFileToString(const std::string &) c throw Exception(ErrorCodes::NOT_IMPLEMENTED, "readFileToString is not implemented for FakeMetadataStorageFromDisk"); } +std::string FakeMetadataStorageFromDisk::readInlineDataToString(const std::string & path) const +{ + auto rb = disk->readFile(path); + std::string result; + std::array buf; + while (!rb->eof()) + { + 
auto sz = rb->read(buf.data(), buf.size()); + result.append(buf.data(), buf.data() + sz); + } + return result; +} + std::unordered_map FakeMetadataStorageFromDisk::getSerializedMetadata(const std::vector &) const { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "getSerializedMetadata is not implemented for FakeMetadataStorageFromDisk"); @@ -118,6 +126,13 @@ void FakeMetadataStorageFromDiskTransaction::writeStringToFile(const std::string wb->finalize(); } +void FakeMetadataStorageFromDiskTransaction::writeInlineDataToFile(const std::string & path, const std::string & data) +{ + auto wb = disk->writeFile(path); + wb->write(data.data(), data.size()); + wb->finalize(); +} + void FakeMetadataStorageFromDiskTransaction::setLastModified(const std::string & path, const Poco::Timestamp & timestamp) { disk->setLastModified(path, timestamp); diff --git a/src/Disks/ObjectStorages/FakeMetadataStorageFromDisk.h b/src/Disks/ObjectStorages/FakeMetadataStorageFromDisk.h index 65cf012ddab..849e7235c0a 100644 --- a/src/Disks/ObjectStorages/FakeMetadataStorageFromDisk.h +++ b/src/Disks/ObjectStorages/FakeMetadataStorageFromDisk.h @@ -27,7 +27,7 @@ public: ObjectStoragePtr object_storage_, const std::string & object_storage_root_path_); - MetadataTransactionPtr createTransaction() const override; + MetadataTransactionPtr createTransaction() override; const std::string & getPath() const override; @@ -55,6 +55,8 @@ public: std::string readFileToString(const std::string & path) const override; + std::string readInlineDataToString(const std::string & path) const override; + std::unordered_map getSerializedMetadata(const std::vector & file_paths) const override; uint32_t getHardlinkCount(const std::string & path) const override; @@ -88,6 +90,8 @@ public: void writeStringToFile(const std::string & path, const std::string & data) override; + void writeInlineDataToFile(const std::string & path, const std::string & data) override; + void createEmptyMetadataFile(const std::string & path) override; void createMetadataFile(const std::string & path, const std::string & blob_name, uint64_t size_in_bytes) override; diff --git a/src/Disks/ObjectStorages/HDFS/registerDiskHDFS.cpp b/src/Disks/ObjectStorages/HDFS/registerDiskHDFS.cpp index 7bec0ee5a6c..4e4d35a07f8 100644 --- a/src/Disks/ObjectStorages/HDFS/registerDiskHDFS.cpp +++ b/src/Disks/ObjectStorages/HDFS/registerDiskHDFS.cpp @@ -3,7 +3,6 @@ #include #include #include -#include #include namespace DB @@ -55,7 +54,7 @@ void registerDiskHDFS(DiskFactory & factory, bool global_skip_access_check) copy_thread_pool_size); disk->startup(context, skip_access_check); - return std::make_shared(disk); + return disk; }; factory.registerDiskType("hdfs", creator); diff --git a/src/Disks/ObjectStorages/IMetadataStorage.h b/src/Disks/ObjectStorages/IMetadataStorage.h index 597d7744c78..00150df9fa3 100644 --- a/src/Disks/ObjectStorages/IMetadataStorage.h +++ b/src/Disks/ObjectStorages/IMetadataStorage.h @@ -44,6 +44,12 @@ public: throwNotImplemented(); } + /// Writes the data inline with the metadata + virtual void writeInlineDataToFile(const std::string & /* path */, const std::string & /* data */) + { + throwNotImplemented(); + } + virtual void setLastModified(const std::string & /* path */, const Poco::Timestamp & /* timestamp */) { throwNotImplemented(); @@ -143,7 +149,7 @@ using MetadataTransactionPtr = std::shared_ptr; class IMetadataStorage : private boost::noncopyable { public: - virtual MetadataTransactionPtr createTransaction() const = 0; + virtual MetadataTransactionPtr 
createTransaction() = 0; /// Get metadata root path. virtual const std::string & getPath() const = 0; @@ -185,6 +191,12 @@ public: throwNotImplemented(); } + /// Read inline data for file to string from path + virtual std::string readInlineDataToString(const std::string & /* path */) const + { + throwNotImplemented(); + } + virtual ~IMetadataStorage() = default; /// ==== More specific methods. Previous were almost general purpose. ==== diff --git a/src/Disks/ObjectStorages/MetadataStorageFromDisk.cpp b/src/Disks/ObjectStorages/MetadataStorageFromDisk.cpp index 0ad46a1327d..625350eeeff 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromDisk.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromDisk.cpp @@ -78,6 +78,11 @@ std::string MetadataStorageFromDisk::readFileToString(const std::string & path) return result; } +std::string MetadataStorageFromDisk::readInlineDataToString(const std::string & path) const +{ + return readMetadata(path)->getInlineData(); +} + DiskObjectStorageMetadataPtr MetadataStorageFromDisk::readMetadataUnlocked(const std::string & path, std::shared_lock &) const { auto metadata = std::make_unique(disk->getPath(), object_storage_root_path, path); @@ -122,7 +127,7 @@ void MetadataStorageFromDiskTransaction::createHardLink(const std::string & path addOperation(std::make_unique(path_from, path_to, *metadata_storage.disk, metadata_storage)); } -MetadataTransactionPtr MetadataStorageFromDisk::createTransaction() const +MetadataTransactionPtr MetadataStorageFromDisk::createTransaction() { return std::make_shared(*this); } @@ -244,6 +249,16 @@ void MetadataStorageFromDiskTransaction::writeStringToFile( addOperation(std::make_unique(path, *metadata_storage.getDisk(), data)); } +void MetadataStorageFromDiskTransaction::writeInlineDataToFile( + const std::string & path, + const std::string & data) +{ + auto metadata = std::make_unique( + metadata_storage.getDisk()->getPath(), metadata_storage.getObjectStorageRootPath(), path); + metadata->setInlineData(data); + writeStringToFile(path, metadata->serializeToString()); +} + void MetadataStorageFromDiskTransaction::setLastModified(const std::string & path, const Poco::Timestamp & timestamp) { addOperation(std::make_unique(path, timestamp, *metadata_storage.getDisk())); diff --git a/src/Disks/ObjectStorages/MetadataStorageFromDisk.h b/src/Disks/ObjectStorages/MetadataStorageFromDisk.h index b06a2a41f2b..2c80572e7b4 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromDisk.h +++ b/src/Disks/ObjectStorages/MetadataStorageFromDisk.h @@ -25,7 +25,7 @@ private: public: MetadataStorageFromDisk(DiskPtr disk_, const std::string & object_storage_root_path_); - MetadataTransactionPtr createTransaction() const override; + MetadataTransactionPtr createTransaction() override; const std::string & getPath() const override; @@ -53,6 +53,8 @@ public: std::string readFileToString(const std::string & path) const override; + std::string readInlineDataToString(const std::string & path) const override; + std::unordered_map getSerializedMetadata(const std::vector & file_paths) const override; uint32_t getHardlinkCount(const std::string & path) const override; @@ -94,6 +96,8 @@ public: void writeStringToFile(const std::string & path, const std::string & data) override; + void writeInlineDataToFile(const std::string & path, const std::string & data) override; + void createEmptyMetadataFile(const std::string & path) override; void createMetadataFile(const std::string & path, const std::string & blob_name, uint64_t size_in_bytes) override; @@ -127,6 
+131,8 @@ public: void replaceFile(const std::string & path_from, const std::string & path_to) override; void unlinkMetadata(const std::string & path) override; + + }; diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp index 34a9ae021b7..62c6d57b16f 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp @@ -23,7 +23,7 @@ MetadataStorageFromPlainObjectStorage::MetadataStorageFromPlainObjectStorage( { } -MetadataTransactionPtr MetadataStorageFromPlainObjectStorage::createTransaction() const +MetadataTransactionPtr MetadataStorageFromPlainObjectStorage::createTransaction() { return std::make_shared(*this); } diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h index 99cc960b9e4..0beed65879b 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h @@ -33,7 +33,7 @@ public: ObjectStoragePtr object_storage_, const std::string & object_storage_root_path_); - MetadataTransactionPtr createTransaction() const override; + MetadataTransactionPtr createTransaction() override; const std::string & getPath() const override; diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index a9996926408..d655fd37458 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -512,7 +512,7 @@ void S3ObjectStorage::copyObjectMultipartImpl( std::vector part_tags; - size_t upload_part_size = settings_ptr->request_settings.min_upload_part_size; + size_t upload_part_size = settings_ptr->request_settings.getUploadSettings().min_upload_part_size; for (size_t position = 0, part_number = 1; position < size; ++part_number, position += upload_part_size) { ProfileEvents::increment(ProfileEvents::S3UploadPartCopy); diff --git a/src/Disks/ObjectStorages/S3/diskSettings.cpp b/src/Disks/ObjectStorages/S3/diskSettings.cpp index da3a2ae710e..87533a5a4e0 100644 --- a/src/Disks/ObjectStorages/S3/diskSettings.cpp +++ b/src/Disks/ObjectStorages/S3/diskSettings.cpp @@ -20,7 +20,6 @@ #include #include #include -#include #include namespace DB @@ -34,24 +33,7 @@ namespace ErrorCodes std::unique_ptr getSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context) { const Settings & settings = context->getSettingsRef(); - S3Settings::RequestSettings request_settings; - request_settings.max_single_read_retries = config.getUInt64(config_prefix + ".s3_max_single_read_retries", settings.s3_max_single_read_retries); - request_settings.min_upload_part_size = config.getUInt64(config_prefix + ".s3_min_upload_part_size", settings.s3_min_upload_part_size); - request_settings.max_upload_part_size = config.getUInt64(config_prefix + ".s3_max_upload_part_size", S3Settings::RequestSettings::DEFAULT_MAX_UPLOAD_PART_SIZE); - request_settings.upload_part_size_multiply_factor = config.getUInt64(config_prefix + ".s3_upload_part_size_multiply_factor", settings.s3_upload_part_size_multiply_factor); - request_settings.upload_part_size_multiply_parts_count_threshold = config.getUInt64(config_prefix + ".s3_upload_part_size_multiply_parts_count_threshold", settings.s3_upload_part_size_multiply_parts_count_threshold); - 
request_settings.max_part_number = config.getUInt64(config_prefix + ".s3_max_part_number", S3Settings::RequestSettings::DEFAULT_MAX_PART_NUMBER); - request_settings.max_single_part_upload_size = config.getUInt64(config_prefix + ".s3_max_single_part_upload_size", settings.s3_max_single_part_upload_size); - request_settings.check_objects_after_upload = config.getUInt64(config_prefix + ".s3_check_objects_after_upload", settings.s3_check_objects_after_upload); - request_settings.max_unexpected_write_error_retries = config.getUInt64(config_prefix + ".s3_max_unexpected_write_error_retries", settings.s3_max_unexpected_write_error_retries); - - // NOTE: it would be better to reuse old throttlers to avoid losing token bucket state on every config reload, which could lead to exceeding limit for short time. But it is good enough unless very high `burst` values are used. - if (UInt64 max_get_rps = config.getUInt64(config_prefix + ".s3_max_get_rps", settings.s3_max_get_rps)) - request_settings.get_request_throttler = std::make_shared( - max_get_rps, config.getUInt64(config_prefix + ".s3_max_get_burst", settings.s3_max_get_burst ? settings.s3_max_get_burst : Throttler::default_burst_seconds * max_get_rps)); - if (UInt64 max_put_rps = config.getUInt64(config_prefix + ".s3_max_put_rps", settings.s3_max_put_rps)) - request_settings.put_request_throttler = std::make_shared( - max_put_rps, config.getUInt64(config_prefix + ".s3_max_put_burst", settings.s3_max_put_burst ? settings.s3_max_put_burst : Throttler::default_burst_seconds * max_put_rps)); + S3Settings::RequestSettings request_settings(config, config_prefix, settings, "s3_"); return std::make_unique( request_settings, diff --git a/src/Disks/ObjectStorages/S3/registerDiskS3.cpp b/src/Disks/ObjectStorages/S3/registerDiskS3.cpp index 533a925aa1b..236662a7b5e 100644 --- a/src/Disks/ObjectStorages/S3/registerDiskS3.cpp +++ b/src/Disks/ObjectStorages/S3/registerDiskS3.cpp @@ -11,8 +11,8 @@ #include #include -#include #include +#include #include #include #include @@ -165,9 +165,7 @@ void registerDiskS3(DiskFactory & factory, bool global_skip_access_check) s3disk->startup(context, skip_access_check); - std::shared_ptr disk_result = s3disk; - - return std::make_shared(disk_result); + return s3disk; }; factory.registerDiskType("s3", creator); factory.registerDiskType("s3_plain", creator); diff --git a/src/Disks/ObjectStorages/Web/MetadataStorageFromStaticFilesWebServer.cpp b/src/Disks/ObjectStorages/Web/MetadataStorageFromStaticFilesWebServer.cpp index ab5d86fd836..d39582a089e 100644 --- a/src/Disks/ObjectStorages/Web/MetadataStorageFromStaticFilesWebServer.cpp +++ b/src/Disks/ObjectStorages/Web/MetadataStorageFromStaticFilesWebServer.cpp @@ -21,7 +21,7 @@ MetadataStorageFromStaticFilesWebServer::MetadataStorageFromStaticFilesWebServer { } -MetadataTransactionPtr MetadataStorageFromStaticFilesWebServer::createTransaction() const +MetadataTransactionPtr MetadataStorageFromStaticFilesWebServer::createTransaction() { return std::make_shared(*this); } diff --git a/src/Disks/ObjectStorages/Web/MetadataStorageFromStaticFilesWebServer.h b/src/Disks/ObjectStorages/Web/MetadataStorageFromStaticFilesWebServer.h index 6a7c8128b4a..a04a1359d34 100644 --- a/src/Disks/ObjectStorages/Web/MetadataStorageFromStaticFilesWebServer.h +++ b/src/Disks/ObjectStorages/Web/MetadataStorageFromStaticFilesWebServer.h @@ -24,7 +24,7 @@ private: public: explicit MetadataStorageFromStaticFilesWebServer(const WebObjectStorage & object_storage_); - MetadataTransactionPtr createTransaction() 
const override; + MetadataTransactionPtr createTransaction() override; const std::string & getPath() const override; diff --git a/src/Disks/TemporaryFileOnDisk.cpp b/src/Disks/TemporaryFileOnDisk.cpp index 4f348519037..b0113d6f9c9 100644 --- a/src/Disks/TemporaryFileOnDisk.cpp +++ b/src/Disks/TemporaryFileOnDisk.cpp @@ -15,7 +15,6 @@ namespace CurrentMetrics extern const Metric TotalTemporaryFiles; } - namespace DB { diff --git a/src/Formats/BSONTypes.h b/src/Formats/BSONTypes.h index 2d20cdae698..14a3e9decca 100644 --- a/src/Formats/BSONTypes.h +++ b/src/Formats/BSONTypes.h @@ -7,6 +7,8 @@ namespace DB { static const uint8_t BSON_DOCUMENT_END = 0x00; +static const size_t BSON_OBJECT_ID_SIZE = 12; +static const size_t BSON_DB_POINTER_SIZE = 12; using BSONSizeT = uint32_t; static const BSONSizeT MAX_BSON_SIZE = std::numeric_limits::max(); diff --git a/src/Formats/CapnProtoUtils.cpp b/src/Formats/CapnProtoUtils.cpp index a7ff065aca5..fb5e7c06542 100644 --- a/src/Formats/CapnProtoUtils.cpp +++ b/src/Formats/CapnProtoUtils.cpp @@ -32,6 +32,16 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } +std::pair splitCapnProtoFieldName(const String & name) +{ + const auto * begin = name.data(); + const auto * end = name.data() + name.size(); + const auto * it = find_first_symbols<'_', '.'>(begin, end); + String first = String(begin, it); + String second = it == end ? "" : String(it + 1, end); + return {first, second}; +} + capnp::StructSchema CapnProtoSchemaParser::getMessageSchema(const FormatSchemaInfo & schema_info) { capnp::ParsedSchema schema; @@ -201,9 +211,9 @@ static bool checkEnums(const capnp::Type & capnp_type, const DataTypePtr column_ return result; } -static bool checkCapnProtoType(const capnp::Type & capnp_type, const DataTypePtr & data_type, FormatSettings::EnumComparingMode mode, String & error_message); +static bool checkCapnProtoType(const capnp::Type & capnp_type, const DataTypePtr & data_type, FormatSettings::EnumComparingMode mode, String & error_message, const String & column_name); -static bool checkNullableType(const capnp::Type & capnp_type, const DataTypePtr & data_type, FormatSettings::EnumComparingMode mode, String & error_message) +static bool checkNullableType(const capnp::Type & capnp_type, const DataTypePtr & data_type, FormatSettings::EnumComparingMode mode, String & error_message, const String & column_name) { if (!capnp_type.isStruct()) return false; @@ -222,9 +232,9 @@ static bool checkNullableType(const capnp::Type & capnp_type, const DataTypePtr auto nested_type = assert_cast(data_type.get())->getNestedType(); if (first.getType().isVoid()) - return checkCapnProtoType(second.getType(), nested_type, mode, error_message); + return checkCapnProtoType(second.getType(), nested_type, mode, error_message, column_name); if (second.getType().isVoid()) - return checkCapnProtoType(first.getType(), nested_type, mode, error_message); + return checkCapnProtoType(first.getType(), nested_type, mode, error_message, column_name); return false; } @@ -260,7 +270,7 @@ static bool checkTupleType(const capnp::Type & capnp_type, const DataTypePtr & d { KJ_IF_MAYBE(field, struct_schema.findFieldByName(name)) { - if (!checkCapnProtoType(field->getType(), nested_types[tuple_data_type->getPositionByName(name)], mode, error_message)) + if (!checkCapnProtoType(field->getType(), nested_types[tuple_data_type->getPositionByName(name)], mode, error_message, name)) return false; } else @@ -273,16 +283,28 @@ static bool checkTupleType(const capnp::Type & capnp_type, const DataTypePtr & d 
return true; } -static bool checkArrayType(const capnp::Type & capnp_type, const DataTypePtr & data_type, FormatSettings::EnumComparingMode mode, String & error_message) +static bool checkArrayType(const capnp::Type & capnp_type, const DataTypePtr & data_type, FormatSettings::EnumComparingMode mode, String & error_message, const String & column_name) { if (!capnp_type.isList()) return false; auto list_schema = capnp_type.asList(); auto nested_type = assert_cast(data_type.get())->getNestedType(); - return checkCapnProtoType(list_schema.getElementType(), nested_type, mode, error_message); + + auto [field_name, nested_name] = splitCapnProtoFieldName(column_name); + if (!nested_name.empty() && list_schema.getElementType().isStruct()) + { + auto struct_schema = list_schema.getElementType().asStruct(); + KJ_IF_MAYBE(field, struct_schema.findFieldByName(nested_name)) + return checkCapnProtoType(field->getType(), nested_type, mode, error_message, nested_name); + + error_message += "Element type of List {} doesn't contain field with name " + nested_name; + return false; + } + + return checkCapnProtoType(list_schema.getElementType(), nested_type, mode, error_message, column_name); } -static bool checkCapnProtoType(const capnp::Type & capnp_type, const DataTypePtr & data_type, FormatSettings::EnumComparingMode mode, String & error_message) +static bool checkCapnProtoType(const capnp::Type & capnp_type, const DataTypePtr & data_type, FormatSettings::EnumComparingMode mode, String & error_message, const String & column_name) { switch (data_type->getTypeId()) { @@ -301,9 +323,11 @@ static bool checkCapnProtoType(const capnp::Type & capnp_type, const DataTypePtr case TypeIndex::Int16: return capnp_type.isInt16(); case TypeIndex::Date32: [[fallthrough]]; + case TypeIndex::Decimal32: [[fallthrough]]; case TypeIndex::Int32: return capnp_type.isInt32(); case TypeIndex::DateTime64: [[fallthrough]]; + case TypeIndex::Decimal64: [[fallthrough]]; case TypeIndex::Int64: return capnp_type.isInt64(); case TypeIndex::Float32: @@ -318,15 +342,15 @@ static bool checkCapnProtoType(const capnp::Type & capnp_type, const DataTypePtr return checkTupleType(capnp_type, data_type, mode, error_message); case TypeIndex::Nullable: { - auto result = checkNullableType(capnp_type, data_type, mode, error_message); + auto result = checkNullableType(capnp_type, data_type, mode, error_message, column_name); if (!result) error_message += "Nullable can be represented only as a named union of type Void and nested type"; return result; } case TypeIndex::Array: - return checkArrayType(capnp_type, data_type, mode, error_message); + return checkArrayType(capnp_type, data_type, mode, error_message, column_name); case TypeIndex::LowCardinality: - return checkCapnProtoType(capnp_type, assert_cast(data_type.get())->getDictionaryType(), mode, error_message); + return checkCapnProtoType(capnp_type, assert_cast(data_type.get())->getDictionaryType(), mode, error_message, column_name); case TypeIndex::FixedString: [[fallthrough]]; case TypeIndex::String: return capnp_type.isText() || capnp_type.isData(); @@ -335,19 +359,9 @@ static bool checkCapnProtoType(const capnp::Type & capnp_type, const DataTypePtr } } -static std::pair splitFieldName(const String & name) -{ - const auto * begin = name.data(); - const auto * end = name.data() + name.size(); - const auto * it = find_first_symbols<'_', '.'>(begin, end); - String first = String(begin, it); - String second = it == end ? 
"" : String(it + 1, end); - return {first, second}; -} - capnp::DynamicValue::Reader getReaderByColumnName(const capnp::DynamicStruct::Reader & struct_reader, const String & name) { - auto [field_name, nested_name] = splitFieldName(name); + auto [field_name, nested_name] = splitCapnProtoFieldName(name); KJ_IF_MAYBE(field, struct_reader.getSchema().findFieldByName(field_name)) { capnp::DynamicValue::Reader field_reader; @@ -363,6 +377,20 @@ capnp::DynamicValue::Reader getReaderByColumnName(const capnp::DynamicStruct::Re if (nested_name.empty()) return field_reader; + /// Support reading Nested as List of Structs. + if (field_reader.getType() == capnp::DynamicValue::LIST) + { + auto list_schema = field->getType().asList(); + if (!list_schema.getElementType().isStruct()) + throw Exception(ErrorCodes::CAPN_PROTO_BAD_CAST, "Element type of List {} is not a struct", field_name); + + auto struct_schema = list_schema.getElementType().asStruct(); + KJ_IF_MAYBE(nested_field, struct_schema.findFieldByName(nested_name)) + return field_reader; + + throw Exception(ErrorCodes::THERE_IS_NO_COLUMN, "Element type of List {} doesn't contain field with name \"{}\"", field_name, nested_name); + } + if (field_reader.getType() != capnp::DynamicValue::STRUCT) throw Exception(ErrorCodes::CAPN_PROTO_BAD_CAST, "Field {} is not a struct", field_name); @@ -374,13 +402,28 @@ capnp::DynamicValue::Reader getReaderByColumnName(const capnp::DynamicStruct::Re std::pair getStructBuilderAndFieldByColumnName(capnp::DynamicStruct::Builder struct_builder, const String & name) { - auto [field_name, nested_name] = splitFieldName(name); + auto [field_name, nested_name] = splitCapnProtoFieldName(name); KJ_IF_MAYBE(field, struct_builder.getSchema().findFieldByName(field_name)) { if (nested_name.empty()) return {struct_builder, *field}; auto field_builder = struct_builder.get(*field); + + /// Support reading Nested as List of Structs. + if (field_builder.getType() == capnp::DynamicValue::LIST) + { + auto list_schema = field->getType().asList(); + if (!list_schema.getElementType().isStruct()) + throw Exception(ErrorCodes::CAPN_PROTO_BAD_CAST, "Element type of List {} is not a struct", field_name); + + auto struct_schema = list_schema.getElementType().asStruct(); + KJ_IF_MAYBE(nested_field, struct_schema.findFieldByName(nested_name)) + return {struct_builder, *field}; + + throw Exception(ErrorCodes::THERE_IS_NO_COLUMN, "Element type of List {} doesn't contain field with name \"{}\"", field_name, nested_name); + } + if (field_builder.getType() != capnp::DynamicValue::STRUCT) throw Exception(ErrorCodes::CAPN_PROTO_BAD_CAST, "Field {} is not a struct", field_name); @@ -390,13 +433,27 @@ std::pair getStructBu throw Exception(ErrorCodes::THERE_IS_NO_COLUMN, "Capnproto struct doesn't contain field with name {}", field_name); } -static capnp::StructSchema::Field getFieldByName(const capnp::StructSchema & schema, const String & name) +static std::pair getFieldByName(const capnp::StructSchema & schema, const String & name) { - auto [field_name, nested_name] = splitFieldName(name); + auto [field_name, nested_name] = splitCapnProtoFieldName(name); KJ_IF_MAYBE(field, schema.findFieldByName(field_name)) { if (nested_name.empty()) - return *field; + return {*field, name}; + + /// Support reading Nested as List of Structs. 
+ if (field->getType().isList()) + { + auto list_schema = field->getType().asList(); + if (!list_schema.getElementType().isStruct()) + throw Exception(ErrorCodes::CAPN_PROTO_BAD_CAST, "Element type of List {} is not a struct", field_name); + + auto struct_schema = list_schema.getElementType().asStruct(); + KJ_IF_MAYBE(nested_field, struct_schema.findFieldByName(nested_name)) + return {*field, name}; + + throw Exception(ErrorCodes::THERE_IS_NO_COLUMN, "Element type of List {} doesn't contain field with name \"{}\"", field_name, nested_name); + } if (!field->getType().isStruct()) throw Exception(ErrorCodes::CAPN_PROTO_BAD_CAST, "Field {} is not a struct", field_name); @@ -416,8 +473,8 @@ void checkCapnProtoSchemaStructure(const capnp::StructSchema & schema, const Blo String additional_error_message; for (auto & [name, type] : names_and_types) { - auto field = getFieldByName(schema, name); - if (!checkCapnProtoType(field.getType(), type, mode, additional_error_message)) + auto [field, field_name] = getFieldByName(schema, name); + if (!checkCapnProtoType(field.getType(), type, mode, additional_error_message, field_name)) { auto e = Exception( ErrorCodes::CAPN_PROTO_BAD_CAST, diff --git a/src/Formats/CapnProtoUtils.h b/src/Formats/CapnProtoUtils.h index 102c3a2e306..2d8cdb418d7 100644 --- a/src/Formats/CapnProtoUtils.h +++ b/src/Formats/CapnProtoUtils.h @@ -30,6 +30,8 @@ public: capnp::StructSchema getMessageSchema(const FormatSchemaInfo & schema_info); }; +std::pair splitCapnProtoFieldName(const String & name); + bool compareEnumNames(const String & first, const String & second, FormatSettings::EnumComparingMode mode); std::pair getStructBuilderAndFieldByColumnName(capnp::DynamicStruct::Builder struct_builder, const String & name); diff --git a/src/Formats/EscapingRuleUtils.cpp b/src/Formats/EscapingRuleUtils.cpp index 12942bcd13c..398ed727988 100644 --- a/src/Formats/EscapingRuleUtils.cpp +++ b/src/Formats/EscapingRuleUtils.cpp @@ -1,21 +1,11 @@ #include -#include -#include +#include #include #include -#include #include #include -#include #include -#include -#include -#include #include -#include -#include -#include -#include #include #include #include @@ -261,556 +251,82 @@ String readStringByEscapingRule(ReadBuffer & buf, FormatSettings::EscapingRule e return readByEscapingRule(buf, escaping_rule, format_settings); } -void transformInferredTypesIfNeededImpl(DataTypes & types, const FormatSettings & settings, bool is_json, const std::unordered_set * numbers_parsed_from_json_strings = nullptr) -{ - /// Do nothing if we didn't try to infer something special. - if (!settings.try_infer_integers && !settings.try_infer_dates && !settings.try_infer_datetimes && !is_json) - return; - - auto transform_simple_types = [&](DataTypes & data_types) - { - /// If we have floats and integers convert them all to float. - if (settings.try_infer_integers) - { - bool have_floats = false; - bool have_integers = false; - for (const auto & type : data_types) - { - have_floats |= isFloat(type); - have_integers |= isInteger(type) && !isBool(type); - } - - if (have_floats && have_integers) - { - for (auto & type : data_types) - { - if (isInteger(type)) - type = std::make_shared(); - } - } - } - - /// If we have only dates and datetimes, convert dates to datetime. 
- /// If we have date/datetimes and smth else, convert them to string, because - /// There is a special case when we inferred both Date/DateTime and Int64 from Strings, - /// for example: "arr: ["2020-01-01", "2000"]" -> Tuple(Date, Int64), - /// so if we have Date/DateTime and smth else (not only String) we should - /// convert Date/DateTime back to String, so then we will be able to - /// convert Int64 back to String as well. - if (settings.try_infer_dates || settings.try_infer_datetimes) - { - bool have_dates = false; - bool have_datetimes = false; - bool all_dates_or_datetimes = true; - - for (const auto & type : data_types) - { - have_dates |= isDate(type); - have_datetimes |= isDateTime64(type); - all_dates_or_datetimes &= isDate(type) || isDateTime64(type); - } - - if (!all_dates_or_datetimes && (have_dates || have_datetimes)) - { - for (auto & type : data_types) - { - if (isDate(type) || isDateTime64(type)) - type = std::make_shared(); - } - } - else if (have_dates && have_datetimes) - { - for (auto & type : data_types) - { - if (isDate(type)) - type = std::make_shared(9); - } - } - } - - if (!is_json) - return; - - /// Check settings specific for JSON formats. - - /// If we have numbers and strings, convert numbers to strings. - if (settings.json.try_infer_numbers_from_strings || settings.json.read_numbers_as_strings) - { - bool have_strings = false; - bool have_numbers = false; - for (const auto & type : data_types) - { - have_strings |= isString(type); - have_numbers |= isNumber(type); - } - - if (have_strings && have_numbers) - { - for (auto & type : data_types) - { - if (isNumber(type) - && (settings.json.read_numbers_as_strings || !numbers_parsed_from_json_strings - || numbers_parsed_from_json_strings->contains(type.get()))) - type = std::make_shared(); - } - } - } - - if (settings.json.read_bools_as_numbers) - { - /// Note that have_floats and have_integers both cannot be - /// equal to true as in one of previous checks we convert - /// integers to floats if we have both. 
- bool have_floats = false; - bool have_integers = false; - bool have_bools = false; - for (const auto & type : data_types) - { - have_floats |= isFloat(type); - have_integers |= isInteger(type) && !isBool(type); - have_bools |= isBool(type); - } - - if (have_bools && (have_integers || have_floats)) - { - for (auto & type : data_types) - { - if (isBool(type)) - { - if (have_integers) - type = std::make_shared(); - else - type = std::make_shared(); - } - } - } - } - }; - - auto transform_complex_types = [&](DataTypes & data_types) - { - if (!is_json) - return; - - bool have_maps = false; - bool have_objects = false; - bool have_strings = false; - bool are_maps_equal = true; - DataTypePtr first_map_type; - for (const auto & type : data_types) - { - if (isMap(type)) - { - if (!have_maps) - { - first_map_type = type; - have_maps = true; - } - else - { - are_maps_equal &= type->equals(*first_map_type); - } - } - else if (isObject(type)) - { - have_objects = true; - } - else if (isString(type)) - { - have_strings = false; - } - } - - if (have_maps && (have_objects || !are_maps_equal)) - { - for (auto & type : data_types) - { - if (isMap(type)) - type = std::make_shared("json", true); - } - } - - if (settings.json.read_objects_as_strings && have_strings && (have_maps || have_objects)) - { - for (auto & type : data_types) - { - if (isMap(type) || isObject(type)) - type = std::make_shared(); - } - } - }; - - transformTypesRecursively(types, transform_simple_types, transform_complex_types); -} - -void transformInferredTypesIfNeeded(DataTypes & types, const FormatSettings & settings, FormatSettings::EscapingRule escaping_rule) -{ - transformInferredTypesIfNeededImpl(types, settings, escaping_rule == FormatSettings::EscapingRule::JSON); -} - -void transformInferredTypesIfNeeded(DataTypePtr & first, DataTypePtr & second, const FormatSettings & settings, FormatSettings::EscapingRule escaping_rule) -{ - DataTypes types = {first, second}; - transformInferredTypesIfNeeded(types, settings, escaping_rule); - first = std::move(types[0]); - second = std::move(types[1]); -} - -void transformInferredJSONTypesIfNeeded(DataTypes & types, const FormatSettings & settings, const std::unordered_set * numbers_parsed_from_json_strings) -{ - transformInferredTypesIfNeededImpl(types, settings, true, numbers_parsed_from_json_strings); -} - -void transformInferredJSONTypesIfNeeded(DataTypePtr & first, DataTypePtr & second, const FormatSettings & settings) -{ - DataTypes types = {first, second}; - transformInferredJSONTypesIfNeeded(types, settings); - first = std::move(types[0]); - second = std::move(types[1]); -} - -bool tryInferDate(const std::string_view & field) -{ - ReadBufferFromString buf(field); - DayNum tmp; - return tryReadDateText(tmp, buf) && buf.eof(); -} - -bool tryInferDateTime(const std::string_view & field, const FormatSettings & settings) -{ - if (field.empty()) - return false; - - ReadBufferFromString buf(field); - Float64 tmp_float; - /// Check if it's just a number, and if so, don't try to infer DateTime from it, - /// because we can interpret this number as a timestamp and it will lead to - /// inferring DateTime instead of simple Int64/Float64 in some cases. 
- if (tryReadFloatText(tmp_float, buf) && buf.eof()) - return false; - - buf.seek(0, SEEK_SET); /// Return position to the beginning - DateTime64 tmp; - switch (settings.date_time_input_format) - { - case FormatSettings::DateTimeInputFormat::Basic: - if (tryReadDateTime64Text(tmp, 9, buf) && buf.eof()) - return true; - break; - case FormatSettings::DateTimeInputFormat::BestEffort: - if (tryParseDateTime64BestEffort(tmp, 9, buf, DateLUT::instance(), DateLUT::instance("UTC")) && buf.eof()) - return true; - break; - case FormatSettings::DateTimeInputFormat::BestEffortUS: - if (tryParseDateTime64BestEffortUS(tmp, 9, buf, DateLUT::instance(), DateLUT::instance("UTC")) && buf.eof()) - return true; - break; - } - - return false; -} - -DataTypePtr tryInferDateOrDateTime(const std::string_view & field, const FormatSettings & settings) -{ - if (settings.try_infer_dates && tryInferDate(field)) - return makeNullable(std::make_shared()); - - if (settings.try_infer_datetimes && tryInferDateTime(field, settings)) - return makeNullable(std::make_shared(9)); - - return nullptr; -} - -static DataTypePtr determineDataTypeForSingleFieldImpl(ReadBufferFromString & buf, const FormatSettings & settings) -{ - if (buf.eof()) - return nullptr; - - /// Array - if (checkChar('[', buf)) - { - skipWhitespaceIfAny(buf); - - DataTypes nested_types; - bool first = true; - while (!buf.eof() && *buf.position() != ']') - { - if (!first) - { - skipWhitespaceIfAny(buf); - if (!checkChar(',', buf)) - return nullptr; - skipWhitespaceIfAny(buf); - } - else - first = false; - - auto nested_type = determineDataTypeForSingleFieldImpl(buf, settings); - if (!nested_type) - return nullptr; - - nested_types.push_back(nested_type); - } - - if (buf.eof()) - return nullptr; - - ++buf.position(); - - if (nested_types.empty()) - return std::make_shared(std::make_shared()); - - transformInferredTypesIfNeeded(nested_types, settings); - - auto least_supertype = tryGetLeastSupertype(nested_types); - if (!least_supertype) - return nullptr; - - return std::make_shared(least_supertype); - } - - /// Tuple - if (checkChar('(', buf)) - { - skipWhitespaceIfAny(buf); - - DataTypes nested_types; - bool first = true; - while (!buf.eof() && *buf.position() != ')') - { - if (!first) - { - skipWhitespaceIfAny(buf); - if (!checkChar(',', buf)) - return nullptr; - skipWhitespaceIfAny(buf); - } - else - first = false; - - auto nested_type = determineDataTypeForSingleFieldImpl(buf, settings); - if (!nested_type) - return nullptr; - - nested_types.push_back(nested_type); - } - - if (buf.eof() || nested_types.empty()) - return nullptr; - - ++buf.position(); - - return std::make_shared(nested_types); - } - - /// Map - if (checkChar('{', buf)) - { - skipWhitespaceIfAny(buf); - - DataTypes key_types; - DataTypes value_types; - bool first = true; - while (!buf.eof() && *buf.position() != '}') - { - if (!first) - { - skipWhitespaceIfAny(buf); - if (!checkChar(',', buf)) - return nullptr; - skipWhitespaceIfAny(buf); - } - else - first = false; - - auto key_type = determineDataTypeForSingleFieldImpl(buf, settings); - if (!key_type) - return nullptr; - - key_types.push_back(key_type); - - skipWhitespaceIfAny(buf); - if (!checkChar(':', buf)) - return nullptr; - skipWhitespaceIfAny(buf); - - auto value_type = determineDataTypeForSingleFieldImpl(buf, settings); - if (!value_type) - return nullptr; - - value_types.push_back(value_type); - } - - if (buf.eof()) - return nullptr; - - ++buf.position(); - skipWhitespaceIfAny(buf); - - if (key_types.empty()) - return 
std::make_shared(std::make_shared(), std::make_shared()); - - transformInferredTypesIfNeeded(key_types, settings); - transformInferredTypesIfNeeded(value_types, settings); - - auto key_least_supertype = tryGetLeastSupertype(key_types); - - auto value_least_supertype = tryGetLeastSupertype(value_types); - if (!key_least_supertype || !value_least_supertype) - return nullptr; - - if (!DataTypeMap::checkKeyType(key_least_supertype)) - return nullptr; - - return std::make_shared(key_least_supertype, value_least_supertype); - } - - /// String - if (*buf.position() == '\'') - { - ++buf.position(); - String field; - while (!buf.eof()) - { - char * next_pos = find_first_symbols<'\\', '\''>(buf.position(), buf.buffer().end()); - field.append(buf.position(), next_pos); - buf.position() = next_pos; - - if (!buf.hasPendingData()) - continue; - - if (*buf.position() == '\'') - break; - - field.push_back(*buf.position()); - if (*buf.position() == '\\') - ++buf.position(); - } - - if (buf.eof()) - return nullptr; - - ++buf.position(); - if (auto type = tryInferDateOrDateTime(field, settings)) - return type; - - return std::make_shared(); - } - - /// Bool - if (checkStringCaseInsensitive("true", buf) || checkStringCaseInsensitive("false", buf)) - return DataTypeFactory::instance().get("Bool"); - - /// Null - if (checkStringCaseInsensitive("NULL", buf)) - return std::make_shared(); - - /// Number - Float64 tmp; - auto * pos_before_float = buf.position(); - if (tryReadFloatText(tmp, buf)) - { - if (settings.try_infer_integers) - { - auto * float_end_pos = buf.position(); - buf.position() = pos_before_float; - Int64 tmp_int; - if (tryReadIntText(tmp_int, buf) && buf.position() == float_end_pos) - return std::make_shared(); - - buf.position() = float_end_pos; - } - - return std::make_shared(); - } - - return nullptr; -} - -static DataTypePtr determineDataTypeForSingleField(ReadBufferFromString & buf, const FormatSettings & settings) -{ - return makeNullableRecursivelyAndCheckForNothing(determineDataTypeForSingleFieldImpl(buf, settings)); -} - -DataTypePtr determineDataTypeByEscapingRule(const String & field, const FormatSettings & format_settings, FormatSettings::EscapingRule escaping_rule) +DataTypePtr tryInferDataTypeByEscapingRule(const String & field, const FormatSettings & format_settings, FormatSettings::EscapingRule escaping_rule, JSONInferenceInfo * json_info) { switch (escaping_rule) { case FormatSettings::EscapingRule::Quoted: - { - ReadBufferFromString buf(field); - auto type = determineDataTypeForSingleField(buf, format_settings); - return buf.eof() ? 
type : nullptr; - } + return tryInferDataTypeForSingleField(field, format_settings); case FormatSettings::EscapingRule::JSON: - return JSONUtils::getDataTypeFromField(field, format_settings); + return tryInferDataTypeForSingleJSONField(field, format_settings, json_info); case FormatSettings::EscapingRule::CSV: { if (!format_settings.csv.use_best_effort_in_schema_inference) - return makeNullable(std::make_shared()); + return std::make_shared(); - if (field.empty() || field == format_settings.csv.null_representation) + if (field.empty()) return nullptr; - if (field == format_settings.bool_false_representation || field == format_settings.bool_true_representation) - return DataTypeFactory::instance().get("Nullable(Bool)"); + if (field == format_settings.csv.null_representation) + return makeNullable(std::make_shared()); + if (field == format_settings.bool_false_representation || field == format_settings.bool_true_representation) + return DataTypeFactory::instance().get("Bool"); + + /// In CSV complex types are serialized in quotes. If we have quotes, we should try to infer type + /// from data inside quotes. if (field.size() > 1 && ((field.front() == '\'' && field.back() == '\'') || (field.front() == '"' && field.back() == '"'))) { auto data = std::string_view(field.data() + 1, field.size() - 2); - if (auto date_type = tryInferDateOrDateTime(data, format_settings)) + /// First, try to infer dates and datetimes. + if (auto date_type = tryInferDateOrDateTimeFromString(data, format_settings)) return date_type; - ReadBufferFromString buf(data); /// Try to determine the type of value inside quotes - auto type = determineDataTypeForSingleField(buf, format_settings); + auto type = tryInferDataTypeForSingleField(data, format_settings); - if (!type) - return nullptr; - - /// If it's a number or tuple in quotes or there is some unread data in buffer, we determine it as a string. - if (isNumber(removeNullable(type)) || isTuple(type) || !buf.eof()) - return makeNullable(std::make_shared()); + /// If we couldn't infer any type or it's a number or tuple in quotes, we determine it as a string. + if (!type || isNumber(removeNullable(type)) || isTuple(type)) + return std::make_shared(); return type; } /// Case when CSV value is not in quotes. Check if it's a number, and if not, determine it's as a string. 
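A hedged usage sketch of the renamed entry point. The expected outcomes in the comments are read off the CSV branch above and the TSV/Escaped branch below, not from a test run; the surrounding setup is an assumption for illustration.

#include <Formats/EscapingRuleUtils.h>
#include <Formats/FormatSettings.h>

using namespace DB;

void escapingRuleInferenceExamples()
{
    FormatSettings settings;
    settings.csv.use_best_effort_in_schema_inference = true;
    settings.tsv.use_best_effort_in_schema_inference = true;

    /// CSV, quoted complex value: inferred from the payload inside the quotes (likely an Array type).
    DataTypePtr quoted_array = tryInferDataTypeByEscapingRule("\"[1, 2, 3]\"", settings, FormatSettings::EscapingRule::CSV);

    /// CSV, quoted number: CSV output never quotes numbers, so this stays String.
    DataTypePtr quoted_number = tryInferDataTypeByEscapingRule("\"42\"", settings, FormatSettings::EscapingRule::CSV);

    /// TSV/Escaped, leading zero: forced to String so values like "0123" still parse later.
    DataTypePtr leading_zero = tryInferDataTypeByEscapingRule("0123", settings, FormatSettings::EscapingRule::Escaped);
}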
- if (format_settings.try_infer_integers) - { - ReadBufferFromString buf(field); - Int64 tmp_int; - if (tryReadIntText(tmp_int, buf) && buf.eof()) - return makeNullable(std::make_shared()); - } + auto type = tryInferNumberFromString(field, format_settings); - ReadBufferFromString buf(field); - Float64 tmp; - if (tryReadFloatText(tmp, buf) && buf.eof()) - return makeNullable(std::make_shared()); + if (!type) + return std::make_shared(); - return makeNullable(std::make_shared()); + return type; } case FormatSettings::EscapingRule::Raw: [[fallthrough]]; case FormatSettings::EscapingRule::Escaped: { if (!format_settings.tsv.use_best_effort_in_schema_inference) - return makeNullable(std::make_shared()); + return std::make_shared(); - if (field.empty() || field == format_settings.tsv.null_representation) + if (field.empty()) return nullptr; - if (field == format_settings.bool_false_representation || field == format_settings.bool_true_representation) - return DataTypeFactory::instance().get("Nullable(Bool)"); + if (field == format_settings.tsv.null_representation) + return makeNullable(std::make_shared()); - if (auto date_type = tryInferDateOrDateTime(field, format_settings)) + if (field == format_settings.bool_false_representation || field == format_settings.bool_true_representation) + return DataTypeFactory::instance().get("Bool"); + + if (auto date_type = tryInferDateOrDateTimeFromString(field, format_settings)) return date_type; - ReadBufferFromString buf(field); - auto type = determineDataTypeForSingleField(buf, format_settings); - if (!buf.eof()) - return makeNullable(std::make_shared()); + /// Special case when we have a number that starts with 0. In TSV we don't parse such numbers, + /// see readIntTextUnsafe in ReadHelpers.h. If we see data starting with 0, we can determine it + /// as a String, so parsing won't fail.
+ if (field[0] == '0' && field.size() != 1) + return std::make_shared(); + auto type = tryInferDataTypeForSingleField(field, format_settings); + if (!type) + return std::make_shared(); return type; } default: @@ -818,15 +334,34 @@ DataTypePtr determineDataTypeByEscapingRule(const String & field, const FormatSe } } -DataTypes determineDataTypesByEscapingRule(const std::vector & fields, const FormatSettings & format_settings, FormatSettings::EscapingRule escaping_rule) +DataTypes tryInferDataTypesByEscapingRule(const std::vector & fields, const FormatSettings & format_settings, FormatSettings::EscapingRule escaping_rule, JSONInferenceInfo * json_info) { DataTypes data_types; data_types.reserve(fields.size()); for (const auto & field : fields) - data_types.push_back(determineDataTypeByEscapingRule(field, format_settings, escaping_rule)); + data_types.push_back(tryInferDataTypeByEscapingRule(field, format_settings, escaping_rule, json_info)); return data_types; } +void transformInferredTypesByEscapingRuleIfNeeded(DataTypePtr & first, DataTypePtr & second, const FormatSettings & settings, FormatSettings::EscapingRule escaping_rule, JSONInferenceInfo * json_info) +{ + switch (escaping_rule) + { + case FormatSettings::EscapingRule::JSON: + transformInferredJSONTypesIfNeeded(first, second, settings, json_info); + break; + case FormatSettings::EscapingRule::Escaped: [[fallthrough]]; + case FormatSettings::EscapingRule::Raw: [[fallthrough]]; + case FormatSettings::EscapingRule::Quoted: [[fallthrough]]; + case FormatSettings::EscapingRule::CSV: + transformInferredTypesIfNeeded(first, second, settings); + break; + default: + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot transform inferred types for value with {} escaping rule", escapingRuleToString(escaping_rule)); + } +} + + DataTypePtr getDefaultDataTypeForEscapingRule(FormatSettings::EscapingRule escaping_rule) { switch (escaping_rule) @@ -834,7 +369,7 @@ DataTypePtr getDefaultDataTypeForEscapingRule(FormatSettings::EscapingRule escap case FormatSettings::EscapingRule::CSV: case FormatSettings::EscapingRule::Escaped: case FormatSettings::EscapingRule::Raw: - return makeNullable(std::make_shared()); + return std::make_shared(); default: return nullptr; } @@ -851,9 +386,10 @@ DataTypes getDefaultDataTypeForEscapingRules(const std::vector +#include #include #include #include @@ -38,45 +39,17 @@ String readFieldByEscapingRule(ReadBuffer & buf, FormatSettings::EscapingRule es /// Try to determine the type of the field written by a specific escaping rule. /// If cannot, return nullptr. -/// - For Quoted escaping rule we can interpret a single field as a constant -/// expression and get it's type by evaluation this expression. -/// - For JSON escaping rule we can use JSON parser to parse a single field -/// and then convert JSON type of this field to ClickHouse type. -/// - For CSV escaping rule we can do the next: -/// - If the field is an unquoted string, then we try to parse it as a number, -/// and if we cannot, treat it as a String. -/// - If the field is a string in quotes, then we try to use some -/// tweaks and heuristics to determine the type inside quotes, and if we can't or -/// the result is a number or tuple (we don't parse numbers in quotes and don't -/// support tuples in CSV) we treat it as a String. -/// - If input_format_csv_use_best_effort_in_schema_inference is disabled, we -/// treat everything as a string. 
-/// - For TSV and TSVRaw we try to use some tweaks and heuristics to determine the type -/// of value if setting input_format_tsv_use_best_effort_in_schema_inference is enabled, -/// otherwise we treat everything as a string. -DataTypePtr determineDataTypeByEscapingRule(const String & field, const FormatSettings & format_settings, FormatSettings::EscapingRule escaping_rule); -DataTypes determineDataTypesByEscapingRule(const std::vector & fields, const FormatSettings & format_settings, FormatSettings::EscapingRule escaping_rule); +/// See tryInferDataTypeForSingle(JSON)Field in SchemaInferenceUtils.h +DataTypePtr tryInferDataTypeByEscapingRule(const String & field, const FormatSettings & format_settings, FormatSettings::EscapingRule escaping_rule, JSONInferenceInfo * json_info = nullptr); +DataTypes tryInferDataTypesByEscapingRule(const std::vector & fields, const FormatSettings & format_settings, FormatSettings::EscapingRule escaping_rule, JSONInferenceInfo * json_info = nullptr); + +/// Check if we need to transform types inferred from data and transform it if necessary. +/// See transformInferred(JSON)TypesIfNeeded in SchemaInferenceUtils.h +void transformInferredTypesByEscapingRuleIfNeeded(DataTypePtr & first, DataTypePtr & second, const FormatSettings & settings, FormatSettings::EscapingRule escaping_rule, JSONInferenceInfo * json_info = nullptr); DataTypePtr getDefaultDataTypeForEscapingRule(FormatSettings::EscapingRule escaping_rule); DataTypes getDefaultDataTypeForEscapingRules(const std::vector & escaping_rules); -/// Try to infer Date or Datetime from string if corresponding settings are enabled. -DataTypePtr tryInferDateOrDateTime(const std::string_view & field, const FormatSettings & settings); - -/// Check if we need to transform types inferred from data and transform it if necessary. -/// It's used when we try to infer some not ordinary types from another types. -/// For example dates from strings, we should check if dates were inferred from all strings -/// in the same way and if not, transform inferred dates back to strings. -/// For example, if we have array of strings and we tried to infer dates from them, -/// to make the result type Array(Date) we should ensure that all strings were -/// successfully parsed as dated and if not, convert all dates back to strings and make result type Array(String). -void transformInferredTypesIfNeeded(DataTypes & types, const FormatSettings & settings, FormatSettings::EscapingRule escaping_rule = FormatSettings::EscapingRule::Escaped); -void transformInferredTypesIfNeeded(DataTypePtr & first, DataTypePtr & second, const FormatSettings & settings, FormatSettings::EscapingRule escaping_rule = FormatSettings::EscapingRule::Escaped); - -/// Same as transformInferredTypesIfNeeded but takes into account settings that are special for JSON formats. 
-void transformInferredJSONTypesIfNeeded(DataTypes & types, const FormatSettings & settings, const std::unordered_set * numbers_parsed_from_json_strings = nullptr); -void transformInferredJSONTypesIfNeeded(DataTypePtr & first, DataTypePtr & second, const FormatSettings & settings); - String getAdditionalFormatInfoForAllRowBasedFormats(const FormatSettings & settings); String getAdditionalFormatInfoByEscapingRule(const FormatSettings & settings, FormatSettings::EscapingRule escaping_rule); diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 0fd9109b0d7..4b6ee379de6 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -169,6 +169,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.max_rows_to_read_for_schema_inference = settings.input_format_max_rows_to_read_for_schema_inference; format_settings.column_names_for_schema_inference = settings.column_names_for_schema_inference; format_settings.schema_inference_hints = settings.schema_inference_hints; + format_settings.schema_inference_make_columns_nullable = settings.schema_inference_make_columns_nullable; format_settings.mysql_dump.table_name = settings.input_format_mysql_dump_table_name; format_settings.mysql_dump.map_column_names = settings.input_format_mysql_dump_map_column_names; format_settings.sql_insert.max_batch_size = settings.output_format_sql_insert_max_batch_size; @@ -182,6 +183,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.bson.output_string_as_string = settings.output_format_bson_string_as_string; format_settings.bson.skip_fields_with_unsupported_types_in_schema_inference = settings.input_format_bson_skip_fields_with_unsupported_types_in_schema_inference; format_settings.max_binary_string_size = settings.format_binary_max_string_size; + format_settings.max_parser_depth = context->getSettingsRef().max_parser_depth; /// Validate avro_schema_registry_url with RemoteHostFilter when non-empty and in Server context if (format_settings.schema.is_server) @@ -315,6 +317,9 @@ static void addExistingProgressToOutputFormat(OutputFormatPtr format, ContextPtr auto current_progress = element_id->getProgressIn(); Progress read_progress{current_progress.read_rows, current_progress.read_bytes, current_progress.total_rows_to_read}; format->onProgress(read_progress); + + /// Update the start of the statistics to use the start of the query, and not the creation of the format class + format->setStartTime(element_id->getQueryCPUStartTime(), true); } } diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 963213f31ad..250601c3bf0 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -71,6 +71,8 @@ struct FormatSettings Raw }; + bool schema_inference_make_columns_nullable = true; + DateTimeOutputFormat date_time_output_format = DateTimeOutputFormat::Simple; bool input_format_ipv4_default_on_conversion_error = false; @@ -81,6 +83,8 @@ struct FormatSettings UInt64 max_binary_string_size = 0; + UInt64 max_parser_depth = DBMS_DEFAULT_MAX_PARSER_DEPTH; + struct { UInt64 row_group_size = 1000000; diff --git a/src/Formats/JSONUtils.cpp b/src/Formats/JSONUtils.cpp index d77cd14bd38..16f275ed6b8 100644 --- a/src/Formats/JSONUtils.cpp +++ b/src/Formats/JSONUtils.cpp @@ -6,19 +6,13 @@ #include #include #include -#include -#include -#include -#include -#include #include #include -#include -#include -#include #include +#include + namespace DB { @@ -122,206 
+116,6 @@ namespace JSONUtils return {loadAtPosition(in, memory, pos), number_of_rows}; } - template - static String readJSONEachRowLineIntoStringImpl(ReadBuffer & in) - { - Memory memory; - fileSegmentationEngineJSONEachRowImpl(in, memory, 0, 1, 1); - return String(memory.data(), memory.size()); - } - - template - DataTypePtr getDataTypeFromFieldImpl(const Element & field, const FormatSettings & settings, std::unordered_set & numbers_parsed_from_json_strings) - { - if (field.isNull()) - return nullptr; - - if (field.isBool()) - return DataTypeFactory::instance().get("Nullable(Bool)"); - - if (field.isInt64() || field.isUInt64()) - { - if (settings.try_infer_integers) - return makeNullable(std::make_shared()); - - return makeNullable(std::make_shared()); - } - - if (field.isDouble()) - return makeNullable(std::make_shared()); - - if (field.isString()) - { - if (auto date_type = tryInferDateOrDateTime(field.getString(), settings)) - return date_type; - - if (!settings.json.try_infer_numbers_from_strings) - return makeNullable(std::make_shared()); - - ReadBufferFromString buf(field.getString()); - - if (settings.try_infer_integers) - { - Int64 tmp_int; - if (tryReadIntText(tmp_int, buf) && buf.eof()) - { - auto type = std::make_shared(); - numbers_parsed_from_json_strings.insert(type.get()); - return makeNullable(type); - } - } - - Float64 tmp; - if (tryReadFloatText(tmp, buf) && buf.eof()) - { - auto type = std::make_shared(); - numbers_parsed_from_json_strings.insert(type.get()); - return makeNullable(type); - } - - return makeNullable(std::make_shared()); - } - - if (field.isArray()) - { - auto array = field.getArray(); - - /// Return nullptr in case of empty array because we cannot determine nested type. - if (array.size() == 0) - return nullptr; - - DataTypes nested_data_types; - /// If this array contains fields with different types we will treat it as Tuple. - bool are_types_the_same = true; - for (const auto element : array) - { - auto type = getDataTypeFromFieldImpl(element, settings, numbers_parsed_from_json_strings); - if (!type) - return nullptr; - - if (!nested_data_types.empty() && !type->equals(*nested_data_types.back())) - are_types_the_same = false; - - nested_data_types.push_back(std::move(type)); - } - - if (!are_types_the_same) - { - auto nested_types_copy = nested_data_types; - transformInferredJSONTypesIfNeeded(nested_types_copy, settings, &numbers_parsed_from_json_strings); - are_types_the_same = true; - for (size_t i = 1; i < nested_types_copy.size(); ++i) - are_types_the_same &= nested_types_copy[i]->equals(*nested_types_copy[i - 1]); - - if (are_types_the_same) - nested_data_types = std::move(nested_types_copy); - } - - if (!are_types_the_same) - return std::make_shared(nested_data_types); - - return std::make_shared(nested_data_types.back()); - } - - if (field.isObject()) - { - auto object = field.getObject(); - DataTypes value_types; - for (const auto key_value_pair : object) - { - auto type = getDataTypeFromFieldImpl(key_value_pair.second, settings, numbers_parsed_from_json_strings); - if (!type) - { - /// If we couldn't infer nested type and Object type is not enabled, - /// we can't determine the type of this JSON field. - if (!settings.json.try_infer_objects) - { - /// If read_objects_as_strings is enabled, we can read objects into strings. 
- if (settings.json.read_objects_as_strings) - return makeNullable(std::make_shared()); - return nullptr; - } - - continue; - } - - if (settings.json.try_infer_objects && isObject(type)) - return std::make_shared("json", true); - - value_types.push_back(type); - } - - if (value_types.empty()) - return nullptr; - - transformInferredJSONTypesIfNeeded(value_types, settings, &numbers_parsed_from_json_strings); - bool are_types_equal = true; - for (size_t i = 1; i < value_types.size(); ++i) - are_types_equal &= value_types[i]->equals(*value_types[0]); - - if (!are_types_equal) - { - if (!settings.json.try_infer_objects) - { - /// If read_objects_as_strings is enabled, we can read objects into strings. - if (settings.json.read_objects_as_strings) - return makeNullable(std::make_shared()); - return nullptr; - } - return std::make_shared("json", true); - } - - return std::make_shared(std::make_shared(), value_types[0]); - } - - throw Exception{ErrorCodes::INCORRECT_DATA, "Unexpected JSON type"}; - } - - auto getJSONParserAndElement() - { -#if USE_SIMDJSON - return std::pair(); -#elif USE_RAPIDJSON - return std::pair(); -#else - return std::pair(); -#endif - } - - DataTypePtr getDataTypeFromField(const String & field, const FormatSettings & settings) - { - auto [parser, element] = getJSONParserAndElement(); - bool parsed = parser.parse(field, element); - if (!parsed) - throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot parse JSON object here: {}", field); - - std::unordered_set numbers_parsed_from_json_strings; - return getDataTypeFromFieldImpl(element, settings, numbers_parsed_from_json_strings); - } - - template - static DataTypes determineColumnDataTypesFromJSONEachRowDataImpl(ReadBuffer & in, const FormatSettings & settings, bool /*json_strings*/, Extractor & extractor) - { - String line = readJSONEachRowLineIntoStringImpl(in); - auto [parser, element] = getJSONParserAndElement(); - bool parsed = parser.parse(line, element); - if (!parsed) - throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot parse JSON object here: {}", line); - - auto fields = extractor.extract(element); - - DataTypes data_types; - data_types.reserve(fields.size()); - std::unordered_set numbers_parsed_from_json_strings; - for (const auto & field : fields) - data_types.push_back(getDataTypeFromFieldImpl(field, settings, numbers_parsed_from_json_strings)); - - /// TODO: For JSONStringsEachRow/JSONCompactStringsEach all types will be strings. - /// Should we try to parse data inside strings somehow in this case? 
- - return data_types; - } - std::pair fileSegmentationEngineJSONEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_bytes, size_t max_rows) { return fileSegmentationEngineJSONEachRowImpl<'{', '}'>(in, memory, min_bytes, 1, max_rows); @@ -333,68 +127,56 @@ namespace JSONUtils return fileSegmentationEngineJSONEachRowImpl<'[', ']'>(in, memory, min_bytes, min_rows, max_rows); } - struct JSONEachRowFieldsExtractor + NamesAndTypesList readRowAndGetNamesAndDataTypesForJSONEachRow(ReadBuffer & in, const FormatSettings & settings, JSONInferenceInfo * inference_info) { - template - std::vector extract(const Element & element) + skipWhitespaceIfAny(in); + assertChar('{', in); + bool first = true; + NamesAndTypesList names_and_types; + String field; + while (!in.eof() && *in.position() != '}') { - /// {..., "" : , ...} + if (!first) + skipComma(in); + else + first = false; - if (!element.isObject()) - throw Exception(ErrorCodes::INCORRECT_DATA, "Root JSON value is not an object"); - - auto object = element.getObject(); - std::vector fields; - fields.reserve(object.size()); - column_names.reserve(object.size()); - for (const auto & key_value_pair : object) - { - column_names.emplace_back(key_value_pair.first); - fields.push_back(key_value_pair.second); - } - - return fields; + auto name = readFieldName(in); + auto type = tryInferDataTypeForSingleJSONField(in, settings, inference_info); + names_and_types.emplace_back(name, type); } - std::vector column_names; - }; + if (in.eof()) + throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected EOF while reading JSON object"); - NamesAndTypesList readRowAndGetNamesAndDataTypesForJSONEachRow(ReadBuffer & in, const FormatSettings & settings, bool json_strings) - { - JSONEachRowFieldsExtractor extractor; - auto data_types - = determineColumnDataTypesFromJSONEachRowDataImpl(in, settings, json_strings, extractor); - NamesAndTypesList result; - for (size_t i = 0; i != extractor.column_names.size(); ++i) - result.emplace_back(extractor.column_names[i], data_types[i]); - return result; + assertChar('}', in); + return names_and_types; } - struct JSONCompactEachRowFieldsExtractor + DataTypes readRowAndGetDataTypesForJSONCompactEachRow(ReadBuffer & in, const FormatSettings & settings, JSONInferenceInfo * inference_info) { - template - std::vector extract(const Element & element) + skipWhitespaceIfAny(in); + assertChar('[', in); + bool first = true; + DataTypes types; + String field; + while (!in.eof() && *in.position() != ']') { - /// [..., , ...] 
- if (!element.isArray()) - throw Exception(ErrorCodes::INCORRECT_DATA, "Root JSON value is not an array"); - - auto array = element.getArray(); - std::vector fields; - fields.reserve(array.size()); - for (size_t i = 0; i != array.size(); ++i) - fields.push_back(array[i]); - return fields; + if (!first) + skipComma(in); + else + first = false; + auto type = tryInferDataTypeForSingleJSONField(in, settings, inference_info); + types.push_back(std::move(type)); } - }; - DataTypes readRowAndGetDataTypesForJSONCompactEachRow(ReadBuffer & in, const FormatSettings & settings, bool json_strings) - { - JSONCompactEachRowFieldsExtractor extractor; - return determineColumnDataTypesFromJSONEachRowDataImpl(in, settings, json_strings, extractor); + if (in.eof()) + throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected EOF while reading JSON array"); + + assertChar(']', in); + return types; } - bool nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl(ReadBuffer & buf) { /// For JSONEachRow we can safely skip whitespace characters diff --git a/src/Formats/JSONUtils.h b/src/Formats/JSONUtils.h index d22b446d16d..5835e364c2b 100644 --- a/src/Formats/JSONUtils.h +++ b/src/Formats/JSONUtils.h @@ -13,24 +13,21 @@ namespace DB { +struct JSONInferenceInfo; + namespace JSONUtils { std::pair fileSegmentationEngineJSONEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_bytes, size_t max_rows); std::pair fileSegmentationEngineJSONCompactEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_bytes, size_t min_rows, size_t max_rows); - /// Parse JSON from string and convert it's type to ClickHouse type. Make the result type always Nullable. - /// JSON array with different nested types is treated as Tuple. - /// If cannot convert (for example when field contains null), return nullptr. - DataTypePtr getDataTypeFromField(const String & field, const FormatSettings & settings); - /// Read row in JSONEachRow format and try to determine type for each field. /// Return list of names and types. /// If cannot determine the type of some field, return nullptr for it. - NamesAndTypesList readRowAndGetNamesAndDataTypesForJSONEachRow(ReadBuffer & in, const FormatSettings & settings, bool json_strings); + NamesAndTypesList readRowAndGetNamesAndDataTypesForJSONEachRow(ReadBuffer & in, const FormatSettings & settings, JSONInferenceInfo * inference_info); /// Read row in JSONCompactEachRow format and try to determine type for each field. /// If cannot determine the type of some field, return nullptr for it. - DataTypes readRowAndGetDataTypesForJSONCompactEachRow(ReadBuffer & in, const FormatSettings & settings, bool json_strings); + DataTypes readRowAndGetDataTypesForJSONCompactEachRow(ReadBuffer & in, const FormatSettings & settings, JSONInferenceInfo * inference_info); bool nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl(ReadBuffer & buf); diff --git a/src/Formats/NativeWriter.cpp b/src/Formats/NativeWriter.cpp index c4dea371afd..e932bb88c2d 100644 --- a/src/Formats/NativeWriter.cpp +++ b/src/Formats/NativeWriter.cpp @@ -64,8 +64,10 @@ static void writeData(const ISerialization & serialization, const ColumnPtr & co } -void NativeWriter::write(const Block & block) +size_t NativeWriter::write(const Block & block) { + size_t written_before = ostr.count(); + /// Additional information about the block. 
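Aside: the new size_t return value of NativeWriter::write is just the output position sampled before and after serialization. A tiny standalone sketch of that before/after-offset pattern, using plain iostreams rather than ClickHouse's WriteBuffer; writeAndCountBytes is a hypothetical name.

#include <iostream>
#include <sstream>
#include <string>

// Count how many bytes one "write" call appended by sampling the stream position
// before and after the write.
size_t writeAndCountBytes(std::ostringstream & out, const std::string & payload)
{
    const auto written_before = out.tellp();
    out << payload;  // stands in for serializing a block
    return static_cast<size_t>(out.tellp() - written_before);
}

int main()
{
    std::ostringstream out;
    std::cout << writeAndCountBytes(out, "header") << '\n';      // 6
    std::cout << writeAndCountBytes(out, "block data") << '\n';  // 10
}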
if (client_revision > 0) block.info.write(ostr); @@ -161,6 +163,10 @@ void NativeWriter::write(const Block & block) if (index) index->blocks.emplace_back(std::move(index_block)); + + size_t written_after = ostr.count(); + size_t written_size = written_after - written_before; + return written_size; } } diff --git a/src/Formats/NativeWriter.h b/src/Formats/NativeWriter.h index 010a03ec722..7bb377d2e4a 100644 --- a/src/Formats/NativeWriter.h +++ b/src/Formats/NativeWriter.h @@ -27,7 +27,9 @@ public: IndexForNativeFormat * index_ = nullptr, size_t initial_size_of_file_ = 0); Block getHeader() const { return header; } - void write(const Block & block); + + /// Returns the number of bytes written. + size_t write(const Block & block); void flush(); static String getContentType() { return "application/octet-stream"; } diff --git a/src/Formats/ProtobufSerializer.cpp b/src/Formats/ProtobufSerializer.cpp index 2f56c4242e5..48332deedfb 100644 --- a/src/Formats/ProtobufSerializer.cpp +++ b/src/Formats/ProtobufSerializer.cpp @@ -1736,7 +1736,7 @@ namespace } const std::shared_ptr aggregate_function_data_type; - const AggregateFunctionPtr aggregate_function; + AggregateFunctionPtr aggregate_function; String text_buffer; }; diff --git a/src/Formats/ReadSchemaUtils.cpp b/src/Formats/ReadSchemaUtils.cpp index 8468f540253..05323ed72ee 100644 --- a/src/Formats/ReadSchemaUtils.cpp +++ b/src/Formats/ReadSchemaUtils.cpp @@ -197,69 +197,6 @@ ColumnsDescription readSchemaFromFormat(const String & format_name, const std::o return readSchemaFromFormat(format_name, format_settings, read_buffer_iterator, retry, context, buf_out); } -DataTypePtr makeNullableRecursivelyAndCheckForNothing(DataTypePtr type) -{ - if (!type) - return nullptr; - - WhichDataType which(type); - - if (which.isNothing()) - return nullptr; - - if (which.isNullable()) - { - const auto * nullable_type = assert_cast(type.get()); - return makeNullableRecursivelyAndCheckForNothing(nullable_type->getNestedType()); - } - - if (which.isArray()) - { - const auto * array_type = assert_cast(type.get()); - auto nested_type = makeNullableRecursivelyAndCheckForNothing(array_type->getNestedType()); - return nested_type ? std::make_shared(nested_type) : nullptr; - } - - if (which.isTuple()) - { - const auto * tuple_type = assert_cast(type.get()); - DataTypes nested_types; - for (const auto & element : tuple_type->getElements()) - { - auto nested_type = makeNullableRecursivelyAndCheckForNothing(element); - if (!nested_type) - return nullptr; - nested_types.push_back(nested_type); - } - return std::make_shared(std::move(nested_types)); - } - - if (which.isMap()) - { - const auto * map_type = assert_cast(type.get()); - auto key_type = makeNullableRecursivelyAndCheckForNothing(map_type->getKeyType()); - auto value_type = makeNullableRecursivelyAndCheckForNothing(map_type->getValueType()); - return key_type && value_type ? std::make_shared(removeNullable(key_type), value_type) : nullptr; - } - - if (which.isLowCarnality()) - { - const auto * lc_type = assert_cast(type.get()); - auto nested_type = makeNullableRecursivelyAndCheckForNothing(lc_type->getDictionaryType()); - return nested_type ? 
std::make_shared(nested_type) : nullptr; - } - - return makeNullable(type); -} - -NamesAndTypesList getNamesAndRecursivelyNullableTypes(const Block & header) -{ - NamesAndTypesList result; - for (auto & [name, type] : header.getNamesAndTypesList()) - result.emplace_back(name, makeNullableRecursivelyAndCheckForNothing(type)); - return result; -} - SchemaCache::Key getKeyForSchemaCache(const String & source, const String & format, const std::optional & format_settings, const ContextPtr & context) { return getKeysForSchemaCache({source}, format, format_settings, context).front(); diff --git a/src/Formats/ReadSchemaUtils.h b/src/Formats/ReadSchemaUtils.h index 6e731d9dd9e..82fbb3f7c46 100644 --- a/src/Formats/ReadSchemaUtils.h +++ b/src/Formats/ReadSchemaUtils.h @@ -35,21 +35,7 @@ ColumnsDescription readSchemaFromFormat( ContextPtr & context, std::unique_ptr & buf_out); -/// Make type Nullable recursively: -/// - Type -> Nullable(type) -/// - Array(Type) -> Array(Nullable(Type)) -/// - Tuple(Type1, ..., TypeN) -> Tuple(Nullable(Type1), ..., Nullable(TypeN)) -/// - Map(KeyType, ValueType) -> Map(KeyType, Nullable(ValueType)) -/// - LowCardinality(Type) -> LowCardinality(Nullable(Type)) -/// If type is Nothing or one of the nested types is Nothing, return nullptr. -DataTypePtr makeNullableRecursivelyAndCheckForNothing(DataTypePtr type); - -/// Call makeNullableRecursivelyAndCheckForNothing for all types -/// in the block and return names and types. -NamesAndTypesList getNamesAndRecursivelyNullableTypes(const Block & header); - SchemaCache::Key getKeyForSchemaCache(const String & source, const String & format, const std::optional & format_settings, const ContextPtr & context); SchemaCache::Keys getKeysForSchemaCache(const Strings & sources, const String & format, const std::optional & format_settings, const ContextPtr & context); -void splitSchemaCacheKey(const String & key, String & source, String & format, String & additional_format_info); } diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp new file mode 100644 index 00000000000..85bb5d0ebcb --- /dev/null +++ b/src/Formats/SchemaInferenceUtils.cpp @@ -0,0 +1,1038 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int TOO_DEEP_RECURSION; +} + +namespace +{ + bool checkIfTypesAreEqual(const DataTypes & types) + { + if (types.empty()) + return true; + + for (size_t i = 1; i < types.size(); ++i) + { + if (!types[0]->equals(*types[i])) + return false; + } + return true; + } + + /// If we have both Nothing and non Nothing types, convert all Nothing types to the first non Nothing. + /// For example if we have types [Nothing, String, Nothing] we change it to [String, String, String] + void transformNothingSimpleTypes(DataTypes & data_types, const TypeIndexesSet & type_indexes) + { + /// Check if we have both Nothing and non Nothing types. + if (!type_indexes.contains(TypeIndex::Nothing) || type_indexes.size() <= 1) + return; + + DataTypePtr not_nothing_type = nullptr; + for (const auto & type : data_types) + { + if (!isNothing(type)) + { + not_nothing_type = type; + break; + } + } + + for (auto & type : data_types) + { + if (isNothing(type)) + type = not_nothing_type; + } + } + + /// If we have both Int64 and Float64 types, convert all Int64 to Float64. 
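Aside: the Int64/Float64 widening rule stated in the comment above, reduced to a self-contained sketch where plain strings stand in for ClickHouse's DataTypePtr. The helper name promoteIntsToFloats is invented for illustration.

#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

// If both "Int64" and "Float64" were inferred for the same column, widen every "Int64" to "Float64".
void promoteIntsToFloats(std::vector<std::string> & types)
{
    const bool has_int = std::find(types.begin(), types.end(), "Int64") != types.end();
    const bool has_float = std::find(types.begin(), types.end(), "Float64") != types.end();
    if (!has_int || !has_float)
        return;

    for (auto & type : types)
        if (type == "Int64")
            type = "Float64";
}

int main()
{
    std::vector<std::string> column_types{"Int64", "Float64", "Int64"};
    promoteIntsToFloats(column_types);
    for (const auto & type : column_types)
        std::cout << type << ' ';  // Float64 Float64 Float64
    std::cout << '\n';
}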
+ void transformIntegersAndFloatsToFloats(DataTypes & data_types, const TypeIndexesSet & type_indexes) + { + if (!type_indexes.contains(TypeIndex::Int64) || !type_indexes.contains(TypeIndex::Float64)) + return; + + for (auto & type : data_types) + { + if (isInteger(type)) + type = std::make_shared(); + } + } + + /// If we have only Date and DateTime types, convert Date to DateTime, + /// otherwise, convert all Date and DateTime to String. + void transformDatesAndDateTimes(DataTypes & data_types, const TypeIndexesSet & type_indexes) + { + bool have_dates = type_indexes.contains(TypeIndex::Date); + bool have_datetimes = type_indexes.contains(TypeIndex::DateTime64); + bool all_dates_or_datetimes = (type_indexes.size() == (static_cast(have_dates) + static_cast(have_datetimes))); + + if (!all_dates_or_datetimes && (have_dates || have_datetimes)) + { + for (auto & type : data_types) + { + if (isDate(type) || isDateTime64(type)) + type = std::make_shared(); + } + + return; + } + + if (have_dates && have_datetimes) + { + for (auto & type : data_types) + { + if (isDate(type)) + type = std::make_shared(9); + } + } + } + + /// If we have numbers (Int64/Float64) and String types and numbers were parsed from String, + /// convert all numbers to String. + void transformJSONNumbersBackToString( + DataTypes & data_types, const FormatSettings & settings, const TypeIndexesSet & type_indexes, JSONInferenceInfo * json_info) + { + bool have_strings = type_indexes.contains(TypeIndex::String); + bool have_numbers = type_indexes.contains(TypeIndex::Int64) || type_indexes.contains(TypeIndex::Float64); + if (!have_strings || !have_numbers) + return; + + for (auto & type : data_types) + { + if (isNumber(type) + && (settings.json.read_numbers_as_strings || !json_info + || json_info->numbers_parsed_from_json_strings.contains(type.get()))) + type = std::make_shared(); + } + } + + /// If we have both Bool and number (Int64/Float64) types, + /// convert all Bool to Int64/Float64. + void transformBoolsAndNumbersToNumbers(DataTypes & data_types, const TypeIndexesSet & type_indexes) + { + bool have_floats = type_indexes.contains(TypeIndex::Float64); + bool have_integers = type_indexes.contains(TypeIndex::Int64); + bool have_bools = type_indexes.contains(TypeIndex::UInt8); + /// Check if we have both Bool and Integer/Float. + if (!have_bools || (!have_integers && !have_floats)) + return; + + for (auto & type : data_types) + { + if (isBool(type)) + { + if (have_integers) + type = std::make_shared(); + else + type = std::make_shared(); + } + } + } + + /// If we have type Nothing/Nullable(Nothing) and some other non Nothing types, + /// convert all Nothing/Nullable(Nothing) types to the first non Nothing. 
+ /// For example, when we have [Nothing, Array(Int64)] it will convert it to [Array(Int64), Array(Int64)] + /// (it can happen when transforming complex nested types like [Array(Nothing), Array(Array(Int64))]) + void transformNothingComplexTypes(DataTypes & data_types) + { + bool have_nothing = false; + DataTypePtr not_nothing_type = nullptr; + for (const auto & type : data_types) + { + if (isNothing(removeNullable(type))) + have_nothing = true; + else + not_nothing_type = type; + } + + if (!have_nothing || !not_nothing_type) + return; + + for (auto & type : data_types) + { + if (isNothing(removeNullable(type))) + type = not_nothing_type; + } + } + + /// If we have both Nullable and non Nullable types, make all types Nullable + void transformNullableTypes(DataTypes & data_types, const TypeIndexesSet & type_indexes) + { + if (!type_indexes.contains(TypeIndex::Nullable)) + return; + + for (auto & type : data_types) + { + if (type->canBeInsideNullable()) + type = makeNullable(type); + } + } + + /// If we have Tuple with the same nested types like Tuple(Int64, Int64), + /// convert it to Array(Int64). It's used for JSON values. + /// For example when we had type Tuple(Int64, Nullable(Nothing)) and we + /// transformed it to Tuple(Nullable(Int64), Nullable(Int64)) we will + /// also transform it to Array(Nullable(Int64)) + void transformTuplesWithEqualNestedTypesToArrays(DataTypes & data_types, const TypeIndexesSet & type_indexes) + { + if (!type_indexes.contains(TypeIndex::Tuple)) + return; + + for (auto & type : data_types) + { + if (isTuple(type)) + { + const auto * tuple_type = assert_cast(type.get()); + if (checkIfTypesAreEqual(tuple_type->getElements())) + type = std::make_shared(tuple_type->getElements().back()); + } + } + } + + template + void transformInferredTypesIfNeededImpl(DataTypes & types, const FormatSettings & settings, JSONInferenceInfo * json_info = nullptr); + + /// If we have Tuple and Array types, try to convert them all to Array + /// if there is a common type for all nested types. + /// For example, if we have [Tuple(Nullable(Nothing), String), Array(Date), Tuple(Date, String)] + /// it will convert them all to Array(String) + void transformJSONTuplesAndArraysToArrays( + DataTypes & data_types, const FormatSettings & settings, const TypeIndexesSet & type_indexes, JSONInferenceInfo * json_info) + { + if (!type_indexes.contains(TypeIndex::Tuple)) + return; + + bool have_arrays = type_indexes.contains(TypeIndex::Array); + bool tuple_sizes_are_equal = true; + size_t tuple_size = 0; + for (const auto & type : data_types) + { + if (isTuple(type)) + { + const auto & current_tuple_size = assert_cast(*type).getElements().size(); + if (!tuple_size) + tuple_size = current_tuple_size; + else + tuple_sizes_are_equal &= current_tuple_size == tuple_size; + } + } + + /// Check if we have arrays and tuples with same size. 
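Aside: the Tuple/Array unification described above, reduced to a standalone sketch in which each value is represented only by the list of its element type names. unifyTuplesAndArrays is a hypothetical helper, not the function in this diff.

#include <iostream>
#include <string>
#include <vector>

// If every element type across all tuples and arrays is the same, the whole column
// can be read as Array(T); otherwise this sketch gives up.
std::string unifyTuplesAndArrays(const std::vector<std::vector<std::string>> & values)
{
    std::vector<std::string> element_types;
    for (const auto & value : values)
        element_types.insert(element_types.end(), value.begin(), value.end());

    if (element_types.empty())
        return "Array(Nothing)";

    for (const auto & type : element_types)
        if (type != element_types.front())
            return "no common Array type";

    return "Array(" + element_types.front() + ")";
}

int main()
{
    // Tuple(String, String), Array(String) and Tuple(String) all collapse to Array(String).
    std::cout << unifyTuplesAndArrays({{"String", "String"}, {"String"}, {"String"}}) << '\n';
    // Tuple(Int64, String) has no single element type.
    std::cout << unifyTuplesAndArrays({{"Int64", "String"}}) << '\n';
}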
+ if (!have_arrays && !tuple_sizes_are_equal) + return; + + DataTypes nested_types; + for (auto & type : data_types) + { + if (isArray(type)) + nested_types.push_back(assert_cast(*type).getNestedType()); + else if (isTuple(type)) + { + const auto & elements = assert_cast(*type).getElements(); + for (const auto & element : elements) + nested_types.push_back(element); + } + } + + transformInferredTypesIfNeededImpl(nested_types, settings, json_info); + if (checkIfTypesAreEqual(nested_types)) + { + for (auto & type : data_types) + { + if (isArray(type) || isTuple(type)) + type = std::make_shared(nested_types.back()); + } + } + } + + /// If we have Map and Object(JSON) types, convert all Map types to Object(JSON). + /// If we have Map types with different value types, convert all Map types to Object(JSON) + void transformMapsAndObjectsToObjects(DataTypes & data_types, const TypeIndexesSet & type_indexes) + { + if (!type_indexes.contains(TypeIndex::Map)) + return; + + bool have_objects = type_indexes.contains(TypeIndex::Object); + bool maps_are_equal = true; + DataTypePtr first_map_type = nullptr; + for (const auto & type : data_types) + { + if (isMap(type)) + { + if (!first_map_type) + first_map_type = type; + else + maps_are_equal &= type->equals(*first_map_type); + } + } + + if (!have_objects && maps_are_equal) + return; + + for (auto & type : data_types) + { + if (isMap(type)) + type = std::make_shared("json", true); + } + } + + void transformMapsObjectsAndStringsToStrings(DataTypes & data_types, const TypeIndexesSet & type_indexes) + { + bool have_maps = type_indexes.contains(TypeIndex::Map); + bool have_objects = type_indexes.contains(TypeIndex::Object); + bool have_strings = type_indexes.contains(TypeIndex::String); + + /// Check if we have both String and Map/Object + if (!have_strings || (!have_maps && !have_objects)) + return; + + for (auto & type : data_types) + { + if (isMap(type) || isObject(type)) + type = std::make_shared(); + } + } + + template + void transformInferredTypesIfNeededImpl(DataTypes & types, const FormatSettings & settings, JSONInferenceInfo * json_info) + { + auto transform_simple_types = [&](DataTypes & data_types, const TypeIndexesSet & type_indexes) + { + /// Remove all Nothing type if possible. + transformNothingSimpleTypes(data_types, type_indexes); + + /// Transform integers to floats if needed. + if (settings.try_infer_integers) + transformIntegersAndFloatsToFloats(data_types, type_indexes); + + /// Transform Date to DateTime or both to String if needed. + if (settings.try_infer_dates || settings.try_infer_datetimes) + transformDatesAndDateTimes(data_types, type_indexes); + + if constexpr (!is_json) + return; + + /// Check settings specific for JSON formats. + + /// Convert numbers inferred from strings back to strings if needed. + if (settings.json.try_infer_numbers_from_strings || settings.json.read_numbers_as_strings) + transformJSONNumbersBackToString(data_types, settings, type_indexes, json_info); + + /// Convert Bool to number (Int64/Float64) if needed. + if (settings.json.read_bools_as_numbers) + transformBoolsAndNumbersToNumbers(data_types, type_indexes); + }; + + auto transform_complex_types = [&](DataTypes & data_types, const TypeIndexesSet & type_indexes) + { + /// Make types Nullable if needed. + transformNullableTypes(data_types, type_indexes); + + /// If we have type Nothing, it means that we had empty Array/Map while inference. + /// If there is at least one non Nothing type, change all Nothing types to it. 
+ transformNothingComplexTypes(data_types); + + if constexpr (!is_json) + return; + + /// Convert JSON tuples with same nested types to arrays. + transformTuplesWithEqualNestedTypesToArrays(data_types, type_indexes); + + /// Convert JSON tuples and arrays to arrays if possible. + transformJSONTuplesAndArraysToArrays(data_types, settings, type_indexes, json_info); + + /// Convert Maps to Objects if needed. + if (settings.json.try_infer_objects) + transformMapsAndObjectsToObjects(data_types, type_indexes); + + if (settings.json.read_objects_as_strings) + transformMapsObjectsAndStringsToStrings(data_types, type_indexes); + }; + + transformTypesRecursively(types, transform_simple_types, transform_complex_types); + } + + template + DataTypePtr tryInferDataTypeForSingleFieldImpl(ReadBuffer & buf, const FormatSettings & settings, JSONInferenceInfo * json_info, size_t depth = 1); + + bool tryInferDate(std::string_view field) + { + ReadBufferFromString buf(field); + DayNum tmp; + return tryReadDateText(tmp, buf) && buf.eof(); + } + + bool tryInferDateTime(std::string_view field, const FormatSettings & settings) + { + if (field.empty()) + return false; + + ReadBufferFromString buf(field); + Float64 tmp_float; + /// Check if it's just a number, and if so, don't try to infer DateTime from it, + /// because we can interpret this number as a timestamp and it will lead to + /// inferring DateTime instead of simple Int64/Float64 in some cases. + if (tryReadFloatText(tmp_float, buf) && buf.eof()) + return false; + + buf.seek(0, SEEK_SET); /// Return position to the beginning + DateTime64 tmp; + switch (settings.date_time_input_format) + { + case FormatSettings::DateTimeInputFormat::Basic: + if (tryReadDateTime64Text(tmp, 9, buf) && buf.eof()) + return true; + break; + case FormatSettings::DateTimeInputFormat::BestEffort: + if (tryParseDateTime64BestEffort(tmp, 9, buf, DateLUT::instance(), DateLUT::instance("UTC")) && buf.eof()) + return true; + break; + case FormatSettings::DateTimeInputFormat::BestEffortUS: + if (tryParseDateTime64BestEffortUS(tmp, 9, buf, DateLUT::instance(), DateLUT::instance("UTC")) && buf.eof()) + return true; + break; + } + + return false; + } + + template + DataTypePtr tryInferArray(ReadBuffer & buf, const FormatSettings & settings, JSONInferenceInfo * json_info, size_t depth) + { + assertChar('[', buf); + skipWhitespaceIfAny(buf); + + DataTypes nested_types; + bool first = true; + bool have_invalid_nested_type = false; + while (!buf.eof() && *buf.position() != ']') + { + if (!first) + { + /// Skip field delimiter between array elements. + if (!checkChar(',', buf)) + return nullptr; + skipWhitespaceIfAny(buf); + } + else + first = false; + + auto nested_type = tryInferDataTypeForSingleFieldImpl(buf, settings, json_info, depth + 2); + + if (nested_type) + nested_types.push_back(nested_type); + else + have_invalid_nested_type = true; + + skipWhitespaceIfAny(buf); + } + + /// No ']' at the end. + if (buf.eof()) + return nullptr; + + assertChar(']', buf); + skipWhitespaceIfAny(buf); + + /// Nested data is invalid. + if (have_invalid_nested_type) + return nullptr; + + /// Empty array has type Array(Nothing) + if (nested_types.empty()) + return std::make_shared(std::make_shared()); + + if (checkIfTypesAreEqual(nested_types)) + return std::make_shared(std::move(nested_types.back())); + + /// If element types are not equal, we should try to find common type. 
+ /// If after transformation element types are still different, we return Tuple for JSON and + /// nullptr for other formats (nullptr means we couldn't infer the type). + if constexpr (is_json) + { + /// For JSON if we have not complete types, we should not try to transform them + /// and return it as a Tuple. + /// For example, if we have types [Float64, Nullable(Nothing), Float64] + /// it can be Array(Float64) or Tuple(Float64, , Float64) and + /// we can't determine which one it is. But we will be able to do it later + /// when we will have types from other rows for this column. + /// For example, if in the next row we will have types [Nullable(Nothing), String, Float64], + /// we can determine the type for this column as Tuple(Nullable(Float64), Nullable(String), Float64). + for (const auto & type : nested_types) + { + if (!checkIfTypeIsComplete(type)) + return std::make_shared(nested_types); + } + + auto nested_types_copy = nested_types; + transformInferredTypesIfNeededImpl(nested_types_copy, settings, json_info); + + if (checkIfTypesAreEqual(nested_types_copy)) + return std::make_shared(nested_types_copy.back()); + + return std::make_shared(nested_types); + } + else + { + transformInferredTypesIfNeededImpl(nested_types, settings); + if (checkIfTypesAreEqual(nested_types)) + return std::make_shared(nested_types.back()); + + /// We couldn't determine common type for array element. + return nullptr; + } + } + + DataTypePtr tryInferTuple(ReadBuffer & buf, const FormatSettings & settings, JSONInferenceInfo * json_info, size_t depth) + { + assertChar('(', buf); + skipWhitespaceIfAny(buf); + + DataTypes nested_types; + bool first = true; + bool have_invalid_nested_type = false; + while (!buf.eof() && *buf.position() != ')') + { + if (!first) + { + if (!checkChar(',', buf)) + return nullptr; + skipWhitespaceIfAny(buf); + } + else + first = false; + + auto nested_type = tryInferDataTypeForSingleFieldImpl(buf, settings, json_info, depth + 1); + if (nested_type) + nested_types.push_back(nested_type); + else + have_invalid_nested_type = true; + + skipWhitespaceIfAny(buf); + } + + /// No ')' at the end. + if (buf.eof()) + return nullptr; + + assertChar(')', buf); + skipWhitespaceIfAny(buf); + + /// Nested data is invalid. + if (have_invalid_nested_type || nested_types.empty()) + return nullptr; + + return std::make_shared(nested_types); + } + + DataTypePtr tryInferNumber(ReadBuffer & buf, const FormatSettings & settings) + { + if (buf.eof()) + return nullptr; + + Float64 tmp_float; + if (settings.try_infer_integers) + { + /// If we read from String, we can do it in a more efficient way. + if (auto * string_buf = dynamic_cast(&buf)) + { + /// Remember the pointer to the start of the number to rollback to it. + char * number_start = buf.position(); + Int64 tmp_int; + bool read_int = tryReadIntText(tmp_int, buf); + /// If we reached eof, it cannot be float (it requires no less data than integer) + if (buf.eof()) + return read_int ? std::make_shared() : nullptr; + + char * int_end = buf.position(); + /// We cam safely get back to the start of the number, because we read from a string and we didn't reach eof. + buf.position() = number_start; + if (tryReadFloatText(tmp_float, buf)) + { + if (read_int && buf.position() == int_end) + return std::make_shared(); + return std::make_shared(); + } + + return nullptr; + } + + /// We should use PeekableReadBuffer, because we need to + /// rollback to the start of number to parse it as integer first + /// and then as float. 
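Aside: the "parse as integer first, then re-parse as float and compare how far each parser got" trick described in the comment above, as a standalone sketch using std::from_chars instead of PeekableReadBuffer. inferNumberType is a made-up name, and unlike the real code this sketch ignores trailing garbage (the real code additionally checks for eof).

#include <charconv>
#include <iostream>
#include <string_view>

std::string_view inferNumberType(std::string_view text)
{
    const char * begin = text.data();
    const char * end = begin + text.size();

    long long int_value;
    auto int_result = std::from_chars(begin, end, int_value);
    const bool read_int = int_result.ec == std::errc();

    double float_value;
    auto float_result = std::from_chars(begin, end, float_value);
    if (float_result.ec != std::errc())
        return "not a number";

    // Float parsing consumes at least as many characters as integer parsing;
    // if both stop at the same position, the value is an integer.
    if (read_int && int_result.ptr == float_result.ptr)
        return "Int64";
    return "Float64";
}

int main()
{
    for (std::string_view value : {"123", "123.5", "1e3", "abc"})
        std::cout << value << " -> " << inferNumberType(value) << '\n';
}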
+ PeekableReadBuffer peekable_buf(buf); + PeekableReadBufferCheckpoint checkpoint(peekable_buf); + Int64 tmp_int; + bool read_int = tryReadIntText(tmp_int, peekable_buf); + auto * int_end = peekable_buf.position(); + peekable_buf.rollbackToCheckpoint(true); + if (tryReadFloatText(tmp_float, peekable_buf)) + { + /// Float parsing reads no fewer bytes than integer parsing, + /// so position of the buffer is either the same, or further. + /// If it's the same, then it's integer. + if (read_int && peekable_buf.position() == int_end) + return std::make_shared(); + return std::make_shared(); + } + } + else if (tryReadFloatText(tmp_float, buf)) + { + return std::make_shared(); + } + + /// This is not a number. + return nullptr; + } + + template + DataTypePtr tryInferString(ReadBuffer & buf, const FormatSettings & settings, JSONInferenceInfo * json_info) + { + String field; + bool ok = true; + if constexpr (is_json) + ok = tryReadJSONStringInto(field, buf); + else + ok = tryReadQuotedStringInto(field, buf); + + if (!ok) + return nullptr; + + skipWhitespaceIfAny(buf); + + /// If it's object key, we should just return String type. + if constexpr (is_json) + { + if (json_info->is_object_key) + return std::make_shared(); + } + + if (auto type = tryInferDateOrDateTimeFromString(field, settings)) + return type; + + if constexpr (is_json) + { + if (settings.json.try_infer_numbers_from_strings) + { + if (auto number_type = tryInferNumberFromString(field, settings)) + { + json_info->numbers_parsed_from_json_strings.insert(number_type.get()); + return number_type; + } + } + } + + return std::make_shared(); + } + + template + DataTypePtr tryInferMapOrObject(ReadBuffer & buf, const FormatSettings & settings, JSONInferenceInfo * json_info, size_t depth) + { + assertChar('{', buf); + skipWhitespaceIfAny(buf); + + DataTypes key_types; + DataTypes value_types; + bool first = true; + bool have_invalid_nested_type = false; + while (!buf.eof() && *buf.position() != '}') + { + if (!first) + { + if (!checkChar(',', buf)) + return nullptr; + skipWhitespaceIfAny(buf); + } + else + first = false; + + DataTypePtr key_type; + if constexpr (is_json) + { + /// For JSON key type must be String. + json_info->is_object_key = true; + key_type = tryInferString(buf, settings, json_info); + json_info->is_object_key = false; + } + else + { + key_type = tryInferDataTypeForSingleFieldImpl(buf, settings, nullptr, depth + 1); + } + + if (key_type) + key_types.push_back(key_type); + else + have_invalid_nested_type = true; + + skipWhitespaceIfAny(buf); + if (!checkChar(':', buf)) + return nullptr; + skipWhitespaceIfAny(buf); + + auto value_type = tryInferDataTypeForSingleFieldImpl(buf, settings, json_info, depth + 1); + if (value_type) + value_types.push_back(value_type); + else + have_invalid_nested_type = true; + skipWhitespaceIfAny(buf); + } + + /// No '}' at the end. + if (buf.eof()) + return nullptr; + + assertChar('}', buf); + skipWhitespaceIfAny(buf); + + /// Nested data is invalid. + if (have_invalid_nested_type) + return nullptr; + + if (key_types.empty()) + { + if constexpr (is_json) + { + if (settings.json.try_infer_objects) + return std::make_shared("json", true); + } + /// Empty Map is Map(Nothing, Nothing) + return std::make_shared(std::make_shared(), std::make_shared()); + } + + if constexpr (is_json) + { + /// If it's JSON field and one of value types is JSON Object, return also JSON Object. 
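Aside: a toy decision table for the JSON-object branch above. The two flags mirror the try_infer_objects and read_objects_as_strings settings referenced in this hunk; inferJSONObjectType itself and the type-name strings are invented for illustration.

#include <iostream>
#include <string>
#include <vector>

// For a JSON object {"k1": v1, "k2": v2, ...}: if all value types agree, use Map(String, T);
// otherwise fall back to the JSON Object type or to String, depending on the settings.
std::string inferJSONObjectType(const std::vector<std::string> & value_types,
                                bool allow_object_type, bool read_objects_as_strings)
{
    bool all_equal = true;
    for (const auto & type : value_types)
        all_equal &= (type == value_types.front());

    if (all_equal && !value_types.empty())
        return "Map(String, " + value_types.front() + ")";
    if (allow_object_type)
        return "Object('json')";
    if (read_objects_as_strings)
        return "String";
    return "unknown";
}

int main()
{
    std::cout << inferJSONObjectType({"Int64", "Int64"}, false, false) << '\n';  // Map(String, Int64)
    std::cout << inferJSONObjectType({"Int64", "String"}, true, false) << '\n';  // Object('json')
    std::cout << inferJSONObjectType({"Int64", "String"}, false, true) << '\n';  // String
}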
+ for (const auto & value_type : value_types) + { + if (isObject(value_type)) + return std::make_shared("json", true); + } + + transformInferredTypesIfNeededImpl(value_types, settings, json_info); + if (!checkIfTypesAreEqual(value_types)) + { + if (settings.json.try_infer_objects) + return std::make_shared("json", true); + if (settings.json.read_objects_as_strings) + return std::make_shared(); + return nullptr; + } + + return std::make_shared(key_types.back(), value_types.back()); + } + + if (!checkIfTypesAreEqual(key_types)) + transformInferredTypesIfNeededImpl(key_types, settings); + if (!checkIfTypesAreEqual(value_types)) + transformInferredTypesIfNeededImpl(value_types, settings); + + if (!checkIfTypesAreEqual(key_types) || !checkIfTypesAreEqual(value_types)) + return nullptr; + + auto key_type = removeNullable(key_types.back()); + if (!DataTypeMap::checkKeyType(key_type)) + return nullptr; + + return std::make_shared(key_type, value_types.back()); + } + + template + DataTypePtr tryInferDataTypeForSingleFieldImpl(ReadBuffer & buf, const FormatSettings & settings, JSONInferenceInfo * json_info, size_t depth) + { + if (depth > settings.max_parser_depth) + throw Exception(ErrorCodes::TOO_DEEP_RECURSION, + "Maximum parse depth ({}) exceeded. Consider rising max_parser_depth setting.", settings.max_parser_depth); + + skipWhitespaceIfAny(buf); + + if (buf.eof()) + return nullptr; + + /// Array [field1, field2, ...] + if (*buf.position() == '[') + return tryInferArray(buf, settings, json_info, depth); + + /// Tuple (field1, field2, ...), if format is not JSON + if constexpr (!is_json) + { + if (*buf.position() == '(') + return tryInferTuple(buf, settings, json_info, depth); + } + + /// Map/Object for JSON { key1 : value1, key2 : value2, ...} + if (*buf.position() == '{') + return tryInferMapOrObject(buf, settings, json_info, depth); + + /// String + char quote = is_json ? 
'"' : '\''; + if (*buf.position() == quote) + return tryInferString(buf, settings, json_info); + + /// Bool + if (checkStringCaseInsensitive("true", buf) || checkStringCaseInsensitive("false", buf)) + return DataTypeFactory::instance().get("Bool"); + + /// Null or NaN + if (checkCharCaseInsensitive('n', buf)) + { + if (checkStringCaseInsensitive("ull", buf)) + return makeNullable(std::make_shared()); + else if (checkStringCaseInsensitive("an", buf)) + return std::make_shared(); + } + + /// Number + return tryInferNumber(buf, settings); + } +} + +void transformInferredTypesIfNeeded(DataTypePtr & first, DataTypePtr & second, const FormatSettings & settings) +{ + DataTypes types = {first, second}; + transformInferredTypesIfNeededImpl(types, settings, nullptr); + first = std::move(types[0]); + second = std::move(types[1]); +} + +void transformInferredJSONTypesIfNeeded( + DataTypePtr & first, DataTypePtr & second, const FormatSettings & settings, JSONInferenceInfo * json_info) +{ + DataTypes types = {first, second}; + transformInferredTypesIfNeededImpl(types, settings, json_info); + first = std::move(types[0]); + second = std::move(types[1]); +} + +void transformJSONTupleToArrayIfPossible(DataTypePtr & data_type, const FormatSettings & settings, JSONInferenceInfo * json_info) +{ + if (!data_type) + return; + + if (const auto * array_type = typeid_cast(data_type.get())) + { + auto nested_type = array_type->getNestedType(); + transformJSONTupleToArrayIfPossible(nested_type, settings, json_info); + data_type = std::make_shared(nested_type); + return; + } + + if (const auto * map_type = typeid_cast(data_type.get())) + { + auto value_type = map_type->getValueType(); + transformJSONTupleToArrayIfPossible(value_type, settings, json_info); + data_type = std::make_shared(map_type->getKeyType(), value_type); + return; + } + + if (const auto * tuple_type = typeid_cast(data_type.get())) + { + auto nested_types = tuple_type->getElements(); + for (auto & nested_type : nested_types) + transformJSONTupleToArrayIfPossible(nested_type, settings, json_info); + + auto nested_types_copy = nested_types; + transformInferredTypesIfNeededImpl(nested_types_copy, settings, json_info); + if (checkIfTypesAreEqual(nested_types_copy)) + data_type = std::make_shared(nested_types_copy.back()); + else + data_type = std::make_shared(nested_types); + + return; + } +} + +DataTypePtr tryInferNumberFromString(std::string_view field, const FormatSettings & settings) +{ + ReadBufferFromString buf(field); + + if (settings.try_infer_integers) + { + Int64 tmp_int; + if (tryReadIntText(tmp_int, buf) && buf.eof()) + return std::make_shared(); + } + + /// We cam safely get back to the start of buffer, because we read from a string and we didn't reach eof. 
+ buf.position() = buf.buffer().begin(); + + Float64 tmp; + if (tryReadFloatText(tmp, buf) && buf.eof()) + return std::make_shared(); + + return nullptr; +} + +DataTypePtr tryInferDateOrDateTimeFromString(std::string_view field, const FormatSettings & settings) +{ + if (settings.try_infer_dates && tryInferDate(field)) + return std::make_shared(); + + if (settings.try_infer_datetimes && tryInferDateTime(field, settings)) + return std::make_shared(9); + + return nullptr; +} + +DataTypePtr tryInferDataTypeForSingleField(ReadBuffer & buf, const FormatSettings & settings) +{ + return tryInferDataTypeForSingleFieldImpl(buf, settings, nullptr); +} + +DataTypePtr tryInferDataTypeForSingleField(std::string_view field, const FormatSettings & settings) +{ + ReadBufferFromString buf(field); + auto type = tryInferDataTypeForSingleFieldImpl(buf, settings, nullptr); + /// Check if there is no unread data in buffer. + if (!buf.eof()) + return nullptr; + return type; +} + +DataTypePtr tryInferDataTypeForSingleJSONField(ReadBuffer & buf, const FormatSettings & settings, JSONInferenceInfo * json_info) +{ + return tryInferDataTypeForSingleFieldImpl(buf, settings, json_info); +} + +DataTypePtr tryInferDataTypeForSingleJSONField(std::string_view field, const FormatSettings & settings, JSONInferenceInfo * json_info) +{ + ReadBufferFromString buf(field); + auto type = tryInferDataTypeForSingleFieldImpl(buf, settings, json_info); + /// Check if there is no unread data in buffer. + if (!buf.eof()) + return nullptr; + return type; +} + +DataTypePtr makeNullableRecursively(DataTypePtr type) +{ + if (!type) + return nullptr; + + WhichDataType which(type); + + if (which.isNullable()) + return type; + + if (which.isArray()) + { + const auto * array_type = assert_cast(type.get()); + auto nested_type = makeNullableRecursively(array_type->getNestedType()); + return nested_type ? std::make_shared(nested_type) : nullptr; + } + + if (which.isTuple()) + { + const auto * tuple_type = assert_cast(type.get()); + DataTypes nested_types; + for (const auto & element : tuple_type->getElements()) + { + auto nested_type = makeNullableRecursively(element); + if (!nested_type) + return nullptr; + nested_types.push_back(nested_type); + } + + if (tuple_type->haveExplicitNames()) + return std::make_shared(std::move(nested_types), tuple_type->getElementNames()); + + return std::make_shared(std::move(nested_types)); + + } + + if (which.isMap()) + { + const auto * map_type = assert_cast(type.get()); + auto key_type = makeNullableRecursively(map_type->getKeyType()); + auto value_type = makeNullableRecursively(map_type->getValueType()); + return key_type && value_type ? std::make_shared(removeNullable(key_type), value_type) : nullptr; + } + + if (which.isLowCarnality()) + { + const auto * lc_type = assert_cast(type.get()); + auto nested_type = makeNullableRecursively(lc_type->getDictionaryType()); + return nested_type ? 
std::make_shared(nested_type) : nullptr; + } + + if (which.isObject()) + { + const auto * object_type = assert_cast(type.get()); + if (object_type->hasNullableSubcolumns()) + return type; + return std::make_shared(object_type->getSchemaFormat(), true); + } + + return makeNullable(type); +} + +NamesAndTypesList getNamesAndRecursivelyNullableTypes(const Block & header) +{ + NamesAndTypesList result; + for (auto & [name, type] : header.getNamesAndTypesList()) + result.emplace_back(name, makeNullableRecursively(type)); + return result; +} + +bool checkIfTypeIsComplete(const DataTypePtr & type) +{ + if (!type) + return false; + + WhichDataType which(type); + + if (which.isNothing()) + return false; + + if (which.isNullable()) + return checkIfTypeIsComplete(assert_cast(type.get())->getNestedType()); + + if (which.isArray()) + return checkIfTypeIsComplete(assert_cast(type.get())->getNestedType()); + + if (which.isTuple()) + { + const auto * tuple_type = assert_cast(type.get()); + for (const auto & element : tuple_type->getElements()) + { + if (!checkIfTypeIsComplete(element)) + return false; + } + return true; + } + + if (which.isMap()) + { + const auto * map_type = assert_cast(type.get()); + if (!checkIfTypeIsComplete(map_type->getKeyType())) + return false; + return checkIfTypeIsComplete(map_type->getValueType()); + } + + return true; +} + +} diff --git a/src/Formats/SchemaInferenceUtils.h b/src/Formats/SchemaInferenceUtils.h new file mode 100644 index 00000000000..b511abf6a79 --- /dev/null +++ b/src/Formats/SchemaInferenceUtils.h @@ -0,0 +1,93 @@ +#pragma once + +#include +#include + +namespace DB +{ + +/// Struct with some additional information about inferred types for JSON formats. +struct JSONInferenceInfo +{ + /// We store numbers that were parsed from strings. + /// It's used in types transformation to change such numbers back to string if needed. + std::unordered_set numbers_parsed_from_json_strings; + /// Indicates if currently we are inferring type for Map/Object key. + bool is_object_key = false; +}; + +/// Try to determine datatype of the value in buffer/string. If the type cannot be inferred, return nullptr. +/// In general, it tries to parse a type using the following logic: +/// If we see '[', we try to parse an array of values and recursively determine datatype for each element. +/// If we see '(', we try to parse a tuple of values and recursively determine datatype for each element. +/// If we see '{', we try to parse a Map of keys and values and recursively determine datatype for each key/value. +/// If we see a quote '\'', we treat it as a string and read until next quote. +/// If we see NULL it returns Nullable(Nothing) +/// Otherwise we try to read a number. +DataTypePtr tryInferDataTypeForSingleField(ReadBuffer & buf, const FormatSettings & settings); +DataTypePtr tryInferDataTypeForSingleField(std::string_view field, const FormatSettings & settings); + +/// The same as tryInferDataTypeForSingleField, but for JSON values. +DataTypePtr tryInferDataTypeForSingleJSONField(ReadBuffer & buf, const FormatSettings & settings, JSONInferenceInfo * json_info); +DataTypePtr tryInferDataTypeForSingleJSONField(std::string_view field, const FormatSettings & settings, JSONInferenceInfo * json_info); + +/// Try to parse Date or DateTime value from a string. +DataTypePtr tryInferDateOrDateTimeFromString(std::string_view field, const FormatSettings & settings); + +/// Try to parse a number value from a string. 
By default, it tries to parse Float64, +/// but if setting try_infer_integers is enabled, it also tries to parse Int64. +DataTypePtr tryInferNumberFromString(std::string_view field, const FormatSettings & settings); + +/// It takes two types inferred for the same column and tries to transform them to a common type if possible. +/// It's also used when we try to infer some not ordinary types from another types. +/// Example 1: +/// Dates inferred from strings. In this case we should check if dates were inferred from all strings +/// in the same way and if not, transform inferred dates back to strings. +/// For example, when we have Array(Date) (like `['2020-01-01', '2020-02-02']`) and Array(String) (like `['string', 'abc']` +/// we will convert the first type to Array(String). +/// Example 2: +/// When we have integers and floats for the same value, we should convert all integers to floats. +/// For example, when we have Array(Int64) (like `[123, 456]`) and Array(Float64) (like `[42.42, 4.42]`) +/// we will convert the first type to Array(Float64) +/// Example 3: +/// When we have not complete types like Nullable(Nothing), Array(Nullable(Nothing)) or Tuple(UInt64, Nullable(Nothing)), +/// we try to complete them using the other type. +/// For example, if we have Tuple(UInt64, Nullable(Nothing)) and Tuple(Nullable(Nothing), String) we will convert both +/// types to common type Tuple(Nullable(UInt64), Nullable(String)) +void transformInferredTypesIfNeeded(DataTypePtr & first, DataTypePtr & second, const FormatSettings & settings); + +/// The same as transformInferredTypesIfNeeded but uses some specific transformations for JSON. +/// Example 1: +/// When we have numbers inferred from strings and strings, we convert all such numbers back to string. +/// For example, if we have Array(Int64) (like `['123', '456']`) and Array(String) (like `['str', 'abc']`) +/// we will convert the first type to Array(String). Note that we collect information about numbers inferred +/// from strings in json_info while inference and use it here, so we will know that Array(Int64) contains +/// integer inferred from a string. +/// Example 2: +/// When we have maps with different value types, we convert all types to JSON object type. +/// For example, if we have Map(String, UInt64) (like `{"a" : 123}`) and Map(String, String) (like `{"b" : 'abc'}`) +/// we will convert both types to Object('JSON'). +void transformInferredJSONTypesIfNeeded(DataTypePtr & first, DataTypePtr & second, const FormatSettings & settings, JSONInferenceInfo * json_info); + +/// Check if type is Tuple(...), try to transform nested types to find a common type for them and if all nested types +/// are the same after transform, we convert this tuple to an Array with common nested type. +/// For example, if we have Tuple(String, Nullable(Nothing)) we will convert it to Array(String). +/// It's used when all rows were read and we have Tuple in the result type that can be actually an Array. 
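Editor's note, not part of the patch: a minimal standalone sketch of the unification rules documented above, using a toy type model instead of ClickHouse's real DataTypePtr machinery. All names here (ToyType, unify) are illustrative only, and the fallback to String for other mismatches is a simplification of what the real functions do.

#include <cassert>

enum class ToyType { Nothing, Int64, Float64, String };

// Mirrors Example 2 and Example 3 from the comment above: integers and floats
// inferred for the same column unify to Float64, and an incomplete type
// (Nothing) is completed by the other side. Any other mismatch falls back to
// String in this sketch; the real code also recurses into Arrays/Tuples/Maps.
ToyType unify(ToyType a, ToyType b)
{
    if (a == b)
        return a;
    if (a == ToyType::Nothing)
        return b;
    if (b == ToyType::Nothing)
        return a;
    if ((a == ToyType::Int64 && b == ToyType::Float64) || (a == ToyType::Float64 && b == ToyType::Int64))
        return ToyType::Float64;
    return ToyType::String;
}

int main()
{
    assert(unify(ToyType::Int64, ToyType::Float64) == ToyType::Float64);   // Example 2
    assert(unify(ToyType::Nothing, ToyType::String) == ToyType::String);   // Example 3
}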
+void transformJSONTupleToArrayIfPossible(DataTypePtr & data_type, const FormatSettings & settings, JSONInferenceInfo * json_info); + +/// Make type Nullable recursively: +/// - Type -> Nullable(type) +/// - Array(Type) -> Array(Nullable(Type)) +/// - Tuple(Type1, ..., TypeN) -> Tuple(Nullable(Type1), ..., Nullable(TypeN)) +/// - Map(KeyType, ValueType) -> Map(KeyType, Nullable(ValueType)) +/// - LowCardinality(Type) -> LowCardinality(Nullable(Type)) +DataTypePtr makeNullableRecursively(DataTypePtr type); + +/// Call makeNullableRecursively for all types +/// in the block and return names and types. +NamesAndTypesList getNamesAndRecursivelyNullableTypes(const Block & header); + +/// Check if type contains Nothing, like Array(Tuple(Nullable(Nothing), String)) +bool checkIfTypeIsComplete(const DataTypePtr & type); + +} diff --git a/src/Functions/FunctionDateOrDateTimeAddInterval.h b/src/Functions/FunctionDateOrDateTimeAddInterval.h index 2259cc71f07..3002e330f0c 100644 --- a/src/Functions/FunctionDateOrDateTimeAddInterval.h +++ b/src/Functions/FunctionDateOrDateTimeAddInterval.h @@ -685,37 +685,27 @@ public: } else if constexpr (std::is_same_v) { - if (typeid_cast(arguments[0].type.get())) + static constexpr auto target_scale = std::invoke( + []() -> std::optional + { + if constexpr (std::is_base_of_v) + return 9; + else if constexpr (std::is_base_of_v) + return 6; + else if constexpr (std::is_base_of_v) + return 3; + + return {}; + }); + + auto timezone = extractTimeZoneNameFromFunctionArguments(arguments, 2, 0); + if (const auto* datetime64_type = typeid_cast(arguments[0].type.get())) { - const auto & datetime64_type = assert_cast(*arguments[0].type); - - auto from_scale = datetime64_type.getScale(); - auto scale = from_scale; - - if (std::is_same_v) - scale = 9; - else if (std::is_same_v) - scale = 6; - else if (std::is_same_v) - scale = 3; - - scale = std::max(scale, from_scale); - - return std::make_shared(scale, extractTimeZoneNameFromFunctionArguments(arguments, 2, 0)); + const auto from_scale = datetime64_type->getScale(); + return std::make_shared(std::max(from_scale, target_scale.value_or(from_scale)), std::move(timezone)); } - else - { - auto scale = DataTypeDateTime64::default_scale; - if (std::is_same_v) - scale = 9; - else if (std::is_same_v) - scale = 6; - else if (std::is_same_v) - scale = 3; - - return std::make_shared(scale, extractTimeZoneNameFromFunctionArguments(arguments, 2, 0)); - } + return std::make_shared(target_scale.value_or(DataTypeDateTime64::default_scale), std::move(timezone)); } throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected result type in datetime add interval function"); diff --git a/src/Functions/FunctionsBinaryRepresentation.cpp b/src/Functions/FunctionsBinaryRepresentation.cpp index f71f05bbf34..b0bdbc2130c 100644 --- a/src/Functions/FunctionsBinaryRepresentation.cpp +++ b/src/Functions/FunctionsBinaryRepresentation.cpp @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #include #include @@ -126,20 +126,7 @@ struct UnhexImpl static void decode(const char * pos, const char * end, char *& out) { - if ((end - pos) & 1) - { - *out = unhex(*pos); - ++out; - ++pos; - } - while (pos < end) - { - *out = unhex2(pos); - pos += word_size; - ++out; - } - *out = '\0'; - ++out; + hexStringDecode(pos, end, out, word_size); } }; @@ -233,52 +220,7 @@ struct UnbinImpl static void decode(const char * pos, const char * end, char *& out) { - if (pos == end) - { - *out = '\0'; - ++out; - return; - } - - UInt8 left = 0; - - /// end - pos is the 
length of input. - /// (length & 7) to make remain bits length mod 8 is zero to split. - /// e.g. the length is 9 and the input is "101000001", - /// first left_cnt is 1, left is 0, right shift, pos is 1, left = 1 - /// then, left_cnt is 0, remain input is '01000001'. - for (UInt8 left_cnt = (end - pos) & 7; left_cnt > 0; --left_cnt) - { - left = left << 1; - if (*pos != '0') - left += 1; - ++pos; - } - - if (left != 0 || end - pos == 0) - { - *out = left; - ++out; - } - - assert((end - pos) % 8 == 0); - - while (end - pos != 0) - { - UInt8 c = 0; - for (UInt8 i = 0; i < 8; ++i) - { - c = c << 1; - if (*pos != '0') - c += 1; - ++pos; - } - *out = c; - ++out; - } - - *out = '\0'; - ++out; + binStringDecode(pos, end, out); } }; diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index c9638ab95af..b3f892ffc0d 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -2147,14 +2147,18 @@ struct ToNumberMonotonicity if constexpr (std::is_floating_point_v) return { .is_monotonic = true, .is_always_monotonic = true }; - /// If converting from Float, for monotonicity, arguments must fit in range of result type. - bool is_type_float = false; - if (const auto * low_cardinality = typeid_cast(&type)) - is_type_float = WhichDataType(low_cardinality->getDictionaryType()).isFloat(); - else - is_type_float = WhichDataType(type).isFloat(); + const auto * low_cardinality = typeid_cast(&type); + const IDataType * low_cardinality_dictionary_type = nullptr; + if (low_cardinality) + low_cardinality_dictionary_type = low_cardinality->getDictionaryType().get(); - if (is_type_float) + WhichDataType which_type(type); + WhichDataType which_inner_type = low_cardinality + ? WhichDataType(low_cardinality_dictionary_type) + : WhichDataType(type); + + /// If converting from Float, for monotonicity, arguments must fit in range of result type. + if (which_inner_type.isFloat()) { if (left.isNull() || right.isNull()) return {}; @@ -2180,7 +2184,7 @@ struct ToNumberMonotonicity const size_t size_of_to = sizeof(T); /// Do not support 128 bit integers and decimals for now. - if (size_of_from > sizeof(Int64)) + if (size_of_from > sizeof(Int64) || which_inner_type.isDecimal()) return {}; const bool left_in_first_half = left.isNull() diff --git a/src/Functions/FunctionsLogical.h b/src/Functions/FunctionsLogical.h index 30d8983b8cc..833191866e5 100644 --- a/src/Functions/FunctionsLogical.h +++ b/src/Functions/FunctionsLogical.h @@ -176,6 +176,7 @@ public: ColumnPtr executeShortCircuit(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) const; bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } size_t getNumberOfArguments() const override { return 0; } + bool canBeExecutedOnLowCardinalityDictionary() const override { return false; } bool useDefaultImplementationForNulls() const override { return !Impl::specialImplementationForNulls(); } diff --git a/src/Functions/IFunction.h b/src/Functions/IFunction.h index fc1a353a873..e82b98f0084 100644 --- a/src/Functions/IFunction.h +++ b/src/Functions/IFunction.h @@ -3,6 +3,8 @@ #include #include #include +#include +#include #include #include "config.h" @@ -122,11 +124,11 @@ using Values = std::vector; /** Function with known arguments and return type (when the specific overload was chosen). * It is also the point where all function-specific properties are known. 
*/ -class IFunctionBase +class IFunctionBase : public IResolvedFunction { public: - virtual ~IFunctionBase() = default; + ~IFunctionBase() override = default; virtual ColumnPtr execute( /// NOLINT const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, bool dry_run = false) const @@ -137,8 +139,10 @@ public: /// Get the main function name. virtual String getName() const = 0; - virtual const DataTypes & getArgumentTypes() const = 0; - virtual const DataTypePtr & getResultType() const = 0; + const Array & getParameters() const final + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "IFunctionBase doesn't support getParameters method"); + } /// Do preparations and return executable. /// sample_columns should contain data types of arguments and values of constants, if relevant. @@ -281,7 +285,7 @@ public: }; -using FunctionBasePtr = std::shared_ptr; +using FunctionBasePtr = std::shared_ptr; /** Creates IFunctionBase from argument types list (chooses one function overload). diff --git a/src/Functions/IFunctionAdaptors.h b/src/Functions/IFunctionAdaptors.h index dbcc07af57a..eb2350d9b5e 100644 --- a/src/Functions/IFunctionAdaptors.h +++ b/src/Functions/IFunctionAdaptors.h @@ -51,6 +51,8 @@ public: const DataTypes & getArgumentTypes() const override { return arguments; } const DataTypePtr & getResultType() const override { return result_type; } + const FunctionPtr & getFunction() const { return function; } + #if USE_EMBEDDED_COMPILER bool isCompilable() const override { return function->isCompilable(getArgumentTypes()); } diff --git a/src/Functions/array/arrayJoin.cpp b/src/Functions/array/arrayJoin.cpp index 1dbe4cebb14..41f19fae6bf 100644 --- a/src/Functions/array/arrayJoin.cpp +++ b/src/Functions/array/arrayJoin.cpp @@ -2,6 +2,7 @@ #include #include #include +#include namespace DB @@ -52,11 +53,11 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { - const DataTypeArray * arr = checkAndGetDataType(arguments[0].get()); + const auto & arr = getArrayJoinDataType(arguments[0]); if (!arr) - throw Exception("Argument for function " + getName() + " must be Array.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - + throw Exception("Argument for function " + getName() + " must be Array or Map", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); return arr->getNestedType(); + } ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr &, size_t /*input_rows_count*/) const override diff --git a/src/Functions/array/arrayReduce.cpp b/src/Functions/array/arrayReduce.cpp index c93e67d4b1c..e7ed8577049 100644 --- a/src/Functions/array/arrayReduce.cpp +++ b/src/Functions/array/arrayReduce.cpp @@ -104,7 +104,7 @@ DataTypePtr FunctionArrayReduce::getReturnTypeImpl(const ColumnsWithTypeAndName aggregate_function = AggregateFunctionFactory::instance().get(aggregate_function_name, argument_types, params_row, properties); } - return aggregate_function->getReturnType(); + return aggregate_function->getResultType(); } diff --git a/src/Functions/array/arrayReduceInRanges.cpp b/src/Functions/array/arrayReduceInRanges.cpp index 11d5e03eb3d..2cceea4ddba 100644 --- a/src/Functions/array/arrayReduceInRanges.cpp +++ b/src/Functions/array/arrayReduceInRanges.cpp @@ -122,7 +122,7 @@ DataTypePtr FunctionArrayReduceInRanges::getReturnTypeImpl(const ColumnsWithType aggregate_function = AggregateFunctionFactory::instance().get(aggregate_function_name, argument_types, params_row, properties); } - return std::make_shared(aggregate_function->getReturnType()); 
+ return std::make_shared(aggregate_function->getResultType()); } diff --git a/src/Functions/array/range.cpp b/src/Functions/array/range.cpp index 3b5bb686e60..46284ce95bf 100644 --- a/src/Functions/array/range.cpp +++ b/src/Functions/array/range.cpp @@ -9,6 +9,7 @@ #include #include #include +#include namespace DB @@ -56,7 +57,7 @@ private: for (const auto & arg : arguments) { - if (!isUnsignedInteger(arg)) + if (!isInteger(arg)) throw Exception{"Illegal type " + arg->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; } @@ -72,8 +73,12 @@ private: { const auto & in_data = in->getData(); const auto total_values = std::accumulate(std::begin(in_data), std::end(in_data), size_t{}, - [this] (const size_t lhs, const size_t rhs) + [this] (const size_t lhs, const T rhs) { + if (rhs < 0) + throw Exception{"A call to function " + getName() + " overflows, only support positive values when only end is provided", + ErrorCodes::ARGUMENT_OUT_OF_BOUND}; + const auto sum = lhs + rhs; if (sum < lhs) throw Exception{"A call to function " + getName() + " overflows, investigate the values of arguments you are passing", @@ -96,7 +101,7 @@ private: IColumn::Offset offset{}; for (size_t row_idx = 0, rows = in->size(); row_idx < rows; ++row_idx) { - for (size_t elem_idx = 0, elems = in_data[row_idx]; elem_idx < elems; ++elem_idx) + for (T elem_idx = 0, elems = in_data[row_idx]; elem_idx < elems; ++elem_idx) out_data[offset + elem_idx] = static_cast(elem_idx); offset += in_data[row_idx]; @@ -121,15 +126,22 @@ private: size_t total_values = 0; size_t pre_values = 0; + PODArray row_length(input_rows_count); for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) { - if (start < end_data[row_idx] && step == 0) + if (step == 0) throw Exception{"A call to function " + getName() + " overflows, the 3rd argument step can't be zero", ErrorCodes::ARGUMENT_OUT_OF_BOUND}; - pre_values += start >= end_data[row_idx] ? 
0 - : (end_data[row_idx] - start - 1) / step + 1; + if (start < end_data[row_idx] && step > 0) + row_length[row_idx] = (static_cast<__int128_t>(end_data[row_idx]) - static_cast<__int128_t>(start) - 1) / static_cast<__int128_t>(step) + 1; + else if (start > end_data[row_idx] && step < 0) + row_length[row_idx] = (static_cast<__int128_t>(end_data[row_idx]) - static_cast<__int128_t>(start) + 1) / static_cast<__int128_t>(step) + 1; + else + row_length[row_idx] = 0; + + pre_values += row_length[row_idx]; if (pre_values < total_values) throw Exception{"A call to function " + getName() + " overflows, investigate the values of arguments you are passing", @@ -151,15 +163,11 @@ private: IColumn::Offset offset{}; for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) { - for (size_t st = start, ed = end_data[row_idx]; st < ed; st += step) + for (size_t idx = 0; idx < row_length[row_idx]; ++idx) { - out_data[offset++] = static_cast(st); - - if (st > st + step) - throw Exception{"A call to function " + getName() + " overflows, investigate the values of arguments you are passing", - ErrorCodes::ARGUMENT_OUT_OF_BOUND}; + out_data[offset] = static_cast(start + offset * step); + ++offset; } - out_offsets[row_idx] = offset; } @@ -180,19 +188,26 @@ private: size_t total_values = 0; size_t pre_values = 0; + PODArray row_length(input_rows_count); for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) { - if (start_data[row_idx] < end_data[row_idx] && step == 0) + if (step == 0) throw Exception{"A call to function " + getName() + " overflows, the 3rd argument step can't be zero", ErrorCodes::ARGUMENT_OUT_OF_BOUND}; - pre_values += start_data[row_idx] >= end_data[row_idx] ? 0 - : (end_data[row_idx] - start_data[row_idx] - 1) / step + 1; + if (start_data[row_idx] < end_data[row_idx] && step > 0) + row_length[row_idx] = (static_cast<__int128_t>(end_data[row_idx]) - static_cast<__int128_t>(start_data[row_idx]) - 1) / static_cast<__int128_t>(step) + 1; + else if (start_data[row_idx] > end_data[row_idx] && step < 0) + row_length[row_idx] = (static_cast<__int128_t>(end_data[row_idx]) - static_cast<__int128_t>(start_data[row_idx]) + 1) / static_cast<__int128_t>(step) + 1; + else + row_length[row_idx] = 0; + + pre_values += row_length[row_idx]; if (pre_values < total_values) throw Exception{"A call to function " + getName() + " overflows, investigate the values of arguments you are passing", - ErrorCodes::ARGUMENT_OUT_OF_BOUND}; + ErrorCodes::ARGUMENT_OUT_OF_BOUND}; total_values = pre_values; if (total_values > max_elements) @@ -210,15 +225,8 @@ private: IColumn::Offset offset{}; for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) { - for (size_t st = start_data[row_idx], ed = end_data[row_idx]; st < ed; st += step) - { - out_data[offset++] = static_cast(st); - - if (st > st + step) - throw Exception{"A call to function " + getName() + " overflows, investigate the values of arguments you are passing", - ErrorCodes::ARGUMENT_OUT_OF_BOUND}; - } - + for (size_t idx = 0; idx < row_length[row_idx]; idx++) + out_data[offset++] = static_cast(start_data[row_idx] + idx * step); out_offsets[row_idx] = offset; } @@ -239,15 +247,22 @@ private: size_t total_values = 0; size_t pre_values = 0; + PODArray row_length(input_rows_count); for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) { - if (start < end_data[row_idx] && step_data[row_idx] == 0) + if (step_data[row_idx] == 0) throw Exception{"A call to function " + getName() + " overflows, the 3rd argument step can't be zero", - 
ErrorCodes::ARGUMENT_OUT_OF_BOUND}; + ErrorCodes::ARGUMENT_OUT_OF_BOUND}; - pre_values += start >= end_data[row_idx] ? 0 - : (end_data[row_idx] - start - 1) / step_data[row_idx] + 1; + if (start < end_data[row_idx] && step_data[row_idx] > 0) + row_length[row_idx] = (static_cast<__int128_t>(end_data[row_idx]) - static_cast<__int128_t>(start) - 1) / static_cast<__int128_t>(step_data[row_idx]) + 1; + else if (start > end_data[row_idx] && step_data[row_idx] < 0) + row_length[row_idx] = (static_cast<__int128_t>(end_data[row_idx]) - static_cast<__int128_t>(start) + 1) / static_cast<__int128_t>(step_data[row_idx]) + 1; + else + row_length[row_idx] = 0; + + pre_values += row_length[row_idx]; if (pre_values < total_values) throw Exception{"A call to function " + getName() + " overflows, investigate the values of arguments you are passing", @@ -269,15 +284,8 @@ private: IColumn::Offset offset{}; for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) { - for (size_t st = start, ed = end_data[row_idx]; st < ed; st += step_data[row_idx]) - { - out_data[offset++] = static_cast(st); - - if (st > st + step_data[row_idx]) - throw Exception{"A call to function " + getName() + " overflows, investigate the values of arguments you are passing", - ErrorCodes::ARGUMENT_OUT_OF_BOUND}; - } - + for (size_t idx = 0; idx < row_length[row_idx]; idx++) + out_data[offset++] = static_cast(start + offset * step_data[row_idx]); out_offsets[row_idx] = offset; } @@ -301,15 +309,21 @@ private: size_t total_values = 0; size_t pre_values = 0; + PODArray row_length(input_rows_count); for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) { - if (start_data[row_idx] < end_start[row_idx] && step_data[row_idx] == 0) - throw Exception{"A call to function " + getName() + " overflows, the 3rd argument step can't be zero", + if (step_data[row_idx] == 0) + throw Exception{"A call to function " + getName() + " overflows, the 3rd argument step can't less or equal to zero", ErrorCodes::ARGUMENT_OUT_OF_BOUND}; + if (start_data[row_idx] < end_start[row_idx] && step_data[row_idx] > 0) + row_length[row_idx] = (static_cast<__int128_t>(end_start[row_idx]) - static_cast<__int128_t>(start_data[row_idx]) - 1) / static_cast<__int128_t>(step_data[row_idx]) + 1; + else if (start_data[row_idx] > end_start[row_idx] && step_data[row_idx] < 0) + row_length[row_idx] = (static_cast<__int128_t>(end_start[row_idx]) - static_cast<__int128_t>(start_data[row_idx]) + 1) / static_cast<__int128_t>(step_data[row_idx]) + 1; + else + row_length[row_idx] = 0; - pre_values += start_data[row_idx] >= end_start[row_idx] ? 
0 - : (end_start[row_idx] -start_data[row_idx] - 1) / (step_data[row_idx]) + 1; + pre_values += row_length[row_idx]; if (pre_values < total_values) throw Exception{"A call to function " + getName() + " overflows, investigate the values of arguments you are passing", @@ -331,15 +345,8 @@ private: IColumn::Offset offset{}; for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) { - for (size_t st = start_data[row_idx], ed = end_start[row_idx]; st < ed; st += step_data[row_idx]) - { - out_data[offset++] = static_cast(st); - - if (st > st + step_data[row_idx]) - throw Exception{"A call to function " + getName() + " overflows, investigate the values of arguments you are passing", - ErrorCodes::ARGUMENT_OUT_OF_BOUND}; - } - + for (size_t idx = 0; idx < row_length[row_idx]; idx++) + out_data[offset++] = static_cast(start_data[row_idx] + idx * step_data[row_idx]); out_offsets[row_idx] = offset; } @@ -351,23 +358,20 @@ private: DataTypePtr elem_type = checkAndGetDataType(result_type.get())->getNestedType(); WhichDataType which(elem_type); - if (!which.isUInt8() - && !which.isUInt16() - && !which.isUInt32() - && !which.isUInt64()) + if (!which.isNativeUInt() && !which.isNativeInt()) { throw Exception{"Illegal columns of arguments of function " + getName() - + ", the function only implemented for unsigned integers up to 64 bit", ErrorCodes::ILLEGAL_COLUMN}; + + ", the function only implemented for unsigned/signed integers up to 64 bit", + ErrorCodes::ILLEGAL_COLUMN}; } ColumnPtr res; if (arguments.size() == 1) { const auto * col = arguments[0].column.get(); - if (!((res = executeInternal(col)) - || (res = executeInternal(col)) - || (res = executeInternal(col)) - || (res = executeInternal(col)))) + if (!((res = executeInternal(col)) || (res = executeInternal(col)) || (res = executeInternal(col)) + || (res = executeInternal(col)) || (res = executeInternal(col)) || (res = executeInternal(col)) + || (res = executeInternal(col)) || (res = executeInternal(col)))) { throw Exception{"Illegal column " + col->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN}; } @@ -402,44 +406,93 @@ private: bool is_step_const = isColumnConst(*column_ptrs[2]); if (is_start_const && is_step_const) { - UInt64 start = assert_cast(*column_ptrs[0]).getUInt(0); - UInt64 step = assert_cast(*column_ptrs[2]).getUInt(0); - - if ((res = executeConstStartStep(column_ptrs[1], start, step, input_rows_count)) || - (res = executeConstStartStep(column_ptrs[1], start, step, input_rows_count)) || - (res = executeConstStartStep(column_ptrs[1], static_cast(start), static_cast(step), input_rows_count)) || - (res = executeConstStartStep(column_ptrs[1], start, step, input_rows_count))) + if (which.isNativeUInt()) { + UInt64 start = assert_cast(*column_ptrs[0]).getUInt(0); + UInt64 step = assert_cast(*column_ptrs[2]).getUInt(0); + + if ((res = executeConstStartStep(column_ptrs[1], start, step, input_rows_count)) + || (res = executeConstStartStep(column_ptrs[1], start, step, input_rows_count)) + || (res = executeConstStartStep( + column_ptrs[1], static_cast(start), static_cast(step), input_rows_count)) + || (res = executeConstStartStep(column_ptrs[1], start, step, input_rows_count))) + { + } + } + else if (which.isNativeInt()) + { + Int64 start = assert_cast(*column_ptrs[0]).getInt(0); + Int64 step = assert_cast(*column_ptrs[2]).getInt(0); + + if ((res = executeConstStartStep(column_ptrs[1], start, step, input_rows_count)) + || (res = executeConstStartStep(column_ptrs[1], start, step, input_rows_count)) + || (res 
= executeConstStartStep( + column_ptrs[1], static_cast(start), static_cast(step), input_rows_count)) + || (res = executeConstStartStep(column_ptrs[1], start, step, input_rows_count))) + { + } } } else if (is_start_const && !is_step_const) { - UInt64 start = assert_cast(*column_ptrs[0]).getUInt(0); - - if ((res = executeConstStart(column_ptrs[1], column_ptrs[2], start, input_rows_count)) || - (res = executeConstStart(column_ptrs[1], column_ptrs[2], start, input_rows_count)) || - (res = executeConstStart(column_ptrs[1], column_ptrs[2], static_cast(start), input_rows_count)) || - (res = executeConstStart(column_ptrs[1], column_ptrs[2], start, input_rows_count))) + if (which.isNativeUInt()) { + UInt64 start = assert_cast(*column_ptrs[0]).getUInt(0); + + if ((res = executeConstStart(column_ptrs[1], column_ptrs[2], start, input_rows_count)) + || (res = executeConstStart(column_ptrs[1], column_ptrs[2], start, input_rows_count)) + || (res = executeConstStart(column_ptrs[1], column_ptrs[2], static_cast(start), input_rows_count)) + || (res = executeConstStart(column_ptrs[1], column_ptrs[2], start, input_rows_count))) + { + } + } + else if (which.isNativeInt()) + { + Int64 start = assert_cast(*column_ptrs[0]).getInt(0); + + if ((res = executeConstStart(column_ptrs[1], column_ptrs[2], start, input_rows_count)) + || (res = executeConstStart(column_ptrs[1], column_ptrs[2], start, input_rows_count)) + || (res = executeConstStart(column_ptrs[1], column_ptrs[2], static_cast(start), input_rows_count)) + || (res = executeConstStart(column_ptrs[1], column_ptrs[2], start, input_rows_count))) + { + } } } else if (!is_start_const && is_step_const) { - UInt64 step = assert_cast(*column_ptrs[2]).getUInt(0); - - if ((res = executeConstStep(column_ptrs[0], column_ptrs[1], step, input_rows_count)) || - (res = executeConstStep(column_ptrs[0], column_ptrs[1], step, input_rows_count)) || - (res = executeConstStep(column_ptrs[0], column_ptrs[1], static_cast(step), input_rows_count)) || - (res = executeConstStep(column_ptrs[0], column_ptrs[1], step, input_rows_count))) + if (which.isNativeUInt()) { + UInt64 step = assert_cast(*column_ptrs[2]).getUInt(0); + + if ((res = executeConstStep(column_ptrs[0], column_ptrs[1], step, input_rows_count)) + || (res = executeConstStep(column_ptrs[0], column_ptrs[1], step, input_rows_count)) + || (res = executeConstStep(column_ptrs[0], column_ptrs[1], static_cast(step), input_rows_count)) + || (res = executeConstStep(column_ptrs[0], column_ptrs[1], step, input_rows_count))) + { + } + } + else if (which.isNativeInt()) + { + Int64 step = assert_cast(*column_ptrs[2]).getInt(0); + + if ((res = executeConstStep(column_ptrs[0], column_ptrs[1], step, input_rows_count)) + || (res = executeConstStep(column_ptrs[0], column_ptrs[1], step, input_rows_count)) + || (res = executeConstStep(column_ptrs[0], column_ptrs[1], static_cast(step), input_rows_count)) + || (res = executeConstStep(column_ptrs[0], column_ptrs[1], step, input_rows_count))) + { + } } } else { - if ((res = executeGeneric(column_ptrs[0], column_ptrs[1], column_ptrs[2], input_rows_count)) || - (res = executeGeneric(column_ptrs[0], column_ptrs[1], column_ptrs[2], input_rows_count)) || - (res = executeGeneric(column_ptrs[0], column_ptrs[1], column_ptrs[2], input_rows_count)) || - (res = executeGeneric(column_ptrs[0], column_ptrs[1], column_ptrs[2], input_rows_count))) + if ((res = executeGeneric(column_ptrs[0], column_ptrs[1], column_ptrs[2], input_rows_count)) + || (res = executeGeneric(column_ptrs[0], column_ptrs[1], column_ptrs[2], 
input_rows_count)) + || (res = executeGeneric(column_ptrs[0], column_ptrs[1], column_ptrs[2], input_rows_count)) + || (res = executeGeneric(column_ptrs[0], column_ptrs[1], column_ptrs[2], input_rows_count)) + || (res = executeGeneric(column_ptrs[0], column_ptrs[1], column_ptrs[2], input_rows_count)) + || (res = executeGeneric(column_ptrs[0], column_ptrs[1], column_ptrs[2], input_rows_count)) + || (res = executeGeneric(column_ptrs[0], column_ptrs[1], column_ptrs[2], input_rows_count)) + || (res = executeGeneric(column_ptrs[0], column_ptrs[1], column_ptrs[2], input_rows_count))) { } } diff --git a/src/Functions/bar.cpp b/src/Functions/bar.cpp index 982e1ff3a25..e1f65a61175 100644 --- a/src/Functions/bar.cpp +++ b/src/Functions/bar.cpp @@ -118,7 +118,7 @@ public: size_t next_size = current_offset + UnicodeBar::getWidthInBytes(width) + 1; dst_chars.resize(next_size); - UnicodeBar::render(width, reinterpret_cast(&dst_chars[current_offset])); + UnicodeBar::render(width, reinterpret_cast(&dst_chars[current_offset]), reinterpret_cast(&dst_chars[next_size])); current_offset = next_size; dst_offsets[i] = current_offset; } diff --git a/src/Functions/formatDateTime.cpp b/src/Functions/formatDateTime.cpp index 4db04d61d84..4c24239a06c 100644 --- a/src/Functions/formatDateTime.cpp +++ b/src/Functions/formatDateTime.cpp @@ -48,7 +48,6 @@ template <> struct ActionValueTypeMap { using ActionValueTyp template <> struct ActionValueTypeMap { using ActionValueType = UInt16; }; template <> struct ActionValueTypeMap { using ActionValueType = Int32; }; template <> struct ActionValueTypeMap { using ActionValueType = UInt32; }; -// TODO(vnemkov): to add sub-second format instruction, make that DateTime64 and do some math in Action. template <> struct ActionValueTypeMap { using ActionValueType = Int64; }; @@ -113,16 +112,16 @@ private: class Action { public: - using Func = void (*)(char *, Time, const DateLUTImpl &); + using Func = void (*)(char *, Time, UInt64, UInt32, const DateLUTImpl &); Func func; size_t shift; explicit Action(Func func_, size_t shift_ = 0) : func(func_), shift(shift_) {} - void perform(char *& target, Time source, const DateLUTImpl & timezone) + void perform(char *& target, Time source, UInt64 fractional_second, UInt32 scale, const DateLUTImpl & timezone) { - func(target, source, timezone); + func(target, source, fractional_second, scale, timezone); target += shift; } @@ -148,30 +147,30 @@ private: } public: - static void noop(char *, Time, const DateLUTImpl &) + static void noop(char *, Time, UInt64 , UInt32 , const DateLUTImpl &) { } - static void century(char * target, Time source, const DateLUTImpl & timezone) + static void century(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) { auto year = ToYearImpl::execute(source, timezone); auto century = year / 100; writeNumber2(target, century); } - static void dayOfMonth(char * target, Time source, const DateLUTImpl & timezone) + static void dayOfMonth(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) { writeNumber2(target, ToDayOfMonthImpl::execute(source, timezone)); } - static void americanDate(char * target, Time source, const DateLUTImpl & timezone) + static void americanDate(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) { writeNumber2(target, ToMonthImpl::execute(source, timezone)); writeNumber2(target + 3, ToDayOfMonthImpl::execute(source, timezone)); 
writeNumber2(target + 6, ToYearImpl::execute(source, timezone) % 100); } - static void dayOfMonthSpacePadded(char * target, Time source, const DateLUTImpl & timezone) + static void dayOfMonthSpacePadded(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) { auto day = ToDayOfMonthImpl::execute(source, timezone); if (day < 10) @@ -180,101 +179,107 @@ private: writeNumber2(target, day); } - static void ISO8601Date(char * target, Time source, const DateLUTImpl & timezone) // NOLINT + static void ISO8601Date(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) // NOLINT { writeNumber4(target, ToYearImpl::execute(source, timezone)); writeNumber2(target + 5, ToMonthImpl::execute(source, timezone)); writeNumber2(target + 8, ToDayOfMonthImpl::execute(source, timezone)); } - static void dayOfYear(char * target, Time source, const DateLUTImpl & timezone) + static void dayOfYear(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) { writeNumber3(target, ToDayOfYearImpl::execute(source, timezone)); } - static void month(char * target, Time source, const DateLUTImpl & timezone) + static void month(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) { writeNumber2(target, ToMonthImpl::execute(source, timezone)); } - static void dayOfWeek(char * target, Time source, const DateLUTImpl & timezone) + static void dayOfWeek(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) { *target += ToDayOfWeekImpl::execute(source, timezone); } - static void dayOfWeek0To6(char * target, Time source, const DateLUTImpl & timezone) + static void dayOfWeek0To6(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) { auto day = ToDayOfWeekImpl::execute(source, timezone); *target += (day == 7 ? 
0 : day); } - static void ISO8601Week(char * target, Time source, const DateLUTImpl & timezone) // NOLINT + static void ISO8601Week(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) // NOLINT { writeNumber2(target, ToISOWeekImpl::execute(source, timezone)); } - static void ISO8601Year2(char * target, Time source, const DateLUTImpl & timezone) // NOLINT + static void ISO8601Year2(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) // NOLINT { writeNumber2(target, ToISOYearImpl::execute(source, timezone) % 100); } - static void ISO8601Year4(char * target, Time source, const DateLUTImpl & timezone) // NOLINT + static void ISO8601Year4(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) // NOLINT { writeNumber4(target, ToISOYearImpl::execute(source, timezone)); } - static void year2(char * target, Time source, const DateLUTImpl & timezone) + static void year2(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) { writeNumber2(target, ToYearImpl::execute(source, timezone) % 100); } - static void year4(char * target, Time source, const DateLUTImpl & timezone) + static void year4(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) { writeNumber4(target, ToYearImpl::execute(source, timezone)); } - static void hour24(char * target, Time source, const DateLUTImpl & timezone) + static void hour24(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) { writeNumber2(target, ToHourImpl::execute(source, timezone)); } - static void hour12(char * target, Time source, const DateLUTImpl & timezone) + static void hour12(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) { auto x = ToHourImpl::execute(source, timezone); writeNumber2(target, x == 0 ? 12 : (x > 12 ? 
x - 12 : x)); } - static void minute(char * target, Time source, const DateLUTImpl & timezone) + static void minute(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) { writeNumber2(target, ToMinuteImpl::execute(source, timezone)); } - static void AMPM(char * target, Time source, const DateLUTImpl & timezone) // NOLINT + static void AMPM(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) // NOLINT { auto hour = ToHourImpl::execute(source, timezone); if (hour >= 12) *target = 'P'; } - static void hhmm24(char * target, Time source, const DateLUTImpl & timezone) + static void hhmm24(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) { writeNumber2(target, ToHourImpl::execute(source, timezone)); writeNumber2(target + 3, ToMinuteImpl::execute(source, timezone)); } - static void second(char * target, Time source, const DateLUTImpl & timezone) + static void second(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) { writeNumber2(target, ToSecondImpl::execute(source, timezone)); } - static void ISO8601Time(char * target, Time source, const DateLUTImpl & timezone) // NOLINT + static void fractionalSecond(char * target, Time /*source*/, UInt64 fractional_second, UInt32 scale, const DateLUTImpl & /*timezone*/) + { + for (Int64 i = scale, value = fractional_second; i > 0; --i, value /= 10) + target[i - 1] += value % 10; + } + + static void ISO8601Time(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) // NOLINT { writeNumber2(target, ToHourImpl::execute(source, timezone)); writeNumber2(target + 3, ToMinuteImpl::execute(source, timezone)); writeNumber2(target + 6, ToSecondImpl::execute(source, timezone)); } - static void timezoneOffset(char * target, Time source, const DateLUTImpl & timezone) + static void timezoneOffset(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) { auto offset = TimezoneOffsetImpl::execute(source, timezone); if (offset < 0) @@ -287,7 +292,7 @@ private: writeNumber2(target + 3, offset % 3600 / 60); } - static void quarter(char * target, Time source, const DateLUTImpl & timezone) + static void quarter(char * target, Time source, UInt64 /*fractional_second*/, UInt32 /*scale*/, const DateLUTImpl & timezone) { *target += ToQuarterImpl::execute(source, timezone); } @@ -426,9 +431,15 @@ public: String pattern = pattern_column->getValue(); + UInt32 scale [[maybe_unused]] = 0; + if constexpr (std::is_same_v) + { + scale = times->getScale(); + } + using T = typename ActionValueTypeMap::ActionValueType; std::vector> instructions; - String pattern_to_fill = parsePattern(pattern, instructions); + String pattern_to_fill = parsePattern(pattern, instructions, scale); size_t result_size = pattern_to_fill.size(); const DateLUTImpl * time_zone_tmp = nullptr; @@ -444,12 +455,6 @@ public: const DateLUTImpl & time_zone = *time_zone_tmp; const auto & vec = times->getData(); - UInt32 scale [[maybe_unused]] = 0; - if constexpr (std::is_same_v) - { - scale = times->getScale(); - } - auto col_res = ColumnString::create(); auto & dst_data = col_res->getChars(); auto & dst_offsets = col_res->getOffsets(); @@ -484,16 +489,16 @@ public: { if constexpr (std::is_same_v) { + const auto c = DecimalUtils::split(vec[i], scale); for (auto & instruction : instructions) { - const auto c = 
DecimalUtils::split(vec[i], scale); - instruction.perform(pos, static_cast(c.whole), time_zone); + instruction.perform(pos, static_cast(c.whole), c.fractional, scale, time_zone); } } else { for (auto & instruction : instructions) - instruction.perform(pos, static_cast(vec[i]), time_zone); + instruction.perform(pos, static_cast(vec[i]), 0, 0, time_zone); } dst_offsets[i] = pos - begin; @@ -504,7 +509,7 @@ public: } template - String parsePattern(const String & pattern, std::vector> & instructions) const + String parsePattern(const String & pattern, std::vector> & instructions, UInt32 scale) const { String result; @@ -573,6 +578,16 @@ public: result.append(" 0"); break; + // Fractional seconds + case 'f': + { + /// If the time data type has no fractional part, then we print '0' as the fractional part. + const auto actual_scale = std::max(1, scale); + instructions.emplace_back(&Action::fractionalSecond, actual_scale); + result.append(actual_scale, '0'); + break; + } + // Short YYYY-MM-DD date, equivalent to %Y-%m-%d 2001-08-23 case 'F': instructions.emplace_back(&Action::ISO8601Date, 10); diff --git a/src/Functions/if.cpp b/src/Functions/if.cpp index 0baf64c83d9..049e6d24920 100644 --- a/src/Functions/if.cpp +++ b/src/Functions/if.cpp @@ -1026,6 +1026,7 @@ public: } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } ColumnNumbers getArgumentsThatDontImplyNullableReturnType(size_t /*number_of_arguments*/) const override { return {0}; } + bool canBeExecutedOnLowCardinalityDictionary() const override { return false; } /// Get result types by argument types. If the function does not apply to these arguments, throw an exception. DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override diff --git a/src/Functions/in.cpp b/src/Functions/in.cpp index 5773e823a80..1de8371cf90 100644 --- a/src/Functions/in.cpp +++ b/src/Functions/in.cpp @@ -17,6 +17,7 @@ namespace DB namespace ErrorCodes { extern const int ILLEGAL_COLUMN; + extern const int LOGICAL_ERROR; } namespace @@ -94,6 +95,8 @@ public: { if constexpr (ignore_set) return ColumnUInt8::create(input_rows_count, 0u); + if (input_rows_count == 0) + return ColumnUInt8::create(); /// Second argument must be ColumnSet. ColumnPtr column_set_ptr = arguments[1].column; @@ -135,12 +138,16 @@ public: /// Replace single LowCardinality column to it's dictionary if possible. 
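Editor's note, not part of the patch: the LowCardinality fast path taken by the in.cpp code at this point, restated over plain std:: containers. The membership test runs once per dictionary entry, and the per-row answer is then gathered through the index column, which is what res->index(*lc_indexes, 0) does for real columns; the is_const flag added above additionally replicates a single answer for constant arguments. The function name below is hypothetical.

#include <cstdint>
#include <unordered_set>
#include <vector>

std::vector<uint8_t> executeInOnDictionary(
    const std::vector<int64_t> & dictionary,     // unique values of the LowCardinality column
    const std::vector<size_t> & indexes,         // per-row positions into the dictionary
    const std::unordered_set<int64_t> & set)     // right-hand side of IN
{
    // Evaluate the predicate once per unique key.
    std::vector<uint8_t> per_key(dictionary.size());
    for (size_t i = 0; i < dictionary.size(); ++i)
        per_key[i] = set.count(dictionary[i]) ? 1 : 0;

    // Gather per-row results by index, analogous to res->index(*lc_indexes, 0).
    std::vector<uint8_t> per_row(indexes.size());
    for (size_t row = 0; row < indexes.size(); ++row)
        per_row[row] = per_key[indexes[row]];
    return per_row;
}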
ColumnPtr lc_indexes = nullptr; + bool is_const = false; if (columns_of_key_columns.size() == 1) { auto & arg = columns_of_key_columns.at(0); const auto * col = arg.column.get(); if (const auto * const_col = typeid_cast(col)) + { col = &const_col->getDataColumn(); + is_const = true; + } if (const auto * lc = typeid_cast(col)) { @@ -153,7 +160,13 @@ public: auto res = set->execute(columns_of_key_columns, negative); if (lc_indexes) - return res->index(*lc_indexes, 0); + res = res->index(*lc_indexes, 0); + + if (is_const) + res = ColumnUInt8::create(input_rows_count, res->getUInt(0)); + + if (res->size() != input_rows_count) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Output size is different from input size, expect {}, get {}", input_rows_count, res->size()); return res; } diff --git a/src/Functions/initializeAggregation.cpp b/src/Functions/initializeAggregation.cpp index 08352553b9c..b782cd04f75 100644 --- a/src/Functions/initializeAggregation.cpp +++ b/src/Functions/initializeAggregation.cpp @@ -87,7 +87,7 @@ DataTypePtr FunctionInitializeAggregation::getReturnTypeImpl(const ColumnsWithTy aggregate_function = AggregateFunctionFactory::instance().get(aggregate_function_name, argument_types, params_row, properties); } - return aggregate_function->getReturnType(); + return aggregate_function->getResultType(); } diff --git a/src/Functions/multiIf.cpp b/src/Functions/multiIf.cpp index f658528a2a7..37301037c0e 100644 --- a/src/Functions/multiIf.cpp +++ b/src/Functions/multiIf.cpp @@ -51,6 +51,7 @@ public: size_t getNumberOfArguments() const override { return 0; } bool useDefaultImplementationForNulls() const override { return false; } bool useDefaultImplementationForNothing() const override { return false; } + bool canBeExecutedOnLowCardinalityDictionary() const override { return false; } ColumnNumbers getArgumentsThatDontImplyNullableReturnType(size_t number_of_arguments) const override { diff --git a/src/Functions/runningAccumulate.cpp b/src/Functions/runningAccumulate.cpp index 336c45e49cb..436637fbe56 100644 --- a/src/Functions/runningAccumulate.cpp +++ b/src/Functions/runningAccumulate.cpp @@ -102,7 +102,7 @@ public: /// Will pass empty arena if agg_func does not allocate memory in arena std::unique_ptr arena = agg_func.allocatesMemoryInArena() ? std::make_unique() : nullptr; - auto result_column_ptr = agg_func.getReturnType()->createColumn(); + auto result_column_ptr = agg_func.getResultType()->createColumn(); IColumn & result_column = *result_column_ptr; result_column.reserve(column_with_states->size()); diff --git a/src/IO/HashingWriteBuffer.h b/src/IO/HashingWriteBuffer.h index bf636deeb07..988dfc227fe 100644 --- a/src/IO/HashingWriteBuffer.h +++ b/src/IO/HashingWriteBuffer.h @@ -77,6 +77,11 @@ public: state = uint128(0, 0); } + void sync() override + { + out.sync(); + } + uint128 getHash() { next(); diff --git a/src/IO/IResourceManager.h b/src/IO/IResourceManager.h new file mode 100644 index 00000000000..019778595bf --- /dev/null +++ b/src/IO/IResourceManager.h @@ -0,0 +1,53 @@ +#pragma once + +#include + +#include + +#include + +#include +#include + +namespace DB +{ + +/* + * Instance of derived class holds everything required for resource consumption, + * including resources currently registered at `SchedulerRoot`. This is required to avoid + * problems during configuration update. Do not hold instances longer than required. + * Should be created on query start and destructed when query is done. 
+ */ +class IClassifier : private boost::noncopyable +{ +public: + virtual ~IClassifier() {} + + /// Returns ResouceLink that should be used to access resource. + /// Returned link is valid until classifier destruction. + virtual ResourceLink get(const String & resource_name) = 0; +}; + +using ClassifierPtr = std::shared_ptr; + +/* + * Represents control plane of resource scheduling. Derived class is responsible for reading + * configuration, creating all required `ISchedulerNode` objects and + * managing their lifespan. + */ +class IResourceManager : private boost::noncopyable +{ +public: + virtual ~IResourceManager() {} + + /// Initialize or reconfigure manager. + virtual void updateConfiguration(const Poco::Util::AbstractConfiguration & config) = 0; + + /// Obtain a classifier instance required to get access to resources. + /// Note that it holds resource configuration, so should be destructed when query is done. + virtual ClassifierPtr acquire(const String & classifier_name) = 0; +}; + +using ResourceManagerPtr = std::shared_ptr; + +} diff --git a/src/IO/ISchedulerConstraint.h b/src/IO/ISchedulerConstraint.h new file mode 100644 index 00000000000..47f6905e265 --- /dev/null +++ b/src/IO/ISchedulerConstraint.h @@ -0,0 +1,55 @@ +#pragma once + +#include + +namespace DB +{ + +/* + * Constraint defined on the set of requests in consumption state. + * It allows to track two events: + * - dequeueRequest(): resource consumption begins + * - finishRequest(): resource consumption finishes + * This allows to keep track of in-flight requests and implement different constraints (e.g. in-flight limit). + * When constraint is violated, node must be deactivated by dequeueRequest() returning `false`. + * When constraint is again satisfied, scheduleActivation() is called from finishRequest(). + * + * Derived class behaviour requirements: + * - dequeueRequest() must fill `request->constraint` iff it is nullptr; + * - finishRequest() must be recursive: call to `parent_constraint->finishRequest()`. + */ +class ISchedulerConstraint : public ISchedulerNode +{ +public: + ISchedulerConstraint(EventQueue * event_queue_, const Poco::Util::AbstractConfiguration & config = emptyConfig(), const String & config_prefix = {}) + : ISchedulerNode(event_queue_, config, config_prefix) + {} + + /// Resource consumption by `request` is finished. + /// Should be called outside of scheduling subsystem, implementation must be thread-safe. + virtual void finishRequest(ResourceRequest * request) = 0; + + void setParent(ISchedulerNode * parent_) override + { + ISchedulerNode::setParent(parent_); + + // Assign `parent_constraint` to the nearest parent derived from ISchedulerConstraint + for (ISchedulerNode * node = parent_; node != nullptr; node = node->parent) + { + if (auto * constraint = dynamic_cast(node)) + { + parent_constraint = constraint; + break; + } + } + } + +protected: + // Reference to nearest parent that is also derived from ISchedulerConstraint. + // Request can traverse through multiple constraints while being dequeue from hierarchy, + // while finishing request should traverse the same chain in reverse order. 
+ // NOTE: it must be immutable after initialization, because it is accessed in not thread-safe way from finishRequest() + ISchedulerConstraint * parent_constraint = nullptr; +}; + +} diff --git a/src/IO/ISchedulerNode.h b/src/IO/ISchedulerNode.h new file mode 100644 index 00000000000..266f1ba07a0 --- /dev/null +++ b/src/IO/ISchedulerNode.h @@ -0,0 +1,221 @@ +#pragma once + +#include +#include + +#include +#include +#include + +#include + +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int INVALID_SCHEDULER_NODE; +} + +class ISchedulerNode; + +inline const Poco::Util::AbstractConfiguration & emptyConfig() +{ + static Poco::AutoPtr config = new Poco::Util::XMLConfiguration(); + return *config; +} + +/* + * Info read and write for scheduling purposes by parent + */ +struct SchedulerNodeInfo +{ + double weight = 1.0; /// Weight of this node among it's siblings + Int64 priority = 0; /// Priority of this node among it's siblings (higher value means higher priority) + + /// Arbitrary data accessed/stored by parent + union { + size_t idx; + void * ptr; + } parent; + + SchedulerNodeInfo() = default; + + explicit SchedulerNodeInfo(const Poco::Util::AbstractConfiguration & config = emptyConfig(), const String & config_prefix = {}) + { + setWeight(config.getDouble(config_prefix + ".weight", weight)); + setPriority(config.getInt64(config_prefix + ".priority", priority)); + } + + void setWeight(double value) + { + if (value <= 0 || !isfinite(value)) + throw Exception( + ErrorCodes::INVALID_SCHEDULER_NODE, + "Negative and non-finite node weights are not allowed: {}", + value); + weight = value; + } + + void setPriority(Int64 value) + { + priority = value; + } +}; + +/* + * Simple waitable thread-safe FIFO task queue. + * Intended to hold postponed events for later handling (usually by scheduler thread). + */ +class EventQueue +{ +public: + using Event = std::function; + + void enqueue(Event&& event) + { + std::unique_lock lock{mutex}; + bool was_empty = queue.empty(); + queue.emplace_back(event); + if (was_empty) + pending.notify_one(); + } + + /// Process single event if it exists + /// Returns `true` iff event has been processed + bool tryProcess() + { + std::unique_lock lock{mutex}; + if (queue.empty()) + return false; + Event event = std::move(queue.front()); + queue.pop_front(); + lock.unlock(); // do not hold queue mutext while processing events + event(); + return true; + } + + /// Wait for single event (if not available) and process it + void process() + { + std::unique_lock lock{mutex}; + pending.wait(lock, [&] { return !queue.empty(); }); + Event event = std::move(queue.front()); + queue.pop_front(); + lock.unlock(); // do not hold queue mutext while processing events + event(); + } + +private: + std::mutex mutex; + std::condition_variable pending; + std::deque queue; +}; + +/* + * Node of hierarchy for scheduling requests for resource. Base class for all + * kinds of scheduling elements (queues, policies, constraints and schedulers). + * + * Root node is a scheduler, which has it's thread to dequeue requests, + * execute requests (see ResourceRequest) and process events in a thread-safe manner. + * Immediate children of the scheduler represent independent resources. + * Each resource has it's own hierarchy to achieve required scheduling policies. + * Non-leaf nodes do not hold requests, but keep scheduling state + * (e.g. consumption history, amount of in-flight requests, etc). 
+ * Leafs of hierarchy are queues capable of holding pending requests. + * + * scheduler (SchedulerRoot) + * / \ + * constraint constraint (SemaphoreConstraint) + * | | + * policy policy (PriorityPolicy) + * / \ / \ + * q1 q2 q3 q4 (FifoQueue) + * + * Dequeueing request from an inner node will dequeue request from one of active leaf-queues in its subtree. + * Node is considered to be active iff: + * - it has at least one pending request in one of leaves of it's subtree; + * - and enforced constraints, if any, are satisfied + * (e.g. amount of concurrent requests is not greater than some number). + * + * All methods must be called only from scheduler thread for thread-safety. + */ +class ISchedulerNode : private boost::noncopyable +{ +public: + ISchedulerNode(EventQueue * event_queue_, const Poco::Util::AbstractConfiguration & config = emptyConfig(), const String & config_prefix = {}) + : event_queue(event_queue_) + , info(config, config_prefix) + {} + + virtual ~ISchedulerNode() {} + + // Checks if two nodes configuration is equal + virtual bool equals(ISchedulerNode * other) = 0; + + /// Attach new child + virtual void attachChild(const std::shared_ptr & child) = 0; + + /// Detach and destroy child + virtual void removeChild(ISchedulerNode * child) = 0; + + /// Get attached child by name + virtual ISchedulerNode * getChild(const String & child_name) = 0; + + /// Activation of child due to the first pending request + /// Should be called on leaf node (i.e. queue) to propagate activation signal through chain to the root + virtual void activateChild(ISchedulerNode * child) = 0; + + /// Returns true iff node is active + virtual bool isActive() = 0; + + /// Returns the first request to be executed as the first component of resuting pair. + /// The second pair component is `true` iff node is still active after dequeueing. + virtual std::pair dequeueRequest() = 0; + + /// Returns full path string using names of every parent + String getPath() + { + String result; + ISchedulerNode * ptr = this; + while (ptr->parent) + { + result = "/" + ptr->basename + result; + ptr = ptr->parent; + } + return result.empty() ? "/" : result; + } + + /// Attach to a parent (used by attachChild) + virtual void setParent(ISchedulerNode * parent_) + { + parent = parent_; + } + +protected: + /// Notify parents about the first pending request or constraint becoming satisfied. + /// Postponed to be handled in scheduler thread, so it is intended to be called from outside. + void scheduleActivation() + { + if (likely(parent)) + { + event_queue->enqueue([this] { parent->activateChild(this); }); + } + } + +public: + EventQueue * const event_queue; + String basename; + SchedulerNodeInfo info; + ISchedulerNode * parent = nullptr; +}; + +using SchedulerNodePtr = std::shared_ptr; + +} diff --git a/src/IO/ISchedulerQueue.h b/src/IO/ISchedulerQueue.h new file mode 100644 index 00000000000..75ea05fbee3 --- /dev/null +++ b/src/IO/ISchedulerQueue.h @@ -0,0 +1,26 @@ +#pragma once + +#include + +#include + + +namespace DB +{ + +/* + * Queue for pending requests for specific resource, leaf of hierarchy. + */ +class ISchedulerQueue : public ISchedulerNode +{ +public: + ISchedulerQueue(EventQueue * event_queue_, const Poco::Util::AbstractConfiguration & config = emptyConfig(), const String & config_prefix = {}) + : ISchedulerNode(event_queue_, config, config_prefix) + {} + + /// Enqueue new request to be executed using underlying resource. + /// Should be called outside of scheduling subsystem, implementation must be thread-safe. 
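Editor's note, not part of the patch: a sketch of how the EventQueue introduced in ISchedulerNode.h above is meant to be used. scheduleActivation() posts a postponed event from outside the scheduler, and the scheduler thread drains such events with process()/tryProcess(). The minimal queue below mirrors the one in the patch so the example compiles on its own; it is not the patched class itself.

#include <condition_variable>
#include <deque>
#include <functional>
#include <iostream>
#include <mutex>
#include <thread>

class MiniEventQueue
{
public:
    using Event = std::function<void()>;

    void enqueue(Event && event)
    {
        std::unique_lock lock{mutex};
        bool was_empty = queue.empty();
        queue.emplace_back(std::move(event));
        if (was_empty)
            pending.notify_one();
    }

    // Wait for one event and run it outside the lock, as in the patch.
    void process()
    {
        std::unique_lock lock{mutex};
        pending.wait(lock, [&] { return !queue.empty(); });
        Event event = std::move(queue.front());
        queue.pop_front();
        lock.unlock();
        event();
    }

private:
    std::mutex mutex;
    std::condition_variable pending;
    std::deque<Event> queue;
};

int main()
{
    MiniEventQueue events;
    // A producer thread stands in for a node calling scheduleActivation().
    std::thread producer([&] { events.enqueue([] { std::cout << "child activated\n"; }); });
    events.process();  // the scheduler thread handles the postponed activation
    producer.join();
}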
+ virtual void enqueueRequest(ResourceRequest * request) = 0; +}; + +} diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index 8de1b85c8b9..a754f267e71 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -319,12 +319,17 @@ template void readStringUntilEOFInto>(PaddedPODArray -static void parseComplexEscapeSequence(Vector & s, ReadBuffer & buf) +template +static ReturnType parseComplexEscapeSequence(Vector & s, ReadBuffer & buf) { ++buf.position(); if (buf.eof()) - throw Exception("Cannot parse escape sequence", ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE); + { + if constexpr (std::is_same_v) + throw Exception("Cannot parse escape sequence", ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE); + else + return ReturnType(false); + } char char_after_backslash = *buf.position(); @@ -363,6 +368,8 @@ static void parseComplexEscapeSequence(Vector & s, ReadBuffer & buf) s.push_back(decoded_char); ++buf.position(); } + + return ReturnType(true); } @@ -521,14 +528,18 @@ template void readEscapedStringInto(NullOutput & s, ReadBuffer & buf * backslash escape sequences are also parsed, * that could be slightly confusing. */ -template -static void readAnyQuotedStringInto(Vector & s, ReadBuffer & buf) +template +static ReturnType readAnyQuotedStringInto(Vector & s, ReadBuffer & buf) { + static constexpr bool throw_exception = std::is_same_v; if (buf.eof() || *buf.position() != quote) { - throw ParsingException(ErrorCodes::CANNOT_PARSE_QUOTED_STRING, - "Cannot parse quoted string: expected opening quote '{}', got '{}'", - std::string{quote}, buf.eof() ? "EOF" : std::string{*buf.position()}); + if constexpr (throw_exception) + throw ParsingException(ErrorCodes::CANNOT_PARSE_QUOTED_STRING, + "Cannot parse quoted string: expected opening quote '{}', got '{}'", + std::string{quote}, buf.eof() ? 
"EOF" : std::string{*buf.position()}); + else + return ReturnType(false); } ++buf.position(); @@ -554,15 +565,26 @@ static void readAnyQuotedStringInto(Vector & s, ReadBuffer & buf) continue; } - return; + return ReturnType(true); } if (*buf.position() == '\\') - parseComplexEscapeSequence(s, buf); + { + if constexpr (throw_exception) + parseComplexEscapeSequence(s, buf); + else + { + if (!parseComplexEscapeSequence(s, buf)) + return ReturnType(false); + } + } } - throw ParsingException("Cannot parse quoted string: expected closing quote", - ErrorCodes::CANNOT_PARSE_QUOTED_STRING); + if constexpr (throw_exception) + throw ParsingException("Cannot parse quoted string: expected closing quote", + ErrorCodes::CANNOT_PARSE_QUOTED_STRING); + else + return ReturnType(false); } template @@ -571,6 +593,14 @@ void readQuotedStringInto(Vector & s, ReadBuffer & buf) readAnyQuotedStringInto<'\'', enable_sql_style_quoting>(s, buf); } +template +bool tryReadQuotedStringInto(Vector & s, ReadBuffer & buf) +{ + return readAnyQuotedStringInto<'\'', false, Vector, bool>(s, buf); +} + +template bool tryReadQuotedStringInto(String & s, ReadBuffer & buf); + template void readDoubleQuotedStringInto(Vector & s, ReadBuffer & buf) { @@ -934,6 +964,7 @@ template void readJSONStringInto, void>(PaddedPODArray, bool>(PaddedPODArray & s, ReadBuffer & buf); template void readJSONStringInto(NullOutput & s, ReadBuffer & buf); template void readJSONStringInto(String & s, ReadBuffer & buf); +template bool readJSONStringInto(String & s, ReadBuffer & buf); template ReturnType readJSONObjectPossiblyInvalid(Vector & s, ReadBuffer & buf) @@ -1501,6 +1532,43 @@ static void readParsedValueInto(Vector & s, ReadBuffer & buf, ParseFunc parse_fu peekable_buf.position() = end; } +template +static void readQuotedStringFieldInto(Vector & s, ReadBuffer & buf) +{ + assertChar('\'', buf); + s.push_back('\''); + while (!buf.eof()) + { + char * next_pos = find_first_symbols<'\\', '\''>(buf.position(), buf.buffer().end()); + + s.append(buf.position(), next_pos); + buf.position() = next_pos; + + if (!buf.hasPendingData()) + continue; + + if (*buf.position() == '\'') + break; + + s.push_back(*buf.position()); + if (*buf.position() == '\\') + { + ++buf.position(); + if (!buf.eof()) + { + s.push_back(*buf.position()); + ++buf.position(); + } + } + } + + if (buf.eof()) + return; + + ++buf.position(); + s.push_back('\''); +} + template static void readQuotedFieldInBracketsInto(Vector & s, ReadBuffer & buf) { @@ -1518,20 +1586,19 @@ static void readQuotedFieldInBracketsInto(Vector & s, ReadBuffer & buf) if (!buf.hasPendingData()) continue; - s.push_back(*buf.position()); - if (*buf.position() == '\'') { - readQuotedStringInto(s, buf); - s.push_back('\''); + readQuotedStringFieldInto(s, buf); } else if (*buf.position() == opening_bracket) { + s.push_back(opening_bracket); ++balance; ++buf.position(); } else if (*buf.position() == closing_bracket) { + s.push_back(closing_bracket); --balance; ++buf.position(); } @@ -1554,11 +1621,7 @@ void readQuotedFieldInto(Vector & s, ReadBuffer & buf) /// - Number: integer, float, decimal. 
if (*buf.position() == '\'') - { - s.push_back('\''); - readQuotedStringInto(s, buf); - s.push_back('\''); - } + readQuotedStringFieldInto(s, buf); else if (*buf.position() == '[') readQuotedFieldInBracketsInto<'[', ']'>(s, buf); else if (*buf.position() == '(') diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index 4225c01bbd4..5b13f52e277 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -613,6 +613,9 @@ bool tryReadJSONStringInto(Vector & s, ReadBuffer & buf) return readJSONStringInto(s, buf); } +template +bool tryReadQuotedStringInto(Vector & s, ReadBuffer & buf); + /// Reads chunk of data between {} in that way, /// that it has balanced parentheses sequence of {}. /// So, it may form a JSON object, but it can be incorrenct. diff --git a/src/IO/Resource/ClassifiersConfig.cpp b/src/IO/Resource/ClassifiersConfig.cpp new file mode 100644 index 00000000000..fcd4655e2e4 --- /dev/null +++ b/src/IO/Resource/ClassifiersConfig.cpp @@ -0,0 +1,40 @@ +#include + +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int RESOURCE_NOT_FOUND; +} + +ClassifierDescription::ClassifierDescription(const Poco::Util::AbstractConfiguration & config, const String & config_prefix) +{ + Poco::Util::AbstractConfiguration::Keys keys; + config.keys(config_prefix, keys); + for (const auto & key : keys) + emplace(key, config.getString(config_prefix + "." + key)); +} + +ClassifiersConfig::ClassifiersConfig(const Poco::Util::AbstractConfiguration & config) +{ + Poco::Util::AbstractConfiguration::Keys keys; + const String config_prefix = "classifiers"; + config.keys(config_prefix, keys); + for (const auto & key : keys) + classifiers.emplace(std::piecewise_construct, + std::forward_as_tuple(key), + std::forward_as_tuple(config, config_prefix + "." + key)); +} + +const ClassifierDescription & ClassifiersConfig::get(const String & classifier_name) +{ + if (auto it = classifiers.find(classifier_name); it != classifiers.end()) + return it->second; + else + throw Exception(ErrorCodes::RESOURCE_NOT_FOUND, "Unknown classifier '{}' to access resources", classifier_name); +} + +} diff --git a/src/IO/Resource/ClassifiersConfig.h b/src/IO/Resource/ClassifiersConfig.h new file mode 100644 index 00000000000..96e2bd0f0b9 --- /dev/null +++ b/src/IO/Resource/ClassifiersConfig.h @@ -0,0 +1,39 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ + +/// Mapping of resource name into path string (e.g. "disk1" -> "/path/to/class") +struct ClassifierDescription : std::unordered_map +{ + ClassifierDescription(const Poco::Util::AbstractConfiguration & config, const String & config_prefix); +}; + +/* + * Loads a config with the following format: + * + * + * /path/to/queue + * /path/to/another/queue + * + * ... + * ... 
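 *
 * A hypothetical example of a config this parser would accept (classifier and
 * resource names are placeholders, not taken from this patch; the tag layout
 * follows the ClassifierDescription/ClassifiersConfig constructors in
 * ClassifiersConfig.cpp earlier in this diff):
 *
 *   <classifiers>
 *     <admin>
 *       <network_read>/prio/admin</network_read>
 *     </admin>
 *     <production>
 *       <network_read>/prio/fair/prod</network_read>
 *       <network_write>/prio/fair/prod</network_write>
 *     </production>
 *   </classifiers>
 *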
+ * + */ +class ClassifiersConfig +{ +public: + ClassifiersConfig() = default; + explicit ClassifiersConfig(const Poco::Util::AbstractConfiguration & config); + + const ClassifierDescription & get(const String & classifier_name); + +private: + std::unordered_map classifiers; // by classifier_name +}; + +} diff --git a/src/IO/Resource/FifoQueue.cpp b/src/IO/Resource/FifoQueue.cpp new file mode 100644 index 00000000000..f4b0e9c3328 --- /dev/null +++ b/src/IO/Resource/FifoQueue.cpp @@ -0,0 +1,13 @@ +#include + +#include + +namespace DB +{ + +void registerFifoQueue(SchedulerNodeFactory & factory) +{ + factory.registerMethod("fifo"); +} + +} diff --git a/src/IO/Resource/FifoQueue.h b/src/IO/Resource/FifoQueue.h new file mode 100644 index 00000000000..f3ff15ad461 --- /dev/null +++ b/src/IO/Resource/FifoQueue.h @@ -0,0 +1,91 @@ +#pragma once + +#include + +#include + +#include + +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int INVALID_SCHEDULER_NODE; +} + +/* + * FIFO queue to hold pending resource requests + */ +class FifoQueue : public ISchedulerQueue +{ +public: + FifoQueue(EventQueue * event_queue_, const Poco::Util::AbstractConfiguration & config, const String & config_prefix) + : ISchedulerQueue(event_queue_, config, config_prefix) + {} + + bool equals(ISchedulerNode * other) override + { + if (auto * o = dynamic_cast(other)) + return true; + return false; + } + + void enqueueRequest(ResourceRequest * request) override + { + std::unique_lock lock(mutex); + request->enqueue_ns = clock_gettime_ns(); + bool was_empty = requests.empty(); + requests.push_back(request); + if (was_empty) + scheduleActivation(); + } + + std::pair dequeueRequest() override + { + std::unique_lock lock(mutex); + if (requests.empty()) + return {nullptr, false}; + ResourceRequest * result = requests.front(); + requests.pop_front(); + return {result, !requests.empty()}; + } + + bool isActive() override + { + std::unique_lock lock(mutex); + return !requests.empty(); + } + + void activateChild(ISchedulerNode *) override + { + assert(false); // queue cannot have children + } + + void attachChild(const SchedulerNodePtr &) override + { + throw Exception( + ErrorCodes::INVALID_SCHEDULER_NODE, + "Cannot add child to leaf scheduler queue: {}", + getPath()); + } + + void removeChild(ISchedulerNode *) override + { + } + + ISchedulerNode * getChild(const String &) override + { + return nullptr; + } + +private: + std::mutex mutex; + std::deque requests; +}; + +} diff --git a/src/IO/Resource/PriorityPolicy.cpp b/src/IO/Resource/PriorityPolicy.cpp new file mode 100644 index 00000000000..bee9a6d5dde --- /dev/null +++ b/src/IO/Resource/PriorityPolicy.cpp @@ -0,0 +1,13 @@ +#include + +#include + +namespace DB +{ + +void registerPriorityPolicy(SchedulerNodeFactory & factory) +{ + factory.registerMethod("priority"); +} + +} diff --git a/src/IO/Resource/PriorityPolicy.h b/src/IO/Resource/PriorityPolicy.h new file mode 100644 index 00000000000..961f5af4d27 --- /dev/null +++ b/src/IO/Resource/PriorityPolicy.h @@ -0,0 +1,143 @@ +#pragma once + +#include +#include + +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int INVALID_SCHEDULER_NODE; +} + +/* + * Scheduler node that implements priority scheduling policy. + * Requests are scheduled in order of priorities. 
+ */ +class PriorityPolicy : public ISchedulerNode +{ + /// Scheduling state of a child + struct Item + { + ISchedulerNode * child = nullptr; + Int64 priority = 0; // higher value means higher priority + + /// For max-heap by priority + bool operator<(const Item& rhs) const noexcept + { + return priority < rhs.priority; + } + }; + +public: + PriorityPolicy(EventQueue * event_queue_, const Poco::Util::AbstractConfiguration & config = emptyConfig(), const String & config_prefix = {}) + : ISchedulerNode(event_queue_, config, config_prefix) + {} + + bool equals(ISchedulerNode * other) override + { + if (auto * o = dynamic_cast(other)) + return true; + return false; + } + + void attachChild(const SchedulerNodePtr & child) override + { + // Take ownership + chassert(child->parent == nullptr); + if (auto [it, inserted] = children.emplace(child->basename, child); !inserted) + throw Exception( + ErrorCodes::INVALID_SCHEDULER_NODE, + "Can't add another child with the same path: {}", + it->second->getPath()); + + // Attach + child->setParent(this); + + // Activate child if it is not empty + if (child->isActive()) + activateChild(child.get()); + } + + void removeChild(ISchedulerNode * child) override + { + if (auto iter = children.find(child->basename); iter != children.end()) + { + SchedulerNodePtr removed = iter->second; + + // Deactivate: detach is not very common operation, so we can afford O(N) here + for (auto i = items.begin(), e = items.end(); i != e; ++i) + { + if (i->child == removed.get()) + { + items.erase(i); + // Element was removed from inside of heap -- heap must be rebuilt + std::make_heap(items.begin(), items.end()); + break; + } + } + + // Detach + removed->setParent(nullptr); + + // Get rid of ownership + children.erase(iter); + } + } + + ISchedulerNode * getChild(const String & child_name) override + { + if (auto iter = children.find(child_name); iter != children.end()) + return iter->second.get(); + else + return nullptr; + } + + std::pair dequeueRequest() override + { + if (items.empty()) + return {nullptr, false}; + + // Recursively pull request from child + auto [request, child_active] = items.front().child->dequeueRequest(); + assert(request != nullptr); + + // Deactivate child if it is empty + if (!child_active) + { + std::pop_heap(items.begin(), items.end()); + items.pop_back(); + } + + return {request, !items.empty()}; + } + + bool isActive() override + { + return !items.empty(); + } + + void activateChild(ISchedulerNode * child) override + { + bool activate_parent = items.empty(); + items.emplace_back(Item{child, child->info.priority}); + std::push_heap(items.begin(), items.end()); + if (activate_parent && parent) + parent->activateChild(this); + } + +private: + /// Heap of active children + std::vector items; + + /// All children with ownership + std::unordered_map children; // basename -> child +}; + +} diff --git a/src/IO/Resource/SemaphoreConstraint.cpp b/src/IO/Resource/SemaphoreConstraint.cpp new file mode 100644 index 00000000000..2135fd65a84 --- /dev/null +++ b/src/IO/Resource/SemaphoreConstraint.cpp @@ -0,0 +1,13 @@ +#include + +#include + +namespace DB +{ + +void registerSemaphoreConstraint(SchedulerNodeFactory & factory) +{ + factory.registerMethod("inflight_limit"); +} + +} diff --git a/src/IO/Resource/SemaphoreConstraint.h b/src/IO/Resource/SemaphoreConstraint.h new file mode 100644 index 00000000000..237e63eaddb --- /dev/null +++ b/src/IO/Resource/SemaphoreConstraint.h @@ -0,0 +1,138 @@ +#pragma once + +#include +#include + +#include +#include +#include + 
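// Illustrative sketch, not part of this patch: wiring FifoQueue leaves under a
// PriorityPolicy by hand and observing dequeue order. It assumes emptyConfig()
// (used as a default argument in the headers above) is callable here and that
// SchedulerNodeInfo::priority is a plain writable Int64 field (it is read
// directly elsewhere in this patch); include paths mirror the new files above.

#include <memory>
#include <IO/Resource/FifoQueue.h>
#include <IO/Resource/PriorityPolicy.h>

namespace
{

struct NoopRequest : DB::ResourceRequest
{
    using ResourceRequest::ResourceRequest;
    void execute() override {} // real consumption would happen outside the scheduler
};

void priorityPolicySketch()
{
    using namespace DB;

    EventQueue events;
    auto policy = std::make_shared<PriorityPolicy>(&events);

    auto low = std::make_shared<FifoQueue>(&events, emptyConfig(), "");
    auto high = std::make_shared<FifoQueue>(&events, emptyConfig(), "");
    low->basename = "low";
    high->basename = "high";
    low->info.priority = 1;  // smaller value -> served later
    high->info.priority = 2; // larger value wins in the max-heap of PriorityPolicy::Item

    policy->attachChild(low);
    policy->attachChild(high);

    NoopRequest a(1), b(1);
    low->enqueueRequest(&a);
    high->enqueueRequest(&b);
    while (events.tryProcess()) {} // deliver postponed activations to the policy

    auto [first, more] = policy->dequeueRequest();  // returns &b: "high" has the larger priority
    auto [second, rest] = policy->dequeueRequest(); // then &a from "low"; `rest` is false, nothing left
    (void)first; (void)more; (void)second; (void)rest;
}

}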
+namespace DB +{ + +/* + * Limited concurrency constraint. + * Blocks if either number of concurrent in-flight requests exceeds `max_requests`, or their total cost exceeds `max_cost` + */ +class SemaphoreConstraint : public ISchedulerConstraint +{ + static constexpr Int64 default_max_requests = std::numeric_limits::max(); + static constexpr Int64 default_max_cost = std::numeric_limits::max(); +public: + SemaphoreConstraint(EventQueue * event_queue_, const Poco::Util::AbstractConfiguration & config = emptyConfig(), const String & config_prefix = {}) + : ISchedulerConstraint(event_queue_, config, config_prefix) + , max_requests(config.getInt64(config_prefix + ".max_requests", default_max_requests)) + , max_cost(config.getInt64(config_prefix + ".max_cost", config.getInt64(config_prefix + ".max_bytes", default_max_cost))) + {} + + bool equals(ISchedulerNode * other) override + { + if (auto * o = dynamic_cast(other)) + return max_requests == o->max_requests && max_cost == o->max_cost; + return false; + } + + void attachChild(const std::shared_ptr & child_) override + { + // Take ownership + child = child_; + child->setParent(this); + + // Activate if required + if (child->isActive()) + activateChild(child.get()); + } + + void removeChild(ISchedulerNode * child_) override + { + if (child.get() == child_) + { + child_active = false; // deactivate + child->setParent(nullptr); // detach + child.reset(); + } + } + + ISchedulerNode * getChild(const String & child_name) override + { + if (child->basename == child_name) + return child.get(); + else + return nullptr; + } + + std::pair dequeueRequest() override + { + // Dequeue request from the child + auto [request, child_now_active] = child->dequeueRequest(); + if (!request) + return {nullptr, false}; + + // Request has reference to the first (closest to leaf) `constraint`, which can have `parent_constraint`. + // The former is initialized here dynamically and the latter is initialized once during hierarchy construction. 
+ if (!request->constraint) + request->constraint = this; + + // Update state on request arrival + std::unique_lock lock(mutex); + requests++; + cost += request->cost; + child_active = child_now_active; + + return {request, active()}; + } + + void finishRequest(ResourceRequest * request) override + { + // Recursive traverse of parent flow controls in reverse order + if (parent_constraint) + parent_constraint->finishRequest(request); + + // Update state on request departure + std::unique_lock lock(mutex); + bool was_active = active(); + requests--; + cost -= request->cost; + + // Schedule activation on transition from inactive state + if (!was_active && active()) + scheduleActivation(); + } + + void activateChild(ISchedulerNode * child_) override + { + std::unique_lock lock(mutex); + if (child_ == child.get()) + if (!std::exchange(child_active, true) && satisfied() && parent) + parent->activateChild(this); + } + + bool isActive() override + { + std::unique_lock lock(mutex); + return active(); + } + +private: + bool satisfied() const + { + return requests < max_requests && cost < max_cost; + } + + bool active() const + { + return satisfied() && child_active; + } + +private: + std::mutex mutex; + Int64 requests = 0; + Int64 cost = 0; + bool child_active = false; + + SchedulerNodePtr child; + Int64 max_requests = default_max_requests; + Int64 max_cost = default_max_cost; +}; + +} diff --git a/src/IO/Resource/StaticResourceManager.cpp b/src/IO/Resource/StaticResourceManager.cpp new file mode 100644 index 00000000000..a79e8148f94 --- /dev/null +++ b/src/IO/Resource/StaticResourceManager.cpp @@ -0,0 +1,138 @@ +#include + +#include +#include +#include + +#include +#include + +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int RESOURCE_ACCESS_DENIED; + extern const int RESOURCE_NOT_FOUND; + extern const int INVALID_SCHEDULER_NODE; +} + +StaticResourceManager::Resource::Resource( + const String & name, + EventQueue * event_queue, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix) +{ + // Initialize scheduler nodes + Poco::Util::AbstractConfiguration::Keys keys; + std::sort(keys.begin(), keys.end()); // for parents to appear before children + config.keys(config_prefix, keys); + for (const auto & key : keys) + { + if (!startsWith(key, "node")) + continue; + + // Validate path + String path = config.getString(config_prefix + "." + key + "[@path]", ""); + if (path.empty()) + throw Exception(ErrorCodes::INVALID_SCHEDULER_NODE, "Attribute 'path' must be specified in all nodes for resource '{}'", name); + if (path[0] != '/') + throw Exception(ErrorCodes::INVALID_SCHEDULER_NODE, "path must start with '/' for resource '{}'", name); + + // Create node + String type = config.getString(config_prefix + "." + key + ".type", "fifo"); + SchedulerNodePtr node = SchedulerNodeFactory::instance().get(type, event_queue, config, config_prefix + "." 
+ key); + node->basename = path.substr(1); + + // Take ownership + if (auto [_, inserted] = nodes.emplace(path, node); !inserted) + throw Exception(ErrorCodes::INVALID_SCHEDULER_NODE, "Duplicate path '{}' for resource '{}'", path, name); + + // Attach created node to parent (if not root) + if (path != "/") + { + String parent_path = path.substr(0, path.rfind('/')); + if (parent_path.empty()) + parent_path = "/"; + if (auto parent = nodes.find(parent_path); parent != nodes.end()) + parent->second->attachChild(node); + else + throw Exception(ErrorCodes::INVALID_SCHEDULER_NODE, "Parent doesn't exist for path '{}' for resource '{}'", path, name); + } + } + + if (nodes.find("/") == nodes.end()) + throw Exception(ErrorCodes::INVALID_SCHEDULER_NODE, "undefined root node path '/' for resource '{}'", name); +} + +StaticResourceManager::Classifier::Classifier(const StaticResourceManager & manager, const ClassifierDescription & cfg) +{ + for (auto [resource_name, path] : cfg) + { + if (auto resource_iter = manager.resources.find(resource_name); resource_iter != manager.resources.end()) + { + const Resource & resource = resource_iter->second; + if (auto node_iter = resource.nodes.find(path); node_iter != resource.nodes.end()) + { + if (auto * queue = dynamic_cast(node_iter->second.get())) + resources.emplace(resource_name, ResourceLink{.queue = queue}); + else + throw Exception(ErrorCodes::RESOURCE_NOT_FOUND, "Unable to access non-queue node at path '{}' for resource '{}'", path, resource_name); + } + else + throw Exception(ErrorCodes::RESOURCE_NOT_FOUND, "Path '{}' for resource '{}' does not exist", path, resource_name); + } + else + resources.emplace(resource_name, ResourceLink{}); // resource not configured - unlimited + } +} + +ResourceLink StaticResourceManager::Classifier::get(const String & resource_name) +{ + if (auto iter = resources.find(resource_name); iter != resources.end()) + return iter->second; + else + throw Exception(ErrorCodes::RESOURCE_ACCESS_DENIED, "Access denied to resource '{}'", resource_name); +} + +void StaticResourceManager::updateConfiguration(const Poco::Util::AbstractConfiguration & config) +{ + if (!resources.empty()) + return; // already initialized, configuration update is not supported + + Poco::Util::AbstractConfiguration::Keys keys; + const String config_prefix = "resources"; + config.keys(config_prefix, keys); + + // Create resource for every element under tag + for (const auto & key : keys) + { + auto [iter, _] = resources.emplace(std::piecewise_construct, + std::forward_as_tuple(key), + std::forward_as_tuple(key, scheduler.event_queue, config, config_prefix + "." + key)); + // Attach root of resource to scheduler + scheduler.attachChild(iter->second.nodes.find("/")->second); + } + + // Initialize classifiers + classifiers = std::make_unique(config); + + // Run scheduler thread + scheduler.start(); +} + +ClassifierPtr StaticResourceManager::acquire(const String & classifier_name) +{ + return std::make_shared(*this, classifiers->get(classifier_name)); +} + +void registerStaticResourceManager(ResourceManagerFactory & factory) +{ + factory.registerMethod("static"); +} + +} diff --git a/src/IO/Resource/StaticResourceManager.h b/src/IO/Resource/StaticResourceManager.h new file mode 100644 index 00000000000..066dbf4ebf8 --- /dev/null +++ b/src/IO/Resource/StaticResourceManager.h @@ -0,0 +1,49 @@ +#pragma once + +#include +#include +#include + +#include + +namespace DB +{ + +/* + * Reads `` from config at startup and registers them in single `SchedulerRoot`. 
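 *
 * A hypothetical example of the accepted layout (the <node> element, its `path`
 * attribute and <type> child follow Resource(...) in StaticResourceManager.cpp;
 * the <max_requests> key comes from SemaphoreConstraint; the <priority> key is
 * an assumption based on the tests, not verbatim from this patch):
 *
 *   <resources>
 *     <network_read>
 *       <node path="/">       <type>inflight_limit</type> <max_requests>10</max_requests> </node>
 *       <node path="/prio">   <type>priority</type> </node>
 *       <node path="/prio/A"> <priority>1</priority> </node>
 *       <node path="/prio/B"> <priority>2</priority> </node>
 *     </network_read>
 *   </resources>
 *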
+ * Do not support configuration updates, server restart is required. + */ +class StaticResourceManager : public IResourceManager +{ +public: + // Just initialization, any further updates are ignored for the sake of simplicity + // NOTE: manager must be initialized before any acquire() calls to avoid races + void updateConfiguration(const Poco::Util::AbstractConfiguration & config) override; + + ClassifierPtr acquire(const String & classifier_name) override; + +private: + struct Resource + { + std::unordered_map nodes; // by paths + + Resource( + const String & name, + EventQueue * event_queue, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix); + }; + + struct Classifier : public IClassifier + { + Classifier(const StaticResourceManager & manager, const ClassifierDescription & cfg); + ResourceLink get(const String & resource_name) override; + std::unordered_map resources; // accessible resources by names + }; + + SchedulerRoot scheduler; + std::unordered_map resources; // by name + std::unique_ptr classifiers; +}; + +} diff --git a/src/IO/Resource/registerResourceManagers.cpp b/src/IO/Resource/registerResourceManagers.cpp new file mode 100644 index 00000000000..0a394e3f0cd --- /dev/null +++ b/src/IO/Resource/registerResourceManagers.cpp @@ -0,0 +1,15 @@ +#include +#include + +namespace DB +{ + +void registerStaticResourceManager(ResourceManagerFactory &); + +void registerResourceManagers() +{ + auto & factory = ResourceManagerFactory::instance(); + registerStaticResourceManager(factory); +} + +} diff --git a/src/IO/Resource/registerResourceManagers.h b/src/IO/Resource/registerResourceManagers.h new file mode 100644 index 00000000000..243b25a9587 --- /dev/null +++ b/src/IO/Resource/registerResourceManagers.h @@ -0,0 +1,8 @@ +#pragma once + +namespace DB +{ + +void registerResourceManagers(); + +} diff --git a/src/IO/Resource/registerSchedulerNodes.cpp b/src/IO/Resource/registerSchedulerNodes.cpp new file mode 100644 index 00000000000..1b58b3981c2 --- /dev/null +++ b/src/IO/Resource/registerSchedulerNodes.cpp @@ -0,0 +1,28 @@ +#include + +#include +#include +#include + +namespace DB +{ + +void registerPriorityPolicy(SchedulerNodeFactory &); +void registerSemaphoreConstraint(SchedulerNodeFactory &); +void registerFifoQueue(SchedulerNodeFactory &); + +void registerSchedulerNodes() +{ + auto & factory = SchedulerNodeFactory::instance(); + + // ISchedulerNode + registerPriorityPolicy(factory); + + // ISchedulerConstraint + registerSemaphoreConstraint(factory); + + // ISchedulerQueue + registerFifoQueue(factory); +} + +} diff --git a/src/IO/Resource/registerSchedulerNodes.h b/src/IO/Resource/registerSchedulerNodes.h new file mode 100644 index 00000000000..1e2092aaf4d --- /dev/null +++ b/src/IO/Resource/registerSchedulerNodes.h @@ -0,0 +1,8 @@ +#pragma once + +namespace DB +{ + +void registerSchedulerNodes(); + +} diff --git a/src/IO/Resource/tests/ResourceTest.h b/src/IO/Resource/tests/ResourceTest.h new file mode 100644 index 00000000000..36009b3afbe --- /dev/null +++ b/src/IO/Resource/tests/ResourceTest.h @@ -0,0 +1,309 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +struct ResourceTestBase +{ + ResourceTestBase() + { + [[maybe_unused]] static bool typesRegistered = [] { registerSchedulerNodes(); registerResourceManagers(); return true; }(); + } + + template + static TClass * add(EventQueue * 
event_queue, SchedulerNodePtr & root_node, const String & path, const String & xml = {}) + { + std::stringstream stream; // STYLE_CHECK_ALLOW_STD_STRING_STREAM + stream << "" << xml << ""; + Poco::AutoPtr config{new Poco::Util::XMLConfiguration(stream)}; + String config_prefix = "node"; + + if (path == "/") + { + EXPECT_TRUE(root_node.get() == nullptr); + root_node.reset(new TClass(event_queue, *config, config_prefix)); + return static_cast(root_node.get()); + } + + EXPECT_TRUE(root_node.get() != nullptr); // root should be initialized first + ISchedulerNode * parent = root_node.get(); + size_t pos = 1; + String child_name; + while (pos < path.length()) + { + size_t slash = path.find('/', pos); + if (slash != String::npos) + { + parent = parent->getChild(path.substr(pos, slash - pos)); + EXPECT_TRUE(parent != nullptr); // parent does not exist + pos = slash + 1; + } + else + { + child_name = path.substr(pos); + pos = String::npos; + } + } + + EXPECT_TRUE(!child_name.empty()); // wrong path + SchedulerNodePtr node = std::make_shared(event_queue, *config, config_prefix); + node->basename = child_name; + parent->attachChild(node); + return static_cast(node.get()); + } +}; + + +struct ConstraintTest : public SemaphoreConstraint +{ + ConstraintTest(EventQueue * event_queue_, const Poco::Util::AbstractConfiguration & config = emptyConfig(), const String & config_prefix = {}) + : SemaphoreConstraint(event_queue_, config, config_prefix) + {} + + std::pair dequeueRequest() override + { + auto [request, active] = SemaphoreConstraint::dequeueRequest(); + if (request) + { + std::unique_lock lock(mutex); + requests.insert(request); + } + return {request, active}; + } + + void finishRequest(ResourceRequest * request) override + { + { + std::unique_lock lock(mutex); + requests.erase(request); + } + SemaphoreConstraint::finishRequest(request); + } + + std::mutex mutex; + std::set requests; +}; + +class ResourceTestClass : public ResourceTestBase +{ + struct Request : public ResourceRequest + { + String name; + + Request(ResourceCost cost_, const String & name_) + : ResourceRequest(cost_) + , name(name_) + {} + + void execute() override + { + } + }; + +public: + template + void add(const String & path, const String & xml = {}) + { + ResourceTestBase::add(&event_queue, root_node, path, xml); + } + + void enqueue(const String & path, const std::vector & costs) + { + ASSERT_TRUE(root_node.get() != nullptr); // root should be initialized first + ISchedulerNode * node = root_node.get(); + size_t pos = 1; + while (pos < path.length()) + { + size_t slash = path.find('/', pos); + if (slash != String::npos) + { + node = node->getChild(path.substr(pos, slash - pos)); + ASSERT_TRUE(node != nullptr); // does not exist + pos = slash + 1; + } + else + { + node = node->getChild(path.substr(pos)); + pos = String::npos; + } + } + ISchedulerQueue * queue = dynamic_cast(node); + ASSERT_TRUE(queue != nullptr); // not a queue + + for (ResourceCost cost : costs) + { + queue->enqueueRequest(new Request(cost, queue->basename)); + } + processEvents(); // to activate queues + } + + void dequeue(size_t count_limit = size_t(-1), ResourceCost cost_limit = ResourceCostMax) + { + while (count_limit > 0 && cost_limit > 0) + { + if (auto [request, _] = root_node->dequeueRequest(); request) + { + count_limit--; + cost_limit -= request->cost; + handle(static_cast(request)); + } + else + { + break; + } + } + } + + void handle(Request * request) + { + consumed_cost[request->name] += request->cost; + delete request; + } + + void consumed(const 
String & name, ResourceCost value, ResourceCost error = 0) + { + EXPECT_TRUE(consumed_cost[name] >= value - error); + EXPECT_TRUE(consumed_cost[name] <= value + error); + consumed_cost[name] -= value; + } + + void processEvents() + { + while (event_queue.tryProcess()) {} + } + +private: + EventQueue event_queue; + SchedulerNodePtr root_node; + std::unordered_map consumed_cost; +}; + +template +struct ResourceTestManager : public ResourceTestBase +{ + ResourceManagerPtr manager; + + std::vector threads; + std::barrier<> busy_period; + + struct Guard : public ResourceGuard + { + ResourceTestManager & t; + + Guard(ResourceTestManager & t_, ResourceLink link_, ResourceCost cost) + : ResourceGuard(link_, cost, PostponeLocking) + , t(t_) + { + t.onEnqueue(link); + lock(); + t.onExecute(link); + } + }; + + struct TItem + { + std::atomic enqueued = 0; // number of enqueued requests + std::atomic left = 0; // number of requests left to be executed + }; + + struct ResourceQueueHash + { + size_t operator()(const ResourceLink & link) const + { + return std::hash()(link.queue); + } + }; + + std::mutex link_data_mutex; + std::unordered_map link_data; + + explicit ResourceTestManager(size_t thread_count = 1) + : manager(new TManager) + , busy_period(thread_count) + {} + + ~ResourceTestManager() + { + for (auto & thread : threads) + thread.join(); + } + + void update(const String & xml) + { + std::istringstream stream(xml); // STYLE_CHECK_ALLOW_STD_STRING_STREAM + Poco::AutoPtr config{new Poco::Util::XMLConfiguration(stream)}; + manager->updateConfiguration(*config); + } + + auto & getLinkData(ResourceLink link) + { + std::unique_lock lock{link_data_mutex}; + return link_data[link]; + } + + // Use at least two threads for each queue to avoid queue being deactivated: + // while the first request is executing, the second request is in queue - holding it active. + // use onEnqueue() and onExecute() functions for this purpose. + void onEnqueue(ResourceLink link) + { + getLinkData(link).enqueued.fetch_add(1, std::memory_order_relaxed); + } + void onExecute(ResourceLink link) + { + auto & data = getLinkData(link); + Int64 left = data.left.fetch_sub(1, std::memory_order_relaxed) - 1; + Int64 enqueued = data.enqueued.fetch_sub(1, std::memory_order_relaxed) - 1; + while (left > 0 && enqueued <= 0) // Ensure at least one thread has already enqueued itself (or there is no more requests) + { + std::this_thread::yield(); + left = data.left.load(); + enqueued = data.enqueued.load(); + } + } + + // This is required for proper busy period start, i.e. everyone to be seen by scheduler as appeared at the same time: + // - resource is blocked with queries by leader thread; + // - leader thread notifies followers to enqueue their requests; + // - leader thread unblocks resource; + // - busy period begins. + // NOTE: actually leader's request(s) make their own small busy period. 
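    // Call-order sketch (illustrative, matching blockResource()/startBusyPeriod() below):
    //   1. leader: blockResource() locks the resource, then waits on barrier (1)
    //   2. each follower: startBusyPeriod() passes barrier (1), enqueues its own request
    //      via the guard constructor, then waits on barrier (2)
    //   3. leader: passes barrier (2) and returns; its guard is destroyed, releasing the resource
    //   4. followers: lock() returns as the scheduler starts draining the now-populated queues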
+ void blockResource(ResourceLink link) + { + ResourceGuard g(link, 1, ResourceGuard::PostponeLocking); + g.lock(); + // NOTE: at this point we assume resource to be blocked by single request (1) + busy_period.arrive_and_wait(); // (1) notify all followers that resource is blocked + busy_period.arrive_and_wait(); // (2) wait all followers to enqueue their requests + } + void startBusyPeriod(ResourceLink link, ResourceCost cost, Int64 total_requests) + { + getLinkData(link).left += total_requests + 1; + busy_period.arrive_and_wait(); // (1) wait leader to block resource + ResourceGuard g(link, cost, ResourceGuard::PostponeLocking); + onEnqueue(link); + busy_period.arrive_and_wait(); // (2) notify leader to unblock + g.lock(); + onExecute(link); + } +}; + +} diff --git a/src/IO/Resource/tests/gtest_resource_class_priority.cpp b/src/IO/Resource/tests/gtest_resource_class_priority.cpp new file mode 100644 index 00000000000..9e1a55a0741 --- /dev/null +++ b/src/IO/Resource/tests/gtest_resource_class_priority.cpp @@ -0,0 +1,122 @@ +#include + +#include + +#include + +using namespace DB; + +using ResourceTest = ResourceTestClass; + +TEST(IOResourcePriorityPolicy, Factory) +{ + ResourceTest t; + + Poco::AutoPtr cfg = new Poco::Util::XMLConfiguration(); + SchedulerNodePtr prio = SchedulerNodeFactory::instance().get("priority", /* event_queue = */ nullptr, *cfg, ""); + EXPECT_TRUE(dynamic_cast(prio.get()) != nullptr); +} + +TEST(IOResourcePriorityPolicy, Priorities) +{ + ResourceTest t; + + t.add("/"); + t.add("/A", "1"); + t.add("/B", "2"); + t.add("/C", "3"); + + t.enqueue("/A", {10, 10, 10}); + t.enqueue("/B", {10, 10, 10}); + t.enqueue("/C", {10, 10, 10}); + + t.dequeue(2); + t.consumed("A", 0); + t.consumed("B", 0); + t.consumed("C", 20); + + t.dequeue(2); + t.consumed("A", 0); + t.consumed("B", 10); + t.consumed("C", 10); + + t.dequeue(2); + t.consumed("A", 0); + t.consumed("B", 20); + t.consumed("C", 0); + + t.dequeue(); + t.consumed("A", 30); + t.consumed("B", 0); + t.consumed("C", 0); +} + +TEST(IOResourcePriorityPolicy, Activation) +{ + ResourceTest t; + + t.add("/"); + t.add("/A", "1"); + t.add("/B", "2"); + t.add("/C", "3"); + + t.enqueue("/A", {10, 10, 10, 10, 10, 10}); + t.enqueue("/B", {10}); + t.enqueue("/C", {10, 10}); + + t.dequeue(3); + t.consumed("A", 0); + t.consumed("B", 10); + t.consumed("C", 20); + + t.dequeue(2); + t.consumed("A", 20); + t.consumed("B", 0); + t.consumed("C", 0); + + t.enqueue("/B", {10, 10, 10}); + t.dequeue(2); + t.consumed("A", 0); + t.consumed("B", 20); + t.consumed("C", 0); + + t.enqueue("/C", {10, 10}); + t.dequeue(3); + t.consumed("A", 0); + t.consumed("B", 10); + t.consumed("C", 20); + + t.dequeue(2); + t.consumed("A", 20); + t.consumed("B", 0); + t.consumed("C", 0); +} + +TEST(IOResourcePriorityPolicy, SinglePriority) +{ + ResourceTest t; + + t.add("/"); + t.add("/A"); + + for (int i = 0; i < 3; i++) + { + t.enqueue("/A", {10, 10}); + t.dequeue(1); + t.consumed("A", 10); + + for (int j = 0; j < 3; j++) + { + t.enqueue("/A", {10, 10, 10}); + t.dequeue(1); + t.consumed("A", 10); + t.dequeue(1); + t.consumed("A", 10); + t.dequeue(1); + t.consumed("A", 10); + } + + t.dequeue(1); + t.consumed("A", 10); + } +} diff --git a/src/IO/Resource/tests/gtest_resource_manager_static.cpp b/src/IO/Resource/tests/gtest_resource_manager_static.cpp new file mode 100644 index 00000000000..1a0af9198bc --- /dev/null +++ b/src/IO/Resource/tests/gtest_resource_manager_static.cpp @@ -0,0 +1,103 @@ +#include + +#include + +#include +#include + +using namespace DB; + +using 
ResourceTest = ResourceTestManager; +using TestGuard = ResourceTest::Guard; + +TEST(IOResourceStaticResourceManager, Smoke) +{ + ResourceTest t; + + t.update(R"CONFIG( + + + + inflight_limit10 + priority + + 1 + + + + /prio/A + /prio/B + + + )CONFIG"); + + ClassifierPtr ca = t.manager->acquire("A"); + ClassifierPtr cb = t.manager->acquire("B"); + + for (int i = 0; i < 10; i++) + { + ResourceGuard ga(ca->get("res1")); + ResourceGuard gb(cb->get("res1")); + } +} + +TEST(IOResourceStaticResourceManager, Prioritization) +{ + constexpr size_t threads_per_queue = 2; + int requests_per_thead = 100; + ResourceTest t(4 * threads_per_queue + 1); + + t.update(R"CONFIG( + + + + inflight_limit1 + priority + -1 + 1 + + + + + + + /prio/A + /prio/B + /prio/C + /prio/D + /prio/leader + + + )CONFIG"); + + std::optional last_priority; + auto check = [&] (Int64 priority) + { + // Lock is not required here because this is called during request execution and we have max_requests = 1 + if (last_priority) + EXPECT_TRUE(priority <= *last_priority); // Should be true if every queue arrived at the same time at busy period start + last_priority = priority; + }; + + for (String name : {"A", "B", "C", "D"}) + { + for (int thr = 0; thr < threads_per_queue; thr++) + { + t.threads.emplace_back([&, name] + { + ClassifierPtr c = t.manager->acquire(name); + ResourceLink link = c->get("res1"); + t.startBusyPeriod(link, 1, requests_per_thead); + for (int req = 0; req < requests_per_thead; req++) + { + TestGuard g(t, link, 1); + check(link.queue->info.priority); + } + }); + } + } + + ClassifierPtr c = t.manager->acquire("leader"); + ResourceLink link = c->get("res1"); + t.blockResource(link); +} diff --git a/src/IO/Resource/tests/gtest_resource_scheduler.cpp b/src/IO/Resource/tests/gtest_resource_scheduler.cpp new file mode 100644 index 00000000000..da2ffb2dd25 --- /dev/null +++ b/src/IO/Resource/tests/gtest_resource_scheduler.cpp @@ -0,0 +1,113 @@ +#include + +#include + +#include + +#include + +using namespace DB; + +struct ResourceTest : public ResourceTestBase +{ + SchedulerRoot scheduler; + + ResourceTest() + { + scheduler.start(); + } + + ~ResourceTest() + { + scheduler.stop(true); + } +}; + +struct ResourceHolder +{ + ResourceTest & t; + SchedulerNodePtr root_node; + + explicit ResourceHolder(ResourceTest & t_) + : t(t_) + {} + + ~ResourceHolder() + { + unregisterResource(); + } + + template + TClass * add(const String & path, const String & xml = {}) + { + return ResourceTest::add(t.scheduler.event_queue, root_node, path, xml); + } + + ResourceLink addQueue(const String & path, const String & xml = {}) + { + return {.queue = static_cast(ResourceTest::add(t.scheduler.event_queue, root_node, path, xml))}; + } + + void registerResource() + { + std::promise p; + auto f = p.get_future(); + t.scheduler.event_queue->enqueue([this, &p] + { + t.scheduler.attachChild(root_node); + p.set_value(); + }); + f.get(); + } + + void unregisterResource() + { + std::promise p; + auto f = p.get_future(); + t.scheduler.event_queue->enqueue([this, &p] + { + t.scheduler.removeChild(root_node.get()); + p.set_value(); + }); + f.get(); + } +}; + +TEST(IOSchedulerRoot, Smoke) +{ + ResourceTest t; + + ResourceHolder r1(t); + auto * fc1 = r1.add("/", "1"); + r1.add("/prio"); + auto a = r1.addQueue("/prio/A", "1"); + auto b = r1.addQueue("/prio/B", "2"); + r1.registerResource(); + + ResourceHolder r2(t); + auto * fc2 = r2.add("/", "1"); + r2.add("/prio"); + auto c = r2.addQueue("/prio/C", "-1"); + auto d = r2.addQueue("/prio/D", "-2"); + 
r2.registerResource(); + + { + ResourceGuard rg(a); + EXPECT_TRUE(fc1->requests.contains(&rg.request)); + } + + { + ResourceGuard rg(b); + EXPECT_TRUE(fc1->requests.contains(&rg.request)); + } + + { + ResourceGuard rg(c); + EXPECT_TRUE(fc2->requests.contains(&rg.request)); + } + + { + ResourceGuard rg(d); + EXPECT_TRUE(fc2->requests.contains(&rg.request)); + } +} diff --git a/src/IO/ResourceGuard.h b/src/IO/ResourceGuard.h new file mode 100644 index 00000000000..c8b826d0932 --- /dev/null +++ b/src/IO/ResourceGuard.h @@ -0,0 +1,93 @@ +#pragma once + +#include + +#include +#include +#include + +#include + +namespace DB +{ + +/* + * Scoped resource guard. + * Waits for resource to be available in constructor and releases resource in destructor + */ +class ResourceGuard +{ +public: + enum ResourceGuardCtor + { + LockStraightAway, /// Lock inside constructor (default) + PostponeLocking /// Don't lock in constructor, but during later `lock()` call + }; + + struct Request : public ResourceRequest + { + /// Promise to be set on request execution + std::promise dequeued; + + explicit Request(ResourceCost cost_ = 1) + : ResourceRequest(cost_) + {} + + void execute() override + { + // This function is executed inside scheduler thread and wakes thread issued this `request` (using ResourceGuard) + // That thread will continue execution and do real consumption of requested resource synchronously. + dequeued.set_value(); + } + }; + + /// Creates pending request for resource; blocks while resource is not available (unless `PostponeLocking`) + explicit ResourceGuard(ResourceLink link_, ResourceCost cost = 1, ResourceGuardCtor ctor = LockStraightAway) + : link(link_) + , request(cost) + { + if (link.queue) + { + dequeued_future = request.dequeued.get_future(); + link.queue->enqueueRequest(&request); + if (ctor == LockStraightAway) + lock(); + } + } + + ~ResourceGuard() + { + unlock(); + } + + /// Blocks until resource is available + void lock() + { + if (link.queue) + dequeued_future.get(); + } + + /// Report request execution has finished + void unlock() + { + if (link.queue) + { + assert(!dequeued_future.valid()); // unlock must be called only after lock() + if (request.constraint) + request.constraint->finishRequest(&request); + } + } + + /// Mark request as unsuccessful; by default request is considered to be successful + void setFailure() + { + request.successful = false; + } + +public: + ResourceLink link; + Request request; + std::future dequeued_future; +}; + +} diff --git a/src/IO/ResourceManagerFactory.h b/src/IO/ResourceManagerFactory.h new file mode 100644 index 00000000000..8e972f05640 --- /dev/null +++ b/src/IO/ResourceManagerFactory.h @@ -0,0 +1,55 @@ +#pragma once + +#include +#include + +#include + +#include + +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int INVALID_SCHEDULER_NODE; +} + +class ResourceManagerFactory : private boost::noncopyable +{ +public: + static ResourceManagerFactory & instance() + { + static ResourceManagerFactory ret; + return ret; + } + + ResourceManagerPtr get(const String & name) + { + std::lock_guard lock{mutex}; + if (auto iter = methods.find(name); iter != methods.end()) + return iter->second(); + throw Exception(ErrorCodes::INVALID_SCHEDULER_NODE, "Unknown scheduler node type: {}", name); + } + + template + void registerMethod(const String & name) + { + std::lock_guard lock{mutex}; + methods[name] = [] () + { + return std::make_shared(); + }; + } + +private: + std::mutex mutex; + using Method = std::function; + 
std::unordered_map methods; +}; + +} diff --git a/src/IO/ResourceRequest.h b/src/IO/ResourceRequest.h new file mode 100644 index 00000000000..93c33bcef22 --- /dev/null +++ b/src/IO/ResourceRequest.h @@ -0,0 +1,92 @@ +#pragma once + +#include +#include + +namespace DB +{ + +// Forward declarations +class ISchedulerQueue; +class ISchedulerNode; +class ISchedulerConstraint; + +/// Cost in terms of used resource (e.g. bytes for network IO) +using ResourceCost = Int64; +constexpr ResourceCost ResourceCostMax = std::numeric_limits::max(); + +/// Internal identifier of a resource (for arrays; unique per scheduler) +using ResourceIdx = size_t; +constexpr ResourceIdx ResourceIdxNotSet = ResourceIdx(-1); + +/// Timestamps (nanoseconds since epoch) +using ResourceNs = UInt64; + +/* + * Info required for resource consumption. + */ +struct ResourceLink +{ + ISchedulerQueue * queue = nullptr; + bool operator==(const ResourceLink &) const = default; +}; + +/* + * Request for a resource consumption. The main moving part of the scheduling subsystem. + * Resource requests processing workflow: + * + * ----1=2222222222222=3=4=555555555555555=6-----> time + * ^ ^ ^ ^ ^ ^ + * | | | | | | + * enqueue wait dequeue execute consume finish + * + * 1) Request is enqueued using ISchedulerQueue::enqueueRequest(). + * 2) Request competes with others for access to a resource; effectively just waiting in a queue. + * 3) Scheduler calls ISchedulerNode::dequeueRequest() that returns the request. + * 4) Callback ResourceRequest::execute() is called to provide access to the resource. + * 5) The resource consumption is happening outside of the scheduling subsystem. + * 6) request->constraint->finishRequest() is called when consumption is finished. + * + * Steps (5) and (6) can be omitted if constraint is not used by the resource. + * + * Request can be created on stack or heap. + * Request ownership is done outside of the scheduling subsystem. + * After (6) request can be destructed safely. + * + * Request cancelling is not supported yet. + */ +class ResourceRequest +{ +public: + /// Cost of request execution; should be filled before request enqueueing. + /// NOTE: If cost is not known in advance, credit model can be used: + /// NOTE: for the first request use 1 and + ResourceCost cost; + + /// Request outcome + /// Should be filled during resource consumption + bool successful = true; + + /// Scheduler node to be notified on consumption finish + /// Auto-filled during request enqueue/dequeue + ISchedulerConstraint * constraint = nullptr; + + /// Timestamps for introspection + ResourceNs enqueue_ns = 0; + ResourceNs execute_ns = 0; + ResourceNs finish_ns = 0; + + explicit ResourceRequest(ResourceCost cost_ = 1) + : cost(cost_) + {} + + virtual ~ResourceRequest() = default; + + /// Callback to trigger resource consumption. + /// IMPORTANT: is called from scheduler thread and must be fast, + /// just triggering start of a consumption, not doing the consumption itself + /// (e.g. 
setting an std::promise or creating a job in a thread pool) + virtual void execute() = 0; +}; + +} diff --git a/src/IO/SchedulerNodeFactory.h b/src/IO/SchedulerNodeFactory.h new file mode 100644 index 00000000000..5c31534a9b8 --- /dev/null +++ b/src/IO/SchedulerNodeFactory.h @@ -0,0 +1,57 @@ +#pragma once + +#include +#include + +#include + +#include + +#include + +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int INVALID_SCHEDULER_NODE; +} + +class SchedulerNodeFactory : private boost::noncopyable +{ +public: + static SchedulerNodeFactory & instance() + { + static SchedulerNodeFactory ret; + return ret; + } + + SchedulerNodePtr get(const String & name, EventQueue * event_queue, const Poco::Util::AbstractConfiguration & config, const String & config_prefix) + { + std::lock_guard lock{mutex}; + if (auto iter = methods.find(name); iter != methods.end()) + return iter->second(event_queue, config, config_prefix); + throw Exception(ErrorCodes::INVALID_SCHEDULER_NODE, "Unknown scheduler node type: {}", name); + } + + template + void registerMethod(const String & name) + { + std::lock_guard lock{mutex}; + methods[name] = [] (EventQueue * event_queue, const Poco::Util::AbstractConfiguration & config, const String & config_prefix) + { + return std::make_shared(event_queue, config, config_prefix); + }; + } + +private: + std::mutex mutex; + using Method = std::function; + std::unordered_map methods; +}; + +} diff --git a/src/IO/SchedulerRoot.h b/src/IO/SchedulerRoot.h new file mode 100644 index 00000000000..f9af2099b8c --- /dev/null +++ b/src/IO/SchedulerRoot.h @@ -0,0 +1,250 @@ +#pragma once + +#include + +#include +#include + +#include +#include + +#include + +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int INVALID_SCHEDULER_NODE; +} + +/* + * Resource scheduler root node with a dedicated thread. + * Immediate children correspond to different resources. 
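 *
 * Rough lifecycle sketch (assumed from the public interface below and from its
 * use in StaticResourceManager.cpp, not verbatim from this patch):
 *
 *   SchedulerRoot scheduler;
 *   scheduler.attachChild(resource_root);  // root node of one resource's hierarchy
 *   scheduler.start();                     // spawn the dedicated scheduler thread
 *   // ... consumer threads create ResourceGuard objects on queues of that hierarchy ...
 *   scheduler.stop();                      // graceful by default: joins the thread,
 *                                          // then executes whatever is still pending
 *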
+ */ +class SchedulerRoot : public ISchedulerNode +{ +private: + struct TResource + { + SchedulerNodePtr root; + + // Intrusive cyclic list of active resources + TResource * next = nullptr; + TResource * prev = nullptr; + + explicit TResource(const SchedulerNodePtr & root_) + : root(root_) + { + root->info.parent.ptr = this; + } + + // Get pointer stored by ctor in info + static TResource * get(SchedulerNodeInfo & info) + { + return reinterpret_cast(info.parent.ptr); + } + }; + +public: + SchedulerRoot() + : ISchedulerNode(&events) + {} + + ~SchedulerRoot() override + { + stop(); + } + + /// Runs separate scheduler thread + void start() + { + if (!scheduler.joinable()) + scheduler = ThreadFromGlobalPool([this] { schedulerThread(); }); + } + + /// Joins scheduler threads and execute every pending request iff graceful + void stop(bool graceful = true) + { + if (scheduler.joinable()) + { + stop_flag.store(true); + events.enqueue([]{}); // just to wake up thread + scheduler.join(); + if (graceful) + { + // Do the same cycle as schedulerThread() but never block, just exit instead + bool has_work = true; + while (has_work) + { + auto [request, _] = dequeueRequest(); + if (request) + execute(request); + else + has_work = false; + while (events.tryProcess()) + has_work = true; + } + } + } + } + + bool equals(ISchedulerNode * other) override + { + if (auto * o = dynamic_cast(other)) + return true; + return false; + } + + void attachChild(const SchedulerNodePtr & child) override + { + // Take ownership + assert(child->parent == nullptr); + if (auto [it, inserted] = children.emplace(child.get(), child); !inserted) + throw Exception( + ErrorCodes::INVALID_SCHEDULER_NODE, + "Can't add the same scheduler node twice"); + + // Attach + child->setParent(this); + + // Activate child if required + if (child->isActive()) + activateChild(child.get()); + } + + void removeChild(ISchedulerNode * child) override + { + if (auto iter = children.find(child); iter != children.end()) + { + SchedulerNodePtr removed = iter->second.root; + + // Deactivate if required + deactivate(&iter->second); + + // Detach + removed->setParent(nullptr); + + // Remove ownership + children.erase(iter); + } + } + + ISchedulerNode * getChild(const String &) override + { + abort(); // scheduler is allowed to have multiple children with the same name + } + + std::pair dequeueRequest() override + { + if (current == nullptr) // No active resources + return {nullptr, false}; + + // Dequeue request from current resource + auto [request, resource_active] = current->root->dequeueRequest(); + assert(request != nullptr); + + // Deactivate resource if required + if (!resource_active) + deactivate(current); + else + current = current->next; // Just move round-robin pointer + + return {request, current != nullptr}; + } + + bool isActive() override + { + return current != nullptr; + } + + void activateChild(ISchedulerNode * child) override + { + activate(TResource::get(child->info)); + } + + void setParent(ISchedulerNode *) override + { + abort(); // scheduler must be the root and this function should not be called + } + +private: + void activate(TResource * value) + { + assert(value->next == nullptr && value->prev == nullptr); + if (current == nullptr) // No active children + { + current = value; + value->prev = value; + value->next = value; + } + else + { + current->prev->next = value; + value->prev = current->prev; + current->prev = value; + value->next = current; + } + } + + void deactivate(TResource * value) + { + if (value->next == nullptr) + 
return; // Already deactivated + assert(current != nullptr); + if (current == value) + { + if (current->next == current) // We are going to remove the last active child + { + value->next = nullptr; + value->prev = nullptr; + current = nullptr; + return; + } + else // Just move current to next to avoid invalidation + current = current->next; + } + value->prev->next = value->next; + value->next->prev = value->prev; + value->prev = nullptr; + value->next = nullptr; + } + +private: + void schedulerThread() + { + while (!stop_flag.load()) + { + // Dequeue and execute single request + auto [request, _] = dequeueRequest(); + if (request) + execute(request); + else // No more requests -- block until any event happens + events.process(); + + // Process all events before dequeuing to ensure fair competition + while (events.tryProcess()) {} + } + } + + void execute(ResourceRequest * request) + { + request->execute_ns = clock_gettime_ns(); + request->execute(); + } + +private: + TResource * current = nullptr; // round-robin pointer + std::unordered_map children; // resources by pointer + std::atomic stop_flag = false; + EventQueue events; + ThreadFromGlobalPool scheduler; +}; + +} diff --git a/src/IO/WriteBufferFromS3.cpp b/src/IO/WriteBufferFromS3.cpp index d5ec2eeee39..37bc8c78cf4 100644 --- a/src/IO/WriteBufferFromS3.cpp +++ b/src/IO/WriteBufferFromS3.cpp @@ -72,7 +72,7 @@ WriteBufferFromS3::WriteBufferFromS3( std::shared_ptr client_ptr_, const String & bucket_, const String & key_, - const S3Settings::RequestSettings & request_settings_, + const S3Settings::RequestSettings & request_settings, std::optional> object_metadata_, size_t buffer_size_, ThreadPoolCallbackRunner schedule_, @@ -80,10 +80,12 @@ WriteBufferFromS3::WriteBufferFromS3( : BufferWithOwnMemory(buffer_size_, nullptr, 0) , bucket(bucket_) , key(key_) - , request_settings(request_settings_) + , settings(request_settings.getUploadSettings()) + , check_objects_after_upload(request_settings.check_objects_after_upload) + , max_unexpected_write_error_retries(request_settings.max_unexpected_write_error_retries) , client_ptr(std::move(client_ptr_)) , object_metadata(std::move(object_metadata_)) - , upload_part_size(request_settings_.min_upload_part_size) + , upload_part_size(settings.min_upload_part_size) , schedule(std::move(schedule_)) , write_settings(write_settings_) { @@ -108,9 +110,10 @@ void WriteBufferFromS3::nextImpl() write_settings.remote_throttler->add(offset()); /// Data size exceeds singlepart upload threshold, need to use multipart upload. - if (multipart_upload_id.empty() && last_part_size > request_settings.max_single_part_upload_size) + if (multipart_upload_id.empty() && last_part_size > settings.max_single_part_upload_size) createMultipartUpload(); + chassert(upload_part_size > 0); if (!multipart_upload_id.empty() && last_part_size > upload_part_size) { writePart(); @@ -175,7 +178,7 @@ void WriteBufferFromS3::finalizeImpl() if (!multipart_upload_id.empty()) completeMultipartUpload(); - if (request_settings.check_objects_after_upload) + if (check_objects_after_upload) { LOG_TRACE(log, "Checking object {} exists after upload", key); @@ -300,15 +303,15 @@ void WriteBufferFromS3::fillUploadRequest(Aws::S3::Model::UploadPartRequest & re { /// Increase part number. 
++part_number; - if (!multipart_upload_id.empty() && (part_number > request_settings.max_part_number)) + if (!multipart_upload_id.empty() && (part_number > settings.max_part_number)) { throw Exception( ErrorCodes::INVALID_CONFIG_PARAMETER, "Part number exceeded {} while writing {} bytes to S3. Check min_upload_part_size = {}, max_upload_part_size = {}, " "upload_part_size_multiply_factor = {}, upload_part_size_multiply_parts_count_threshold = {}, max_single_part_upload_size = {}", - request_settings.max_part_number, count(), request_settings.min_upload_part_size, request_settings.max_upload_part_size, - request_settings.upload_part_size_multiply_factor, request_settings.upload_part_size_multiply_parts_count_threshold, - request_settings.max_single_part_upload_size); + settings.max_part_number, count(), settings.min_upload_part_size, settings.max_upload_part_size, + settings.upload_part_size_multiply_factor, settings.upload_part_size_multiply_parts_count_threshold, + settings.max_single_part_upload_size); } /// Setup request. @@ -323,10 +326,10 @@ void WriteBufferFromS3::fillUploadRequest(Aws::S3::Model::UploadPartRequest & re req.SetContentType("binary/octet-stream"); /// Maybe increase `upload_part_size` (we need to increase it sometimes to keep `part_number` less or equal than `max_part_number`). - if (!multipart_upload_id.empty() && (part_number % request_settings.upload_part_size_multiply_parts_count_threshold == 0)) + if (!multipart_upload_id.empty() && (part_number % settings.upload_part_size_multiply_parts_count_threshold == 0)) { - upload_part_size *= request_settings.upload_part_size_multiply_factor; - upload_part_size = std::min(upload_part_size, request_settings.max_upload_part_size); + upload_part_size *= settings.upload_part_size_multiply_factor; + upload_part_size = std::min(upload_part_size, settings.max_upload_part_size); } } @@ -371,7 +374,7 @@ void WriteBufferFromS3::completeMultipartUpload() req.SetMultipartUpload(multipart_upload); - size_t max_retry = std::max(request_settings.max_unexpected_write_error_retries, 1UL); + size_t max_retry = std::max(max_unexpected_write_error_retries, 1UL); for (size_t i = 0; i < max_retry; ++i) { ProfileEvents::increment(ProfileEvents::S3CompleteMultipartUpload); @@ -477,7 +480,7 @@ void WriteBufferFromS3::fillPutRequest(Aws::S3::Model::PutObjectRequest & req) void WriteBufferFromS3::processPutRequest(const PutObjectTask & task) { - size_t max_retry = std::max(request_settings.max_unexpected_write_error_retries, 1UL); + size_t max_retry = std::max(max_unexpected_write_error_retries, 1UL); for (size_t i = 0; i < max_retry; ++i) { ProfileEvents::increment(ProfileEvents::S3PutObject); diff --git a/src/IO/WriteBufferFromS3.h b/src/IO/WriteBufferFromS3.h index 1663b186437..41ed009bcf9 100644 --- a/src/IO/WriteBufferFromS3.h +++ b/src/IO/WriteBufferFromS3.h @@ -50,7 +50,7 @@ public: std::shared_ptr client_ptr_, const String & bucket_, const String & key_, - const S3Settings::RequestSettings & request_settings_, + const S3Settings::RequestSettings & request_settings, std::optional> object_metadata_ = std::nullopt, size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE, ThreadPoolCallbackRunner schedule_ = {}, @@ -88,7 +88,9 @@ private: const String bucket; const String key; - const S3Settings::RequestSettings request_settings; + const S3Settings::RequestSettings::PartUploadSettings settings; + const bool check_objects_after_upload = false; + const size_t max_unexpected_write_error_retries = 4; const std::shared_ptr client_ptr; const std::optional> 
object_metadata; diff --git a/src/IO/WriteBufferFromTemporaryFile.cpp b/src/IO/WriteBufferFromTemporaryFile.cpp index f93c79ca587..4562ad512b3 100644 --- a/src/IO/WriteBufferFromTemporaryFile.cpp +++ b/src/IO/WriteBufferFromTemporaryFile.cpp @@ -13,7 +13,7 @@ namespace ErrorCodes } -WriteBufferFromTemporaryFile::WriteBufferFromTemporaryFile(std::unique_ptr && tmp_file_) +WriteBufferFromTemporaryFile::WriteBufferFromTemporaryFile(std::unique_ptr && tmp_file_) : WriteBufferFromFile(tmp_file_->path(), DBMS_DEFAULT_BUFFER_SIZE, O_RDWR | O_TRUNC | O_CREAT, 0600), tmp_file(std::move(tmp_file_)) {} @@ -40,11 +40,11 @@ public: return std::make_shared(fd, file_name, std::move(origin->tmp_file)); } - ReadBufferFromTemporaryWriteBuffer(int fd_, const std::string & file_name_, std::unique_ptr && tmp_file_) + ReadBufferFromTemporaryWriteBuffer(int fd_, const std::string & file_name_, std::unique_ptr && tmp_file_) : ReadBufferFromFile(fd_, file_name_), tmp_file(std::move(tmp_file_)) {} - std::unique_ptr tmp_file; + std::unique_ptr tmp_file; }; diff --git a/src/IO/WriteBufferFromTemporaryFile.h b/src/IO/WriteBufferFromTemporaryFile.h index 06e2911db26..a4e83b95ac6 100644 --- a/src/IO/WriteBufferFromTemporaryFile.h +++ b/src/IO/WriteBufferFromTemporaryFile.h @@ -20,11 +20,11 @@ public: ~WriteBufferFromTemporaryFile() override; private: - explicit WriteBufferFromTemporaryFile(std::unique_ptr && tmp_file); + explicit WriteBufferFromTemporaryFile(std::unique_ptr && tmp_file); std::shared_ptr getReadBufferImpl() override; - std::unique_ptr tmp_file; + std::unique_ptr tmp_file; friend class ReadBufferFromTemporaryWriteBuffer; }; diff --git a/src/IO/WriteHelpers.h b/src/IO/WriteHelpers.h index 003e5a56958..8dbfe63be7e 100644 --- a/src/IO/WriteHelpers.h +++ b/src/IO/WriteHelpers.h @@ -1098,6 +1098,25 @@ inline String toString(const T & x) return buf.str(); } +template +inline String toStringWithFinalSeparator(const std::vector & x, const String & final_sep) +{ + WriteBufferFromOwnString buf; + for (auto it = x.begin(); it != x.end(); ++it) + { + if (it != x.begin()) + { + if (std::next(it) == x.end()) + writeString(final_sep, buf); + else + writeString(", ", buf); + } + writeQuoted(*it, buf); + } + + return buf.str(); +} + inline void writeNullTerminatedString(const String & s, WriteBuffer & buffer) { /// c_str is guaranteed to return zero-terminated string diff --git a/src/IO/WriteSettings.h b/src/IO/WriteSettings.h index a1f5b23fb97..764d6c8992b 100644 --- a/src/IO/WriteSettings.h +++ b/src/IO/WriteSettings.h @@ -15,6 +15,8 @@ struct WriteSettings bool enable_filesystem_cache_on_write_operations = false; bool enable_filesystem_cache_log = false; bool is_file_cache_persistent = false; + bool throw_on_error_from_cache = false; + bool s3_allow_parallel_part_upload = true; /// Monitoring diff --git a/src/IO/tests/gtest_WriteHelpers.cpp b/src/IO/tests/gtest_WriteHelpers.cpp new file mode 100644 index 00000000000..b3c7062be58 --- /dev/null +++ b/src/IO/tests/gtest_WriteHelpers.cpp @@ -0,0 +1,32 @@ +#include + +#include +#include +#include + +using namespace DB; + + +TEST(WriteHelpersTest, ToStringWithFinalSeparatorTest) +{ + { + std::vector v; + EXPECT_EQ(toStringWithFinalSeparator(v, " or "), ""); + } + { + std::vector v = {"AAA"}; + EXPECT_EQ(toStringWithFinalSeparator(v, " or "), "'AAA'"); + } + { + std::vector v = {"AAA", "BBB"}; + EXPECT_EQ(toStringWithFinalSeparator(v, " or "), "'AAA' or 'BBB'"); + } + { + std::vector v = {"AAA", "BBB", "CCC"}; + EXPECT_EQ(toStringWithFinalSeparator(v, " or "), "'AAA', 'BBB' 
or 'CCC'"); + } + { + std::vector v = {"AAA", "BBB", "CCC", "DDD"}; + EXPECT_EQ(toStringWithFinalSeparator(v, " or "), "'AAA', 'BBB', 'CCC' or 'DDD'"); + } +} diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index a7ca6ed521d..3b4d2dd1dd4 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -47,8 +48,6 @@ void ActionsDAG::Node::toTree(JSONBuilder::JSONMap & map) const if (function_base) map.add("Function", function_base->getName()); - else if (function_builder) - map.add("Function", function_builder->getName()); if (type == ActionType::FUNCTION) map.add("Compiled", is_function_compiled); @@ -141,7 +140,7 @@ const ActionsDAG::Node & ActionsDAG::addAlias(const Node & child, std::string al const ActionsDAG::Node & ActionsDAG::addArrayJoin(const Node & child, std::string result_name) { - const DataTypeArray * array_type = typeid_cast(child.result_type.get()); + const auto & array_type = getArrayJoinDataType(child.result_type); if (!array_type) throw Exception("ARRAY JOIN requires array argument", ErrorCodes::TYPE_MISMATCH); @@ -166,7 +165,6 @@ const ActionsDAG::Node & ActionsDAG::addFunction( Node node; node.type = ActionType::FUNCTION; - node.function_builder = function; node.children = std::move(children); bool all_const = true; @@ -238,6 +236,86 @@ const ActionsDAG::Node & ActionsDAG::addFunction( return addNode(std::move(node)); } +const ActionsDAG::Node & ActionsDAG::addFunction( + const FunctionBasePtr & function_base, + NodeRawConstPtrs children, + std::string result_name) +{ + size_t num_arguments = children.size(); + + Node node; + node.type = ActionType::FUNCTION; + node.children = std::move(children); + + bool all_const = true; + ColumnsWithTypeAndName arguments(num_arguments); + + for (size_t i = 0; i < num_arguments; ++i) + { + const auto & child = *node.children[i]; + + ColumnWithTypeAndName argument; + argument.column = child.column; + argument.type = child.result_type; + argument.name = child.result_name; + + if (!argument.column || !isColumnConst(*argument.column)) + all_const = false; + + arguments[i] = std::move(argument); + } + + node.function_base = function_base; + node.result_type = node.function_base->getResultType(); + node.function = node.function_base->prepare(arguments); + node.is_deterministic = node.function_base->isDeterministic(); + + /// If all arguments are constants, and function is suitable to be executed in 'prepare' stage - execute function. + if (node.function_base->isSuitableForConstantFolding()) + { + ColumnPtr column; + + if (all_const) + { + size_t num_rows = arguments.empty() ? 0 : arguments.front().column->size(); + column = node.function->execute(arguments, node.result_type, num_rows, true); + } + else + { + column = node.function_base->getConstantResultForNonConstArguments(arguments, node.result_type); + } + + /// If the result is not a constant, just in case, we will consider the result as unknown. + if (column && isColumnConst(*column)) + { + /// All constant (literal) columns in block are added with size 1. + /// But if there was no columns in block before executing a function, the result has size 0. + /// Change the size to 1. 
+ + if (column->empty()) + column = column->cloneResized(1); + + node.column = std::move(column); + } + } + + if (result_name.empty()) + { + result_name = function_base->getName() + "("; + for (size_t i = 0; i < num_arguments; ++i) + { + if (i) + result_name += ", "; + result_name += node.children[i]->result_name; + } + result_name += ")"; + } + + node.result_name = std::move(result_name); + + return addNode(std::move(node)); +} + const ActionsDAG::Node & ActionsDAG::findInOutputs(const std::string & name) const { if (const auto * node = tryFindInOutputs(name)) @@ -463,11 +541,10 @@ static ColumnWithTypeAndName executeActionForHeader(const ActionsDAG::Node * nod auto key = arguments.at(0); key.column = key.column->convertToFullColumnIfConst(); - const ColumnArray * array = typeid_cast(key.column.get()); + const auto * array = getArrayJoinColumnRawPtr(key.column); if (!array) throw Exception(ErrorCodes::TYPE_MISMATCH, - "ARRAY JOIN of not array: {}", node->result_name); - + "ARRAY JOIN of not array nor map: {}", node->result_name); res_column.column = array->getDataPtr()->cloneEmpty(); break; } @@ -1537,12 +1614,39 @@ ActionsDAG::SplitResult ActionsDAG::splitActionsBeforeArrayJoin(const NameSet & return res; } +ActionsDAG::NodeRawConstPtrs ActionsDAG::getParents(const Node * target) const +{ + NodeRawConstPtrs parents; + for (const auto & node : getNodes()) + { + for (const auto & child : node.children) + { + if (child == target) + { + parents.push_back(&node); + break; + } + } + } + return parents; +} + ActionsDAG::SplitResult ActionsDAG::splitActionsBySortingDescription(const NameSet & sort_columns) const { std::unordered_set split_nodes; for (const auto & sort_column : sort_columns) if (const auto * node = tryFindInOutputs(sort_column)) + { split_nodes.insert(node); + /// Sorting can materialize const columns, so if we have const expression used in sorting, + /// we should also add all it's parents, otherwise, we can break the header + /// (function can expect const column, but will get materialized). + if (node->column && isColumnConst(*node->column)) + { + auto parents = getParents(node); + split_nodes.insert(parents.begin(), parents.end()); + } + } else throw Exception(ErrorCodes::LOGICAL_ERROR, "Sorting column {} wasn't found in the ActionsDAG's outputs. 
DAG:\n{}", @@ -1927,8 +2031,7 @@ ActionsDAGPtr ActionsDAG::cloneActionsForFilterPushDown( FunctionOverloadResolverPtr func_builder_cast = CastInternalOverloadResolver::createImpl(); - predicate->function_builder = func_builder_cast; - predicate->function_base = predicate->function_builder->build(arguments); + predicate->function_base = func_builder_cast->build(arguments); predicate->function = predicate->function_base->prepare(arguments); } } @@ -1939,7 +2042,9 @@ ActionsDAGPtr ActionsDAG::cloneActionsForFilterPushDown( predicate->children.swap(new_children); auto arguments = prepareFunctionArguments(predicate->children); - predicate->function_base = predicate->function_builder->build(arguments); + FunctionOverloadResolverPtr func_builder_and = std::make_unique(std::make_shared()); + + predicate->function_base = func_builder_and->build(arguments); predicate->function = predicate->function_base->prepare(arguments); } } @@ -2144,7 +2249,7 @@ ActionsDAGPtr ActionsDAG::buildFilterActionsDAG( for (const auto & child : node->children) function_children.push_back(node_to_result_node.find(child)->second); - result_node = &result_dag->addFunction(node->function_builder, std::move(function_children), {}); + result_node = &result_dag->addFunction(node->function_base, std::move(function_children), {}); break; } } diff --git a/src/Interpreters/ActionsDAG.h b/src/Interpreters/ActionsDAG.h index 7f00250505c..f574757abac 100644 --- a/src/Interpreters/ActionsDAG.h +++ b/src/Interpreters/ActionsDAG.h @@ -17,7 +17,7 @@ class IExecutableFunction; using ExecutableFunctionPtr = std::shared_ptr; class IFunctionBase; -using FunctionBasePtr = std::shared_ptr; +using FunctionBasePtr = std::shared_ptr; class IFunctionOverloadResolver; using FunctionOverloadResolverPtr = std::shared_ptr; @@ -74,7 +74,6 @@ public: std::string result_name; DataTypePtr result_type; - FunctionOverloadResolverPtr function_builder; /// Can be used to get function signature or properties like monotonicity. FunctionBasePtr function_base; /// Prepared function which is used in function execution. @@ -139,6 +138,10 @@ public: const FunctionOverloadResolverPtr & function, NodeRawConstPtrs children, std::string result_name); + const Node & addFunction( + const FunctionBasePtr & function_base, + NodeRawConstPtrs children, + std::string result_name); /// Find first column by name in output nodes. This search is linear. 
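    /// (On the addFunction overload declared above: it mirrors the resolver-based overload but takes
    /// an already-built FunctionBasePtr and skips the build step; buildFilterActionsDAG() uses it now
    /// that Node::function_builder has been removed, since function_base already provides the name
    /// and result type used elsewhere in the DAG.)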
const Node & findInOutputs(const std::string & name) const; @@ -343,6 +346,8 @@ public: const ContextPtr & context); private: + NodeRawConstPtrs getParents(const Node * target) const; + Node & addNode(Node node); #if USE_EMBEDDED_COMPILER diff --git a/src/Interpreters/AggregateDescription.cpp b/src/Interpreters/AggregateDescription.cpp index b0f51ea7c90..787e0a503f8 100644 --- a/src/Interpreters/AggregateDescription.cpp +++ b/src/Interpreters/AggregateDescription.cpp @@ -53,7 +53,7 @@ void AggregateDescription::explain(WriteBuffer & out, size_t indent) const out << type->getName(); } - out << ") → " << function->getReturnType()->getName() << "\n"; + out << ") → " << function->getResultType()->getName() << "\n"; } else out << prefix << " Function: nullptr\n"; @@ -109,7 +109,7 @@ void AggregateDescription::explain(JSONBuilder::JSONMap & map) const args_array->add(type->getName()); function_map->add("Argument Types", std::move(args_array)); - function_map->add("Result Type", function->getReturnType()->getName()); + function_map->add("Result Type", function->getResultType()->getName()); map.add("Function", std::move(function_map)); } diff --git a/src/Interpreters/AggregationUtils.cpp b/src/Interpreters/AggregationUtils.cpp index 4e870e8152b..157590e6f44 100644 --- a/src/Interpreters/AggregationUtils.cpp +++ b/src/Interpreters/AggregationUtils.cpp @@ -45,7 +45,7 @@ OutputBlockColumns prepareOutputBlockColumns( } else { - final_aggregate_columns[i] = aggregate_functions[i]->getReturnType()->createColumn(); + final_aggregate_columns[i] = aggregate_functions[i]->getResultType()->createColumn(); final_aggregate_columns[i]->reserve(rows); if (aggregate_functions[i]->isState()) diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index 14113514f1e..b42ec5c547c 100644 --- a/src/Interpreters/Aggregator.cpp +++ b/src/Interpreters/Aggregator.cpp @@ -448,7 +448,7 @@ Block Aggregator::Params::getHeader( { auto & elem = res.getByName(aggregate.column_name); - elem.type = aggregate.function->getReturnType(); + elem.type = aggregate.function->getResultType(); elem.column = elem.type->createColumn(); } } @@ -467,7 +467,7 @@ Block Aggregator::Params::getHeader( DataTypePtr type; if (final) - type = aggregate.function->getReturnType(); + type = aggregate.function->getResultType(); else type = std::make_shared(aggregate.function, argument_types, aggregate.parameters); @@ -1599,7 +1599,7 @@ void Aggregator::writeToTemporaryFile(AggregatedDataVariants & data_variants, si auto & out_stream = tmp_data->createStream(getHeader(false), max_temp_file_size); ProfileEvents::increment(ProfileEvents::ExternalAggregationWritePart); - LOG_DEBUG(log, "Writing part of aggregation data into temporary file {}", out_stream.path()); + LOG_DEBUG(log, "Writing part of aggregation data into temporary file {}", out_stream.getPath()); /// Flush only two-level data and possibly overflow data. 
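A note ahead of the ArrayJoinAction changes below: a Map column is represented on top of an Array(Tuple(key, value)) column, which is why the getArrayJoinDataType()/getArrayJoinColumn() helpers added below can hand the map's nested array to the existing ARRAY JOIN machinery unchanged. A minimal standalone sketch of the idea in plain C++ (illustrative names only, not the ClickHouse API):

#include <iostream>
#include <map>
#include <string>
#include <utility>
#include <vector>

// One (key, value) element of the map's underlying "array of tuples".
using Element = std::pair<std::string, int>;

// Unfold a map the way ARRAY JOIN unfolds an array: one output row per element.
static std::vector<Element> arrayJoinMap(const std::map<std::string, int> & m)
{
    std::vector<Element> rows(m.begin(), m.end());
    return rows;
}

int main()
{
    for (const auto & [k, v] : arrayJoinMap({{"a", 1}, {"b", 2}}))
        std::cout << k << " -> " << v << '\n';   // prints: a -> 1, then b -> 2
}
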
diff --git a/src/Interpreters/ApplyWithGlobalVisitor.cpp b/src/Interpreters/ApplyWithGlobalVisitor.cpp index a0f256fca83..1d36b4ab203 100644 --- a/src/Interpreters/ApplyWithGlobalVisitor.cpp +++ b/src/Interpreters/ApplyWithGlobalVisitor.cpp @@ -88,7 +88,7 @@ void ApplyWithGlobalVisitor::visit(ASTPtr & ast) if (auto * ast_with_alias = dynamic_cast(child.get())) exprs[ast_with_alias->alias] = child; } - for (auto it = node_union->list_of_selects->children.begin() + 1; it != node_union->list_of_selects->children.end(); ++it) + for (auto * it = node_union->list_of_selects->children.begin() + 1; it != node_union->list_of_selects->children.end(); ++it) { if (auto * union_child = (*it)->as()) visit(*union_child, exprs, with_expression_list); diff --git a/src/Interpreters/ArrayJoinAction.cpp b/src/Interpreters/ArrayJoinAction.cpp index 51aaa5fb169..ba54f1a324e 100644 --- a/src/Interpreters/ArrayJoinAction.cpp +++ b/src/Interpreters/ArrayJoinAction.cpp @@ -1,6 +1,8 @@ #include -#include #include +#include +#include +#include #include #include #include @@ -16,6 +18,46 @@ namespace ErrorCodes extern const int TYPE_MISMATCH; } +std::shared_ptr getArrayJoinDataType(DataTypePtr type) +{ + if (const auto * array_type = typeid_cast(type.get())) + return std::shared_ptr{type, array_type}; + else if (const auto * map_type = typeid_cast(type.get())) + { + const auto & nested_type = map_type->getNestedType(); + const auto * nested_array_type = typeid_cast(nested_type.get()); + return std::shared_ptr{nested_type, nested_array_type}; + } + else + return nullptr; +} + +ColumnPtr getArrayJoinColumn(const ColumnPtr & column) +{ + if (typeid_cast(column.get())) + return column; + else if (const auto * map = typeid_cast(column.get())) + return map->getNestedColumnPtr(); + else + return nullptr; +} + +const ColumnArray * getArrayJoinColumnRawPtr(const ColumnPtr & column) +{ + if (const auto & col_arr = getArrayJoinColumn(column)) + return typeid_cast(col_arr.get()); + return nullptr; +} + +ColumnWithTypeAndName convertArrayJoinColumn(const ColumnWithTypeAndName & src_col) +{ + ColumnWithTypeAndName array_col; + array_col.name = src_col.name; + array_col.type = getArrayJoinDataType(src_col.type); + array_col.column = getArrayJoinColumn(src_col.column->convertToFullColumnIfConst()); + return array_col; +} + ArrayJoinAction::ArrayJoinAction(const NameSet & array_joined_columns_, bool array_join_is_left, ContextPtr context) : columns(array_joined_columns_) , is_left(array_join_is_left) @@ -28,13 +70,12 @@ ArrayJoinAction::ArrayJoinAction(const NameSet & array_joined_columns_, bool arr { function_length = FunctionFactory::instance().get("length", context); function_greatest = FunctionFactory::instance().get("greatest", context); - function_arrayResize = FunctionFactory::instance().get("arrayResize", context); + function_array_resize = FunctionFactory::instance().get("arrayResize", context); } else if (is_left) function_builder = FunctionFactory::instance().get("emptyArrayToSingle", context); } - void ArrayJoinAction::prepare(ColumnsWithTypeAndName & sample) const { for (auto & current : sample) @@ -42,11 +83,13 @@ void ArrayJoinAction::prepare(ColumnsWithTypeAndName & sample) const if (!columns.contains(current.name)) continue; - const DataTypeArray * array_type = typeid_cast(&*current.type); - if (!array_type) - throw Exception("ARRAY JOIN requires array argument", ErrorCodes::TYPE_MISMATCH); - current.type = array_type->getNestedType(); - current.column = nullptr; + if (const auto & type = 
getArrayJoinDataType(current.type)) + { + current.column = nullptr; + current.type = type->getNestedType(); + } + else + throw Exception("ARRAY JOIN requires array or map argument", ErrorCodes::TYPE_MISMATCH); } } @@ -55,10 +98,10 @@ void ArrayJoinAction::execute(Block & block) if (columns.empty()) throw Exception("No arrays to join", ErrorCodes::LOGICAL_ERROR); - ColumnPtr any_array_ptr = block.getByName(*columns.begin()).column->convertToFullColumnIfConst(); - const ColumnArray * any_array = typeid_cast(&*any_array_ptr); + ColumnPtr any_array_map_ptr = block.getByName(*columns.begin()).column->convertToFullColumnIfConst(); + const auto * any_array = getArrayJoinColumnRawPtr(any_array_map_ptr); if (!any_array) - throw Exception("ARRAY JOIN of not array: " + *columns.begin(), ErrorCodes::TYPE_MISMATCH); + throw Exception("ARRAY JOIN requires array or map argument", ErrorCodes::TYPE_MISMATCH); /// If LEFT ARRAY JOIN, then we create columns in which empty arrays are replaced by arrays with one element - the default value. std::map non_empty_array_columns; @@ -78,7 +121,8 @@ void ArrayJoinAction::execute(Block & block) { auto & src_col = block.getByName(name); - ColumnsWithTypeAndName tmp_block{src_col}; //, {{}, uint64, {}}}; + ColumnWithTypeAndName array_col = convertArrayJoinColumn(src_col); + ColumnsWithTypeAndName tmp_block{array_col}; //, {{}, uint64, {}}}; auto len_col = function_length->build(tmp_block)->execute(tmp_block, uint64, rows); ColumnsWithTypeAndName tmp_block2{column_of_max_length, {len_col, uint64, {}}}; @@ -89,28 +133,35 @@ void ArrayJoinAction::execute(Block & block) { auto & src_col = block.getByName(name); - ColumnsWithTypeAndName tmp_block{src_col, column_of_max_length}; - src_col.column = function_arrayResize->build(tmp_block)->execute(tmp_block, src_col.type, rows); - any_array_ptr = src_col.column->convertToFullColumnIfConst(); + ColumnWithTypeAndName array_col = convertArrayJoinColumn(src_col); + ColumnsWithTypeAndName tmp_block{array_col, column_of_max_length}; + array_col.column = function_array_resize->build(tmp_block)->execute(tmp_block, array_col.type, rows); + + src_col = std::move(array_col); + any_array_map_ptr = src_col.column->convertToFullColumnIfConst(); } - any_array = typeid_cast(&*any_array_ptr); + any_array = getArrayJoinColumnRawPtr(any_array_map_ptr); + if (!any_array) + throw Exception("ARRAY JOIN requires array or map argument", ErrorCodes::TYPE_MISMATCH); } else if (is_left) { for (const auto & name : columns) { - auto src_col = block.getByName(name); - - ColumnsWithTypeAndName tmp_block{src_col}; - - non_empty_array_columns[name] = function_builder->build(tmp_block)->execute(tmp_block, src_col.type, src_col.column->size()); + const auto & src_col = block.getByName(name); + ColumnWithTypeAndName array_col = convertArrayJoinColumn(src_col); + ColumnsWithTypeAndName tmp_block{array_col}; + non_empty_array_columns[name] = function_builder->build(tmp_block)->execute(tmp_block, array_col.type, array_col.column->size()); } - any_array_ptr = non_empty_array_columns.begin()->second->convertToFullColumnIfConst(); - any_array = &typeid_cast(*any_array_ptr); + any_array_map_ptr = non_empty_array_columns.begin()->second->convertToFullColumnIfConst(); + any_array = getArrayJoinColumnRawPtr(any_array_map_ptr); + if (!any_array) + throw Exception("ARRAY JOIN requires array or map argument", ErrorCodes::TYPE_MISMATCH); } + size_t num_columns = block.columns(); for (size_t i = 0; i < num_columns; ++i) { @@ -118,18 +169,30 @@ void ArrayJoinAction::execute(Block 
& block) if (columns.contains(current.name)) { - if (!typeid_cast(&*current.type)) - throw Exception("ARRAY JOIN of not array: " + current.name, ErrorCodes::TYPE_MISMATCH); + if (const auto & type = getArrayJoinDataType(current.type)) + { + ColumnPtr array_ptr; + if (typeid_cast(current.type.get())) + { + array_ptr = (is_left && !is_unaligned) ? non_empty_array_columns[current.name] : current.column; + array_ptr = array_ptr->convertToFullColumnIfConst(); + } + else + { + ColumnPtr map_ptr = current.column->convertToFullColumnIfConst(); + const ColumnMap & map = typeid_cast(*map_ptr); + array_ptr = (is_left && !is_unaligned) ? non_empty_array_columns[current.name] : map.getNestedColumnPtr(); + } - ColumnPtr array_ptr = (is_left && !is_unaligned) ? non_empty_array_columns[current.name] : current.column; - array_ptr = array_ptr->convertToFullColumnIfConst(); + const ColumnArray & array = typeid_cast(*array_ptr); + if (!is_unaligned && !array.hasEqualOffsets(*any_array)) + throw Exception("Sizes of ARRAY-JOIN-ed arrays do not match", ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH); - const ColumnArray & array = typeid_cast(*array_ptr); - if (!is_unaligned && !array.hasEqualOffsets(typeid_cast(*any_array_ptr))) - throw Exception("Sizes of ARRAY-JOIN-ed arrays do not match", ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH); - - current.column = typeid_cast(*array_ptr).getDataPtr(); - current.type = typeid_cast(*current.type).getNestedType(); + current.column = typeid_cast(*array_ptr).getDataPtr(); + current.type = type->getNestedType(); + } + else + throw Exception("ARRAY JOIN of not array nor map: " + current.name, ErrorCodes::TYPE_MISMATCH); } else { diff --git a/src/Interpreters/ArrayJoinAction.h b/src/Interpreters/ArrayJoinAction.h index 975bf25a953..3baabd797d7 100644 --- a/src/Interpreters/ArrayJoinAction.h +++ b/src/Interpreters/ArrayJoinAction.h @@ -11,6 +11,15 @@ namespace DB class IFunctionOverloadResolver; using FunctionOverloadResolverPtr = std::shared_ptr; +class DataTypeArray; +class ColumnArray; +std::shared_ptr getArrayJoinDataType(DataTypePtr type); +const ColumnArray * getArrayJoinColumnRawPtr(const ColumnPtr & column); + +/// If input array join column has map type, convert it to array type. +/// Otherwise do nothing. +ColumnWithTypeAndName convertArrayJoinColumn(const ColumnWithTypeAndName & src_col); + class ArrayJoinAction { public: @@ -21,7 +30,7 @@ public: /// For unaligned [LEFT] ARRAY JOIN FunctionOverloadResolverPtr function_length; FunctionOverloadResolverPtr function_greatest; - FunctionOverloadResolverPtr function_arrayResize; + FunctionOverloadResolverPtr function_array_resize; /// For LEFT ARRAY JOIN. FunctionOverloadResolverPtr function_builder; diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index 8bd8efd40ba..fa3e9915e8f 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -245,7 +245,8 @@ std::future AsynchronousInsertQueue::push(ASTPtr query, ContextPtr query_c /// Here we check whether we hit the limit on maximum data size in the buffer. /// And use setting from query context. /// It works, because queries with the same set of settings are already grouped together. 
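    /// Note on the comparison change just below: with the previous strict '>', the buffered data was
    /// scheduled for processing only after the configured limit had already been exceeded; with '>='
    /// it is scheduled as soon as async_insert_max_data_size bytes or async_insert_max_query_number
    /// queries have accumulated, i.e. the settings now act as inclusive upper bounds.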
- if (data->size_in_bytes > key.settings.async_insert_max_data_size || data->query_number > key.settings.async_insert_max_query_number) + if (data->size_in_bytes >= key.settings.async_insert_max_data_size + || data->query_number >= key.settings.async_insert_max_query_number) { data_to_process = std::move(data); shard.iterators.erase(it); diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index db95b161a4f..687b38c3020 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -12,13 +13,13 @@ #include #include + namespace fs = std::filesystem; namespace DB { namespace ErrorCodes { - extern const int REMOTE_FS_OBJECT_CACHE_ERROR; extern const int LOGICAL_ERROR; } @@ -46,13 +47,27 @@ FileCache::Key FileCache::hash(const String & path) return Key(sipHash128(path.data(), path.size())); } -String FileCache::getPathInLocalCache(const Key & key, size_t offset, bool is_persistent) const +String FileCache::getPathInLocalCache(const Key & key, size_t offset, FileSegmentKind segment_kind) const { + String file_suffix; + switch (segment_kind) + { + case FileSegmentKind::Persistent: + file_suffix = "_persistent"; + break; + case FileSegmentKind::Temporary: + file_suffix = "_temporary"; + break; + case FileSegmentKind::Regular: + file_suffix = ""; + break; + } + auto key_str = key.toString(); return fs::path(cache_base_path) / key_str.substr(0, 3) / key_str - / (std::to_string(offset) + (is_persistent ? "_persistent" : "")); + / (std::to_string(offset) + file_suffix); } String FileCache::getPathInLocalCache(const Key & key) const @@ -98,7 +113,7 @@ void FileCache::assertInitialized(std::lock_guard & /* cache_lock */ if (initialization_exception) std::rethrow_exception(initialization_exception); else - throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Cache not initialized"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cache not initialized"); } } @@ -422,6 +437,27 @@ FileSegmentsHolder FileCache::getOrSet(const Key & key, size_t offset, size_t si return FileSegmentsHolder(std::move(file_segments)); } +FileSegmentsHolder FileCache::set(const Key & key, size_t offset, size_t size, const CreateFileSegmentSettings & settings) +{ + std::lock_guard cache_lock(mutex); + + auto it = files.find(key); + if (it != files.end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "File {} already exists", key.toString()); + + if (settings.unbounded) + { + /// If the file is unbounded, we can create a single cell for it. 
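    /// "Unbounded" here means the final size of the data is not known when the segment is created,
    /// so it may grow past max_file_segment_size (see the `unbounded`/`is_unbound` flags added to
    /// CreateFileSegmentSettings and FileSegment in this change). A sketch of the intended call,
    /// with a hypothetical size, in the spirit of the temporary-data-in-cache changes elsewhere
    /// in this diff:
    ///
    ///     auto holder = cache->set(
    ///         FileCache::Key::random(), /* offset */ 0, /* size */ 1,
    ///         CreateFileSegmentSettings(FileSegmentKind::Temporary, /* unbounded */ true));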
+ FileSegments file_segments; + if (auto * cell = addCell(key, offset, size, FileSegment::State::EMPTY, settings, cache_lock)) + file_segments.push_back(cell->file_segment); + else + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add cell for file {}", key.toString()); + return FileSegmentsHolder(std::move(file_segments)); + } + return FileSegmentsHolder(splitRangeIntoCells(key, offset, size, FileSegment::State::EMPTY, settings, cache_lock)); +} + FileSegmentsHolder FileCache::get(const Key & key, size_t offset, size_t size) { std::lock_guard cache_lock(mutex); @@ -540,13 +576,13 @@ FileSegmentPtr FileCache::createFileSegmentForDownload( assertCacheCorrectness(key, cache_lock); #endif - if (size > max_file_segment_size) - throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Requested size exceeds max file segment size"); + if (!settings.unbounded && size > max_file_segment_size) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Requested size exceeds max file segment size"); auto * cell = getCell(key, offset, cache_lock); if (cell) throw Exception( - ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, + ErrorCodes::LOGICAL_ERROR, "Cache cell already exists for key `{}` and offset {}", key.toString(), offset); @@ -738,7 +774,7 @@ bool FileCache::tryReserveForMainList( auto * cell = getCell(entry_key, entry_offset, cache_lock); if (!cell) throw Exception( - ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, + ErrorCodes::LOGICAL_ERROR, "Cache became inconsistent. Key: {}, offset: {}", key.toString(), offset); @@ -964,7 +1000,7 @@ void FileCache::remove( catch (...) { throw Exception( - ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, + ErrorCodes::LOGICAL_ERROR, "Removal of cached file failed. Key: {}, offset: {}, path: {}, error: {}", key.toString(), offset, cache_file_path, getCurrentExceptionMessage(false)); } @@ -981,7 +1017,7 @@ void FileCache::loadCacheInfoIntoMemory(std::lock_guard & cache_lock /// cache_base_path / key_prefix / key / offset if (!files.empty()) throw Exception( - ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, + ErrorCodes::LOGICAL_ERROR, "Cache initialization is partially made. " "This can be a result of a failed first attempt to initialize cache. 
" "Please, check log for error messages"); @@ -1012,14 +1048,20 @@ void FileCache::loadCacheInfoIntoMemory(std::lock_guard & cache_lock auto offset_with_suffix = offset_it->path().filename().string(); auto delim_pos = offset_with_suffix.find('_'); bool parsed; - bool is_persistent = false; + FileSegmentKind segment_kind = FileSegmentKind::Regular; if (delim_pos == std::string::npos) parsed = tryParse(offset, offset_with_suffix); else { parsed = tryParse(offset, offset_with_suffix.substr(0, delim_pos)); - is_persistent = offset_with_suffix.substr(delim_pos+1) == "persistent"; + if (offset_with_suffix.substr(delim_pos+1) == "persistent") + segment_kind = FileSegmentKind::Persistent; + if (offset_with_suffix.substr(delim_pos+1) == "temporary") + { + fs::remove(offset_it->path()); + continue; + } } if (!parsed) @@ -1039,7 +1081,7 @@ void FileCache::loadCacheInfoIntoMemory(std::lock_guard & cache_lock { auto * cell = addCell( key, offset, size, FileSegment::State::DOWNLOADED, - CreateFileSegmentSettings{ .is_persistent = is_persistent }, cache_lock); + CreateFileSegmentSettings(segment_kind), cache_lock); if (cell) queue_entries.emplace_back(cell->queue_iterator, cell->file_segment); @@ -1107,7 +1149,7 @@ void FileCache::reduceSizeToDownloaded( file_segment->getInfoForLogUnlocked(segment_lock)); } - CreateFileSegmentSettings create_settings{ .is_persistent = file_segment->is_persistent }; + CreateFileSegmentSettings create_settings(file_segment->getKind()); cell->file_segment = std::make_shared( offset, downloaded_size, key, this, FileSegment::State::DOWNLOADED, create_settings); @@ -1153,7 +1195,7 @@ std::vector FileCache::tryGetCachePaths(const Key & key) for (const auto & [offset, cell] : cells_by_offset) { if (cell.file_segment->state() == FileSegment::State::DOWNLOADED) - cache_paths.push_back(getPathInLocalCache(key, offset, cell.file_segment->isPersistent())); + cache_paths.push_back(getPathInLocalCache(key, offset, cell.file_segment->getKind())); } return cache_paths; @@ -1214,7 +1256,7 @@ FileCache::FileSegmentCell::FileSegmentCell( } default: throw Exception( - ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, + ErrorCodes::LOGICAL_ERROR, "Can create cell with either EMPTY, DOWNLOADED, DOWNLOADING state, got: {}", FileSegment::stateToString(file_segment->download_state)); } diff --git a/src/Interpreters/Cache/FileCache.h b/src/Interpreters/Cache/FileCache.h index 706762b6915..afafa39c4c6 100644 --- a/src/Interpreters/Cache/FileCache.h +++ b/src/Interpreters/Cache/FileCache.h @@ -60,6 +60,7 @@ public: * it is guaranteed that these file segments are not removed from cache. */ FileSegmentsHolder getOrSet(const Key & key, size_t offset, size_t size, const CreateFileSegmentSettings & settings); + FileSegmentsHolder set(const Key & key, size_t offset, size_t size, const CreateFileSegmentSettings & settings); /** * Segments in returned list are ordered in ascending order and represent a full contiguous @@ -80,7 +81,7 @@ public: static Key hash(const String & path); - String getPathInLocalCache(const Key & key, size_t offset, bool is_persistent) const; + String getPathInLocalCache(const Key & key, size_t offset, FileSegmentKind segment_kind) const; String getPathInLocalCache(const Key & key) const; @@ -221,6 +222,8 @@ private: FileSegmentCell * getCell(const Key & key, size_t offset, std::lock_guard & cache_lock); + /// Returns non-owned pointer to the cell stored in the `files` map. + /// Doesn't reserve any space. 
FileSegmentCell * addCell( const Key & key, size_t offset, diff --git a/src/Interpreters/Cache/FileCacheFactory.cpp b/src/Interpreters/Cache/FileCacheFactory.cpp index b276760c0dd..e120fe3fc27 100644 --- a/src/Interpreters/Cache/FileCacheFactory.cpp +++ b/src/Interpreters/Cache/FileCacheFactory.cpp @@ -31,14 +31,21 @@ const FileCacheSettings & FileCacheFactory::getSettings(const std::string & cach } -FileCachePtr FileCacheFactory::get(const std::string & cache_base_path) +FileCachePtr FileCacheFactory::tryGet(const std::string & cache_base_path) { std::lock_guard lock(mutex); auto it = caches_by_path.find(cache_base_path); if (it == caches_by_path.end()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "No cache found by path: {}", cache_base_path); + return nullptr; return it->second->cache; +} +FileCachePtr FileCacheFactory::get(const std::string & cache_base_path) +{ + auto file_cache_ptr = tryGet(cache_base_path); + if (!file_cache_ptr) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "No cache found by path: {}", cache_base_path); + return file_cache_ptr; } FileCachePtr FileCacheFactory::getOrCreate( diff --git a/src/Interpreters/Cache/FileCacheFactory.h b/src/Interpreters/Cache/FileCacheFactory.h index 82e0ec8f928..32ecd05f019 100644 --- a/src/Interpreters/Cache/FileCacheFactory.h +++ b/src/Interpreters/Cache/FileCacheFactory.h @@ -33,6 +33,7 @@ public: FileCachePtr getOrCreate(const std::string & cache_base_path, const FileCacheSettings & file_cache_settings, const std::string & name); + FileCachePtr tryGet(const std::string & cache_base_path); FileCachePtr get(const std::string & cache_base_path); CacheByBasePath getAll(); diff --git a/src/Interpreters/Cache/FileCacheKey.h b/src/Interpreters/Cache/FileCacheKey.h index cf4ab5d20c5..fed4c7f47e0 100644 --- a/src/Interpreters/Cache/FileCacheKey.h +++ b/src/Interpreters/Cache/FileCacheKey.h @@ -1,6 +1,7 @@ #pragma once #include #include +#include namespace DB { @@ -15,6 +16,8 @@ struct FileCacheKey explicit FileCacheKey(const UInt128 & key_) : key(key_) { } + static FileCacheKey random() { return FileCacheKey(UUIDHelpers::generateV4().toUnderType()); } + bool operator==(const FileCacheKey & other) const { return key == other.key; } }; diff --git a/src/Interpreters/Cache/FileSegment.cpp b/src/Interpreters/Cache/FileSegment.cpp index 177c6aecf7c..f4d7b2612a5 100644 --- a/src/Interpreters/Cache/FileSegment.cpp +++ b/src/Interpreters/Cache/FileSegment.cpp @@ -9,6 +9,8 @@ #include #include +#include + namespace CurrentMetrics { extern const Metric CacheDetachedFileSegments; @@ -19,10 +21,14 @@ namespace DB namespace ErrorCodes { - extern const int REMOTE_FS_OBJECT_CACHE_ERROR; extern const int LOGICAL_ERROR; } +String toString(FileSegmentKind kind) +{ + return String(magic_enum::enum_name(kind)); +} + FileSegment::FileSegment( size_t offset_, size_t size_, @@ -39,7 +45,8 @@ FileSegment::FileSegment( #else , log(&Poco::Logger::get("FileSegment")) #endif - , is_persistent(settings.is_persistent) + , segment_kind(settings.kind) + , is_unbound(settings.unbounded) { /// On creation, file segment state can be EMPTY, DOWNLOADED, DOWNLOADING. 
switch (download_state) @@ -66,7 +73,7 @@ FileSegment::FileSegment( default: { throw Exception( - ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, + ErrorCodes::LOGICAL_ERROR, "Can only create cell with either EMPTY, DOWNLOADED or SKIP_CACHE state"); } } @@ -74,7 +81,8 @@ FileSegment::FileSegment( String FileSegment::getPathInLocalCache() const { - return cache->getPathInLocalCache(key(), offset(), isPersistent()); + chassert(cache); + return cache->getPathInLocalCache(key(), offset(), segment_kind); } FileSegment::State FileSegment::state() const @@ -126,6 +134,18 @@ size_t FileSegment::getDownloadedSizeUnlocked(std::unique_lock & /* return downloaded_size; } +void FileSegment::setDownloadedSize(size_t delta) +{ + std::unique_lock download_lock(download_mutex); + setDownloadedSizeUnlocked(download_lock, delta); +} + +void FileSegment::setDownloadedSizeUnlocked(std::unique_lock & /* download_lock */, size_t delta) +{ + downloaded_size += delta; + assert(downloaded_size == std::filesystem::file_size(getPathInLocalCache())); +} + bool FileSegment::isDownloaded() const { std::lock_guard segment_lock(mutex); @@ -275,10 +295,26 @@ void FileSegment::resetRemoteFileReader() remote_file_reader.reset(); } +std::unique_ptr FileSegment::detachWriter() +{ + std::unique_lock segment_lock(mutex); + + if (!cache_writer) + { + if (detached_writer) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Writer is already detached"); + + auto download_path = getPathInLocalCache(); + cache_writer = std::make_unique(download_path); + } + detached_writer = true; + return std::move(cache_writer); +} + void FileSegment::write(const char * from, size_t size, size_t offset) { if (!size) - throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Writing zero size is not allowed"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Writing zero size is not allowed"); { std::unique_lock segment_lock(mutex); @@ -294,7 +330,7 @@ void FileSegment::write(const char * from, size_t size, size_t offset) size_t first_non_downloaded_offset = getFirstNonDownloadedOffsetUnlocked(segment_lock); if (offset != first_non_downloaded_offset) throw Exception( - ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, + ErrorCodes::LOGICAL_ERROR, "Attempt to write {} bytes to offset: {}, but current write offset is {}", size, offset, first_non_downloaded_offset); @@ -304,7 +340,7 @@ void FileSegment::write(const char * from, size_t size, size_t offset) if (free_reserved_size < size) throw Exception( - ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, + ErrorCodes::LOGICAL_ERROR, "Not enough space is reserved. 
Available: {}, expected: {}", free_reserved_size, size); if (current_downloaded_size == range().size()) @@ -318,6 +354,9 @@ void FileSegment::write(const char * from, size_t size, size_t offset) "Cache writer was finalized (downloaded size: {}, state: {})", current_downloaded_size, stateToString(download_state)); + if (detached_writer) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cache writer was detached"); + auto download_path = getPathInLocalCache(); cache_writer = std::make_unique(download_path); } @@ -364,7 +403,7 @@ FileSegment::State FileSegment::wait() return download_state; if (download_state == State::EMPTY) - throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Cannot wait on a file segment with empty state"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot wait on a file segment with empty state"); if (download_state == State::DOWNLOADING) { @@ -382,23 +421,28 @@ FileSegment::State FileSegment::wait() bool FileSegment::reserve(size_t size_to_reserve) { if (!size_to_reserve) - throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Zero space reservation is not allowed"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Zero space reservation is not allowed"); size_t expected_downloaded_size; + bool is_file_segment_size_exceeded; { std::unique_lock segment_lock(mutex); + assertNotDetachedUnlocked(segment_lock); assertIsDownloaderUnlocked("reserve", segment_lock); expected_downloaded_size = getDownloadedSizeUnlocked(segment_lock); - if (expected_downloaded_size + size_to_reserve > range().size()) + is_file_segment_size_exceeded = expected_downloaded_size + size_to_reserve > range().size(); + if (is_file_segment_size_exceeded && !is_unbound) + { throw Exception( - ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, + ErrorCodes::LOGICAL_ERROR, "Attempt to reserve space too much space ({}) for file segment with range: {} (downloaded size: {})", size_to_reserve, range().toString(), downloaded_size); + } chassert(reserved_size >= expected_downloaded_size); } @@ -415,15 +459,19 @@ bool FileSegment::reserve(size_t size_to_reserve) if (!reserved) { std::lock_guard cache_lock(cache->mutex); + std::lock_guard segment_lock(mutex); size_to_reserve = size_to_reserve - already_reserved_size; + + if (is_unbound && is_file_segment_size_exceeded) + { + segment_range.right = range().left + expected_downloaded_size + size_to_reserve; + } + reserved = cache->tryReserve(key(), offset(), size_to_reserve, cache_lock); if (reserved) - { - std::lock_guard segment_lock(mutex); reserved_size += size_to_reserve; - } } return reserved; @@ -434,9 +482,6 @@ void FileSegment::setDownloadedUnlocked([[maybe_unused]] std::unique_lockfinalize(); @@ -498,7 +543,7 @@ void FileSegment::completeWithState(State state) { cv.notify_all(); throw Exception( - ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, + ErrorCodes::LOGICAL_ERROR, "Cannot complete file segment with state: {}", stateToString(state)); } @@ -553,14 +598,22 @@ void FileSegment::completeBasedOnCurrentState(std::lock_guard & cach remote_file_reader.reset(); } + if (segment_kind == FileSegmentKind::Temporary && is_last_holder) + { + LOG_TEST(log, "Removing temporary file segment: {}", getInfoForLogUnlocked(segment_lock)); + detach(cache_lock, segment_lock); + setDownloadState(State::SKIP_CACHE); + cache->remove(key(), offset(), cache_lock, segment_lock); + return; + } + switch (download_state) { case State::SKIP_CACHE: { if (is_last_holder) cache->remove(key(), offset(), cache_lock, segment_lock); - - return; + break; } case State::DOWNLOADED: { @@ -613,6 +666,7 
@@ void FileSegment::completeBasedOnCurrentState(std::lock_guard & cach } } + is_completed = true; LOG_TEST(log, "Completed file segment: {}", getInfoForLogUnlocked(segment_lock)); } @@ -635,7 +689,7 @@ String FileSegment::getInfoForLogUnlocked(std::unique_lock & segment info << "first non-downloaded offset: " << getFirstNonDownloadedOffsetUnlocked(segment_lock) << ", "; info << "caller id: " << getCallerId() << ", "; info << "detached: " << is_detached << ", "; - info << "persistent: " << is_persistent; + info << "kind: " << toString(segment_kind); return info.str(); } @@ -730,7 +784,7 @@ FileSegmentPtr FileSegment::getSnapshot(const FileSegmentPtr & file_segment, std snapshot->ref_count = file_segment.use_count(); snapshot->downloaded_size = file_segment->getDownloadedSizeUnlocked(segment_lock); snapshot->download_state = file_segment->download_state; - snapshot->is_persistent = file_segment->isPersistent(); + snapshot->segment_kind = file_segment->getKind(); return snapshot; } @@ -748,6 +802,12 @@ bool FileSegment::isDetached() const return is_detached; } +bool FileSegment::isCompleted() const +{ + std::unique_lock segment_lock(mutex); + return is_completed; +} + void FileSegment::detach(std::lock_guard & /* cache_lock */, std::unique_lock & segment_lock) { if (is_detached) @@ -776,12 +836,16 @@ FileSegment::~FileSegment() CurrentMetrics::sub(CurrentMetrics::CacheDetachedFileSegments); } -FileSegmentsHolder::~FileSegmentsHolder() +void FileSegmentsHolder::reset() { /// In CacheableReadBufferFromRemoteFS file segment's downloader removes file segments from /// FileSegmentsHolder right after calling file_segment->complete(), so on destruction here /// remain only uncompleted file segments. + SCOPE_EXIT({ + file_segments.clear(); + }); + FileCache * cache = nullptr; for (auto file_segment_it = file_segments.begin(); file_segment_it != file_segments.end();) @@ -792,6 +856,8 @@ FileSegmentsHolder::~FileSegmentsHolder() if (!cache) cache = file_segment->cache; + assert(cache == file_segment->cache); /// all segments should belong to the same cache + try { bool is_detached = false; @@ -826,6 +892,11 @@ FileSegmentsHolder::~FileSegmentsHolder() } } +FileSegmentsHolder::~FileSegmentsHolder() +{ + reset(); +} + String FileSegmentsHolder::toString() { String ranges; diff --git a/src/Interpreters/Cache/FileSegment.h b/src/Interpreters/Cache/FileSegment.h index 8f9c0097d77..d49f73d2aa2 100644 --- a/src/Interpreters/Cache/FileSegment.h +++ b/src/Interpreters/Cache/FileSegment.h @@ -30,9 +30,39 @@ using FileSegmentPtr = std::shared_ptr; using FileSegments = std::list; +/* + * FileSegmentKind is used to specify the eviction policy for file segments. + */ +enum class FileSegmentKind +{ + /* `Regular` file segment is still in cache after usage, and can be evicted + * (unless there're some holders). + */ + Regular, + + /* `Persistent` file segment can't be evicted from cache, + * it should be removed manually. + */ + Persistent, + + /* `Temporary` file segment is removed right after releasing. + * Also corresponding files are removed during cache loading (if any). 
+ */ + Temporary, +}; + +String toString(FileSegmentKind kind); + struct CreateFileSegmentSettings { - bool is_persistent = false; + FileSegmentKind kind = FileSegmentKind::Regular; + bool unbounded = false; + + CreateFileSegmentSettings() = default; + + explicit CreateFileSegmentSettings(FileSegmentKind kind_, bool unbounded_ = false) + : kind(kind_), unbounded(unbounded_) + {} }; class FileSegment : private boost::noncopyable, public std::enable_shared_from_this @@ -127,7 +157,8 @@ public: size_t offset() const { return range().left; } - bool isPersistent() const { return is_persistent; } + FileSegmentKind getKind() const { return segment_kind; } + bool isPersistent() const { return segment_kind == FileSegmentKind::Persistent; } using UniqueId = std::pair; UniqueId getUniqueId() const { return std::pair(key(), offset()); } @@ -181,21 +212,23 @@ public: bool isDetached() const; + bool isCompleted() const; + void assertCorrectness() const; - /** - * ========== Methods for _only_ file segment's `writer` ====================== - */ - - void synchronousWrite(const char * from, size_t size, size_t offset); - /** * ========== Methods for _only_ file segment's `downloader` ================== */ /// Try to reserve exactly `size` bytes. + /// Returns true if reservation was successful, false otherwise. bool reserve(size_t size_to_reserve); + /// Try to reserve at max `size_to_reserve` bytes. + /// Returns actual size reserved. It can be less than size_to_reserve in non strict mode. + /// In strict mode throws an error on attempt to reserve space too much space. + size_t tryReserve(size_t size_to_reserve, bool strict = false); + /// Write data into reserved space. void write(const char * from, size_t size, size_t offset); @@ -217,6 +250,10 @@ public: void resetRemoteFileReader(); + void setDownloadedSize(size_t delta); + + LocalCacheWriterPtr detachWriter(); + private: size_t getFirstNonDownloadedOffsetUnlocked(std::unique_lock & segment_lock) const; size_t getCurrentWriteOffsetUnlocked(std::unique_lock & segment_lock) const; @@ -232,6 +269,7 @@ private: void setDownloadedUnlocked(std::unique_lock & segment_lock); void setDownloadFailedUnlocked(std::unique_lock & segment_lock); + void setDownloadedSizeUnlocked(std::unique_lock & /* download_lock */, size_t delta); bool hasFinalizedStateUnlocked(std::unique_lock & segment_lock) const; @@ -247,9 +285,9 @@ private: void assertIsDownloaderUnlocked(const std::string & operation, std::unique_lock & segment_lock) const; void assertCorrectnessUnlocked(std::unique_lock & segment_lock) const; - /// complete() without any completion state is called from destructor of - /// FileSegmentsHolder. complete() might check if the caller of the method - /// is the last alive holder of the segment. Therefore, complete() and destruction + /// completeWithoutStateUnlocked() is called from destructor of FileSegmentsHolder. + /// Function might check if the caller of the method + /// is the last alive holder of the segment. Therefore, completion and destruction /// of the file segment pointer must be done under the same cache mutex. 
void completeWithoutStateUnlocked(std::lock_guard & cache_lock); void completeBasedOnCurrentState(std::lock_guard & cache_lock, std::unique_lock & segment_lock); @@ -267,7 +305,9 @@ private: RemoteFileReaderPtr remote_file_reader; LocalCacheWriterPtr cache_writer; + bool detached_writer = false; + /// downloaded_size should always be less or equal to reserved_size size_t downloaded_size = 0; size_t reserved_size = 0; @@ -294,13 +334,17 @@ private: /// "detached" file segment means that it is not owned by cache ("detached" from cache). /// In general case, all file segments are owned by cache. bool is_detached = false; + bool is_completed = false; - bool is_downloaded{false}; + bool is_downloaded = false; std::atomic hits_count = 0; /// cache hits. std::atomic ref_count = 0; /// Used for getting snapshot state - bool is_persistent; + FileSegmentKind segment_kind; + + /// Size of the segment is not known until it is downloaded and can be bigger than max_file_segment_size. + bool is_unbound = false; CurrentMetrics::Increment metric_increment{CurrentMetrics::CacheFileSegments}; }; @@ -313,15 +357,13 @@ struct FileSegmentsHolder : private boost::noncopyable FileSegmentsHolder(FileSegmentsHolder && other) noexcept : file_segments(std::move(other.file_segments)) {} + void reset(); + bool empty() const { return file_segments.empty(); } + ~FileSegmentsHolder(); String toString(); - FileSegments::iterator add(FileSegmentPtr && file_segment) - { - return file_segments.insert(file_segments.end(), file_segment); - } - FileSegments file_segments{}; }; diff --git a/src/Interpreters/Cache/WriteBufferToFileSegment.cpp b/src/Interpreters/Cache/WriteBufferToFileSegment.cpp new file mode 100644 index 00000000000..16906e9440e --- /dev/null +++ b/src/Interpreters/Cache/WriteBufferToFileSegment.cpp @@ -0,0 +1,76 @@ +#include +#include + +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NOT_ENOUGH_SPACE; + extern const int LOGICAL_ERROR; +} + +namespace +{ + class SwapHelper + { + public: + SwapHelper(WriteBuffer & b1_, WriteBuffer & b2_) : b1(b1_), b2(b2_) { b1.swap(b2); } + ~SwapHelper() { b1.swap(b2); } + + private: + WriteBuffer & b1; + WriteBuffer & b2; + }; +} + +WriteBufferToFileSegment::WriteBufferToFileSegment(FileSegment * file_segment_) + : WriteBufferFromFileDecorator(file_segment_->detachWriter()), file_segment(file_segment_) +{ + auto downloader = file_segment->getOrSetDownloader(); + if (downloader != FileSegment::getCallerId()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Failed to set a downloader. ({})", file_segment->getInfoForLog()); +} + +/// If it throws an exception, the file segment will be incomplete, so you should not use it in the future. +void WriteBufferToFileSegment::nextImpl() +{ + size_t bytes_to_write = offset(); + + /// In case of an error, we don't need to finalize the file segment + /// because it will be deleted soon and completed in the holder's destructor. + bool ok = file_segment->reserve(bytes_to_write); + if (!ok) + throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "Failed to reserve space for the file cache ({})", file_segment->getInfoForLog()); + + try + { + SwapHelper swap(*this, *impl); + /// Write data to the underlying buffer. + impl->next(); + } + catch (...) 
+ { + LOG_WARNING(&Poco::Logger::get("WriteBufferToFileSegment"), "Failed to write to the underlying buffer ({})", file_segment->getInfoForLog()); + throw; + } + + file_segment->setDownloadedSize(bytes_to_write); +} + + +WriteBufferToFileSegment::~WriteBufferToFileSegment() +{ + try + { + finalize(); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } +} + +} diff --git a/src/Interpreters/Cache/WriteBufferToFileSegment.h b/src/Interpreters/Cache/WriteBufferToFileSegment.h new file mode 100644 index 00000000000..4748891a6e0 --- /dev/null +++ b/src/Interpreters/Cache/WriteBufferToFileSegment.h @@ -0,0 +1,24 @@ +#pragma once + +#include + +namespace DB +{ + +class FileSegment; + +class WriteBufferToFileSegment : public WriteBufferFromFileDecorator +{ +public: + explicit WriteBufferToFileSegment(FileSegment * file_segment_); + + void nextImpl() override; + + ~WriteBufferToFileSegment() override; + +private: + FileSegment * file_segment; +}; + + +} diff --git a/src/Interpreters/CollectJoinOnKeysVisitor.cpp b/src/Interpreters/CollectJoinOnKeysVisitor.cpp index 15ecb822976..9b264cb52a3 100644 --- a/src/Interpreters/CollectJoinOnKeysVisitor.cpp +++ b/src/Interpreters/CollectJoinOnKeysVisitor.cpp @@ -19,6 +19,11 @@ namespace ErrorCodes namespace { +bool isDeterminedIdentifier(JoinIdentifierPos pos) +{ + return pos == JoinIdentifierPos::Left || pos == JoinIdentifierPos::Right; +} + bool isLeftIdentifier(JoinIdentifierPos pos) { /// Unknown identifiers considered as left, we will try to process it on later stages @@ -79,7 +84,7 @@ void CollectJoinOnKeysMatcher::Data::asofToJoinKeys() void CollectJoinOnKeysMatcher::visit(const ASTIdentifier & ident, const ASTPtr & ast, CollectJoinOnKeysMatcher::Data & data) { - if (auto expr_from_table = getTableForIdentifiers(ast, false, data); expr_from_table != JoinIdentifierPos::Unknown) + if (auto expr_from_table = getTableForIdentifiers(ast, false, data); isDeterminedIdentifier(expr_from_table)) data.analyzed_join.addJoinCondition(ast, isLeftIdentifier(expr_from_table)); else throw Exception("Unexpected identifier '" + ident.name() + "' in JOIN ON section", @@ -105,23 +110,26 @@ void CollectJoinOnKeysMatcher::visit(const ASTFunction & func, const ASTPtr & as ASTPtr left = func.arguments->children.at(0); ASTPtr right = func.arguments->children.at(1); auto table_numbers = getTableNumbers(left, right, data); + if (table_numbers.first == table_numbers.second) { - if (table_numbers.first == JoinIdentifierPos::Unknown) - throw Exception("Ambiguous column in expression '" + queryToString(ast) + "' in JOIN ON section", - ErrorCodes::AMBIGUOUS_COLUMN_NAME); + if (!isDeterminedIdentifier(table_numbers.first)) + throw Exception(ErrorCodes::AMBIGUOUS_COLUMN_NAME, + "Ambiguous columns in expression '{}' in JOIN ON section", queryToString(ast)); data.analyzed_join.addJoinCondition(ast, isLeftIdentifier(table_numbers.first)); return; } - if (table_numbers.first != JoinIdentifierPos::NotApplicable && table_numbers.second != JoinIdentifierPos::NotApplicable) + if ((isLeftIdentifier(table_numbers.first) && isRightIdentifier(table_numbers.second)) || + (isRightIdentifier(table_numbers.first) && isLeftIdentifier(table_numbers.second))) { data.addJoinKeys(left, right, table_numbers); return; } } - if (auto expr_from_table = getTableForIdentifiers(ast, false, data); expr_from_table != JoinIdentifierPos::Unknown) + + if (auto expr_from_table = getTableForIdentifiers(ast, false, data); isDeterminedIdentifier(expr_from_table)) { 
data.analyzed_join.addJoinCondition(ast, isLeftIdentifier(expr_from_table)); return; @@ -204,7 +212,7 @@ JoinIdentifierPos CollectJoinOnKeysMatcher::getTableForIdentifiers(const ASTPtr std::vector identifiers; getIdentifiers(ast, identifiers); if (identifiers.empty()) - return JoinIdentifierPos::NotApplicable; + return JoinIdentifierPos::NotColumn; JoinIdentifierPos table_number = JoinIdentifierPos::Unknown; diff --git a/src/Interpreters/CollectJoinOnKeysVisitor.h b/src/Interpreters/CollectJoinOnKeysVisitor.h index e52b0c69591..4f4e886099e 100644 --- a/src/Interpreters/CollectJoinOnKeysVisitor.h +++ b/src/Interpreters/CollectJoinOnKeysVisitor.h @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -18,6 +19,11 @@ namespace ASOF enum class Inequality; } +namespace ErrorCodes +{ + extern const int INVALID_JOIN_ON_EXPRESSION; +} + enum class JoinIdentifierPos { /// Position can't be established, identifier not resolved @@ -26,8 +32,8 @@ enum class JoinIdentifierPos Left, /// Right side of JOIN Right, - /// Expression not valid, e.g. doesn't contain identifiers - NotApplicable, + /// Identifier is not a column (e.g constant) + NotColumn, }; using JoinIdentifierPosPair = std::pair; @@ -66,6 +72,9 @@ public: } else { + if (ast->children.empty()) + throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, "Illegal expression '{}' in JOIN ON section", queryToString(ast)); + /// visit children } } diff --git a/src/Interpreters/ComparisonGraph.h b/src/Interpreters/ComparisonGraph.h index 3891fbf51cf..996526b60df 100644 --- a/src/Interpreters/ComparisonGraph.h +++ b/src/Interpreters/ComparisonGraph.h @@ -17,7 +17,7 @@ class ComparisonGraph { public: /// atomic_formulas are extracted from constraints. - explicit ComparisonGraph(const std::vector & atomic_formulas); + explicit ComparisonGraph(const ASTs & atomic_formulas); enum class CompareResult { @@ -43,7 +43,7 @@ public: bool isAlwaysCompare(CompareResult expected, const ASTPtr & left, const ASTPtr & right) const; /// Returns all expressions from component to which @ast belongs if any. - std::vector getEqual(const ASTPtr & ast) const; + ASTs getEqual(const ASTPtr & ast) const; /// Returns constant expression from component to which @ast belongs if any. std::optional getEqualConst(const ASTPtr & ast) const; @@ -52,7 +52,7 @@ public: std::optional getComponentId(const ASTPtr & ast) const; /// Returns all expressions from component. - std::vector getComponent(size_t id) const; + ASTs getComponent(size_t id) const; size_t getNumOfComponents() const { return graph.vertices.size(); } @@ -72,7 +72,7 @@ private: struct EqualComponent { /// All these expressions are considered as equal. 
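    /// (The std::vector-to-ASTs substitutions in this header are a type-alias cleanup: ASTs is
    /// ClickHouse's alias for a vector of ASTPtr, so the signatures keep the same meaning.)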
- std::vector asts; + ASTs asts; std::optional constant_index; bool hasConstant() const; diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 858b8ea8881..7ac1b7482ac 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include #include @@ -55,6 +55,7 @@ #include #include #include +#include #include #include #include @@ -103,9 +104,12 @@ #include #include #include +#include #include #include +#include + #if USE_ROCKSDB #include #endif @@ -222,6 +226,7 @@ struct ContextSharedPart : boost::noncopyable String system_profile_name; /// Profile used by system processes String buffer_profile_name; /// Profile used by Buffer engine for flushing to the underlying std::unique_ptr access_control; + mutable ResourceManagerPtr resource_manager; mutable UncompressedCachePtr uncompressed_cache; /// The cache of decompressed blocks. mutable MarkCachePtr mark_cache; /// Cache of marks in compressed files. mutable std::unique_ptr load_marks_threadpool; /// Threadpool for loading marks cache. @@ -748,28 +753,69 @@ void Context::setPath(const String & path) shared->user_scripts_path = shared->path + "user_scripts/"; } -VolumePtr Context::setTemporaryStorage(const String & path, const String & policy_name, size_t max_size) +static void setupTmpPath(Poco::Logger * log, const std::string & path) +try +{ + LOG_DEBUG(log, "Setting up {} to store temporary data in it", path); + + fs::create_directories(path); + + /// Clearing old temporary files. + fs::directory_iterator dir_end; + for (fs::directory_iterator it(path); it != dir_end; ++it) + { + if (it->is_regular_file()) + { + if (startsWith(it->path().filename(), "tmp")) + { + LOG_DEBUG(log, "Removing old temporary file {}", it->path().string()); + fs::remove(it->path()); + } + else + LOG_DEBUG(log, "Found unknown file in temporary path {}", it->path().string()); + } + /// We skip directories (for example, 'http_buffers' - it's used for buffering of the results) and all other file types. + } +} +catch (...) +{ + DB::tryLogCurrentException(log, fmt::format( + "Caught exception while setup temporary path: {}. 
" + "It is ok to skip this exception as cleaning old temporary files is not necessary", path)); +} + +static VolumePtr createLocalSingleDiskVolume(const std::string & path) +{ + auto disk = std::make_shared("_tmp_default", path, 0); + VolumePtr volume = std::make_shared("_tmp_default", disk, 0); + return volume; +} + +void Context::setTemporaryStoragePath(const String & path, size_t max_size) +{ + shared->tmp_path = path; + if (!shared->tmp_path.ends_with('/')) + shared->tmp_path += '/'; + + VolumePtr volume = createLocalSingleDiskVolume(shared->tmp_path); + + for (const auto & disk : volume->getDisks()) + { + setupTmpPath(shared->log, disk->getPath()); + } + + shared->temp_data_on_disk = std::make_shared(volume, max_size); +} + +void Context::setTemporaryStoragePolicy(const String & policy_name, size_t max_size) { std::lock_guard lock(shared->storage_policies_mutex); - VolumePtr volume; - if (policy_name.empty()) - { - shared->tmp_path = path; - if (!shared->tmp_path.ends_with('/')) - shared->tmp_path += '/'; - - auto disk = std::make_shared("_tmp_default", shared->tmp_path, 0); - volume = std::make_shared("_tmp_default", disk, 0); - } - else - { - StoragePolicyPtr tmp_policy = getStoragePolicySelector(lock)->get(policy_name); - if (tmp_policy->getVolumes().size() != 1) - throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, - "Policy '{}' is used temporary files, such policy should have exactly one volume", policy_name); - volume = tmp_policy->getVolume(0); - } + StoragePolicyPtr tmp_policy = getStoragePolicySelector(lock)->get(policy_name); + if (tmp_policy->getVolumes().size() != 1) + throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, + "Policy '{}' is used temporary files, such policy should have exactly one volume", policy_name); + VolumePtr volume = tmp_policy->getVolume(0); if (volume->getDisks().empty()) throw Exception("No disks volume for temporary files", ErrorCodes::NO_ELEMENTS_IN_CONFIG); @@ -781,8 +827,6 @@ VolumePtr Context::setTemporaryStorage(const String & path, const String & polic /// Check that underlying disk is local (can be wrapped in decorator) DiskPtr disk_ptr = disk; - if (const auto * disk_decorator = dynamic_cast(disk_ptr.get())) - disk_ptr = disk_decorator->getNestedDisk(); if (dynamic_cast(disk_ptr.get()) == nullptr) { @@ -791,10 +835,33 @@ VolumePtr Context::setTemporaryStorage(const String & path, const String & polic "Disk '{}' ({}) is not local and can't be used for temporary files", disk_ptr->getName(), typeid(*disk_raw_ptr).name()); } + + setupTmpPath(shared->log, disk->getPath()); } shared->temp_data_on_disk = std::make_shared(volume, max_size); - return volume; +} + + +void Context::setTemporaryStorageInCache(const String & cache_disk_name, size_t max_size) +{ + auto disk_ptr = getDisk(cache_disk_name); + if (!disk_ptr) + throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "Disk '{}' is not found", cache_disk_name); + + const auto * disk_object_storage_ptr = dynamic_cast(disk_ptr.get()); + if (!disk_object_storage_ptr) + throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "Disk '{}' does not use cache", cache_disk_name); + + auto file_cache = disk_object_storage_ptr->getCache(); + if (!file_cache) + throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "Cache '{}' is not found", file_cache->getBasePath()); + + LOG_DEBUG(shared->log, "Using file cache ({}) for temporary files", file_cache->getBasePath()); + + shared->tmp_path = file_cache->getBasePath(); + VolumePtr volume = createLocalSingleDiskVolume(shared->tmp_path); + shared->temp_data_on_disk = 
std::make_shared(volume, file_cache.get(), max_size); } void Context::setFlagsPath(const String & path) @@ -1067,6 +1134,21 @@ std::vector Context::getEnabledProfiles() const } +ResourceManagerPtr Context::getResourceManager() const +{ + auto lock = getLock(); + if (!shared->resource_manager) + shared->resource_manager = ResourceManagerFactory::instance().get(getConfigRef().getString("resource_manager", "static")); + return shared->resource_manager; +} + +ClassifierPtr Context::getClassifier() const +{ + auto lock = getLock(); + return getResourceManager()->acquire(getSettingsRef().workload); +} + + const Scalars & Context::getScalars() const { return scalars; @@ -3753,6 +3835,8 @@ WriteSettings Context::getWriteSettings() const res.enable_filesystem_cache_on_write_operations = settings.enable_filesystem_cache_on_write_operations; res.enable_filesystem_cache_log = settings.enable_filesystem_cache_log; + res.throw_on_error_from_cache = settings.throw_on_error_from_cache_on_write_operations; + res.s3_allow_parallel_part_upload = settings.s3_allow_parallel_part_upload; res.remote_throttler = getRemoteWriteThrottler(); diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index debda4a277a..b8e81ab9281 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -19,6 +19,7 @@ #include #include #include +#include #include @@ -467,7 +468,9 @@ public: void addWarningMessage(const String & msg) const; - VolumePtr setTemporaryStorage(const String & path, const String & policy_name, size_t max_size); + void setTemporaryStorageInCache(const String & cache_disk_name, size_t max_size); + void setTemporaryStoragePolicy(const String & policy_name, size_t max_size); + void setTemporaryStoragePath(const String & path, size_t max_size); using ConfigurationPtr = Poco::AutoPtr; @@ -541,6 +544,10 @@ public: std::shared_ptr getQuota() const; std::optional getQuotaUsage() const; + /// Resource management related + ResourceManagerPtr getResourceManager() const; + ClassifierPtr getClassifier() const; + /// We have to copy external tables inside executeQuery() to track limits. Therefore, set callback for it. Must set once. void setExternalTablesInitializer(ExternalTablesInitializer && initializer); /// This method is called in executeQuery() and will call the external tables initializer. 
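Note: the three setters above replace the old single Context::setTemporaryStorage entry point, so the caller now has to choose the temporary-storage backend explicitly. Below is a minimal, illustrative sketch of how server startup code might dispatch between them; the helper function and the config key names ("temporary_data_in_cache", "tmp_policy", "tmp_path", "max_temporary_data_on_disk_size") are assumptions for illustration only and are not part of this diff. Only the three Context setters declared in the hunk above are relied upon.

#include <Interpreters/Context.h>
#include <Poco/Util/AbstractConfiguration.h>

/// Hypothetical helper: pick one of the new temporary-storage backends based on config.
static void setupTemporaryStorage(DB::ContextMutablePtr context, const Poco::Util::AbstractConfiguration & config)
{
    /// Assumed config key; 0 means "no limit" here.
    size_t max_size = config.getUInt64("max_temporary_data_on_disk_size", 0);

    if (config.has("temporary_data_in_cache"))
        /// Reuse the file cache of an existing cache-backed disk for temporary data.
        context->setTemporaryStorageInCache(config.getString("temporary_data_in_cache"), max_size);
    else if (config.has("tmp_policy"))
        /// Use a single-volume storage policy for temporary data.
        context->setTemporaryStoragePolicy(config.getString("tmp_policy"), max_size);
    else
        /// Fall back to a plain local path.
        context->setTemporaryStoragePath(config.getString("tmp_path", "/tmp/clickhouse/"), max_size);
}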
diff --git a/src/Interpreters/CrossToInnerJoinVisitor.cpp b/src/Interpreters/CrossToInnerJoinVisitor.cpp index bafa63e767f..09aebf874be 100644 --- a/src/Interpreters/CrossToInnerJoinVisitor.cpp +++ b/src/Interpreters/CrossToInnerJoinVisitor.cpp @@ -149,7 +149,7 @@ ASTPtr makeOnExpression(const std::vector & expressions) if (expressions.size() == 1) return expressions[0]->clone(); - std::vector arguments; + ASTs arguments; arguments.reserve(expressions.size()); for (const auto & ast : expressions) arguments.emplace_back(ast->clone()); diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp index 1c551dc89e0..0425b3de99b 100644 --- a/src/Interpreters/DDLTask.cpp +++ b/src/Interpreters/DDLTask.cpp @@ -29,6 +29,7 @@ namespace ErrorCodes extern const int DNS_ERROR; } + HostID HostID::fromString(const String & host_port_str) { HostID res; @@ -36,6 +37,7 @@ HostID HostID::fromString(const String & host_port_str) return res; } + bool HostID::isLocalAddress(UInt16 clickhouse_port) const { try diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp index 2e1918e1a37..d427e97828b 100644 --- a/src/Interpreters/DDLWorker.cpp +++ b/src/Interpreters/DDLWorker.cpp @@ -248,6 +248,7 @@ void DDLWorker::scheduleTasks(bool reinitialized) LOG_TRACE(log, "Don't have unfinished tasks after restarting"); else LOG_INFO(log, "Have {} unfinished tasks, will check them", current_tasks.size()); + assert(current_tasks.size() <= pool_size + (worker_pool != nullptr)); auto task_it = current_tasks.begin(); while (task_it != current_tasks.end()) @@ -279,7 +280,9 @@ void DDLWorker::scheduleTasks(bool reinitialized) task->completely_processed = true; } else + { processTask(*task, zookeeper); + } ++task_it; } else @@ -291,6 +294,7 @@ void DDLWorker::scheduleTasks(bool reinitialized) /// of log entry number (with leading zeros). if (!first_failed_task_name || task->entry_name < *first_failed_task_name) first_failed_task_name = task->entry_name; + task_it = current_tasks.erase(task_it); } } @@ -416,18 +420,24 @@ void DDLWorker::scheduleTasks(bool reinitialized) DDLTaskBase & DDLWorker::saveTask(DDLTaskPtr && task) { current_tasks.remove_if([](const DDLTaskPtr & t) { return t->completely_processed.load(); }); + /// Tasks are scheduled and executed in main thread <==> Parallel execution is disabled assert((worker_pool != nullptr) == (1 < pool_size)); + /// Parallel execution is disabled ==> All previous tasks are failed to start or finished, /// so current tasks list must be empty when we are ready to process new one. assert(worker_pool || current_tasks.empty()); + /// Parallel execution is enabled ==> Not more than pool_size tasks are currently executing. /// Note: If current_tasks.size() == pool_size, then all worker threads are busy, /// so we will wait on worker_pool->scheduleOrThrowOnError(...) assert(!worker_pool || current_tasks.size() <= pool_size); + current_tasks.emplace_back(std::move(task)); + if (first_failed_task_name && *first_failed_task_name == current_tasks.back()->entry_name) first_failed_task_name.reset(); + return *current_tasks.back(); } @@ -660,8 +670,8 @@ void DDLWorker::processTask(DDLTaskBase & task, const ZooKeeperPtr & zookeeper) active_node->setAlreadyRemoved(); task.createSyncedNodeIfNeed(zookeeper); - task.completely_processed = true; updateMaxDDLEntryID(task.entry_name); + task.completely_processed = true; } @@ -748,13 +758,13 @@ bool DDLWorker::tryExecuteQueryOnLeaderReplica( /// but DDL worker can continue processing other queries. 
while (stopwatch.elapsedSeconds() <= MAX_EXECUTION_TIMEOUT_SEC) { - StorageReplicatedMergeTree::Status status; + ReplicatedTableStatus status; // Has to get with zk fields to get active replicas field replicated_storage->getStatus(status, true); // Should return as soon as possible if the table is dropped. - bool replica_dropped = replicated_storage->is_dropped; - bool all_replicas_likely_detached = status.active_replicas == 0 && !DatabaseCatalog::instance().isTableExist(replicated_storage->getStorageID(), context); + bool replica_dropped = storage->is_dropped; + bool all_replicas_likely_detached = status.active_replicas == 0 && !DatabaseCatalog::instance().isTableExist(storage->getStorageID(), context); if (replica_dropped || all_replicas_likely_detached) { LOG_WARNING(log, ", task {} will not be executed.", task.entry_name); diff --git a/src/Interpreters/ExpressionActions.cpp b/src/Interpreters/ExpressionActions.cpp index 9b38072b5af..d89be9f3e2e 100644 --- a/src/Interpreters/ExpressionActions.cpp +++ b/src/Interpreters/ExpressionActions.cpp @@ -620,9 +620,9 @@ static void executeAction(const ExpressionActions::Action & action, ExecutionCon array_join_key.column = array_join_key.column->convertToFullColumnIfConst(); - const ColumnArray * array = typeid_cast(array_join_key.column.get()); + const auto * array = getArrayJoinColumnRawPtr(array_join_key.column); if (!array) - throw Exception("ARRAY JOIN of not array: " + action.node->result_name, ErrorCodes::TYPE_MISMATCH); + throw Exception("ARRAY JOIN of not array nor map: " + action.node->result_name, ErrorCodes::TYPE_MISMATCH); for (auto & column : columns) if (column.column) @@ -635,7 +635,7 @@ static void executeAction(const ExpressionActions::Action & action, ExecutionCon auto & res_column = columns[action.result_position]; res_column.column = array->getDataPtr(); - res_column.type = assert_cast(*array_join_key.type).getNestedType(); + res_column.type = getArrayJoinDataType(array_join_key.type)->getNestedType(); res_column.name = action.node->result_name; num_rows = res_column.column->size(); @@ -1008,7 +1008,7 @@ ExpressionActionsChain::ArrayJoinStep::ArrayJoinStep(ArrayJoinActionPtr array_jo if (array_join->columns.contains(column.name)) { - const auto * array = typeid_cast(column.type.get()); + const auto & array = getArrayJoinDataType(column.type); column.type = array->getNestedType(); /// Arrays are materialized column.column = nullptr; diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 22229c0d6c2..a3db464fbbb 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -15,6 +15,7 @@ #include #include +#include #include #include #include @@ -33,6 +34,7 @@ #include #include +#include #include #include @@ -423,7 +425,7 @@ void ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions) aggregated_columns = temp_actions->getNamesAndTypesList(); for (const auto & desc : aggregate_descriptions) - aggregated_columns.emplace_back(desc.column_name, desc.function->getReturnType()); + aggregated_columns.emplace_back(desc.column_name, desc.function->getResultType()); } @@ -1831,7 +1833,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( ssize_t where_step_num = -1; ssize_t having_step_num = -1; - auto finalize_chain = [&](ExpressionActionsChain & chain) + auto finalize_chain = [&](ExpressionActionsChain & chain) -> ColumnsWithTypeAndName { if (prewhere_step_num >= 0) { @@ -1852,7 +1854,9 @@ 
ExpressionAnalysisResult::ExpressionAnalysisResult( finalize(chain, prewhere_step_num, where_step_num, having_step_num, query); + auto res = chain.getLastStep().getResultColumns(); chain.clear(); + return res; }; { @@ -1970,7 +1974,55 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( if (settings.group_by_use_nulls) query_analyzer.appendGroupByModifiers(before_aggregation, chain, only_types); - finalize_chain(chain); + auto columns_before_aggregation = finalize_chain(chain); + + /// Here we want to check that columns after aggregation have the same type as + /// were promised in query_analyzer.aggregated_columns + /// Ideally, they should be equal. In practice, this may be not true. + /// As an example, we don't build sets for IN inside ExpressionAnalysis::analyzeAggregation, + /// so that constant folding for expression (1 in 1) will not work. This may change the return type + /// for functions with LowCardinality argument: function "substr(toLowCardinality('abc'), 1 IN 1)" + /// should usually return LowCardinality(String) when (1 IN 1) is constant, but without built set + /// for (1 IN 1) constant is not propagated and "substr" returns String type. + /// See 02503_in_lc_const_args_bug.sql + /// + /// As a temporary solution, we add converting actions to the next chain. + /// Hopefully, later we can + /// * use a new analyzer where this issue is absent + /// * or remove ExpressionActionsChain completely and re-implement its logic on top of the query plan + { + for (auto & col : columns_before_aggregation) + if (!col.column) + col.column = col.type->createColumn(); + + Block header_before_aggregation(std::move(columns_before_aggregation)); + + auto keys = query_analyzer.aggregationKeys().getNames(); + const auto & aggregates = query_analyzer.aggregates(); + + bool has_grouping = query_analyzer.group_by_kind != GroupByKind::ORDINARY; + auto actual_header = Aggregator::Params::getHeader( + header_before_aggregation, /*only_merge*/ false, keys, aggregates, /*final*/ true); + actual_header = AggregatingStep::appendGroupingColumn( + std::move(actual_header), keys, has_grouping, settings.group_by_use_nulls); + + Block expected_header; + for (const auto & expected : query_analyzer.aggregated_columns) + expected_header.insert(ColumnWithTypeAndName(expected.type, expected.name)); + + if (!blocksHaveEqualStructure(actual_header, expected_header)) + { + auto converting = ActionsDAG::makeConvertingActions( + actual_header.getColumnsWithTypeAndName(), + expected_header.getColumnsWithTypeAndName(), + ActionsDAG::MatchColumnsMode::Name, + true); + + auto & step = chain.lastStep(query_analyzer.aggregated_columns); + auto & actions = step.actions(); + actions = ActionsDAG::merge(std::move(*actions), std::move(*converting)); + } + } if (query_analyzer.appendHaving(chain, only_types || !second_stage)) { @@ -2022,7 +2074,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( for (const auto & f : w.window_functions) { query_analyzer.columns_after_window.push_back( - {f.column_name, f.aggregate_function->getReturnType()}); + {f.column_name, f.aggregate_function->getResultType()}); } } diff --git a/src/Interpreters/ExpressionJIT.cpp b/src/Interpreters/ExpressionJIT.cpp index 3a2c2e333a9..dfc88e97052 100644 --- a/src/Interpreters/ExpressionJIT.cpp +++ b/src/Interpreters/ExpressionJIT.cpp @@ -263,7 +263,7 @@ public: return result; } - static void applyFunction(IFunctionBase & function, Field & value) + static void applyFunction(const IFunctionBase & function, Field & value) { const auto & type = 
function.getArgumentTypes().at(0); ColumnsWithTypeAndName args{{type->createColumnConst(1, value), type, "x" }}; @@ -338,7 +338,7 @@ static bool isCompilableFunction(const ActionsDAG::Node & node, const std::unord if (node.type != ActionsDAG::ActionType::FUNCTION) return false; - auto & function = *node.function_base; + const auto & function = *node.function_base; IFunction::ShortCircuitSettings settings; if (function.isShortCircuit(settings, node.children.size())) diff --git a/src/Interpreters/GraceHashJoin.cpp b/src/Interpreters/GraceHashJoin.cpp index 5ef27613591..b8c6c639e82 100644 --- a/src/Interpreters/GraceHashJoin.cpp +++ b/src/Interpreters/GraceHashJoin.cpp @@ -236,22 +236,32 @@ private: Poco::Logger * log; }; - -static void flushBlocksToBuckets(Blocks & blocks, const GraceHashJoin::Buckets & buckets_snapshot) +namespace { - assert(blocks.size() == buckets_snapshot.size()); +template <JoinTableSide table_side> +void flushBlocksToBuckets(Blocks & blocks, const GraceHashJoin::Buckets & buckets) +{ + chassert(blocks.size() == buckets.size()); retryForEach( - generateRandomPermutation(1, buckets_snapshot.size()), + generateRandomPermutation(1, buckets.size()), // skipping 0 block, since we join it in memory w/o spilling on disk [&](size_t i) { if (!blocks[i].rows()) return true; - bool flushed = buckets_snapshot[i]->tryAddRightBlock(blocks[i]); + + bool flushed = false; + if constexpr (table_side == JoinTableSide::Left) + flushed = buckets[i]->tryAddLeftBlock(blocks[i]); + if constexpr (table_side == JoinTableSide::Right) + flushed = buckets[i]->tryAddRightBlock(blocks[i]); + if (flushed) blocks[i].clear(); + return flushed; }); } +} GraceHashJoin::GraceHashJoin( ContextPtr context_, std::shared_ptr<TableJoin> table_join_, @@ -274,7 +284,6 @@ GraceHashJoin::GraceHashJoin( { if (!GraceHashJoin::isSupported(table_join)) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "GraceHashJoin is not supported for this join type"); - } void GraceHashJoin::initBuckets() @@ -382,8 +391,9 @@ void GraceHashJoin::joinBlock(Block & block, std::shared_ptr<ExtraBlock> & not_p materializeBlockInplace(block); - Buckets buckets_snapshot = getCurrentBuckets(); - size_t num_buckets = buckets_snapshot.size(); + /// number of buckets doesn't change after right table is split to buckets, i.e. read-only access to buckets + /// so, no need to copy buckets here + size_t num_buckets = getNumBuckets(); Blocks blocks = JoinCommon::scatterBlockByHash(left_key_names, block, num_buckets); block = std::move(blocks[current_bucket->idx]); @@ -392,15 +402,7 @@ void GraceHashJoin::joinBlock(Block & block, std::shared_ptr<ExtraBlock> & not_p if (not_processed) throw Exception(ErrorCodes::LOGICAL_ERROR, "Unhandled not processed block in GraceHashJoin"); - // We need to skip the first bucket that is already joined in memory, so we start with 1.
- retryForEach( - generateRandomPermutation(1, num_buckets), - [&blocks, &buckets_snapshot](size_t idx) - { - if (blocks[idx].rows() == 0) - return true; - return buckets_snapshot[idx]->tryAddLeftBlock(blocks[idx]); - }); + flushBlocksToBuckets<JoinTableSide::Left>(blocks, buckets); } void GraceHashJoin::setTotals(const Block & block) @@ -428,9 +430,11 @@ bool GraceHashJoin::alwaysReturnsEmptySet() const if (!isInnerOrRight(table_join->kind())) return false; - std::shared_lock lock(rehash_mutex); - - bool file_buckets_are_empty = std::all_of(buckets.begin(), buckets.end(), [](const auto & bucket) { return bucket->empty(); }); + bool file_buckets_are_empty = [this]() + { + std::shared_lock lock(rehash_mutex); + return std::all_of(buckets.begin(), buckets.end(), [](const auto & bucket) { return bucket->empty(); }); + }(); bool hash_join_is_empty = hash_join && hash_join->alwaysReturnsEmptySet(); return hash_join_is_empty && file_buckets_are_empty; @@ -610,7 +614,7 @@ void GraceHashJoin::addJoinedBlockImpl(Block block) blocks[bucket_index].clear(); } - flushBlocksToBuckets(blocks, buckets_snapshot); + flushBlocksToBuckets<JoinTableSide::Right>(blocks, buckets_snapshot); } size_t GraceHashJoin::getNumBuckets() const diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index 9fd577318f8..dc041094381 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -178,7 +178,7 @@ namespace JoinStuff } } -static ColumnWithTypeAndName correctNullability(ColumnWithTypeAndName && column, bool nullable) +static void correctNullabilityInplace(ColumnWithTypeAndName & column, bool nullable) { if (nullable) { @@ -193,11 +193,9 @@ static ColumnWithTypeAndName correctNullability(ColumnWithTypeAndName && column, JoinCommon::removeColumnNullability(column); } - - return std::move(column); } -static ColumnWithTypeAndName correctNullability(ColumnWithTypeAndName && column, bool nullable, const ColumnUInt8 & negative_null_map) +static void correctNullabilityInplace(ColumnWithTypeAndName & column, bool nullable, const ColumnUInt8 & negative_null_map) { if (nullable) { @@ -211,8 +209,6 @@ static ColumnWithTypeAndName correctNullability(ColumnWithTypeAndName && column, } else JoinCommon::removeColumnNullability(column); - - return std::move(column); } HashJoin::HashJoin(std::shared_ptr<TableJoin> table_join_, const Block & right_sample_block_, bool any_take_last_row_) @@ -1475,7 +1471,7 @@ void HashJoin::joinBlockImpl( ColumnWithTypeAndName right_col(col.column, col.type, right_col_name); if (right_col.type->lowCardinality() != right_key.type->lowCardinality()) JoinCommon::changeLowCardinalityInplace(right_col); - right_col = correctNullability(std::move(right_col), is_nullable); + correctNullabilityInplace(right_col, is_nullable); block.insert(std::move(right_col)); } } @@ -1509,7 +1505,7 @@ void HashJoin::joinBlockImpl( ColumnWithTypeAndName right_col(thin_column, col.type, right_col_name); if (right_col.type->lowCardinality() != right_key.type->lowCardinality()) JoinCommon::changeLowCardinalityInplace(right_col); - right_col = correctNullability(std::move(right_col), is_nullable, null_map_filter); + correctNullabilityInplace(right_col, is_nullable, null_map_filter); block.insert(std::move(right_col)); if constexpr (jf.need_replication) @@ -2020,7 +2016,8 @@ BlocksList HashJoin::releaseJoinedBlocks() for (size_t i = 0; i < positions.size(); ++i) { auto & column = saved_block.getByPosition(positions[i]); - restored_block.insert(correctNullability(std::move(column), is_nullable[i])); + correctNullabilityInplace(column,
is_nullable[i]); + restored_block.insert(column); } restored_blocks.emplace_back(std::move(restored_block)); } @@ -2028,7 +2025,6 @@ BlocksList HashJoin::releaseJoinedBlocks() return restored_blocks; } - const ColumnWithTypeAndName & HashJoin::rightAsofKeyColumn() const { /// It should be nullable when right side is nullable diff --git a/src/Interpreters/IdentifierSemantic.cpp b/src/Interpreters/IdentifierSemantic.cpp index d3750e98b8c..0aa70057794 100644 --- a/src/Interpreters/IdentifierSemantic.cpp +++ b/src/Interpreters/IdentifierSemantic.cpp @@ -348,7 +348,7 @@ void splitConjunctionsAst(const ASTPtr & node, ASTs & result) ASTs splitConjunctionsAst(const ASTPtr & node) { - std::vector result; + ASTs result; splitConjunctionsAst(node, result); return result; } diff --git a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp index 22edac051a5..14628f34111 100644 --- a/src/Interpreters/InterpreterAlterQuery.cpp +++ b/src/Interpreters/InterpreterAlterQuery.cpp @@ -143,7 +143,7 @@ BlockIO InterpreterAlterQuery::executeToTable(const ASTAlterQuery & alter) "to execute ALTERs of different types in single query"); } - if (!mutation_commands.empty()) + if (mutation_commands.hasNonEmptyMutationCommands()) { table->checkMutationIsPossible(mutation_commands, getContext()->getSettingsRef()); MutationsInterpreter(table, metadata_snapshot, mutation_commands, getContext(), false).validate(); diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 50536b66185..c359b24df35 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -227,7 +227,7 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create) if ((create.storage->engine->name == "MaterializeMySQL" || create.storage->engine->name == "MaterializedMySQL") && !getContext()->getSettingsRef().allow_experimental_database_materialized_mysql - && !internal) + && !internal && !create.attach) { throw Exception("MaterializedMySQL is an experimental database engine. " "Enable allow_experimental_database_materialized_mysql to use it.", ErrorCodes::UNKNOWN_DATABASE_ENGINE); @@ -235,7 +235,7 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create) if (create.storage->engine->name == "Replicated" && !getContext()->getSettingsRef().allow_experimental_database_replicated - && !internal) + && !internal && !create.attach) { throw Exception("Replicated is an experimental database engine. " "Enable allow_experimental_database_replicated to use it.", ErrorCodes::UNKNOWN_DATABASE_ENGINE); @@ -243,7 +243,7 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create) if (create.storage->engine->name == "MaterializedPostgreSQL" && !getContext()->getSettingsRef().allow_experimental_database_materialized_postgresql - && !internal) + && !internal && !create.attach) { throw Exception("MaterializedPostgreSQL is an experimental database engine. 
" "Enable allow_experimental_database_materialized_postgresql to use it.", ErrorCodes::UNKNOWN_DATABASE_ENGINE); @@ -404,6 +404,8 @@ ASTPtr InterpreterCreateQuery::formatColumns(const ColumnsDescription & columns) column_declaration->children.push_back(column_declaration->default_expression); } + column_declaration->ephemeral_default = column.default_desc.ephemeral_default; + if (!column.comment.empty()) { column_declaration->comment = std::make_shared(Field(column.comment)); @@ -540,11 +542,7 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription( final_column_name)); default_expr_list->children.emplace_back( - setAlias( - col_decl.default_specifier == "EPHEMERAL" ? /// can be ASTLiteral::value NULL - std::make_shared(data_type_ptr->getDefault()) : - col_decl.default_expression->clone(), - tmp_column_name)); + setAlias(col_decl.default_expression->clone(), tmp_column_name)); } else default_expr_list->children.emplace_back(setAlias(col_decl.default_expression->clone(), col_decl.name)); @@ -561,7 +559,7 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription( ColumnsDescription res; auto name_type_it = column_names_and_types.begin(); - for (auto ast_it = columns_ast.children.begin(); ast_it != columns_ast.children.end(); ++ast_it, ++name_type_it) + for (const auto * ast_it = columns_ast.children.begin(); ast_it != columns_ast.children.end(); ++ast_it, ++name_type_it) { ColumnDescription column; @@ -590,10 +588,7 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription( visitor.visit(col_decl.default_expression); } - ASTPtr default_expr = - col_decl.default_specifier == "EPHEMERAL" && col_decl.default_expression->as()->value.isNull() ? - std::make_shared(DataTypeFactory::instance().get(col_decl.type)->getDefault()) : - col_decl.default_expression->clone(); + ASTPtr default_expr = col_decl.default_expression->clone(); if (col_decl.type) column.type = name_type_it->type; @@ -607,6 +602,7 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription( column.default_desc.kind = columnDefaultKindFromString(col_decl.default_specifier); column.default_desc.expression = default_expr; + column.default_desc.ephemeral_default = col_decl.ephemeral_default; } else if (col_decl.type) column.type = name_type_it->type; diff --git a/src/Interpreters/InterpreterDeleteQuery.cpp b/src/Interpreters/InterpreterDeleteQuery.cpp index b5b8ae81366..63dad10ebd6 100644 --- a/src/Interpreters/InterpreterDeleteQuery.cpp +++ b/src/Interpreters/InterpreterDeleteQuery.cpp @@ -23,6 +23,7 @@ namespace ErrorCodes { extern const int TABLE_IS_READ_ONLY; extern const int SUPPORT_IS_DISABLED; + extern const int BAD_ARGUMENTS; } @@ -58,8 +59,7 @@ BlockIO InterpreterDeleteQuery::execute() auto table_lock = table->lockForShare(getContext()->getCurrentQueryId(), getContext()->getSettingsRef().lock_acquire_timeout); auto metadata_snapshot = table->getInMemoryMetadataPtr(); - auto merge_tree = std::dynamic_pointer_cast(table); - if (!merge_tree) + if (table->supportsDelete()) { /// Convert to MutationCommand MutationCommands mutation_commands; @@ -75,39 +75,45 @@ BlockIO InterpreterDeleteQuery::execute() table->mutate(mutation_commands, getContext()); return {}; } + else if (table->supportsLightweightDelete()) + { + if (!getContext()->getSettingsRef().allow_experimental_lightweight_delete) + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Lightweight delete mutate is experimental. 
Set `allow_experimental_lightweight_delete` setting to enable it"); - if (!getContext()->getSettingsRef().allow_experimental_lightweight_delete) - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Lightweight delete mutate is experimental. Set `allow_experimental_lightweight_delete` setting to enable it"); + /// Convert to MutationCommand + MutationCommands mutation_commands; + MutationCommand mut_command; - /// Convert to MutationCommand - MutationCommands mutation_commands; - MutationCommand mut_command; + /// Build "UPDATE _row_exists = 0 WHERE predicate" query + mut_command.type = MutationCommand::Type::UPDATE; + mut_command.predicate = delete_query.predicate; - /// Build "UPDATE _row_exists = 0 WHERE predicate" query - mut_command.type = MutationCommand::Type::UPDATE; - mut_command.predicate = delete_query.predicate; + auto command = std::make_shared(); + command->type = ASTAlterCommand::UPDATE; + command->predicate = delete_query.predicate; + command->update_assignments = std::make_shared(); + auto set_row_does_not_exist = std::make_shared(); + set_row_does_not_exist->column_name = LightweightDeleteDescription::FILTER_COLUMN.name; + auto zero_value = std::make_shared(DB::Field(UInt8(0))); + set_row_does_not_exist->children.push_back(zero_value); + command->update_assignments->children.push_back(set_row_does_not_exist); + command->children.push_back(command->predicate); + command->children.push_back(command->update_assignments); + mut_command.column_to_update_expression[set_row_does_not_exist->column_name] = zero_value; + mut_command.ast = command->ptr(); - auto command = std::make_shared(); - command->type = ASTAlterCommand::UPDATE; - command->predicate = delete_query.predicate; - command->update_assignments = std::make_shared(); - auto set_row_does_not_exist = std::make_shared(); - set_row_does_not_exist->column_name = LightweightDeleteDescription::FILTER_COLUMN.name; - auto zero_value = std::make_shared(DB::Field(UInt8(0))); - set_row_does_not_exist->children.push_back(zero_value); - command->update_assignments->children.push_back(set_row_does_not_exist); - command->children.push_back(command->predicate); - command->children.push_back(command->update_assignments); - mut_command.column_to_update_expression[set_row_does_not_exist->column_name] = zero_value; - mut_command.ast = command->ptr(); + mutation_commands.emplace_back(mut_command); - mutation_commands.emplace_back(mut_command); + table->checkMutationIsPossible(mutation_commands, getContext()->getSettingsRef()); + MutationsInterpreter(table, metadata_snapshot, mutation_commands, getContext(), false).validate(); + table->mutate(mutation_commands, getContext()); - table->checkMutationIsPossible(mutation_commands, getContext()->getSettingsRef()); - MutationsInterpreter(table, metadata_snapshot, mutation_commands, getContext(), false).validate(); - table->mutate(mutation_commands, getContext()); - - return {}; + return {}; + } + else + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "DELETE query is not supported for table {}", table->getStorageID().getFullTableName()); + } } } diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index a5b5eb8d095..11c08d0fb7f 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -42,7 +42,6 @@ #include #include #include -#include #include #include #include @@ -62,6 +61,7 @@ namespace DB { + namespace ErrorCodes { extern const int LOGICAL_ERROR; @@ -487,6 +487,9 @@ BlockIO 
InterpreterSystemQuery::execute() case Type::DROP_REPLICA: dropReplica(query); break; + case Type::DROP_DATABASE_REPLICA: + dropDatabaseReplica(query); + break; case Type::SYNC_REPLICA: syncReplica(query); break; @@ -510,7 +513,6 @@ BlockIO InterpreterSystemQuery::execute() break; case Type::RESTART_DISK: restartDisk(query.disk); - break; case Type::FLUSH_LOGS: { getContext()->checkAccess(AccessType::SYSTEM_FLUSH_LOGS); @@ -727,7 +729,7 @@ void InterpreterSystemQuery::dropReplica(ASTSystemQuery & query) { if (auto * storage_replicated = dynamic_cast(iterator->table().get())) { - StorageReplicatedMergeTree::Status status; + ReplicatedTableStatus status; storage_replicated->getStatus(status); if (status.zookeeper_path == query.replica_zk_path) throw Exception("There is a local table " + storage_replicated->getStorageID().getNameForLogs() + @@ -763,7 +765,7 @@ bool InterpreterSystemQuery::dropReplicaImpl(ASTSystemQuery & query, const Stora if (!storage_replicated) return false; - StorageReplicatedMergeTree::Status status; + ReplicatedTableStatus status; auto zookeeper = getContext()->getZooKeeper(); storage_replicated->getStatus(status); @@ -785,6 +787,75 @@ bool InterpreterSystemQuery::dropReplicaImpl(ASTSystemQuery & query, const Stora return true; } +void InterpreterSystemQuery::dropDatabaseReplica(ASTSystemQuery & query) +{ + if (query.replica.empty()) + throw Exception("Replica name is empty", ErrorCodes::BAD_ARGUMENTS); + + auto check_not_local_replica = [](const DatabaseReplicated * replicated, const ASTSystemQuery & query) + { + if (!query.replica_zk_path.empty() && fs::path(replicated->getZooKeeperPath()) != fs::path(query.replica_zk_path)) + return; + if (replicated->getFullReplicaName() != query.replica) + return; + + throw Exception(ErrorCodes::TABLE_WAS_NOT_DROPPED, "There is a local database {}, which has the same path in ZooKeeper " + "and the same replica name. Please check the path in query. 
" + "If you want to drop replica of this database, use `DROP DATABASE`", replicated->getDatabaseName()); + }; + + if (query.database) + { + getContext()->checkAccess(AccessType::SYSTEM_DROP_REPLICA, query.getDatabase()); + DatabasePtr database = DatabaseCatalog::instance().getDatabase(query.getDatabase()); + if (auto * replicated = dynamic_cast(database.get())) + { + check_not_local_replica(replicated, query); + DatabaseReplicated::dropReplica(replicated, replicated->getZooKeeperPath(), query.replica); + } + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Database {} is not Replicated, cannot drop replica", query.getDatabase()); + LOG_TRACE(log, "Dropped replica {} of Replicated database {}", query.replica, backQuoteIfNeed(database->getDatabaseName())); + } + else if (query.is_drop_whole_replica) + { + auto databases = DatabaseCatalog::instance().getDatabases(); + auto access = getContext()->getAccess(); + bool access_is_granted_globally = access->isGranted(AccessType::SYSTEM_DROP_REPLICA); + + for (auto & elem : databases) + { + DatabasePtr & database = elem.second; + auto * replicated = dynamic_cast(database.get()); + if (!replicated) + continue; + if (!access_is_granted_globally && !access->isGranted(AccessType::SYSTEM_DROP_REPLICA, elem.first)) + { + LOG_INFO(log, "Access {} denied, skipping database {}", "SYSTEM DROP REPLICA", elem.first); + continue; + } + + check_not_local_replica(replicated, query); + DatabaseReplicated::dropReplica(replicated, replicated->getZooKeeperPath(), query.replica); + LOG_TRACE(log, "Dropped replica {} of Replicated database {}", query.replica, backQuoteIfNeed(database->getDatabaseName())); + } + } + else if (!query.replica_zk_path.empty()) + { + getContext()->checkAccess(AccessType::SYSTEM_DROP_REPLICA); + + /// This check is actually redundant, but it may prevent from some user mistakes + for (auto & elem : DatabaseCatalog::instance().getDatabases()) + if (auto * replicated = dynamic_cast(elem.second.get())) + check_not_local_replica(replicated, query); + + DatabaseReplicated::dropReplica(nullptr, query.replica_zk_path, query.replica); + LOG_INFO(log, "Dropped replica {} of Replicated database with path {}", query.replica, query.replica_zk_path); + } + else + throw Exception("Invalid query", ErrorCodes::LOGICAL_ERROR); +} + void InterpreterSystemQuery::syncReplica(ASTSystemQuery &) { getContext()->checkAccess(AccessType::SYSTEM_SYNC_REPLICA, table_id); @@ -844,16 +915,10 @@ void InterpreterSystemQuery::flushDistributed(ASTSystemQuery &) throw Exception("Table " + table_id.getNameForLogs() + " is not distributed", ErrorCodes::BAD_ARGUMENTS); } -void InterpreterSystemQuery::restartDisk(String & name) +[[noreturn]] void InterpreterSystemQuery::restartDisk(String &) { getContext()->checkAccess(AccessType::SYSTEM_RESTART_DISK); - - auto disk = getContext()->getDisk(name); - - if (DiskRestartProxy * restart_proxy = dynamic_cast(disk.get())) - restart_proxy->restart(getContext()); - else - throw Exception("Disk " + name + " doesn't have possibility to restart", ErrorCodes::BAD_ARGUMENTS); + throw Exception("SYSTEM RESTART DISK is not supported", ErrorCodes::NOT_IMPLEMENTED); } @@ -986,6 +1051,7 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster() break; } case Type::DROP_REPLICA: + case Type::DROP_DATABASE_REPLICA: { required_access.emplace_back(AccessType::SYSTEM_DROP_REPLICA, query.getDatabase(), query.getTable()); break; diff --git a/src/Interpreters/InterpreterSystemQuery.h b/src/Interpreters/InterpreterSystemQuery.h index 
af8734e8237..0058d0c9def 100644 --- a/src/Interpreters/InterpreterSystemQuery.h +++ b/src/Interpreters/InterpreterSystemQuery.h @@ -66,8 +66,9 @@ private: void dropReplica(ASTSystemQuery & query); bool dropReplicaImpl(ASTSystemQuery & query, const StoragePtr & table); + void dropDatabaseReplica(ASTSystemQuery & query); void flushDistributed(ASTSystemQuery & query); - void restartDisk(String & name); + [[noreturn]] void restartDisk(String & name); AccessRightsElements getRequiredAccessForDDLOnCluster() const; void startStopAction(StorageActionBlockType action_type, bool start); diff --git a/src/Interpreters/JIT/compileFunction.cpp b/src/Interpreters/JIT/compileFunction.cpp index e12b4894eb0..8bf0eb25b60 100644 --- a/src/Interpreters/JIT/compileFunction.cpp +++ b/src/Interpreters/JIT/compileFunction.cpp @@ -403,7 +403,7 @@ static void compileInsertAggregatesIntoResultColumns(llvm::Module & module, cons std::vector columns(functions.size()); for (size_t i = 0; i < functions.size(); ++i) { - auto return_type = functions[i].function->getReturnType(); + auto return_type = functions[i].function->getResultType(); auto * data = b.CreateLoad(column_type, b.CreateConstInBoundsGEP1_64(column_type, columns_arg, i)); auto * column_data_type = toNativeType(b, removeNullable(return_type)); diff --git a/src/Interpreters/JoinToSubqueryTransformVisitor.cpp b/src/Interpreters/JoinToSubqueryTransformVisitor.cpp index 10b122364f9..15d12de527d 100644 --- a/src/Interpreters/JoinToSubqueryTransformVisitor.cpp +++ b/src/Interpreters/JoinToSubqueryTransformVisitor.cpp @@ -153,7 +153,7 @@ private: data.addTableColumns(identifier.name(), columns); // QualifiedAsterisk's transformers start to appear at child 1 - for (auto it = qualified_asterisk->children.begin() + 1; it != qualified_asterisk->children.end(); ++it) + for (const auto * it = qualified_asterisk->children.begin() + 1; it != qualified_asterisk->children.end(); ++it) { IASTColumnsTransformer::transform(*it, columns); } @@ -209,7 +209,7 @@ struct RewriteTablesVisitorData { if (done) return; - std::vector new_tables{left, right}; + ASTs new_tables{left, right}; ast->children.swap(new_tables); done = true; } diff --git a/src/Interpreters/LogicalExpressionsOptimizer.cpp b/src/Interpreters/LogicalExpressionsOptimizer.cpp index 9e30cac2e19..35989f0dfba 100644 --- a/src/Interpreters/LogicalExpressionsOptimizer.cpp +++ b/src/Interpreters/LogicalExpressionsOptimizer.cpp @@ -313,7 +313,7 @@ void LogicalExpressionsOptimizer::cleanupOrExpressions() for (const auto & entry : garbage_map) { const auto * function = entry.first; - auto first_erased = entry.second; + auto * first_erased = entry.second; auto & operands = getFunctionOperands(function); operands.erase(first_erased, operands.end()); diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 26b8bce1f4a..1578e454049 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -220,8 +220,13 @@ bool isStorageTouchedByMutations( if (all_commands_can_be_skipped) return false; + /// We must read with one thread because it guarantees that + /// output stream will be sorted after reading from MergeTree parts. + /// Disable all settings that can enable reading with several streams. 
context_copy->setSetting("max_streams_to_max_threads_ratio", 1); context_copy->setSetting("max_threads", 1); + context_copy->setSetting("allow_asynchronous_read_from_io_pool_for_merge_tree", false); + context_copy->setSetting("max_streams_for_merge_tree_reading", Field(0)); ASTPtr select_query = prepareQueryAffectedAST(commands, storage, context_copy); diff --git a/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp b/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp index 6989940323c..70773e2fffb 100644 --- a/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp +++ b/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp @@ -155,7 +155,7 @@ static ColumnsDescription createColumnsDescription(const NamesAndTypesList & col /// but this produce endless recursion in gcc-11, and leads to SIGSEGV /// (see git blame for details). auto column_name_and_type = columns_name_and_type.begin(); - auto declare_column_ast = columns_definition->children.begin(); + const auto * declare_column_ast = columns_definition->children.begin(); for (; column_name_and_type != columns_name_and_type.end(); column_name_and_type++, declare_column_ast++) { const auto & declare_column = (*declare_column_ast)->as(); diff --git a/src/Interpreters/ProcessList.cpp b/src/Interpreters/ProcessList.cpp index cc22ca6597e..beda10a3af2 100644 --- a/src/Interpreters/ProcessList.cpp +++ b/src/Interpreters/ProcessList.cpp @@ -69,7 +69,8 @@ static bool isUnlimitedQuery(const IAST * ast) } -ProcessList::EntryPtr ProcessList::insert(const String & query_, const IAST * ast, ContextMutablePtr query_context) +ProcessList::EntryPtr +ProcessList::insert(const String & query_, const IAST * ast, ContextMutablePtr query_context, UInt64 watch_start_nanoseconds) { EntryPtr res; @@ -218,7 +219,6 @@ ProcessList::EntryPtr ProcessList::insert(const String & query_, const IAST * as user_process_list.user_temp_data_on_disk, settings.max_temporary_data_on_disk_size_for_query)); } thread_group->query = query_; - thread_group->one_line_query = toOneLineQuery(query_); thread_group->normalized_query_hash = normalizedQueryHash(query_); /// Set query-level memory trackers @@ -243,13 +243,16 @@ ProcessList::EntryPtr ProcessList::insert(const String & query_, const IAST * as /// since allocation and deallocation could happen in different threads } - auto process_it = processes.emplace(processes.end(), std::make_shared( - query_context, - query_, - client_info, - priorities.insert(static_cast(settings.priority)), - std::move(thread_group), - query_kind)); + auto process_it = processes.emplace( + processes.end(), + std::make_shared( + query_context, + query_, + client_info, + priorities.insert(static_cast(settings.priority)), + std::move(thread_group), + query_kind, + watch_start_nanoseconds)); increaseQueryKindAmount(query_kind); @@ -344,11 +347,13 @@ QueryStatus::QueryStatus( const ClientInfo & client_info_, QueryPriorities::Handle && priority_handle_, ThreadGroupStatusPtr && thread_group_, - IAST::QueryKind query_kind_) + IAST::QueryKind query_kind_, + UInt64 watch_start_nanoseconds) : WithContext(context_) , query(query_) , client_info(client_info_) , thread_group(std::move(thread_group_)) + , watch(CLOCK_MONOTONIC, watch_start_nanoseconds, true) , priority_handle(std::move(priority_handle_)) , global_overcommit_tracker(context_->getGlobalOvercommitTracker()) , query_kind(query_kind_) @@ -522,7 +527,7 @@ QueryStatusInfo QueryStatus::getInfo(bool get_thread_list, bool get_profile_even res.query = query; res.client_info = client_info; - res.elapsed_seconds = 
watch.elapsedSeconds(); + res.elapsed_microseconds = watch.elapsedMicroseconds(); res.is_cancelled = is_killed.load(std::memory_order_relaxed); res.is_all_data_sent = is_all_data_sent.load(std::memory_order_relaxed); res.read_rows = progress_in.read_rows; diff --git a/src/Interpreters/ProcessList.h b/src/Interpreters/ProcessList.h index 5fbdce358f9..34edfc5a2e2 100644 --- a/src/Interpreters/ProcessList.h +++ b/src/Interpreters/ProcessList.h @@ -53,7 +53,7 @@ class ProcessListEntry; struct QueryStatusInfo { String query; - double elapsed_seconds; + UInt64 elapsed_microseconds; size_t read_rows; size_t read_bytes; size_t total_rows; @@ -142,15 +142,14 @@ protected: CurrentMetrics::Increment num_queries_increment; public: - QueryStatus( ContextPtr context_, const String & query_, const ClientInfo & client_info_, QueryPriorities::Handle && priority_handle_, ThreadGroupStatusPtr && thread_group_, - IAST::QueryKind query_kind_ - ); + IAST::QueryKind query_kind_, + UInt64 watch_start_nanoseconds); ~QueryStatus(); @@ -221,6 +220,9 @@ public: bool checkTimeLimit(); /// Same as checkTimeLimit but it never throws [[nodiscard]] bool checkTimeLimitSoft(); + + /// Get the reference for the start of the query. Used to synchronize with other Stopwatches + UInt64 getQueryCPUStartTime() { return watch.getStart(); } }; using QueryStatusPtr = std::shared_ptr; @@ -382,7 +384,7 @@ public: * If timeout is passed - throw an exception. * Don't count KILL QUERY queries. */ - EntryPtr insert(const String & query_, const IAST * ast, ContextMutablePtr query_context); + EntryPtr insert(const String & query_, const IAST * ast, ContextMutablePtr query_context, UInt64 watch_start_nanoseconds); /// Number of currently executing queries. size_t size() const { return processes.size(); } diff --git a/src/Interpreters/ServerAsynchronousMetrics.cpp b/src/Interpreters/ServerAsynchronousMetrics.cpp index dc4a2a8e435..0777ffd6c44 100644 --- a/src/Interpreters/ServerAsynchronousMetrics.cpp +++ b/src/Interpreters/ServerAsynchronousMetrics.cpp @@ -250,7 +250,7 @@ void ServerAsynchronousMetrics::updateImpl(AsynchronousMetricValues & new_values if (StorageReplicatedMergeTree * table_replicated_merge_tree = typeid_cast(table.get())) { - StorageReplicatedMergeTree::Status status; + ReplicatedTableStatus status; table_replicated_merge_tree->getStatus(status, false); calculateMaxAndSum(max_queue_size, sum_queue_size, status.queue.queue_size); diff --git a/src/Interpreters/Set.h b/src/Interpreters/Set.h index 44f543ce222..bafb0dcea7a 100644 --- a/src/Interpreters/Set.h +++ b/src/Interpreters/Set.h @@ -18,7 +18,7 @@ struct Range; class Context; class IFunctionBase; -using FunctionBasePtr = std::shared_ptr; +using FunctionBasePtr = std::shared_ptr; class Chunk; diff --git a/src/Interpreters/TemporaryDataOnDisk.cpp b/src/Interpreters/TemporaryDataOnDisk.cpp index c5ae6f6c885..9e9389451b7 100644 --- a/src/Interpreters/TemporaryDataOnDisk.cpp +++ b/src/Interpreters/TemporaryDataOnDisk.cpp @@ -7,8 +7,11 @@ #include #include #include +#include +#include #include +#include namespace DB { @@ -20,12 +23,12 @@ namespace ErrorCodes extern const int NOT_ENOUGH_SPACE; } + void TemporaryDataOnDiskScope::deltaAllocAndCheck(ssize_t compressed_delta, ssize_t uncompressed_delta) { if (parent) parent->deltaAllocAndCheck(compressed_delta, uncompressed_delta); - /// check that we don't go negative if ((compressed_delta < 0 && stat.compressed_size < static_cast(-compressed_delta)) || (uncompressed_delta < 0 && stat.uncompressed_size < 
static_cast(-uncompressed_delta))) @@ -35,7 +38,8 @@ void TemporaryDataOnDiskScope::deltaAllocAndCheck(ssize_t compressed_delta, ssiz size_t new_consumprion = stat.compressed_size + compressed_delta; if (compressed_delta > 0 && limit && new_consumprion > limit) - throw Exception(ErrorCodes::TOO_MANY_ROWS_OR_BYTES, "Limit for temporary files size exceeded"); + throw Exception(ErrorCodes::TOO_MANY_ROWS_OR_BYTES, + "Limit for temporary files size exceeded (would consume {} / {} bytes)", new_consumprion, limit); stat.compressed_size += compressed_delta; stat.uncompressed_size += uncompressed_delta; @@ -43,6 +47,31 @@ void TemporaryDataOnDiskScope::deltaAllocAndCheck(ssize_t compressed_delta, ssiz TemporaryFileStream & TemporaryDataOnDisk::createStream(const Block & header, size_t max_file_size) { + if (file_cache) + return createStreamToCacheFile(header, max_file_size); + else if (volume) + return createStreamToRegularFile(header, max_file_size); + + throw Exception("TemporaryDataOnDiskScope has no cache and no volume", ErrorCodes::LOGICAL_ERROR); +} + +TemporaryFileStream & TemporaryDataOnDisk::createStreamToCacheFile(const Block & header, size_t max_file_size) +{ + if (!file_cache) + throw Exception("TemporaryDataOnDiskScope has no cache", ErrorCodes::LOGICAL_ERROR); + + auto holder = file_cache->set(FileSegment::Key::random(), 0, std::max(10_MiB, max_file_size), CreateFileSegmentSettings(FileSegmentKind::Temporary, /* unbounded */ true)); + + std::lock_guard lock(mutex); + TemporaryFileStreamPtr & tmp_stream = streams.emplace_back(std::make_unique(std::move(holder), header, this)); + return *tmp_stream; +} + +TemporaryFileStream & TemporaryDataOnDisk::createStreamToRegularFile(const Block & header, size_t max_file_size) +{ + if (!volume) + throw Exception("TemporaryDataOnDiskScope has no volume", ErrorCodes::LOGICAL_ERROR); + DiskPtr disk; if (max_file_size > 0) { @@ -63,7 +92,6 @@ TemporaryFileStream & TemporaryDataOnDisk::createStream(const Block & header, si return *tmp_stream; } - std::vector TemporaryDataOnDisk::getStreams() const { std::vector res; @@ -83,18 +111,40 @@ bool TemporaryDataOnDisk::empty() const struct TemporaryFileStream::OutputWriter { OutputWriter(const String & path, const Block & header_) - : out_file_buf(path) - , out_compressed_buf(out_file_buf) + : out_buf(std::make_unique(path)) + , out_compressed_buf(*out_buf) , out_writer(out_compressed_buf, DBMS_TCP_PROTOCOL_VERSION, header_) { + LOG_TEST(&Poco::Logger::get("TemporaryFileStream"), "Writing to {}", path); } - void write(const Block & block) + OutputWriter(std::unique_ptr out_buf_, const Block & header_) + : out_buf(std::move(out_buf_)) + , out_compressed_buf(*out_buf) + , out_writer(out_compressed_buf, DBMS_TCP_PROTOCOL_VERSION, header_) + { + LOG_TEST(&Poco::Logger::get("TemporaryFileStream"), + "Writing to {}", + static_cast(out_buf.get())->getFileName()); + } + + size_t write(const Block & block) { if (finalized) throw Exception("Cannot write to finalized stream", ErrorCodes::LOGICAL_ERROR); - out_writer.write(block); + size_t written_bytes = out_writer.write(block); num_rows += block.rows(); + return written_bytes; + } + + void flush() + { + if (finalized) + throw Exception("Cannot flush finalized stream", ErrorCodes::LOGICAL_ERROR); + + out_compressed_buf.next(); + out_buf->next(); + out_writer.flush(); } void finalize() @@ -108,7 +158,7 @@ struct TemporaryFileStream::OutputWriter out_writer.flush(); out_compressed_buf.finalize(); - out_file_buf.finalize(); + out_buf->finalize(); } ~OutputWriter() @@ 
-123,7 +173,7 @@ struct TemporaryFileStream::OutputWriter } } - WriteBufferFromFile out_file_buf; + std::unique_ptr out_buf; CompressedWriteBuffer out_compressed_buf; NativeWriter out_writer; @@ -139,6 +189,7 @@ struct TemporaryFileStream::InputReader , in_compressed_buf(in_file_buf) , in_reader(in_compressed_buf, header_, DBMS_TCP_PROTOCOL_VERSION) { + LOG_TEST(&Poco::Logger::get("TemporaryFileStream"), "Reading {} from {}", header_.dumpStructure(), path); } explicit InputReader(const String & path) @@ -146,9 +197,13 @@ struct TemporaryFileStream::InputReader , in_compressed_buf(in_file_buf) , in_reader(in_compressed_buf, DBMS_TCP_PROTOCOL_VERSION) { + LOG_TEST(&Poco::Logger::get("TemporaryFileStream"), "Reading from {}", path); } - Block read() { return in_reader.read(); } + Block read() + { + return in_reader.read(); + } ReadBufferFromFile in_file_buf; CompressedReadBuffer in_compressed_buf; @@ -163,13 +218,34 @@ TemporaryFileStream::TemporaryFileStream(TemporaryFileOnDiskHolder file_, const { } -void TemporaryFileStream::write(const Block & block) +TemporaryFileStream::TemporaryFileStream(FileSegmentsHolder && segments_, const Block & header_, TemporaryDataOnDisk * parent_) + : parent(parent_) + , header(header_) + , segment_holder(std::move(segments_)) +{ + if (segment_holder.file_segments.size() != 1) + throw Exception(ErrorCodes::LOGICAL_ERROR, "TemporaryFileStream can be created only from single segment"); + auto & segment = segment_holder.file_segments.front(); + auto out_buf = std::make_unique(segment.get()); + out_writer = std::make_unique(std::move(out_buf), header); +} + +size_t TemporaryFileStream::write(const Block & block) { if (!out_writer) throw Exception("Writing has been finished", ErrorCodes::LOGICAL_ERROR); updateAllocAndCheck(); - out_writer->write(block); + size_t bytes_written = out_writer->write(block); + return bytes_written; +} + +void TemporaryFileStream::flush() +{ + if (!out_writer) + throw Exception("Writing has been finished", ErrorCodes::LOGICAL_ERROR); + + out_writer->flush(); } TemporaryFileStream::Stat TemporaryFileStream::finishWriting() @@ -206,7 +282,7 @@ Block TemporaryFileStream::read() if (!in_reader) { - in_reader = std::make_unique(file->getPath(), header); + in_reader = std::make_unique(getPath(), header); } Block block = in_reader->read(); @@ -228,7 +304,7 @@ void TemporaryFileStream::updateAllocAndCheck() { throw Exception(ErrorCodes::LOGICAL_ERROR, "Temporary file {} size decreased after write: compressed: {} -> {}, uncompressed: {} -> {}", - file->getPath(), new_compressed_size, stat.compressed_size, new_uncompressed_size, stat.uncompressed_size); + getPath(), new_compressed_size, stat.compressed_size, new_uncompressed_size, stat.uncompressed_size); } parent->deltaAllocAndCheck(new_compressed_size - stat.compressed_size, new_uncompressed_size - stat.uncompressed_size); @@ -239,17 +315,11 @@ void TemporaryFileStream::updateAllocAndCheck() bool TemporaryFileStream::isEof() const { - return file == nullptr; + return file == nullptr && segment_holder.empty(); } void TemporaryFileStream::release() { - if (file) - { - file.reset(); - parent->deltaAllocAndCheck(-stat.compressed_size, -stat.uncompressed_size); - } - if (in_reader) in_reader.reset(); @@ -258,6 +328,25 @@ void TemporaryFileStream::release() out_writer->finalize(); out_writer.reset(); } + + if (file) + { + file.reset(); + parent->deltaAllocAndCheck(-stat.compressed_size, -stat.uncompressed_size); + } + + if (!segment_holder.empty()) + segment_holder.reset(); +} + +String 
TemporaryFileStream::getPath() const +{ + if (file) + return file->getPath(); + if (!segment_holder.file_segments.empty()) + return segment_holder.file_segments.front()->getPathInLocalCache(); + + throw Exception(ErrorCodes::LOGICAL_ERROR, "TemporaryFileStream has no file"); } TemporaryFileStream::~TemporaryFileStream() diff --git a/src/Interpreters/TemporaryDataOnDisk.h b/src/Interpreters/TemporaryDataOnDisk.h index 11edc8700d2..1b56f953d17 100644 --- a/src/Interpreters/TemporaryDataOnDisk.h +++ b/src/Interpreters/TemporaryDataOnDisk.h @@ -6,6 +6,8 @@ #include #include #include +#include +#include namespace CurrentMetrics @@ -44,8 +46,12 @@ public: : volume(std::move(volume_)), limit(limit_) {} + explicit TemporaryDataOnDiskScope(VolumePtr volume_, FileCache * file_cache_, size_t limit_) + : volume(std::move(volume_)), file_cache(file_cache_), limit(limit_) + {} + explicit TemporaryDataOnDiskScope(TemporaryDataOnDiskScopePtr parent_, size_t limit_) - : parent(std::move(parent_)), volume(parent->volume), limit(limit_) + : parent(std::move(parent_)), volume(parent->volume), file_cache(parent->file_cache), limit(limit_) {} /// TODO: remove @@ -56,7 +62,9 @@ protected: void deltaAllocAndCheck(ssize_t compressed_delta, ssize_t uncompressed_delta); TemporaryDataOnDiskScopePtr parent = nullptr; - VolumePtr volume; + + VolumePtr volume = nullptr; + FileCache * file_cache = nullptr; StatAtomic stat; size_t limit = 0; @@ -93,6 +101,9 @@ public: const StatAtomic & getStat() const { return stat; } private: + TemporaryFileStream & createStreamToCacheFile(const Block & header, size_t max_file_size); + TemporaryFileStream & createStreamToRegularFile(const Block & header, size_t max_file_size); + mutable std::mutex mutex; std::vector streams TSA_GUARDED_BY(mutex); @@ -117,14 +128,18 @@ public: }; TemporaryFileStream(TemporaryFileOnDiskHolder file_, const Block & header_, TemporaryDataOnDisk * parent_); + TemporaryFileStream(FileSegmentsHolder && segments_, const Block & header_, TemporaryDataOnDisk * parent_); + + size_t write(const Block & block); + void flush(); - void write(const Block & block); Stat finishWriting(); bool isWriteFinished() const; Block read(); - const String path() const { return file->getPath(); } + String getPath() const; + Block getHeader() const { return header; } /// Read finished and file released @@ -142,7 +157,9 @@ private: Block header; + /// Data can be stored in file directly or in the cache TemporaryFileOnDiskHolder file; + FileSegmentsHolder segment_holder; Stat stat; diff --git a/src/Interpreters/ThreadStatusExt.cpp b/src/Interpreters/ThreadStatusExt.cpp index ad7884ade55..e96a8a4b188 100644 --- a/src/Interpreters/ThreadStatusExt.cpp +++ b/src/Interpreters/ThreadStatusExt.cpp @@ -97,7 +97,7 @@ void CurrentThread::defaultThreadDeleter() void ThreadStatus::setupState(const ThreadGroupStatusPtr & thread_group_) { - assertState({ThreadState::DetachedFromQuery}, __PRETTY_FUNCTION__); + assertState(ThreadState::DetachedFromQuery, __PRETTY_FUNCTION__); /// Attach or init current thread to thread group and copy useful information from it thread_group = thread_group_; @@ -324,7 +324,7 @@ void ThreadStatus::detachQuery(bool exit_if_already_detached, bool thread_exits) return; } - assertState({ThreadState::AttachedToQuery}, __PRETTY_FUNCTION__); + assertState(ThreadState::AttachedToQuery, __PRETTY_FUNCTION__); finalizeQueryProfiler(); finalizePerformanceCounters(); diff --git a/src/Interpreters/TraceCollector.cpp b/src/Interpreters/TraceCollector.cpp index 367249f1289..40a5b1f228d 
100644 --- a/src/Interpreters/TraceCollector.cpp +++ b/src/Interpreters/TraceCollector.cpp @@ -31,6 +31,7 @@ TraceCollector::TraceCollector(std::shared_ptr trace_log_) TraceCollector::~TraceCollector() +try { if (!thread.joinable()) LOG_ERROR(&Poco::Logger::get("TraceCollector"), "TraceCollector thread is malformed and cannot be joined"); @@ -39,6 +40,10 @@ TraceCollector::~TraceCollector() TraceSender::pipe.close(); } +catch (...) +{ + tryLogCurrentException("TraceCollector"); +} /** Sends TraceCollector stop message @@ -97,9 +102,6 @@ void TraceCollector::run() Int64 size; readPODBinary(size, in); - UInt64 ptr; - readPODBinary(ptr, in); - ProfileEvents::Event event; readPODBinary(event, in); @@ -115,7 +117,7 @@ void TraceCollector::run() UInt64 time = static_cast(ts.tv_sec * 1000000000LL + ts.tv_nsec); UInt64 time_in_microseconds = static_cast((ts.tv_sec * 1000000LL) + (ts.tv_nsec / 1000)); - TraceLogElement element{time_t(time / 1000000000), time_in_microseconds, time, trace_type, thread_id, query_id, trace, size, ptr, event, increment}; + TraceLogElement element{time_t(time / 1000000000), time_in_microseconds, time, trace_type, thread_id, query_id, trace, size, event, increment}; trace_log->add(element); } } diff --git a/src/Interpreters/TraceLog.cpp b/src/Interpreters/TraceLog.cpp index cd5f965a679..0408ebe504b 100644 --- a/src/Interpreters/TraceLog.cpp +++ b/src/Interpreters/TraceLog.cpp @@ -38,7 +38,6 @@ NamesAndTypesList TraceLogElement::getNamesAndTypes() {"query_id", std::make_shared()}, {"trace", std::make_shared(std::make_shared())}, {"size", std::make_shared()}, - {"ptr", std::make_shared()}, {"event", std::make_shared(std::make_shared())}, {"increment", std::make_shared()}, }; @@ -58,7 +57,6 @@ void TraceLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insertData(query_id.data(), query_id.size()); columns[i++]->insert(trace); columns[i++]->insert(size); - columns[i++]->insert(ptr); String event_name; if (event != ProfileEvents::end()) diff --git a/src/Interpreters/TraceLog.h b/src/Interpreters/TraceLog.h index 71aec0b50c4..c481f033a72 100644 --- a/src/Interpreters/TraceLog.h +++ b/src/Interpreters/TraceLog.h @@ -27,10 +27,8 @@ struct TraceLogElement UInt64 thread_id{}; String query_id{}; Array trace{}; - /// Allocation size in bytes for TraceType::Memory and TraceType::MemorySample. + /// Allocation size in bytes for TraceType::Memory. Int64 size{}; - /// Allocation ptr for TraceType::MemorySample. - UInt64 ptr{}; /// ProfileEvent for TraceType::ProfileEvent. ProfileEvents::Event event{ProfileEvents::end()}; /// Increment of profile event for TraceType::ProfileEvent. 
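The TraceLog hunks above drop the ptr column in two places at once: the column list returned by getNamesAndTypes() and the row serialization in appendToBlock(). The two are matched purely by position, so they have to change together. Below is a minimal, self-contained sketch of that invariant in plain C++; the names are illustrative, not ClickHouse classes.

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

// Toy log element: columnNames() and toRow() must stay in lockstep --
// one value per column, in the same order. Dropping a field (like "ptr"
// in the patch above) means removing it from both functions.
struct ToyTraceElement
{
    uint64_t thread_id = 0;
    std::string query_id;
    int64_t size = 0; // allocation size; a removed "ptr" field is intentionally absent

    static std::vector<std::string> columnNames() { return {"thread_id", "query_id", "size"}; }

    std::vector<std::string> toRow() const
    {
        return {std::to_string(thread_id), query_id, std::to_string(size)};
    }
};

int main()
{
    ToyTraceElement element{42, "query-1", 1024};
    const auto names = ToyTraceElement::columnNames();
    const auto row = element.toRow();
    for (std::size_t i = 0; i < names.size(); ++i)
        std::cout << names[i] << " = " << row[i] << '\n';
    return 0;
}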
diff --git a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index bc862ed7b38..2ca1174f704 100644 --- a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -299,7 +299,7 @@ void TranslateQualifiedNamesMatcher::visit(ASTExpressionList & node, const ASTPt } // QualifiedAsterisk's transformers start to appear at child 1 - for (auto it = qualified_asterisk->children.begin() + 1; it != qualified_asterisk->children.end(); ++it) + for (const auto * it = qualified_asterisk->children.begin() + 1; it != qualified_asterisk->children.end(); ++it) { IASTColumnsTransformer::transform(*it, columns); } diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index 6461a35dae6..6a8c9dc7dbd 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -688,59 +688,6 @@ void optimizeFunctionsToSubcolumns(ASTPtr & query, const StorageMetadataPtr & me RewriteFunctionToSubcolumnVisitor(data).visit(query); } -std::shared_ptr getQuantileFuseCandidate(const String & func_name, std::vector & functions) -{ - if (functions.size() < 2) - return nullptr; - - const auto & common_arguments = (*functions[0])->as()->arguments->children; - auto func_base = makeASTFunction(GatherFunctionQuantileData::getFusedName(func_name)); - func_base->arguments->children = common_arguments; - func_base->parameters = std::make_shared(); - - for (const auto * ast : functions) - { - assert(ast && *ast); - const auto * func = (*ast)->as(); - assert(func && func->parameters->as()); - const ASTs & parameters = func->parameters->as().children; - if (parameters.size() != 1) - return nullptr; /// query is illegal, give up - func_base->parameters->children.push_back(parameters[0]); - } - return func_base; -} - -/// Rewrites multi quantile()() functions with the same arguments to quantiles()()[] -/// eg:SELECT quantile(0.5)(x), quantile(0.9)(x), quantile(0.95)(x) FROM... -/// rewrite to : SELECT quantiles(0.5, 0.9, 0.95)(x)[1], quantiles(0.5, 0.9, 0.95)(x)[2], quantiles(0.5, 0.9, 0.95)(x)[3] FROM ... -void optimizeFuseQuantileFunctions(ASTPtr & query) -{ - GatherFunctionQuantileVisitor::Data data{}; - GatherFunctionQuantileVisitor(data).visit(query); - for (auto & candidate : data.fuse_quantile) - { - String func_name = candidate.first; - auto & args_to_functions = candidate.second; - - /// Try to fuse multiply `quantile*` Function to plural - for (auto it : args_to_functions.arg_map_function) - { - std::vector & functions = it.second; - auto func_base = getQuantileFuseCandidate(func_name, functions); - if (!func_base) - continue; - for (size_t i = 0; i < functions.size(); ++i) - { - std::shared_ptr ast_new = makeASTFunction("arrayElement", func_base, std::make_shared(i + 1)); - if (const auto & alias = (*functions[i])->tryGetAlias(); !alias.empty()) - ast_new->setAlias(alias); - *functions[i] = ast_new; - } - } - } -} - void optimizeOrLikeChain(ASTPtr & query) { ConvertFunctionOrLikeVisitor::Data data = {}; @@ -890,9 +837,6 @@ void TreeOptimizer::apply(ASTPtr & query, TreeRewriterResult & result, /// Remove duplicated columns from USING(...). 
optimizeUsing(select_query); - if (settings.optimize_syntax_fuse_functions) - optimizeFuseQuantileFunctions(query); - if (settings.optimize_or_like_chain && settings.allow_hyperscan && settings.max_hyperscan_regexp_length == 0 diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index e0da9e77b81..20c14b8d7b6 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include @@ -62,7 +63,6 @@ namespace ErrorCodes extern const int EMPTY_LIST_OF_COLUMNS_QUERIED; extern const int EMPTY_NESTED_TABLE; extern const int EXPECTED_ALL_OR_ANY; - extern const int INCOMPATIBLE_TYPE_OF_JOIN; extern const int INVALID_JOIN_ON_EXPRESSION; extern const int LOGICAL_ERROR; extern const int NOT_IMPLEMENTED; @@ -573,7 +573,7 @@ void removeUnneededColumnsFromSelectClause(ASTSelectQuery * select_query, const auto & children = select_query->interpolate()->children; if (!children.empty()) { - for (auto it = children.begin(); it != children.end();) + for (auto * it = children.begin(); it != children.end();) { if (remove_columns.contains((*it)->as()->column)) it = select_query->interpolate()->children.erase(it); @@ -715,32 +715,34 @@ std::optional tryEvaluateConstCondition(ASTPtr expr, ContextPtr context) return res > 0; } -bool tryJoinOnConst(TableJoin & analyzed_join, ASTPtr & on_expression, ContextPtr context) +bool tryJoinOnConst(TableJoin & analyzed_join, const ASTPtr & on_expression, ContextPtr context) { - bool join_on_value; - if (auto eval_const_res = tryEvaluateConstCondition(on_expression, context)) - join_on_value = *eval_const_res; - else + if (!analyzed_join.isEnabledAlgorithm(JoinAlgorithm::HASH)) return false; - if (!analyzed_join.isEnabledAlgorithm(JoinAlgorithm::HASH)) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, - "JOIN ON constant ({}) supported only with join algorithm 'hash'", - queryToString(on_expression)); + if (analyzed_join.strictness() == JoinStrictness::Asof) + return false; - on_expression = nullptr; - if (join_on_value) - { - LOG_DEBUG(&Poco::Logger::get("TreeRewriter"), "Join on constant executed as cross join"); - analyzed_join.resetToCross(); - } - else - { - LOG_DEBUG(&Poco::Logger::get("TreeRewriter"), "Join on constant executed as empty join"); - analyzed_join.resetKeys(); - } + if (analyzed_join.isSpecialStorage()) + return false; - return true; + if (auto eval_const_res = tryEvaluateConstCondition(on_expression, context)) + { + if (eval_const_res.value()) + { + /// JOIN ON 1 == 1 + LOG_DEBUG(&Poco::Logger::get("TreeRewriter"), "Join on constant executed as cross join"); + analyzed_join.resetToCross(); + } + else + { + /// JOIN ON 1 != 1 + LOG_DEBUG(&Poco::Logger::get("TreeRewriter"), "Join on constant executed as empty join"); + analyzed_join.resetKeys(); + } + return true; + } + return false; } /// Find the columns that are obtained by JOIN. 
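The rewritten tryJoinOnConst above changes both the order of checks and the failure mode: unsupported cases (non-hash join algorithm, ASOF strictness, special storage, non-constant ON expression) now simply return false so the caller falls back to ordinary ON-clause analysis, and only a genuinely constant condition rewrites the join. A standalone sketch of that decision flow with simplified names (not the actual ClickHouse signatures):

#include <optional>

// Illustrative mirror of the new tryJoinOnConst control flow; the enum and
// parameter names are invented for this sketch.
enum class ConstOnRewrite { NotApplied, ToCrossJoin, ToEmptyJoin };

ConstOnRewrite rewriteJoinOnConstant(
    bool hash_join_enabled,          // analyzed_join.isEnabledAlgorithm(JoinAlgorithm::HASH)
    bool is_asof,                    // analyzed_join.strictness() == JoinStrictness::Asof
    bool is_special_storage,         // analyzed_join.isSpecialStorage()
    std::optional<bool> const_value) // tryEvaluateConstCondition result; nullopt if not constant
{
    // Preconditions are checked up front; failing any of them no longer throws,
    // the caller just continues with the regular CollectJoinOnKeysVisitor path.
    if (!hash_join_enabled || is_asof || is_special_storage || !const_value)
        return ConstOnRewrite::NotApplied;

    // JOIN ON 1 = 1  -> executed as a cross join
    // JOIN ON 1 != 1 -> executed as an empty join
    return *const_value ? ConstOnRewrite::ToCrossJoin : ConstOnRewrite::ToEmptyJoin;
}

int main()
{
    return rewriteJoinOnConstant(true, false, false, true) == ConstOnRewrite::ToCrossJoin ? 0 : 1;
}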
@@ -759,6 +761,13 @@ void collectJoinedColumns(TableJoin & analyzed_join, ASTTableJoin & table_join, } else if (table_join.on_expression) { + bool join_on_const_ok = tryJoinOnConst(analyzed_join, table_join.on_expression, context); + if (join_on_const_ok) + { + table_join.on_expression = nullptr; + return; + } + bool is_asof = (table_join.strictness == JoinStrictness::Asof); CollectJoinOnKeysVisitor::Data data{analyzed_join, tables[0], tables[1], aliases, is_asof}; @@ -779,44 +788,22 @@ void collectJoinedColumns(TableJoin & analyzed_join, ASTTableJoin & table_join, } auto check_keys_empty = [] (auto e) { return e.key_names_left.empty(); }; + bool any_keys_empty = std::any_of(analyzed_join.getClauses().begin(), analyzed_join.getClauses().end(), check_keys_empty); - /// All clauses should to have keys or be empty simultaneously - bool all_keys_empty = std::all_of(analyzed_join.getClauses().begin(), analyzed_join.getClauses().end(), check_keys_empty); - if (all_keys_empty) + if (any_keys_empty) + throw DB::Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, + "Cannot get JOIN keys from JOIN ON section: '{}', found keys: {}", + queryToString(table_join.on_expression), TableJoin::formatClauses(analyzed_join.getClauses())); + + if (is_asof) { - /// Try join on constant (cross or empty join) or fail - if (is_asof) - throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, - "Cannot get JOIN keys from JOIN ON section: {}", queryToString(table_join.on_expression)); - - if (const auto storage_join = analyzed_join.getStorageJoin()) - throw Exception(ErrorCodes::INCOMPATIBLE_TYPE_OF_JOIN, - "StorageJoin keys should match JOIN keys, expected JOIN ON [{}]", fmt::join(storage_join->getKeyNames(), ", ")); - - bool join_on_const_ok = tryJoinOnConst(analyzed_join, table_join.on_expression, context); - if (!join_on_const_ok) - throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, - "Cannot get JOIN keys from JOIN ON section: {}", queryToString(table_join.on_expression)); + if (!analyzed_join.oneDisjunct()) + throw DB::Exception(ErrorCodes::NOT_IMPLEMENTED, "ASOF join doesn't support multiple ORs for keys in JOIN ON section"); + data.asofToJoinKeys(); } - else - { - bool any_keys_empty = std::any_of(analyzed_join.getClauses().begin(), analyzed_join.getClauses().end(), check_keys_empty); - if (any_keys_empty) - throw DB::Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, - "Cannot get JOIN keys from JOIN ON section: '{}'", - queryToString(table_join.on_expression)); - - if (is_asof) - { - if (!analyzed_join.oneDisjunct()) - throw DB::Exception(ErrorCodes::NOT_IMPLEMENTED, "ASOF join doesn't support multiple ORs for keys in JOIN ON section"); - data.asofToJoinKeys(); - } - - if (!analyzed_join.oneDisjunct() && !analyzed_join.isEnabledAlgorithm(JoinAlgorithm::HASH)) - throw DB::Exception(ErrorCodes::NOT_IMPLEMENTED, "Only `hash` join supports multiple ORs for keys in JOIN ON section"); - } + if (!analyzed_join.oneDisjunct() && !analyzed_join.isEnabledAlgorithm(JoinAlgorithm::HASH)) + throw DB::Exception(ErrorCodes::NOT_IMPLEMENTED, "Only `hash` join supports multiple ORs for keys in JOIN ON section"); } } @@ -1243,16 +1230,24 @@ void TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select if (storage) { std::vector hint_name{}; + std::set helper_hint_name{}; for (const auto & name : columns_context.requiredColumns()) { auto hints = storage->getHints(name); - hint_name.insert(hint_name.end(), hints.begin(), hints.end()); + for (const auto & hint : hints) + { + // We want to preserve the ordering 
of the hints + // (as they are ordered by Levenshtein distance) + auto [_, inserted] = helper_hint_name.insert(hint); + if (inserted) + hint_name.push_back(hint); + } } if (!hint_name.empty()) { ss << ", maybe you meant: "; - ss << toString(hint_name); + ss << toStringWithFinalSeparator(hint_name, " or "); } } else diff --git a/src/Interpreters/applyTableOverride.cpp b/src/Interpreters/applyTableOverride.cpp index e614e58b06b..8e88047c13c 100644 --- a/src/Interpreters/applyTableOverride.cpp +++ b/src/Interpreters/applyTableOverride.cpp @@ -26,10 +26,10 @@ void applyTableOverrideToCreateQuery(const ASTTableOverride & override, ASTCreat if (!create_query->columns_list->columns) create_query->columns_list->set(create_query->columns_list->columns, std::make_shared()); auto & dest_children = create_query->columns_list->columns->children; - auto exists = std::find_if(dest_children.begin(), dest_children.end(), [&](ASTPtr node) -> bool - { - return node->as()->name == override_column->name; - }); + auto * exists = std::find_if( + dest_children.begin(), + dest_children.end(), + [&](ASTPtr node) -> bool { return node->as()->name == override_column->name; }); /// For columns, only allow adding ALIAS (non-physical) for now. /// TODO: This logic should instead be handled by validation that is /// executed from InterpreterCreateQuery / InterpreterAlterQuery. @@ -52,10 +52,10 @@ void applyTableOverrideToCreateQuery(const ASTTableOverride & override, ASTCreat if (!create_query->columns_list->indices) create_query->columns_list->set(create_query->columns_list->indices, std::make_shared()); auto & dest_children = create_query->columns_list->indices->children; - auto exists = std::find_if(dest_children.begin(), dest_children.end(), [&](ASTPtr node) -> bool - { - return node->as()->name == override_index->name; - }); + auto * exists = std::find_if( + dest_children.begin(), + dest_children.end(), + [&](ASTPtr node) -> bool { return node->as()->name == override_index->name; }); if (exists == dest_children.end()) dest_children.emplace_back(override_index_ast); else @@ -72,10 +72,10 @@ void applyTableOverrideToCreateQuery(const ASTTableOverride & override, ASTCreat if (!create_query->columns_list->constraints) create_query->columns_list->set(create_query->columns_list->constraints, std::make_shared()); auto & dest_children = create_query->columns_list->constraints->children; - auto exists = std::find_if(dest_children.begin(), dest_children.end(), [&](ASTPtr node) -> bool - { - return node->as()->name == override_constraint->name; - }); + auto * exists = std::find_if( + dest_children.begin(), + dest_children.end(), + [&](ASTPtr node) -> bool { return node->as()->name == override_constraint->name; }); if (exists == dest_children.end()) dest_children.emplace_back(override_constraint_ast); else @@ -92,10 +92,10 @@ void applyTableOverrideToCreateQuery(const ASTTableOverride & override, ASTCreat if (!create_query->columns_list->projections) create_query->columns_list->set(create_query->columns_list->projections, std::make_shared()); auto & dest_children = create_query->columns_list->projections->children; - auto exists = std::find_if(dest_children.begin(), dest_children.end(), [&](ASTPtr node) -> bool - { - return node->as()->name == override_projection->name; - }); + auto * exists = std::find_if( + dest_children.begin(), + dest_children.end(), + [&](ASTPtr node) -> bool { return node->as()->name == override_projection->name; }); if (exists == dest_children.end()) dest_children.emplace_back(override_projection_ast); 
else diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 2976cec454f..478ab9421c7 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -202,26 +202,32 @@ static void logException(ContextPtr context, QueryLogElement & elem) elem.stack_trace); } -static void onExceptionBeforeStart(const String & query_for_logging, ContextPtr context, UInt64 current_time_us, ASTPtr ast, const std::shared_ptr & query_span) +static void onExceptionBeforeStart( + const String & query_for_logging, + ContextPtr context, + ASTPtr ast, + const std::shared_ptr & query_span, + UInt64 elapsed_millliseconds) { + auto query_end_time = std::chrono::system_clock::now(); + /// Exception before the query execution. if (auto quota = context->getQuota()) quota->used(QuotaType::ERRORS, 1, /* check_exceeded = */ false); const Settings & settings = context->getSettingsRef(); + const auto & client_info = context->getClientInfo(); + /// Log the start of query execution into the table if necessary. QueryLogElement elem; elem.type = QueryLogElementType::EXCEPTION_BEFORE_START; - - // all callers to onExceptionBeforeStart method construct the timespec for event_time and - // event_time_microseconds from the same time point. So, it can be assumed that both of these - // times are equal up to the precision of a second. - elem.event_time = current_time_us / 1000000; - elem.event_time_microseconds = current_time_us; - elem.query_start_time = current_time_us / 1000000; - elem.query_start_time_microseconds = current_time_us; + elem.event_time = timeInSeconds(query_end_time); + elem.event_time_microseconds = timeInMicroseconds(query_end_time); + elem.query_start_time = client_info.initial_query_start_time; + elem.query_start_time_microseconds = client_info.initial_query_start_time_microseconds; + elem.query_duration_ms = elapsed_millliseconds; elem.current_database = context->getCurrentDatabase(); elem.query = query_for_logging; @@ -325,19 +331,32 @@ static std::tuple executeQueryImpl( /// we still have enough span logs for the execution of external queries. std::shared_ptr query_span = internal ? nullptr : std::make_shared("query"); - const auto current_time = std::chrono::system_clock::now(); + auto query_start_time = std::chrono::system_clock::now(); + + /// Used to set the watch in QueryStatus and the output formats. It is not based on query_start_time as that might be based on + /// the value passed by the client + Stopwatch start_watch{CLOCK_MONOTONIC}; auto & client_info = context->getClientInfo(); - // If it's not an internal query and we don't see an initial_query_start_time yet, initialize it - // to current time. Internal queries are those executed without an independent client context, - // thus should not set initial_query_start_time, because it might introduce data race. It's also - // possible to have unset initial_query_start_time for non-internal and non-initial queries. For - // example, the query is from an initiator that is running an old version of clickhouse. - if (!internal && client_info.initial_query_start_time == 0) + if (!internal) { - client_info.initial_query_start_time = timeInSeconds(current_time); - client_info.initial_query_start_time_microseconds = timeInMicroseconds(current_time); + // If it's not an internal query and we don't see an initial_query_start_time yet, initialize it + // to current time. 
Internal queries are those executed without an independent client context, + // thus should not set initial_query_start_time, because it might introduce data race. It's also + // possible to have unset initial_query_start_time for non-internal and non-initial queries. For + // example, the query is from an initiator that is running an old version of clickhouse. + // On the other hand, if it's initialized then take it as the start of the query + if (client_info.initial_query_start_time == 0) + { + client_info.initial_query_start_time = timeInSeconds(query_start_time); + client_info.initial_query_start_time_microseconds = timeInMicroseconds(query_start_time); + } + else + { + query_start_time = std::chrono::time_point( + std::chrono::microseconds{client_info.initial_query_start_time_microseconds}); + } } assert(internal || CurrentThread::get().getQueryContext()); @@ -414,7 +433,7 @@ static std::tuple executeQueryImpl( logQuery(query_for_logging, context, internal, stage); if (!internal) - onExceptionBeforeStart(query_for_logging, context, timeInMicroseconds(current_time), ast, query_span); + onExceptionBeforeStart(query_for_logging, context, ast, query_span, start_watch.elapsedMilliseconds()); throw; } @@ -515,7 +534,7 @@ static std::tuple executeQueryImpl( if (!internal && !ast->as()) { /// processlist also has query masked now, to avoid secrets leaks though SHOW PROCESSLIST by other users. - process_list_entry = context->getProcessList().insert(query_for_logging, ast.get(), context); + process_list_entry = context->getProcessList().insert(query_for_logging, ast.get(), context, start_watch.getStart()); context->setProcessListElement(process_list_entry->getQueryStatus()); } @@ -753,10 +772,10 @@ static std::tuple executeQueryImpl( elem.type = QueryLogElementType::QUERY_START; //-V1048 - elem.event_time = timeInSeconds(current_time); - elem.event_time_microseconds = timeInMicroseconds(current_time); - elem.query_start_time = timeInSeconds(current_time); - elem.query_start_time_microseconds = timeInMicroseconds(current_time); + elem.event_time = timeInSeconds(query_start_time); + elem.event_time_microseconds = timeInMicroseconds(query_start_time); + elem.query_start_time = timeInSeconds(query_start_time); + elem.query_start_time_microseconds = timeInMicroseconds(query_start_time); elem.current_database = context->getCurrentDatabase(); elem.query = query_for_logging; @@ -805,25 +824,29 @@ static std::tuple executeQueryImpl( } /// Common code for finish and exception callbacks - auto status_info_to_query_log = [](QueryLogElement & element, const QueryStatusInfo & info, const ASTPtr query_ast, const ContextPtr context_ptr) mutable + auto status_info_to_query_log + = [](QueryLogElement & element, const QueryStatusInfo & info, const ASTPtr query_ast, const ContextPtr context_ptr) mutable { - UInt64 query_time = static_cast(info.elapsed_seconds * 1000000); - ProfileEvents::increment(ProfileEvents::QueryTimeMicroseconds, query_time); + const auto time_now = std::chrono::system_clock::now(); + UInt64 elapsed_microseconds = info.elapsed_microseconds; + element.event_time = timeInSeconds(time_now); + element.event_time_microseconds = timeInMicroseconds(time_now); + element.query_duration_ms = elapsed_microseconds / 1000; + + ProfileEvents::increment(ProfileEvents::QueryTimeMicroseconds, elapsed_microseconds); if (query_ast->as() || query_ast->as()) { - ProfileEvents::increment(ProfileEvents::SelectQueryTimeMicroseconds, query_time); + ProfileEvents::increment(ProfileEvents::SelectQueryTimeMicroseconds, 
elapsed_microseconds); } else if (query_ast->as()) { - ProfileEvents::increment(ProfileEvents::InsertQueryTimeMicroseconds, query_time); + ProfileEvents::increment(ProfileEvents::InsertQueryTimeMicroseconds, elapsed_microseconds); } else { - ProfileEvents::increment(ProfileEvents::OtherQueryTimeMicroseconds, query_time); + ProfileEvents::increment(ProfileEvents::OtherQueryTimeMicroseconds, elapsed_microseconds); } - element.query_duration_ms = static_cast(info.elapsed_seconds * 1000); - element.read_rows = info.read_rows; element.read_bytes = info.read_bytes; @@ -877,16 +900,8 @@ static std::tuple executeQueryImpl( CurrentThread::finalizePerformanceCounters(); QueryStatusInfo info = process_list_elem->getInfo(true, context->getSettingsRef().log_profile_events); - - double elapsed_seconds = info.elapsed_seconds; - elem.type = QueryLogElementType::QUERY_FINISH; - // construct event_time and event_time_microseconds using the same time point - // so that the two times will always be equal up to a precision of a second. - const auto finish_time = std::chrono::system_clock::now(); - elem.event_time = timeInSeconds(finish_time); - elem.event_time_microseconds = timeInMicroseconds(finish_time); status_info_to_query_log(elem, info, ast, context); if (pulling_pipeline) @@ -910,9 +925,15 @@ static std::tuple executeQueryImpl( if (elem.read_rows != 0) { - LOG_INFO(&Poco::Logger::get("executeQuery"), "Read {} rows, {} in {} sec., {} rows/sec., {}/sec.", - elem.read_rows, ReadableSize(elem.read_bytes), elapsed_seconds, - static_cast(elem.read_rows / elapsed_seconds), + double elapsed_seconds = static_cast(info.elapsed_microseconds) / 1000000.0; + double rows_per_second = static_cast(elem.read_rows) / elapsed_seconds; + LOG_INFO( + &Poco::Logger::get("executeQuery"), + "Read {} rows, {} in {} sec., {} rows/sec., {}/sec.", + elem.read_rows, + ReadableSize(elem.read_bytes), + elapsed_seconds, + rows_per_second, ReadableSize(elem.read_bytes / elapsed_seconds)); } @@ -926,8 +947,8 @@ static std::tuple executeQueryImpl( if (auto processors_profile_log = context->getProcessorsProfileLog()) { ProcessorProfileLogElement processor_elem; - processor_elem.event_time = timeInSeconds(finish_time); - processor_elem.event_time_microseconds = timeInMicroseconds(finish_time); + processor_elem.event_time = elem.event_time; + processor_elem.event_time_microseconds = elem.event_time_microseconds; processor_elem.query_id = elem.client_info.current_query_id; auto get_proc_id = [](const IProcessor & proc) -> UInt64 @@ -1002,7 +1023,8 @@ static std::tuple executeQueryImpl( } }; - auto exception_callback = [elem, + auto exception_callback = [start_watch, + elem, context, ast, log_queries, @@ -1025,14 +1047,6 @@ static std::tuple executeQueryImpl( quota->used(QuotaType::ERRORS, 1, /* check_exceeded = */ false); elem.type = QueryLogElementType::EXCEPTION_WHILE_PROCESSING; - - // event_time and event_time_microseconds are being constructed from the same time point - // to ensure that both the times will be equal up to the precision of a second. 
- const auto time_now = std::chrono::system_clock::now(); - - elem.event_time = timeInSeconds(time_now); - elem.event_time_microseconds = timeInMicroseconds(time_now); - elem.query_duration_ms = 1000 * (elem.event_time - elem.query_start_time); elem.exception_code = getCurrentExceptionCode(); elem.exception = getCurrentExceptionMessage(false); @@ -1041,12 +1055,19 @@ static std::tuple executeQueryImpl( /// Update performance counters before logging to query_log CurrentThread::finalizePerformanceCounters(); + const auto time_now = std::chrono::system_clock::now(); + elem.event_time = timeInSeconds(time_now); + elem.event_time_microseconds = timeInMicroseconds(time_now); if (process_list_elem) { QueryStatusInfo info = process_list_elem->getInfo(true, current_settings.log_profile_events, false); status_info_to_query_log(elem, info, ast, context); } + else + { + elem.query_duration_ms = start_watch.elapsedMilliseconds(); + } if (current_settings.calculate_text_stack_trace) setExceptionStackTrace(elem); @@ -1096,7 +1117,7 @@ static std::tuple executeQueryImpl( } if (!internal) - onExceptionBeforeStart(query_for_logging, context, timeInMicroseconds(current_time), ast, query_span); + onExceptionBeforeStart(query_for_logging, context, ast, query_span, start_watch.elapsedMilliseconds()); throw; } diff --git a/src/Interpreters/tests/gtest_lru_file_cache.cpp b/src/Interpreters/tests/gtest_lru_file_cache.cpp index 5bad3e9bba2..93faafb5cea 100644 --- a/src/Interpreters/tests/gtest_lru_file_cache.cpp +++ b/src/Interpreters/tests/gtest_lru_file_cache.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -14,11 +15,16 @@ #include #include #include +#include + +#include +#include +#include namespace fs = std::filesystem; +using namespace DB; -fs::path caches_dir = fs::current_path() / "lru_cache_test"; -String cache_base_path = caches_dir / "cache1" / ""; +static constexpr auto TEST_LOG_LEVEL = "debug"; void assertRange( [[maybe_unused]] size_t assert_n, DB::FileSegmentPtr file_segment, @@ -53,7 +59,7 @@ String getFileSegmentPath(const String & base_path, const DB::FileCache::Key & k return fs::path(base_path) / key_str.substr(0, 3) / key_str / DB::toString(offset); } -void download(DB::FileSegmentPtr file_segment) +void download(const std::string & cache_base_path, DB::FileSegmentPtr file_segment) { const auto & key = file_segment->key(); size_t size = file_segment->range().size(); @@ -67,30 +73,57 @@ void download(DB::FileSegmentPtr file_segment) file_segment->write(data.data(), size, file_segment->getCurrentWriteOffset()); } -void prepareAndDownload(DB::FileSegmentPtr file_segment) +void prepareAndDownload(const std::string & cache_base_path, DB::FileSegmentPtr file_segment) { - // std::cerr << "Reserving: " << file_segment->range().size() << " for: " << file_segment->range().toString() << "\n"; ASSERT_TRUE(file_segment->reserve(file_segment->range().size())); - download(file_segment); + download(cache_base_path, file_segment); } -void complete(const DB::FileSegmentsHolder & holder) +void complete(const std::string & cache_base_path, const DB::FileSegmentsHolder & holder) { for (const auto & file_segment : holder.file_segments) { ASSERT_TRUE(file_segment->getOrSetDownloader() == DB::FileSegment::getCallerId()); - prepareAndDownload(file_segment); + prepareAndDownload(cache_base_path, file_segment); file_segment->completeWithoutState(); } } - -TEST(FileCache, get) +class FileCacheTest : public ::testing::Test { - if (fs::exists(cache_base_path)) - 
fs::remove_all(cache_base_path); - fs::create_directories(cache_base_path); +public: + static void setupLogs(const std::string & level) + { + Poco::AutoPtr channel(new Poco::ConsoleChannel(std::cerr)); + Poco::Logger::root().setChannel(channel); + Poco::Logger::root().setLevel(level); + } + + void SetUp() override + { + if(const char * test_log_level = std::getenv("TEST_LOG_LEVEL")) // NOLINT(concurrency-mt-unsafe) + setupLogs(test_log_level); + else + setupLogs(TEST_LOG_LEVEL); + + if (fs::exists(cache_base_path)) + fs::remove_all(cache_base_path); + fs::create_directories(cache_base_path); + } + + void TearDown() override + { + if (fs::exists(cache_base_path)) + fs::remove_all(cache_base_path); + } + + fs::path caches_dir = fs::current_path() / "lru_cache_test"; + std::string cache_base_path = caches_dir / "cache1" / ""; +}; + +TEST_F(FileCacheTest, get) +{ DB::ThreadStatus thread_status; /// To work with cache need query_id and query context. @@ -126,7 +159,7 @@ TEST(FileCache, get) ASSERT_TRUE(segments[0]->reserve(segments[0]->range().size())); assertRange(2, segments[0], DB::FileSegment::Range(0, 9), DB::FileSegment::State::DOWNLOADING); - download(segments[0]); + download(cache_base_path, segments[0]); segments[0]->completeWithoutState(); assertRange(3, segments[0], DB::FileSegment::Range(0, 9), DB::FileSegment::State::DOWNLOADED); } @@ -147,7 +180,7 @@ TEST(FileCache, get) assertRange(5, segments[1], DB::FileSegment::Range(10, 14), DB::FileSegment::State::EMPTY); ASSERT_TRUE(segments[1]->getOrSetDownloader() == DB::FileSegment::getCallerId()); - prepareAndDownload(segments[1]); + prepareAndDownload(cache_base_path, segments[1]); segments[1]->completeWithoutState(); assertRange(6, segments[1], DB::FileSegment::Range(10, 14), DB::FileSegment::State::DOWNLOADED); } @@ -180,8 +213,8 @@ TEST(FileCache, get) assertRange(10, segments[0], DB::FileSegment::Range(10, 14), DB::FileSegment::State::DOWNLOADED); } - complete(cache.getOrSet(key, 17, 4, {})); /// Get [17, 20] - complete(cache.getOrSet(key, 24, 3, {})); /// Get [24, 26] + complete(cache_base_path, cache.getOrSet(key, 17, 4, {})); /// Get [17, 20] + complete(cache_base_path, cache.getOrSet(key, 24, 3, {})); /// Get [24, 26] /// completeWithState(cache.getOrSet(key, 27, 1, false)); /// Get [27, 27] /// Current cache: [__________][_____] [____] [___][] @@ -203,7 +236,7 @@ TEST(FileCache, get) assertRange(13, segments[2], DB::FileSegment::Range(15, 16), DB::FileSegment::State::EMPTY); ASSERT_TRUE(segments[2]->getOrSetDownloader() == DB::FileSegment::getCallerId()); - prepareAndDownload(segments[2]); + prepareAndDownload(cache_base_path, segments[2]); segments[2]->completeWithoutState(); @@ -244,7 +277,7 @@ TEST(FileCache, get) assertRange(21, segments[3], DB::FileSegment::Range(21, 21), DB::FileSegment::State::EMPTY); ASSERT_TRUE(segments[3]->getOrSetDownloader() == DB::FileSegment::getCallerId()); - prepareAndDownload(segments[3]); + prepareAndDownload(cache_base_path, segments[3]); segments[3]->completeWithoutState(); ASSERT_TRUE(segments[3]->state() == DB::FileSegment::State::DOWNLOADED); @@ -267,8 +300,8 @@ TEST(FileCache, get) ASSERT_TRUE(segments[0]->getOrSetDownloader() == DB::FileSegment::getCallerId()); ASSERT_TRUE(segments[2]->getOrSetDownloader() == DB::FileSegment::getCallerId()); - prepareAndDownload(segments[0]); - prepareAndDownload(segments[2]); + prepareAndDownload(cache_base_path, segments[0]); + prepareAndDownload(cache_base_path, segments[2]); segments[0]->completeWithoutState(); segments[2]->completeWithoutState(); 
} @@ -290,8 +323,8 @@ TEST(FileCache, get) ASSERT_TRUE(s5[0]->getOrSetDownloader() == DB::FileSegment::getCallerId()); ASSERT_TRUE(s1[0]->getOrSetDownloader() == DB::FileSegment::getCallerId()); - prepareAndDownload(s5[0]); - prepareAndDownload(s1[0]); + prepareAndDownload(cache_base_path, s5[0]); + prepareAndDownload(cache_base_path, s1[0]); s5[0]->completeWithoutState(); s1[0]->completeWithoutState(); @@ -394,7 +427,7 @@ TEST(FileCache, get) cv.wait(lock, [&]{ return lets_start_download; }); } - prepareAndDownload(segments[2]); + prepareAndDownload(cache_base_path, segments[2]); segments[2]->completeWithoutState(); ASSERT_TRUE(segments[2]->state() == DB::FileSegment::State::DOWNLOADED); @@ -459,7 +492,7 @@ TEST(FileCache, get) ASSERT_TRUE(segments_2[1]->state() == DB::FileSegment::State::PARTIALLY_DOWNLOADED); ASSERT_TRUE(segments_2[1]->getOrSetDownloader() == DB::FileSegment::getCallerId()); - prepareAndDownload(segments_2[1]); + prepareAndDownload(cache_base_path, segments_2[1]); segments_2[1]->completeWithoutState(); }); @@ -517,3 +550,171 @@ TEST(FileCache, get) } } + +TEST_F(FileCacheTest, writeBuffer) +{ + DB::FileCacheSettings settings; + settings.max_size = 100; + settings.max_elements = 5; + settings.max_file_segment_size = 5; + + DB::FileCache cache(cache_base_path, settings); + cache.initialize(); + + auto write_to_cache = [&cache](const String & key, const Strings & data) + { + CreateFileSegmentSettings segment_settings; + segment_settings.kind = FileSegmentKind::Temporary; + segment_settings.unbounded = true; + + auto holder = cache.set(cache.hash(key), 0, 3, segment_settings); + EXPECT_EQ(holder.file_segments.size(), 1); + auto & segment = holder.file_segments.front(); + WriteBufferToFileSegment out(segment.get()); + for (const auto & s : data) + out.write(s.data(), s.size()); + return holder; + }; + + std::vector file_segment_paths; + { + auto holder = write_to_cache("key1", {"abc", "defg"}); + file_segment_paths.emplace_back(holder.file_segments.front()->getPathInLocalCache()); + + ASSERT_EQ(fs::file_size(file_segment_paths.back()), 7); + ASSERT_TRUE(holder.file_segments.front()->range() == FileSegment::Range(0, 7)); + ASSERT_EQ(cache.getUsedCacheSize(), 7); + + { + auto holder2 = write_to_cache("key2", {"1", "22", "333", "4444", "55555"}); + file_segment_paths.emplace_back(holder2.file_segments.front()->getPathInLocalCache()); + + ASSERT_EQ(fs::file_size(file_segment_paths.back()), 15); + ASSERT_TRUE(holder2.file_segments.front()->range() == FileSegment::Range(0, 15)); + ASSERT_EQ(cache.getUsedCacheSize(), 22); + } + ASSERT_FALSE(fs::exists(file_segment_paths.back())); + ASSERT_EQ(cache.getUsedCacheSize(), 7); + } + + for (const auto & file_segment_path : file_segment_paths) + { + ASSERT_FALSE(fs::exists(file_segment_path)); + } + ASSERT_EQ(cache.getUsedCacheSize(), 0); +} + + +static Block generateBlock(size_t size = 0) +{ + Block block; + ColumnWithTypeAndName column; + column.name = "x"; + column.type = std::make_shared(); + + { + MutableColumnPtr mut_col = column.type->createColumn(); + for (size_t i = 0; i < size; ++i) + mut_col->insert(i); + column.column = std::move(mut_col); + } + + block.insert(column); + return block; +} + +static size_t readAllTemporaryData(TemporaryFileStream & stream) +{ + Block block; + size_t read_rows = 0; + do + { + block = stream.read(); + read_rows += block.rows(); + } while (block); + return read_rows; +} + +TEST_F(FileCacheTest, temporaryData) +{ + DB::FileCacheSettings settings; + settings.max_size = 10_KiB; + 
settings.max_file_segment_size = 1_KiB; + + DB::FileCache file_cache(cache_base_path, settings); + file_cache.initialize(); + + auto tmp_data_scope = std::make_shared(nullptr, &file_cache, 0); + + auto some_data_holder = file_cache.getOrSet(file_cache.hash("some_data"), 0, 5_KiB, CreateFileSegmentSettings{}); + + { + auto segments = fromHolder(some_data_holder); + ASSERT_EQ(segments.size(), 5); + for (auto & segment : segments) + { + ASSERT_TRUE(segment->getOrSetDownloader() == DB::FileSegment::getCallerId()); + ASSERT_TRUE(segment->reserve(segment->range().size())); + download(cache_base_path, segment); + segment->completeWithoutState(); + } + } + + size_t size_used_before_temporary_data = file_cache.getUsedCacheSize(); + size_t segments_used_before_temporary_data = file_cache.getFileSegmentsNum(); + ASSERT_GT(size_used_before_temporary_data, 0); + ASSERT_GT(segments_used_before_temporary_data, 0); + + size_t size_used_with_temporary_data; + size_t segments_used_with_temporary_data; + { + auto tmp_data = std::make_unique(tmp_data_scope); + + auto & stream = tmp_data->createStream(generateBlock()); + + ASSERT_GT(stream.write(generateBlock(100)), 0); + + ASSERT_GT(file_cache.getUsedCacheSize(), 0); + ASSERT_GT(file_cache.getFileSegmentsNum(), 0); + + size_t used_size_before_attempt = file_cache.getUsedCacheSize(); + /// data can't be evicted because it is still held by `some_data_holder` + ASSERT_THROW({ + stream.write(generateBlock(2000)); + stream.flush(); + }, DB::Exception); + + ASSERT_EQ(file_cache.getUsedCacheSize(), used_size_before_attempt); + } + { + auto tmp_data = std::make_unique(tmp_data_scope); + auto & stream = tmp_data->createStream(generateBlock()); + + ASSERT_GT(stream.write(generateBlock(100)), 0); + + some_data_holder.reset(); + + stream.write(generateBlock(2000)); + + auto stat = stream.finishWriting(); + + ASSERT_TRUE(fs::exists(stream.getPath())); + ASSERT_GT(fs::file_size(stream.getPath()), 100); + + ASSERT_EQ(stat.num_rows, 2100); + ASSERT_EQ(readAllTemporaryData(stream), 2100); + + size_used_with_temporary_data = file_cache.getUsedCacheSize(); + segments_used_with_temporary_data = file_cache.getFileSegmentsNum(); + ASSERT_GT(size_used_with_temporary_data, 0); + ASSERT_GT(segments_used_with_temporary_data, 0); + } + + /// All temp data should be evicted after removing temporary files + ASSERT_LE(file_cache.getUsedCacheSize(), size_used_with_temporary_data); + ASSERT_LE(file_cache.getFileSegmentsNum(), segments_used_with_temporary_data); + + /// Some segments reserved by `some_data_holder` was eviced by temporary data + ASSERT_LE(file_cache.getUsedCacheSize(), size_used_before_temporary_data); + ASSERT_LE(file_cache.getFileSegmentsNum(), segments_used_before_temporary_data); +} diff --git a/src/Parsers/ASTColumnDeclaration.cpp b/src/Parsers/ASTColumnDeclaration.cpp index dc5651d9f14..c2396708a73 100644 --- a/src/Parsers/ASTColumnDeclaration.cpp +++ b/src/Parsers/ASTColumnDeclaration.cpp @@ -2,6 +2,7 @@ #include #include #include +#include namespace DB @@ -78,7 +79,7 @@ void ASTColumnDeclaration::formatImpl(const FormatSettings & settings, FormatSta if (default_expression) { settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << default_specifier << (settings.hilite ? 
hilite_none : ""); - if (default_specifier != "EPHEMERAL" || !default_expression->as()->value.isNull()) + if (!ephemeral_default) { settings.ostr << ' '; default_expression->formatImpl(settings, state, frame); diff --git a/src/Parsers/ASTColumnDeclaration.h b/src/Parsers/ASTColumnDeclaration.h index 5ecfb859abc..2008e4f99d1 100644 --- a/src/Parsers/ASTColumnDeclaration.h +++ b/src/Parsers/ASTColumnDeclaration.h @@ -16,6 +16,7 @@ public: std::optional null_modifier; String default_specifier; ASTPtr default_expression; + bool ephemeral_default; ASTPtr comment; ASTPtr codec; ASTPtr ttl; diff --git a/src/Parsers/ASTColumnsMatcher.cpp b/src/Parsers/ASTColumnsMatcher.cpp index 0fc6847de68..124206043cf 100644 --- a/src/Parsers/ASTColumnsMatcher.cpp +++ b/src/Parsers/ASTColumnsMatcher.cpp @@ -87,7 +87,7 @@ void ASTColumnsListMatcher::updateTreeHashImpl(SipHash & hash_state) const void ASTColumnsListMatcher::appendColumnName(WriteBuffer & ostr) const { writeCString("COLUMNS(", ostr); - for (auto it = column_list->children.begin(); it != column_list->children.end(); ++it) + for (auto * it = column_list->children.begin(); it != column_list->children.end(); ++it) { if (it != column_list->children.begin()) writeCString(", ", ostr); @@ -198,7 +198,7 @@ void ASTQualifiedColumnsListMatcher::appendColumnName(WriteBuffer & ostr) const qualifier->appendColumnName(ostr); writeCString(".COLUMNS(", ostr); - for (auto it = column_list->children.begin(); it != column_list->children.end(); ++it) + for (auto * it = column_list->children.begin(); it != column_list->children.end(); ++it) { if (it != column_list->children.begin()) writeCString(", ", ostr); diff --git a/src/Parsers/ASTColumnsTransformers.cpp b/src/Parsers/ASTColumnsTransformers.cpp index 118c22b463f..16752fa115e 100644 --- a/src/Parsers/ASTColumnsTransformers.cpp +++ b/src/Parsers/ASTColumnsTransformers.cpp @@ -217,7 +217,7 @@ void ASTColumnsExceptTransformer::transform(ASTs & nodes) const for (const auto & child : children) expected_columns.insert(child->as().name()); - for (auto it = nodes.begin(); it != nodes.end();) + for (auto * it = nodes.begin(); it != nodes.end();) { if (const auto * id = it->get()->as()) { @@ -234,7 +234,7 @@ void ASTColumnsExceptTransformer::transform(ASTs & nodes) const } else { - for (auto it = nodes.begin(); it != nodes.end();) + for (auto * it = nodes.begin(); it != nodes.end();) { if (const auto * id = it->get()->as()) { diff --git a/src/Parsers/ASTFunction.cpp b/src/Parsers/ASTFunction.cpp index 9668848f0b6..4ac4bb6144e 100644 --- a/src/Parsers/ASTFunction.cpp +++ b/src/Parsers/ASTFunction.cpp @@ -383,7 +383,7 @@ void ASTFunction::appendColumnNameImpl(WriteBuffer & ostr) const if (parameters) { writeChar('(', ostr); - for (auto it = parameters->children.begin(); it != parameters->children.end(); ++it) + for (auto * it = parameters->children.begin(); it != parameters->children.end(); ++it) { if (it != parameters->children.begin()) writeCString(", ", ostr); @@ -396,7 +396,7 @@ void ASTFunction::appendColumnNameImpl(WriteBuffer & ostr) const writeChar('(', ostr); if (arguments) { - for (auto it = arguments->children.begin(); it != arguments->children.end(); ++it) + for (auto * it = arguments->children.begin(); it != arguments->children.end(); ++it) { if (it != arguments->children.begin()) writeCString(", ", ostr); diff --git a/src/Parsers/ASTIdentifier.cpp b/src/Parsers/ASTIdentifier.cpp index 341ac44b56e..8651a52f2c1 100644 --- a/src/Parsers/ASTIdentifier.cpp +++ b/src/Parsers/ASTIdentifier.cpp @@ -24,7 +24,7 @@ 
ASTIdentifier::ASTIdentifier(const String & short_name, ASTPtr && name_param) children.push_back(std::move(name_param)); } -ASTIdentifier::ASTIdentifier(std::vector && name_parts_, bool special, std::vector && name_params) +ASTIdentifier::ASTIdentifier(std::vector && name_parts_, bool special, ASTs && name_params) : name_parts(name_parts_), semantic(std::make_shared()) { assert(!name_parts.empty()); @@ -164,12 +164,12 @@ void ASTIdentifier::resetFullName() full_name += '.' + name_parts[i]; } -ASTTableIdentifier::ASTTableIdentifier(const String & table_name, std::vector && name_params) +ASTTableIdentifier::ASTTableIdentifier(const String & table_name, ASTs && name_params) : ASTIdentifier({table_name}, true, std::move(name_params)) { } -ASTTableIdentifier::ASTTableIdentifier(const StorageID & table_id, std::vector && name_params) +ASTTableIdentifier::ASTTableIdentifier(const StorageID & table_id, ASTs && name_params) : ASTIdentifier( table_id.database_name.empty() ? std::vector{table_id.table_name} : std::vector{table_id.database_name, table_id.table_name}, @@ -178,7 +178,7 @@ ASTTableIdentifier::ASTTableIdentifier(const StorageID & table_id, std::vector && name_params) +ASTTableIdentifier::ASTTableIdentifier(const String & database_name, const String & table_name, ASTs && name_params) : ASTIdentifier({database_name, table_name}, true, std::move(name_params)) { } diff --git a/src/Parsers/ASTIdentifier.h b/src/Parsers/ASTIdentifier.h index c9712d578e0..0e030c797ce 100644 --- a/src/Parsers/ASTIdentifier.h +++ b/src/Parsers/ASTIdentifier.h @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB @@ -23,7 +24,7 @@ class ASTIdentifier : public ASTWithAlias { public: explicit ASTIdentifier(const String & short_name, ASTPtr && name_param = {}); - explicit ASTIdentifier(std::vector && name_parts, bool special = false, std::vector && name_params = {}); + explicit ASTIdentifier(std::vector && name_parts, bool special = false, ASTs && name_params = {}); /** Get the text that identifies this element. 
*/ String getID(char delim) const override { return "Identifier" + (delim + name()); } @@ -72,9 +73,9 @@ private: class ASTTableIdentifier : public ASTIdentifier { public: - explicit ASTTableIdentifier(const String & table_name, std::vector && name_params = {}); - explicit ASTTableIdentifier(const StorageID & table_id, std::vector && name_params = {}); - ASTTableIdentifier(const String & database_name, const String & table_name, std::vector && name_params = {}); + explicit ASTTableIdentifier(const String & table_name, ASTs && name_params = {}); + explicit ASTTableIdentifier(const StorageID & table_id, ASTs && name_params = {}); + ASTTableIdentifier(const String & database_name, const String & table_name, ASTs && name_params = {}); String getID(char delim) const override { return "TableIdentifier" + (delim + name()); } ASTPtr clone() const override; diff --git a/src/Parsers/ASTSystemQuery.cpp b/src/Parsers/ASTSystemQuery.cpp index ab5137d0960..5ed77f48ceb 100644 --- a/src/Parsers/ASTSystemQuery.cpp +++ b/src/Parsers/ASTSystemQuery.cpp @@ -185,7 +185,7 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState &, { print_identifier(database->as()->name()); } - else if (type == Type::DROP_REPLICA) + else if (type == Type::DROP_REPLICA || type == Type::DROP_DATABASE_REPLICA) { print_drop_replica(); } diff --git a/src/Parsers/ASTSystemQuery.h b/src/Parsers/ASTSystemQuery.h index 8787677fc87..664af9c65ce 100644 --- a/src/Parsers/ASTSystemQuery.h +++ b/src/Parsers/ASTSystemQuery.h @@ -37,6 +37,7 @@ public: RESTART_REPLICA, RESTORE_REPLICA, DROP_REPLICA, + DROP_DATABASE_REPLICA, SYNC_REPLICA, SYNC_DATABASE_REPLICA, SYNC_TRANSACTION_LOG, diff --git a/src/Parsers/ASTTTLElement.cpp b/src/Parsers/ASTTTLElement.cpp index 86dd85e0eb8..bb353194e8c 100644 --- a/src/Parsers/ASTTTLElement.cpp +++ b/src/Parsers/ASTTTLElement.cpp @@ -52,7 +52,7 @@ void ASTTTLElement::formatImpl(const FormatSettings & settings, FormatState & st else if (mode == TTLMode::GROUP_BY) { settings.ostr << " GROUP BY "; - for (auto it = group_by_key.begin(); it != group_by_key.end(); ++it) + for (const auto * it = group_by_key.begin(); it != group_by_key.end(); ++it) { if (it != group_by_key.begin()) settings.ostr << ", "; @@ -62,7 +62,7 @@ void ASTTTLElement::formatImpl(const FormatSettings & settings, FormatState & st if (!group_by_assignments.empty()) { settings.ostr << " SET "; - for (auto it = group_by_assignments.begin(); it != group_by_assignments.end(); ++it) + for (const auto * it = group_by_assignments.begin(); it != group_by_assignments.end(); ++it) { if (it != group_by_assignments.begin()) settings.ostr << ", "; diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 74d14292459..c6b51fd4dfe 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -36,6 +37,7 @@ #include #include #include +#include #include #include @@ -223,7 +225,7 @@ bool ParserCompoundIdentifier::parseImpl(Pos & pos, ASTPtr & node, Expected & ex return false; std::vector parts; - std::vector params; + ASTs params; const auto & list = id_list->as(); for (const auto & child : list.children) { @@ -986,6 +988,38 @@ bool ParserUnsignedInteger::parseImpl(Pos & pos, ASTPtr & node, Expected & expec return true; } +inline static bool makeStringLiteral(IParser::Pos & pos, ASTPtr & node, String str) +{ + auto literal = std::make_shared(str); + literal->begin = pos; + literal->end = 
++pos; + node = literal; + return true; +} + +inline static bool makeHexOrBinStringLiteral(IParser::Pos & pos, ASTPtr & node, bool hex, size_t word_size) +{ + const char * str_begin = pos->begin + 2; + const char * str_end = pos->end - 1; + if (str_begin == str_end) + return makeStringLiteral(pos, node, ""); + + PODArray res; + res.resize((pos->size() + word_size) / word_size + 1); + char * res_begin = reinterpret_cast(res.data()); + char * res_pos = res_begin; + + if (hex) + { + hexStringDecode(str_begin, str_end, res_pos); + } + else + { + binStringDecode(str_begin, str_end, res_pos); + } + + return makeStringLiteral(pos, node, String(reinterpret_cast(res.data()), (res_pos - res_begin - 1))); +} bool ParserStringLiteral::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { @@ -996,6 +1030,18 @@ bool ParserStringLiteral::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte if (pos->type == TokenType::StringLiteral) { + if (*pos->begin == 'x' || *pos->begin == 'X') + { + constexpr size_t word_size = 2; + return makeHexOrBinStringLiteral(pos, node, true, word_size); + } + + if (*pos->begin == 'b' || *pos->begin == 'B') + { + constexpr size_t word_size = 8; + return makeHexOrBinStringLiteral(pos, node, false, word_size); + } + ReadBufferFromMemory in(pos->begin, pos->size()); try @@ -1022,11 +1068,7 @@ bool ParserStringLiteral::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte s = String(pos->begin + heredoc_size, pos->size() - heredoc_size * 2); } - auto literal = std::make_shared(s); - literal->begin = pos; - literal->end = ++pos; - node = literal; - return true; + return makeStringLiteral(pos, node, s); } template @@ -1128,36 +1170,42 @@ class ICollection { public: virtual ~ICollection() = default; - virtual bool parse(IParser::Pos & pos, Collections & collections, ASTPtr & node, Expected & expected) = 0; + virtual bool parse(IParser::Pos & pos, Collections & collections, ASTPtr & node, Expected & expected, bool allow_map) = 0; }; template class CommonCollection : public ICollection { public: - bool parse(IParser::Pos & pos, Collections & collections, ASTPtr & node, Expected & expected) override; + explicit CommonCollection(const IParser::Pos & pos) : begin(pos) {} + + bool parse(IParser::Pos & pos, Collections & collections, ASTPtr & node, Expected & expected, bool allow_map) override; private: Container container; + IParser::Pos begin; }; class MapCollection : public ICollection { public: - bool parse(IParser::Pos & pos, Collections & collections, ASTPtr & node, Expected & expected) override; + explicit MapCollection(const IParser::Pos & pos) : begin(pos) {} + + bool parse(IParser::Pos & pos, Collections & collections, ASTPtr & node, Expected & expected, bool allow_map) override; private: Map container; + IParser::Pos begin; }; -bool parseAllCollectionsStart(IParser::Pos & pos, Collections & collections, Expected & /*expected*/) +bool parseAllCollectionsStart(IParser::Pos & pos, Collections & collections, Expected & /*expected*/, bool allow_map) { - if (pos->type == TokenType::OpeningCurlyBrace) - collections.push_back(std::make_unique()); + if (allow_map && pos->type == TokenType::OpeningCurlyBrace) + collections.push_back(std::make_unique(pos)); else if (pos->type == TokenType::OpeningRoundBracket) - collections.push_back(std::make_unique>()); + collections.push_back(std::make_unique>(pos)); else if (pos->type == TokenType::OpeningSquareBracket) - collections.push_back(std::make_unique>()); + collections.push_back(std::make_unique>(pos)); else return false; @@ -1166,7 
+1214,7 @@ bool parseAllCollectionsStart(IParser::Pos & pos, Collections & collections, Exp } template -bool CommonCollection::parse(IParser::Pos & pos, Collections & collections, ASTPtr & node, Expected & expected) +bool CommonCollection::parse(IParser::Pos & pos, Collections & collections, ASTPtr & node, Expected & expected, bool allow_map) { if (node) { @@ -1183,23 +1231,27 @@ bool CommonCollection::parse(IParser::Pos & pos, Collectio { if (end_p.ignore(pos, expected)) { - node = std::make_shared(std::move(container)); + auto result = std::make_shared(std::move(container)); + result->begin = begin; + result->end = pos; + + node = std::move(result); break; } if (!container.empty() && !comma_p.ignore(pos, expected)) - return false; + return false; if (literal_p.parse(pos, literal, expected)) container.push_back(std::move(literal->as().value)); else - return parseAllCollectionsStart(pos, collections, expected); + return parseAllCollectionsStart(pos, collections, expected, allow_map); } return true; } -bool MapCollection::parse(IParser::Pos & pos, Collections & collections, ASTPtr & node, Expected & expected) +bool MapCollection::parse(IParser::Pos & pos, Collections & collections, ASTPtr & node, Expected & expected, bool allow_map) { if (node) { @@ -1217,7 +1269,11 @@ bool MapCollection::parse(IParser::Pos & pos, Collections & collections, ASTPtr { if (end_p.ignore(pos, expected)) { - node = std::make_shared(std::move(container)); + auto result = std::make_shared(std::move(container)); + result->begin = begin; + result->end = pos; + + node = std::move(result); break; } @@ -1235,7 +1291,7 @@ bool MapCollection::parse(IParser::Pos & pos, Collections & collections, ASTPtr if (literal_p.parse(pos, literal, expected)) container.push_back(std::move(literal->as().value)); else - return parseAllCollectionsStart(pos, collections, expected); + return parseAllCollectionsStart(pos, collections, expected, allow_map); } return true; @@ -1248,12 +1304,12 @@ bool ParserAllCollectionsOfLiterals::parseImpl(Pos & pos, ASTPtr & node, Expecte { Collections collections; - if (!parseAllCollectionsStart(pos, collections, expected)) + if (!parseAllCollectionsStart(pos, collections, expected, allow_map)) return false; while (!collections.empty()) { - if (!collections.back()->parse(pos, collections, node, expected)) + if (!collections.back()->parse(pos, collections, node, expected, allow_map)) return false; if (node) diff --git a/src/Parsers/ExpressionElementParsers.h b/src/Parsers/ExpressionElementParsers.h index 8e328db976b..cc88faf2653 100644 --- a/src/Parsers/ExpressionElementParsers.h +++ b/src/Parsers/ExpressionElementParsers.h @@ -307,9 +307,14 @@ protected: class ParserAllCollectionsOfLiterals : public IParserBase { public: + explicit ParserAllCollectionsOfLiterals(bool allow_map_ = true) : allow_map(allow_map_) {} + protected: const char * getName() const override { return "combination of maps, arrays, tuples"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + +private: + bool allow_map; }; diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index cbbee4a04e3..01955c2c05a 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -46,16 +46,15 @@ bool ParserList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!elem_parser->parse(pos, element, expected)) return false; - elements.push_back(element); + elements.push_back(std::move(element)); return true; }; if (!parseUtil(pos, expected, 
parse_element, *separator_parser, allow_empty)) return false; - auto list = std::make_shared(result_separator); - list->children = std::move(elements); - node = list; + node = std::make_shared(result_separator); + node->children = std::move(elements); return true; } @@ -76,7 +75,7 @@ bool ParserUnionList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!elem_parser.parse(pos, element, expected)) return false; - elements.push_back(element); + elements.push_back(std::move(element)); return true; }; @@ -120,9 +119,8 @@ bool ParserUnionList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!parseUtil(pos, parse_element, parse_separator)) return false; - auto list = std::make_shared(); - list->children = std::move(elements); - node = list; + node = std::make_shared(); + node->children = std::move(elements); return true; } @@ -242,7 +240,7 @@ bool ParserLeftAssociativeBinaryOperatorList::parseImpl(Pos & pos, ASTPtr & node if (!elem_parser->parse(pos, elem, expected)) return false; - node = elem; + node = std::move(elem); first = false; } else @@ -607,7 +605,7 @@ public: asts.reserve(asts.size() + n); - auto start = operands.begin() + operands.size() - n; + auto * start = operands.begin() + operands.size() - n; asts.insert(asts.end(), std::make_move_iterator(start), std::make_move_iterator(operands.end())); operands.erase(start, operands.end()); @@ -701,7 +699,7 @@ public: /// 2. If there is already tuple do nothing if (tryGetFunctionName(elements.back()) == "tuple") { - pushOperand(elements.back()); + pushOperand(std::move(elements.back())); elements.pop_back(); } /// 3. Put all elements in a single tuple @@ -711,6 +709,19 @@ public: elements.clear(); pushOperand(function); } + + /// We must check that tuple arguments are identifiers + auto * func_ptr = operands.back()->as(); + auto * args_ptr = func_ptr->arguments->as(); + + for (const auto & child : args_ptr->children) + { + if (typeid_cast(child.get())) + continue; + + return false; + } + return true; } @@ -1064,9 +1075,7 @@ public: is_tuple = true; // Special case for f(x, (y) -> z) = f(x, tuple(y) -> z) - auto test_pos = pos; - auto test_expected = expected; - if (parseOperator(test_pos, "->", test_expected)) + if (pos->type == TokenType::Arrow) is_tuple = true; } @@ -1448,7 +1457,7 @@ public: return false; auto subquery = std::make_shared(); - subquery->children.push_back(node); + subquery->children.push_back(std::move(node)); elements = {makeASTFunction("exists", subquery)}; finished = true; @@ -1734,6 +1743,29 @@ private: bool parsed_interval_kind = false; }; +class TupleLayer : public LayerWithSeparator +{ +public: + bool parse(IParser::Pos & pos, Expected & expected, Action & action) override + { + bool result = LayerWithSeparator::parse(pos, expected, action); + + /// Check that after the tuple() function there is no lambdas operator + if (finished && pos->type == TokenType::Arrow) + return false; + + return result; + } + +protected: + bool getResultImpl(ASTPtr & node) override + { + node = makeASTFunction("tuple", std::move(elements)); + return true; + } +}; + + class IntervalLayer : public Layer { public: @@ -2037,6 +2069,9 @@ std::unique_ptr getFunctionLayer(ASTPtr identifier, bool is_table_functio return std::make_unique(true); } + if (function_name == "tuple") + return std::make_unique(); + if (function_name_lowercase == "cast") return std::make_unique(); else if (function_name_lowercase == "extract") @@ -2361,6 +2396,7 @@ Action ParserExpressionImpl::tryParseOperand(Layers & layers, IParser::Pos & pos if 
(layers.back()->previousType() == OperatorType::Comparison) { + auto old_pos = pos; SubqueryFunctionType subquery_function_type = SubqueryFunctionType::NONE; if (any_parser.ignore(pos, expected) && subquery_parser.parse(pos, tmp, expected)) @@ -2386,6 +2422,10 @@ Action ParserExpressionImpl::tryParseOperand(Layers & layers, IParser::Pos & pos layers.back()->pushOperand(std::move(function)); return Action::OPERATOR; } + else + { + pos = old_pos; + } } /// Try to find any unary operators diff --git a/src/Parsers/IAST_fwd.h b/src/Parsers/IAST_fwd.h index 18ba79d6618..53d41d42d65 100644 --- a/src/Parsers/IAST_fwd.h +++ b/src/Parsers/IAST_fwd.h @@ -1,13 +1,36 @@ #pragma once +#include #include -#include +#include namespace DB { class IAST; using ASTPtr = std::shared_ptr; -using ASTs = std::vector; +/// sizeof(absl::InlinedVector) == 8 + N * 16. +/// 7 elements take 120 Bytes which is ~128 +using ASTs = absl::InlinedVector; + +} + +namespace std +{ + +inline typename DB::ASTs::size_type erase(DB::ASTs & asts, const DB::ASTPtr & element) +{ + auto old_size = asts.size(); + asts.erase(std::remove(asts.begin(), asts.end(), element), asts.end()); + return old_size - asts.size(); +} + +template +inline typename DB::ASTs::size_type erase_if(DB::ASTs & asts, Predicate pred) +{ + auto old_size = asts.size(); + asts.erase(std::remove_if(asts.begin(), asts.end(), pred), asts.end()); + return old_size - asts.size(); +} } diff --git a/src/Parsers/IParser.h b/src/Parsers/IParser.h index 4e6dbca15a6..466cdf7a4b1 100644 --- a/src/Parsers/IParser.h +++ b/src/Parsers/IParser.h @@ -1,7 +1,7 @@ #pragma once +#include #include -#include #include #include @@ -24,8 +24,8 @@ namespace ErrorCodes */ struct Expected { + absl::InlinedVector variants; const char * max_parsed_pos = nullptr; - std::vector variants; /// 'description' should be statically allocated string. 
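
The IAST_fwd.h change above switches ASTs from std::vector to absl::InlinedVector so that small child lists (seven pointers or fewer) avoid a heap allocation, and then supplies erase/erase_if helpers because the C++20 std::erase overloads are only specified for the standard containers. A hedged standalone sketch of the same pattern, assuming Abseil is available; a shared_ptr<int> stands in for DB::ASTPtr, and unlike the diff the helpers are left as plain free functions.

#include <absl/container/inlined_vector.h>
#include <algorithm>
#include <iostream>
#include <memory>

using NodePtr = std::shared_ptr<int>;
/// 7 inline slots: 8 + 7 * 16 = 120 bytes, close to a cache-friendly 128.
using Nodes = absl::InlinedVector<NodePtr, 7>;

/// Thin wrappers around the erase-remove idiom, mirroring what std::erase/erase_if
/// would do for std::vector.
inline Nodes::size_type erase(Nodes & nodes, const NodePtr & element)
{
    auto old_size = nodes.size();
    nodes.erase(std::remove(nodes.begin(), nodes.end(), element), nodes.end());
    return old_size - nodes.size();
}

template <typename Predicate>
inline Nodes::size_type erase_if(Nodes & nodes, Predicate pred)
{
    auto old_size = nodes.size();
    nodes.erase(std::remove_if(nodes.begin(), nodes.end(), pred), nodes.end());
    return old_size - nodes.size();
}

int main()
{
    Nodes nodes{std::make_shared<int>(1), std::make_shared<int>(2), std::make_shared<int>(3)};
    auto removed = erase_if(nodes, [](const NodePtr & p) { return *p % 2 == 0; });
    std::cout << "removed " << removed << ", left " << nodes.size() << '\n';  // removed 1, left 2
}
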
ALWAYS_INLINE void add(const char * current_pos, const char * description) @@ -38,7 +38,7 @@ struct Expected return; } - if ((current_pos == max_parsed_pos) && (find(variants.begin(), variants.end(), description) == variants.end())) + if ((current_pos == max_parsed_pos) && (std::find(variants.begin(), variants.end(), description) == variants.end())) variants.push_back(description); } @@ -64,6 +64,8 @@ public: { } + Pos(TokenIterator token_iterator_, uint32_t max_depth_) : TokenIterator(token_iterator_), max_depth(max_depth_) { } + ALWAYS_INLINE void increaseDepth() { ++depth; diff --git a/src/Parsers/Lexer.cpp b/src/Parsers/Lexer.cpp index 6bd27ee62ae..be67807ad8f 100644 --- a/src/Parsers/Lexer.cpp +++ b/src/Parsers/Lexer.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -44,6 +45,36 @@ Token quotedString(const char *& pos, const char * const token_begin, const char } } +Token quotedHexOrBinString(const char *& pos, const char * const token_begin, const char * const end) +{ + constexpr char quote = '\''; + + assert(pos[1] == quote); + + bool hex = (*pos == 'x' || *pos == 'X'); + + pos += 2; + + if (hex) + { + while (pos < end && isHexDigit(*pos)) + ++pos; + } + else + { + pos = find_first_not_symbols<'0', '1'>(pos, end); + } + + if (pos >= end || *pos != quote) + { + pos = end; + return Token(TokenType::ErrorSingleQuoteIsNotClosed, token_begin, end); + } + + ++pos; + return Token(TokenType::StringLiteral, token_begin, pos); +} + } @@ -420,6 +451,12 @@ Token Lexer::nextTokenImpl() return Token(TokenType::DollarSign, token_begin, ++pos); } } + + if (pos + 2 < end && pos[1] == '\'' && (*pos == 'x' || *pos == 'b' || *pos == 'X' || *pos == 'B')) + { + return quotedHexOrBinString(pos, token_begin, end); + } + if (isWordCharASCII(*pos) || *pos == '$') { ++pos; diff --git a/src/Parsers/ParserCreateQuery.h b/src/Parsers/ParserCreateQuery.h index e97033c51f0..ef87988aab2 100644 --- a/src/Parsers/ParserCreateQuery.h +++ b/src/Parsers/ParserCreateQuery.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -170,6 +171,7 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E ASTPtr type; String default_specifier; std::optional null_modifier; + bool ephemeral_default = false; ASTPtr default_expression; ASTPtr comment_expression; ASTPtr codec_expression; @@ -235,8 +237,16 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E else if (s_ephemeral.ignore(pos, expected)) { default_specifier = s_ephemeral.getName(); - if (!literal_parser.parse(pos, default_expression, expected) && type) - default_expression = std::make_shared(Field()); + if (!expr_parser.parse(pos, default_expression, expected) && type) + { + ephemeral_default = true; + + auto default_function = std::make_shared(); + default_function->name = "defaultValueOfTypeName"; + default_function->arguments = std::make_shared(); + default_function->arguments->children.emplace_back(std::make_shared(type->as()->formatWithSecretsHidden())); + default_expression = default_function; + } if (!default_expression && !type) return false; @@ -302,6 +312,7 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E column_declaration->default_specifier = default_specifier; if (default_expression) { + column_declaration->ephemeral_default = ephemeral_default; column_declaration->default_expression = default_expression; column_declaration->children.push_back(std::move(default_expression)); } diff --git a/src/Parsers/ParserDataType.cpp b/src/Parsers/ParserDataType.cpp index 
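
The Lexer.cpp hunk nearby adds quotedHexOrBinString: when the lexer sees x/X/b/B immediately followed by a single quote, it consumes hex digits (or 0/1 digits) until the closing quote and emits a single StringLiteral token, or an unclosed-quote error if the quote never appears. A small standalone scanner in the same shape, with toy token types; the names here are illustrative, and the real code additionally checks pos + 2 < end before dispatching.

#include <cctype>
#include <cstddef>
#include <iostream>
#include <string_view>

enum class TokKind { StringLiteral, ErrorSingleQuoteIsNotClosed };

struct Tok { TokKind kind; std::string_view text; };

// Scan a token of the form x'AB12' or b'0101' starting at *pos; advance pos past it.
static Tok scanHexOrBinQuoted(const char *& pos, const char * end)
{
    const char * begin = pos;
    const bool hex = (*pos == 'x' || *pos == 'X');
    pos += 2;                                   // skip the prefix letter and the opening quote

    if (hex)
        while (pos < end && std::isxdigit(static_cast<unsigned char>(*pos)))
            ++pos;
    else
        while (pos < end && (*pos == '0' || *pos == '1'))
            ++pos;

    if (pos >= end || *pos != '\'')             // ran off the input, or hit a foreign character
    {
        pos = end;
        return {TokKind::ErrorSingleQuoteIsNotClosed, {begin, static_cast<std::size_t>(end - begin)}};
    }

    ++pos;                                      // consume the closing quote
    return {TokKind::StringLiteral, {begin, static_cast<std::size_t>(pos - begin)}};
}

int main()
{
    std::string_view input = "x'DEADBEEF' rest";
    const char * p = input.data();
    Tok t = scanHexOrBinQuoted(p, input.data() + input.size());
    std::cout << (t.kind == TokKind::StringLiteral) << ' ' << t.text << '\n';   // 1 x'DEADBEEF'
}
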
a1a24c40ac2..3e2a6facac6 100644 --- a/src/Parsers/ParserDataType.cpp +++ b/src/Parsers/ParserDataType.cpp @@ -27,7 +27,7 @@ private: { ParserNestedTable nested_parser; ParserDataType data_type_parser; - ParserLiteral literal_parser; + ParserAllCollectionsOfLiterals literal_parser(false); const char * operators[] = {"=", "equals", nullptr}; ParserLeftAssociativeBinaryOperatorList enum_parser(operators, std::make_unique()); @@ -145,4 +145,3 @@ bool ParserDataType::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } } - diff --git a/src/Parsers/ParserSystemQuery.cpp b/src/Parsers/ParserSystemQuery.cpp index 26ba9290d04..a44516fc4a3 100644 --- a/src/Parsers/ParserSystemQuery.cpp +++ b/src/Parsers/ParserSystemQuery.cpp @@ -17,7 +17,7 @@ namespace ErrorCodes namespace DB { -static bool parseQueryWithOnClusterAndMaybeTable(std::shared_ptr & res, IParser::Pos & pos, +[[nodiscard]] static bool parseQueryWithOnClusterAndMaybeTable(std::shared_ptr & res, IParser::Pos & pos, Expected & expected, bool require_table, bool allow_string_literal) { /// Better form for user: SYSTEM table ON CLUSTER cluster @@ -68,10 +68,10 @@ enum class SystemQueryTargetType { Model, Function, - Disk + Disk, }; -static bool parseQueryWithOnClusterAndTarget(std::shared_ptr & res, IParser::Pos & pos, Expected & expected, SystemQueryTargetType target_type) +[[nodiscard]] static bool parseQueryWithOnClusterAndTarget(std::shared_ptr & res, IParser::Pos & pos, Expected & expected, SystemQueryTargetType target_type) { /// Better form for user: SYSTEM target_name ON CLUSTER cluster /// Query rewritten form + form while executing on cluster: SYSTEM ON CLUSTER cluster target_name @@ -136,7 +136,7 @@ static bool parseQueryWithOnClusterAndTarget(std::shared_ptr & r return true; } -static bool parseQueryWithOnCluster(std::shared_ptr & res, IParser::Pos & pos, +[[nodiscard]] static bool parseQueryWithOnCluster(std::shared_ptr & res, IParser::Pos & pos, Expected & expected) { String cluster_str; @@ -150,6 +150,49 @@ static bool parseQueryWithOnCluster(std::shared_ptr & res, IPars return true; } +[[nodiscard]] static bool parseDropReplica(std::shared_ptr & res, IParser::Pos & pos, Expected & expected, bool database) +{ + if (!parseQueryWithOnCluster(res, pos, expected)) + return false; + + ASTPtr ast; + if (!ParserStringLiteral{}.parse(pos, ast, expected)) + return false; + res->replica = ast->as().value.safeGet(); + if (ParserKeyword{"FROM"}.ignore(pos, expected)) + { + // way 1. parse replica database + // way 2. parse replica table + // way 3. 
parse replica zkpath + if (ParserKeyword{"DATABASE"}.ignore(pos, expected)) + { + ParserIdentifier database_parser; + if (!database_parser.parse(pos, res->database, expected)) + return false; + } + else if (!database && ParserKeyword{"TABLE"}.ignore(pos, expected)) + { + parseDatabaseAndTableAsAST(pos, expected, res->database, res->table); + } + else if (ParserKeyword{"ZKPATH"}.ignore(pos, expected)) + { + ASTPtr path_ast; + if (!ParserStringLiteral{}.parse(pos, path_ast, expected)) + return false; + String zk_path = path_ast->as().value.safeGet(); + if (!zk_path.empty() && zk_path[zk_path.size() - 1] == '/') + zk_path.pop_back(); + res->replica_zk_path = zk_path; + } + else + return false; + } + else + res->is_drop_whole_replica = true; + + return true; +} + bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & expected) { if (!ParserKeyword{"SYSTEM"}.ignore(pos, expected)) @@ -194,52 +237,25 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & return false; break; } + case Type::DROP_REPLICA: { - parseQueryWithOnCluster(res, pos, expected); - - ASTPtr ast; - if (!ParserStringLiteral{}.parse(pos, ast, expected)) + if (!parseDropReplica(res, pos, expected, /* database */ false)) + return false; + break; + } + case Type::DROP_DATABASE_REPLICA: + { + if (!parseDropReplica(res, pos, expected, /* database */ true)) return false; - res->replica = ast->as().value.safeGet(); - if (ParserKeyword{"FROM"}.ignore(pos, expected)) - { - // way 1. parse replica database - // way 2. parse replica tables - // way 3. parse replica zkpath - if (ParserKeyword{"DATABASE"}.ignore(pos, expected)) - { - ParserIdentifier database_parser; - if (!database_parser.parse(pos, res->database, expected)) - return false; - } - else if (ParserKeyword{"TABLE"}.ignore(pos, expected)) - { - parseDatabaseAndTableAsAST(pos, expected, res->database, res->table); - } - else if (ParserKeyword{"ZKPATH"}.ignore(pos, expected)) - { - ASTPtr path_ast; - if (!ParserStringLiteral{}.parse(pos, path_ast, expected)) - return false; - String zk_path = path_ast->as().value.safeGet(); - if (!zk_path.empty() && zk_path[zk_path.size() - 1] == '/') - zk_path.pop_back(); - res->replica_zk_path = zk_path; - } - else - return false; - } - else - res->is_drop_whole_replica = true; - break; } case Type::RESTART_REPLICA: case Type::SYNC_REPLICA: { - parseQueryWithOnCluster(res, pos, expected); + if (!parseQueryWithOnCluster(res, pos, expected)) + return false; if (!parseDatabaseAndTableAsAST(pos, expected, res->database, res->table)) return false; break; @@ -247,19 +263,18 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & case Type::SYNC_DATABASE_REPLICA: { - parseQueryWithOnCluster(res, pos, expected); + if (!parseQueryWithOnCluster(res, pos, expected)) + return false; if (!parseDatabaseAsAST(pos, expected, res->database)) return false; break; } - case Type::RESTART_DISK: { if (!parseQueryWithOnClusterAndTarget(res, pos, expected, SystemQueryTargetType::Disk)) return false; break; } - /// FLUSH DISTRIBUTED requires table /// START/STOP DISTRIBUTED SENDS does not require table case Type::STOP_DISTRIBUTED_SENDS: @@ -310,7 +325,8 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & } else { - parseQueryWithOnCluster(res, pos, expected); + if (!parseQueryWithOnCluster(res, pos, expected)) + return false; if (ParserKeyword{"ON VOLUME"}.ignore(pos, expected)) { if (!parse_on_volume()) @@ -335,13 +351,15 @@ bool 
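
A recurring fix in the ParserSystemQuery.cpp hunks is marking the parse helpers [[nodiscard]] and making every call site check the result, so a malformed ON CLUSTER clause can no longer be silently swallowed. A minimal sketch of the pattern with a toy grammar and illustrative names, not the real parser classes:

#include <iostream>
#include <string>
#include <string_view>

struct Query { std::string cluster; std::string replica; };

// [[nodiscard]] forces callers to look at the result: ignoring it is a compiler warning.
[[nodiscard]] static bool parseOnCluster(std::string_view & input, Query & q)
{
    constexpr std::string_view kw = "ON CLUSTER ";
    if (input.substr(0, kw.size()) != kw)
        return true;                       // the clause is optional, absence is not an error
    input.remove_prefix(kw.size());
    auto space = input.find(' ');
    q.cluster = std::string(input.substr(0, space));
    input.remove_prefix(space == std::string_view::npos ? input.size() : space + 1);
    return !q.cluster.empty();
}

[[nodiscard]] static bool parseDropReplica(std::string_view input, Query & q)
{
    if (!parseOnCluster(input, q))         // the whole point: propagate the failure
        return false;
    if (input.empty())
        return false;
    q.replica = std::string(input);
    return true;
}

int main()
{
    Query q;
    std::cout << parseDropReplica("ON CLUSTER prod r1", q) << ' ' << q.cluster << ' ' << q.replica << '\n';
}
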
ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & case Type::START_REPLICATED_SENDS: case Type::STOP_REPLICATION_QUEUES: case Type::START_REPLICATION_QUEUES: - parseQueryWithOnCluster(res, pos, expected); + if (!parseQueryWithOnCluster(res, pos, expected)) + return false; parseDatabaseAndTableAsAST(pos, expected, res->database, res->table); break; case Type::SUSPEND: { - parseQueryWithOnCluster(res, pos, expected); + if (!parseQueryWithOnCluster(res, pos, expected)) + return false; ASTPtr seconds; if (!(ParserKeyword{"FOR"}.ignore(pos, expected) @@ -360,7 +378,8 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & ASTPtr ast; if (path_parser.parse(pos, ast, expected)) res->filesystem_cache_path = ast->as()->value.safeGet(); - parseQueryWithOnCluster(res, pos, expected); + if (!parseQueryWithOnCluster(res, pos, expected)) + return false; break; } case Type::DROP_SCHEMA_CACHE: @@ -397,7 +416,8 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & default: { - parseQueryWithOnCluster(res, pos, expected); + if (!parseQueryWithOnCluster(res, pos, expected)) + return false; break; } } diff --git a/src/Parsers/TokenIterator.cpp b/src/Parsers/TokenIterator.cpp index 08877e0b2fe..6633ddb9563 100644 --- a/src/Parsers/TokenIterator.cpp +++ b/src/Parsers/TokenIterator.cpp @@ -4,6 +4,20 @@ namespace DB { +Tokens::Tokens(const char * begin, const char * end, size_t max_query_size) +{ + Lexer lexer(begin, end, max_query_size); + + bool stop = false; + do + { + Token token = lexer.nextToken(); + stop = token.isEnd() || token.type == TokenType::ErrorMaxQuerySizeExceeded; + if (token.isSignificant()) + data.emplace_back(std::move(token)); + } while (!stop); +} + UnmatchedParentheses checkUnmatchedParentheses(TokenIterator begin) { /// We have just two kind of parentheses: () and []. 
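
The TokenIterator.cpp hunk above replaces the lazy, pull-on-demand lexer held inside Tokens with a constructor that lexes the whole query up front and keeps only significant tokens; the TokenIterator.h change that follows then turns operator[] into a bounds-checked array access and makes max() return the furthest index ever touched. A simplified standalone sketch of that "lex once, index freely" shape with toy Token/Lexer types (not the real ones):

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <iostream>
#include <string_view>
#include <vector>

struct Token { std::string_view text; bool significant = true; bool end = false; };

// Toy lexer: splits on spaces, emits an end token when the input is exhausted.
struct Lexer
{
    std::string_view rest;
    Token next()
    {
        while (!rest.empty() && rest.front() == ' ')
            rest.remove_prefix(1);
        if (rest.empty())
            return {"", false, true};
        auto len = rest.find(' ');
        Token t{rest.substr(0, len)};
        rest.remove_prefix(len == std::string_view::npos ? rest.size() : len);
        return t;
    }
};

class Tokens
{
    std::vector<Token> data;
    std::size_t last_accessed_index = 0;
public:
    explicit Tokens(std::string_view query)           // lex everything eagerly, once
    {
        Lexer lexer{query};
        for (Token t = lexer.next(); ; t = lexer.next())
        {
            bool stop = t.end;
            if (t.significant || t.end)
                data.push_back(t);
            if (stop)
                break;
        }
    }
    const Token & operator[](std::size_t index)       // now just a checked array access
    {
        assert(index < data.size());
        last_accessed_index = std::max(last_accessed_index, index);
        return data[index];
    }
    const Token & max() { return data[last_accessed_index]; }   // furthest token ever touched
};

int main()
{
    Tokens tokens("SELECT 1 FROM t");
    std::cout << tokens[2].text << '\n';    // FROM
    std::cout << tokens.max().text << '\n'; // FROM
}
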
diff --git a/src/Parsers/TokenIterator.h b/src/Parsers/TokenIterator.h index b84bec57817..c9ac61dfef9 100644 --- a/src/Parsers/TokenIterator.h +++ b/src/Parsers/TokenIterator.h @@ -3,6 +3,7 @@ #include #include +#include #include @@ -20,34 +21,19 @@ class Tokens { private: std::vector data; - Lexer lexer; + std::size_t last_accessed_index = 0; public: - Tokens(const char * begin, const char * end, size_t max_query_size = 0) : lexer(begin, end, max_query_size) {} + Tokens(const char * begin, const char * end, size_t max_query_size = 0); - const Token & operator[] (size_t index) + ALWAYS_INLINE inline const Token & operator[](size_t index) { - while (true) - { - if (index < data.size()) - return data[index]; - - if (!data.empty() && data.back().isEnd()) - return data.back(); - - Token token = lexer.nextToken(); - - if (token.isSignificant()) - data.emplace_back(token); - } + assert(index < data.size()); + last_accessed_index = std::max(last_accessed_index, index); + return data[index]; } - const Token & max() - { - if (data.empty()) - return (*this)[0]; - return data.back(); - } + ALWAYS_INLINE inline const Token & max() { return data[last_accessed_index]; } }; diff --git a/src/Parsers/parseQuery.cpp b/src/Parsers/parseQuery.cpp index 4a0c60da48d..da8450ac301 100644 --- a/src/Parsers/parseQuery.cpp +++ b/src/Parsers/parseQuery.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -263,7 +264,19 @@ ASTPtr tryParseQuery( ASTInsertQuery * insert = nullptr; if (parse_res) - insert = res->as(); + { + if (auto * explain = res->as()) + { + if (auto explained_query = explain->getExplainedQuery()) + { + insert = explained_query->as(); + } + } + else + { + insert = res->as(); + } + } // If parsed query ends at data for insertion. 
Data for insertion could be // in any format and not necessary be lexical correct, so we can't perform diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index d88766f3656..6b2de30722c 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -349,8 +349,8 @@ void Planner::buildQueryPlanIfNeeded() { auto function_node = std::make_shared("and"); auto and_function = FunctionFactory::instance().get("and", query_context); - function_node->resolveAsFunction(std::move(and_function), std::make_shared()); function_node->getArguments().getNodes() = {query_node.getPrewhere(), query_node.getWhere()}; + function_node->resolveAsFunction(and_function->build(function_node->getArgumentTypes())); query_node.getWhere() = std::move(function_node); query_node.getPrewhere() = {}; } diff --git a/src/Planner/PlannerActionsVisitor.cpp b/src/Planner/PlannerActionsVisitor.cpp index aa1b61e5559..95edd93dd9f 100644 --- a/src/Planner/PlannerActionsVisitor.cpp +++ b/src/Planner/PlannerActionsVisitor.cpp @@ -121,7 +121,8 @@ public: return node; } - const ActionsDAG::Node * addFunctionIfNecessary(const std::string & node_name, ActionsDAG::NodeRawConstPtrs children, FunctionOverloadResolverPtr function) + template + const ActionsDAG::Node * addFunctionIfNecessary(const std::string & node_name, ActionsDAG::NodeRawConstPtrs children, FunctionOrOverloadResolver function) { auto it = node_name_to_node.find(node_name); if (it != node_name_to_node.end()) @@ -325,6 +326,7 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi lambda_actions, captured_column_names, lambda_arguments_names_and_types, result_type, lambda_expression_node_name); actions_stack.pop_back(); + // TODO: Pass IFunctionBase here not FunctionCaptureOverloadResolver. 
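
Two related changes sit in the Planner hunks around here: addFunctionIfNecessary gains a template parameter so the same path can accept either an overload resolver or an already-built function, and Planner.cpp now fills the function node's argument list before calling build(), so the function is resolved against the real argument types. The snippet below only sketches that ordering idea with toy stand-in types; it is not ClickHouse's FunctionNode or ActionsDAG API.

#include <cstddef>
#include <iostream>
#include <memory>
#include <string>
#include <vector>

// Toy stand-ins: the point is the ordering, not the real Planner types.
struct Argument { std::string type; };

struct ResolvedFunction { std::string signature; };

struct FunctionNode
{
    std::string name;
    std::vector<Argument> arguments;
    std::unique_ptr<ResolvedFunction> resolved;

    std::vector<std::string> getArgumentTypes() const
    {
        std::vector<std::string> types;
        for (const auto & a : arguments)
            types.push_back(a.type);
        return types;
    }
};

// "Overload resolver": builds a concrete function for the given argument types.
static ResolvedFunction buildFor(const std::string & name, const std::vector<std::string> & types)
{
    std::string sig = name + "(";
    for (std::size_t i = 0; i < types.size(); ++i)
        sig += (i ? ", " : "") + types[i];
    return {sig + ")"};
}

int main()
{
    FunctionNode node{"and"};
    // Fill the arguments first, *then* resolve: the resolver must see the real argument types.
    node.arguments = {{"UInt8"}, {"UInt8"}};
    node.resolved = std::make_unique<ResolvedFunction>(buildFor(node.name, node.getArgumentTypes()));
    std::cout << node.resolved->signature << '\n';   // and(UInt8, UInt8)
}
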
actions_stack[level].addFunctionIfNecessary(lambda_node_name, std::move(lambda_children), std::move(function_capture)); size_t actions_stack_size = actions_stack.size(); diff --git a/src/Planner/PlannerAggregation.cpp b/src/Planner/PlannerAggregation.cpp index a1a8b54426a..05e7b5418e3 100644 --- a/src/Planner/PlannerAggregation.cpp +++ b/src/Planner/PlannerAggregation.cpp @@ -101,14 +101,14 @@ public: { auto grouping_ordinary_function = std::make_shared(arguments_indexes, force_grouping_standard_compatibility); auto grouping_ordinary_function_adaptor = std::make_shared(std::move(grouping_ordinary_function)); - function_node->resolveAsFunction(std::move(grouping_ordinary_function_adaptor), std::make_shared()); + function_node->resolveAsFunction(grouping_ordinary_function_adaptor->build({})); break; } case GroupByKind::ROLLUP: { auto grouping_rollup_function = std::make_shared(arguments_indexes, aggregation_keys_size, force_grouping_standard_compatibility); auto grouping_rollup_function_adaptor = std::make_shared(std::move(grouping_rollup_function)); - function_node->resolveAsFunction(std::move(grouping_rollup_function_adaptor), std::make_shared()); + function_node->resolveAsFunction(grouping_rollup_function_adaptor->build({})); function_node->getArguments().getNodes().push_back(std::move(grouping_set_argument_column)); break; } @@ -116,7 +116,7 @@ public: { auto grouping_cube_function = std::make_shared(arguments_indexes, aggregation_keys_size, force_grouping_standard_compatibility); auto grouping_cube_function_adaptor = std::make_shared(std::move(grouping_cube_function)); - function_node->resolveAsFunction(std::move(grouping_cube_function_adaptor), std::make_shared()); + function_node->resolveAsFunction(grouping_cube_function_adaptor->build({})); function_node->getArguments().getNodes().push_back(std::move(grouping_set_argument_column)); break; } @@ -124,7 +124,7 @@ public: { auto grouping_grouping_sets_function = std::make_shared(arguments_indexes, grouping_sets_keys_indices, force_grouping_standard_compatibility); auto grouping_grouping_sets_function_adaptor = std::make_shared(std::move(grouping_grouping_sets_function)); - function_node->resolveAsFunction(std::move(grouping_grouping_sets_function_adaptor), std::make_shared()); + function_node->resolveAsFunction(grouping_grouping_sets_function_adaptor->build({})); function_node->getArguments().getNodes().push_back(std::move(grouping_set_argument_column)); break; } diff --git a/src/Planner/PlannerExpressionAnalysis.cpp b/src/Planner/PlannerExpressionAnalysis.cpp index 9db268512be..91a04b090fc 100644 --- a/src/Planner/PlannerExpressionAnalysis.cpp +++ b/src/Planner/PlannerExpressionAnalysis.cpp @@ -65,7 +65,7 @@ std::optional analyzeAggregation(QueryTreeNodePtr & q ColumnsWithTypeAndName aggregates_columns; aggregates_columns.reserve(aggregates_descriptions.size()); for (auto & aggregate_description : aggregates_descriptions) - aggregates_columns.emplace_back(nullptr, aggregate_description.function->getReturnType(), aggregate_description.column_name); + aggregates_columns.emplace_back(nullptr, aggregate_description.function->getResultType(), aggregate_description.column_name); Names aggregation_keys; @@ -284,7 +284,7 @@ std::optional analyzeWindow(QueryTreeNodePtr & query_tree, for (auto & window_description : window_descriptions) for (auto & window_function : window_description.window_functions) - window_functions_additional_columns.emplace_back(nullptr, window_function.aggregate_function->getReturnType(), window_function.column_name); + 
window_functions_additional_columns.emplace_back(nullptr, window_function.aggregate_function->getResultType(), window_function.column_name); auto before_window_step = std::make_unique(before_window_actions, ActionsChainStep::AvailableOutputColumnsStrategy::ALL_NODES, diff --git a/src/Processors/Formats/IOutputFormat.h b/src/Processors/Formats/IOutputFormat.h index 1f21537c6c6..aaa14ac227f 100644 --- a/src/Processors/Formats/IOutputFormat.h +++ b/src/Processors/Formats/IOutputFormat.h @@ -76,6 +76,15 @@ public: void doNotWritePrefix() { need_write_prefix = false; } + /// Reset the statistics watch to a specific point in time + /// If set to not running it will stop on the call (elapsed = now() - given start) + void setStartTime(UInt64 start, bool is_running) + { + statistics.watch = Stopwatch(CLOCK_MONOTONIC, start, true); + if (!is_running) + statistics.watch.stop(); + } + protected: friend class ParallelFormattingOutputFormat; @@ -132,9 +141,6 @@ protected: Chunk extremes; }; - void setOutsideStatistics(Statistics statistics_) { statistics = std::make_shared(std::move(statistics_)); } - std::shared_ptr getOutsideStatistics() const { return statistics; } - /// In some formats the way we print extremes depends on /// were totals printed or not. In this case in parallel formatting /// we should notify underling format if totals were printed. @@ -160,10 +166,10 @@ protected: bool need_write_suffix = true; RowsBeforeLimitCounterPtr rows_before_limit_counter; + Statistics statistics; private: size_t rows_read_before = 0; - std::shared_ptr statistics = nullptr; bool are_totals_written = false; /// Counters for consumed chunks. Are used for QueryLog. diff --git a/src/Processors/Formats/ISchemaReader.cpp b/src/Processors/Formats/ISchemaReader.cpp index a26ed6b0b40..4599cdb8748 100644 --- a/src/Processors/Formats/ISchemaReader.cpp +++ b/src/Processors/Formats/ISchemaReader.cpp @@ -1,6 +1,5 @@ #include -#include -#include +#include #include #include #include @@ -11,65 +10,29 @@ namespace DB namespace ErrorCodes { extern const int ONLY_NULLS_WHILE_READING_SCHEMA; - extern const int TYPE_MISMATCH; extern const int INCORRECT_DATA; extern const int EMPTY_DATA_PASSED; extern const int BAD_ARGUMENTS; } -void chooseResultColumnType( - DataTypePtr & type, - DataTypePtr & new_type, - std::function transform_types_if_needed, - const DataTypePtr & default_type, - const String & column_name, - size_t row) +void checkFinalInferredType(DataTypePtr & type, const String & name, const FormatSettings & settings, const DataTypePtr & default_type, size_t rows_read) { - if (!type) - { - type = new_type; - return; - } - - if (!new_type || type->equals(*new_type)) - return; - - transform_types_if_needed(type, new_type); - if (type->equals(*new_type)) - return; - - /// If the new type and the previous type for this column are different, - /// we will use default type if we have it or throw an exception. - if (default_type) - type = default_type; - else - { - throw Exception( - ErrorCodes::TYPE_MISMATCH, - "Automatically defined type {} for column '{}' in row {} differs from type defined by previous rows: {}. 
" - "You can specify the type for this column using setting schema_inference_hints", - type->getName(), - column_name, - row, - new_type->getName()); - } -} - -void checkResultColumnTypeAndAppend(NamesAndTypesList & result, DataTypePtr & type, const String & name, const DataTypePtr & default_type, size_t rows_read) -{ - if (!type) + if (!checkIfTypeIsComplete(type)) { if (!default_type) throw Exception( ErrorCodes::ONLY_NULLS_WHILE_READING_SCHEMA, "Cannot determine type for column '{}' by first {} rows of data, most likely this column contains only Nulls or empty " - "Arrays/Maps. You can specify the type for this column using setting schema_inference_hints", + "Arrays/Maps. You can specify the type for this column using setting schema_inference_hints. " + "If your data contains complex JSON objects, try enabling one of the settings allow_experimental_object_type/input_format_json_read_objects_as_strings", name, rows_read); type = default_type; } - result.emplace_back(name, type); + + if (settings.schema_inference_make_columns_nullable) + type = makeNullableRecursively(type); } IIRowSchemaReader::IIRowSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_, DataTypePtr default_type_) @@ -88,6 +51,11 @@ void IIRowSchemaReader::setContext(ContextPtr & context) } } +void IIRowSchemaReader::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) +{ + transformInferredTypesIfNeeded(type, new_type, format_settings); +} + IRowSchemaReader::IRowSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_) : IIRowSchemaReader(in_, format_settings_), column_names(splitColumnNames(format_settings.column_names_for_schema_inference)) { @@ -160,23 +128,28 @@ NamesAndTypesList IRowSchemaReader::readSchema() if (new_data_types.size() != data_types.size()) throw Exception(ErrorCodes::INCORRECT_DATA, "Rows have different amount of values"); - for (size_t i = 0; i != data_types.size(); ++i) + for (field_index = 0; field_index != data_types.size(); ++field_index) { /// Check if we couldn't determine the type of this column in a new row /// or the type for this column was taken from hints. - if (!new_data_types[i] || hints.contains(column_names[i])) + if (!new_data_types[field_index] || hints.contains(column_names[field_index])) continue; - auto transform_types_if_needed = [&](DataTypePtr & type, DataTypePtr & new_type){ transformTypesIfNeeded(type, new_type, i); }; - chooseResultColumnType(data_types[i], new_data_types[i], transform_types_if_needed, getDefaultType(i), std::to_string(i + 1), rows_read); + chooseResultColumnType(*this, data_types[field_index], new_data_types[field_index], getDefaultType(field_index), std::to_string(field_index + 1), rows_read); } } NamesAndTypesList result; - for (size_t i = 0; i != data_types.size(); ++i) + for (field_index = 0; field_index != data_types.size(); ++field_index) { - /// Check that we could determine the type of this column. - checkResultColumnTypeAndAppend(result, data_types[i], column_names[i], getDefaultType(i), rows_read); + /// Don't check/change types from hints. + if (!hints.contains(column_names[field_index])) + { + transformFinalTypeIfNeeded(data_types[field_index]); + /// Check that we could determine the type of this column. 
+ checkFinalInferredType(data_types[field_index], column_names[field_index], format_settings, getDefaultType(field_index), rows_read); + } + result.emplace_back(column_names[field_index], data_types[field_index]); } return result; @@ -208,11 +181,6 @@ DataTypePtr IRowSchemaReader::getDefaultType(size_t column) const return nullptr; } -void IRowSchemaReader::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type, size_t) -{ - transformInferredTypesIfNeeded(type, new_type, format_settings); -} - IRowWithNamesSchemaReader::IRowWithNamesSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_, DataTypePtr default_type_) : IIRowSchemaReader(in_, format_settings_, default_type_) { @@ -245,7 +213,6 @@ NamesAndTypesList IRowWithNamesSchemaReader::readSchema() names_order.push_back(name); } - auto transform_types_if_needed = [&](DataTypePtr & type, DataTypePtr & new_type){ transformTypesIfNeeded(type, new_type); }; for (rows_read = 1; rows_read < max_rows_to_read; ++rows_read) { auto new_names_and_types = readRowAndGetNamesAndDataTypes(eof); @@ -277,7 +244,7 @@ NamesAndTypesList IRowWithNamesSchemaReader::readSchema() continue; auto & type = it->second; - chooseResultColumnType(type, new_type, transform_types_if_needed, default_type, name, rows_read); + chooseResultColumnType(*this, type, new_type, default_type, name, rows_read); } } @@ -285,20 +252,21 @@ NamesAndTypesList IRowWithNamesSchemaReader::readSchema() if (names_to_types.empty()) throw Exception(ErrorCodes::EMPTY_DATA_PASSED, "Cannot read rows from the data"); - NamesAndTypesList result; + NamesAndTypesList result = getStaticNamesAndTypes(); for (auto & name : names_order) { auto & type = names_to_types[name]; - /// Check that we could determine the type of this column. - checkResultColumnTypeAndAppend(result, type, name, default_type, rows_read); + /// Don't check/change types from hints. + if (!hints.contains(name)) + { + transformFinalTypeIfNeeded(type); + /// Check that we could determine the type of this column. + checkFinalInferredType(type, name, format_settings, default_type, rows_read); + } + result.emplace_back(name, type); } return result; } -void IRowWithNamesSchemaReader::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) -{ - transformInferredTypesIfNeeded(type, new_type, format_settings); -} - } diff --git a/src/Processors/Formats/ISchemaReader.h b/src/Processors/Formats/ISchemaReader.h index 503632fd2f8..9f3f4d880ef 100644 --- a/src/Processors/Formats/ISchemaReader.h +++ b/src/Processors/Formats/ISchemaReader.h @@ -9,6 +9,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int TYPE_MISMATCH; +} + /// Base class for schema inference for the data in some specific format. /// It reads some data from read buffer and try to determine the schema /// from read data. 
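
The ISchemaReader refactor in the surrounding hunks moves chooseResultColumnType into the header as a template over the concrete schema reader and replaces checkResultColumnTypeAndAppend with checkFinalInferredType, which falls back to a default type (or throws) when a column is still unresolved after the configured number of rows. The sketch below isolates that merging loop with strings standing in for DataTypePtr; the transform rule and error messages are simplified stand-ins.

#include <cstddef>
#include <iostream>
#include <optional>
#include <stdexcept>
#include <string>
#include <vector>

using Type = std::optional<std::string>;   // stand-in for DataTypePtr; empty = "not inferred yet"

// Per-format hook: try to reconcile two candidate types (e.g. Int64 + Float64 -> Float64).
static void transformTypesIfNeeded(Type & type, Type & new_type)
{
    if (type && new_type
        && ((*type == "Int64" && *new_type == "Float64") || (*type == "Float64" && *new_type == "Int64")))
        type = new_type = std::string("Float64");
}

static void chooseResultColumnType(Type & type, Type new_type, const Type & default_type, std::size_t row)
{
    if (!type) { type = std::move(new_type); return; }
    if (!new_type || *type == *new_type) return;
    transformTypesIfNeeded(type, new_type);
    if (*type == *new_type) return;
    if (default_type) { type = default_type; return; }
    throw std::runtime_error("type in row " + std::to_string(row) + " differs from previous rows");
}

static void checkFinalInferredType(Type & type, const Type & default_type)
{
    if (type) return;                                  // type was inferred, nothing to do
    if (!default_type) throw std::runtime_error("column contains only NULLs/empty values");
    type = default_type;                               // fall back to the configured default
}

int main()
{
    Type column;                                       // unknown at the start
    std::vector<Type> per_row = {std::string("Int64"), std::string("Float64"), std::string("Float64")};
    for (std::size_t row = 0; row < per_row.size(); ++row)
        chooseResultColumnType(column, per_row[row], /*default_type=*/std::nullopt, row + 1);
    checkFinalInferredType(column, std::nullopt);
    std::cout << *column << '\n';                      // Float64
}
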
@@ -45,10 +50,14 @@ public: bool needContext() const override { return !hints_str.empty(); } void setContext(ContextPtr & context) override; + virtual void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type); + protected: void setMaxRowsToRead(size_t max_rows) override { max_rows_to_read = max_rows; } size_t getNumRowsRead() const override { return rows_read; } + virtual void transformFinalTypeIfNeeded(DataTypePtr &) {} + size_t max_rows_to_read; size_t rows_read = 0; DataTypePtr default_type; @@ -82,7 +91,7 @@ protected: void setColumnNames(const std::vector & names) { column_names = names; } - virtual void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type, size_t index); + size_t field_index; private: DataTypePtr getDefaultType(size_t column) const; @@ -111,7 +120,10 @@ protected: /// Set eof = true if can't read more data. virtual NamesAndTypesList readRowAndGetNamesAndDataTypes(bool & eof) = 0; - virtual void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type); + /// Get special static types that have the same name/type for each row. + /// For example, in JSONObjectEachRow format we have static column with + /// type String and name from a settings for object keys. + virtual NamesAndTypesList getStaticNamesAndTypes() { return {}; } }; /// Base class for schema inference for formats that don't need any data to @@ -125,16 +137,46 @@ public: virtual ~IExternalSchemaReader() = default; }; +template void chooseResultColumnType( + SchemaReader & schema_reader, DataTypePtr & type, DataTypePtr & new_type, - std::function transform_types_if_needed, const DataTypePtr & default_type, const String & column_name, - size_t row); + size_t row) +{ + if (!type) + { + type = new_type; + return; + } -void checkResultColumnTypeAndAppend( - NamesAndTypesList & result, DataTypePtr & type, const String & name, const DataTypePtr & default_type, size_t rows_read); + if (!new_type || type->equals(*new_type)) + return; + + schema_reader.transformTypesIfNeeded(type, new_type); + if (type->equals(*new_type)) + return; + + /// If the new type and the previous type for this column are different, + /// we will use default type if we have it or throw an exception. + if (default_type) + type = default_type; + else + { + throw Exception( + ErrorCodes::TYPE_MISMATCH, + "Automatically defined type {} for column '{}' in row {} differs from type defined by previous rows: {}. " + "You can specify the type for this column using setting schema_inference_hints", + type->getName(), + column_name, + row, + new_type->getName()); + } +} + +void checkFinalInferredType(DataTypePtr & type, const String & name, const FormatSettings & settings, const DataTypePtr & default_type, size_t rows_read); Strings splitColumnNames(const String & column_names_str); diff --git a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp index cae4cbab0d7..8b455d3441e 100644 --- a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp @@ -3,7 +3,7 @@ #if USE_ARROW #include -#include +#include #include #include #include @@ -71,7 +71,7 @@ Chunk ArrowBlockInputFormat::generate() ++record_batch_current; - arrow_column_to_ch_column->arrowTableToCHChunk(res, *table_result); + arrow_column_to_ch_column->arrowTableToCHChunk(res, *table_result, (*table_result)->num_rows()); /// If defaults_for_omitted_fields is true, calculate the default values from default expression for omitted fields. 
/// Otherwise fill the missing columns with zero values of its type. diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index 8b546f48116..cbc87f921ef 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -69,7 +69,6 @@ namespace ErrorCodes extern const int DUPLICATE_COLUMN; extern const int THERE_IS_NO_COLUMN; extern const int UNKNOWN_EXCEPTION; - extern const int INCORRECT_NUMBER_OF_COLUMNS; extern const int INCORRECT_DATA; } @@ -810,7 +809,7 @@ ArrowColumnToCHColumn::ArrowColumnToCHColumn( { } -void ArrowColumnToCHColumn::arrowTableToCHChunk(Chunk & res, std::shared_ptr & table) +void ArrowColumnToCHColumn::arrowTableToCHChunk(Chunk & res, std::shared_ptr & table, size_t num_rows) { NameToColumnPtr name_to_column_ptr; for (auto column_name : table->ColumnNames()) @@ -824,16 +823,12 @@ void ArrowColumnToCHColumn::arrowTableToCHChunk(Chunk & res, std::shared_ptrsecond->length(); columns_list.reserve(header.columns()); std::unordered_map>> nested_tables; bool skipped = false; diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h index 3540778940e..dd9f44eb94e 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h @@ -28,9 +28,9 @@ public: bool allow_missing_columns_, bool case_insensitive_matching_ = false); - void arrowTableToCHChunk(Chunk & res, std::shared_ptr & table); + void arrowTableToCHChunk(Chunk & res, std::shared_ptr & table, size_t num_rows); - void arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & name_to_column_ptr); + void arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & name_to_column_ptr, size_t num_rows); /// Get missing columns that exists in header but not in arrow::Schema std::vector getMissingColumns(const arrow::Schema & schema) const; diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp index 80183838277..7a6e28d9372 100644 --- a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -46,7 +47,6 @@ #include #include -#include #include #include #include @@ -989,7 +989,7 @@ DataTypePtr AvroSchemaReader::avroNodeToDataType(avro::NodePtr node) case avro::Type::AVRO_LONG: return std::make_shared(); case avro::Type::AVRO_BOOL: - return std::make_shared(); + return DataTypeFactory::instance().get("Bool"); case avro::Type::AVRO_FLOAT: return std::make_shared(); case avro::Type::AVRO_DOUBLE: diff --git a/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp b/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp index e3d570d1876..3ea967054da 100644 --- a/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp @@ -97,6 +97,12 @@ AvroSerializer::SchemaWithSerializeFn AvroSerializer::createSchemaWithSerializeF switch (data_type->getTypeId()) { case TypeIndex::UInt8: + if (isBool(data_type)) + return {avro::BoolSchema(), [](const IColumn & column, size_t row_num, avro::Encoder & encoder) + { + encoder.encodeBool(assert_cast(column).getElement(row_num)); + }}; + return {avro::IntSchema(), [](const IColumn & column, size_t row_num, avro::Encoder & encoder) { encoder.encodeInt(assert_cast(column).getElement(row_num)); diff --git 
a/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.cpp index 878860aeb25..fd0c553538f 100644 --- a/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.cpp @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -282,7 +283,7 @@ static void readAndInsertString(ReadBuffer & in, IColumn & column, BSONType bson } else if (bson_type == BSONType::OBJECT_ID) { - readAndInsertStringImpl(in, column, 12); + readAndInsertStringImpl(in, column, BSON_OBJECT_ID_SIZE); } else { @@ -664,7 +665,7 @@ static void skipBSONField(ReadBuffer & in, BSONType type) } case BSONType::OBJECT_ID: { - in.ignore(12); + in.ignore(BSON_OBJECT_ID_SIZE); break; } case BSONType::REGEXP: @@ -677,7 +678,7 @@ static void skipBSONField(ReadBuffer & in, BSONType type) { BSONSizeT size; readBinary(size, in); - in.ignore(size + 12); + in.ignore(size + BSON_DB_POINTER_SIZE); break; } case BSONType::JAVA_SCRIPT_CODE_W_SCOPE: @@ -772,37 +773,41 @@ DataTypePtr BSONEachRowSchemaReader::getDataTypeFromBSONField(BSONType type, boo case BSONType::DOUBLE: { in.ignore(sizeof(Float64)); - return makeNullable(std::make_shared()); + return std::make_shared(); } case BSONType::BOOL: { in.ignore(sizeof(UInt8)); - return makeNullable(DataTypeFactory::instance().get("Bool")); + return DataTypeFactory::instance().get("Bool"); } case BSONType::INT64: { in.ignore(sizeof(Int64)); - return makeNullable(std::make_shared()); + return std::make_shared(); } case BSONType::DATETIME: { in.ignore(sizeof(Int64)); - return makeNullable(std::make_shared(6, "UTC")); + return std::make_shared(6, "UTC"); } case BSONType::INT32: { in.ignore(sizeof(Int32)); - return makeNullable(std::make_shared()); + return std::make_shared(); } case BSONType::SYMBOL: [[fallthrough]]; case BSONType::JAVA_SCRIPT_CODE: [[fallthrough]]; - case BSONType::OBJECT_ID: [[fallthrough]]; case BSONType::STRING: { BSONSizeT size; readBinary(size, in); in.ignore(size); - return makeNullable(std::make_shared()); + return std::make_shared(); + } + case BSONType::OBJECT_ID:; + { + in.ignore(BSON_OBJECT_ID_SIZE); + return makeNullable(std::make_shared(BSON_OBJECT_ID_SIZE)); } case BSONType::DOCUMENT: { @@ -856,10 +861,10 @@ DataTypePtr BSONEachRowSchemaReader::getDataTypeFromBSONField(BSONType type, boo { case BSONBinarySubtype::BINARY_OLD: [[fallthrough]]; case BSONBinarySubtype::BINARY: - return makeNullable(std::make_shared()); + return std::make_shared(); case BSONBinarySubtype::UUID_OLD: [[fallthrough]]; case BSONBinarySubtype::UUID: - return makeNullable(std::make_shared()); + return std::make_shared(); default: throw Exception(ErrorCodes::UNKNOWN_TYPE, "BSON binary subtype {} is not supported", getBSONBinarySubtypeName(subtype)); } @@ -954,6 +959,7 @@ void registerInputFormatBSONEachRow(FormatFactory & factory) "BSONEachRow", [](ReadBuffer & buf, const Block & sample, IRowInputFormat::Params params, const FormatSettings & settings) { return std::make_shared(buf, sample, std::move(params), settings); }); + factory.registerFileExtension("bson", "BSONEachRow"); } void registerFileSegmentationEngineBSONEachRow(FormatFactory & factory) diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp index a50a9423965..1f1bf99739a 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp @@ -274,15 +274,15 @@ void 
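
The BSONEachRow changes replace the magic number 12 with the named BSON_OBJECT_ID_SIZE constant and infer OBJECT_ID fields as a fixed-width type instead of a plain string. A tiny standalone sketch of reading such a fixed-width field with a named constant rather than a scattered literal; the helper is illustrative, not the real readAndInsertStringImpl.

#include <array>
#include <cstddef>
#include <cstdio>
#include <cstring>
#include <string_view>

// A BSON ObjectId is always 12 bytes; name the constant instead of repeating the literal 12.
constexpr std::size_t BSON_OBJECT_ID_SIZE = 12;

static std::array<unsigned char, BSON_OBJECT_ID_SIZE> readObjectId(std::string_view & in)
{
    std::array<unsigned char, BSON_OBJECT_ID_SIZE> id{};
    std::memcpy(id.data(), in.data(), BSON_OBJECT_ID_SIZE);   // caller guarantees in.size() >= 12
    in.remove_prefix(BSON_OBJECT_ID_SIZE);                    // consume exactly one ObjectId
    return id;
}

int main()
{
    std::string_view payload("abcdefghijkl rest");
    auto id = readObjectId(payload);
    for (unsigned char b : id)
        std::printf("%02x", b);
    std::printf("\nremaining: %zu bytes\n", payload.size());  // 5
}
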
CSVFormatReader::skipPrefixBeforeHeader() } -CSVSchemaReader::CSVSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, const FormatSettings & format_setting_) +CSVSchemaReader::CSVSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, const FormatSettings & format_settings_) : FormatWithNamesAndTypesSchemaReader( in_, - format_setting_, + format_settings_, with_names_, with_types_, &reader, getDefaultDataTypeForEscapingRule(FormatSettings::EscapingRule::CSV)) - , reader(in_, format_setting_) + , reader(in_, format_settings_) { } @@ -293,7 +293,7 @@ DataTypes CSVSchemaReader::readRowAndGetDataTypes() return {}; auto fields = reader.readRow(); - return determineDataTypesByEscapingRule(fields, reader.getFormatSettings(), FormatSettings::EscapingRule::CSV); + return tryInferDataTypesByEscapingRule(fields, reader.getFormatSettings(), FormatSettings::EscapingRule::CSV); } diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.h b/src/Processors/Formats/Impl/CSVRowInputFormat.h index 23c659c5c0c..1d79265c22b 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.h +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.h @@ -75,7 +75,7 @@ public: class CSVSchemaReader : public FormatWithNamesAndTypesSchemaReader { public: - CSVSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, const FormatSettings & format_setting_); + CSVSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, const FormatSettings & format_settings_); private: DataTypes readRowAndGetDataTypes() override; diff --git a/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp b/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp index 08d2cac743a..58ace9cfca5 100644 --- a/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp @@ -99,6 +99,12 @@ static void insertSignedInteger(IColumn & column, const DataTypePtr & column_typ case TypeIndex::DateTime64: assert_cast &>(column).insertValue(value); break; + case TypeIndex::Decimal32: + assert_cast &>(column).insertValue(static_cast(value)); + break; + case TypeIndex::Decimal64: + assert_cast &>(column).insertValue(value); + break; default: throw Exception(ErrorCodes::LOGICAL_ERROR, "Column type is not a signed integer."); } @@ -178,14 +184,14 @@ static void insertEnum(IColumn & column, const DataTypePtr & column_type, const } } -static void insertValue(IColumn & column, const DataTypePtr & column_type, const capnp::DynamicValue::Reader & value, FormatSettings::EnumComparingMode enum_comparing_mode) +static void insertValue(IColumn & column, const DataTypePtr & column_type, const String & column_name, const capnp::DynamicValue::Reader & value, FormatSettings::EnumComparingMode enum_comparing_mode) { if (column_type->lowCardinality()) { auto & lc_column = assert_cast(column); auto tmp_column = lc_column.getDictionary().getNestedColumn()->cloneEmpty(); auto dict_type = assert_cast(column_type.get())->getDictionaryType(); - insertValue(*tmp_column, dict_type, value, enum_comparing_mode); + insertValue(*tmp_column, dict_type, column_name, value, enum_comparing_mode); lc_column.insertFromFullColumn(*tmp_column, 0); return; } @@ -226,7 +232,7 @@ static void insertValue(IColumn & column, const DataTypePtr & column_type, const auto & nested_column = column_array.getData(); auto nested_type = assert_cast(column_type.get())->getNestedType(); for (const auto & nested_value : list_value) - insertValue(nested_column, nested_type, nested_value, enum_comparing_mode); + 
insertValue(nested_column, nested_type, column_name, nested_value, enum_comparing_mode); break; } case capnp::DynamicValue::Type::STRUCT: @@ -243,11 +249,11 @@ static void insertValue(IColumn & column, const DataTypePtr & column_type, const auto & nested_column = nullable_column.getNestedColumn(); auto nested_type = assert_cast(column_type.get())->getNestedType(); auto nested_value = struct_value.get(field); - insertValue(nested_column, nested_type, nested_value, enum_comparing_mode); + insertValue(nested_column, nested_type, column_name, nested_value, enum_comparing_mode); nullable_column.getNullMapData().push_back(0); } } - else + else if (isTuple(column_type)) { auto & tuple_column = assert_cast(column); const auto * tuple_type = assert_cast(column_type.get()); @@ -255,9 +261,16 @@ static void insertValue(IColumn & column, const DataTypePtr & column_type, const insertValue( tuple_column.getColumn(i), tuple_type->getElements()[i], + tuple_type->getElementNames()[i], struct_value.get(tuple_type->getElementNames()[i]), enum_comparing_mode); } + else + { + /// It can be nested column from Nested type. + auto [field_name, nested_name] = splitCapnProtoFieldName(column_name); + insertValue(column, column_type, nested_name, struct_value.get(nested_name), enum_comparing_mode); + } break; } default: @@ -278,7 +291,7 @@ bool CapnProtoRowInputFormat::readRow(MutableColumns & columns, RowReadExtension for (size_t i = 0; i != columns.size(); ++i) { auto value = getReaderByColumnName(root_reader, column_names[i]); - insertValue(*columns[i], column_types[i], value, format_settings.capn_proto.enum_comparing_mode); + insertValue(*columns[i], column_types[i], column_names[i], value, format_settings.capn_proto.enum_comparing_mode); } } catch (const kj::Exception & e) diff --git a/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp index 654917b6357..bcf362d1e0b 100644 --- a/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp @@ -92,6 +92,7 @@ static std::optional convertToDynamicValue( const ColumnPtr & column, const DataTypePtr & data_type, size_t row_num, + const String & column_name, capnp::DynamicValue::Builder builder, FormatSettings::EnumComparingMode enum_comparing_mode, std::vector> & temporary_text_data_storage) @@ -103,15 +104,12 @@ static std::optional convertToDynamicValue( const auto * lc_column = assert_cast(column.get()); const auto & dict_type = assert_cast(data_type.get())->getDictionaryType(); size_t index = lc_column->getIndexAt(row_num); - return convertToDynamicValue(lc_column->getDictionary().getNestedColumn(), dict_type, index, builder, enum_comparing_mode, temporary_text_data_storage); + return convertToDynamicValue(lc_column->getDictionary().getNestedColumn(), dict_type, index, column_name, builder, enum_comparing_mode, temporary_text_data_storage); } switch (builder.getType()) { case capnp::DynamicValue::Type::INT: - /// We allow output DateTime64 as Int64. 
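
The CapnProto reader and writer changes thread the column name down the recursion so a flattened Nested column such as "n.x" can be matched to the struct field "x" inside the struct "n" (via splitCapnProtoFieldName). A hedged sketch of that name-splitting step in isolation; the helper below is a stand-in, not the real splitCapnProtoFieldName.

#include <iostream>
#include <string_view>
#include <utility>

// Split a flattened column name like "n.x" into the outer field ("n") and the nested part ("x").
// If there is no dot, the whole name is the field and the nested part is empty.
static std::pair<std::string_view, std::string_view> splitNestedName(std::string_view column_name)
{
    auto dot = column_name.find('.');
    if (dot == std::string_view::npos)
        return {column_name, {}};
    return {column_name.substr(0, dot), column_name.substr(dot + 1)};
}

int main()
{
    auto [field, nested] = splitNestedName("n.x");
    std::cout << field << " / " << nested << '\n';                                         // n / x
    auto [plain, rest] = splitNestedName("id");
    std::cout << plain << " / " << (rest.empty() ? std::string_view("<none>") : rest) << '\n';  // id / <none>
}
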
- if (WhichDataType(data_type).isDateTime64()) - return capnp::DynamicValue::Reader(assert_cast *>(column.get())->getElement(row_num)); return capnp::DynamicValue::Reader(column->getInt(row_num)); case capnp::DynamicValue::Type::UINT: return capnp::DynamicValue::Reader(column->getUInt(row_num)); @@ -150,7 +148,7 @@ static std::optional convertToDynamicValue( { auto struct_builder = builder.as(); auto nested_struct_schema = struct_builder.getSchema(); - /// Struct can be represent Tuple or Naullable (named union with two fields) + /// Struct can represent Tuple, Nullable (named union with two fields) or single column when it contains one nested column. if (data_type->isNullable()) { const auto * nullable_type = assert_cast(data_type.get()); @@ -167,12 +165,12 @@ static std::optional convertToDynamicValue( struct_builder.clear(value_field); const auto & nested_column = nullable_column->getNestedColumnPtr(); auto value_builder = initStructFieldBuilder(nested_column, row_num, struct_builder, value_field); - auto value = convertToDynamicValue(nested_column, nullable_type->getNestedType(), row_num, value_builder, enum_comparing_mode, temporary_text_data_storage); + auto value = convertToDynamicValue(nested_column, nullable_type->getNestedType(), row_num, column_name, value_builder, enum_comparing_mode, temporary_text_data_storage); if (value) struct_builder.set(value_field, *value); } } - else + else if (isTuple(data_type)) { const auto * tuple_data_type = assert_cast(data_type.get()); auto nested_types = tuple_data_type->getElements(); @@ -182,11 +180,21 @@ static std::optional convertToDynamicValue( auto pos = tuple_data_type->getPositionByName(name); auto field_builder = initStructFieldBuilder(nested_columns[pos], row_num, struct_builder, nested_struct_schema.getFieldByName(name)); - auto value = convertToDynamicValue(nested_columns[pos], nested_types[pos], row_num, field_builder, enum_comparing_mode, temporary_text_data_storage); + auto value = convertToDynamicValue(nested_columns[pos], nested_types[pos], row_num, column_name, field_builder, enum_comparing_mode, temporary_text_data_storage); if (value) struct_builder.set(name, *value); } } + else + { + /// It can be nested column from Nested type. + auto [field_name, nested_name] = splitCapnProtoFieldName(column_name); + auto nested_field = nested_struct_schema.getFieldByName(nested_name); + auto field_builder = initStructFieldBuilder(column, row_num, struct_builder, nested_field); + auto value = convertToDynamicValue(column, data_type, row_num, nested_name, field_builder, enum_comparing_mode, temporary_text_data_storage); + if (value) + struct_builder.set(nested_field, *value); + } return std::nullopt; } case capnp::DynamicValue::Type::LIST: @@ -213,7 +221,7 @@ static std::optional convertToDynamicValue( else value_builder = list_builder[i]; - auto value = convertToDynamicValue(nested_column, nested_type, offset + i, value_builder, enum_comparing_mode, temporary_text_data_storage); + auto value = convertToDynamicValue(nested_column, nested_type, offset + i, column_name, value_builder, enum_comparing_mode, temporary_text_data_storage); if (value) list_builder.set(i, *value); } @@ -231,11 +239,19 @@ void CapnProtoRowOutputFormat::write(const Columns & columns, size_t row_num) /// See comment in convertToDynamicValue() for more details. std::vector> temporary_text_data_storage; capnp::DynamicStruct::Builder root = message.initRoot(schema); + + /// Some columns can share same field builder. 
For example when we have + /// column with Nested type that was flattened into several columns. + std::unordered_map field_builders; for (size_t i = 0; i != columns.size(); ++i) { auto [struct_builder, field] = getStructBuilderAndFieldByColumnName(root, column_names[i]); - auto field_builder = initStructFieldBuilder(columns[i], row_num, struct_builder, field); - auto value = convertToDynamicValue(columns[i], column_types[i], row_num, field_builder, format_settings.capn_proto.enum_comparing_mode, temporary_text_data_storage); + if (!field_builders.contains(field.getIndex())) + { + auto field_builder = initStructFieldBuilder(columns[i], row_num, struct_builder, field); + field_builders[field.getIndex()] = field_builder; + } + auto value = convertToDynamicValue(columns[i], column_types[i], row_num, column_names[i], field_builders[field.getIndex()], format_settings.capn_proto.enum_comparing_mode, temporary_text_data_storage); if (value) struct_builder.set(field, *value); } diff --git a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp index bc363e5aa98..994af449947 100644 --- a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp +++ b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp @@ -418,12 +418,13 @@ ConstantExpressionTemplate::Cache::getFromCacheOrConstruct(const DataTypePtr & r return res; } -bool ConstantExpressionTemplate::parseExpression(ReadBuffer & istr, const FormatSettings & format_settings, const Settings & settings) +bool ConstantExpressionTemplate::parseExpression( + ReadBuffer & istr, const TokenIterator & token_iterator, const FormatSettings & format_settings, const Settings & settings) { size_t cur_column = 0; try { - if (tryParseExpression(istr, format_settings, cur_column, settings)) + if (tryParseExpression(istr, token_iterator, format_settings, cur_column, settings)) { ++rows_count; return true; @@ -445,7 +446,12 @@ bool ConstantExpressionTemplate::parseExpression(ReadBuffer & istr, const Format return false; } -bool ConstantExpressionTemplate::tryParseExpression(ReadBuffer & istr, const FormatSettings & format_settings, size_t & cur_column, const Settings & settings) +bool ConstantExpressionTemplate::tryParseExpression( + ReadBuffer & istr, + const TokenIterator & token_iterator, + const FormatSettings & format_settings, + size_t & cur_column, + const Settings & settings) { size_t cur_token = 0; size_t num_columns = structure->literals.columns(); @@ -464,7 +470,7 @@ bool ConstantExpressionTemplate::tryParseExpression(ReadBuffer & istr, const For const DataTypePtr & type = structure->literals.getByPosition(cur_column).type; if (format_settings.values.accurate_types_of_literals && !structure->special_parser[cur_column].useDefaultParser()) { - if (!parseLiteralAndAssertType(istr, type.get(), cur_column, settings)) + if (!parseLiteralAndAssertType(istr, token_iterator, type.get(), cur_column, settings)) return false; } else @@ -482,7 +488,8 @@ bool ConstantExpressionTemplate::tryParseExpression(ReadBuffer & istr, const For return true; } -bool ConstantExpressionTemplate::parseLiteralAndAssertType(ReadBuffer & istr, const IDataType * complex_type, size_t column_idx, const Settings & settings) +bool ConstantExpressionTemplate::parseLiteralAndAssertType( + ReadBuffer & istr, const TokenIterator & token_iterator, const IDataType * complex_type, size_t column_idx, const Settings & settings) { using Type = Field::Types::Which; @@ -497,12 +504,12 @@ bool 
ConstantExpressionTemplate::parseLiteralAndAssertType(ReadBuffer & istr, co if (type_info.is_array || type_info.is_tuple || type_info.is_map) { - /// TODO faster way to check types without using Parsers ParserArrayOfLiterals parser_array; ParserTupleOfLiterals parser_tuple; - Tokens tokens_number(istr.position(), istr.buffer().end()); - IParser::Pos iterator(tokens_number, static_cast(settings.max_parser_depth)); + IParser::Pos iterator(token_iterator, static_cast(settings.max_parser_depth)); + while (iterator->begin < istr.position()) + ++iterator; Expected expected; ASTPtr ast; if (!parser_array.parse(iterator, ast, expected) && !parser_tuple.parse(iterator, ast, expected)) diff --git a/src/Processors/Formats/Impl/ConstantExpressionTemplate.h b/src/Processors/Formats/Impl/ConstantExpressionTemplate.h index c5d4f033258..fbb3cbcd22a 100644 --- a/src/Processors/Formats/Impl/ConstantExpressionTemplate.h +++ b/src/Processors/Formats/Impl/ConstantExpressionTemplate.h @@ -71,7 +71,8 @@ public: /// Read expression from istr, assert it has the same structure and the same types of literals (template matches) /// and parse literals into temporary columns - bool parseExpression(ReadBuffer & istr, const FormatSettings & format_settings, const Settings & settings); + bool parseExpression( + ReadBuffer & istr, const TokenIterator & token_iterator, const FormatSettings & format_settings, const Settings & settings); /// Evaluate batch of expressions were parsed using template. /// If template was deduced with null_as_default == true, set bits in nulls for NULL values in column_idx, starting from offset. @@ -80,8 +81,14 @@ public: size_t rowsCount() const { return rows_count; } private: - bool tryParseExpression(ReadBuffer & istr, const FormatSettings & format_settings, size_t & cur_column, const Settings & settings); - bool parseLiteralAndAssertType(ReadBuffer & istr, const IDataType * type, size_t column_idx, const Settings & settings); + bool tryParseExpression( + ReadBuffer & istr, + const TokenIterator & token_iterator, + const FormatSettings & format_settings, + size_t & cur_column, + const Settings & settings); + bool parseLiteralAndAssertType( + ReadBuffer & istr, const TokenIterator & token_iterator, const IDataType * type, size_t column_idx, const Settings & settings); private: TemplateStructurePtr structure; diff --git a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp index 7583bf72457..f8e328ed0fb 100644 --- a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include @@ -370,12 +371,12 @@ DataTypes CustomSeparatedSchemaReader::readRowAndGetDataTypes() first_row = false; auto fields = reader.readRow(); - return determineDataTypesByEscapingRule(fields, reader.getFormatSettings(), reader.getEscapingRule()); + return tryInferDataTypesByEscapingRule(fields, reader.getFormatSettings(), reader.getEscapingRule(), &json_inference_info); } -void CustomSeparatedSchemaReader::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type, size_t) +void CustomSeparatedSchemaReader::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) { - transformInferredTypesIfNeeded(type, new_type, format_settings, reader.getEscapingRule()); + transformInferredTypesByEscapingRuleIfNeeded(type, new_type, format_settings, reader.getEscapingRule(), &json_inference_info); } void 
registerInputFormatCustomSeparated(FormatFactory & factory) diff --git a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h index 625278631a5..8a3112eb9c1 100644 --- a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h +++ b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -100,11 +101,12 @@ public: private: DataTypes readRowAndGetDataTypes() override; - void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type, size_t) override; + void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) override; PeekableReadBuffer buf; CustomSeparatedFormatReader reader; bool first_row = true; + JSONInferenceInfo json_inference_info; }; } diff --git a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.cpp b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.cpp index cfd68079bba..204a5077e31 100644 --- a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.cpp +++ b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.cpp @@ -2,8 +2,11 @@ #include #include #include +#include +#include #include #include +#include namespace DB { @@ -170,19 +173,25 @@ JSONColumnsSchemaReaderBase::JSONColumnsSchemaReaderBase( ReadBuffer & in_, const FormatSettings & format_settings_, std::unique_ptr reader_) : ISchemaReader(in_) , format_settings(format_settings_) + , hints_str(format_settings_.schema_inference_hints) , reader(std::move(reader_)) , column_names_from_settings(splitColumnNames(format_settings_.column_names_for_schema_inference)) { } -void JSONColumnsSchemaReaderBase::chooseResulType(DataTypePtr & type, DataTypePtr & new_type, const String & column_name, size_t row) const +void JSONColumnsSchemaReaderBase::setContext(ContextPtr & ctx) { - auto convert_types_if_needed = [&](DataTypePtr & first, DataTypePtr & second) + ColumnsDescription columns; + if (tryParseColumnsListFromString(hints_str, columns, ctx)) { - DataTypes types = {first, second}; - transformInferredJSONTypesIfNeeded(types, format_settings); - }; - chooseResultColumnType(type, new_type, convert_types_if_needed, nullptr, column_name, row); + for (const auto & [name, type] : columns.getAll()) + hints[name] = type; + } +} + +void JSONColumnsSchemaReaderBase::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) +{ + transformInferredJSONTypesIfNeeded(type, new_type, format_settings, &inference_info); } NamesAndTypesList JSONColumnsSchemaReaderBase::readSchema() @@ -220,9 +229,18 @@ NamesAndTypesList JSONColumnsSchemaReaderBase::readSchema() if (!names_to_types.contains(column_name)) names_order.push_back(column_name); - rows_in_block = 0; - auto column_type = readColumnAndGetDataType(column_name, rows_in_block, format_settings.max_rows_to_read_for_schema_inference - total_rows_read); - chooseResulType(names_to_types[column_name], column_type, column_name, total_rows_read + 1); + if (const auto it = hints.find(column_name); it != hints.end()) + { + names_to_types[column_name] = it->second; + } + else + { + rows_in_block = 0; + auto column_type = readColumnAndGetDataType( + column_name, rows_in_block, format_settings.max_rows_to_read_for_schema_inference - total_rows_read); + chooseResultColumnType(*this, names_to_types[column_name], column_type, nullptr, column_name, total_rows_read + 1); + } + ++iteration; } while (!reader->checkChunkEndOrSkipColumnDelimiter()); @@ -237,8 +255,14 @@ NamesAndTypesList 
JSONColumnsSchemaReaderBase::readSchema() for (auto & name : names_order) { auto & type = names_to_types[name]; - /// Check that we could determine the type of this column. - checkResultColumnTypeAndAppend(result, type, name, nullptr, format_settings.max_rows_to_read_for_schema_inference); + /// Don't check/change types from hints. + if (!hints.contains(name)) + { + transformJSONTupleToArrayIfPossible(type, format_settings, &inference_info); + /// Check that we could determine the type of this column. + checkFinalInferredType(type, name, format_settings, nullptr, format_settings.max_rows_to_read_for_schema_inference); + } + result.emplace_back(name, type); } return result; @@ -262,8 +286,8 @@ DataTypePtr JSONColumnsSchemaReaderBase::readColumnAndGetDataType(const String & } readJSONField(field, in); - DataTypePtr field_type = JSONUtils::getDataTypeFromField(field, format_settings); - chooseResulType(column_type, field_type, column_name, rows_read); + DataTypePtr field_type = tryInferDataTypeForSingleJSONField(field, format_settings, &inference_info); + chooseResultColumnType(*this, column_type, field_type, nullptr, column_name, rows_read); ++rows_read; } while (!reader->checkColumnEndOrSkipFieldDelimiter()); diff --git a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.h b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.h index a8311123afc..3292b5649c9 100644 --- a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.h +++ b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include @@ -76,18 +77,23 @@ class JSONColumnsSchemaReaderBase : public ISchemaReader public: JSONColumnsSchemaReaderBase(ReadBuffer & in_, const FormatSettings & format_settings_, std::unique_ptr reader_); + void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type); + + bool needContext() const override { return !hints_str.empty(); } + void setContext(ContextPtr & ctx) override; + private: NamesAndTypesList readSchema() override; /// Read whole column in the block (up to max_rows_to_read rows) and extract the data type. DataTypePtr readColumnAndGetDataType(const String & column_name, size_t & rows_read, size_t max_rows_to_read); - /// Choose result type for column from two inferred types from different rows. 
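The JSONColumnsSchemaReaderBase hunk above parses format_settings.schema_inference_hints once in setContext() and then lets hinted columns bypass per-row type inference (and the final type check) entirely. Below is a minimal standalone sketch of that look-up-before-infer pattern; the "name Type, name Type" hint syntax, the string type names and the helper names are simplifications for illustration, not ClickHouse's real ColumnsDescription/DataTypePtr machinery.

#include <iostream>
#include <map>
#include <sstream>
#include <string>

// Parse "x UInt32, s String" into {name -> type}; a stand-in for
// tryParseColumnsListFromString() producing real data types.
static std::map<std::string, std::string> parseHints(const std::string & hints_str)
{
    std::map<std::string, std::string> hints;
    std::istringstream stream(hints_str);
    std::string item;
    while (std::getline(stream, item, ','))
    {
        std::istringstream pair(item);
        std::string name, type;
        if (pair >> name >> type)
            hints[name] = type;
    }
    return hints;
}

// Placeholder for the real per-column inference over sample rows.
static std::string inferTypeFromData(const std::string & /*column_name*/)
{
    return "String";
}

static std::string resolveColumnType(
    const std::string & column_name, const std::map<std::string, std::string> & hints)
{
    // Hinted columns skip inference, mirroring the readSchema() change above.
    if (auto it = hints.find(column_name); it != hints.end())
        return it->second;
    return inferTypeFromData(column_name);
}

int main()
{
    auto hints = parseHints("id UInt64, payload String");
    std::cout << resolveColumnType("id", hints) << '\n';    // UInt64 (from hint)
    std::cout << resolveColumnType("extra", hints) << '\n'; // String (inferred)
}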
- void chooseResulType(DataTypePtr & type, DataTypePtr & new_type, const String & column_name, size_t row) const; - const FormatSettings format_settings; + String hints_str; + std::unordered_map hints; std::unique_ptr reader; Names column_names_from_settings; + JSONInferenceInfo inference_info; }; } diff --git a/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.cpp b/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.cpp index f8b864ca65f..c5991c0c587 100644 --- a/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.cpp @@ -77,10 +77,6 @@ void JSONColumnsWithMetadataBlockOutputFormat::consumeTotals(Chunk chunk) void JSONColumnsWithMetadataBlockOutputFormat::finalizeImpl() { - auto outside_statistics = getOutsideStatistics(); - if (outside_statistics) - statistics = std::move(*outside_statistics); - JSONUtils::writeAdditionalInfo( rows, statistics.rows_before_limit, diff --git a/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.h b/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.h index 0e481ada804..17fb0467c78 100644 --- a/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.h +++ b/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.h @@ -60,7 +60,6 @@ protected: void writeExtremesElement(const char * title, const Columns & columns, size_t row_num); DataTypes types; - Statistics statistics; size_t rows; }; diff --git a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp index 6d74ca6d616..49564bde429 100644 --- a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -202,12 +203,17 @@ DataTypes JSONCompactEachRowRowSchemaReader::readRowAndGetDataTypes() if (in.eof()) return {}; - return JSONUtils::readRowAndGetDataTypesForJSONCompactEachRow(in, format_settings, reader.yieldStrings()); + return JSONUtils::readRowAndGetDataTypesForJSONCompactEachRow(in, format_settings, &inference_info); } -void JSONCompactEachRowRowSchemaReader::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type, size_t) +void JSONCompactEachRowRowSchemaReader::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) { - transformInferredJSONTypesIfNeeded(type, new_type, format_settings); + transformInferredJSONTypesIfNeeded(type, new_type, format_settings, &inference_info); +} + +void JSONCompactEachRowRowSchemaReader::transformFinalTypeIfNeeded(DataTypePtr & type) +{ + transformJSONTupleToArrayIfPossible(type, format_settings, &inference_info); } void registerInputFormatJSONCompactEachRow(FormatFactory & factory) diff --git a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h index 2bcc0abae77..2151967517a 100644 --- a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h +++ b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h @@ -4,6 +4,7 @@ #include #include #include +#include #include namespace DB @@ -80,10 +81,12 @@ public: private: DataTypes readRowAndGetDataTypes() override; - void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type, size_t) override; + void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) override; + 
void transformFinalTypeIfNeeded(DataTypePtr & type) override; JSONCompactEachRowFormatReader reader; bool first_row = true; + JSONInferenceInfo inference_info; }; } diff --git a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp index 8a5ef33b73d..c9502659267 100644 --- a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -300,9 +301,8 @@ void JSONEachRowRowInputFormat::readSuffix() assertEOF(*in); } -JSONEachRowSchemaReader::JSONEachRowSchemaReader(ReadBuffer & in_, bool json_strings_, const FormatSettings & format_settings_) +JSONEachRowSchemaReader::JSONEachRowSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_) : IRowWithNamesSchemaReader(in_, format_settings_) - , json_strings(json_strings_) { } @@ -336,12 +336,17 @@ NamesAndTypesList JSONEachRowSchemaReader::readRowAndGetNamesAndDataTypes(bool & return {}; } - return JSONUtils::readRowAndGetNamesAndDataTypesForJSONEachRow(in, format_settings, json_strings); + return JSONUtils::readRowAndGetNamesAndDataTypesForJSONEachRow(in, format_settings, &inference_info); } void JSONEachRowSchemaReader::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) { - transformInferredJSONTypesIfNeeded(type, new_type, format_settings); + transformInferredJSONTypesIfNeeded(type, new_type, format_settings, &inference_info); +} + +void JSONEachRowSchemaReader::transformFinalTypeIfNeeded(DataTypePtr & type) +{ + transformJSONTupleToArrayIfPossible(type, format_settings, &inference_info); } void registerInputFormatJSONEachRow(FormatFactory & factory) @@ -391,11 +396,11 @@ void registerNonTrivialPrefixAndSuffixCheckerJSONEachRow(FormatFactory & factory void registerJSONEachRowSchemaReader(FormatFactory & factory) { - auto register_schema_reader = [&](const String & format_name, bool json_strings) + auto register_schema_reader = [&](const String & format_name) { - factory.registerSchemaReader(format_name, [json_strings](ReadBuffer & buf, const FormatSettings & settings) + factory.registerSchemaReader(format_name, [](ReadBuffer & buf, const FormatSettings & settings) { - return std::make_unique(buf, json_strings, settings); + return std::make_unique(buf, settings); }); factory.registerAdditionalInfoForSchemaCacheGetter(format_name, [](const FormatSettings & settings) { @@ -403,10 +408,10 @@ void registerJSONEachRowSchemaReader(FormatFactory & factory) }); }; - register_schema_reader("JSONEachRow", false); - register_schema_reader("JSONLines", false); - register_schema_reader("NDJSON", false); - register_schema_reader("JSONStringsEachRow", true); + register_schema_reader("JSONEachRow"); + register_schema_reader("JSONLines"); + register_schema_reader("NDJSON"); + register_schema_reader("JSONStringsEachRow"); } } diff --git a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h index 4e2946cfea6..beee9e95821 100644 --- a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h +++ b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h @@ -4,6 +4,7 @@ #include #include #include +#include #include @@ -94,15 +95,16 @@ protected: class JSONEachRowSchemaReader : public IRowWithNamesSchemaReader { public: - JSONEachRowSchemaReader(ReadBuffer & in_, bool json_strings, const FormatSettings & format_settings_); + JSONEachRowSchemaReader(ReadBuffer & in_, const 
FormatSettings & format_settings_); private: NamesAndTypesList readRowAndGetNamesAndDataTypes(bool & eof) override; void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) override; + void transformFinalTypeIfNeeded(DataTypePtr & type) override; - bool json_strings; bool first_row = true; bool data_in_square_brackets = false; + JSONInferenceInfo inference_info; }; } diff --git a/src/Processors/Formats/Impl/JSONObjectEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONObjectEachRowRowInputFormat.cpp index 6e6d6287840..f01f07024da 100644 --- a/src/Processors/Formats/Impl/JSONObjectEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONObjectEachRowRowInputFormat.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include namespace DB @@ -85,15 +86,25 @@ NamesAndTypesList JSONObjectEachRowSchemaReader::readRowAndGetNamesAndDataTypes( JSONUtils::skipComma(in); JSONUtils::readFieldName(in); - auto names_and_types = JSONUtils::readRowAndGetNamesAndDataTypesForJSONEachRow(in, format_settings, false); + return JSONUtils::readRowAndGetNamesAndDataTypesForJSONEachRow(in, format_settings, &inference_info); +} + +NamesAndTypesList JSONObjectEachRowSchemaReader::getStaticNamesAndTypes() +{ if (!format_settings.json_object_each_row.column_for_object_name.empty()) - names_and_types.emplace_front(format_settings.json_object_each_row.column_for_object_name, std::make_shared()); - return names_and_types; + return {{format_settings.json_object_each_row.column_for_object_name, std::make_shared()}}; + + return {}; } void JSONObjectEachRowSchemaReader::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) { - transformInferredJSONTypesIfNeeded(type, new_type, format_settings); + transformInferredJSONTypesIfNeeded(type, new_type, format_settings, &inference_info); +} + +void JSONObjectEachRowSchemaReader::transformFinalTypeIfNeeded(DataTypePtr & type) +{ + transformJSONTupleToArrayIfPossible(type, format_settings, &inference_info); } void registerInputFormatJSONObjectEachRow(FormatFactory & factory) diff --git a/src/Processors/Formats/Impl/JSONObjectEachRowRowInputFormat.h b/src/Processors/Formats/Impl/JSONObjectEachRowRowInputFormat.h index 466c0111a03..a15bc558c65 100644 --- a/src/Processors/Formats/Impl/JSONObjectEachRowRowInputFormat.h +++ b/src/Processors/Formats/Impl/JSONObjectEachRowRowInputFormat.h @@ -4,6 +4,7 @@ #include #include #include +#include #include @@ -41,9 +42,12 @@ public: private: NamesAndTypesList readRowAndGetNamesAndDataTypes(bool & eof) override; + NamesAndTypesList getStaticNamesAndTypes() override; void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) override; + void transformFinalTypeIfNeeded(DataTypePtr & type) override; bool first_row = true; + JSONInferenceInfo inference_info; }; std::optional getColumnIndexForJSONObjectEachRowObjectName(const Block & header, const FormatSettings & settings); diff --git a/src/Processors/Formats/Impl/JSONRowOutputFormat.cpp b/src/Processors/Formats/Impl/JSONRowOutputFormat.cpp index 98120abe8d8..b759c8f4e1d 100644 --- a/src/Processors/Formats/Impl/JSONRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONRowOutputFormat.cpp @@ -112,10 +112,6 @@ void JSONRowOutputFormat::writeAfterExtremes() void JSONRowOutputFormat::finalizeImpl() { - auto outside_statistics = getOutsideStatistics(); - if (outside_statistics) - statistics = std::move(*outside_statistics); - JSONUtils::writeAdditionalInfo( row_count, statistics.rows_before_limit, diff --git 
a/src/Processors/Formats/Impl/JSONRowOutputFormat.h b/src/Processors/Formats/Impl/JSONRowOutputFormat.h index 9147aaa5387..72a4b028949 100644 --- a/src/Processors/Formats/Impl/JSONRowOutputFormat.h +++ b/src/Processors/Formats/Impl/JSONRowOutputFormat.h @@ -66,7 +66,6 @@ protected: size_t row_count = 0; Names names; /// The column names are pre-escaped to be put into JSON string literal. - Statistics statistics; FormatSettings settings; bool yield_strings; diff --git a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp index 4e62754bc3d..3a76a5a3fc6 100644 --- a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp @@ -247,6 +247,14 @@ static void insertNull(IColumn & column, DataTypePtr type) static void insertUUID(IColumn & column, DataTypePtr type, const char * value, size_t size) { + auto insert_func = [&](IColumn & column_, DataTypePtr type_) + { + insertUUID(column_, type_, value, size); + }; + + if (checkAndInsertNullable(column, type, insert_func) || checkAndInsertLowCardinality(column, type, insert_func)) + return; + if (!isUUID(type)) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert MessagePack UUID into column with type {}.", type->getName()); ReadBufferFromMemory buf(value, size); @@ -470,16 +478,16 @@ DataTypePtr MsgPackSchemaReader::getDataType(const msgpack::object & object) { case msgpack::type::object_type::POSITIVE_INTEGER: [[fallthrough]]; case msgpack::type::object_type::NEGATIVE_INTEGER: - return makeNullable(std::make_shared()); + return std::make_shared(); case msgpack::type::object_type::FLOAT32: - return makeNullable(std::make_shared()); + return std::make_shared(); case msgpack::type::object_type::FLOAT64: - return makeNullable(std::make_shared()); + return std::make_shared(); case msgpack::type::object_type::BOOLEAN: - return makeNullable(std::make_shared()); + return std::make_shared(); case msgpack::type::object_type::BIN: [[fallthrough]]; case msgpack::type::object_type::STR: - return makeNullable(std::make_shared()); + return std::make_shared(); case msgpack::type::object_type::ARRAY: { msgpack::object_array object_array = object.via.array; diff --git a/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.cpp b/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.cpp index faa74e234b9..c3f7b4e0ad7 100644 --- a/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.cpp @@ -435,7 +435,7 @@ DataTypes MySQLDumpSchemaReader::readRowAndGetDataTypes() skipFieldDelimiter(in); readQuotedField(value, in); - auto type = determineDataTypeByEscapingRule(value, format_settings, FormatSettings::EscapingRule::Quoted); + auto type = tryInferDataTypeByEscapingRule(value, format_settings, FormatSettings::EscapingRule::Quoted); data_types.push_back(std::move(type)); } skipEndOfRow(in, table_name); diff --git a/src/Processors/Formats/Impl/MySQLOutputFormat.cpp b/src/Processors/Formats/Impl/MySQLOutputFormat.cpp index 75a03cb6d0e..d486f99e8bc 100644 --- a/src/Processors/Formats/Impl/MySQLOutputFormat.cpp +++ b/src/Processors/Formats/Impl/MySQLOutputFormat.cpp @@ -79,11 +79,14 @@ void MySQLOutputFormat::finalizeImpl() CurrentThread::finalizePerformanceCounters(); QueryStatusInfo info = process_list_elem->getInfo(); affected_rows = info.written_rows; + double elapsed_seconds = static_cast(info.elapsed_microseconds) / 1000000.0; human_readable_info = fmt::format( "Read {} rows, {} in {} sec., {} 
rows/sec., {}/sec.", - info.read_rows, ReadableSize(info.read_bytes), info.elapsed_seconds, - static_cast(info.read_rows / info.elapsed_seconds), - ReadableSize(info.read_bytes / info.elapsed_seconds)); + info.read_rows, + ReadableSize(info.read_bytes), + elapsed_seconds, + static_cast(info.read_rows / elapsed_seconds), + ReadableSize(info.read_bytes / elapsed_seconds)); } const auto & header = getPort(PortKind::Main).getHeader(); diff --git a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp index 58fd03a7a78..be48406ca26 100644 --- a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp @@ -3,7 +3,7 @@ #if USE_ORC #include -#include +#include #include #include #include @@ -54,14 +54,19 @@ Chunk ORCBlockInputFormat::generate() throw ParsingException( ErrorCodes::CANNOT_READ_ALL_DATA, "Error while reading batch of ORC data: {}", table_result.status().ToString()); + /// We should extract the number of rows directly from the stripe, because in case when + /// record batch contains 0 columns (for example if we requested only columns that + /// are not presented in data) the number of rows in record batch will be 0. + size_t num_rows = file_reader->GetRawORCReader()->getStripe(stripe_current)->getNumberOfRows(); + auto table = table_result.ValueOrDie(); - if (!table || !table->num_rows()) + if (!table || !num_rows) return {}; ++stripe_current; Chunk res; - arrow_column_to_ch_column->arrowTableToCHChunk(res, table); + arrow_column_to_ch_column->arrowTableToCHChunk(res, table, num_rows); /// If defaults_for_omitted_fields is true, calculate the default values from default expression for omitted fields. /// Otherwise fill the missing columns with zero values of its type. if (format_settings.defaults_for_omitted_fields) @@ -101,7 +106,7 @@ static size_t countIndicesForType(std::shared_ptr type) if (type->id() == arrow::Type::MAP) { auto * map_type = static_cast(type.get()); - return countIndicesForType(map_type->key_type()) + countIndicesForType(map_type->item_type()); + return countIndicesForType(map_type->key_type()) + countIndicesForType(map_type->item_type()) + 1; } return 1; diff --git a/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.cpp b/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.cpp index 40ab6554115..db4bb422cb1 100644 --- a/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.cpp @@ -216,7 +216,7 @@ namespace DB } case ProcessingUnitType::FINALIZE: { - formatter->setOutsideStatistics(std::move(unit.statistics)); + formatter->statistics = std::move(unit.statistics); formatter->finalizeImpl(); break; } diff --git a/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.h b/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.h index fb58f5765c1..dedc4e80092 100644 --- a/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.h +++ b/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.h @@ -227,7 +227,6 @@ private: size_t rows_consumed = 0; std::atomic_bool are_totals_written = false; - Statistics statistics; /// We change statistics in onProgress() which can be called from different threads. 
std::mutex statistics_mutex; bool save_totals_and_extremes_in_statistics; diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index c2253fe4b20..126adb9104e 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -4,7 +4,7 @@ #if USE_PARQUET #include -#include +#include #include #include #include @@ -70,7 +70,7 @@ Chunk ParquetBlockInputFormat::generate() ++row_group_current; - arrow_column_to_ch_column->arrowTableToCHChunk(res, table); + arrow_column_to_ch_column->arrowTableToCHChunk(res, table, table->num_rows()); /// If defaults_for_omitted_fields is true, calculate the default values from default expression for omitted fields. /// Otherwise fill the missing columns with zero values of its type. diff --git a/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp b/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp index 2ad2ad6f7a3..5c0192c1e4a 100644 --- a/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -155,15 +156,15 @@ DataTypes RegexpSchemaReader::readRowAndGetDataTypes() for (size_t i = 0; i != field_extractor.getMatchedFieldsSize(); ++i) { String field(field_extractor.getField(i)); - data_types.push_back(determineDataTypeByEscapingRule(field, format_settings, format_settings.regexp.escaping_rule)); + data_types.push_back(tryInferDataTypeByEscapingRule(field, format_settings, format_settings.regexp.escaping_rule, &json_inference_info)); } return data_types; } -void RegexpSchemaReader::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type, size_t) +void RegexpSchemaReader::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) { - transformInferredTypesIfNeeded(type, new_type, format_settings, format_settings.regexp.escaping_rule); + transformInferredTypesByEscapingRuleIfNeeded(type, new_type, format_settings, format_settings.regexp.escaping_rule, &json_inference_info); } diff --git a/src/Processors/Formats/Impl/RegexpRowInputFormat.h b/src/Processors/Formats/Impl/RegexpRowInputFormat.h index 7fbb3fc320f..d6696ffe751 100644 --- a/src/Processors/Formats/Impl/RegexpRowInputFormat.h +++ b/src/Processors/Formats/Impl/RegexpRowInputFormat.h @@ -5,12 +5,13 @@ #include #include #include +#include #include #include #include #include -#include #include +#include namespace DB @@ -81,12 +82,13 @@ public: private: DataTypes readRowAndGetDataTypes() override; - void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type, size_t) override; + void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) override; using EscapingRule = FormatSettings::EscapingRule; RegexpFieldExtractor field_extractor; PeekableReadBuffer buf; + JSONInferenceInfo json_inference_info; }; } diff --git a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp index 838aba72e3d..f5f05453f25 100644 --- a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp @@ -249,7 +249,7 @@ NamesAndTypesList TSKVSchemaReader::readRowAndGetNamesAndDataTypes(bool & eof) if (has_value) { readEscapedString(value, in); - names_and_types.emplace_back(std::move(name), determineDataTypeByEscapingRule(value, format_settings, FormatSettings::EscapingRule::Escaped)); + names_and_types.emplace_back(std::move(name), 
tryInferDataTypeByEscapingRule(value, format_settings, FormatSettings::EscapingRule::Escaped)); } else { diff --git a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp index 5066d40cbae..174a41a8a59 100644 --- a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp @@ -268,7 +268,7 @@ DataTypes TabSeparatedSchemaReader::readRowAndGetDataTypes() return {}; auto fields = reader.readRow(); - return determineDataTypesByEscapingRule(fields, reader.getFormatSettings(), reader.getEscapingRule()); + return tryInferDataTypesByEscapingRule(fields, reader.getFormatSettings(), reader.getEscapingRule()); } void registerInputFormatTabSeparated(FormatFactory & factory) diff --git a/src/Processors/Formats/Impl/TemplateBlockOutputFormat.cpp b/src/Processors/Formats/Impl/TemplateBlockOutputFormat.cpp index 0e7bdb259ac..d424370f3e8 100644 --- a/src/Processors/Formats/Impl/TemplateBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/TemplateBlockOutputFormat.cpp @@ -137,10 +137,6 @@ void TemplateBlockOutputFormat::finalizeImpl() return; size_t parts = format.format_idx_to_column_idx.size(); - auto outside_statistics = getOutsideStatistics(); - if (outside_statistics) - statistics = std::move(*outside_statistics); - for (size_t i = 0; i < parts; ++i) { auto type = std::make_shared(); diff --git a/src/Processors/Formats/Impl/TemplateBlockOutputFormat.h b/src/Processors/Formats/Impl/TemplateBlockOutputFormat.h index 419fcac37c1..0e5529ac9b8 100644 --- a/src/Processors/Formats/Impl/TemplateBlockOutputFormat.h +++ b/src/Processors/Formats/Impl/TemplateBlockOutputFormat.h @@ -57,8 +57,6 @@ private: ParsedTemplateFormatString format; ParsedTemplateFormatString row_format; - Statistics statistics; - size_t row_count = 0; std::string row_between_delimiter; diff --git a/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp b/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp index 1532b16525f..ba6650c2887 100644 --- a/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -511,16 +512,16 @@ DataTypes TemplateSchemaReader::readRowAndGetDataTypes() format_reader.skipDelimiter(i); updateFormatSettingsIfNeeded(row_format.escaping_rules[i], format_settings, row_format, default_csv_delimiter, i); field = readFieldByEscapingRule(buf, row_format.escaping_rules[i], format_settings); - data_types.push_back(determineDataTypeByEscapingRule(field, format_settings, row_format.escaping_rules[i])); + data_types.push_back(tryInferDataTypeByEscapingRule(field, format_settings, row_format.escaping_rules[i], &json_inference_info)); } format_reader.skipRowEndDelimiter(); return data_types; } -void TemplateSchemaReader::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type, size_t column_idx) +void TemplateSchemaReader::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) { - transformInferredTypesIfNeeded(type, new_type, format_settings, row_format.escaping_rules[column_idx]); + transformInferredTypesByEscapingRuleIfNeeded(type, new_type, format_settings, row_format.escaping_rules[field_index], &json_inference_info); } static ParsedTemplateFormatString fillResultSetFormat(const FormatSettings & settings) diff --git a/src/Processors/Formats/Impl/TemplateRowInputFormat.h b/src/Processors/Formats/Impl/TemplateRowInputFormat.h 
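Several hunks above (Regexp, TSKV, TabSeparated, Template, MySQLDump, Values) replace determineDataType*ByEscapingRule with tryInferDataType*ByEscapingRule. Assuming the renamed helpers follow the usual "try" convention of reporting failure instead of committing to a type, here is a simplified sketch of that shape using string-based types; it is not ClickHouse's actual inference logic.

#include <algorithm>
#include <cctype>
#include <iostream>
#include <optional>
#include <string>
#include <vector>

// Rough stand-in for per-field inference: recognise quoted strings and
// integers, return std::nullopt for anything it cannot classify.
static std::optional<std::string> tryInferType(const std::string & field)
{
    if (field.size() >= 2 && field.front() == '\'' && field.back() == '\'')
        return "String";
    if (!field.empty() && std::all_of(field.begin(), field.end(),
                                      [](unsigned char c) { return std::isdigit(c) != 0; }))
        return "Int64";
    return std::nullopt;
}

int main()
{
    std::vector<std::string> fields = {"42", "'abc'", "{broken"};
    for (const auto & f : fields)
    {
        auto type = tryInferType(f);
        // The caller decides the fallback; a schema reader reports "could not
        // determine type" only after all rows failed, not on the first field.
        std::cout << f << " -> " << type.value_or("<unknown>") << '\n';
    }
}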
index cf12eb8d136..8f9088e2c47 100644 --- a/src/Processors/Formats/Impl/TemplateRowInputFormat.h +++ b/src/Processors/Formats/Impl/TemplateRowInputFormat.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -121,13 +122,14 @@ public: DataTypes readRowAndGetDataTypes() override; private: - void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type, size_t column_idx) override; + void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) override; PeekableReadBuffer buf; const ParsedTemplateFormatString format; const ParsedTemplateFormatString row_format; TemplateFormatReader format_reader; bool first_row = true; + JSONInferenceInfo json_inference_info; const char default_csv_delimiter; }; diff --git a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp index 108b4203e3e..99f0c292966 100644 --- a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp @@ -108,15 +108,97 @@ Chunk ValuesBlockInputFormat::generate() return Chunk{std::move(columns), rows_in_block}; } +/// Can be used in fileSegmentationEngine for parallel parsing of Values +static bool skipToNextRow(PeekableReadBuffer * buf, size_t min_chunk_bytes, int balance) +{ + skipWhitespaceIfAny(*buf); + if (buf->eof() || *buf->position() == ';') + return false; + bool quoted = false; + + size_t chunk_begin_buf_count = buf->count(); + while (!buf->eof() && (balance || buf->count() - chunk_begin_buf_count < min_chunk_bytes)) + { + buf->position() = find_first_symbols<'\\', '\'', ')', '('>(buf->position(), buf->buffer().end()); + if (buf->position() == buf->buffer().end()) + continue; + if (*buf->position() == '\\') + { + ++buf->position(); + if (!buf->eof()) + ++buf->position(); + } + else if (*buf->position() == '\'') + { + quoted ^= true; + ++buf->position(); + } + else if (*buf->position() == ')') + { + ++buf->position(); + if (!quoted) + --balance; + } + else if (*buf->position() == '(') + { + ++buf->position(); + if (!quoted) + ++balance; + } + } + + if (!buf->eof() && *buf->position() == ',') + ++buf->position(); + return true; +} + +/// We need continuous memory containing the expression to use Lexer +/// Note that this is both reading and tokenizing until the end of the row +/// This is doing unnecessary work if the rest of the columns can be read with tryReadValue (which doesn't require tokens) +/// and it's more efficient if they don't (as everything is already tokenized) +void ValuesBlockInputFormat::readUntilTheEndOfRowAndReTokenize(size_t current_column_idx) +{ + if (tokens && token_iterator && + /// Make sure the underlying memory hasn't changed because of next() calls in the buffer + ((*token_iterator)->begin >= buf->buffer().begin() && (*token_iterator)->begin <= buf->buffer().end())) + { + while ((*token_iterator)->begin < buf->position() && !(*token_iterator)->isError() && !(*token_iterator)->isEnd()) + ++(*token_iterator); + + if (!(*token_iterator)->isError() && !(*token_iterator)->isEnd()) + return; + } + + skipToNextRow(buf.get(), 0, 1); + buf->makeContinuousMemoryFromCheckpointToPos(); + auto * row_end = buf->position(); + buf->rollbackToCheckpoint(); + tokens.emplace(buf->position(), row_end); + token_iterator.emplace(*tokens, static_cast(context->getSettingsRef().max_parser_depth)); + auto const & first = (*token_iterator).get(); + if (first.isError() || first.isEnd()) + { + const Block & header = getPort().getHeader(); + const IDataType & 
type = *header.getByPosition(current_column_idx).type; + throw Exception( + ErrorCodes::SYNTAX_ERROR, + "Cannot parse expression of type {} here: {}", + type.getName(), + std::string_view(buf->position(), std::min(SHOW_CHARS_ON_SYNTAX_ERROR, buf->buffer().end() - buf->position()))); + } +} + void ValuesBlockInputFormat::readRow(MutableColumns & columns, size_t row_num) { + tokens.reset(); + token_iterator.reset(); assertChar('(', *buf); for (size_t column_idx = 0; column_idx < num_columns; ++column_idx) { skipWhitespaceIfAny(*buf); PeekableReadBufferCheckpoint checkpoint{*buf}; - bool read; + bool read = false; /// Parse value using fast streaming parser for literals and slow SQL parser for expressions. /// If there is SQL expression in some row, template of this expression will be deduced, @@ -126,7 +208,7 @@ void ValuesBlockInputFormat::readRow(MutableColumns & columns, size_t row_num) read = tryReadValue(*columns[column_idx], column_idx); else if (parser_type_for_column[column_idx] == ParserType::BatchTemplate) read = tryParseExpressionUsingTemplate(columns[column_idx], column_idx); - else /// if (parser_type_for_column[column_idx] == ParserType::SingleExpressionEvaluation) + else read = parseExpression(*columns[column_idx], column_idx); if (!read) @@ -143,9 +225,12 @@ void ValuesBlockInputFormat::readRow(MutableColumns & columns, size_t row_num) bool ValuesBlockInputFormat::tryParseExpressionUsingTemplate(MutableColumnPtr & column, size_t column_idx) { + readUntilTheEndOfRowAndReTokenize(column_idx); + IParser::Pos start = *token_iterator; + /// Try to parse expression using template if one was successfully deduced while parsing the first row - auto settings = context->getSettingsRef(); - if (templates[column_idx]->parseExpression(*buf, format_settings, settings)) + const auto & settings = context->getSettingsRef(); + if (templates[column_idx]->parseExpression(*buf, *token_iterator, format_settings, settings)) { ++rows_parsed_using_template[column_idx]; return true; @@ -166,6 +251,7 @@ bool ValuesBlockInputFormat::tryParseExpressionUsingTemplate(MutableColumnPtr & /// Do not use this template anymore templates[column_idx].reset(); buf->rollbackToCheckpoint(); + *token_iterator = start; /// It will deduce new template or fallback to slow SQL parser return parseExpression(*column, column_idx); @@ -295,79 +381,41 @@ namespace } } -/// Can be used in fileSegmentationEngine for parallel parsing of Values -static bool skipToNextRow(PeekableReadBuffer * buf, size_t min_chunk_bytes, int balance) -{ - skipWhitespaceIfAny(*buf); - if (buf->eof() || *buf->position() == ';') - return false; - bool quoted = false; - - size_t chunk_begin_buf_count = buf->count(); - while (!buf->eof() && (balance || buf->count() - chunk_begin_buf_count < min_chunk_bytes)) - { - buf->position() = find_first_symbols<'\\', '\'', ')', '('>(buf->position(), buf->buffer().end()); - if (buf->position() == buf->buffer().end()) - continue; - if (*buf->position() == '\\') - { - ++buf->position(); - if (!buf->eof()) - ++buf->position(); - } - else if (*buf->position() == '\'') - { - quoted ^= true; - ++buf->position(); - } - else if (*buf->position() == ')') - { - ++buf->position(); - if (!quoted) - --balance; - } - else if (*buf->position() == '(') - { - ++buf->position(); - if (!quoted) - ++balance; - } - } - - if (!buf->eof() && *buf->position() == ',') - ++buf->position(); - return true; -} - bool ValuesBlockInputFormat::parseExpression(IColumn & column, size_t column_idx) { const Block & header = getPort().getHeader(); 
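The skipToNextRow() helper shown above locates the end of the current Values row by tracking a parenthesis balance and a quoted flag while skipping backslash escapes. The same scanning idea over a plain string, with the PeekableReadBuffer and min_chunk_bytes details omitted:

#include <cstddef>
#include <iostream>
#include <string>

// Returns the position one past the ')' that closes the row starting at `pos`
// (which must point at '('), honouring single-quoted strings and backslash
// escapes, mirroring the balance/quoted bookkeeping of skipToNextRow().
static std::size_t findRowEnd(const std::string & data, std::size_t pos)
{
    int balance = 0;
    bool quoted = false;
    for (; pos < data.size(); ++pos)
    {
        char c = data[pos];
        if (c == '\\')
            ++pos;                  // skip the escaped character
        else if (c == '\'')
            quoted = !quoted;
        else if (!quoted && c == '(')
            ++balance;
        else if (!quoted && c == ')')
        {
            if (--balance == 0)
                return pos + 1;
        }
    }
    return pos;
}

int main()
{
    std::string rows = "(1, 'a,b\\')', (2)), (3, 'c')";
    std::size_t end = findRowEnd(rows, 0);
    std::cout << rows.substr(0, end) << '\n';   // prints the first row only
}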
const IDataType & type = *header.getByPosition(column_idx).type; auto settings = context->getSettingsRef(); - /// We need continuous memory containing the expression to use Lexer - skipToNextRow(buf.get(), 0, 1); - buf->makeContinuousMemoryFromCheckpointToPos(); - buf->rollbackToCheckpoint(); + /// Advance the token iterator until the start of the column expression + readUntilTheEndOfRowAndReTokenize(column_idx); - Expected expected; - Tokens tokens(buf->position(), buf->buffer().end()); - IParser::Pos token_iterator(tokens, static_cast(settings.max_parser_depth)); + bool parsed = false; ASTPtr ast; + std::optional ti_start; - bool parsed = parser.parse(token_iterator, ast, expected); + if (!(*token_iterator)->isError() && !(*token_iterator)->isEnd()) + { + Expected expected; + /// Keep a copy to the start of the column tokens to use if later if necessary + ti_start = IParser::Pos(*token_iterator, static_cast(settings.max_parser_depth)); - /// Consider delimiter after value (',' or ')') as part of expression - if (column_idx + 1 != num_columns) - parsed &= token_iterator->type == TokenType::Comma; - else - parsed &= token_iterator->type == TokenType::ClosingRoundBracket; + parsed = parser.parse(*token_iterator, ast, expected); + + /// Consider delimiter after value (',' or ')') as part of expression + if (column_idx + 1 != num_columns) + parsed &= (*token_iterator)->type == TokenType::Comma; + else + parsed &= (*token_iterator)->type == TokenType::ClosingRoundBracket; + } if (!parsed) - throw Exception("Cannot parse expression of type " + type.getName() + " here: " - + String(buf->position(), std::min(SHOW_CHARS_ON_SYNTAX_ERROR, buf->buffer().end() - buf->position())), - ErrorCodes::SYNTAX_ERROR); - ++token_iterator; + throw Exception( + ErrorCodes::SYNTAX_ERROR, + "Cannot parse expression of type {} here: {}", + type.getName(), + std::string_view(buf->position(), std::min(SHOW_CHARS_ON_SYNTAX_ERROR, buf->buffer().end() - buf->position()))); + ++(*token_iterator); if (parser_type_for_column[column_idx] != ParserType::Streaming && dynamic_cast(ast.get())) { @@ -417,8 +465,8 @@ bool ValuesBlockInputFormat::parseExpression(IColumn & column, size_t column_idx auto structure = templates_cache.getFromCacheOrConstruct( result_type, !result_type->isNullable() && format_settings.null_as_default, - TokenIterator(tokens), - token_iterator, + *ti_start, + *token_iterator, ast, context, &found_in_cache, @@ -430,7 +478,7 @@ bool ValuesBlockInputFormat::parseExpression(IColumn & column, size_t column_idx ++attempts_to_deduce_template[column_idx]; buf->rollbackToCheckpoint(); - if (templates[column_idx]->parseExpression(*buf, format_settings, settings)) + if (templates[column_idx]->parseExpression(*buf, *ti_start, format_settings, settings)) { ++rows_parsed_using_template[column_idx]; parser_type_for_column[column_idx] = ParserType::BatchTemplate; @@ -448,7 +496,7 @@ bool ValuesBlockInputFormat::parseExpression(IColumn & column, size_t column_idx else { buf->rollbackToCheckpoint(); - size_t len = const_cast(token_iterator->begin) - buf->position(); + size_t len = const_cast((*token_iterator)->begin) - buf->position(); throw Exception("Cannot deduce template of expression: " + std::string(buf->position(), len), ErrorCodes::SYNTAX_ERROR); } } @@ -460,7 +508,7 @@ bool ValuesBlockInputFormat::parseExpression(IColumn & column, size_t column_idx throw Exception("Interpreting expressions is disabled", ErrorCodes::SUPPORT_IS_DISABLED); /// Try to evaluate single expression if other parsers don't work - 
buf->position() = const_cast(token_iterator->begin); + buf->position() = const_cast((*token_iterator)->begin); std::pair value_raw = evaluateConstantExpression(ast, context); @@ -593,14 +641,14 @@ DataTypes ValuesSchemaReader::readRowAndGetDataTypes() { if (!data_types.empty()) { - skipWhitespaceIfAny(buf); assertChar(',', buf); skipWhitespaceIfAny(buf); } readQuotedField(value, buf); - auto type = determineDataTypeByEscapingRule(value, format_settings, FormatSettings::EscapingRule::Quoted); + auto type = tryInferDataTypeByEscapingRule(value, format_settings, FormatSettings::EscapingRule::Quoted); data_types.push_back(std::move(type)); + skipWhitespaceIfAny(buf); } assertChar(')', buf); diff --git a/src/Processors/Formats/Impl/ValuesBlockInputFormat.h b/src/Processors/Formats/Impl/ValuesBlockInputFormat.h index bf243c54bd7..9abade72af1 100644 --- a/src/Processors/Formats/Impl/ValuesBlockInputFormat.h +++ b/src/Processors/Formats/Impl/ValuesBlockInputFormat.h @@ -56,6 +56,7 @@ private: Chunk generate() override; void readRow(MutableColumns & columns, size_t row_num); + void readUntilTheEndOfRowAndReTokenize(size_t current_column_idx); bool tryParseExpressionUsingTemplate(MutableColumnPtr & column, size_t column_idx); ALWAYS_INLINE inline bool tryReadValue(IColumn & column, size_t column_idx); @@ -70,6 +71,8 @@ private: void readSuffix(); std::unique_ptr buf; + std::optional token_iterator{}; + std::optional tokens{}; const RowInputFormatParams params; diff --git a/src/Processors/Formats/Impl/XMLRowOutputFormat.cpp b/src/Processors/Formats/Impl/XMLRowOutputFormat.cpp index b0de4d0a976..f73d13b7739 100644 --- a/src/Processors/Formats/Impl/XMLRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/XMLRowOutputFormat.cpp @@ -190,9 +190,6 @@ void XMLRowOutputFormat::finalizeImpl() writeIntText(row_count, *ostr); writeCString("\n", *ostr); - auto outside_statistics = getOutsideStatistics(); - if (outside_statistics) - statistics = std::move(*outside_statistics); writeRowsBeforeLimitAtLeast(); diff --git a/src/Processors/Formats/Impl/XMLRowOutputFormat.h b/src/Processors/Formats/Impl/XMLRowOutputFormat.h index 25544bab35e..197e91ff760 100644 --- a/src/Processors/Formats/Impl/XMLRowOutputFormat.h +++ b/src/Processors/Formats/Impl/XMLRowOutputFormat.h @@ -61,7 +61,6 @@ private: NamesAndTypes fields; Names field_tag_names; - Statistics statistics; const FormatSettings format_settings; }; diff --git a/src/Processors/Merges/Algorithms/Graphite.cpp b/src/Processors/Merges/Algorithms/Graphite.cpp index c5c611366ff..0616c4bd6e6 100644 --- a/src/Processors/Merges/Algorithms/Graphite.cpp +++ b/src/Processors/Merges/Algorithms/Graphite.cpp @@ -332,8 +332,7 @@ std::string buildTaggedRegex(std::string regexp_str) * * */ -static const Pattern & -appendGraphitePattern( +static const Pattern & appendGraphitePattern( const Poco::Util::AbstractConfiguration & config, const String & config_element, Patterns & patterns, bool default_rule, diff --git a/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp index 467ded19f4d..c5937fe0bc5 100644 --- a/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp @@ -9,6 +9,12 @@ namespace DB { +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + + static GraphiteRollupSortedAlgorithm::ColumnsDefinition defineColumns( const Block & header, const Graphite::Params & params) { @@ -26,6 +32,9 @@ static 
GraphiteRollupSortedAlgorithm::ColumnsDefinition defineColumns( if (i != def.time_column_num && i != def.value_column_num && i != def.version_column_num) def.unmodified_column_numbers.push_back(i); + if (!WhichDataType(header.getByPosition(def.value_column_num).type).isFloat64()) + throw Exception("Only `Float64` data type is allowed for the value column of GraphiteMergeTree", ErrorCodes::BAD_ARGUMENTS); + return def; } diff --git a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp index c79c667a988..ee3177e132f 100644 --- a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp @@ -382,7 +382,7 @@ static MutableColumns getMergedDataColumns( for (const auto & desc : def.columns_to_aggregate) { // Wrap aggregated columns in a tuple to match function signature - if (!desc.is_agg_func_type && !desc.is_simple_agg_func_type && isTuple(desc.function->getReturnType())) + if (!desc.is_agg_func_type && !desc.is_simple_agg_func_type && isTuple(desc.function->getResultType())) { size_t tuple_size = desc.column_numbers.size(); MutableColumns tuple_columns(tuple_size); @@ -439,7 +439,7 @@ static void postprocessChunk( auto column = std::move(columns[next_column]); ++next_column; - if (!desc.is_agg_func_type && !desc.is_simple_agg_func_type && isTuple(desc.function->getReturnType())) + if (!desc.is_agg_func_type && !desc.is_simple_agg_func_type && isTuple(desc.function->getResultType())) { /// Unpack tuple into block. size_t tuple_size = desc.column_numbers.size(); diff --git a/src/Processors/QueryPlan/AggregatingStep.cpp b/src/Processors/QueryPlan/AggregatingStep.cpp index 86039342c49..4fd6e7c11dd 100644 --- a/src/Processors/QueryPlan/AggregatingStep.cpp +++ b/src/Processors/QueryPlan/AggregatingStep.cpp @@ -79,9 +79,9 @@ Block generateOutputHeader(const Block & input_header, const Names & keys, bool } -static Block appendGroupingColumn(Block block, const Names & keys, const GroupingSetsParamsList & params, bool use_nulls) +Block AggregatingStep::appendGroupingColumn(Block block, const Names & keys, bool has_grouping, bool use_nulls) { - if (params.empty()) + if (!has_grouping) return block; return generateOutputHeader(block, keys, use_nulls); @@ -104,7 +104,7 @@ AggregatingStep::AggregatingStep( bool memory_bound_merging_of_aggregation_results_enabled_) : ITransformingStep( input_stream_, - appendGroupingColumn(params_.getHeader(input_stream_.header, final_), params_.keys, grouping_sets_params_, group_by_use_nulls_), + appendGroupingColumn(params_.getHeader(input_stream_.header, final_), params_.keys, !grouping_sets_params_.empty(), group_by_use_nulls_), getTraits(should_produce_results_in_order_of_bucket_number_), false) , params(std::move(params_)) @@ -469,7 +469,7 @@ void AggregatingStep::updateOutputStream() { output_stream = createOutputStream( input_streams.front(), - appendGroupingColumn(params.getHeader(input_streams.front().header, final), params.keys, grouping_sets_params, group_by_use_nulls), + appendGroupingColumn(params.getHeader(input_streams.front().header, final), params.keys, !grouping_sets_params.empty(), group_by_use_nulls), getDataStreamTraits()); } diff --git a/src/Processors/QueryPlan/AggregatingStep.h b/src/Processors/QueryPlan/AggregatingStep.h index 9cb56432797..0dc06649d2d 100644 --- a/src/Processors/QueryPlan/AggregatingStep.h +++ b/src/Processors/QueryPlan/AggregatingStep.h @@ -42,6 +42,8 @@ public: bool 
should_produce_results_in_order_of_bucket_number_, bool memory_bound_merging_of_aggregation_results_enabled_); + static Block appendGroupingColumn(Block block, const Names & keys, bool has_grouping, bool use_nulls); + String getName() const override { return "Aggregating"; } void transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; diff --git a/src/Processors/QueryPlan/CreatingSetsStep.h b/src/Processors/QueryPlan/CreatingSetsStep.h index 9995af7bca7..b4777578a30 100644 --- a/src/Processors/QueryPlan/CreatingSetsStep.h +++ b/src/Processors/QueryPlan/CreatingSetsStep.h @@ -44,9 +44,6 @@ public: QueryPipelineBuilderPtr updatePipeline(QueryPipelineBuilders pipelines, const BuildQueryPipelineSettings &) override; void describePipeline(FormatSettings & settings) const override; - -private: - Processors processors; }; void addCreatingSetsStep(QueryPlan & query_plan, PreparedSets::SubqueriesForSets subqueries_for_sets, ContextPtr context); diff --git a/src/Processors/QueryPlan/IQueryPlanStep.cpp b/src/Processors/QueryPlan/IQueryPlanStep.cpp index b36d1f0e12f..a0035089c29 100644 --- a/src/Processors/QueryPlan/IQueryPlanStep.cpp +++ b/src/Processors/QueryPlan/IQueryPlanStep.cpp @@ -113,4 +113,9 @@ void IQueryPlanStep::describePipeline(const Processors & processors, FormatSetti doDescribeProcessor(*prev, count, settings); } +void IQueryPlanStep::appendExtraProcessors(const Processors & extra_processors) +{ + processors.insert(processors.end(), extra_processors.begin(), extra_processors.end()); +} + } diff --git a/src/Processors/QueryPlan/IQueryPlanStep.h b/src/Processors/QueryPlan/IQueryPlanStep.h index 1e00d76b66f..316ecff9c2e 100644 --- a/src/Processors/QueryPlan/IQueryPlanStep.h +++ b/src/Processors/QueryPlan/IQueryPlanStep.h @@ -110,6 +110,9 @@ public: /// Get description of processors added in current step. Should be called after updatePipeline(). virtual void describePipeline(FormatSettings & /*settings*/) const {} + /// Append extra processors for this step. + void appendExtraProcessors(const Processors & extra_processors); + protected: DataStreams input_streams; std::optional output_stream; @@ -117,6 +120,10 @@ protected: /// Text description about what current step does. std::string step_description; + /// This field is used to store added processors from this step. + /// It is used only for introspection (EXPLAIN PIPELINE). + Processors processors; + static void describePipeline(const Processors & processors, FormatSettings & settings); }; diff --git a/src/Processors/QueryPlan/ISourceStep.h b/src/Processors/QueryPlan/ISourceStep.h index 08c939b626d..744b6f9b5c4 100644 --- a/src/Processors/QueryPlan/ISourceStep.h +++ b/src/Processors/QueryPlan/ISourceStep.h @@ -15,10 +15,6 @@ public: virtual void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings & settings) = 0; void describePipeline(FormatSettings & settings) const override; - -protected: - /// We collect processors got after pipeline transformation. 
- Processors processors; }; } diff --git a/src/Processors/QueryPlan/ITransformingStep.cpp b/src/Processors/QueryPlan/ITransformingStep.cpp index 64ad2ec5626..195fa9ad68c 100644 --- a/src/Processors/QueryPlan/ITransformingStep.cpp +++ b/src/Processors/QueryPlan/ITransformingStep.cpp @@ -70,9 +70,4 @@ void ITransformingStep::describePipeline(FormatSettings & settings) const IQueryPlanStep::describePipeline(processors, settings); } -void ITransformingStep::appendExtraProcessors(const Processors & extra_processors) -{ - processors.insert(processors.end(), extra_processors.begin(), extra_processors.end()); -} - } diff --git a/src/Processors/QueryPlan/ITransformingStep.h b/src/Processors/QueryPlan/ITransformingStep.h index 8b16e982af5..1513b4307f8 100644 --- a/src/Processors/QueryPlan/ITransformingStep.h +++ b/src/Processors/QueryPlan/ITransformingStep.h @@ -75,9 +75,6 @@ public: void describePipeline(FormatSettings & settings) const override; - /// Append extra processors for this step. - void appendExtraProcessors(const Processors & extra_processors); - /// Enforcement is supposed to be done through the special settings that will be taken into account by remote nodes during query planning (e.g. force_aggregation_in_order). /// Should be called only if data_stream_traits.can_enforce_sorting_properties_in_distributed_query == true. virtual void adjustSettingsToEnforceSortingPropertiesInDistributedQuery(ContextMutablePtr) const @@ -100,8 +97,7 @@ protected: private: virtual void updateOutputStream() = 0; - /// We collect processors got after pipeline transformation. - Processors processors; + /// If we should collect processors got after pipeline transformation. bool collect_processors; const DataStreamTraits data_stream_traits; diff --git a/src/Processors/QueryPlan/IntersectOrExceptStep.cpp b/src/Processors/QueryPlan/IntersectOrExceptStep.cpp index c031303cc7f..afdff44020f 100644 --- a/src/Processors/QueryPlan/IntersectOrExceptStep.cpp +++ b/src/Processors/QueryPlan/IntersectOrExceptStep.cpp @@ -42,10 +42,10 @@ IntersectOrExceptStep::IntersectOrExceptStep( QueryPipelineBuilderPtr IntersectOrExceptStep::updatePipeline(QueryPipelineBuilders pipelines, const BuildQueryPipelineSettings &) { auto pipeline = std::make_unique(); - QueryPipelineProcessorsCollector collector(*pipeline, this); if (pipelines.empty()) { + QueryPipelineProcessorsCollector collector(*pipeline, this); pipeline->init(Pipe(std::make_shared(output_stream->header))); processors = collector.detachProcessors(); return pipeline; @@ -56,6 +56,7 @@ QueryPipelineBuilderPtr IntersectOrExceptStep::updatePipeline(QueryPipelineBuild /// Just in case. if (!isCompatibleHeader(cur_pipeline->getHeader(), getOutputStream().header)) { + QueryPipelineProcessorsCollector collector(*cur_pipeline, this); auto converting_dag = ActionsDAG::makeConvertingActions( cur_pipeline->getHeader().getColumnsWithTypeAndName(), getOutputStream().header.getColumnsWithTypeAndName(), @@ -66,16 +67,20 @@ QueryPipelineBuilderPtr IntersectOrExceptStep::updatePipeline(QueryPipelineBuild { return std::make_shared(cur_header, converting_actions); }); + + auto added_processors = collector.detachProcessors(); + processors.insert(processors.end(), added_processors.begin(), added_processors.end()); } /// For the case of union. 
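The IQueryPlanStep/ITransformingStep/ISourceStep/IntersectOrExceptStep hunks above move the processors member and appendExtraProcessors() into the IQueryPlanStep base class, so that non-transforming steps can also record the transforms they add for EXPLAIN PIPELINE. A heavily reduced sketch of that collect-in-the-base pattern, with std::string standing in for real processor objects:

#include <iostream>
#include <string>
#include <vector>

using Processors = std::vector<std::string>;

class IQueryPlanStep
{
public:
    virtual ~IQueryPlanStep() = default;

    // Now shared by all steps, not only transforming ones.
    void appendExtraProcessors(const Processors & extra)
    {
        processors.insert(processors.end(), extra.begin(), extra.end());
    }

    void describePipeline() const
    {
        for (const auto & p : processors)
            std::cout << "  " << p << '\n';
    }

protected:
    // Used only for introspection (EXPLAIN PIPELINE).
    Processors processors;
};

class UnionStepSketch : public IQueryPlanStep
{
public:
    void updatePipeline()
    {
        // A step records whatever transforms it adds while building the pipeline.
        appendExtraProcessors({"ConvertingTransform", "ResizeProcessor"});
    }
};

int main()
{
    UnionStepSketch step;
    step.updatePipeline();
    step.describePipeline();
}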
cur_pipeline->addTransform(std::make_shared(header, cur_pipeline->getNumStreams(), 1)); } - *pipeline = QueryPipelineBuilder::unitePipelines(std::move(pipelines), max_threads); - pipeline->addTransform(std::make_shared(header, current_operator)); + *pipeline = QueryPipelineBuilder::unitePipelines(std::move(pipelines), max_threads, &processors); + auto transform = std::make_shared(header, current_operator); + processors.push_back(transform); + pipeline->addTransform(std::move(transform)); - processors = collector.detachProcessors(); return pipeline; } diff --git a/src/Processors/QueryPlan/IntersectOrExceptStep.h b/src/Processors/QueryPlan/IntersectOrExceptStep.h index d7eab574431..d15d2802456 100644 --- a/src/Processors/QueryPlan/IntersectOrExceptStep.h +++ b/src/Processors/QueryPlan/IntersectOrExceptStep.h @@ -24,7 +24,6 @@ private: Block header; Operator current_operator; size_t max_threads; - Processors processors; }; } diff --git a/src/Processors/QueryPlan/JoinStep.h b/src/Processors/QueryPlan/JoinStep.h index fc7f74d4fe8..a814d541574 100644 --- a/src/Processors/QueryPlan/JoinStep.h +++ b/src/Processors/QueryPlan/JoinStep.h @@ -37,7 +37,6 @@ private: size_t max_block_size; size_t max_streams; bool keep_left_read_in_order; - Processors processors; }; /// Special step for the case when Join is already filled. diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 556997b6e7a..0d8fe84f9d3 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -66,7 +66,7 @@ static MergeTreeReaderSettings getMergeTreeReaderSettings( .read_in_order = query_info.input_order_info != nullptr, .apply_deleted_mask = context->applyDeletedMask(), .use_asynchronous_read_from_pool = settings.allow_asynchronous_read_from_io_pool_for_merge_tree - && (settings.max_streams_to_max_threads_ratio > 1 || settings.allow_asynchronous_read_from_io_pool_for_merge_tree), + && (settings.max_streams_to_max_threads_ratio > 1 || settings.max_streams_for_merge_tree_reading > 1), }; } diff --git a/src/Processors/QueryPlan/UnionStep.cpp b/src/Processors/QueryPlan/UnionStep.cpp index 6290c7417db..e111890a833 100644 --- a/src/Processors/QueryPlan/UnionStep.cpp +++ b/src/Processors/QueryPlan/UnionStep.cpp @@ -62,10 +62,10 @@ void UnionStep::updateOutputSortDescription() QueryPipelineBuilderPtr UnionStep::updatePipeline(QueryPipelineBuilders pipelines, const BuildQueryPipelineSettings &) { auto pipeline = std::make_unique(); - QueryPipelineProcessorsCollector collector(*pipeline, this); if (pipelines.empty()) { + QueryPipelineProcessorsCollector collector(*pipeline, this); pipeline->init(Pipe(std::make_shared(output_stream->header))); processors = collector.detachProcessors(); return pipeline; @@ -80,6 +80,7 @@ QueryPipelineBuilderPtr UnionStep::updatePipeline(QueryPipelineBuilders pipeline /// But, just in case, convert it to the same header if not. 
if (!isCompatibleHeader(cur_pipeline->getHeader(), getOutputStream().header)) { + QueryPipelineProcessorsCollector collector(*cur_pipeline, this); auto converting_dag = ActionsDAG::makeConvertingActions( cur_pipeline->getHeader().getColumnsWithTypeAndName(), getOutputStream().header.getColumnsWithTypeAndName(), @@ -90,12 +91,13 @@ QueryPipelineBuilderPtr UnionStep::updatePipeline(QueryPipelineBuilders pipeline { return std::make_shared(cur_header, converting_actions); }); + + auto added_processors = collector.detachProcessors(); + processors.insert(processors.end(), added_processors.begin(), added_processors.end()); } } - *pipeline = QueryPipelineBuilder::unitePipelines(std::move(pipelines), max_threads); - - processors = collector.detachProcessors(); + *pipeline = QueryPipelineBuilder::unitePipelines(std::move(pipelines), max_threads, &processors); return pipeline; } diff --git a/src/Processors/QueryPlan/UnionStep.h b/src/Processors/QueryPlan/UnionStep.h index a5a2f6b356e..6278de07673 100644 --- a/src/Processors/QueryPlan/UnionStep.h +++ b/src/Processors/QueryPlan/UnionStep.h @@ -24,7 +24,6 @@ public: private: Block header; size_t max_threads; - Processors processors; }; } diff --git a/src/Processors/QueryPlan/WindowStep.cpp b/src/Processors/QueryPlan/WindowStep.cpp index b67b394b57b..92e9948c4c7 100644 --- a/src/Processors/QueryPlan/WindowStep.cpp +++ b/src/Processors/QueryPlan/WindowStep.cpp @@ -35,7 +35,7 @@ static Block addWindowFunctionResultColumns(const Block & block, { ColumnWithTypeAndName column_with_type; column_with_type.name = f.column_name; - column_with_type.type = f.aggregate_function->getReturnType(); + column_with_type.type = f.aggregate_function->getResultType(); column_with_type.column = column_with_type.type->createColumn(); result.insert(column_with_type); diff --git a/src/Processors/Sources/ShellCommandSource.cpp b/src/Processors/Sources/ShellCommandSource.cpp index 3f70abaea6d..4983fa047dc 100644 --- a/src/Processors/Sources/ShellCommandSource.cpp +++ b/src/Processors/Sources/ShellCommandSource.cpp @@ -71,28 +71,22 @@ static bool pollFd(int fd, size_t timeout_milliseconds, int events) pfd.events = events; pfd.revents = 0; - Stopwatch watch; - int res; while (true) { + Stopwatch watch; res = poll(&pfd, 1, static_cast(timeout_milliseconds)); if (res < 0) { - if (errno == EINTR) - { - watch.stop(); - timeout_milliseconds -= watch.elapsedMilliseconds(); - watch.start(); - - continue; - } - else - { + if (errno != EINTR) throwFromErrno("Cannot poll", ErrorCodes::CANNOT_POLL); - } + + const auto elapsed = watch.elapsedMilliseconds(); + if (timeout_milliseconds <= elapsed) + break; + timeout_milliseconds -= elapsed; } else { @@ -474,7 +468,7 @@ Pipe ShellCommandSourceCoordinator::createPipe( std::unique_ptr process; std::unique_ptr process_holder; - auto destructor_strategy = ShellCommand::DestructorStrategy{true /*terminate_in_destructor*/, configuration.command_termination_timeout_seconds}; + auto destructor_strategy = ShellCommand::DestructorStrategy{true /*terminate_in_destructor*/, SIGTERM, configuration.command_termination_timeout_seconds}; command_config.terminate_in_destructor_strategy = destructor_strategy; bool is_executable_pool = (process_pool != nullptr); diff --git a/src/Processors/Transforms/MergeSortingTransform.cpp b/src/Processors/Transforms/MergeSortingTransform.cpp index b039109c3f5..efd9249066c 100644 --- a/src/Processors/Transforms/MergeSortingTransform.cpp +++ b/src/Processors/Transforms/MergeSortingTransform.cpp @@ -34,7 +34,7 @@ public: , 
tmp_stream(tmp_stream_) , log(log_) { - LOG_INFO(log, "Sorting and writing part of data into temporary file {}", tmp_stream.path()); + LOG_INFO(log, "Sorting and writing part of data into temporary file {}", tmp_stream.getPath()); ProfileEvents::increment(ProfileEvents::ExternalSortWritePart); } @@ -58,7 +58,7 @@ public: ProfileEvents::increment(ProfileEvents::ExternalSortUncompressedBytes, stat.uncompressed_size); LOG_INFO(log, "Done writing part of data into temporary file {}, compressed {}, uncompressed {} ", - tmp_stream.path(), ReadableSize(static_cast(stat.compressed_size)), ReadableSize(static_cast(stat.uncompressed_size))); + tmp_stream.getPath(), ReadableSize(static_cast(stat.compressed_size)), ReadableSize(static_cast(stat.uncompressed_size))); } Block block = tmp_stream.read(); diff --git a/src/Processors/Transforms/MongoDBSource.cpp b/src/Processors/Transforms/MongoDBSource.cpp index b8f40789e83..88eddde0b3d 100644 --- a/src/Processors/Transforms/MongoDBSource.cpp +++ b/src/Processors/Transforms/MongoDBSource.cpp @@ -29,111 +29,11 @@ namespace DB namespace ErrorCodes { extern const int TYPE_MISMATCH; - extern const int MONGODB_CANNOT_AUTHENTICATE; extern const int UNKNOWN_TYPE; extern const int MONGODB_ERROR; } -#if POCO_VERSION < 0x01070800 -/// See https://pocoproject.org/forum/viewtopic.php?f=10&t=6326&p=11426&hilit=mongodb+auth#p11485 -void authenticate(Poco::MongoDB::Connection & connection, const std::string & database, const std::string & user, const std::string & password) -{ - Poco::MongoDB::Database db(database); - - /// Challenge-response authentication. - std::string nonce; - - /// First step: request nonce. - { - auto command = db.createCommand(); - command->setNumberToReturn(1); - command->selector().add("getnonce", 1); - - Poco::MongoDB::ResponseMessage response; - connection.sendRequest(*command, response); - - if (response.documents().empty()) - throw Exception( - "Cannot authenticate in MongoDB: server returned empty response for 'getnonce' command", - ErrorCodes::MONGODB_CANNOT_AUTHENTICATE); - - auto doc = response.documents()[0]; - try - { - double ok = doc->get("ok", 0); - if (ok != 1) - throw Exception( - "Cannot authenticate in MongoDB: server returned response for 'getnonce' command that" - " has field 'ok' missing or having wrong value", - ErrorCodes::MONGODB_CANNOT_AUTHENTICATE); - - nonce = doc->get("nonce", ""); - if (nonce.empty()) - throw Exception( - "Cannot authenticate in MongoDB: server returned response for 'getnonce' command that" - " has field 'nonce' missing or empty", - ErrorCodes::MONGODB_CANNOT_AUTHENTICATE); - } - catch (Poco::NotFoundException & e) - { - throw Exception( - "Cannot authenticate in MongoDB: server returned response for 'getnonce' command that has missing required field: " - + e.displayText(), - ErrorCodes::MONGODB_CANNOT_AUTHENTICATE); - } - } - - /// Second step: use nonce to calculate digest and send it back to the server. 
- /// Digest is hex_md5(n.nonce + username + hex_md5(username + ":mongo:" + password)) - { - std::string first = user + ":mongo:" + password; - - Poco::MD5Engine md5; - md5.update(first); - std::string digest_first(Poco::DigestEngine::digestToHex(md5.digest())); - std::string second = nonce + user + digest_first; - md5.reset(); - md5.update(second); - std::string digest_second(Poco::DigestEngine::digestToHex(md5.digest())); - - auto command = db.createCommand(); - command->setNumberToReturn(1); - command->selector() - .add("authenticate", 1) - .add("user", user) - .add("nonce", nonce) - .add("key", digest_second); - - Poco::MongoDB::ResponseMessage response; - connection.sendRequest(*command, response); - - if (response.empty()) - throw Exception( - "Cannot authenticate in MongoDB: server returned empty response for 'authenticate' command", - ErrorCodes::MONGODB_CANNOT_AUTHENTICATE); - - auto doc = response.documents()[0]; - try - { - double ok = doc->get("ok", 0); - if (ok != 1) - throw Exception( - "Cannot authenticate in MongoDB: server returned response for 'authenticate' command that" - " has field 'ok' missing or having wrong value", - ErrorCodes::MONGODB_CANNOT_AUTHENTICATE); - } - catch (Poco::NotFoundException & e) - { - throw Exception( - "Cannot authenticate in MongoDB: server returned response for 'authenticate' command that has missing required field: " - + e.displayText(), - ErrorCodes::MONGODB_CANNOT_AUTHENTICATE); - } - } -} -#endif - std::unique_ptr createCursor(const std::string & database, const std::string & collection, const Block & sample_block_to_select) { auto cursor = std::make_unique(database, collection); diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index 4d3eb1f0bbd..9bfaf1f375f 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -1067,7 +1067,7 @@ void WindowTransform::appendChunk(Chunk & chunk) // Initialize output columns. 
for (auto & ws : workspaces) { - block.output_columns.push_back(ws.aggregate_function->getReturnType() + block.output_columns.push_back(ws.aggregate_function->getResultType() ->createColumn()); block.output_columns.back()->reserve(block.rows); } @@ -1441,8 +1441,8 @@ struct WindowFunction { std::string name; - WindowFunction(const std::string & name_, const DataTypes & argument_types_, const Array & parameters_) - : IAggregateFunctionHelper(argument_types_, parameters_) + WindowFunction(const std::string & name_, const DataTypes & argument_types_, const Array & parameters_, const DataTypePtr & result_type_) + : IAggregateFunctionHelper(argument_types_, parameters_, result_type_) , name(name_) {} @@ -1472,12 +1472,9 @@ struct WindowFunctionRank final : public WindowFunction { WindowFunctionRank(const std::string & name_, const DataTypes & argument_types_, const Array & parameters_) - : WindowFunction(name_, argument_types_, parameters_) + : WindowFunction(name_, argument_types_, parameters_, std::make_shared()) {} - DataTypePtr getReturnType() const override - { return std::make_shared(); } - bool allocatesMemoryInArena() const override { return false; } void windowInsertResultInto(const WindowTransform * transform, @@ -1494,12 +1491,9 @@ struct WindowFunctionDenseRank final : public WindowFunction { WindowFunctionDenseRank(const std::string & name_, const DataTypes & argument_types_, const Array & parameters_) - : WindowFunction(name_, argument_types_, parameters_) + : WindowFunction(name_, argument_types_, parameters_, std::make_shared()) {} - DataTypePtr getReturnType() const override - { return std::make_shared(); } - bool allocatesMemoryInArena() const override { return false; } void windowInsertResultInto(const WindowTransform * transform, @@ -1560,8 +1554,8 @@ template struct StatefulWindowFunction : public WindowFunction { StatefulWindowFunction(const std::string & name_, - const DataTypes & argument_types_, const Array & parameters_) - : WindowFunction(name_, argument_types_, parameters_) + const DataTypes & argument_types_, const Array & parameters_, const DataTypePtr & result_type_) + : WindowFunction(name_, argument_types_, parameters_, result_type_) { } @@ -1607,7 +1601,7 @@ struct WindowFunctionExponentialTimeDecayedSum final : public StatefulWindowFunc WindowFunctionExponentialTimeDecayedSum(const std::string & name_, const DataTypes & argument_types_, const Array & parameters_) - : StatefulWindowFunction(name_, argument_types_, parameters_) + : StatefulWindowFunction(name_, argument_types_, parameters_, std::make_shared()) { if (parameters_.size() != 1) { @@ -1639,11 +1633,6 @@ struct WindowFunctionExponentialTimeDecayedSum final : public StatefulWindowFunc } } - DataTypePtr getReturnType() const override - { - return std::make_shared(); - } - bool allocatesMemoryInArena() const override { return false; } void windowInsertResultInto(const WindowTransform * transform, @@ -1705,7 +1694,7 @@ struct WindowFunctionExponentialTimeDecayedMax final : public WindowFunction WindowFunctionExponentialTimeDecayedMax(const std::string & name_, const DataTypes & argument_types_, const Array & parameters_) - : WindowFunction(name_, argument_types_, parameters_) + : WindowFunction(name_, argument_types_, parameters_, std::make_shared()) { if (parameters_.size() != 1) { @@ -1737,11 +1726,6 @@ struct WindowFunctionExponentialTimeDecayedMax final : public WindowFunction } } - DataTypePtr getReturnType() const override - { - return std::make_shared(); - } - bool allocatesMemoryInArena() const 
override { return false; } void windowInsertResultInto(const WindowTransform * transform, @@ -1781,7 +1765,7 @@ struct WindowFunctionExponentialTimeDecayedCount final : public StatefulWindowFu WindowFunctionExponentialTimeDecayedCount(const std::string & name_, const DataTypes & argument_types_, const Array & parameters_) - : StatefulWindowFunction(name_, argument_types_, parameters_) + : StatefulWindowFunction(name_, argument_types_, parameters_, std::make_shared()) { if (parameters_.size() != 1) { @@ -1805,11 +1789,6 @@ struct WindowFunctionExponentialTimeDecayedCount final : public StatefulWindowFu } } - DataTypePtr getReturnType() const override - { - return std::make_shared(); - } - bool allocatesMemoryInArena() const override { return false; } void windowInsertResultInto(const WindowTransform * transform, @@ -1868,7 +1847,7 @@ struct WindowFunctionExponentialTimeDecayedAvg final : public StatefulWindowFunc WindowFunctionExponentialTimeDecayedAvg(const std::string & name_, const DataTypes & argument_types_, const Array & parameters_) - : StatefulWindowFunction(name_, argument_types_, parameters_) + : StatefulWindowFunction(name_, argument_types_, parameters_, std::make_shared()) { if (parameters_.size() != 1) { @@ -1900,11 +1879,6 @@ struct WindowFunctionExponentialTimeDecayedAvg final : public StatefulWindowFunc } } - DataTypePtr getReturnType() const override - { - return std::make_shared(); - } - bool allocatesMemoryInArena() const override { return false; } void windowInsertResultInto(const WindowTransform * transform, @@ -1980,12 +1954,9 @@ struct WindowFunctionRowNumber final : public WindowFunction { WindowFunctionRowNumber(const std::string & name_, const DataTypes & argument_types_, const Array & parameters_) - : WindowFunction(name_, argument_types_, parameters_) + : WindowFunction(name_, argument_types_, parameters_, std::make_shared()) {} - DataTypePtr getReturnType() const override - { return std::make_shared(); } - bool allocatesMemoryInArena() const override { return false; } void windowInsertResultInto(const WindowTransform * transform, @@ -2004,7 +1975,7 @@ struct WindowFunctionLagLeadInFrame final : public WindowFunction { WindowFunctionLagLeadInFrame(const std::string & name_, const DataTypes & argument_types_, const Array & parameters_) - : WindowFunction(name_, argument_types_, parameters_) + : WindowFunction(name_, argument_types_, parameters_, createResultType(argument_types_, name_)) { if (!parameters.empty()) { @@ -2012,12 +1983,6 @@ struct WindowFunctionLagLeadInFrame final : public WindowFunction "Function {} cannot be parameterized", name_); } - if (argument_types.empty()) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Function {} takes at least one argument", name_); - } - if (argument_types.size() == 1) { return; @@ -2060,7 +2025,16 @@ struct WindowFunctionLagLeadInFrame final : public WindowFunction } } - DataTypePtr getReturnType() const override { return argument_types[0]; } + static DataTypePtr createResultType(const DataTypes & argument_types_, const std::string & name_) + { + if (argument_types_.empty()) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Function {} takes at least one argument", name_); + } + + return argument_types_[0]; + } bool allocatesMemoryInArena() const override { return false; } @@ -2125,7 +2099,7 @@ struct WindowFunctionNthValue final : public WindowFunction { WindowFunctionNthValue(const std::string & name_, const DataTypes & argument_types_, const Array & parameters_) - : WindowFunction(name_, argument_types_, 
parameters_) + : WindowFunction(name_, argument_types_, parameters_, createResultType(name_, argument_types_)) { if (!parameters.empty()) { @@ -2133,12 +2107,6 @@ struct WindowFunctionNthValue final : public WindowFunction "Function {} cannot be parameterized", name_); } - if (argument_types.size() != 2) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Function {} takes exactly two arguments", name_); - } - if (!isInt64OrUInt64FieldType(argument_types[1]->getDefault().getType())) { throw Exception(ErrorCodes::BAD_ARGUMENTS, @@ -2147,7 +2115,16 @@ struct WindowFunctionNthValue final : public WindowFunction } } - DataTypePtr getReturnType() const override { return argument_types[0]; } + static DataTypePtr createResultType(const std::string & name_, const DataTypes & argument_types_) + { + if (argument_types_.size() != 2) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Function {} takes exactly two arguments", name_); + } + + return argument_types_[0]; + } bool allocatesMemoryInArena() const override { return false; } @@ -2204,7 +2181,7 @@ struct WindowFunctionNonNegativeDerivative final : public StatefulWindowFunction WindowFunctionNonNegativeDerivative(const std::string & name_, const DataTypes & argument_types_, const Array & parameters_) - : StatefulWindowFunction(name_, argument_types_, parameters_) + : StatefulWindowFunction(name_, argument_types_, parameters_, std::make_shared()) { if (!parameters.empty()) { @@ -2263,9 +2240,6 @@ struct WindowFunctionNonNegativeDerivative final : public StatefulWindowFunction } } - - DataTypePtr getReturnType() const override { return std::make_shared(); } - bool allocatesMemoryInArena() const override { return false; } void windowInsertResultInto(const WindowTransform * transform, @@ -2339,7 +2313,8 @@ void registerWindowFunctions(AggregateFunctionFactory & factory) .returns_default_when_only_null = true, // This probably doesn't make any difference for window functions because // it is an Aggregator-specific setting. - .is_order_dependent = true }; + .is_order_dependent = true, + .is_window_function = true}; factory.registerFunction("rank", {[](const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *) diff --git a/src/QueryPipeline/QueryPipelineBuilder.cpp b/src/QueryPipeline/QueryPipelineBuilder.cpp index 626296834a2..a3b3438306e 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.cpp +++ b/src/QueryPipeline/QueryPipelineBuilder.cpp @@ -309,6 +309,7 @@ QueryPipelineBuilder QueryPipelineBuilder::unitePipelines( pipeline.limitMaxThreads(max_threads_limit); } + pipeline.setCollectedProcessors(nullptr); return pipeline; } @@ -384,11 +385,7 @@ std::unique_ptr QueryPipelineBuilder::joinPipelinesRightLe /// Collect the NEW processors for the right pipeline. QueryPipelineProcessorsCollector collector(*right); /// Remember the last step of the right pipeline. - ExpressionStep * step = typeid_cast(right->pipe.processors->back()->getQueryPlanStep()); - if (!step) - { - throw Exception(ErrorCodes::LOGICAL_ERROR, "The top step of the right pipeline should be ExpressionStep"); - } + IQueryPlanStep * step = right->pipe.processors->back()->getQueryPlanStep(); /// In case joined subquery has totals, and we don't, add default chunk to totals. bool default_totals = false; @@ -560,7 +557,8 @@ std::unique_ptr QueryPipelineBuilder::joinPipelinesRightLe /// Move the collected processors to the last step in the right pipeline. 
Processors processors = collector.detachProcessors(); - step->appendExtraProcessors(processors); + if (step) + step->appendExtraProcessors(processors); left->pipe.processors->insert(left->pipe.processors->end(), right->pipe.processors->begin(), right->pipe.processors->end()); left->resources = std::move(right->resources); diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index c2c12da6077..f39f830bcc0 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -572,20 +572,17 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context) ErrorCodes::ILLEGAL_COLUMN); } - auto insert_it = constraints.end(); + auto * insert_it = constraints.end(); constraints.emplace(insert_it, constraint_decl); metadata.constraints = ConstraintsDescription(constraints); } else if (type == DROP_CONSTRAINT) { auto constraints = metadata.constraints.getConstraints(); - auto erase_it = std::find_if( - constraints.begin(), - constraints.end(), - [this](const ASTPtr & constraint_ast) - { - return constraint_ast->as().name == constraint_name; - }); + auto * erase_it = std::find_if( + constraints.begin(), + constraints.end(), + [this](const ASTPtr & constraint_ast) { return constraint_ast->as().name == constraint_name; }); if (erase_it == constraints.end()) { @@ -1355,12 +1352,20 @@ static MutationCommand createMaterializeTTLCommand() return command; } -MutationCommands AlterCommands::getMutationCommands(StorageInMemoryMetadata metadata, bool materialize_ttl, ContextPtr context) const +MutationCommands AlterCommands::getMutationCommands(StorageInMemoryMetadata metadata, bool materialize_ttl, ContextPtr context, bool with_alters) const { MutationCommands result; for (const auto & alter_cmd : *this) + { if (auto mutation_cmd = alter_cmd.tryConvertToMutationCommand(metadata, context); mutation_cmd) + { result.push_back(*mutation_cmd); + } + else if (with_alters) + { + result.push_back(MutationCommand{.ast = alter_cmd.ast->clone(), .type = MutationCommand::Type::ALTER_WITHOUT_MUTATION}); + } + } if (materialize_ttl) { diff --git a/src/Storages/AlterCommands.h b/src/Storages/AlterCommands.h index 71c622cb9be..c91c82e9c7a 100644 --- a/src/Storages/AlterCommands.h +++ b/src/Storages/AlterCommands.h @@ -209,7 +209,7 @@ public: /// alter. If alter can be performed as pure metadata update, then the result is /// empty. If some TTL changes happened, then, depending on materialize_ttl /// an additional mutation command (MATERIALIZE_TTL) will be returned.
- MutationCommands getMutationCommands(StorageInMemoryMetadata metadata, bool materialize_ttl, ContextPtr context) const; + MutationCommands getMutationCommands(StorageInMemoryMetadata metadata, bool materialize_ttl, ContextPtr context, bool with_alters=false) const; }; } diff --git a/src/Storages/ColumnDefault.h b/src/Storages/ColumnDefault.h index 096a1f177ab..af1be6f3bec 100644 --- a/src/Storages/ColumnDefault.h +++ b/src/Storages/ColumnDefault.h @@ -26,6 +26,7 @@ struct ColumnDefault { ColumnDefaultKind kind = ColumnDefaultKind::Default; ASTPtr expression; + bool ephemeral_default = false; }; diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp index 5fa267a964b..0fdb21e064f 100644 --- a/src/Storages/ColumnsDescription.cpp +++ b/src/Storages/ColumnsDescription.cpp @@ -123,6 +123,7 @@ void ColumnDescription::readText(ReadBuffer & buf) { default_desc.kind = columnDefaultKindFromString(col_ast->default_specifier); default_desc.expression = std::move(col_ast->default_expression); + default_desc.ephemeral_default = col_ast->ephemeral_default; } if (col_ast->comment) diff --git a/src/Storages/ConstraintsDescription.cpp b/src/Storages/ConstraintsDescription.cpp index f73a148ad07..96037b46e52 100644 --- a/src/Storages/ConstraintsDescription.cpp +++ b/src/Storages/ConstraintsDescription.cpp @@ -107,7 +107,7 @@ std::unique_ptr ConstraintsDescription::buildGraph() const { static const NameSet relations = { "equals", "less", "lessOrEquals", "greaterOrEquals", "greater" }; - std::vector constraints_for_graph; + ASTs constraints_for_graph; auto atomic_formulas = getAtomicConstraintData(); for (const auto & atomic_formula : atomic_formulas) { @@ -153,7 +153,7 @@ const std::vector> & ConstraintsDescription return cnf_constraints; } -const std::vector & ConstraintsDescription::getConstraints() const +const ASTs & ConstraintsDescription::getConstraints() const { return constraints; } @@ -218,7 +218,7 @@ void ConstraintsDescription::update() { cnf_constraints.clear(); ast_to_atom_ids.clear(); - graph = std::make_unique(std::vector()); + graph = std::make_unique(ASTs()); return; } diff --git a/src/Storages/FileLog/StorageFileLog.cpp b/src/Storages/FileLog/StorageFileLog.cpp index 64b82eb4000..94d5f7441ec 100644 --- a/src/Storages/FileLog/StorageFileLog.cpp +++ b/src/Storages/FileLog/StorageFileLog.cpp @@ -72,6 +72,21 @@ StorageFileLog::StorageFileLog( storage_metadata.setComment(comment); setInMemoryMetadata(storage_metadata); + if (!fileOrSymlinkPathStartsWith(path, getContext()->getUserFilesPath())) + { + if (attach) + { + LOG_ERROR(log, "The absolute data path should be inside `user_files_path`({})", getContext()->getUserFilesPath()); + return; + } + else + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "The absolute data path should be inside `user_files_path`({})", + getContext()->getUserFilesPath()); + } + + bool created_metadata_directory = false; try { if (!attach) @@ -84,6 +99,7 @@ StorageFileLog::StorageFileLog( metadata_base_path); } disk->createDirectories(metadata_base_path); + created_metadata_directory = true; } loadMetaFiles(attach); @@ -101,7 +117,12 @@ StorageFileLog::StorageFileLog( catch (...) 
{ if (!attach) + { + if (created_metadata_directory) + disk->removeRecursive(metadata_base_path); throw; + } + tryLogCurrentException(__PRETTY_FUNCTION__); } } @@ -124,12 +145,6 @@ void StorageFileLog::loadMetaFiles(bool attach) void StorageFileLog::loadFiles() { - if (!fileOrSymlinkPathStartsWith(path, getContext()->getUserFilesPath())) - { - throw Exception( - ErrorCodes::BAD_ARGUMENTS, "The absolute data path should be inside `user_files_path`({})", getContext()->getUserFilesPath()); - } - auto absolute_path = std::filesystem::absolute(path); absolute_path = absolute_path.lexically_normal(); /// Normalize path. @@ -372,43 +387,26 @@ void StorageFileLog::drop() void StorageFileLog::startup() { - try - { - if (task) - { - task->holder->activateAndSchedule(); - } - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - } + if (task) + task->holder->activateAndSchedule(); } void StorageFileLog::shutdown() { - try + if (task) { - if (task) - { - task->stream_cancelled = true; + task->stream_cancelled = true; - /// Reader thread may wait for wake up - wakeUp(); + /// Reader thread may wait for wake up + wakeUp(); - LOG_TRACE(log, "Waiting for cleanup"); - task->holder->deactivate(); - } + LOG_TRACE(log, "Waiting for cleanup"); + task->holder->deactivate(); /// If there were no read calls and threadFunc never ran, the log files were never /// opened; just leave closing the files and storing the metadata to the streams, /// because closing the files here may result in a data race with an /// unfinished reading pipeline } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - task->holder->deactivate(); - } } void StorageFileLog::assertStreamGood(const std::ifstream & reader) diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index a0585e9c9a1..2c4ea91869c 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -241,6 +241,10 @@ public: /// Return true if storage can execute lightweight delete mutations. virtual bool supportsLightweightDelete() const { return false; } + /// Return true if storage can execute 'DELETE FROM' mutations. This is different from lightweight delete + /// because those are internally translated into 'ALTER UPDATE' mutations. + virtual bool supportsDelete() const { return false; } + private: StorageID storage_id; diff --git a/src/Storages/MergeTree/ActiveDataPartSet.cpp b/src/Storages/MergeTree/ActiveDataPartSet.cpp index a482dd21099..67199ca02ac 100644 --- a/src/Storages/MergeTree/ActiveDataPartSet.cpp +++ b/src/Storages/MergeTree/ActiveDataPartSet.cpp @@ -23,8 +23,13 @@ ActiveDataPartSet::ActiveDataPartSet(MergeTreeDataFormatVersion format_version_, bool ActiveDataPartSet::add(const String & name, Strings * out_replaced_parts) { - /// TODO make it exception safe (out_replaced_parts->push_back(...) may throw) auto part_info = MergeTreePartInfo::fromPartName(name, format_version); + return add(part_info, name, out_replaced_parts); +} + +bool ActiveDataPartSet::add(const MergeTreePartInfo & part_info, const String & name, Strings * out_replaced_parts) +{ + /// TODO make it exception safe (out_replaced_parts->push_back(...) may throw) if (getContainingPartImpl(part_info) != part_info_to_name.end()) return false; @@ -42,7 +47,7 @@ bool ActiveDataPartSet::add(const String & name, Strings * out_replaced_parts) if (!part_info.contains(it->first)) { if (!part_info.isDisjoint(it->first)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Part {} intersects previous part {}. 
It is a bug or a result of manual intervention in the ZooKeeper data.", name, it->first.getPartName()); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Part {} intersects previous part {}. It is a bug or a result of manual intervention in the ZooKeeper data.", part_info.getPartName(), it->first.getPartName()); ++it; break; } @@ -69,6 +74,12 @@ bool ActiveDataPartSet::add(const String & name, Strings * out_replaced_parts) part_info_to_name.emplace(part_info, name); return true; + +} + +bool ActiveDataPartSet::add(const MergeTreePartInfo & part_info, Strings * out_replaced_parts) +{ + return add(part_info, part_info.getPartName(), out_replaced_parts); } @@ -160,6 +171,16 @@ Strings ActiveDataPartSet::getParts() const return res; } +std::vector ActiveDataPartSet::getPartInfos() const +{ + std::vector res; + res.reserve(part_info_to_name.size()); + for (const auto & kv : part_info_to_name) + res.push_back(kv.first); + + return res; +} + size_t ActiveDataPartSet::size() const { return part_info_to_name.size(); diff --git a/src/Storages/MergeTree/ActiveDataPartSet.h b/src/Storages/MergeTree/ActiveDataPartSet.h index 8ab03625d5c..f3cd6b0019d 100644 --- a/src/Storages/MergeTree/ActiveDataPartSet.h +++ b/src/Storages/MergeTree/ActiveDataPartSet.h @@ -40,6 +40,8 @@ public: /// Returns true if the part was actually added. If out_replaced_parts != nullptr, it will contain /// parts that were replaced from the set by the newly added part. bool add(const String & name, Strings * out_replaced_parts = nullptr); + bool add(const MergeTreePartInfo & part_info, const String & name, Strings * out_replaced_parts = nullptr); + bool add(const MergeTreePartInfo & part_info, Strings * out_replaced_parts = nullptr); bool remove(const MergeTreePartInfo & part_info) { @@ -83,6 +85,7 @@ public: /// Returns parts in ascending order of the partition_id and block number. Strings getParts() const; + std::vector getPartInfos() const; size_t size() const; diff --git a/src/Storages/MergeTree/DataPartStorageOnDisk.cpp b/src/Storages/MergeTree/DataPartStorageOnDisk.cpp index 347ea16950e..215d6034a53 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDisk.cpp +++ b/src/Storages/MergeTree/DataPartStorageOnDisk.cpp @@ -59,9 +59,9 @@ std::string DataPartStorageOnDisk::getFullRootPath() const return fs::path(volume->getDisk()->getPath()) / root_path / ""; } -MutableDataPartStoragePtr DataPartStorageOnDisk::getProjection(const std::string & name) +MutableDataPartStoragePtr DataPartStorageOnDisk::getProjection(const std::string & name, bool use_parent_transaction) // NOLINT { - return std::shared_ptr(new DataPartStorageOnDisk(volume, std::string(fs::path(root_path) / part_dir), name, transaction)); + return std::shared_ptr(new DataPartStorageOnDisk(volume, std::string(fs::path(root_path) / part_dir), name, use_parent_transaction ? 
transaction : nullptr)); } DataPartStoragePtr DataPartStorageOnDisk::getProjection(const std::string & name) const @@ -638,12 +638,17 @@ MutableDataPartStoragePtr DataPartStorageOnDisk::clonePart( } void DataPartStorageOnDisk::rename( - const std::string & new_root_path, - const std::string & new_part_dir, + std::string new_root_path, + std::string new_part_dir, Poco::Logger * log, bool remove_new_dir_if_exists, bool fsync_part_dir) { + if (new_root_path.ends_with('/')) + new_root_path.pop_back(); + if (new_part_dir.ends_with('/')) + new_part_dir.pop_back(); + String to = fs::path(new_root_path) / new_part_dir / ""; if (volume->getDisk()->exists(to)) @@ -668,7 +673,6 @@ void DataPartStorageOnDisk::rename( fullPath(volume->getDisk(), to)); } } - String from = getRelativePath(); /// Why? diff --git a/src/Storages/MergeTree/DataPartStorageOnDisk.h b/src/Storages/MergeTree/DataPartStorageOnDisk.h index bea1596e1f7..fd408af9cf1 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDisk.h +++ b/src/Storages/MergeTree/DataPartStorageOnDisk.h @@ -21,7 +21,7 @@ public: std::string getPartDirectory() const override { return part_dir; } std::string getFullRootPath() const override; - MutableDataPartStoragePtr getProjection(const std::string & name) override; + MutableDataPartStoragePtr getProjection(const std::string & name, bool use_parent_transaction = true) override; // NOLINT DataPartStoragePtr getProjection(const std::string & name) const override; bool exists() const override; @@ -123,8 +123,8 @@ public: void createHardLinkFrom(const IDataPartStorage & source, const std::string & from, const std::string & to) override; void rename( - const std::string & new_root_path, - const std::string & new_part_dir, + std::string new_root_path, + std::string new_part_dir, Poco::Logger * log, bool remove_new_dir_if_exists, bool fsync_part_dir) override; @@ -156,5 +156,4 @@ private: Poco::Logger * log, bool is_projection) const; }; - } diff --git a/src/Storages/MergeTree/EphemeralLockInZooKeeper.cpp b/src/Storages/MergeTree/EphemeralLockInZooKeeper.cpp index bc28a555f77..ca81578c5c6 100644 --- a/src/Storages/MergeTree/EphemeralLockInZooKeeper.cpp +++ b/src/Storages/MergeTree/EphemeralLockInZooKeeper.cpp @@ -2,7 +2,7 @@ #include #include #include -#include +#include namespace DB diff --git a/src/Storages/MergeTree/IDataPartStorage.h b/src/Storages/MergeTree/IDataPartStorage.h index 53ee2738fc6..d7c0c9c76e3 100644 --- a/src/Storages/MergeTree/IDataPartStorage.h +++ b/src/Storages/MergeTree/IDataPartStorage.h @@ -8,6 +8,8 @@ #include #include #include +#include +#include namespace DB { @@ -85,7 +87,7 @@ public: /// virtual std::string getRelativeRootPath() const = 0; /// Get a storage for projection. - virtual std::shared_ptr getProjection(const std::string & name) = 0; + virtual std::shared_ptr getProjection(const std::string & name, bool use_parent_transaction = true) = 0; // NOLINT virtual std::shared_ptr getProjection(const std::string & name) const = 0; /// Part directory exists. @@ -237,12 +239,13 @@ public: /// Examples are: 'all_1_2_1' -> 'detached/all_1_2_1' /// 'moving/tmp_all_1_2_1' -> 'all_1_2_1' virtual void rename( - const std::string & new_root_path, - const std::string & new_part_dir, + std::string new_root_path, + std::string new_part_dir, Poco::Logger * log, bool remove_new_dir_if_exists, bool fsync_part_dir) = 0; + /// Starts a transaction of mutable operations. virtual void beginTransaction() = 0; /// Commits a transaction of mutable operations. 
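Not part of the patch: a minimal standalone sketch of the trailing-slash normalization that taking rename()'s path arguments by value enables (the helper name, sample paths, and main() below are hypothetical; C++20 assumed for std::string::ends_with).

#include <filesystem>
#include <iostream>
#include <string>

namespace fs = std::filesystem;

/// Because the components are taken by value, a trailing '/' can be trimmed in place,
/// so "store/abc/" and "store/abc" yield the same joined target path.
static fs::path joinPartPath(std::string new_root_path, std::string new_part_dir)
{
    if (new_root_path.ends_with('/'))
        new_root_path.pop_back();
    if (new_part_dir.ends_with('/'))
        new_part_dir.pop_back();

    /// Appending "" keeps a single trailing separator (directory-style path),
    /// mirroring the fs::path(new_root_path) / new_part_dir / "" expression in the patch.
    return fs::path(new_root_path) / new_part_dir / "";
}

int main()
{
    std::cout << joinPartPath("store/abc/", "all_1_2_1/").string() << '\n'; /// store/abc/all_1_2_1/
    std::cout << joinPartPath("store/abc", "all_1_2_1").string() << '\n';   /// store/abc/all_1_2_1/
}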
diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 02a7a2ae641..afebb8992e0 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -576,6 +576,9 @@ void IMergeTreeDataPart::assertState(const std::initializer_list #include +#include + #include #include #include @@ -599,9 +601,9 @@ static const ActionsDAG::Node & cloneASTWithInversionPushDown( if (name == "indexHint") { ActionsDAG::NodeRawConstPtrs children; - if (const auto * adaptor = typeid_cast(node.function_builder.get())) + if (const auto * adaptor = typeid_cast(node.function_base.get())) { - if (const auto * index_hint = typeid_cast(adaptor->getFunction())) + if (const auto * index_hint = typeid_cast(adaptor->getFunction().get())) { const auto & index_hint_dag = index_hint->getActions(); children = index_hint_dag->getOutputs(); @@ -611,7 +613,7 @@ static const ActionsDAG::Node & cloneASTWithInversionPushDown( } } - const auto & func = inverted_dag.addFunction(node.function_builder, children, ""); + const auto & func = inverted_dag.addFunction(FunctionFactory::instance().get(node.function_base->getName(), context), children, ""); to_inverted[&node] = &func; return func; } @@ -654,7 +656,8 @@ static const ActionsDAG::Node & cloneASTWithInversionPushDown( return func; } - res = &inverted_dag.addFunction(node.function_builder, children, ""); + res = &inverted_dag.addFunction(node.function_base, children, ""); + chassert(res->result_type == node.result_type); } } @@ -939,12 +942,13 @@ static FieldRef applyFunction(const FunctionBasePtr & func, const DataTypePtr & * which while not strictly monotonic, are monotonic everywhere on the input range. */ bool KeyCondition::transformConstantWithValidFunctions( + ContextPtr context, const String & expr_name, size_t & out_key_column_num, DataTypePtr & out_key_column_type, Field & out_value, DataTypePtr & out_type, - std::function always_monotonic) const + std::function always_monotonic) const { const auto & sample_block = key_expr->getSampleBlock(); @@ -1024,14 +1028,16 @@ bool KeyCondition::transformConstantWithValidFunctions( auto left_arg_type = left->result_type; auto left_arg_value = (*left->column)[0]; std::tie(const_value, const_type) = applyBinaryFunctionForFieldOfUnknownType( - func->function_builder, left_arg_type, left_arg_value, const_type, const_value); + FunctionFactory::instance().get(func->function_base->getName(), context), + left_arg_type, left_arg_value, const_type, const_value); } else { auto right_arg_type = right->result_type; auto right_arg_value = (*right->column)[0]; std::tie(const_value, const_type) = applyBinaryFunctionForFieldOfUnknownType( - func->function_builder, const_type, const_value, right_arg_type, right_arg_value); + FunctionFactory::instance().get(func->function_base->getName(), context), + const_type, const_value, right_arg_type, right_arg_value); } } } @@ -1067,7 +1073,13 @@ bool KeyCondition::canConstantBeWrappedByMonotonicFunctions( return false; return transformConstantWithValidFunctions( - expr_name, out_key_column_num, out_key_column_type, out_value, out_type, [](IFunctionBase & func, const IDataType & type) + node.getTreeContext().getQueryContext(), + expr_name, + out_key_column_num, + out_key_column_type, + out_value, + out_type, + [](const IFunctionBase & func, const IDataType & type) { if (!func.hasInformationAboutMonotonicity()) return false; @@ -1116,7 +1128,13 @@ bool KeyCondition::canConstantBeWrappedByFunctions( return false; 
return transformConstantWithValidFunctions( - expr_name, out_key_column_num, out_key_column_type, out_value, out_type, [](IFunctionBase & func, const IDataType &) + node.getTreeContext().getQueryContext(), + expr_name, + out_key_column_num, + out_key_column_type, + out_value, + out_type, + [](const IFunctionBase & func, const IDataType &) { return func.isDeterministic(); }); diff --git a/src/Storages/MergeTree/KeyCondition.h b/src/Storages/MergeTree/KeyCondition.h index 258f88ac6b9..0a4ac93b082 100644 --- a/src/Storages/MergeTree/KeyCondition.h +++ b/src/Storages/MergeTree/KeyCondition.h @@ -19,7 +19,7 @@ namespace DB class ASTFunction; class Context; class IFunction; -using FunctionBasePtr = std::shared_ptr; +using FunctionBasePtr = std::shared_ptr; class ExpressionActions; using ExpressionActionsPtr = std::shared_ptr; struct ActionDAGNodes; @@ -421,12 +421,13 @@ private: std::vector & out_functions_chain); bool transformConstantWithValidFunctions( + ContextPtr context, const String & expr_name, size_t & out_key_column_num, DataTypePtr & out_key_column_type, Field & out_value, DataTypePtr & out_type, - std::function always_monotonic) const; + std::function always_monotonic) const; bool canConstantBeWrappedByMonotonicFunctions( const RPNBuilderTreeNode & node, diff --git a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp index 9a9b8a4a6bb..d5627774052 100644 --- a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp @@ -297,9 +297,14 @@ bool MergeFromLogEntryTask::finalize(ReplicatedMergeMutateTaskBase::PartLogWrite { part = merge_task->getFuture().get(); - /// Task is not needed - merge_task.reset(); storage.merger_mutator.renameMergedTemporaryPart(part, parts, NO_TRANSACTION_PTR, *transaction_ptr); + /// Why do we reset the task here? Because it holds a shared pointer to the part, and tryRemovePartImmediately would + /// not be able to remove the part and would throw an exception (because someone holds the pointer). + /// + /// Why can't we reset the task right after obtaining the part from getFuture()? Because it holds an RAII wrapper for + /// temp directories which guards the temporary dir from background removal. So this is the right place to reset the task, + /// and it is really needed. + merge_task.reset(); try { diff --git a/src/Storages/MergeTree/MergeList.cpp b/src/Storages/MergeTree/MergeList.cpp index a833da7064f..76d69cc6b7d 100644 --- a/src/Storages/MergeTree/MergeList.cpp +++ b/src/Storages/MergeTree/MergeList.cpp @@ -88,10 +88,6 @@ MergeListElement::MergeListElement( /// thread_group::memory_tracker, but MemoryTrackerThreadSwitcher will reset parent). memory_tracker.setProfilerStep(settings.memory_profiler_step); memory_tracker.setSampleProbability(settings.memory_profiler_sample_probability); - /// Specify sample probability also for current thread to track more deallocations. 
- if (auto * thread_memory_tracker = DB::CurrentThread::getMemoryTracker()) - thread_memory_tracker->setSampleProbability(settings.memory_profiler_sample_probability); - memory_tracker.setSoftLimit(settings.memory_overcommit_ratio_denominator); if (settings.memory_tracker_fault_probability > 0.0) memory_tracker.setFaultProbability(settings.memory_tracker_fault_probability); diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index 0b6fe23e961..ea6ed4b403a 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -96,9 +96,13 @@ static void extractMergingAndGatheringColumns( bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() { - // projection parts have different prefix and suffix compared to normal parts. - // E.g. `proj_a.proj` for a normal projection merge and `proj_a.tmp_proj` for a projection materialization merge. - const String local_tmp_prefix = global_ctx->parent_part ? "" : "tmp_merge_"; + String local_tmp_prefix; + if (global_ctx->need_prefix) + { + // projection parts have different prefix and suffix compared to normal parts. + // E.g. `proj_a.proj` for a normal projection merge and `proj_a.tmp_proj` for a projection materialization merge. + local_tmp_prefix = global_ctx->parent_part ? "" : "tmp_merge_"; + } const String local_tmp_suffix = global_ctx->parent_part ? ctx->suffix : ""; if (global_ctx->merges_blocker->isCancelled() || global_ctx->merge_list_element_ptr->is_cancelled.load(std::memory_order_relaxed)) @@ -653,6 +657,7 @@ bool MergeTask::MergeProjectionsStage::mergeMinMaxIndexAndPrepareProjections() c global_ctx->deduplicate, global_ctx->deduplicate_by_columns, projection_merging_params, + global_ctx->need_prefix, global_ctx->new_data_part.get(), ".proj", NO_TRANSACTION_PTR, diff --git a/src/Storages/MergeTree/MergeTask.h b/src/Storages/MergeTree/MergeTask.h index 6a29cdbb5ca..46af2e1563a 100644 --- a/src/Storages/MergeTree/MergeTask.h +++ b/src/Storages/MergeTree/MergeTask.h @@ -59,6 +59,7 @@ public: bool deduplicate_, Names deduplicate_by_columns_, MergeTreeData::MergingParams merging_params_, + bool need_prefix, IMergeTreeDataPart * parent_part_, String suffix_, MergeTreeTransactionPtr txn, @@ -86,6 +87,7 @@ public: global_ctx->merges_blocker = std::move(merges_blocker_); global_ctx->ttl_merges_blocker = std::move(ttl_merges_blocker_); global_ctx->txn = std::move(txn); + global_ctx->need_prefix = need_prefix; auto prepare_stage_ctx = std::make_shared(); @@ -171,6 +173,7 @@ private: IMergedBlockOutputStream::WrittenOffsetColumns written_offset_columns{}; MergeTreeTransactionPtr txn; + bool need_prefix; scope_guard temporary_directory_lock; }; @@ -184,6 +187,7 @@ private: { /// Dependencies String suffix; + bool need_prefix; MergeTreeData::MergingParams merging_params{}; DiskPtr tmp_disk{nullptr}; @@ -192,7 +196,7 @@ private: bool force_ttl{false}; CompressionCodecPtr compression_codec{nullptr}; size_t sum_input_rows_upper_bound{0}; - std::unique_ptr rows_sources_file{nullptr}; + std::unique_ptr rows_sources_file{nullptr}; std::unique_ptr rows_sources_uncompressed_write_buf{nullptr}; std::unique_ptr rows_sources_write_buf{nullptr}; std::optional column_sizes{}; @@ -257,7 +261,7 @@ private: /// Begin dependencies from previous stage std::unique_ptr rows_sources_write_buf{nullptr}; std::unique_ptr rows_sources_uncompressed_write_buf{nullptr}; - std::unique_ptr rows_sources_file; + std::unique_ptr rows_sources_file; std::optional column_sizes; CompressionCodecPtr compression_codec; DiskPtr 
tmp_disk{nullptr}; diff --git a/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp b/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp index 234487763d7..f1c1a96d24f 100644 --- a/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp @@ -41,7 +41,7 @@ void MergeTreeBackgroundExecutor::increaseThreadsAndMaxTasksCount(size_t return; } - if (new_max_tasks_count < max_tasks_count) + if (new_max_tasks_count < max_tasks_count.load(std::memory_order_relaxed)) { LOG_WARNING(log, "Loaded new max tasks count for {}Executor from top level config, but new value ({}) is not greater than current {}", name, new_max_tasks_count, max_tasks_count); return; @@ -59,15 +59,14 @@ void MergeTreeBackgroundExecutor::increaseThreadsAndMaxTasksCount(size_t for (size_t number = threads_count; number < new_threads_count; ++number) pool.scheduleOrThrowOnError([this] { threadFunction(); }); - max_tasks_count = new_max_tasks_count; + max_tasks_count.store(new_max_tasks_count, std::memory_order_relaxed); threads_count = new_threads_count; } template size_t MergeTreeBackgroundExecutor::getMaxTasksCount() const { - std::lock_guard lock(mutex); - return max_tasks_count; + return max_tasks_count.load(std::memory_order_relaxed); } template diff --git a/src/Storages/MergeTree/MergeTreeBackgroundExecutor.h b/src/Storages/MergeTree/MergeTreeBackgroundExecutor.h index 0fc888dd6ad..ad50cd44189 100644 --- a/src/Storages/MergeTree/MergeTreeBackgroundExecutor.h +++ b/src/Storages/MergeTree/MergeTreeBackgroundExecutor.h @@ -194,6 +194,10 @@ public: /// Supports only increasing the number of threads and tasks, because /// implementing tasks eviction will definitely be too error-prone and buggy. void increaseThreadsAndMaxTasksCount(size_t new_threads_count, size_t new_max_tasks_count); + + /// This method can return a stale value of max_tasks_count (no mutex locking). + /// That is okay because the number of tasks can only be increased, and reading a stale value + /// can only lead to some postponing, not to a logical error. size_t getMaxTasksCount() const; bool trySchedule(ExecutableTaskPtr task); @@ -203,7 +207,7 @@ public: private: String name; size_t threads_count TSA_GUARDED_BY(mutex) = 0; - size_t max_tasks_count TSA_GUARDED_BY(mutex) = 0; + std::atomic max_tasks_count = 0; CurrentMetrics::Metric metric; void routine(TaskRuntimeDataPtr item); diff --git a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp index bd7e3a64749..5b6b0f09bc3 100644 --- a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp @@ -68,12 +68,13 @@ IMergeTreeSelectAlgorithm::IMergeTreeSelectAlgorithm( size_t non_const_columns_offset = header_without_const_virtual_columns.columns(); injectNonConstVirtualColumns(0, header_without_const_virtual_columns, virt_column_names); - /// Reverse order is to minimize reallocations when removing columns from the block for (size_t col_num = non_const_columns_offset; col_num < header_without_const_virtual_columns.columns(); ++col_num) non_const_virtual_column_names.emplace_back(header_without_const_virtual_columns.getByPosition(col_num).name); result_header = header_without_const_virtual_columns; injectPartConstVirtualColumns(0, result_header, nullptr, partition_value_type, virt_column_names); + + LOG_TEST(log, "PREWHERE actions: {}", (prewhere_actions ? 
prewhere_actions->dump() : std::string(""))); } diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index e60781efa9c..263c95bd68c 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1,34 +1,23 @@ #include #include -#include #include #include #include #include #include -#include -#include -#include #include #include -#include #include #include #include -#include #include -#include #include #include #include -#include #include -#include #include -#include #include -#include #include #include #include @@ -56,7 +45,6 @@ #include #include #include -#include #include #include #include @@ -70,22 +58,17 @@ #include #include #include -#include #include #include #include -#include #include #include -#include #include -#include #include #include -#include #include #include #include @@ -209,9 +192,86 @@ static void checkSampleExpression(const StorageInMemoryMetadata & metadata, bool ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER); } + +void MergeTreeData::initializeDirectoriesAndFormatVersion(const std::string & relative_data_path_, bool attach, const std::string & date_column_name, bool need_create_directories) +{ + relative_data_path = relative_data_path_; + + MergeTreeDataFormatVersion min_format_version(0); + if (date_column_name.empty()) + min_format_version = MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING; + + if (relative_data_path.empty()) + throw Exception("MergeTree storages require data path", ErrorCodes::INCORRECT_FILE_NAME); + + const auto format_version_path = fs::path(relative_data_path) / MergeTreeData::FORMAT_VERSION_FILE_NAME; + std::optional read_format_version; + + for (const auto & disk : getDisks()) + { + if (disk->isBroken()) + continue; + + if (need_create_directories) + { + disk->createDirectories(relative_data_path); + disk->createDirectories(fs::path(relative_data_path) / MergeTreeData::DETACHED_DIR_NAME); + } + + if (disk->exists(format_version_path)) + { + auto buf = disk->readFile(format_version_path); + UInt32 current_format_version{0}; + readIntText(current_format_version, *buf); + if (!buf->eof()) + throw Exception(ErrorCodes::CORRUPTED_DATA, "Bad version file: {}", fullPath(disk, format_version_path)); + + if (!read_format_version.has_value()) + read_format_version = current_format_version; + else if (*read_format_version != current_format_version) + throw Exception(ErrorCodes::CORRUPTED_DATA, "Version file on {} contains version {} expected version is {}.", fullPath(disk, format_version_path), current_format_version, *read_format_version); + } + } + + + // When data path or file not exists, ignore the format_version check + if (!attach || !read_format_version) + { + format_version = min_format_version; + + // try to write to first non-readonly disk + for (const auto & disk : getStoragePolicy()->getDisks()) + { + if (disk->isBroken()) + continue; + + if (!disk->isReadOnly()) + { + auto buf = disk->writeFile(format_version_path, DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite, getContext()->getWriteSettings()); + writeIntText(format_version.toUnderType(), *buf); + if (getContext()->getSettingsRef().fsync_metadata) + buf->sync(); + } + + break; + } + } + else + { + format_version = *read_format_version; + } + + if (format_version < min_format_version) + { + if (min_format_version == MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING.toUnderType()) + throw Exception( + "MergeTree data format version on disk doesn't support custom partitioning", + 
ErrorCodes::METADATA_MISMATCH); + } +} + MergeTreeData::MergeTreeData( const StorageID & table_id_, - const String & relative_data_path_, const StorageInMemoryMetadata & metadata_, ContextMutablePtr context_, const String & date_column_name, @@ -222,9 +282,9 @@ MergeTreeData::MergeTreeData( BrokenPartCallback broken_part_callback_) : IStorage(table_id_) , WithMutableContext(context_->getGlobalContext()) + , format_version(date_column_name.empty() ? MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING : MERGE_TREE_DATA_OLD_FORMAT_VERSION) , merging_params(merging_params_) , require_part_metadata(require_part_metadata_) - , relative_data_path(relative_data_path_) , broken_part_callback(broken_part_callback_) , log_name(std::make_shared(table_id_.getNameForLogs())) , log(&Poco::Logger::get(*log_name)) @@ -242,14 +302,10 @@ MergeTreeData::MergeTreeData( const auto settings = getSettings(); allow_nullable_key = attach || settings->allow_nullable_key; - if (relative_data_path.empty()) - throw Exception("MergeTree storages require data path", ErrorCodes::INCORRECT_FILE_NAME); - /// Check sanity of MergeTreeSettings. Only when table is created. if (!attach) settings->sanityCheck(getContext()->getMergeMutateExecutor()->getMaxTasksCount()); - MergeTreeDataFormatVersion min_format_version(0); if (!date_column_name.empty()) { try @@ -270,7 +326,6 @@ MergeTreeData::MergeTreeData( { is_custom_partitioned = true; checkPartitionKeyAndInitMinMax(metadata_.partition_key); - min_format_version = MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING; } setProperties(metadata_, metadata_, attach); @@ -286,64 +341,6 @@ MergeTreeData::MergeTreeData( checkTTLExpressions(metadata_, metadata_); - const auto format_version_path = fs::path(relative_data_path) / MergeTreeData::FORMAT_VERSION_FILE_NAME; - std::optional read_format_version; - /// Creating directories, if not exist. 
- for (const auto & disk : getDisks()) - { - if (disk->isBroken()) - continue; - - disk->createDirectories(relative_data_path); - disk->createDirectories(fs::path(relative_data_path) / MergeTreeData::DETACHED_DIR_NAME); - - if (disk->exists(format_version_path)) - { - auto buf = disk->readFile(format_version_path); - UInt32 current_format_version{0}; - readIntText(current_format_version, *buf); - if (!buf->eof()) - throw Exception(ErrorCodes::CORRUPTED_DATA, "Bad version file: {}", fullPath(disk, format_version_path)); - - if (!read_format_version.has_value()) - read_format_version = current_format_version; - else if (*read_format_version != current_format_version) - throw Exception(ErrorCodes::CORRUPTED_DATA, "Version file on {} contains version {} expected version is {}.", fullPath(disk, format_version_path), current_format_version, *read_format_version); - } - } - - // When data path or file not exists, ignore the format_version check - if (!attach || !read_format_version) - { - format_version = min_format_version; - - // try to write to first non-readonly disk - for (const auto & disk : getStoragePolicy()->getDisks()) - { - if (!disk->isReadOnly()) - { - auto buf = disk->writeFile(format_version_path, DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite, context_->getWriteSettings()); - writeIntText(format_version.toUnderType(), *buf); - if (getContext()->getSettingsRef().fsync_metadata) - buf->sync(); - - break; - } - } - } - else - { - format_version = *read_format_version; - } - - if (format_version < min_format_version) - { - if (min_format_version == MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING.toUnderType()) - throw Exception( - "MergeTree data format version on disk doesn't support custom partitioning", - ErrorCodes::METADATA_MISMATCH); - } - String reason; if (!canUsePolymorphicParts(*settings, &reason) && !reason.empty()) LOG_WARNING(log, "{} Settings 'min_rows_for_wide_part', 'min_bytes_for_wide_part', " @@ -1016,7 +1013,7 @@ void MergeTreeData::loadDataPartsFromDisk( size_t suspicious_broken_parts_bytes = 0; std::atomic has_adaptive_parts = false; std::atomic has_non_adaptive_parts = false; - std::atomic has_lightweight_in_parts = false; + std::atomic has_lightweight_deletes_in_parts = false; std::mutex mutex; auto load_part = [&](const String & part_name, const DiskPtr & part_disk_ptr) @@ -1108,7 +1105,7 @@ void MergeTreeData::loadDataPartsFromDisk( /// Check if there is lightweight delete in part if (part->hasLightweightDelete()) - has_lightweight_in_parts.store(true, std::memory_order_relaxed); + has_lightweight_deletes_in_parts.store(true, std::memory_order_relaxed); part->modification_time = part_disk_ptr->getLastModified(fs::path(relative_data_path) / part_name).epochTime(); /// Assume that all parts are Active, covered parts will be detected and marked as Outdated later @@ -1192,7 +1189,7 @@ void MergeTreeData::loadDataPartsFromDisk( has_non_adaptive_index_granularity_parts = has_non_adaptive_parts; - if (has_lightweight_in_parts) + if (has_lightweight_deletes_in_parts) has_lightweight_delete_parts.store(true); if (suspicious_broken_parts > settings->max_suspicious_broken_parts && !skip_sanity_checks) @@ -1348,6 +1345,8 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) loadDataPartsFromDisk( broken_parts_to_detach, duplicate_parts_to_remove, pool, num_parts, parts_queue, skip_sanity_checks, settings); + bool is_static_storage = isStaticStorage(); + if (settings->in_memory_parts_enable_wal) { std::map disk_wal_part_map; @@ -1376,13 +1375,13 @@ void 
MergeTreeData::loadDataParts(bool skip_sanity_checks) ErrorCodes::CORRUPTED_DATA); write_ahead_log = std::make_shared(*this, disk_ptr, it->name()); - for (auto && part : write_ahead_log->restore(metadata_snapshot, getContext(), part_lock)) + for (auto && part : write_ahead_log->restore(metadata_snapshot, getContext(), part_lock, is_static_storage)) disk_wal_parts.push_back(std::move(part)); } else { MergeTreeWriteAheadLog wal(*this, disk_ptr, it->name()); - for (auto && part : wal.restore(metadata_snapshot, getContext(), part_lock)) + for (auto && part : wal.restore(metadata_snapshot, getContext(), part_lock, is_static_storage)) disk_wal_parts.push_back(std::move(part)); } } @@ -1408,11 +1407,17 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) return; } - for (auto & part : broken_parts_to_detach) - part->renameToDetached("broken-on-start"); /// detached parts must not have '_' in prefixes + if (!is_static_storage) + { + for (auto & part : broken_parts_to_detach) + { + /// detached parts must not have '_' in prefixes + part->renameToDetached("broken-on-start"); + } - for (auto & part : duplicate_parts_to_remove) - part->remove(); + for (auto & part : duplicate_parts_to_remove) + part->remove(); + } auto deactivate_part = [&] (DataPartIteratorByStateAndInfo it) { @@ -1811,7 +1816,7 @@ MergeTreeData::DataPartsVector MergeTreeData::grabOldParts(bool force) } if (!res.empty()) - LOG_TRACE(log, "Found {} old parts to remove. Parts {}", + LOG_TRACE(log, "Found {} old parts to remove. Parts: [{}]", res.size(), fmt::join(getPartsNames(res), ", ")); return res; @@ -1831,6 +1836,9 @@ void MergeTreeData::rollbackDeletingParts(const MergeTreeData::DataPartsVector & void MergeTreeData::removePartsFinally(const MergeTreeData::DataPartsVector & parts) { + if (parts.empty()) + return; + { auto lock = lockParts(); @@ -1843,16 +1851,16 @@ void MergeTreeData::removePartsFinally(const MergeTreeData::DataPartsVector & pa auto it = data_parts_by_info.find(part->info); if (it == data_parts_by_info.end()) - throw Exception("Deleting data part " + part->name + " doesn't exist", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Deleting data part {} doesn't exist", part->name); (*it)->assertState({DataPartState::Deleting}); - LOG_DEBUG(log, "Finally removing part from memory {}", part->name); - data_parts_indexes.erase(it); } } + LOG_DEBUG(log, "Removing {} parts from memory: Parts: [{}]", parts.size(), fmt::join(parts, ", ")); + /// Data parts is still alive (since DataPartsVector holds shared_ptrs) and contain useful metainformation for logging /// NOTE: There is no need to log parts deletion somewhere else, all deleting parts pass through this function and pass away @@ -1905,13 +1913,14 @@ void MergeTreeData::flushAllInMemoryPartsIfNeeded() size_t MergeTreeData::clearOldPartsFromFilesystem(bool force) { DataPartsVector parts_to_remove = grabOldParts(force); + if (parts_to_remove.empty()) + return 0; + clearPartsFromFilesystem(parts_to_remove); removePartsFinally(parts_to_remove); - /// This is needed to close files to avoid they reside on disk after being deleted. /// NOTE: we can drop files from cache more selectively but this is good enough. 
- if (!parts_to_remove.empty()) - getContext()->dropMMappedFileCache(); + getContext()->dropMMappedFileCache(); return parts_to_remove.size(); } @@ -1956,7 +1965,7 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t { const auto settings = getSettings(); bool has_zero_copy_parts = false; - if (supportsReplication() && settings->allow_remote_fs_zero_copy_replication) + if (settings->allow_remote_fs_zero_copy_replication && dynamic_cast(this) != nullptr) { has_zero_copy_parts = std::any_of( parts_to_remove.begin(), parts_to_remove.end(), @@ -1975,7 +1984,8 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t ThreadPool pool(num_threads); /// NOTE: Under heavy system load you may get "Cannot schedule a task" from ThreadPool. - LOG_DEBUG(log, "Removing {} parts from filesystem: {} (concurrently)", parts_to_remove.size(), fmt::join(parts_to_remove, ", ")); + LOG_DEBUG( + log, "Removing {} parts from filesystem (concurrently): Parts: [{}]", parts_to_remove.size(), fmt::join(parts_to_remove, ", ")); for (const DataPartPtr & part : parts_to_remove) { pool.scheduleOrThrowOnError([&, thread_group = CurrentThread::getGroup()] @@ -2000,7 +2010,8 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t } else if (!parts_to_remove.empty()) { - LOG_DEBUG(log, "Removing {} parts from filesystem: {}", parts_to_remove.size(), fmt::join(parts_to_remove, ", ")); + LOG_DEBUG( + log, "Removing {} parts from filesystem (serially): Parts: [{}]", parts_to_remove.size(), fmt::join(parts_to_remove, ", ")); for (const DataPartPtr & part : parts_to_remove) { preparePartForRemoval(part)->remove(); @@ -2167,6 +2178,8 @@ size_t MergeTreeData::clearEmptyParts() void MergeTreeData::rename(const String & new_table_path, const StorageID & new_table_id) { + LOG_INFO(log, "Renaming table to path {} with ID {}", new_table_path, new_table_id.getFullTableName()); + auto disks = getStoragePolicy()->getDisks(); for (const auto & disk : disks) @@ -2288,7 +2301,9 @@ void MergeTreeData::dropAllData() try { - if (!disk->isDirectoryEmpty(relative_data_path) && supportsReplication() && disk->supportZeroCopyReplication() && settings_ptr->allow_remote_fs_zero_copy_replication) + if (!disk->isDirectoryEmpty(relative_data_path) && + supportsReplication() && disk->supportZeroCopyReplication() + && settings_ptr->allow_remote_fs_zero_copy_replication) { std::vector files_left; disk->listFiles(relative_data_path, files_left); @@ -3081,7 +3096,7 @@ void MergeTreeData::checkPartDuplicate(MutableDataPartPtr & part, Transaction & } } -void MergeTreeData::preparePartForCommit(MutableDataPartPtr & part, Transaction & out_transaction) +void MergeTreeData::preparePartForCommit(MutableDataPartPtr & part, Transaction & out_transaction, bool need_rename) { part->is_temp = false; part->setState(DataPartState::PreActive); @@ -3093,12 +3108,55 @@ void MergeTreeData::preparePartForCommit(MutableDataPartPtr & part, Transaction return !may_be_cleaned_up || temporary_parts.contains(dir_name); }()); - part->renameTo(part->name, true); + if (need_rename) + part->renameTo(part->name, true); data_parts_indexes.insert(part); out_transaction.addPart(part); } +bool MergeTreeData::addTempPart( + MutableDataPartPtr & part, + Transaction & out_transaction, + DataPartsLock & lock, + DataPartsVector * out_covered_parts) +{ + LOG_TRACE(log, "Adding temporary part from directory {} with name {}.", part->getDataPartStorage().getPartDirectory(), part->name); + if (&out_transaction.data != 
this) + throw Exception("MergeTreeData::Transaction for one table cannot be used with another. It is a bug.", + ErrorCodes::LOGICAL_ERROR); + + if (part->hasLightweightDelete()) + has_lightweight_delete_parts.store(true); + + checkPartPartition(part, lock); + checkPartDuplicate(part, out_transaction, lock); + + DataPartPtr covering_part; + DataPartsVector covered_parts = getActivePartsToReplace(part->info, part->name, covering_part, lock); + + if (covering_part) + { + LOG_WARNING(log, "Tried to add obsolete part {} covered by {}", part->name, covering_part->getNameWithState()); + return false; + } + + /// All checks are passed. Now we can rename the part on disk. + /// So, we maintain invariant: if a non-temporary part in filesystem then it is in data_parts + preparePartForCommit(part, out_transaction, /* need_rename = */false); + + if (out_covered_parts) + { + out_covered_parts->reserve(covered_parts.size()); + + for (DataPartPtr & covered_part : covered_parts) + out_covered_parts->emplace_back(std::move(covered_part)); + } + + return true; +} + + bool MergeTreeData::renameTempPartAndReplaceImpl( MutableDataPartPtr & part, Transaction & out_transaction, @@ -3140,7 +3198,7 @@ bool MergeTreeData::renameTempPartAndReplaceImpl( /// All checks are passed. Now we can rename the part on disk. /// So, we maintain invariant: if a non-temporary part in filesystem then it is in data_parts - preparePartForCommit(part, out_transaction); + preparePartForCommit(part, out_transaction, /* need_rename */ true); if (out_covered_parts) { @@ -3261,8 +3319,8 @@ void MergeTreeData::removePartsInRangeFromWorkingSet(MergeTreeTransaction * txn, removePartsInRangeFromWorkingSetAndGetPartsToRemoveFromZooKeeper(txn, drop_range, lock); } -MergeTreeData::PartsToRemoveFromZooKeeper MergeTreeData::removePartsInRangeFromWorkingSetAndGetPartsToRemoveFromZooKeeper( - MergeTreeTransaction * txn, const MergeTreePartInfo & drop_range, DataPartsLock & lock) +DataPartsVector MergeTreeData::grabActivePartsToRemoveForDropRange( + MergeTreeTransaction * txn, const MergeTreePartInfo & drop_range, DataPartsLock & lock) { DataPartsVector parts_to_remove; @@ -3329,6 +3387,14 @@ MergeTreeData::PartsToRemoveFromZooKeeper MergeTreeData::removePartsInRangeFromW parts_to_remove.emplace_back(part); } + return parts_to_remove; +} + +MergeTreeData::PartsToRemoveFromZooKeeper MergeTreeData::removePartsInRangeFromWorkingSetAndGetPartsToRemoveFromZooKeeper( + MergeTreeTransaction * txn, const MergeTreePartInfo & drop_range, DataPartsLock & lock) +{ + + auto parts_to_remove = grabActivePartsToRemoveForDropRange(txn, drop_range, lock); bool clear_without_timeout = true; /// We a going to remove active parts covered by drop_range without timeout. @@ -3905,10 +3971,25 @@ MergeTreeData::DataPartsVector MergeTreeData::getVisibleDataPartsVectorInPartiti return res; } -MergeTreeData::DataPartPtr MergeTreeData::getPartIfExists(const MergeTreePartInfo & part_info, const MergeTreeData::DataPartStates & valid_states, DataPartsLock * acquired_lock) +MergeTreeData::DataPartPtr MergeTreeData::getPartIfExists(const MergeTreePartInfo & part_info, const MergeTreeData::DataPartStates & valid_states) { - auto lock = (acquired_lock) ? 
DataPartsLock() : lockParts(); + auto lock = lockParts(); + return getPartIfExistsUnlocked(part_info, valid_states, lock); +} +MergeTreeData::DataPartPtr MergeTreeData::getPartIfExists(const String & part_name, const MergeTreeData::DataPartStates & valid_states) +{ + auto lock = lockParts(); + return getPartIfExistsUnlocked(part_name, valid_states, lock); +} + +MergeTreeData::DataPartPtr MergeTreeData::getPartIfExistsUnlocked(const String & part_name, const DataPartStates & valid_states, DataPartsLock & acquired_lock) +{ + return getPartIfExistsUnlocked(MergeTreePartInfo::fromPartName(part_name, format_version), valid_states, acquired_lock); +} + +MergeTreeData::DataPartPtr MergeTreeData::getPartIfExistsUnlocked(const MergeTreePartInfo & part_info, const DataPartStates & valid_states, DataPartsLock & /* acquired_lock */) +{ auto it = data_parts_by_info.find(part_info); if (it == data_parts_by_info.end()) return nullptr; @@ -3920,12 +4001,6 @@ MergeTreeData::DataPartPtr MergeTreeData::getPartIfExists(const MergeTreePartInf return nullptr; } -MergeTreeData::DataPartPtr MergeTreeData::getPartIfExists(const String & part_name, const MergeTreeData::DataPartStates & valid_states, DataPartsLock * acquired_lock) -{ - return getPartIfExists(MergeTreePartInfo::fromPartName(part_name, format_version), valid_states, acquired_lock); -} - - static void loadPartAndFixMetadataImpl(MergeTreeData::MutableDataPartPtr part) { part->loadColumnsChecksumsIndexes(false, true); @@ -4977,6 +5052,8 @@ MergeTreeData::MutableDataPartsVector MergeTreeData::tryLoadPartsToAttach(const { const String containing_part = active_parts.getContainingPart(part_info.dir_name); + LOG_DEBUG(log, "Found containing part {} for part {}", containing_part, part_info.dir_name); + if (!containing_part.empty() && containing_part != part_info.dir_name) part_info.disk->moveDirectory(fs::path(relative_data_path) / source_dir / part_info.dir_name, fs::path(relative_data_path) / source_dir / ("inactive_" + part_info.dir_name)); @@ -4990,7 +5067,7 @@ MergeTreeData::MutableDataPartsVector MergeTreeData::tryLoadPartsToAttach(const renamed_parts.tryRenameAll(); /// Synchronously check that added parts exist and are not broken. We will write checksums.txt if it does not exist. 
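On the getPartIfExists refactoring earlier in this file's diff: the public overloads now take the parts lock themselves and delegate to the new *Unlocked variants, which require the caller to already hold a DataPartsLock. A rough usage sketch; the calling code below is hypothetical and not part of the patch:

// A caller that already holds the parts lock uses the Unlocked variant so the lock
// is not taken twice; a caller without the lock uses the plain overload.
String part_name = "all_1_1_0";   // example part name
{
    auto parts_lock = storage.lockParts();
    auto part = storage.getPartIfExistsUnlocked(part_name, {MergeTreeData::DataPartState::Active}, parts_lock);
    if (part)
    {
        // inspect the part while still holding parts_lock
    }
}
// Elsewhere, with no lock in hand:
auto part = storage.getPartIfExists(part_name, {MergeTreeData::DataPartState::Active});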
- LOG_DEBUG(log, "Checking parts"); + LOG_DEBUG(log, "Checking {} parts", renamed_parts.old_and_new_names.size()); MutableDataPartsVector loaded_parts; loaded_parts.reserve(renamed_parts.old_and_new_names.size()); @@ -5353,6 +5430,7 @@ MergeTreeData::DataPartsVector MergeTreeData::Transaction::commit(MergeTreeData: part->getDataPartStorage().commitTransaction(); if (txn) + { for (const auto & part : precommitted_parts) { DataPartPtr covering_part; @@ -5374,6 +5452,7 @@ MergeTreeData::DataPartsVector MergeTreeData::Transaction::commit(MergeTreeData: MergeTreeTransaction::addNewPartAndRemoveCovered(data.shared_from_this(), part, covered_parts, txn); } + } MergeTreeData::WriteAheadLogPtr wal; auto get_inited_wal = [&] () @@ -5661,7 +5740,7 @@ Block MergeTreeData::getMinMaxCountProjectionBlock( agg_count->set(place, value.get()); else { - auto value_column = func->getReturnType()->createColumnConst(1, value)->convertToFullColumnIfConst(); + auto value_column = func->getResultType()->createColumnConst(1, value)->convertToFullColumnIfConst(); const auto * value_column_ptr = value_column.get(); func->add(place, &value_column_ptr, 0, &arena); } @@ -5911,20 +5990,25 @@ std::optional MergeTreeData::getQueryProcessingStageWithAgg if (select_query->interpolate() && !select_query->interpolate()->children.empty()) return std::nullopt; - // Currently projections don't support GROUPING SET yet. - if (select_query->group_by_with_grouping_sets) + // Projections don't support grouping sets yet. + if (select_query->group_by_with_grouping_sets + || select_query->group_by_with_totals + || select_query->group_by_with_rollup + || select_query->group_by_with_cube) return std::nullopt; auto query_options = SelectQueryOptions( QueryProcessingStage::WithMergeableState, /* depth */ 1, /* is_subquery_= */ true - ).ignoreProjections().ignoreAlias(); + ).ignoreProjections().ignoreAlias(); + InterpreterSelectQuery select( query_ptr, query_context, query_options, query_info.prepared_sets); + const auto & analysis_result = select.getAnalysisResult(); query_info.prepared_sets = select.getQueryAnalyzer()->getPreparedSets(); @@ -6409,7 +6493,6 @@ std::pair MergeTreeData::cloneAn quoteString(src_part->getDataPartStorage().getFullPath())); String dst_part_name = src_part->getNewName(dst_part_info); - assert(!tmp_part_prefix.empty()); String tmp_dst_part_name = tmp_part_prefix + dst_part_name; auto temporary_directory_lock = getTemporaryPartDirectoryHolder(tmp_dst_part_name); @@ -6507,7 +6590,7 @@ DiskPtr MergeTreeData::tryGetDiskForDetachedPart(const String & part_name) const const auto disks = getStoragePolicy()->getDisks(); for (const DiskPtr & disk : disks) - if (disk->exists(relative_data_path + additional_path + part_name)) + if (disk->exists(fs::path(relative_data_path) / additional_path / part_name)) return disk; return nullptr; @@ -7365,6 +7448,12 @@ StorageSnapshotPtr MergeTreeData::getStorageSnapshot(const StorageMetadataPtr & return std::make_shared(*this, metadata_snapshot, object_columns, std::move(snapshot_data)); } +StorageSnapshotPtr MergeTreeData::getStorageSnapshotWithoutParts(const StorageMetadataPtr & metadata_snapshot) const +{ + auto lock = lockParts(); + return std::make_shared(*this, metadata_snapshot, object_columns, std::make_unique()); +} + void MergeTreeData::incrementInsertedPartsProfileEvent(MergeTreeDataPartType type) { switch (type.getValue()) diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 840e21ec321..670c755cf72 100644 --- 
a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -273,6 +273,7 @@ public: tryLogCurrentException("~MergeTreeData::Transaction"); } } + void clear(); TransactionID getTID() const; @@ -284,7 +285,6 @@ public: MutableDataParts precommitted_parts; MutableDataParts locked_parts; - void clear(); }; using TransactionUniquePtr = std::unique_ptr; @@ -376,7 +376,6 @@ public: /// require_part_metadata - should checksums.txt and columns.txt exist in the part directory. /// attach - whether the existing table is attached or the new table is created. MergeTreeData(const StorageID & table_id_, - const String & relative_data_path_, const StorageInMemoryMetadata & metadata_, ContextMutablePtr context_, const String & date_column_name, @@ -450,6 +449,9 @@ public: StorageSnapshotPtr getStorageSnapshot(const StorageMetadataPtr & metadata_snapshot, ContextPtr query_context) const override; + /// The same as above but does not hold vector of data parts. + StorageSnapshotPtr getStorageSnapshotWithoutParts(const StorageMetadataPtr & metadata_snapshot) const; + /// Load the set of data parts from disk. Call once - immediately after the object is created. void loadDataParts(bool skip_sanity_checks); @@ -514,8 +516,10 @@ public: DataPartsVector getDataPartsVectorInPartitionForInternalUsage(const DataPartStates & affordable_states, const String & partition_id, DataPartsLock * acquired_lock = nullptr) const; /// Returns the part with the given name and state or nullptr if no such part. - DataPartPtr getPartIfExists(const String & part_name, const DataPartStates & valid_states, DataPartsLock * acquired_lock = nullptr); - DataPartPtr getPartIfExists(const MergeTreePartInfo & part_info, const DataPartStates & valid_states, DataPartsLock * acquired_lock = nullptr); + DataPartPtr getPartIfExistsUnlocked(const String & part_name, const DataPartStates & valid_states, DataPartsLock & acquired_lock); + DataPartPtr getPartIfExistsUnlocked(const MergeTreePartInfo & part_info, const DataPartStates & valid_states, DataPartsLock & acquired_lock); + DataPartPtr getPartIfExists(const String & part_name, const DataPartStates & valid_states); + DataPartPtr getPartIfExists(const MergeTreePartInfo & part_info, const DataPartStates & valid_states); /// Total size of active parts in bytes. size_t getTotalActiveSizeInBytes() const; @@ -589,6 +593,8 @@ public: /// Used in REPLACE PARTITION command. void removePartsInRangeFromWorkingSet(MergeTreeTransaction * txn, const MergeTreePartInfo & drop_range, DataPartsLock & lock); + DataPartsVector grabActivePartsToRemoveForDropRange( + MergeTreeTransaction * txn, const MergeTreePartInfo & drop_range, DataPartsLock & lock); /// This wrapper is required to restrict access to parts in Deleting state class PartToRemoveFromZooKeeper { @@ -784,8 +790,6 @@ public: return column_sizes; } - const ColumnsDescription & getConcreteObjectColumns() const { return object_columns; } - /// Creates description of columns of data type Object from the range of data parts. static ColumnsDescription getConcreteObjectColumns( const DataPartsVector & parts, const ColumnsDescription & storage_columns); @@ -974,6 +978,14 @@ public: /// If one_part is true, fill in at most one part. Block getBlockWithVirtualPartColumns(const MergeTreeData::DataPartsVector & parts, bool one_part, bool ignore_empty = false) const; + /// In merge tree we do inserts with several steps. One of them: + /// X. write part to temporary directory with some temp name + /// Y. 
rename temporary directory to final name with correct block number value + /// As temp name MergeTree use just ordinary in memory counter, but in some cases + /// it can be useful to add additional part in temp name to avoid collisions on FS. + /// FIXME: Currently unused. + virtual std::string getPostfixForTempInsertName() const { return ""; } + /// For generating names of temporary parts during insertion. SimpleIncrement insert_increment; @@ -1087,6 +1099,8 @@ protected: struct TagByInfo{}; struct TagByStateAndInfo{}; + void initializeDirectoriesAndFormatVersion(const std::string & relative_data_path_, bool attach, const std::string & date_column_name, bool need_create_directories=true); + static const MergeTreePartInfo & dataPartPtrToInfo(const DataPartPtr & part) { return part->info; @@ -1317,6 +1331,12 @@ protected: static void incrementInsertedPartsProfileEvent(MergeTreeDataPartType type); static void incrementMergedPartsProfileEvent(MergeTreeDataPartType type); + bool addTempPart( + MutableDataPartPtr & part, + Transaction & out_transaction, + DataPartsLock & lock, + DataPartsVector * out_covered_parts); + private: /// Checking that candidate part doesn't break invariants: correct partition void checkPartPartition(MutableDataPartPtr & part, DataPartsLock & lock) const; @@ -1324,7 +1344,7 @@ private: /// Preparing itself to be committed in memory: fill some fields inside part, add it to data_parts_indexes /// in precommitted state and to transaction - void preparePartForCommit(MutableDataPartPtr & part, Transaction & out_transaction); + void preparePartForCommit(MutableDataPartPtr & part, Transaction & out_transaction, bool need_rename); /// Low-level method for preparing parts for commit (in-memory). /// FIXME Merge MergeTreeTransaction and Transaction diff --git a/src/Storages/MergeTree/MergeTreeDataFormatVersion.h b/src/Storages/MergeTree/MergeTreeDataFormatVersion.h index 1e21070dd6b..0a84f08ea71 100644 --- a/src/Storages/MergeTree/MergeTreeDataFormatVersion.h +++ b/src/Storages/MergeTree/MergeTreeDataFormatVersion.h @@ -8,6 +8,7 @@ namespace DB STRONG_TYPEDEF(UInt32, MergeTreeDataFormatVersion) +const MergeTreeDataFormatVersion MERGE_TREE_DATA_OLD_FORMAT_VERSION {0}; const MergeTreeDataFormatVersion MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING {1}; } diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 79670c0ab27..6c8b4a7ef57 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -65,8 +65,8 @@ static const double DISK_USAGE_COEFFICIENT_TO_SELECT = 2; /// because between selecting parts to merge and doing merge, amount of free space could have decreased. 
static const double DISK_USAGE_COEFFICIENT_TO_RESERVE = 1.1; -MergeTreeDataMergerMutator::MergeTreeDataMergerMutator(MergeTreeData & data_, size_t max_tasks_count_) - : data(data_), max_tasks_count(max_tasks_count_), log(&Poco::Logger::get(data.getLogName() + " (MergerMutator)")) +MergeTreeDataMergerMutator::MergeTreeDataMergerMutator(MergeTreeData & data_) + : data(data_), log(&Poco::Logger::get(data.getLogName() + " (MergerMutator)")) { } @@ -75,6 +75,7 @@ UInt64 MergeTreeDataMergerMutator::getMaxSourcePartsSizeForMerge() const { size_t scheduled_tasks_count = CurrentMetrics::values[CurrentMetrics::BackgroundMergesAndMutationsPoolTask].load(std::memory_order_relaxed); + auto max_tasks_count = data.getContext()->getMergeMutateExecutor()->getMaxTasksCount(); return getMaxSourcePartsSizeForMerge(max_tasks_count, scheduled_tasks_count); } @@ -114,7 +115,7 @@ UInt64 MergeTreeDataMergerMutator::getMaxSourcePartSizeForMutation() const /// DataPart can be store only at one disk. Get maximum reservable free space at all disks. UInt64 disk_space = data.getStoragePolicy()->getMaxUnreservedFreeSpace(); - + auto max_tasks_count = data.getContext()->getMergeMutateExecutor()->getMaxTasksCount(); /// Allow mutations only if there are enough threads, leave free threads for merges else if (occupied <= 1 || max_tasks_count - occupied >= data_settings->number_of_free_entries_in_pool_to_execute_mutation) @@ -523,6 +524,7 @@ MergeTaskPtr MergeTreeDataMergerMutator::mergePartsToTemporaryPart( const Names & deduplicate_by_columns, const MergeTreeData::MergingParams & merging_params, const MergeTreeTransactionPtr & txn, + bool need_prefix, IMergeTreeDataPart * parent_part, const String & suffix) { @@ -537,6 +539,7 @@ MergeTaskPtr MergeTreeDataMergerMutator::mergePartsToTemporaryPart( deduplicate, deduplicate_by_columns, merging_params, + need_prefix, parent_part, suffix, txn, @@ -556,7 +559,8 @@ MutateTaskPtr MergeTreeDataMergerMutator::mutatePartToTemporaryPart( ContextPtr context, const MergeTreeTransactionPtr & txn, ReservationSharedPtr space_reservation, - TableLockHolder & holder) + TableLockHolder & holder, + bool need_prefix) { return std::make_shared<MutateTask>( future_part, @@ -570,7 +574,8 @@ MutateTaskPtr MergeTreeDataMergerMutator::mutatePartToTemporaryPart( txn, data, *this, - merges_blocker + merges_blocker, + need_prefix ); } @@ -627,7 +632,7 @@ MergeTreeData::DataPartPtr MergeTreeDataMergerMutator::renameMergedTemporaryPart + " instead of " + parts[i]->name, ErrorCodes::LOGICAL_ERROR); } - LOG_TRACE(log, "Merged {} parts: from {} to {}", parts.size(), parts.front()->name, parts.back()->name); + LOG_TRACE(log, "Merged {} parts: [{}, {}] -> [{}]", parts.size(), parts.front()->name, parts.back()->name, new_data_part->name); return new_data_part; } diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h index 5d98f526325..b5143834650 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h @@ -45,7 +45,7 @@ public: const MergeTreeTransaction *, String *)>; - MergeTreeDataMergerMutator(MergeTreeData & data_, size_t max_tasks_count_); + explicit MergeTreeDataMergerMutator(MergeTreeData & data_); /** Get maximum total size of parts to do merge, at current moment of time. * It depends on number of free threads in background_pool and amount of free space in disk.
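The mutation guard shown as context in the .cpp hunk above is easy to read past, so here is a toy recomputation. The numbers are made up; the real threshold comes from the number_of_free_entries_in_pool_to_execute_mutation setting, and after this patch max_tasks_count is queried from the merge/mutate executor on every call instead of being captured in the constructor:

// Hypothetical values, only to illustrate
//   occupied <= 1 || max_tasks_count - occupied >= number_of_free_entries_in_pool_to_execute_mutation
size_t max_tasks_count = 16;     // read from the executor at call time after this patch
size_t occupied = 10;            // merge/mutate tasks currently scheduled
size_t free_entries_needed = 8;  // stand-in for the setting value
bool allow_mutation = (occupied <= 1) || (max_tasks_count - occupied >= free_entries_needed);
// 16 - 10 = 6, which is < 8, so allow_mutation == false: free slots are kept for merges.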
@@ -113,6 +113,7 @@ public: const Names & deduplicate_by_columns, const MergeTreeData::MergingParams & merging_params, const MergeTreeTransactionPtr & txn, + bool need_prefix = true, IMergeTreeDataPart * parent_part = nullptr, const String & suffix = ""); @@ -126,7 +127,8 @@ public: ContextPtr context, const MergeTreeTransactionPtr & txn, ReservationSharedPtr space_reservation, - TableLockHolder & table_lock_holder); + TableLockHolder & table_lock_holder, + bool need_prefix = true); MergeTreeData::DataPartPtr renameMergedTemporaryPart( MergeTreeData::MutableDataPartPtr & new_data_part, @@ -155,7 +157,6 @@ public : private: MergeTreeData & data; - const size_t max_tasks_count; Poco::Logger * log; diff --git a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp index 48b1b6bab60..ac56868894f 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp @@ -149,10 +149,10 @@ MutableDataPartStoragePtr MergeTreeDataPartInMemory::flushToDisk(const String & return new_data_part_storage; } -void MergeTreeDataPartInMemory::makeCloneInDetached(const String & prefix, const StorageMetadataPtr & metadata_snapshot) const +DataPartStoragePtr MergeTreeDataPartInMemory::makeCloneInDetached(const String & prefix, const StorageMetadataPtr & metadata_snapshot) const { String detached_path = *getRelativePathForDetachedPart(prefix, /* broken */ false); - flushToDisk(detached_path, metadata_snapshot); + return flushToDisk(detached_path, metadata_snapshot); } void MergeTreeDataPartInMemory::renameTo(const String & new_relative_path, bool /* remove_new_dir_if_exists */) diff --git a/src/Storages/MergeTree/MergeTreeDataPartInMemory.h b/src/Storages/MergeTree/MergeTreeDataPartInMemory.h index e58701b04a1..acb1cd8c844 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartInMemory.h +++ b/src/Storages/MergeTree/MergeTreeDataPartInMemory.h @@ -47,7 +47,7 @@ public: bool hasColumnFiles(const NameAndTypePair & column) const override { return !!getColumnPosition(column.getNameInStorage()); } String getFileNameForColumn(const NameAndTypePair & /* column */) const override { return ""; } void renameTo(const String & new_relative_path, bool remove_new_dir_if_exists) override; - void makeCloneInDetached(const String & prefix, const StorageMetadataPtr & metadata_snapshot) const override; + DataPartStoragePtr makeCloneInDetached(const String & prefix, const StorageMetadataPtr & metadata_snapshot) const override; MutableDataPartStoragePtr flushToDisk(const String & new_relative_path, const StorageMetadataPtr & metadata_snapshot) const; diff --git a/src/Storages/MergeTree/MergeTreeDataPartState.h b/src/Storages/MergeTree/MergeTreeDataPartState.h index a52f7559375..5c4779f016e 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartState.h +++ b/src/Storages/MergeTree/MergeTreeDataPartState.h @@ -3,24 +3,23 @@ namespace DB { -/** - * Part state is a stage of its lifetime. States are ordered and state of a part could be increased only. - * Part state should be modified under data_parts mutex. 
- * - * Possible state transitions: - * Temporary -> PreActive: we are trying to add a fetched, inserted or merged part to active set - * PreActive -> Outdated: we could not add a part to active set and are doing a rollback (for example it is duplicated part) - * PreActive -> Active: we successfully added a part to active dataset - * PreActive -> Outdated: a part was replaced by a covering part or DROP PARTITION - * Outdated -> Deleting: a cleaner selected this part for deletion - * Deleting -> Outdated: if an ZooKeeper error occurred during the deletion, we will retry deletion - * Active -> DeleteOnDestroy: if part was moved to another disk - */ +/** Part state is a stage of its lifetime. States are ordered and state of a part could be increased only. + * Part state should be modified under data_parts mutex. + * + * Possible state transitions: + * Temporary -> PreActive: we are trying to add a fetched, inserted or merged part to active set + * PreActive -> Outdated: we could not add a part to active set and are doing a rollback (for example it is duplicated part) + * PreActive -> Active: we successfully added a part to active dataset + * PreActive -> Outdated: a part was replaced by a covering part or DROP PARTITION + * Outdated -> Deleting: a cleaner selected this part for deletion + * Deleting -> Outdated: if an ZooKeeper error occurred during the deletion, we will retry deletion + * Active -> DeleteOnDestroy: if part was moved to another disk + */ enum class MergeTreeDataPartState { Temporary, /// the part is generating now, it is not in data_parts list - PreActive, /// the part is in data_parts, but not used for SELECTs - Active, /// active data part, used by current and upcoming SELECTs + PreActive, /// the part is in data_parts, but not used for SELECTs + Active, /// active data part, used by current and upcoming SELECTs Outdated, /// not active data part, but could be used by only current SELECTs, could be deleted after SELECTs finishes Deleting, /// not active data part with identity refcounter, it is deleting right now by a cleaner DeleteOnDestroy, /// part was moved to another disk and should be deleted in own destructor diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp index 020121e59d7..4c1d117ac73 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp @@ -110,7 +110,7 @@ Granules getGranulesToWrite(const MergeTreeIndexGranularity & index_granularity, .is_complete = (rows_left_in_block >= expected_rows_in_mark) }); current_row += result.back().rows_to_write; - current_mark++; + ++current_mark; } return result; @@ -146,6 +146,7 @@ void MergeTreeDataPartWriterCompact::write(const Block & block, const IColumn::P if (compute_granularity) { size_t index_granularity_for_block = computeIndexGranularity(block); + assert(index_granularity_for_block >= 1); fillIndexGranularity(index_granularity_for_block, block.rows()); } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp index a887b0ee322..fbcf8cb241c 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp @@ -135,7 +135,9 @@ static size_t computeIndexGranularityImpl( size_t rows_in_block = block.rows(); size_t index_granularity_for_block; if (!can_use_adaptive_index_granularity) + { index_granularity_for_block = 
fixed_index_granularity_rows; + } else { size_t block_size_in_memory = block.bytes(); @@ -152,11 +154,13 @@ static size_t computeIndexGranularityImpl( index_granularity_for_block = index_granularity_bytes / size_of_row_in_bytes; } } - if (index_granularity_for_block == 0) /// very rare case when index granularity bytes less then single row - index_granularity_for_block = 1; - /// We should be less or equal than fixed index granularity index_granularity_for_block = std::min(fixed_index_granularity_rows, index_granularity_for_block); + + /// very rare case when index granularity bytes less then single row + if (index_granularity_for_block == 0) + index_granularity_for_block = 1; + return index_granularity_for_block; } diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index c50c01ea356..e314c3f2e58 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -322,8 +322,19 @@ Block MergeTreeDataWriter::mergeBlock( return block.cloneWithColumns(status.chunk.getColumns()); } -MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPart( - BlockWithPartition & block_with_partition, const StorageMetadataPtr & metadata_snapshot, ContextPtr context) + +MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPart(BlockWithPartition & block, const StorageMetadataPtr & metadata_snapshot, ContextPtr context) +{ + return writeTempPartImpl(block, metadata_snapshot, context, data.insert_increment.get(), /*need_tmp_prefix = */true); +} + +MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPartWithoutPrefix(BlockWithPartition & block, const StorageMetadataPtr & metadata_snapshot, int64_t block_number, ContextPtr context) +{ + return writeTempPartImpl(block, metadata_snapshot, context, block_number, /*need_tmp_prefix = */false); +} + +MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPartImpl( + BlockWithPartition & block_with_partition, const StorageMetadataPtr & metadata_snapshot, ContextPtr context, int64_t block_number, bool need_tmp_prefix) { TemporaryPart temp_part; Block & block = block_with_partition.block; @@ -334,17 +345,12 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPart( if (column.type->hasDynamicSubcolumns()) column.type = block.getByName(column.name).type; - static const String TMP_PREFIX = "tmp_insert_"; - - /// This will generate unique name in scope of current server process. 
- Int64 temp_index = data.insert_increment.get(); - auto minmax_idx = std::make_shared(); minmax_idx->update(block, data.getMinMaxColumnsNames(metadata_snapshot->getPartitionKey())); - MergeTreePartition partition(std::move(block_with_partition.partition)); + MergeTreePartition partition(block_with_partition.partition); - MergeTreePartInfo new_part_info(partition.getID(metadata_snapshot->getPartitionKey().sample_block), temp_index, temp_index, 0); + MergeTreePartInfo new_part_info(partition.getID(metadata_snapshot->getPartitionKey().sample_block), block_number, block_number, 0); String part_name; if (data.format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) { @@ -364,7 +370,19 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPart( else part_name = new_part_info.getPartName(); - String part_dir = TMP_PREFIX + part_name; + std::string part_dir; + if (need_tmp_prefix) + { + std::string temp_prefix = "tmp_insert_"; + const auto & temp_postfix = data.getPostfixForTempInsertName(); + if (!temp_postfix.empty()) + temp_prefix += temp_postfix + "_"; + part_dir = temp_prefix + part_name; + } + else + { + part_dir = part_name; + } temp_part.temporary_directory_lock = data.getTemporaryPartDirectoryHolder(part_dir); /// If we need to calculate some columns to sort. @@ -419,7 +437,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPart( auto data_part_storage = std::make_shared( data_part_volume, data.relative_data_path, - TMP_PREFIX + part_name); + part_dir); data_part_storage->beginTransaction(); @@ -549,7 +567,10 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeProjectionPartImpl( } auto relative_path = part_name + (is_temp ? ".tmp_proj" : ".proj"); - auto projection_part_storage = parent_part->getDataPartStorage().getProjection(relative_path); + auto projection_part_storage = parent_part->getDataPartStorage().getProjection(relative_path, !is_temp); + if (is_temp) + projection_part_storage->beginTransaction(); + auto new_data_part = data.createPart( part_name, part_type, diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.h b/src/Storages/MergeTree/MergeTreeDataWriter.h index 2d7e19cf9d5..cbf8094f7fd 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.h +++ b/src/Storages/MergeTree/MergeTreeDataWriter.h @@ -79,6 +79,8 @@ public: */ TemporaryPart writeTempPart(BlockWithPartition & block, const StorageMetadataPtr & metadata_snapshot, ContextPtr context); + TemporaryPart writeTempPartWithoutPrefix(BlockWithPartition & block, const StorageMetadataPtr & metadata_snapshot, int64_t block_number, ContextPtr context); + /// For insertion. 
static TemporaryPart writeProjectionPart( const MergeTreeData & data, @@ -104,6 +106,14 @@ public: const MergeTreeData::MergingParams & merging_params); private: + + TemporaryPart writeTempPartImpl( + BlockWithPartition & block, + const StorageMetadataPtr & metadata_snapshot, + ContextPtr context, + int64_t block_number, + bool need_tmp_prefix); + static TemporaryPart writeProjectionPartImpl( const String & part_name, bool is_temp, diff --git a/src/Storages/MergeTree/MergeTreeIndexHypothesisMergedCondition.cpp b/src/Storages/MergeTree/MergeTreeIndexHypothesisMergedCondition.cpp index 3a145c0f505..c62b5e86c75 100644 --- a/src/Storages/MergeTree/MergeTreeIndexHypothesisMergedCondition.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexHypothesisMergedCondition.cpp @@ -88,7 +88,7 @@ void MergeTreeIndexhypothesisMergedCondition::addConstraints(const ConstraintsDe /// Replaces < -> <=, > -> >= and assumes that all hypotheses are true then checks if path exists bool MergeTreeIndexhypothesisMergedCondition::alwaysUnknownOrTrue() const { - std::vector active_atomic_formulas(atomic_constraints); + ASTs active_atomic_formulas(atomic_constraints); for (const auto & hypothesis : index_to_compare_atomic_hypotheses) { active_atomic_formulas.insert( @@ -190,7 +190,7 @@ bool MergeTreeIndexhypothesisMergedCondition::mayBeTrueOnGranule(const MergeTree std::unique_ptr MergeTreeIndexhypothesisMergedCondition::buildGraph(const std::vector & values) const { - std::vector active_atomic_formulas(atomic_constraints); + ASTs active_atomic_formulas(atomic_constraints); for (size_t i = 0; i < values.size(); ++i) { if (values[i]) diff --git a/src/Storages/MergeTree/MergeTreeIndexHypothesisMergedCondition.h b/src/Storages/MergeTree/MergeTreeIndexHypothesisMergedCondition.h index 9ebcbe9d7dc..6153c214898 100644 --- a/src/Storages/MergeTree/MergeTreeIndexHypothesisMergedCondition.h +++ b/src/Storages/MergeTree/MergeTreeIndexHypothesisMergedCondition.h @@ -34,7 +34,7 @@ private: std::vector> index_to_compare_atomic_hypotheses; std::vector> index_to_atomic_hypotheses; - std::vector atomic_constraints; + ASTs atomic_constraints; }; } diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.cpp b/src/Storages/MergeTree/MergeTreeIndexSet.cpp index a28394e943e..db99a2f37be 100644 --- a/src/Storages/MergeTree/MergeTreeIndexSet.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexSet.cpp @@ -9,6 +9,10 @@ #include #include +#include +#include + +#include namespace DB { @@ -242,67 +246,78 @@ MergeTreeIndexGranulePtr MergeTreeIndexAggregatorSet::getGranuleAndReset() MergeTreeIndexConditionSet::MergeTreeIndexConditionSet( const String & index_name_, - const Block & index_sample_block_, + const Block & index_sample_block, size_t max_rows_, - const SelectQueryInfo & query, + const SelectQueryInfo & query_info, ContextPtr context) : index_name(index_name_) , max_rows(max_rows_) - , index_sample_block(index_sample_block_) { for (const auto & name : index_sample_block.getNames()) if (!key_columns.contains(name)) key_columns.insert(name); - const auto & select = query.query->as(); - - if (select.where() && select.prewhere()) - expression_ast = makeASTFunction( - "and", - select.where()->clone(), - select.prewhere()->clone()); - else if (select.where()) - expression_ast = select.where()->clone(); - else if (select.prewhere()) - expression_ast = select.prewhere()->clone(); - - useless = checkASTUseless(expression_ast); - /// Do not proceed if index is useless for this query. 
- if (useless) + ASTPtr ast_filter_node = buildFilterNode(query_info.query); + if (!ast_filter_node) return; - /// Replace logical functions with bit functions. - /// Working with UInt8: last bit = can be true, previous = can be false (Like src/Storages/MergeTree/BoolMask.h). - traverseAST(expression_ast); + if (context->getSettingsRef().allow_experimental_analyzer) + { + if (!query_info.filter_actions_dag) + return; - auto syntax_analyzer_result = TreeRewriter(context).analyze( - expression_ast, index_sample_block.getNamesAndTypesList()); - actions = ExpressionAnalyzer(expression_ast, syntax_analyzer_result, context).getActions(true); + if (checkDAGUseless(*query_info.filter_actions_dag->getOutputs().at(0), context)) + return; + + const auto * filter_node = query_info.filter_actions_dag->getOutputs().at(0); + auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG({filter_node}, {}, context); + const auto * filter_actions_dag_node = filter_actions_dag->getOutputs().at(0); + + std::unordered_map node_to_result_node; + filter_actions_dag->getOutputs()[0] = &traverseDAG(*filter_actions_dag_node, filter_actions_dag, context, node_to_result_node); + + filter_actions_dag->removeUnusedActions(); + actions = std::make_shared(filter_actions_dag); + } + else + { + if (checkASTUseless(ast_filter_node)) + return; + + auto expression_ast = ast_filter_node->clone(); + + /// Replace logical functions with bit functions. + /// Working with UInt8: last bit = can be true, previous = can be false (Like src/Storages/MergeTree/BoolMask.h). + traverseAST(expression_ast); + + auto syntax_analyzer_result = TreeRewriter(context).analyze(expression_ast, index_sample_block.getNamesAndTypesList()); + actions = ExpressionAnalyzer(expression_ast, syntax_analyzer_result, context).getActions(true); + } } bool MergeTreeIndexConditionSet::alwaysUnknownOrTrue() const { - return useless; + return isUseless(); } bool MergeTreeIndexConditionSet::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const { - if (useless) + if (isUseless()) return true; auto granule = std::dynamic_pointer_cast(idx_granule); if (!granule) - throw Exception( - "Set index condition got a granule with the wrong type.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Set index condition got a granule with the wrong type"); - if (useless || granule->empty() || (max_rows != 0 && granule->size() > max_rows)) + if (isUseless() || granule->empty() || (max_rows != 0 && granule->size() > max_rows)) return true; Block result = granule->block; actions->execute(result); - auto column - = result.getByName(expression_ast->getColumnName()).column->convertToFullColumnIfConst()->convertToFullColumnIfLowCardinality(); + const auto & filter_node_name = actions->getActionsDAG().getOutputs().at(0)->result_name; + auto column = result.getByName(filter_node_name).column->convertToFullColumnIfConst()->convertToFullColumnIfLowCardinality(); if (column->onlyNull()) return false; @@ -318,17 +333,214 @@ bool MergeTreeIndexConditionSet::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx } if (!col_uint8) - throw Exception("ColumnUInt8 expected as Set index condition result.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "ColumnUInt8 expected as Set index condition result"); const auto & condition = col_uint8->getData(); + size_t column_size = column->size(); - for (size_t i = 0; i < column->size(); ++i) + for (size_t i = 0; i < column_size; ++i) if ((!null_map || (*null_map)[i] == 0) && condition[i] & 1) return 
true; return false; } + +const ActionsDAG::Node & MergeTreeIndexConditionSet::traverseDAG(const ActionsDAG::Node & node, + ActionsDAGPtr & result_dag, + const ContextPtr & context, + std::unordered_map & node_to_result_node) const +{ + auto result_node_it = node_to_result_node.find(&node); + if (result_node_it != node_to_result_node.end()) + return *result_node_it->second; + + const ActionsDAG::Node * result_node = nullptr; + + if (const auto * operator_node_ptr = operatorFromDAG(node, result_dag, context, node_to_result_node)) + { + result_node = operator_node_ptr; + } + else if (const auto * atom_node_ptr = atomFromDAG(node, result_dag, context)) + { + result_node = atom_node_ptr; + + if (atom_node_ptr->type == ActionsDAG::ActionType::INPUT || + atom_node_ptr->type == ActionsDAG::ActionType::FUNCTION) + { + auto bit_wrapper_function = FunctionFactory::instance().get("__bitWrapperFunc", context); + result_node = &result_dag->addFunction(bit_wrapper_function, {atom_node_ptr}, {}); + } + } + else + { + ColumnWithTypeAndName unknown_field_column_with_type; + + unknown_field_column_with_type.name = calculateConstantActionNodeName(UNKNOWN_FIELD); + unknown_field_column_with_type.type = std::make_shared(); + unknown_field_column_with_type.column = unknown_field_column_with_type.type->createColumnConst(1, UNKNOWN_FIELD); + + result_node = &result_dag->addColumn(unknown_field_column_with_type); + } + + node_to_result_node.emplace(&node, result_node); + return *result_node; +} + +const ActionsDAG::Node * MergeTreeIndexConditionSet::atomFromDAG(const ActionsDAG::Node & node, ActionsDAGPtr & result_dag, const ContextPtr & context) const +{ + /// Function, literal or column + + const auto * node_to_check = &node; + while (node_to_check->type == ActionsDAG::ActionType::ALIAS) + node_to_check = node_to_check->children[0]; + + if (node_to_check->column && isColumnConst(*node_to_check->column)) + return &node; + + RPNBuilderTreeContext tree_context(context); + RPNBuilderTreeNode tree_node(node_to_check, tree_context); + + auto column_name = tree_node.getColumnName(); + if (key_columns.contains(column_name)) + { + const auto * result_node = node_to_check; + + if (node.type != ActionsDAG::ActionType::INPUT) + result_node = &result_dag->addInput(column_name, node.result_type); + + return result_node; + } + + if (node.type != ActionsDAG::ActionType::FUNCTION) + return nullptr; + + const auto & arguments = node.children; + size_t arguments_size = arguments.size(); + + ActionsDAG::NodeRawConstPtrs children(arguments_size); + + for (size_t i = 0; i < arguments_size; ++i) + { + children[i] = atomFromDAG(*arguments[i], result_dag, context); + + if (!children[i]) + return nullptr; + } + + return &result_dag->addFunction(node.function_base, children, {}); +} + +const ActionsDAG::Node * MergeTreeIndexConditionSet::operatorFromDAG(const ActionsDAG::Node & node, + ActionsDAGPtr & result_dag, + const ContextPtr & context, + std::unordered_map & node_to_result_node) const +{ + /// Functions AND, OR, NOT. Replace with bit*. 
+ + const auto * node_to_check = &node; + while (node_to_check->type == ActionsDAG::ActionType::ALIAS) + node_to_check = node_to_check->children[0]; + + if (node_to_check->column && isColumnConst(*node_to_check->column)) + return nullptr; + + if (node_to_check->type != ActionsDAG::ActionType::FUNCTION) + return nullptr; + + auto function_name = node_to_check->function->getName(); + const auto & arguments = node_to_check->children; + size_t arguments_size = arguments.size(); + + if (function_name == "not") + { + if (arguments_size != 1) + return nullptr; + + auto bit_swap_last_two_function = FunctionFactory::instance().get("__bitSwapLastTwo", context); + return &result_dag->addFunction(bit_swap_last_two_function, {arguments[0]}, {}); + } + else if (function_name == "and" || function_name == "indexHint" || function_name == "or") + { + if (arguments_size < 2) + return nullptr; + + ActionsDAG::NodeRawConstPtrs children; + children.resize(arguments_size); + + for (size_t i = 0; i < arguments_size; ++i) + children[i] = &traverseDAG(*arguments[i], result_dag, context, node_to_result_node); + + FunctionOverloadResolverPtr function; + + if (function_name == "and" || function_name == "indexHint") + function = FunctionFactory::instance().get("__bitBoolMaskAnd", context); + else + function = FunctionFactory::instance().get("__bitBoolMaskOr", context); + + const auto * last_argument = children.back(); + children.pop_back(); + + const auto * before_last_argument = children.back(); + children.pop_back(); + + while (true) + { + last_argument = &result_dag->addFunction(function, {before_last_argument, last_argument}, {}); + + if (children.empty()) + break; + + before_last_argument = children.back(); + children.pop_back(); + } + + return last_argument; + } + + return nullptr; +} + +bool MergeTreeIndexConditionSet::checkDAGUseless(const ActionsDAG::Node & node, const ContextPtr & context, bool atomic) const +{ + const auto * node_to_check = &node; + while (node_to_check->type == ActionsDAG::ActionType::ALIAS) + node_to_check = node_to_check->children[0]; + + RPNBuilderTreeContext tree_context(context); + RPNBuilderTreeNode tree_node(node_to_check, tree_context); + + if (node.column && isColumnConst(*node.column)) + { + Field literal; + node.column->get(0, literal); + return !atomic && literal.safeGet(); + } + else if (node.type == ActionsDAG::ActionType::FUNCTION) + { + auto column_name = tree_node.getColumnName(); + if (key_columns.contains(column_name)) + return false; + + auto function_name = node.function_base->getName(); + const auto & arguments = node.children; + + if (function_name == "and" || function_name == "indexHint") + return std::all_of(arguments.begin(), arguments.end(), [&, atomic](const auto & arg) { return checkDAGUseless(*arg, context, atomic); }); + else if (function_name == "or") + return std::any_of(arguments.begin(), arguments.end(), [&, atomic](const auto & arg) { return checkDAGUseless(*arg, context, atomic); }); + else if (function_name == "not") + return checkDAGUseless(*arguments.at(0), context, atomic); + else + return std::any_of(arguments.begin(), arguments.end(), + [&](const auto & arg) { return checkDAGUseless(*arg, context, true /*atomic*/); }); + } + + auto column_name = tree_node.getColumnName(); + return !key_columns.contains(column_name); +} + void MergeTreeIndexConditionSet::traverseAST(ASTPtr & node) const { if (operatorFromAST(node)) @@ -465,7 +677,7 @@ bool MergeTreeIndexConditionSet::checkASTUseless(const ASTPtr & node, bool atomi else if (const auto * literal = 
node->as()) return !atomic && literal->value.safeGet(); else if (const auto * identifier = node->as()) - return key_columns.find(identifier->getColumnName()) == std::end(key_columns); + return !key_columns.contains(identifier->getColumnName()); else return true; } diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.h b/src/Storages/MergeTree/MergeTreeIndexSet.h index 23b336d274b..e23fddc0f28 100644 --- a/src/Storages/MergeTree/MergeTreeIndexSet.h +++ b/src/Storages/MergeTree/MergeTreeIndexSet.h @@ -84,9 +84,9 @@ class MergeTreeIndexConditionSet final : public IMergeTreeIndexCondition public: MergeTreeIndexConditionSet( const String & index_name_, - const Block & index_sample_block_, + const Block & index_sample_block, size_t max_rows_, - const SelectQueryInfo & query, + const SelectQueryInfo & query_info, ContextPtr context); bool alwaysUnknownOrTrue() const override; @@ -95,20 +95,39 @@ public: ~MergeTreeIndexConditionSet() override = default; private: + const ActionsDAG::Node & traverseDAG(const ActionsDAG::Node & node, + ActionsDAGPtr & result_dag, + const ContextPtr & context, + std::unordered_map & node_to_result_node) const; + + const ActionsDAG::Node * atomFromDAG(const ActionsDAG::Node & node, + ActionsDAGPtr & result_dag, + const ContextPtr & context) const; + + const ActionsDAG::Node * operatorFromDAG(const ActionsDAG::Node & node, + ActionsDAGPtr & result_dag, + const ContextPtr & context, + std::unordered_map & node_to_result_node) const; + + bool checkDAGUseless(const ActionsDAG::Node & node, const ContextPtr & context, bool atomic = false) const; + void traverseAST(ASTPtr & node) const; + bool atomFromAST(ASTPtr & node) const; + static bool operatorFromAST(ASTPtr & node); bool checkASTUseless(const ASTPtr & node, bool atomic = false) const; - String index_name; size_t max_rows; - Block index_sample_block; - bool useless; - std::set key_columns; - ASTPtr expression_ast; + bool isUseless() const + { + return actions == nullptr; + } + + std::unordered_set key_columns; ExpressionActionsPtr actions; }; diff --git a/src/Storages/MergeTree/MergeTreeMutationEntry.cpp b/src/Storages/MergeTree/MergeTreeMutationEntry.cpp index a222f2a8ad8..9906ea3d02a 100644 --- a/src/Storages/MergeTree/MergeTreeMutationEntry.cpp +++ b/src/Storages/MergeTree/MergeTreeMutationEntry.cpp @@ -61,7 +61,7 @@ MergeTreeMutationEntry::MergeTreeMutationEntry(MutationCommands commands_, DiskP *out << "format version: 1\n" << "create time: " << LocalDateTime(create_time) << "\n"; *out << "commands: "; - commands.writeText(*out); + commands.writeText(*out, /* with_pure_metadata_commands = */ false); *out << "\n"; if (tid.isPrehistoric()) { @@ -174,7 +174,7 @@ std::shared_ptr MergeTreeMutationEntry::backup() const out << "block number: " << block_number << "\n"; out << "commands: "; - commands.writeText(out); + commands.writeText(out, /* with_pure_metadata_commands = */ false); out << "\n"; return std::make_shared(out.str()); diff --git a/src/Storages/MergeTree/MergeTreeMutationStatus.h b/src/Storages/MergeTree/MergeTreeMutationStatus.h index acda43b9254..5f29b777293 100644 --- a/src/Storages/MergeTree/MergeTreeMutationStatus.h +++ b/src/Storages/MergeTree/MergeTreeMutationStatus.h @@ -27,6 +27,9 @@ struct MergeTreeMutationStatus String latest_failed_part; time_t latest_fail_time = 0; String latest_fail_reason; + + /// FIXME: currently unused, but would be much better to report killed mutations with this flag. 
+ bool is_killed = false; }; /// Check mutation status and throw exception in case of error during mutation diff --git a/src/Storages/MergeTree/MergeTreePartInfo.cpp b/src/Storages/MergeTree/MergeTreePartInfo.cpp index 8c518e4d17f..f537e7cb285 100644 --- a/src/Storages/MergeTree/MergeTreePartInfo.cpp +++ b/src/Storages/MergeTree/MergeTreePartInfo.cpp @@ -18,7 +18,7 @@ MergeTreePartInfo MergeTreePartInfo::fromPartName(const String & part_name, Merg if (auto part_opt = tryParsePartName(part_name, format_version)) return *part_opt; else - throw Exception(ErrorCodes::BAD_DATA_PART_NAME, "Unexpected part name: {}", part_name); + throw Exception(ErrorCodes::BAD_DATA_PART_NAME, "Unexpected part name: {} for format version: {}", part_name, format_version); } void MergeTreePartInfo::validatePartitionID(const String & partition_id, MergeTreeDataFormatVersion format_version) diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.cpp b/src/Storages/MergeTree/MergeTreeRangeReader.cpp index 1a5a4d91806..ac5c3b1db2d 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.cpp +++ b/src/Storages/MergeTree/MergeTreeRangeReader.cpp @@ -30,13 +30,17 @@ namespace ErrorCodes } -static void filterColumns(Columns & columns, const IColumn::Filter & filter) +static void filterColumns(Columns & columns, const IColumn::Filter & filter, size_t filter_bytes) { for (auto & column : columns) { if (column) { - column = column->filter(filter, -1); + if (column->size() != filter.size()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of column {} doesn't match size of filter {}", + column->size(), filter.size()); + + column = column->filter(filter, filter_bytes); if (column->empty()) { @@ -47,13 +51,12 @@ static void filterColumns(Columns & columns, const IColumn::Filter & filter) } } -static void filterColumns(Columns & columns, const ColumnPtr & filter) +static void filterColumns(Columns & columns, const FilterWithCachedCount & filter) { - ConstantFilterDescription const_descr(*filter); - if (const_descr.always_true) + if (filter.alwaysTrue()) return; - if (const_descr.always_false) + if (filter.alwaysFalse()) { for (auto & col : columns) if (col) @@ -62,8 +65,7 @@ static void filterColumns(Columns & columns, const ColumnPtr & filter) return; } - FilterDescription descr(*filter); - filterColumns(columns, *descr.data); + filterColumns(columns, filter.getData(), filter.countBytesInFilter()); } @@ -320,11 +322,13 @@ void MergeTreeRangeReader::ReadResult::clear() num_rows_to_skip_in_last_granule += rows_per_granule.back(); rows_per_granule.assign(rows_per_granule.size(), 0); total_rows_per_granule = 0; - filter_holder = nullptr; - filter = nullptr; + final_filter = FilterWithCachedCount(); + num_rows = 0; + columns.clear(); + additional_columns.clear(); } -void MergeTreeRangeReader::ReadResult::shrink(Columns & old_columns) +void MergeTreeRangeReader::ReadResult::shrink(Columns & old_columns, const NumRows & rows_per_granule_previous) const { for (auto & column : old_columns) { @@ -337,9 +341,12 @@ void MergeTreeRangeReader::ReadResult::shrink(Columns & old_columns) continue; } + LOG_TEST(log, "ReadResult::shrink() column size: {} total_rows_per_granule: {}", + column->size(), total_rows_per_granule); + auto new_column = column->cloneEmpty(); new_column->reserve(total_rows_per_granule); - for (size_t j = 0, pos = 0; j < rows_per_granule_original.size(); pos += rows_per_granule_original[j++]) + for (size_t j = 0, pos = 0; j < rows_per_granule_previous.size(); pos += rows_per_granule_previous[j++]) { if 
(rows_per_granule[j]) new_column->insertRangeFrom(*column, pos, rows_per_granule[j]); @@ -348,74 +355,265 @@ void MergeTreeRangeReader::ReadResult::shrink(Columns & old_columns) } } +/// The main invariant of the data in the read result is that the number of rows is +/// either equal to total_rows_per_granule (if filter has not been applied) or to the number of +/// 1s in the filter (if filter has been applied). +void MergeTreeRangeReader::ReadResult::checkInternalConsistency() const +{ + /// Check that filter size matches number of rows that will be read. + if (final_filter.present() && final_filter.size() != total_rows_per_granule) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Final filter size {} doesn't match total_rows_per_granule {}", + final_filter.size(), total_rows_per_granule); + + /// Check that num_rows is consistent with final_filter and rows_per_granule. + if (final_filter.present() && final_filter.countBytesInFilter() != num_rows && total_rows_per_granule != num_rows) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Number of rows {} matches neither filter 1s count {} nor total_rows_per_granule {}", + num_rows, final_filter.countBytesInFilter(), total_rows_per_granule); + + /// Check that additional columns have the same number of rows as the main columns. + if (additional_columns && additional_columns.rows() != num_rows) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Number of rows in additional columns {} is not equal to number of rows in result columns {}", + additional_columns.rows(), num_rows); + + for (const auto & column : columns) + { + if (column) + chassert(column->size() == num_rows); + } +} + +std::string MergeTreeRangeReader::ReadResult::dumpInfo() const +{ + WriteBufferFromOwnString out; + out << "num_rows: " << num_rows + << ", columns: " << columns.size() + << ", total_rows_per_granule: " << total_rows_per_granule; + if (final_filter.present()) + { + out << ", filter size: " << final_filter.size() + << ", filter 1s: " << final_filter.countBytesInFilter(); + } + else + { + out << ", no filter"; + } + for (size_t ci = 0; ci < columns.size(); ++ci) + { + out << ", column[" << ci << "]: "; + if (!columns[ci]) + out << " nullptr"; + else + { + out << " " << columns[ci]->dumpStructure(); + } + } + if (additional_columns) + { + out << ", additional_columns: " << additional_columns.dumpStructure(); + } + return out.str(); +} + +static std::string dumpNames(const NamesAndTypesList & columns) +{ + WriteBufferFromOwnString out; + for (auto it = columns.begin(); it != columns.end(); ++it) + { + if (it != columns.begin()) + out << ", "; + out << it->name; + } + return out.str(); +} + void MergeTreeRangeReader::ReadResult::setFilterConstTrue() { - clearFilter(); - filter_holder = DataTypeUInt8().createColumnConst(num_rows, 1u); + /// Remove the filter, so newly read columns will not be filtered. + final_filter = FilterWithCachedCount(); } -void MergeTreeRangeReader::ReadResult::setFilterConstFalse() +static ColumnPtr andFilters(ColumnPtr c1, ColumnPtr c2) { - clearFilter(); - columns.clear(); - num_rows = 0; + if (c1->size() != c2->size()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Sizes of filters don't match: {} and {}", + c1->size(), c2->size()); + + // TODO: use proper vectorized implementation of AND?
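+ /// For example, for c1 = {2, 0, 1} and c2 = {1, 1, 3} the expected result is {1, 0, 1}; a byte-wise '&' would give {0, 0, 1} because 2 & 1 == 0, which is why the loops below use logical '&&'.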
+ auto res = ColumnUInt8::create(c1->size()); + auto & res_data = res->getData(); + const auto & c1_data = typeid_cast(*c1).getData(); + const auto & c2_data = typeid_cast(*c2).getData(); + const size_t size = c1->size(); + const size_t step = 16; + size_t i = 0; + /// NOTE: '&&' must be used instead of '&' for 'AND' operation because UInt8 columns might contain any non-zero + /// value for true and we cannot bitwise AND them to get the correct result. + for (; i + step < size; i += step) + for (size_t j = 0; j < step; ++j) + res_data[i+j] = (c1_data[i+j] && c2_data[i+j]); + for (; i < size; ++i) + res_data[i] = (c1_data[i] && c2_data[i]); + return res; } -void MergeTreeRangeReader::ReadResult::optimize(bool can_read_incomplete_granules, bool allow_filter_columns) +static ColumnPtr combineFilters(ColumnPtr first, ColumnPtr second); + +void MergeTreeRangeReader::ReadResult::applyFilter(const FilterWithCachedCount & filter) { - if (total_rows_per_granule == 0 || filter == nullptr) + if (filter.size() != num_rows) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Filter size {} doesn't match number of rows {}", + filter.size(), num_rows); + + LOG_TEST(log, "ReadResult::applyFilter() num_rows before: {}", num_rows); + + filterColumns(columns, filter); + + { + auto tmp_columns = additional_columns.getColumns(); + filterColumns(tmp_columns, filter); + if (!tmp_columns.empty()) + additional_columns.setColumns(tmp_columns); + else + additional_columns.clear(); + } + + num_rows = filter.countBytesInFilter(); + + LOG_TEST(log, "ReadResult::applyFilter() num_rows after: {}", num_rows); +} + +void MergeTreeRangeReader::ReadResult::optimize(const FilterWithCachedCount & current_filter, bool can_read_incomplete_granules) +{ + checkInternalConsistency(); + + /// Combine new filter with the previous one if it is present. + /// This filter has the size of total_rows_per_granule. It is applied after reading contiguous chunks from + /// the start of each granule. + FilterWithCachedCount filter = current_filter; + if (final_filter.present()) + { + /// If current filter has the same size as the final filter, it means that the final filter has not been applied. + /// In this case we AND current filter with the existing final filter. + /// Otherwise, when the final filter has already been applied, the size of the current step filter will be equal to the number of ones + /// in the final filter. In this case we combine current filter with the final filter.
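+ /// For example, with a final filter of size 8192 containing 100 ones, a current filter of size 8192 is AND-ed element-wise, while a current filter of size 100 (computed on the already filtered rows) is expanded back to size 8192 by combineFilters().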
+ ColumnPtr combined_filter; + if (current_filter.size() == final_filter.size()) + combined_filter = andFilters(final_filter.getColumn(), current_filter.getColumn()); + else + combined_filter = combineFilters(final_filter.getColumn(), current_filter.getColumn()); + + filter = FilterWithCachedCount(combined_filter); + } + + if (total_rows_per_granule == 0 || !filter.present()) return; NumRows zero_tails; - auto total_zero_rows_in_tails = countZeroTails(filter->getData(), zero_tails, can_read_incomplete_granules); + auto total_zero_rows_in_tails = countZeroTails(filter.getData(), zero_tails, can_read_incomplete_granules); - if (total_zero_rows_in_tails == filter->size()) + LOG_TEST(log, "ReadResult::optimize() before: {}", dumpInfo()); + + SCOPE_EXIT(checkInternalConsistency()); + + SCOPE_EXIT({ + LOG_TEST(log, "ReadResult::optimize() after: {}", dumpInfo()); + }); + + if (total_zero_rows_in_tails == filter.size()) { + LOG_TEST(log, "ReadResult::optimize() combined filter is const False"); clear(); return; } - else if (total_zero_rows_in_tails == 0 && countBytesInResultFilter(filter->getData()) == filter->size()) + else if (total_zero_rows_in_tails == 0 && filter.countBytesInFilter() == filter.size()) { + LOG_TEST(log, "ReadResult::optimize() combined filter is const True"); setFilterConstTrue(); return; } /// Just a guess. If only a few rows may be skipped, it's better not to skip at all. - else if (2 * total_zero_rows_in_tails > filter->size()) + else if (2 * total_zero_rows_in_tails > filter.size()) { + const NumRows rows_per_granule_previous = rows_per_granule; + const size_t total_rows_per_granule_previous = total_rows_per_granule; + for (auto i : collections::range(0, rows_per_granule.size())) { - rows_per_granule_original.push_back(rows_per_granule[i]); rows_per_granule[i] -= zero_tails[i]; } - num_rows_to_skip_in_last_granule += rows_per_granule_original.back() - rows_per_granule.back(); + num_rows_to_skip_in_last_granule += rows_per_granule_previous.back() - rows_per_granule.back(); + total_rows_per_granule = total_rows_per_granule_previous - total_zero_rows_in_tails; - filter_original = filter; - filter_holder_original = std::move(filter_holder); - - /// Check if const 1 after shrink - if (allow_filter_columns && countBytesInResultFilter(filter->getData()) + total_zero_rows_in_tails == total_rows_per_granule) + /// Check if const 1 after shrink. + /// We can apply shrink only if after the previous step the number of rows in the result + /// matches the rows_per_granule info. Otherwise we will not be able to match newly added zeros in granule tails. + if (num_rows == total_rows_per_granule_previous && + filter.countBytesInFilter() + total_zero_rows_in_tails == total_rows_per_granule_previous) /// All zeros are in tails? { - total_rows_per_granule = total_rows_per_granule - total_zero_rows_in_tails; - num_rows = total_rows_per_granule; setFilterConstTrue(); - shrink(columns); /// shrink acts as filtering in such case + + /// If all zeros are in granule tails, we can use shrink to filter out rows. 
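+ /// E.g. with rows_per_granule_previous = {8192, 8192} and zero tails of {0, 100}, shrink() keeps the first 8192 and 8092 rows of the two granules, which is exactly what applying the filter would do when all of its zeros are in the tails.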
+ shrink(columns, rows_per_granule_previous); /// shrink acts as filtering in such case + auto c = additional_columns.getColumns(); + shrink(c, rows_per_granule_previous); + additional_columns.setColumns(c); + + num_rows = total_rows_per_granule; + + LOG_TEST(log, "ReadResult::optimize() after shrink {}", dumpInfo()); } else { - auto new_filter = ColumnUInt8::create(filter->size() - total_zero_rows_in_tails); + auto new_filter = ColumnUInt8::create(filter.size() - total_zero_rows_in_tails); IColumn::Filter & new_data = new_filter->getData(); - collapseZeroTails(filter->getData(), new_data); - total_rows_per_granule = new_filter->size(); - num_rows = total_rows_per_granule; - filter = new_filter.get(); - filter_holder = std::move(new_filter); + /// Shorten the filter by removing zeros from granule tails + collapseZeroTails(filter.getData(), rows_per_granule_previous, new_data); + if (total_rows_per_granule != new_filter->size()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "New filter size {} doesn't match number of rows to be read {}", + new_filter->size(), total_rows_per_granule); + + /// Need to apply the combined filter here before replacing it with the shortened one because otherwise + /// the filter size will not match the number of rows in the result columns. + if (num_rows == total_rows_per_granule_previous) + { + /// Filter from the previous steps has not been applied yet, do it now. + applyFilter(filter); + } + else + { + /// Filter was applied before, so apply only the new filter from the current step. + applyFilter(current_filter); + } + + final_filter = FilterWithCachedCount(new_filter->getPtr()); + if (num_rows != final_filter.countBytesInFilter()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Count of 1s in final filter {} doesn't match number of rows {}", + final_filter.countBytesInFilter(), num_rows); + + LOG_TEST(log, "ReadResult::optimize() after collapseZeroTails {}", dumpInfo()); } - need_filter = true; } - /// Another guess, if it's worth filtering at PREWHERE - else if (countBytesInResultFilter(filter->getData()) < 0.6 * filter->size()) - need_filter = true; + else + { + /// Check if we have rows already filtered at the previous step. In that case we must apply the filter because + /// otherwise num_rows doesn't match total_rows_per_granule and the next read step will not know how to filter + /// newly read columns to match the num_rows.
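+ /// (For example, a previous step may have left num_rows == 100 while total_rows_per_granule is still 8192.)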
+ if (num_rows != total_rows_per_granule) + { + applyFilter(current_filter); + } + /// Another guess, if it's worth filtering at PREWHERE + else if (filter.countBytesInFilter() < 0.6 * filter.size()) + { + applyFilter(filter); + } + + final_filter = std::move(filter); + } } size_t MergeTreeRangeReader::ReadResult::countZeroTails(const IColumn::Filter & filter_vec, NumRows & zero_tails, bool can_read_incomplete_granules) const @@ -441,7 +639,7 @@ size_t MergeTreeRangeReader::ReadResult::countZeroTails(const IColumn::Filter & return total_zero_rows_in_tails; } -void MergeTreeRangeReader::ReadResult::collapseZeroTails(const IColumn::Filter & filter_vec, IColumn::Filter & new_filter_vec) +void MergeTreeRangeReader::ReadResult::collapseZeroTails(const IColumn::Filter & filter_vec, const NumRows & rows_per_granule_previous, IColumn::Filter & new_filter_vec) const { const auto * filter_data = filter_vec.data(); auto * new_filter_data = new_filter_vec.data(); @@ -449,7 +647,7 @@ void MergeTreeRangeReader::ReadResult::collapseZeroTails(const IColumn::Filter & for (auto i : collections::range(0, rows_per_granule.size())) { memcpySmallAllowReadWriteOverflow15(new_filter_data, filter_data, rows_per_granule[i]); - filter_data += rows_per_granule_original[i]; + filter_data += rows_per_granule_previous[i]; new_filter_data += rows_per_granule[i]; } @@ -597,54 +795,6 @@ size_t MergeTreeRangeReader::ReadResult::numZerosInTail(const UInt8 * begin, con return count; } -/// Filter size must match total_rows_per_granule -void MergeTreeRangeReader::ReadResult::setFilter(const ColumnPtr & new_filter) -{ - if (!new_filter && filter) - throw Exception("Can't replace existing filter with empty.", ErrorCodes::LOGICAL_ERROR); - - if (filter) - { - size_t new_size = new_filter->size(); - - if (new_size != total_rows_per_granule) - throw Exception("Can't set filter because it's size is " + toString(new_size) + " but " - + toString(total_rows_per_granule) + " rows was read.", ErrorCodes::LOGICAL_ERROR); - } - - ConstantFilterDescription const_description(*new_filter); - if (const_description.always_true) - { - setFilterConstTrue(); - } - else if (const_description.always_false) - { - clear(); - } - else - { - FilterDescription filter_description(*new_filter); - filter_holder = filter_description.data_holder ? 
filter_description.data_holder : new_filter; - filter = typeid_cast(filter_holder.get()); - if (!filter) - throw Exception("setFilter function expected ColumnUInt8.", ErrorCodes::LOGICAL_ERROR); - } -} - - -size_t MergeTreeRangeReader::ReadResult::countBytesInResultFilter(const IColumn::Filter & filter_) -{ - auto it = filter_bytes_map.find(&filter_); - if (it == filter_bytes_map.end()) - { - auto bytes = countBytesInFilter(filter_); - filter_bytes_map[&filter_] = bytes; - return bytes; - } - else - return it->second; -} - MergeTreeRangeReader::MergeTreeRangeReader( IMergeTreeReader * merge_tree_reader_, MergeTreeRangeReader * prev_reader_, @@ -659,30 +809,37 @@ MergeTreeRangeReader::MergeTreeRangeReader( , is_initialized(true) { if (prev_reader) - sample_block = prev_reader->getSampleBlock(); + result_sample_block = prev_reader->getSampleBlock(); for (const auto & name_and_type : merge_tree_reader->getColumns()) - sample_block.insert({name_and_type.type->createColumn(), name_and_type.type, name_and_type.name}); + { + read_sample_block.insert({name_and_type.type->createColumn(), name_and_type.type, name_and_type.name}); + result_sample_block.insert({name_and_type.type->createColumn(), name_and_type.type, name_and_type.name}); + } for (const auto & column_name : non_const_virtual_column_names_) { - if (sample_block.has(column_name)) + if (result_sample_block.has(column_name)) continue; non_const_virtual_column_names.push_back(column_name); - if (column_name == "_part_offset") - sample_block.insert(ColumnWithTypeAndName(ColumnUInt64::create(), std::make_shared(), column_name)); + if (column_name == "_part_offset" && !prev_reader) + { + /// _part_offset column is filled by the first reader. + read_sample_block.insert(ColumnWithTypeAndName(ColumnUInt64::create(), std::make_shared(), column_name)); + result_sample_block.insert(ColumnWithTypeAndName(ColumnUInt64::create(), std::make_shared(), column_name)); + } } if (prewhere_info) { const auto & step = *prewhere_info; if (step.actions) - step.actions->execute(sample_block, true); + step.actions->execute(result_sample_block, true); if (step.remove_column) - sample_block.erase(step.column_name); + result_sample_block.erase(step.column_name); } } @@ -765,7 +922,12 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::read(size_t max_rows, Mar if (max_rows == 0) throw Exception("Expected at least 1 row to read, got 0.", ErrorCodes::LOGICAL_ERROR); - ReadResult read_result; + ReadResult read_result(log); + + SCOPE_EXIT({ + LOG_TEST(log, "read() returned {}, sample block {}", + read_result.dumpInfo(), this->result_sample_block.dumpNames()); + }); if (prev_reader) { @@ -778,69 +940,52 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::read(size_t max_rows, Mar if (read_result.num_rows == 0) return read_result; - bool has_columns = false; + /// Calculate and update read bytes size_t total_bytes = 0; for (auto & column : columns) { if (column) { total_bytes += column->byteSize(); - has_columns = true; } } - read_result.addNumBytesRead(total_bytes); - bool should_evaluate_missing_defaults = false; - - if (has_columns) - { - /// num_read_rows >= read_result.num_rows - /// We must filter block before adding columns to read_result.block - - /// Fill missing columns before filtering because some arrays from Nested may have empty data. 
- merge_tree_reader->fillMissingColumns(columns, should_evaluate_missing_defaults, num_read_rows); - - if (read_result.getFilter()) - filterColumns(columns, read_result.getFilter()->getData()); - } - else - { - size_t num_rows = read_result.num_rows; - - /// If block is empty, we still may need to add missing columns. - /// In that case use number of rows in result block and don't filter block. - if (num_rows) - merge_tree_reader->fillMissingColumns(columns, should_evaluate_missing_defaults, num_rows); - } - if (!columns.empty()) { + /// If all requested columns are absent in the part, num_read_rows will be 0. + /// In this case we need to use the number of rows in the result to fill the default values and not filter the block. + if (num_read_rows == 0) + num_read_rows = read_result.num_rows; + + /// fillMissingColumns() must be called after reading but before any filtering because + /// some columns (e.g. arrays) might be only partially filled and thus not be valid and + /// fillMissingColumns() fixes this. + bool should_evaluate_missing_defaults; + merge_tree_reader->fillMissingColumns(columns, should_evaluate_missing_defaults, + num_read_rows); + + if (read_result.total_rows_per_granule == num_read_rows && read_result.num_rows != num_read_rows) + { + /// We have a filter applied from the previous step, + /// so we need to apply it to the newly read rows. + if (!read_result.final_filter.present() || read_result.final_filter.countBytesInFilter() != read_result.num_rows) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Final filter is missing or has mismatching size, read_result: {}", + read_result.dumpInfo()); + + filterColumns(columns, read_result.final_filter); + } + /// If some columns absent in part, then evaluate default values if (should_evaluate_missing_defaults) { - auto block = prev_reader->sample_block.cloneWithColumns(read_result.columns); - auto block_before_prewhere = read_result.block_before_prewhere; - for (const auto & column : block) - { - if (block_before_prewhere.has(column.name)) - block_before_prewhere.erase(column.name); - } + Block additional_columns = prev_reader->getSampleBlock().cloneWithColumns(read_result.columns); + for (const auto & col : read_result.additional_columns) + additional_columns.insert(col); - if (block_before_prewhere) - { - if (read_result.need_filter) - { - auto old_columns = block_before_prewhere.getColumns(); - filterColumns(old_columns, read_result.getFilterOriginal()->getData()); - block_before_prewhere.setColumns(old_columns); - } - - for (auto & column : block_before_prewhere) - block.insert(std::move(column)); - } - merge_tree_reader->evaluateMissingDefaults(block, columns); + merge_tree_reader->evaluateMissingDefaults(additional_columns, columns); } + /// If columns not empty, then apply on-fly alter conversions if any required merge_tree_reader->performRequiredConversions(columns); } @@ -854,11 +999,15 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::read(size_t max_rows, Mar read_result = startReadingChain(max_rows, ranges); read_result.num_rows = read_result.numReadRows(); - if (read_result.num_rows) + LOG_TEST(log, "First reader returned: {}, requested columns: {}", + read_result.dumpInfo(), dumpNames(merge_tree_reader->getColumns())); + + if (read_result.num_rows == 0) + return read_result; + { /// Physical columns go first and then some virtual columns follow - /// TODO: is there a better way to account for virtual columns that were filled by previous readers?
- size_t physical_columns_count = read_result.columns.size() - read_result.extra_columns_filled.size(); + size_t physical_columns_count = merge_tree_reader->getColumns().size(); Columns physical_columns(read_result.columns.begin(), read_result.columns.begin() + physical_columns_count); bool should_evaluate_missing_defaults; @@ -875,8 +1024,6 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::read(size_t max_rows, Mar for (size_t i = 0; i < physical_columns.size(); ++i) read_result.columns[i] = std::move(physical_columns[i]); } - else - read_result.columns.clear(); size_t total_bytes = 0; for (auto & column : read_result.columns) @@ -885,18 +1032,35 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::read(size_t max_rows, Mar read_result.addNumBytesRead(total_bytes); } - if (read_result.num_rows == 0) - return read_result; - executePrewhereActionsAndFilterColumns(read_result); + read_result.checkInternalConsistency(); + + if (!read_result.can_return_prewhere_column_without_filtering) + { + if (!read_result.filterWasApplied()) + { + /// TODO: another solution might be to set all 0s from final filter into the prewhere column and not filter all the columns here + /// but rely on filtering in WHERE. + read_result.applyFilter(read_result.final_filter); + read_result.checkInternalConsistency(); + } + + read_result.can_return_prewhere_column_without_filtering = true; + } + + if (read_result.num_rows != 0 && read_result.columns.size() != getSampleBlock().columns()) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Number of columns in result doesn't match number of columns in sample block, read_result: {}, sample block: {}", + read_result.dumpInfo(), getSampleBlock().dumpStructure()); + return read_result; } - MergeTreeRangeReader::ReadResult MergeTreeRangeReader::startReadingChain(size_t max_rows, MarkRanges & ranges) { - ReadResult result; + ReadResult result(log); result.columns.resize(merge_tree_reader->getColumns().size()); size_t current_task_last_mark = getLastMark(ranges); @@ -946,14 +1110,11 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::startReadingChain(size_t result.addRows(stream.finalize(result.columns)); /// Last granule may be incomplete. - if (!result.rowsPerGranule().empty()) + if (!result.rows_per_granule.empty()) result.adjustLastGranule(); - for (const auto & column_name : non_const_virtual_column_names) - { - if (column_name == "_part_offset") - fillPartOffsetColumn(result, leading_begin_part_offset, leading_end_part_offset); - } + if (read_sample_block.has("_part_offset")) + fillPartOffsetColumn(result, leading_begin_part_offset, leading_end_part_offset); return result; } @@ -968,11 +1129,13 @@ void MergeTreeRangeReader::fillPartOffsetColumn(ReadResult & result, UInt64 lead UInt64 * pos = vec.data(); UInt64 * end = &vec[num_rows]; + /// Fill the remaining part of the previous range (it was started in the previous read request). while (pos < end && leading_begin_part_offset < leading_end_part_offset) *pos++ = leading_begin_part_offset++; - const auto start_ranges = result.startedRanges(); + const auto & start_ranges = result.started_ranges; + /// Fill the ranges which were started in the current read request.
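+ /// Each started range contributes the consecutive offsets [getMarkStartingRow(range.begin), getMarkStartingRow(range.end)).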
for (const auto & start_range : start_ranges) { UInt64 start_part_offset = index_granularity->getMarkStartingRow(start_range.range.begin); @@ -983,7 +1146,6 @@ void MergeTreeRangeReader::fillPartOffsetColumn(ReadResult & result, UInt64 lead } result.columns.emplace_back(std::move(column)); - result.extra_columns_filled.push_back("_part_offset"); } Columns MergeTreeRangeReader::continueReadingChain(const ReadResult & result, size_t & num_rows) @@ -995,7 +1157,7 @@ Columns MergeTreeRangeReader::continueReadingChain(const ReadResult & result, si if (merge_tree_reader->getColumns().empty()) return columns; - if (result.rowsPerGranule().empty()) + if (result.rows_per_granule.empty()) { /// If zero rows were read on prev step, than there is no more rows to read. /// Last granule may have less rows than index_granularity, so finish reading manually. @@ -1005,8 +1167,8 @@ Columns MergeTreeRangeReader::continueReadingChain(const ReadResult & result, si columns.resize(merge_tree_reader->numColumnsInResult()); - const auto & rows_per_granule = result.rowsPerGranule(); - const auto & started_ranges = result.startedRanges(); + const auto & rows_per_granule = result.rows_per_granule; + const auto & started_ranges = result.started_ranges; size_t current_task_last_mark = ReadResult::getLastMark(started_ranges); size_t next_range_to_start = 0; @@ -1027,13 +1189,13 @@ Columns MergeTreeRangeReader::continueReadingChain(const ReadResult & result, si num_rows += stream.read(columns, rows_per_granule[i], !last); } - stream.skip(result.numRowsToSkipInLastGranule()); + stream.skip(result.num_rows_to_skip_in_last_granule); num_rows += stream.finalize(columns); /// added_rows may be zero if all columns were read in prewhere and it's ok. - if (num_rows && num_rows != result.totalRowsPerGranule()) + if (num_rows && num_rows != result.total_rows_per_granule) throw Exception("RangeReader read " + toString(num_rows) + " rows, but " - + toString(result.totalRowsPerGranule()) + " expected.", ErrorCodes::LOGICAL_ERROR); + + toString(result.total_rows_per_granule) + " expected.", ErrorCodes::LOGICAL_ERROR); return columns; } @@ -1047,7 +1209,7 @@ static void checkCombinedFiltersSize(size_t bytes_in_first_filter, size_t second } /// Second filter size must be equal to number of 1s in the first filter. -/// The result size is equal to first filter size. +/// The result has size equal to first filter size and contains 1s only where both filters contain 1s. static ColumnPtr combineFilters(ColumnPtr first, ColumnPtr second) { ConstantFilterDescription first_const_descr(*first); @@ -1100,23 +1262,22 @@ static ColumnPtr combineFilters(ColumnPtr first, ColumnPtr second) return mut_first; } -void MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(ReadResult & result) +void MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(ReadResult & result) const { + result.checkInternalConsistency(); + if (!prewhere_info) return; - const auto & header = merge_tree_reader->getColumns(); - size_t num_columns = header.size(); + const auto & header = read_sample_block; + size_t num_columns = header.columns(); /// Check that we have columns from previous steps and newly read required columns - if (result.columns.size() < num_columns + result.extra_columns_filled.size()) + if (result.columns.size() < num_columns) throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid number of columns passed to MergeTreeRangeReader. Expected {}, got {}", num_columns, result.columns.size()); - /// This filter has the size of total_rows_per granule. 
It is applied after reading contiguous chunks from - /// the start of each granule. - ColumnPtr combined_filter; /// Filter computed at the current step. Its size is equal to num_rows which is <= total_rows_per_granule ColumnPtr current_step_filter; size_t prewhere_column_pos; @@ -1138,35 +1299,28 @@ void MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(ReadResult & r for (auto name_and_type = header.begin(); name_and_type != header.end() && pos < result.columns.size(); ++pos, ++name_and_type) block.insert({result.columns[pos], name_and_type->type, name_and_type->name}); - for (const auto & column_name : non_const_virtual_column_names) { - if (block.has(column_name)) - continue; + /// Columns might be projected out. We need to store them here so that default columns can be evaluated later. + Block additional_columns = block; - if (column_name == "_part_offset") - { - if (pos >= result.columns.size()) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Invalid number of columns passed to MergeTreeRangeReader. Expected {}, got {}", - num_columns, result.columns.size()); + if (prewhere_info->actions) + prewhere_info->actions->execute(block); - block.insert({result.columns[pos], std::make_shared(), column_name}); - } - else if (column_name == LightweightDeleteDescription::FILTER_COLUMN.name) + result.additional_columns.clear(); + /// Additional columns might only be needed if there are more steps in the chain. + if (!last_reader_in_chain) { - /// Do nothing, it will be added later + for (auto & col : additional_columns) + { + /// Exclude columns that are present in the result block to avoid storing them and filtering twice. + /// TODO: also need to exclude the columns that are not needed for the next steps. + if (block.has(col.name)) + continue; + result.additional_columns.insert(col); + } } - else - throw Exception("Unexpected non-const virtual column: " + column_name, ErrorCodes::LOGICAL_ERROR); - ++pos; } - /// Columns might be projected out. We need to store them here so that default columns can be evaluated later. 
- result.block_before_prewhere = block; - - if (prewhere_info->actions) - prewhere_info->actions->execute(block); - prewhere_column_pos = block.getPositionByName(prewhere_info->column_name); result.columns.clear(); @@ -1174,90 +1328,38 @@ void MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(ReadResult & r for (auto & col : block) result.columns.emplace_back(std::move(col.column)); - current_step_filter.swap(result.columns[prewhere_column_pos]); - combined_filter = current_step_filter; + current_step_filter = result.columns[prewhere_column_pos]; } - if (result.getFilter()) - { - ColumnPtr prev_filter = result.getFilterHolder(); - combined_filter = combineFilters(prev_filter, std::move(combined_filter)); - } - - result.setFilter(combined_filter); - - /// If there is a WHERE, we filter in there, and only optimize IO and shrink columns here - if (!last_reader_in_chain) - result.optimize(merge_tree_reader->canReadIncompleteGranules(), true); - - /// If we read nothing or filter gets optimized to nothing - if (result.totalRowsPerGranule() == 0) - result.setFilterConstFalse(); - /// If we need to filter in PREWHERE - else if (prewhere_info->need_filter || result.need_filter) - { - /// If there is a filter and without optimized - if (result.getFilter() && last_reader_in_chain) - { - const auto * result_filter = result.getFilter(); - /// optimize is not called, need to check const 1 and const 0 - size_t bytes_in_filter = result.countBytesInResultFilter(result_filter->getData()); - if (bytes_in_filter == 0) - result.setFilterConstFalse(); - else if (bytes_in_filter == result.num_rows) - result.setFilterConstTrue(); - } - - /// If there is still a filter, do the filtering now - if (result.getFilter()) - { - /// filter might be shrunk while columns not - const auto * result_filter = result.getFilterOriginal(); - - filterColumns(result.columns, current_step_filter); - - result.need_filter = true; - - bool has_column = false; - for (auto & column : result.columns) - { - if (column) - { - has_column = true; - result.num_rows = column->size(); - break; - } - } - - /// There is only one filter column. Record the actual number - if (!has_column) - result.num_rows = result.countBytesInResultFilter(result_filter->getData()); - } - - /// Check if the PREWHERE column is needed - if (!result.columns.empty()) - { - if (prewhere_info->remove_column) - result.columns.erase(result.columns.begin() + prewhere_column_pos); - else - result.columns[prewhere_column_pos] = - getSampleBlock().getByName(prewhere_info->column_name).type-> - createColumnConst(result.num_rows, 1u)->convertToFullColumnIfConst(); - } - } - /// Filter in WHERE instead + if (prewhere_info->remove_column) + result.columns.erase(result.columns.begin() + prewhere_column_pos); else { - if (prewhere_info->remove_column) - result.columns.erase(result.columns.begin() + prewhere_column_pos); - else - { - auto type = getSampleBlock().getByName(prewhere_info->column_name).type; - ColumnWithTypeAndName col(result.getFilterHolder()->convertToFullColumnIfConst(), std::make_shared(), ""); - result.columns[prewhere_column_pos] = castColumn(col, type); - result.clearFilter(); // Acting as a flag to not filter in PREWHERE - } + /// In case when we are not removing prewhere column the caller expects it to serve as a final filter: + /// it must contain 0s not only from the current step but also from all the previous steps. 
+ /// One way to achieve this is to apply the final_filter if we know that the final_filter was not applied at + /// several previous steps but was accumulated instead. + result.can_return_prewhere_column_without_filtering = + (!result.final_filter.present() || result.final_filter.countBytesInFilter() == result.num_rows); } + + FilterWithCachedCount current_filter(current_step_filter); + + result.optimize(current_filter, merge_tree_reader->canReadIncompleteGranules()); + + if (prewhere_info->need_filter && !result.filterWasApplied()) + { + /// Depending on whether the final filter was applied at the previous step or not we need to apply either + /// just the current step filter or the accumulated filter. + FilterWithCachedCount filter_to_apply = + current_filter.size() == result.total_rows_per_granule ? + result.final_filter : + current_filter; + + result.applyFilter(filter_to_apply); + } + + LOG_TEST(log, "After execute prewhere {}", result.dumpInfo()); } std::string PrewhereExprInfo::dump() const diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.h b/src/Storages/MergeTree/MergeTreeRangeReader.h index 06f3f5760fb..039a499e9c1 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.h +++ b/src/Storages/MergeTree/MergeTreeRangeReader.h @@ -1,6 +1,9 @@ #pragma once #include #include +#include +#include +#include #include namespace DB @@ -34,6 +37,45 @@ struct PrewhereExprInfo std::string dump() const; }; +class FilterWithCachedCount +{ + ConstantFilterDescription const_description; /// TODO: ConstantFilterDescription only checks always true/false for const columns + /// think how to handle when the column is not const but has all 0s or all 1s + ColumnPtr column = nullptr; + const IColumn::Filter * data = nullptr; + mutable size_t cached_count_bytes = -1; + +public: + explicit FilterWithCachedCount() = default; + + explicit FilterWithCachedCount(const ColumnPtr & column_) + : const_description(*column_) + { + ColumnPtr col = column_->convertToFullIfNeeded(); + FilterDescription desc(*col); + column = desc.data_holder ? desc.data_holder : col; + data = desc.data; + } + + bool present() const { return !!column; } + + bool alwaysTrue() const { return const_description.always_true; } + bool alwaysFalse() const { return const_description.always_false; } + + ColumnPtr getColumn() const { return column; } + + const IColumn::Filter & getData() const { return *data; } + + size_t size() const { return column->size(); } + + size_t countBytesInFilter() const + { + if (cached_count_bytes == size_t(-1)) + cached_count_bytes = DB::countBytesInFilter(*data); + return cached_count_bytes; + } +}; + /// MergeTreeReader iterator which allows sequential reading for arbitrary number of rows between pairs of marks in the same part. /// Stores reading state, which can be inside granule. Can skip rows in current granule and start reading from next mark. /// Used generally for reading number of rows less than index granularity to decrease cache misses for fat blocks. @@ -174,53 +216,46 @@ public: using RangesInfo = std::vector; - const RangesInfo & startedRanges() const { return started_ranges; } - const NumRows & rowsPerGranule() const { return rows_per_granule; } + explicit ReadResult(Poco::Logger * log_) : log(log_) {} static size_t getLastMark(const MergeTreeRangeReader::ReadResult::RangesInfo & ranges); - /// The number of rows were read at LAST iteration in chain. <= num_added_rows + num_filtered_rows.
- size_t totalRowsPerGranule() const { return total_rows_per_granule; } - size_t numRowsToSkipInLastGranule() const { return num_rows_to_skip_in_last_granule; } - /// Filter you need to apply to newly-read columns in order to add them to block. - const ColumnUInt8 * getFilterOriginal() const { return filter_original ? filter_original : filter; } - const ColumnUInt8 * getFilter() const { return filter; } - ColumnPtr & getFilterHolder() { return filter_holder; } - void addGranule(size_t num_rows_); void adjustLastGranule(); void addRows(size_t rows) { num_read_rows += rows; } void addRange(const MarkRange & range) { started_ranges.push_back({rows_per_granule.size(), range}); } - /// Set filter or replace old one. Filter must have more zeroes than previous. - void setFilter(const ColumnPtr & new_filter); - /// For each granule calculate the number of filtered rows at the end. Remove them and update filter. - void optimize(bool can_read_incomplete_granules, bool allow_filter_columns); + /// Add current step filter to the result and then for each granule calculate the number of filtered rows at the end. + /// Remove them and update filter. + /// Apply the filter to the columns and update num_rows if required + void optimize(const FilterWithCachedCount & current_filter, bool can_read_incomplete_granules); /// Remove all rows from granules. void clear(); - void clearFilter() { filter = nullptr; } void setFilterConstTrue(); - void setFilterConstFalse(); void addNumBytesRead(size_t count) { num_bytes_read += count; } - void shrink(Columns & old_columns); + /// Shrinks columns according to the diff between current and previous rows_per_granule. + void shrink(Columns & old_columns, const NumRows & rows_per_granule_previous) const; - size_t countBytesInResultFilter(const IColumn::Filter & filter); + /// Applies the filter to the columns and updates num_rows. + void applyFilter(const FilterWithCachedCount & filter); - /// If this flag is false than filtering form PREWHERE can be delayed and done in WHERE - /// to reduce memory copies and applying heavy filters multiple times - bool need_filter = false; + /// Verifies that columns and filter sizes match. + /// The checks might be non-trivial, so it makes sense to have them only in debug builds. + void checkInternalConsistency() const; - Block block_before_prewhere; + std::string dumpInfo() const; + + /// Contains columns that are not included in the result but might be needed for default values calculation. + Block additional_columns; RangesInfo started_ranges; /// The number of rows read from each granule. /// Granule here is not number of rows between two marks /// It's amount of rows per single reading act NumRows rows_per_granule; - NumRows rows_per_granule_original; /// Sum(rows_per_granule) size_t total_rows_per_granule = 0; /// The number of rows was read at first step. May be zero if no read columns present in part. @@ -229,29 +264,36 @@ public: size_t num_rows_to_skip_in_last_granule = 0; /// Without any filtration. size_t num_bytes_read = 0; - /// nullptr if prev reader hasn't prewhere_actions. Otherwise filter.size() >= total_rows_per_granule. - ColumnPtr filter_holder; - ColumnPtr filter_holder_original; - const ColumnUInt8 * filter = nullptr; - const ColumnUInt8 * filter_original = nullptr; - void collapseZeroTails(const IColumn::Filter & filter, IColumn::Filter & new_filter); + /// This filter has the size of total_rows_per_granule. This means that it can be applied to newly read columns.
+ /// The result of applying this filter is that only rows that pass all previous filtering steps will remain. + FilterWithCachedCount final_filter; + + /// This flag is true when prewhere column can be returned without filtering. + /// It's true when it contains 0s from all filtering steps (not just the step when it was calculated). + /// NOTE: If we accumulated the final_filter for several steps without applying it then prewhere column calculated at the last step + /// will not contain 0s from all previous steps. + bool can_return_prewhere_column_without_filtering = true; + + /// Checks if result columns have current final_filter applied. + bool filterWasApplied() const { return !final_filter.present() || final_filter.countBytesInFilter() == num_rows; } + + /// Builds updated filter by cutting zeros in granules tails + void collapseZeroTails(const IColumn::Filter & filter, const NumRows & rows_per_granule_previous, IColumn::Filter & new_filter) const; size_t countZeroTails(const IColumn::Filter & filter, NumRows & zero_tails, bool can_read_incomplete_granules) const; static size_t numZerosInTail(const UInt8 * begin, const UInt8 * end); - std::map filter_bytes_map; - - Names extra_columns_filled; + Poco::Logger * log; }; ReadResult read(size_t max_rows, MarkRanges & ranges); - const Block & getSampleBlock() const { return sample_block; } + const Block & getSampleBlock() const { return result_sample_block; } private: ReadResult startReadingChain(size_t max_rows, MarkRanges & ranges); Columns continueReadingChain(const ReadResult & result, size_t & num_rows); - void executePrewhereActionsAndFilterColumns(ReadResult & result); + void executePrewhereActionsAndFilterColumns(ReadResult & result) const; void fillPartOffsetColumn(ReadResult & result, UInt64 leading_begin_part_offset, UInt64 leading_end_part_offset); IMergeTreeReader * merge_tree_reader = nullptr; @@ -261,11 +303,14 @@ private: Stream stream; - Block sample_block; + Block read_sample_block; /// Block with columns that are actually read from disk + non-const virtual columns that are filled at this step. + Block result_sample_block; /// Block with columns that are returned by this step. bool last_reader_in_chain = false; bool is_initialized = false; Names non_const_virtual_column_names; + + Poco::Logger * log = &Poco::Logger::get("MergeTreeRangeReader"); }; } diff --git a/src/Storages/MergeTree/MergeTreeSettings.cpp b/src/Storages/MergeTree/MergeTreeSettings.cpp index 5b78a59687b..2bf717c883a 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.cpp +++ b/src/Storages/MergeTree/MergeTreeSettings.cpp @@ -99,6 +99,15 @@ void MergeTreeSettings::sanityCheck(size_t background_pool_tasks) const background_pool_tasks); } + // Zero index_granularity is nonsensical. + if (index_granularity < 1) + { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "index_granularity: value {} makes no sense", + index_granularity); + } + // The min_index_granularity_bytes value is 1024 b and index_granularity_bytes is 10 mb by default. // If index_granularity_bytes is not disabled i.e > 0 b, then always ensure that it's greater than // min_index_granularity_bytes. 
This is mainly a safeguard against accidents whereby a really low diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index 13a72c24c59..0de71e94ea8 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -24,7 +24,7 @@ MergeTreeSink::MergeTreeSink( , metadata_snapshot(metadata_snapshot_) , max_parts_per_block(max_parts_per_block_) , context(context_) - , storage_snapshot(storage.getStorageSnapshot(metadata_snapshot, context)) + , storage_snapshot(storage.getStorageSnapshotWithoutParts(metadata_snapshot)) { } diff --git a/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp b/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp index b3625ba8e93..5b916096e06 100644 --- a/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp +++ b/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp @@ -138,7 +138,8 @@ void MergeTreeWriteAheadLog::rotate(const std::unique_lock &) MergeTreeData::MutableDataPartsVector MergeTreeWriteAheadLog::restore( const StorageMetadataPtr & metadata_snapshot, ContextPtr context, - std::unique_lock & parts_lock) + std::unique_lock & parts_lock, + bool readonly) { std::unique_lock lock(write_mutex); @@ -207,7 +208,10 @@ MergeTreeData::MutableDataPartsVector MergeTreeWriteAheadLog::restore( /// If file is broken, do not write new parts to it. /// But if it contains any part rotate and save them. if (max_block_number == -1) - disk->removeFile(path); + { + if (!readonly) + disk->removeFile(path); + } else if (name == DEFAULT_WAL_FILE_NAME) rotate(lock); @@ -256,7 +260,7 @@ MergeTreeData::MutableDataPartsVector MergeTreeWriteAheadLog::restore( [&dropped_parts](const auto & part) { return dropped_parts.count(part->name) == 0; }); /// All parts in WAL had been already committed into the disk -> clear the WAL - if (result.empty()) + if (!readonly && result.empty()) { LOG_DEBUG(log, "WAL file '{}' had been completely processed. Removing.", path); disk->removeFile(path); diff --git a/src/Storages/MergeTree/MergeTreeWriteAheadLog.h b/src/Storages/MergeTree/MergeTreeWriteAheadLog.h index a03fe09e03d..eba7698b9f9 100644 --- a/src/Storages/MergeTree/MergeTreeWriteAheadLog.h +++ b/src/Storages/MergeTree/MergeTreeWriteAheadLog.h @@ -65,7 +65,8 @@ public: std::vector restore( const StorageMetadataPtr & metadata_snapshot, ContextPtr context, - std::unique_lock & parts_lock); + std::unique_lock & parts_lock, + bool readonly); using MinMaxBlockNumber = std::pair; static std::optional tryParseMinMaxBlockNumber(const String & filename); diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index 991a8d359a8..3a7484a4141 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -93,15 +93,29 @@ void MergedBlockOutputStream::Finalizer::Impl::finish() { writer.finish(sync); - for (const auto & file_name : files_to_remove_after_finish) - part->getDataPartStorage().removeFile(file_name); - for (auto & file : written_files) { file->finalize(); if (sync) file->sync(); } + + /// TODO: this code looks really stupid. It's because DiskTransaction is + /// unable to see own write operations. When we merge part with column TTL + /// and column completely outdated we first write empty column and after + /// remove it. In case of single DiskTransaction it's impossible because + /// remove operation will not see just written files. That is why we finish + /// one transaction and start new... 
+ /// + /// FIXME: DiskTransaction should see own writes. Column TTL implementation shouldn't be so stupid... + if (!files_to_remove_after_finish.empty()) + { + part->getDataPartStorage().commitTransaction(); + part->getDataPartStorage().beginTransaction(); + } + + for (const auto & file_name : files_to_remove_after_finish) + part->getDataPartStorage().removeFile(file_name); } MergedBlockOutputStream::Finalizer::~Finalizer() @@ -186,7 +200,9 @@ MergedBlockOutputStream::WrittenFiles MergedBlockOutputStream::finalizePartOnDis const MergeTreeMutableDataPartPtr & new_part, MergeTreeData::DataPart::Checksums & checksums) { + /// NOTE: You do not need to call fsync here, since it will be called later for the all written_files. WrittenFiles written_files; + if (new_part->isProjectionPart()) { if (storage.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING || isCompactPart(new_part)) diff --git a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp index 9e3cbb0640b..b432841d5b0 100644 --- a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp @@ -160,7 +160,6 @@ ReplicatedMergeMutateTaskBase::PrepareResult MutateFromLogEntryTask::prepare() } } - const Settings & settings = storage.getContext()->getSettingsRef(); merge_mutate_entry = storage.getContext()->getMergeList().insert( storage.getStorageID(), diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 2b186795723..de68cb6f0ba 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -625,7 +625,8 @@ void finalizeMutatedPart( MergeTreeData::MutableDataPartPtr new_data_part, ExecuteTTLType execute_ttl_type, const CompressionCodecPtr & codec, - ContextPtr context) + ContextPtr context, + bool sync) { if (new_data_part->uuid != UUIDHelpers::Nil) { @@ -634,6 +635,8 @@ void finalizeMutatedPart( writeUUIDText(new_data_part->uuid, out_hashing); new_data_part->checksums.files[IMergeTreeDataPart::UUID_FILE_NAME].file_size = out_hashing.count(); new_data_part->checksums.files[IMergeTreeDataPart::UUID_FILE_NAME].file_hash = out_hashing.getHash(); + if (sync) + out_hashing.sync(); } if (execute_ttl_type != ExecuteTTLType::NONE) @@ -644,6 +647,8 @@ void finalizeMutatedPart( new_data_part->ttl_infos.write(out_hashing); new_data_part->checksums.files["ttl.txt"].file_size = out_hashing.count(); new_data_part->checksums.files["ttl.txt"].file_hash = out_hashing.getHash(); + if (sync) + out_hashing.sync(); } if (!new_data_part->getSerializationInfos().empty()) @@ -653,23 +658,31 @@ void finalizeMutatedPart( new_data_part->getSerializationInfos().writeJSON(out_hashing); new_data_part->checksums.files[IMergeTreeDataPart::SERIALIZATION_FILE_NAME].file_size = out_hashing.count(); new_data_part->checksums.files[IMergeTreeDataPart::SERIALIZATION_FILE_NAME].file_hash = out_hashing.getHash(); + if (sync) + out_hashing.sync(); } { /// Write file with checksums. 
auto out_checksums = new_data_part->getDataPartStorage().writeFile("checksums.txt", 4096, context->getWriteSettings()); new_data_part->checksums.write(*out_checksums); + if (sync) + out_checksums->sync(); } /// close fd { auto out = new_data_part->getDataPartStorage().writeFile(IMergeTreeDataPart::DEFAULT_COMPRESSION_CODEC_FILE_NAME, 4096, context->getWriteSettings()); DB::writeText(queryToString(codec->getFullCodecDesc()), *out); + if (sync) + out->sync(); } /// close fd { /// Write a file with a description of columns. auto out_columns = new_data_part->getDataPartStorage().writeFile("columns.txt", 4096, context->getWriteSettings()); new_data_part->getColumns().writeText(*out_columns); + if (sync) + out_columns->sync(); } /// close fd new_data_part->rows_count = source_part->rows_count; @@ -678,7 +691,6 @@ void finalizeMutatedPart( new_data_part->minmax_idx = source_part->minmax_idx; new_data_part->modification_time = time(nullptr); new_data_part->loadProjections(false, false); - /// All information about sizes is stored in checksums. /// It doesn't make sense to touch filesystem for sizes. new_data_part->setBytesOnDisk(new_data_part->checksums.getTotalSizeOnDisk()); @@ -756,6 +768,8 @@ struct MutationContext MergeTreeData::HardlinkedFiles hardlinked_files; + bool need_prefix = true; + scope_guard temporary_directory_lock; }; @@ -862,6 +876,7 @@ public: {}, projection_merging_params, NO_TRANSACTION_PTR, + /* need_prefix */ true, ctx->new_data_part.get(), ".tmp_proj"); @@ -1024,6 +1039,7 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() auto tmp_part = MergeTreeDataWriter::writeTempProjectionPart( *ctx->data, ctx->log, projection_block, projection, ctx->new_data_part.get(), ++block_num); tmp_part.finalize(); + tmp_part.part->getDataPartStorage().commitTransaction(); projection_parts[projection.name].emplace_back(std::move(tmp_part.part)); } } @@ -1046,6 +1062,7 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() auto temp_part = MergeTreeDataWriter::writeTempProjectionPart( *ctx->data, ctx->log, projection_block, projection, ctx->new_data_part.get(), ++block_num); temp_part.finalize(); + temp_part.part->getDataPartStorage().commitTransaction(); projection_parts[projection.name].emplace_back(std::move(temp_part.part)); } } @@ -1144,7 +1161,8 @@ private: void prepare() { - ctx->new_data_part->getDataPartStorage().createDirectories(); + if (ctx->new_data_part->isStoredOnDisk()) + ctx->new_data_part->getDataPartStorage().createDirectories(); /// Note: this is done before creating input streams, because otherwise data.data_parts_mutex /// (which is locked in data.getTotalActiveSizeInBytes()) @@ -1409,7 +1427,7 @@ private: } } - MutationHelpers::finalizeMutatedPart(ctx->source_part, ctx->new_data_part, ctx->execute_ttl_type, ctx->compression_codec, ctx->context); + MutationHelpers::finalizeMutatedPart(ctx->source_part, ctx->new_data_part, ctx->execute_ttl_type, ctx->compression_codec, ctx->context, ctx->need_sync); } @@ -1442,7 +1460,8 @@ MutateTask::MutateTask( const MergeTreeTransactionPtr & txn, MergeTreeData & data_, MergeTreeDataMergerMutator & mutator_, - ActionBlocker & merges_blocker_) + ActionBlocker & merges_blocker_, + bool need_prefix_) : ctx(std::make_shared()) { ctx->data = &data_; @@ -1460,6 +1479,7 @@ MutateTask::MutateTask( ctx->txn = txn; ctx->source_part = ctx->future_part->parts[0]; ctx->storage_from_source_part = std::make_shared(ctx->source_part); + ctx->need_prefix = need_prefix_; auto storage_snapshot = 
ctx->storage_from_source_part->getStorageSnapshot(ctx->metadata_snapshot, context_); extendObjectColumns(ctx->storage_columns, storage_snapshot->object_columns, /*with_subcolumns=*/ false); @@ -1514,8 +1534,14 @@ bool MutateTask::prepare() ctx->num_mutations = std::make_unique(CurrentMetrics::PartMutation); auto context_for_reading = Context::createCopy(ctx->context); + + /// We must read with one thread because it guarantees that output stream will be sorted. + /// Disable all settings that can enable reading with several streams. context_for_reading->setSetting("max_streams_to_max_threads_ratio", 1); context_for_reading->setSetting("max_threads", 1); + context_for_reading->setSetting("allow_asynchronous_read_from_io_pool_for_merge_tree", false); + context_for_reading->setSetting("max_streams_for_merge_tree_reading", Field(0)); + /// Allow mutations to work when force_index_by_date or force_primary_key is on. context_for_reading->setSetting("force_index_by_date", false); context_for_reading->setSetting("force_primary_key", false); @@ -1547,7 +1573,14 @@ bool MutateTask::prepare() files_to_copy_instead_of_hardlinks.insert(IMergeTreeDataPart::FILE_FOR_REFERENCES_CHECK); LOG_TRACE(ctx->log, "Part {} doesn't change up to mutation version {}", ctx->source_part->name, ctx->future_part->part_info.mutation); - auto [part, lock] = ctx->data->cloneAndLoadDataPartOnSameDisk(ctx->source_part, "tmp_clone_", ctx->future_part->part_info, ctx->metadata_snapshot, ctx->txn, &ctx->hardlinked_files, false, files_to_copy_instead_of_hardlinks); + std::string prefix; + if (ctx->need_prefix) + prefix = "tmp_clone_"; + + auto [part, lock] = ctx->data->cloneAndLoadDataPartOnSameDisk(ctx->source_part, prefix, ctx->future_part->part_info, ctx->metadata_snapshot, ctx->txn, &ctx->hardlinked_files, false, files_to_copy_instead_of_hardlinks); + + part->getDataPartStorage().beginTransaction(); + ctx->temporary_directory_lock = std::move(lock); promise.set_value(std::move(part)); return false; @@ -1580,7 +1613,10 @@ bool MutateTask::prepare() /// FIXME new_data_part is not used in the case when we clone part with cloneAndLoadDataPartOnSameDisk and return false /// Is it possible to handle this case earlier? 
- String tmp_part_dir_name = "tmp_mut_" + ctx->future_part->name; + std::string prefix; + if (ctx->need_prefix) + prefix = "tmp_mut_"; + String tmp_part_dir_name = prefix + ctx->future_part->name; ctx->temporary_directory_lock = ctx->data->getTemporaryPartDirectoryHolder(tmp_part_dir_name); auto data_part_storage = std::make_shared( @@ -1674,7 +1710,9 @@ bool MutateTask::prepare() if (copy_checksumns) files_to_copy_instead_of_hardlinks.insert(IMergeTreeDataPart::FILE_FOR_REFERENCES_CHECK); - auto [part, lock] = ctx->data->cloneAndLoadDataPartOnSameDisk(ctx->source_part, "tmp_mut_", ctx->future_part->part_info, ctx->metadata_snapshot, ctx->txn, &ctx->hardlinked_files, false, files_to_copy_instead_of_hardlinks); + auto [part, lock] = ctx->data->cloneAndLoadDataPartOnSameDisk(ctx->source_part, prefix, ctx->future_part->part_info, ctx->metadata_snapshot, ctx->txn, &ctx->hardlinked_files, false, files_to_copy_instead_of_hardlinks); + + part->getDataPartStorage().beginTransaction(); ctx->temporary_directory_lock = std::move(lock); promise.set_value(std::move(part)); return false; diff --git a/src/Storages/MergeTree/MutateTask.h b/src/Storages/MergeTree/MutateTask.h index 3df30670d7f..54ad996ad4c 100644 --- a/src/Storages/MergeTree/MutateTask.h +++ b/src/Storages/MergeTree/MutateTask.h @@ -35,7 +35,8 @@ public: const MergeTreeTransactionPtr & txn, MergeTreeData & data_, MergeTreeDataMergerMutator & mutator_, - ActionBlocker & merges_blocker_); + ActionBlocker & merges_blocker_, + bool need_prefix_); bool execute(); @@ -46,8 +47,6 @@ public: const MergeTreeData::HardlinkedFiles & getHardlinkedFiles() const; - MutableDataPartStoragePtr getBuilder() const; - private: bool prepare(); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp index 049d2c2adf5..557123ddae2 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp @@ -191,7 +191,7 @@ void ReplicatedMergeTreeAttachThread::runImpl() void ReplicatedMergeTreeAttachThread::finalizeInitialization() TSA_NO_THREAD_SAFETY_ANALYSIS { - storage.startupImpl(); + storage.startupImpl(/* from_attach_thread */ true); storage.initialization_done = true; LOG_INFO(log, "Table is initialized"); } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeMergeStrategyPicker.h b/src/Storages/MergeTree/ReplicatedMergeTreeMergeStrategyPicker.h index 91f5824f8fc..05b3d656579 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeMergeStrategyPicker.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeMergeStrategyPicker.h @@ -17,7 +17,7 @@ struct ReplicatedMergeTreeLogEntryData; /// (so instead of doing exactly the same merge cluster-wise you can do merge once and fetch ready part) /// Fetches may be desirable for other operational reasons (backup replica without lot of CPU resources). /// -/// That class allow to take a decisions about preferred strategy for a concreate merge. +/// That class allow to take a decisions about preferred strategy for a concrete merge. 
/// /// Since that code is used in shouldExecuteLogEntry we need to be able to: /// 1) make decision fast diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeMutationEntry.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeMutationEntry.cpp index 080066c1dff..1efb3f6826b 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeMutationEntry.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeMutationEntry.cpp @@ -24,7 +24,7 @@ void ReplicatedMergeTreeMutationEntry::writeText(WriteBuffer & out) const } out << "commands: "; - commands.writeText(out); + commands.writeText(out, /* with_pure_metadata_commands = */ false); out << "\n"; out << "alter version: "; @@ -93,7 +93,7 @@ std::shared_ptr ReplicatedMergeTreeMutationEntry::backup() c } out << "commands: "; - commands.writeText(out); + commands.writeText(out, /* with_pure_metadata_commands = */ false); out << "\n"; return std::make_shared(out.str()); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp index 93724e4946d..29528e9ff80 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp @@ -10,11 +10,6 @@ #include -namespace ProfileEvents -{ - extern const Event ReplicaPartialShutdown; -} - namespace CurrentMetrics { extern const Metric ReadonlyReplica; @@ -335,34 +330,11 @@ void ReplicatedMergeTreeRestartingThread::activateReplica() void ReplicatedMergeTreeRestartingThread::partialShutdown(bool part_of_full_shutdown) { setReadonly(part_of_full_shutdown); - ProfileEvents::increment(ProfileEvents::ReplicaPartialShutdown); - - storage.partial_shutdown_called = true; - storage.partial_shutdown_event.set(); - storage.replica_is_active_node = nullptr; - - LOG_TRACE(log, "Waiting for threads to finish"); - storage.merge_selecting_task->deactivate(); - storage.queue_updating_task->deactivate(); - storage.mutations_updating_task->deactivate(); - storage.mutations_finalizing_task->deactivate(); - - storage.cleanup_thread.stop(); - storage.part_check_thread.stop(); - - /// Stop queue processing - { - auto fetch_lock = storage.fetcher.blocker.cancel(); - auto merge_lock = storage.merger_mutator.merges_blocker.cancel(); - auto move_lock = storage.parts_mover.moves_blocker.cancel(); - storage.background_operations_assignee.finish(); - } - - LOG_TRACE(log, "Threads finished"); + storage.partialShutdown(); } -void ReplicatedMergeTreeRestartingThread::shutdown() +void ReplicatedMergeTreeRestartingThread::shutdown(bool part_of_full_shutdown) { /// Stop restarting_thread before stopping other tasks - so that it won't restart them again. need_stop = true; @@ -370,7 +342,7 @@ void ReplicatedMergeTreeRestartingThread::shutdown() LOG_TRACE(log, "Restarting thread finished"); /// Stop other tasks. 
- partialShutdown(/* part_of_full_shutdown */ true); + partialShutdown(part_of_full_shutdown); } void ReplicatedMergeTreeRestartingThread::setReadonly(bool on_shutdown) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h index bb4b0c0fdd2..b5314de9dcc 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h @@ -28,7 +28,7 @@ public: void wakeup() { task->schedule(); } - void shutdown(); + void shutdown(bool part_of_full_shutdown); private: StorageReplicatedMergeTree & storage; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 2d8bf28e700..7bd5df2b1dc 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -203,7 +203,7 @@ ReplicatedMergeTreeSinkImpl::ReplicatedMergeTreeSinkImpl( , deduplicate(deduplicate_) , log(&Poco::Logger::get(storage.getLogName() + " (Replicated OutputStream)")) , context(context_) - , storage_snapshot(storage.getStorageSnapshot(metadata_snapshot, context)) + , storage_snapshot(storage.getStorageSnapshotWithoutParts(metadata_snapshot)) { /// The quorum value `1` has the same meaning as if it is disabled. if (required_quorum_size == 1) @@ -485,7 +485,8 @@ void ReplicatedMergeTreeSinkImpl::finishDelayedChunk(const ZooKeeperWithFa auto conflict_block_ids = commitPart(zookeeper, partition.temp_part.part, partition.block_id, delayed_chunk->replicas_num, false); if (conflict_block_ids.empty()) break; - LOG_DEBUG(log, "Found depulicate block IDs: {}, retry times {}", toString(conflict_block_ids), ++retry_times); + ++retry_times; + LOG_DEBUG(log, "Found duplicate block IDs: {}, retry times {}", toString(conflict_block_ids), retry_times); /// partition clean conflict rewriteBlock(log, partition, conflict_block_ids); if (partition.block_id.empty()) @@ -538,7 +539,7 @@ std::vector ReplicatedMergeTreeSinkImpl::commitPart( /// /// metadata_snapshot->check(part->getColumns()); - String temporary_part_relative_path = part->getDataPartStorage().getPartDirectory(); + const String temporary_part_relative_path = part->getDataPartStorage().getPartDirectory(); /// There is one case when we need to retry transaction in a loop. /// But don't do it too many times - just as defensive measure. @@ -819,6 +820,14 @@ std::vector ReplicatedMergeTreeSinkImpl::commitPart( part->name); } + auto rename_part_to_temporary = [&temporary_part_relative_path, &transaction, &part]() + { + transaction.rollbackPartsToTemporaryState(); + + part->is_temp = true; + part->renameTo(temporary_part_relative_path, false); + }; + try { ThreadFuzzer::maybeInjectSleep(); @@ -827,11 +836,7 @@ std::vector ReplicatedMergeTreeSinkImpl::commitPart( } catch (const Exception &) { - transaction.rollbackPartsToTemporaryState(); - - part->is_temp = true; - part->renameTo(temporary_part_relative_path, false); - + rename_part_to_temporary(); throw; } @@ -905,10 +910,7 @@ std::vector ReplicatedMergeTreeSinkImpl::commitPart( /// We will try to add this part again on the new iteration as it's just a new part. /// So remove it from storage parts set immediately and transfer state to temporary. 
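The comment above states the reasoning behind the new rename_part_to_temporary lambda: every failure path in commitPart must leave the part in a temporary state so it cannot resurrect on restart. Below is a self-contained sketch of that shape, with plain std::runtime_error standing in for the Keeper and quorum exceptions and simple locals standing in for the transaction and part objects.

#include <iostream>
#include <stdexcept>
#include <string>

int main()
{
    bool part_is_temp = false;
    std::string part_dir = "store/all_1_1_0";
    const std::string temporary_part_relative_path = "tmp_insert_all_1_1_0";

    auto rename_part_to_temporary = [&]()
    {
        // In the real code: transaction.rollbackPartsToTemporaryState();
        // part->is_temp = true; part->renameTo(temporary_part_relative_path, false);
        part_is_temp = true;
        part_dir = temporary_part_relative_path;
    };

    try
    {
        // Model of the new ZNODEEXISTS-on-quorum-status branch: unlocking shared data
        // may itself fail, and that failure is only logged so the rename still happens.
        try
        {
            throw std::runtime_error("unlockSharedData failed");
        }
        catch (const std::exception & e)
        {
            std::cout << "suppressed: " << e.what() << '\n';
        }

        rename_part_to_temporary();
        throw std::runtime_error("Another quorum insert has been already started");
    }
    catch (const std::exception & e)
    {
        std::cout << "rethrown: " << e.what()
                  << " (is_temp=" << part_is_temp << ", dir=" << part_dir << ")\n";
    }
}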
- transaction.rollbackPartsToTemporaryState(); - - part->is_temp = true; - part->renameTo(temporary_part_relative_path, false); + rename_part_to_temporary(); if constexpr (async_insert) { @@ -930,8 +932,20 @@ std::vector ReplicatedMergeTreeSinkImpl::commitPart( } else if (multi_code == Coordination::Error::ZNODEEXISTS && failed_op_path == quorum_info.status_path) { - storage.unlockSharedData(*part, zookeeper); - transaction.rollback(); + try + { + storage.unlockSharedData(*part, zookeeper); + } + catch (const zkutil::KeeperException & e) + { + /// suppress this exception since need to rename part to temporary next + LOG_DEBUG(log, "Unlocking shared data failed during error handling: code={} message={}", e.code, e.message()); + } + + /// Part was not committed to keeper + /// So make it temporary to avoid its resurrection on restart + rename_part_to_temporary(); + throw Exception("Another quorum insert has been already started", ErrorCodes::UNSATISFIED_QUORUM_FOR_PREVIOUS_WRITE); } else diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h index 57fd6035471..1199df95b67 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h @@ -4,7 +4,7 @@ #include #include #include -#include +#include namespace Poco { class Logger; } diff --git a/src/Storages/MergeTree/ReplicatedTableStatus.h b/src/Storages/MergeTree/ReplicatedTableStatus.h new file mode 100644 index 00000000000..b9f84091e9b --- /dev/null +++ b/src/Storages/MergeTree/ReplicatedTableStatus.h @@ -0,0 +1,35 @@ +#pragma once + +#include +#include + +namespace DB +{ + +/** For the system table replicas. */ +struct ReplicatedTableStatus +{ + bool is_leader; + bool can_become_leader; + bool is_readonly; + bool is_session_expired; + + ReplicatedMergeTreeQueue::Status queue; + UInt32 parts_to_check; + String zookeeper_path; + String replica_name; + String replica_path; + Int32 columns_version; + UInt64 log_max_index; + UInt64 log_pointer; + UInt64 absolute_delay; + UInt8 total_replicas; + UInt8 active_replicas; + String last_queue_update_exception; + /// If the error has happened fetching the info from ZooKeeper, this field will be set. + String zookeeper_exception; + + std::unordered_map replica_is_active; +}; + +} diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index ae2abaf8ea5..620591abbf3 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -314,76 +314,17 @@ static StoragePtr create(const StorageFactory::Arguments & args) /// For Replicated. 
String zookeeper_path; String replica_name; - StorageReplicatedMergeTree::RenamingRestrictions renaming_restrictions = StorageReplicatedMergeTree::RenamingRestrictions::ALLOW_ANY; + RenamingRestrictions renaming_restrictions = RenamingRestrictions::ALLOW_ANY; bool is_on_cluster = args.getLocalContext()->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY; bool is_replicated_database = args.getLocalContext()->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY && DatabaseCatalog::instance().getDatabase(args.table_id.database_name)->getEngineName() == "Replicated"; - if (replicated) + /// Allow implicit {uuid} macros only for zookeeper_path in ON CLUSTER queries + bool allow_uuid_macro = is_on_cluster || is_replicated_database || args.query.attach; + + auto expand_macro = [&] (ASTLiteral * ast_zk_path, ASTLiteral * ast_replica_name) { - bool has_arguments = arg_num + 2 <= arg_cnt; - bool has_valid_arguments = has_arguments && engine_args[arg_num]->as() && engine_args[arg_num + 1]->as(); - - ASTLiteral * ast_zk_path; - ASTLiteral * ast_replica_name; - - if (has_valid_arguments) - { - /// Get path and name from engine arguments - ast_zk_path = engine_args[arg_num]->as(); - if (ast_zk_path && ast_zk_path->value.getType() == Field::Types::String) - zookeeper_path = ast_zk_path->value.safeGet(); - else - throw Exception( - "Path in ZooKeeper must be a string literal" + getMergeTreeVerboseHelp(is_extended_storage_def), - ErrorCodes::BAD_ARGUMENTS); - ++arg_num; - - ast_replica_name = engine_args[arg_num]->as(); - if (ast_replica_name && ast_replica_name->value.getType() == Field::Types::String) - replica_name = ast_replica_name->value.safeGet(); - else - throw Exception( - "Replica name must be a string literal" + getMergeTreeVerboseHelp(is_extended_storage_def), ErrorCodes::BAD_ARGUMENTS); - - if (replica_name.empty()) - throw Exception( - "No replica name in config" + getMergeTreeVerboseHelp(is_extended_storage_def), ErrorCodes::NO_REPLICA_NAME_GIVEN); - ++arg_num; - } - else if (is_extended_storage_def - && (arg_cnt == 0 - || !engine_args[arg_num]->as() - || (arg_cnt == 1 && merging_params.mode == MergeTreeData::MergingParams::Graphite))) - { - /// Try use default values if arguments are not specified. - /// Note: {uuid} macro works for ON CLUSTER queries when database engine is Atomic. - const auto & config = args.getContext()->getConfigRef(); - zookeeper_path = StorageReplicatedMergeTree::getDefaultZooKeeperPath(config); - /// TODO maybe use hostname if {replica} is not defined? - replica_name = StorageReplicatedMergeTree::getDefaultReplicaName(config); - - /// Modify query, so default values will be written to metadata - assert(arg_num == 0); - ASTs old_args; - std::swap(engine_args, old_args); - auto path_arg = std::make_shared(zookeeper_path); - auto name_arg = std::make_shared(replica_name); - ast_zk_path = path_arg.get(); - ast_replica_name = name_arg.get(); - engine_args.emplace_back(std::move(path_arg)); - engine_args.emplace_back(std::move(name_arg)); - std::move(std::begin(old_args), std::end(old_args), std::back_inserter(engine_args)); - arg_num = 2; - arg_cnt += 2; - } - else - throw Exception("Expected two string literal arguments: zookeeper_path and replica_name", ErrorCodes::BAD_ARGUMENTS); - - /// Allow implicit {uuid} macros only for zookeeper_path in ON CLUSTER queries - bool allow_uuid_macro = is_on_cluster || is_replicated_database || args.query.attach; - /// Unfold {database} and {table} macro on table creation, so table can be renamed. 
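The renaming logic in the hunk that follows reduces to a small mapping from which macros were expanded at CREATE time to how the table may later be renamed. The sketch below uses local copies of the enum and the expansion flags; the field names expanded_database, expanded_table and expanded_uuid come from the diff, everything else is illustrative.

#include <iostream>

enum class RenamingRestrictions { ALLOW_ANY, ALLOW_PRESERVING_UUID, DO_NOT_ALLOW };

struct ExpansionInfo
{
    bool expanded_database = false;
    bool expanded_table = false;
    bool expanded_uuid = false;
};

// Once {database}/{table} are baked into the ZooKeeper path, renaming would break the
// path; an expanded {uuid} means the UUID must survive any rename.
RenamingRestrictions restrictionsFor(const ExpansionInfo & info)
{
    if (info.expanded_database || info.expanded_table)
        return RenamingRestrictions::DO_NOT_ALLOW;
    if (info.expanded_uuid)
        return RenamingRestrictions::ALLOW_PRESERVING_UUID;
    return RenamingRestrictions::ALLOW_ANY;
}

int main()
{
    ExpansionInfo uses_uuid;
    uses_uuid.expanded_uuid = true;
    std::cout << static_cast<int>(restrictionsFor(uses_uuid)) << '\n'; // 1 = ALLOW_PRESERVING_UUID
}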
if (!args.attach) { @@ -427,9 +368,76 @@ static StoragePtr create(const StorageFactory::Arguments & args) /// or if one of these macros is recursively expanded from some other macro. /// Also do not allow to move table from Atomic to Ordinary database if there's {uuid} macro if (info.expanded_database || info.expanded_table) - renaming_restrictions = StorageReplicatedMergeTree::RenamingRestrictions::DO_NOT_ALLOW; + renaming_restrictions = RenamingRestrictions::DO_NOT_ALLOW; else if (info.expanded_uuid) - renaming_restrictions = StorageReplicatedMergeTree::RenamingRestrictions::ALLOW_PRESERVING_UUID; + renaming_restrictions = RenamingRestrictions::ALLOW_PRESERVING_UUID; + }; + + if (replicated) + { + bool has_arguments = arg_num + 2 <= arg_cnt; + bool has_valid_arguments = has_arguments && engine_args[arg_num]->as() && engine_args[arg_num + 1]->as(); + + ASTLiteral * ast_zk_path; + ASTLiteral * ast_replica_name; + + if (has_valid_arguments) + { + /// Get path and name from engine arguments + ast_zk_path = engine_args[arg_num]->as(); + if (ast_zk_path && ast_zk_path->value.getType() == Field::Types::String) + zookeeper_path = ast_zk_path->value.safeGet(); + else + throw Exception( + "Path in ZooKeeper must be a string literal" + getMergeTreeVerboseHelp(is_extended_storage_def), + ErrorCodes::BAD_ARGUMENTS); + ++arg_num; + + ast_replica_name = engine_args[arg_num]->as(); + if (ast_replica_name && ast_replica_name->value.getType() == Field::Types::String) + replica_name = ast_replica_name->value.safeGet(); + else + throw Exception( + "Replica name must be a string literal" + getMergeTreeVerboseHelp(is_extended_storage_def), ErrorCodes::BAD_ARGUMENTS); + + if (replica_name.empty()) + throw Exception( + "No replica name in config" + getMergeTreeVerboseHelp(is_extended_storage_def), ErrorCodes::NO_REPLICA_NAME_GIVEN); + ++arg_num; + + expand_macro(ast_zk_path, ast_replica_name); + } + else if (is_extended_storage_def + && (arg_cnt == 0 + || !engine_args[arg_num]->as() + || (arg_cnt == 1 && merging_params.mode == MergeTreeData::MergingParams::Graphite))) + { + /// Try use default values if arguments are not specified. + /// Note: {uuid} macro works for ON CLUSTER queries when database engine is Atomic. + const auto & config = args.getContext()->getConfigRef(); + zookeeper_path = StorageReplicatedMergeTree::getDefaultZooKeeperPath(config); + /// TODO maybe use hostname if {replica} is not defined? 
+ replica_name = StorageReplicatedMergeTree::getDefaultReplicaName(config); + + /// Modify query, so default values will be written to metadata + assert(arg_num == 0); + ASTs old_args; + std::swap(engine_args, old_args); + auto path_arg = std::make_shared(zookeeper_path); + auto name_arg = std::make_shared(replica_name); + ast_zk_path = path_arg.get(); + ast_replica_name = name_arg.get(); + + expand_macro(ast_zk_path, ast_replica_name); + + engine_args.emplace_back(std::move(path_arg)); + engine_args.emplace_back(std::move(name_arg)); + std::move(std::begin(old_args), std::end(old_args), std::back_inserter(engine_args)); + arg_num = 2; + arg_cnt += 2; + } + else + throw Exception("Expected two string literal arguments: zookeeper_path and replica_name", ErrorCodes::BAD_ARGUMENTS); } /// This merging param maybe used as part of sorting key @@ -468,7 +476,7 @@ static StoragePtr create(const StorageFactory::Arguments & args) { String graphite_config_name; String error_msg - = "Last parameter of GraphiteMergeTree must be name (in single quotes) of element in configuration file with Graphite options"; + = "Last parameter of GraphiteMergeTree must be the name (in single quotes) of the element in configuration file with the Graphite options"; error_msg += getMergeTreeVerboseHelp(is_extended_storage_def); if (const auto * ast = engine_args[arg_cnt - 1]->as()) diff --git a/src/Storages/MutationCommands.cpp b/src/Storages/MutationCommands.cpp index 28dfe488869..ffc2cfc3086 100644 --- a/src/Storages/MutationCommands.cpp +++ b/src/Storages/MutationCommands.cpp @@ -144,23 +144,32 @@ std::optional MutationCommand::parse(ASTAlterCommand * command, res.partition = command->partition; return res; } - return {}; + else + { + MutationCommand res; + res.ast = command->ptr(); + res.type = ALTER_WITHOUT_MUTATION; + return res; + } } -std::shared_ptr MutationCommands::ast() const +std::shared_ptr MutationCommands::ast(bool with_pure_metadata_commands) const { auto res = std::make_shared(); for (const MutationCommand & command : *this) - res->children.push_back(command.ast->clone()); + { + if (command.type != MutationCommand::ALTER_WITHOUT_MUTATION || with_pure_metadata_commands) + res->children.push_back(command.ast->clone()); + } return res; } -void MutationCommands::writeText(WriteBuffer & out) const +void MutationCommands::writeText(WriteBuffer & out, bool with_pure_metadata_commands) const { WriteBufferFromOwnString commands_buf; - formatAST(*ast(), commands_buf, /* hilite = */ false, /* one_line = */ true); + formatAST(*ast(with_pure_metadata_commands), commands_buf, /* hilite = */ false, /* one_line = */ true); writeEscapedString(commands_buf.str(), out); } @@ -169,9 +178,11 @@ void MutationCommands::readText(ReadBuffer & in) String commands_str; readEscapedString(commands_str, in); + ParserAlterCommandList p_alter_commands; auto commands_ast = parseQuery( p_alter_commands, commands_str.data(), commands_str.data() + commands_str.length(), "mutation commands list", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + for (const auto & child : commands_ast->children) { auto * command_ast = child->as(); @@ -182,4 +193,22 @@ void MutationCommands::readText(ReadBuffer & in) } } +std::string MutationCommands::toString() const +{ + WriteBufferFromOwnString commands_buf; + formatAST(*ast(), commands_buf, /* hilite = */ false, /* one_line = */ true); + return commands_buf.str(); +} + + +bool MutationCommands::hasNonEmptyMutationCommands() const +{ + for (const auto & command : *this) + { + if (command.type != 
MutationCommand::Type::EMPTY && command.type != MutationCommand::Type::ALTER_WITHOUT_MUTATION) + return true; + } + return false; +} + } diff --git a/src/Storages/MutationCommands.h b/src/Storages/MutationCommands.h index 3f8af2b4de5..aca91c16e85 100644 --- a/src/Storages/MutationCommands.h +++ b/src/Storages/MutationCommands.h @@ -37,6 +37,7 @@ struct MutationCommand MATERIALIZE_TTL, RENAME_COLUMN, MATERIALIZE_COLUMN, + ALTER_WITHOUT_MUTATION, /// pure metadata command, currently unusned }; Type type = EMPTY; @@ -72,10 +73,12 @@ struct MutationCommand class MutationCommands : public std::vector { public: - std::shared_ptr ast() const; + std::shared_ptr ast(bool with_pure_metadata_commands = false) const; - void writeText(WriteBuffer & out) const; + void writeText(WriteBuffer & out, bool with_pure_metadata_commands) const; void readText(ReadBuffer & in); + std::string toString() const; + bool hasNonEmptyMutationCommands() const; }; using MutationCommandsConstPtr = std::shared_ptr; diff --git a/src/Storages/NamedCollections/NamedCollectionsHelpers.cpp b/src/Storages/NamedCollections/NamedCollectionsHelpers.cpp index cceabdfd7bf..9a7e5fef7d6 100644 --- a/src/Storages/NamedCollections/NamedCollectionsHelpers.cpp +++ b/src/Storages/NamedCollections/NamedCollectionsHelpers.cpp @@ -25,7 +25,7 @@ namespace return nullptr; const auto & collection_name = identifier->name(); - return NamedCollectionFactory::instance().tryGet(collection_name); + return NamedCollectionFactory::instance().get(collection_name); } std::optional> getKeyValueFromAST(ASTPtr ast) diff --git a/src/Storages/PartitionedSink.cpp b/src/Storages/PartitionedSink.cpp index 027e4f1f306..363b4557290 100644 --- a/src/Storages/PartitionedSink.cpp +++ b/src/Storages/PartitionedSink.cpp @@ -33,7 +33,7 @@ PartitionedSink::PartitionedSink( , context(context_) , sample_block(sample_block_) { - std::vector arguments(1, partition_by); + ASTs arguments(1, partition_by); ASTPtr partition_by_string = makeASTFunction(FunctionToString::name, std::move(arguments)); auto syntax_result = TreeRewriter(context).analyze(partition_by_string, sample_block.getNamesAndTypesList()); diff --git a/src/Storages/RenamingRestrictions.h b/src/Storages/RenamingRestrictions.h new file mode 100644 index 00000000000..1b53ed0358a --- /dev/null +++ b/src/Storages/RenamingRestrictions.h @@ -0,0 +1,13 @@ +#pragma once + +namespace DB +{ + +enum RenamingRestrictions +{ + ALLOW_ANY, + ALLOW_PRESERVING_UUID, + DO_NOT_ALLOW, +}; + +} diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.h b/src/Storages/RocksDB/StorageEmbeddedRocksDB.h index ca0ab7a1840..eec817acd55 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.h +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.h @@ -80,6 +80,8 @@ public: const std::vector & keys, PaddedPODArray * out_null_map) const; + bool supportsDelete() const override { return true; } + private: const String primary_key; using RocksDBPtr = std::unique_ptr; diff --git a/src/Storages/SelectQueryInfo.h b/src/Storages/SelectQueryInfo.h index bad2539ef07..c93531973b8 100644 --- a/src/Storages/SelectQueryInfo.h +++ b/src/Storages/SelectQueryInfo.h @@ -171,7 +171,6 @@ struct ProjectionCandidate */ struct SelectQueryInfo { - SelectQueryInfo() : prepared_sets(std::make_shared()) {} diff --git a/src/Storages/StorageExecutable.cpp b/src/Storages/StorageExecutable.cpp index cd3cc4d48ac..9fabf1a9fb6 100644 --- a/src/Storages/StorageExecutable.cpp +++ b/src/Storages/StorageExecutable.cpp @@ -30,6 +30,7 @@ namespace DB namespace ErrorCodes { + extern 
const int BAD_ARGUMENTS; extern const int UNSUPPORTED_METHOD; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } @@ -192,10 +193,15 @@ void registerStorageExecutable(StorageFactory & factory) std::vector input_queries; for (size_t i = 2; i < args.engine_args.size(); ++i) { + if (args.engine_args[i]->children.empty()) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, "StorageExecutable argument \"{}\" is invalid query", + args.engine_args[i]->formatForErrorMessage()); + ASTPtr query = args.engine_args[i]->children.at(0); if (!query->as()) throw Exception( - ErrorCodes::UNSUPPORTED_METHOD, "StorageExecutable argument is invalid input query {}", + ErrorCodes::UNSUPPORTED_METHOD, "StorageExecutable argument \"{}\" is invalid input query", query->formatForErrorMessage()); input_queries.emplace_back(std::move(query)); diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 5eb30f404c1..a57b4afda7d 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -91,7 +91,6 @@ StorageMergeTree::StorageMergeTree( bool has_force_restore_data_flag) : MergeTreeData( table_id_, - relative_data_path_, metadata_, context_, date_column_name, @@ -101,8 +100,10 @@ StorageMergeTree::StorageMergeTree( attach) , reader(*this) , writer(*this) - , merger_mutator(*this, getContext()->getMergeMutateExecutor()->getMaxTasksCount()) + , merger_mutator(*this) { + initializeDirectoriesAndFormatVersion(relative_data_path_, attach, date_column_name); + loadDataParts(has_force_restore_data_flag); if (!attach && !getDataPartsForInternalUsage().empty()) @@ -1105,7 +1106,7 @@ bool StorageMergeTree::scheduleDataProcessingJob(BackgroundJobsAssignee & assign auto metadata_snapshot = getInMemoryMetadataPtr(); MergeMutateSelectedEntryPtr merge_entry, mutate_entry; - auto share_lock = lockForShare(RWLockImpl::NO_QUERY, getSettings()->lock_acquire_timeout_for_background_operations); + auto shared_lock = lockForShare(RWLockImpl::NO_QUERY, getSettings()->lock_acquire_timeout_for_background_operations); MergeTreeTransactionHolder transaction_for_merge; MergeTreeTransactionPtr txn; @@ -1122,17 +1123,17 @@ bool StorageMergeTree::scheduleDataProcessingJob(BackgroundJobsAssignee & assign if (merger_mutator.merges_blocker.isCancelled()) return false; - merge_entry = selectPartsToMerge(metadata_snapshot, false, {}, false, nullptr, share_lock, lock, txn); + merge_entry = selectPartsToMerge(metadata_snapshot, false, {}, false, nullptr, shared_lock, lock, txn); if (!merge_entry && !current_mutations_by_version.empty()) - mutate_entry = selectPartsToMutate(metadata_snapshot, nullptr, share_lock, lock); + mutate_entry = selectPartsToMutate(metadata_snapshot, nullptr, shared_lock, lock); has_mutations = !current_mutations_by_version.empty(); } if (merge_entry) { - auto task = std::make_shared(*this, metadata_snapshot, false, Names{}, merge_entry, share_lock, common_assignee_trigger); + auto task = std::make_shared(*this, metadata_snapshot, false, Names{}, merge_entry, shared_lock, common_assignee_trigger); task->setCurrentTransaction(std::move(transaction_for_merge), std::move(txn)); bool scheduled = assignee.scheduleMergeMutateTask(task); /// The problem that we already booked a slot for TTL merge, but a merge list entry will be created only in a prepare method @@ -1143,7 +1144,7 @@ bool StorageMergeTree::scheduleDataProcessingJob(BackgroundJobsAssignee & assign } if (mutate_entry) { - auto task = std::make_shared(*this, metadata_snapshot, mutate_entry, share_lock, 
common_assignee_trigger); + auto task = std::make_shared(*this, metadata_snapshot, mutate_entry, shared_lock, common_assignee_trigger); assignee.scheduleMergeMutateTask(task); return true; } @@ -1160,7 +1161,7 @@ bool StorageMergeTree::scheduleDataProcessingJob(BackgroundJobsAssignee & assign getSettings()->merge_tree_clear_old_temporary_directories_interval_seconds)) { assignee.scheduleCommonTask(std::make_shared( - [this, share_lock] () + [this, shared_lock] () { return clearOldTemporaryDirectories(getSettings()->temporary_directories_lifetime.totalSeconds()); }, common_assignee_trigger, getStorageID()), /* need_trigger */ false); @@ -1171,7 +1172,7 @@ bool StorageMergeTree::scheduleDataProcessingJob(BackgroundJobsAssignee & assign getSettings()->merge_tree_clear_old_parts_interval_seconds)) { assignee.scheduleCommonTask(std::make_shared( - [this, share_lock] () + [this, shared_lock] () { /// All use relative_data_path which changes during rename /// so execute under share lock. @@ -1371,7 +1372,7 @@ MergeTreeDataPartPtr StorageMergeTree::outdatePart(MergeTreeTransaction * txn, c /// Forcefully stop merges and make part outdated auto merge_blocker = stopMergesAndWait(); auto parts_lock = lockParts(); - auto part = getPartIfExists(part_name, {MergeTreeDataPartState::Active}, &parts_lock); + auto part = getPartIfExistsUnlocked(part_name, {MergeTreeDataPartState::Active}, parts_lock); if (!part) throw Exception(ErrorCodes::NO_SUCH_DATA_PART, "Part {} not found, won't try to drop it.", part_name); @@ -1384,7 +1385,7 @@ MergeTreeDataPartPtr StorageMergeTree::outdatePart(MergeTreeTransaction * txn, c std::unique_lock lock(currently_processing_in_background_mutex); auto parts_lock = lockParts(); - auto part = getPartIfExists(part_name, {MergeTreeDataPartState::Active}, &parts_lock); + auto part = getPartIfExistsUnlocked(part_name, {MergeTreeDataPartState::Active}, parts_lock); /// It's okay, part was already removed if (!part) return nullptr; diff --git a/src/Storages/StorageMongoDB.cpp b/src/Storages/StorageMongoDB.cpp index 3ae9c974770..92d4c6c0686 100644 --- a/src/Storages/StorageMongoDB.cpp +++ b/src/Storages/StorageMongoDB.cpp @@ -72,16 +72,14 @@ void StorageMongoDB::connectIfNotConnected() auto auth_db = database_name; if (auth_source != query_params.end()) auth_db = auth_source->second; -#if POCO_VERSION >= 0x01070800 + if (!username.empty() && !password.empty()) { Poco::MongoDB::Database poco_db(auth_db); if (!poco_db.authenticate(*connection, username, password, Poco::MongoDB::Database::AUTH_SCRAM_SHA1)) throw Exception("Cannot authenticate in MongoDB, incorrect user or password", ErrorCodes::MONGODB_CANNOT_AUTHENTICATE); } -#else - authenticate(*connection, database_name, username, password); -#endif + authenticated = true; } } @@ -213,7 +211,6 @@ StorageMongoDBConfiguration StorageMongoDB::getConfiguration(ASTs engine_args, C if (engine_args.size() >= 6) configuration.options = checkAndGetLiteralArgument(engine_args[5], "database"); - } context->getRemoteHostFilter().checkHostAndPort(configuration.host, toString(configuration.port)); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 99ceb1d90ae..7056e6a6952 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -113,6 +113,7 @@ namespace ProfileEvents extern const Event NotCreatedLogEntryForMerge; extern const Event CreatedLogEntryForMutation; extern const Event NotCreatedLogEntryForMutation; + extern const Event 
ReplicaPartialShutdown; } namespace CurrentMetrics @@ -258,7 +259,6 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( bool has_force_restore_data_flag, RenamingRestrictions renaming_restrictions_) : MergeTreeData(table_id_, - relative_data_path_, metadata_, context_, date_column_name, @@ -273,7 +273,7 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( , replica_path(fs::path(zookeeper_path) / "replicas" / replica_name_) , reader(*this) , writer(*this) - , merger_mutator(*this, getContext()->getMergeMutateExecutor()->getMaxTasksCount()) + , merger_mutator(*this) , merge_strategy_picker(*this) , queue(*this, merge_strategy_picker) , fetcher(*this) @@ -286,6 +286,7 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( , replicated_fetches_throttler(std::make_shared(getSettings()->max_replicated_fetches_network_bandwidth, getContext()->getReplicatedFetchesThrottler())) , replicated_sends_throttler(std::make_shared(getSettings()->max_replicated_sends_network_bandwidth, getContext()->getReplicatedSendsThrottler())) { + initializeDirectoriesAndFormatVersion(relative_data_path_, attach, date_column_name); /// We create and deactivate all tasks for consistency. /// They all will be scheduled and activated by the restarting thread. queue_updating_task = getContext()->getSchedulePool().createTask( @@ -4246,10 +4247,10 @@ void StorageReplicatedMergeTree::startup() return; } - startupImpl(); + startupImpl(/* from_attach_thread */ false); } -void StorageReplicatedMergeTree::startupImpl() +void StorageReplicatedMergeTree::startupImpl(bool from_attach_thread) { /// Do not start replication if ZooKeeper is not configured or there is no metadata in zookeeper if (!has_metadata_in_zookeeper.has_value() || !*has_metadata_in_zookeeper) @@ -4291,7 +4292,16 @@ void StorageReplicatedMergeTree::startupImpl() /// It means that failed "startup" must not create any background tasks that we will have to wait. try { - shutdown(); + /// it's important to avoid full shutdown here, because it even tries to shutdown attach thread which was + /// designed exactly for this: try to start table if no zookeeper connection available. + if (from_attach_thread) + { + restarting_thread.shutdown(/* part_of_full_shutdown */false); + } + else + { + shutdown(); + } } catch (...) 
{ @@ -4311,6 +4321,35 @@ void StorageReplicatedMergeTree::flush() flushAllInMemoryPartsIfNeeded(); } + +void StorageReplicatedMergeTree::partialShutdown() +{ + ProfileEvents::increment(ProfileEvents::ReplicaPartialShutdown); + + partial_shutdown_called = true; + partial_shutdown_event.set(); + replica_is_active_node = nullptr; + + LOG_TRACE(log, "Waiting for threads to finish"); + merge_selecting_task->deactivate(); + queue_updating_task->deactivate(); + mutations_updating_task->deactivate(); + mutations_finalizing_task->deactivate(); + + cleanup_thread.stop(); + part_check_thread.stop(); + + /// Stop queue processing + { + auto fetch_lock = fetcher.blocker.cancel(); + auto merge_lock = merger_mutator.merges_blocker.cancel(); + auto move_lock = parts_mover.moves_blocker.cancel(); + background_operations_assignee.finish(); + } + + LOG_TRACE(log, "Threads finished"); +} + void StorageReplicatedMergeTree::shutdown() { if (shutdown_called.exchange(true)) @@ -4327,7 +4366,8 @@ void StorageReplicatedMergeTree::shutdown() if (attach_thread) attach_thread->shutdown(); - restarting_thread.shutdown(); + + restarting_thread.shutdown(/* part_of_full_shutdown */true); background_operations_assignee.finish(); part_moves_between_shards_orchestrator.shutdown(); @@ -5051,8 +5091,9 @@ String getPartNamePossiblyFake(MergeTreeDataFormatVersion format_version, const return part_info.getPartName(); } -bool StorageReplicatedMergeTree::getFakePartCoveringAllPartsInPartition(const String & partition_id, MergeTreePartInfo & part_info, - std::optional & delimiting_block_lock, bool for_replace_range) +bool StorageReplicatedMergeTree::getFakePartCoveringAllPartsInPartition( + const String & partition_id, MergeTreePartInfo & part_info, + std::optional & delimiting_block_lock, bool for_replace_range) { /// Even if there is no data in the partition, you still need to mark the range for deletion. /// - Because before executing DETACH, tasks for downloading parts to this partition can be executed. @@ -5160,7 +5201,7 @@ void StorageReplicatedMergeTree::restoreMetadataInZooKeeper() LOG_INFO(log, "Attached all partitions, starting table"); - startupImpl(); + startupImpl(/* from_attach_thread */ false); } void StorageReplicatedMergeTree::dropPartNoWaitNoThrow(const String & part_name) @@ -5659,7 +5700,7 @@ bool StorageReplicatedMergeTree::tryWaitForReplicaToProcessLogEntry( } -void StorageReplicatedMergeTree::getStatus(Status & res, bool with_zk_fields) +void StorageReplicatedMergeTree::getStatus(ReplicatedTableStatus & res, bool with_zk_fields) { auto zookeeper = tryGetZooKeeper(); const auto storage_settings_ptr = getSettings(); @@ -8745,7 +8786,7 @@ void StorageReplicatedMergeTree::restoreDataFromBackup(RestorerFromBackup & rest { /// New parts could be in the replication queue but not fetched yet. /// In that case we consider the table as not empty. 
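That emptiness rule in the restore path is easiest to see in isolation: a table with no local parts still counts as non-empty when its replication queue holds inserts that have not been fetched yet. A minimal sketch with stand-in structs (not the real ReplicatedTableStatus type) follows.

#include <iostream>

struct QueueStatus { unsigned inserts_in_queue = 0; };
struct TableStatus { QueueStatus queue; };

bool tableIsEmptyForRestore(bool has_data_parts, const TableStatus & status)
{
    // Pending inserts in the queue mean data exists cluster-wide, just not here yet.
    return !has_data_parts && status.queue.inserts_in_queue == 0;
}

int main()
{
    TableStatus status;
    status.queue.inserts_in_queue = 3;
    std::cout << std::boolalpha
              << tableIsEmptyForRestore(/*has_data_parts=*/false, status) << '\n'; // false
}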
- StorageReplicatedMergeTree::Status status; + ReplicatedTableStatus status; getStatus(status, /* with_zk_fields = */ false); if (status.queue.inserts_in_queue) empty = false; diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 67e79378b93..c5e95ab7b39 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -17,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -89,13 +91,6 @@ using ZooKeeperWithFaultInjectionPtr = std::shared_ptr replica_is_active; - }; - /// Get the status of the table. If with_zk_fields = false - do not fill in the fields that require queries to ZK. - void getStatus(Status & res, bool with_zk_fields = true); + void getStatus(ReplicatedTableStatus & res, bool with_zk_fields = true); using LogEntriesData = std::vector; void getQueue(LogEntriesData & res, String & replica_name); @@ -879,7 +851,6 @@ private: // Create table id if needed void createTableSharedID() const; - bool checkZeroCopyLockExists(const String & part_name, const DiskPtr & disk); std::optional getZeroCopyPartPath(const String & part_name, const DiskPtr & disk); @@ -888,7 +859,7 @@ private: /// If somebody already holding the lock -- return std::nullopt. std::optional tryCreateZeroCopyExclusiveLock(const String & part_name, const DiskPtr & disk) override; - void startupImpl(); + void startupImpl(bool from_attach_thread); }; String getPartNamePossiblyFake(MergeTreeDataFormatVersion format_version, const MergeTreePartInfo & part_info); diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 975ce114e83..19fad9eb307 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -529,7 +529,8 @@ StorageS3Source::StorageS3Source( const String & bucket_, const String & version_id_, std::shared_ptr file_iterator_, - const size_t download_thread_num_) + const size_t download_thread_num_, + bool only_need_virtual_columns_) : ISource(getHeader(sample_block_, requested_virtual_columns_)) , WithContext(context_) , name(std::move(name_)) @@ -543,12 +544,17 @@ StorageS3Source::StorageS3Source( , client(client_) , sample_block(sample_block_) , format_settings(format_settings_) + , only_need_virtual_columns(only_need_virtual_columns_) , requested_virtual_columns(requested_virtual_columns_) , file_iterator(file_iterator_) , download_thread_num(download_thread_num_) , create_reader_pool(1) , create_reader_scheduler(threadPoolCallbackRunner(create_reader_pool, "CreateS3Reader")) { + /// If user only need virtual columns, StorageS3Source does not use ReaderHolder and does not initialize ReadBufferFromS3. 
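That comment describes the new fast path: when a query touches only virtual columns, each listed key yields one row whose _path and _file values are derived from the key itself, with no ReadBufferFromS3 at all. A stand-alone sketch of that derivation is below; the bucket and key are made-up example values.

#include <filesystem>
#include <iostream>
#include <string>

int main()
{
    const std::string bucket = "my-bucket";
    const std::string current_key = "data/part-0001.parquet";

    // Mirrors the diff: _path is bucket/key, _file is everything after the last '/'.
    const std::string file_path = (std::filesystem::path(bucket) / current_key).string();
    const std::string file_name = file_path.substr(file_path.find_last_of('/') + 1);

    std::cout << "_path = " << file_path << '\n'; // my-bucket/data/part-0001.parquet
    std::cout << "_file = " << file_name << '\n'; // part-0001.parquet
}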
+ if (only_need_virtual_columns) + return; + reader = createReader(); if (reader) reader_future = createReaderAsync(); @@ -683,6 +689,35 @@ String StorageS3Source::getName() const Chunk StorageS3Source::generate() { + auto add_virtual_columns = [&](Chunk & chunk, const String & file_path, UInt64 num_rows) + { + for (const auto & virtual_column : requested_virtual_columns) + { + if (virtual_column.name == "_path") + { + chunk.addColumn(virtual_column.type->createColumnConst(num_rows, file_path)->convertToFullColumnIfConst()); + } + else if (virtual_column.name == "_file") + { + size_t last_slash_pos = file_path.find_last_of('/'); + auto column = virtual_column.type->createColumnConst(num_rows, file_path.substr(last_slash_pos + 1)); + chunk.addColumn(column->convertToFullColumnIfConst()); + } + } + }; + + if (only_need_virtual_columns) + { + Chunk chunk; + auto current_key = (*file_iterator)().key; + if (!current_key.empty()) + { + const auto & file_path = fs::path(bucket) / current_key; + add_virtual_columns(chunk, file_path, 1); + } + return chunk; + } + while (true) { if (!reader || isCancelled()) @@ -701,20 +736,7 @@ Chunk StorageS3Source::generate() *this, chunk, total_size, total_rows_approx_accumulated, total_rows_count_times, total_rows_approx_max); } - for (const auto & virtual_column : requested_virtual_columns) - { - if (virtual_column.name == "_path") - { - chunk.addColumn(virtual_column.type->createColumnConst(num_rows, file_path)->convertToFullColumnIfConst()); - } - else if (virtual_column.name == "_file") - { - size_t last_slash_pos = file_path.find_last_of('/'); - auto column = virtual_column.type->createColumnConst(num_rows, file_path.substr(last_slash_pos + 1)); - chunk.addColumn(column->convertToFullColumnIfConst()); - } - } - + add_virtual_columns(chunk, file_path, num_rows); return chunk; } @@ -1035,6 +1057,10 @@ Pipe StorageS3::read( requested_virtual_columns.push_back(virtual_column); } + bool only_need_virtual_columns = true; + if (column_names_set.size() > requested_virtual_columns.size()) + only_need_virtual_columns = false; + std::shared_ptr iterator_wrapper = createFileIterator( s3_configuration, keys, @@ -1047,25 +1073,28 @@ Pipe StorageS3::read( ColumnsDescription columns_description; Block block_for_format; - if (supportsSubsetOfColumns()) + if (!only_need_virtual_columns) { - auto fetch_columns = column_names; - const auto & virtuals = getVirtuals(); - std::erase_if( - fetch_columns, - [&](const String & col) - { return std::any_of(virtuals.begin(), virtuals.end(), [&](const NameAndTypePair & virtual_col){ return col == virtual_col.name; }); }); + if (supportsSubsetOfColumns()) + { + auto fetch_columns = column_names; + const auto & virtuals = getVirtuals(); + std::erase_if( + fetch_columns, + [&](const String & col) + { return std::any_of(virtuals.begin(), virtuals.end(), [&](const NameAndTypePair & virtual_col){ return col == virtual_col.name; }); }); - if (fetch_columns.empty()) - fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical())); + if (fetch_columns.empty()) + fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical())); - columns_description = storage_snapshot->getDescriptionForColumns(fetch_columns); - block_for_format = storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); - } - else - { - columns_description = storage_snapshot->metadata->getColumns(); - block_for_format = 
storage_snapshot->metadata->getSampleBlock(); + columns_description = storage_snapshot->getDescriptionForColumns(fetch_columns); + block_for_format = storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); + } + else + { + columns_description = storage_snapshot->metadata->getColumns(); + block_for_format = storage_snapshot->metadata->getSampleBlock(); + } } const size_t max_download_threads = local_context->getSettingsRef().max_download_threads; @@ -1086,7 +1115,8 @@ Pipe StorageS3::read( s3_configuration.uri.bucket, s3_configuration.uri.version_id, iterator_wrapper, - max_download_threads)); + max_download_threads, + only_need_virtual_columns)); } auto pipe = Pipe::unitePipes(std::move(pipes)); @@ -1196,10 +1226,8 @@ void StorageS3::truncate(const ASTPtr & /* query */, const StorageMetadataPtr &, void StorageS3::updateS3Configuration(ContextPtr ctx, StorageS3::S3Configuration & upd) { auto settings = ctx->getStorageS3Settings().getSettings(upd.uri.uri.toString()); - if (upd.request_settings != settings.request_settings) - upd.request_settings = settings.request_settings; - - upd.request_settings.updateFromSettingsIfEmpty(ctx->getSettings()); + upd.request_settings = settings.request_settings; + upd.request_settings.updateFromSettings(ctx->getSettings()); if (upd.client) { @@ -1245,6 +1273,8 @@ void StorageS3::processNamedCollectionResult(StorageS3Configuration & configurat validateNamedCollection(collection, required_configuration_keys, optional_configuration_keys); std::string filename; + configuration.request_settings = S3Settings::RequestSettings(collection); + for (const auto & key : collection) { if (key == "url") @@ -1263,25 +1293,6 @@ void StorageS3::processNamedCollectionResult(StorageS3Configuration & configurat configuration.structure = collection.get(key); else if (key == "use_environment_credentials") configuration.auth_settings.use_environment_credentials = collection.get(key); - else if (key == "max_single_read_retries") - configuration.request_settings.max_single_read_retries = collection.get(key); - else if (key == "min_upload_part_size") - configuration.request_settings.min_upload_part_size = collection.get(key); - else if (key == "upload_part_size_multiply_factor") - configuration.request_settings.upload_part_size_multiply_factor = collection.get(key); - else if (key == "upload_part_size_multiply_parts_count_threshold") - configuration.request_settings.upload_part_size_multiply_parts_count_threshold = collection.get(key); - else if (key == "max_single_part_upload_size") - configuration.request_settings.max_single_part_upload_size = collection.get(key); - else if (key == "max_connections") - configuration.request_settings.max_connections = collection.get(key); - else - throw Exception( - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Unknown configuration key `{}` for StorageS3, " - "expected: url, [access_key_id, secret_access_key], " - "name of used format and [compression_method].", - key); } if (!filename.empty()) configuration.url = std::filesystem::path(configuration.url) / filename; @@ -1310,7 +1321,7 @@ StorageS3Configuration StorageS3::getConfiguration(ASTs & engine_args, ContextPt "Storage S3 requires 1 to 5 arguments: url, [access_key_id, secret_access_key], name of used format and [compression_method].", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - auto header_it = StorageURL::collectHeaders(engine_args, configuration, local_context); + auto * header_it = StorageURL::collectHeaders(engine_args, configuration, local_context); 
if (header_it != engine_args.end()) engine_args.erase(header_it); diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index 671610173bd..acd5c264822 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -137,7 +137,8 @@ public: const String & bucket, const String & version_id, std::shared_ptr file_iterator_, - size_t download_thread_num); + size_t download_thread_num, + bool only_need_virtual_columns_ = false); ~StorageS3Source() override; @@ -159,6 +160,7 @@ private: std::shared_ptr client; Block sample_block; std::optional format_settings; + bool only_need_virtual_columns{false}; struct ReaderHolder { diff --git a/src/Storages/StorageS3Settings.cpp b/src/Storages/StorageS3Settings.cpp index f96b4f4509b..8c1974527b6 100644 --- a/src/Storages/StorageS3Settings.cpp +++ b/src/Storages/StorageS3Settings.cpp @@ -3,6 +3,7 @@ #include #include +#include #include #include #include @@ -12,6 +13,213 @@ namespace DB { +namespace ErrorCodes +{ + extern const int INVALID_SETTING_VALUE; +} + +S3Settings::RequestSettings::PartUploadSettings::PartUploadSettings(const Settings & settings) +{ + updateFromSettingsImpl(settings, false); + validate(); +} + +S3Settings::RequestSettings::PartUploadSettings::PartUploadSettings( + const Poco::Util::AbstractConfiguration & config, + const String & config_prefix, + const Settings & settings, + String setting_name_prefix) + : PartUploadSettings(settings) +{ + String key = config_prefix + "." + setting_name_prefix; + min_upload_part_size = config.getUInt64(key + "min_upload_part_size", min_upload_part_size); + max_upload_part_size = config.getUInt64(key + "max_upload_part_size", max_upload_part_size); + upload_part_size_multiply_factor = config.getUInt64(key + "upload_part_size_multiply_factor", upload_part_size_multiply_factor); + upload_part_size_multiply_parts_count_threshold = config.getUInt64(key + "upload_part_size_multiply_parts_count_threshold", upload_part_size_multiply_parts_count_threshold); + max_part_number = config.getUInt64(key + "max_part_number", max_part_number); + max_single_part_upload_size = config.getUInt64(key + "max_single_part_upload_size", max_single_part_upload_size); + max_single_operation_copy_size = config.getUInt64(key + "max_single_operation_copy_size", max_single_operation_copy_size); + + validate(); +} + +S3Settings::RequestSettings::PartUploadSettings::PartUploadSettings(const NamedCollection & collection) +{ + min_upload_part_size = collection.getOrDefault("min_upload_part_size", min_upload_part_size); + upload_part_size_multiply_factor = collection.getOrDefault("upload_part_size_multiply_factor", upload_part_size_multiply_factor); + upload_part_size_multiply_parts_count_threshold = collection.getOrDefault("upload_part_size_multiply_parts_count_threshold", upload_part_size_multiply_parts_count_threshold); + max_single_part_upload_size = collection.getOrDefault("max_single_part_upload_size", max_single_part_upload_size); + + validate(); +} + +void S3Settings::RequestSettings::PartUploadSettings::updateFromSettingsImpl(const Settings & settings, bool if_changed) +{ + if (!if_changed || settings.s3_min_upload_part_size.changed) + min_upload_part_size = settings.s3_min_upload_part_size; + + if (!if_changed || settings.s3_max_upload_part_size.changed) + max_upload_part_size = settings.s3_max_upload_part_size; + + if (!if_changed || settings.s3_upload_part_size_multiply_factor.changed) + upload_part_size_multiply_factor = settings.s3_upload_part_size_multiply_factor; + + if (!if_changed || 
settings.s3_upload_part_size_multiply_parts_count_threshold.changed) + upload_part_size_multiply_parts_count_threshold = settings.s3_upload_part_size_multiply_parts_count_threshold; + + if (!if_changed || settings.s3_max_single_part_upload_size.changed) + max_single_part_upload_size = settings.s3_max_single_part_upload_size; +} + +void S3Settings::RequestSettings::PartUploadSettings::validate() +{ + static constexpr size_t min_upload_part_size_limit = 5 * 1024 * 1024; + if (min_upload_part_size < min_upload_part_size_limit) + throw Exception( + ErrorCodes::INVALID_SETTING_VALUE, + "Setting min_upload_part_size has invalid value {} which is less than the s3 API limit {}", + ReadableSize(min_upload_part_size), ReadableSize(min_upload_part_size_limit)); + + static constexpr size_t max_upload_part_size_limit = 5ull * 1024 * 1024 * 1024; + if (max_upload_part_size > max_upload_part_size_limit) + throw Exception( + ErrorCodes::INVALID_SETTING_VALUE, + "Setting max_upload_part_size has invalid value {} which is greater than the s3 API limit {}", + ReadableSize(max_upload_part_size), ReadableSize(max_upload_part_size_limit)); + + if (max_single_part_upload_size > max_upload_part_size_limit) + throw Exception( + ErrorCodes::INVALID_SETTING_VALUE, + "Setting max_single_part_upload_size has invalid value {} which is greater than the s3 API limit {}", + ReadableSize(max_single_part_upload_size), ReadableSize(max_upload_part_size_limit)); + + if (max_single_operation_copy_size > max_upload_part_size_limit) + throw Exception( + ErrorCodes::INVALID_SETTING_VALUE, + "Setting max_single_operation_copy_size has invalid value {} which is greater than the s3 API limit {}", + ReadableSize(max_single_operation_copy_size), ReadableSize(max_upload_part_size_limit)); + + if (max_upload_part_size < min_upload_part_size) + throw Exception( + ErrorCodes::INVALID_SETTING_VALUE, + "Setting max_upload_part_size ({}) can't be less than setting min_upload_part_size {}", + ReadableSize(max_upload_part_size), ReadableSize(min_upload_part_size)); + + if (!upload_part_size_multiply_factor) + throw Exception( + ErrorCodes::INVALID_SETTING_VALUE, + "Setting upload_part_size_multiply_factor cannot be zero", + upload_part_size_multiply_factor); + + if (!upload_part_size_multiply_parts_count_threshold) + throw Exception( + ErrorCodes::INVALID_SETTING_VALUE, + "Setting upload_part_size_multiply_parts_count_threshold cannot be zero", + upload_part_size_multiply_parts_count_threshold); + + if (!max_part_number) + throw Exception( + ErrorCodes::INVALID_SETTING_VALUE, + "Setting max_part_number cannot be zero", + max_part_number); + + static constexpr size_t max_part_number_limit = 10000; + if (max_part_number > max_part_number_limit) + throw Exception( + ErrorCodes::INVALID_SETTING_VALUE, + "Setting max_part_number has invalid value {} which is greater than the s3 API limit {}", + ReadableSize(max_part_number), ReadableSize(max_part_number_limit)); + + size_t maybe_overflow; + if (common::mulOverflow(max_upload_part_size, upload_part_size_multiply_factor, maybe_overflow)) + throw Exception( + ErrorCodes::INVALID_SETTING_VALUE, + "Setting upload_part_size_multiply_factor is too big ({}). Multiplication to max_upload_part_size ({}) will cause integer overflow", + upload_part_size_multiply_factor, ReadableSize(max_upload_part_size)); + + /// TODO: it's possible to set too small limits. We can check that max possible object size is not too small.
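The TODO above points at the other direction: with a given PartUploadSettings the largest uploadable object is bounded. Assuming the growth schedule the setting names suggest (part size starts at min_upload_part_size, is multiplied by upload_part_size_multiply_factor every upload_part_size_multiply_parts_count_threshold parts, is capped at max_upload_part_size, and at most max_part_number parts are allowed), a rough estimate looks like the sketch below. The numbers are the defaults this patch adds to PartUploadSettings; the schedule itself is an assumption, not a quote of the upload code.

#include <algorithm>
#include <cstdint>
#include <iostream>

int main()
{
    const uint64_t min_upload_part_size = 16ull * 1024 * 1024;
    const uint64_t max_upload_part_size = 5ull * 1024 * 1024 * 1024;
    const uint64_t multiply_factor = 2;
    const uint64_t parts_count_threshold = 500;
    const uint64_t max_part_number = 10000;

    uint64_t part_size = min_upload_part_size;
    uint64_t total = 0;
    for (uint64_t part = 1; part <= max_part_number; ++part)
    {
        total += part_size;
        if (part % parts_count_threshold == 0)
            part_size = std::min(part_size * multiply_factor, max_upload_part_size);
    }

    // A bit over 30 TiB with these defaults; shrinking min_upload_part_size or
    // max_part_number too far makes this bound surprisingly small.
    std::cout << "approximate max object size: "
              << total / (1024.0 * 1024 * 1024 * 1024) << " TiB\n";
}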
+} + + +S3Settings::RequestSettings::RequestSettings(const Settings & settings) + : upload_settings(settings) +{ + updateFromSettingsImpl(settings, false); +} + +S3Settings::RequestSettings::RequestSettings(const NamedCollection & collection) + : upload_settings(collection) +{ + max_single_read_retries = collection.getOrDefault("max_single_read_retries", max_single_read_retries); + max_connections = collection.getOrDefault("max_connections", max_connections); +} + +S3Settings::RequestSettings::RequestSettings( + const Poco::Util::AbstractConfiguration & config, + const String & config_prefix, + const Settings & settings, + String setting_name_prefix) + : upload_settings(config, config_prefix, settings, setting_name_prefix) +{ + String key = config_prefix + "." + setting_name_prefix; + max_single_read_retries = config.getUInt64(key + "max_single_read_retries", settings.s3_max_single_read_retries); + max_connections = config.getUInt64(key + "max_connections", settings.s3_max_connections); + check_objects_after_upload = config.getBool(key + "check_objects_after_upload", settings.s3_check_objects_after_upload); + + /// NOTE: it would be better to reuse old throttlers to avoid losing token bucket state on every config reload, + /// which could lead to exceeding limit for short time. But it is good enough unless very high `burst` values are used. + if (UInt64 max_get_rps = config.getUInt64(key + "max_get_rps", settings.s3_max_get_rps)) + { + size_t default_max_get_burst = settings.s3_max_get_burst + ? settings.s3_max_get_burst + : (Throttler::default_burst_seconds * max_get_rps); + + size_t max_get_burst = config.getUInt64(key + "max_get_burst", default_max_get_burst); + + get_request_throttler = std::make_shared(max_get_rps, max_get_burst); + } + if (UInt64 max_put_rps = config.getUInt64(key + "max_put_rps", settings.s3_max_put_rps)) + { + size_t default_max_put_burst = settings.s3_max_put_burst + ? settings.s3_max_put_burst + : (Throttler::default_burst_seconds * max_put_rps); + + size_t max_put_burst = config.getUInt64(key + "max_put_burst", default_max_put_burst); + + put_request_throttler = std::make_shared(max_put_rps, max_put_burst); + } +} + +void S3Settings::RequestSettings::updateFromSettingsImpl(const Settings & settings, bool if_changed) +{ + if (!if_changed || settings.s3_max_single_read_retries.changed) + max_single_read_retries = settings.s3_max_single_read_retries; + + if (!if_changed || settings.s3_max_connections.changed) + max_connections = settings.s3_max_connections; + + if (!if_changed || settings.s3_check_objects_after_upload.changed) + check_objects_after_upload = settings.s3_check_objects_after_upload; + + if (!if_changed || settings.s3_max_unexpected_write_error_retries.changed) + max_unexpected_write_error_retries = settings.s3_max_unexpected_write_error_retries; + + if ((!if_changed || settings.s3_max_get_rps.changed || settings.s3_max_get_burst.changed) && settings.s3_max_get_rps) + get_request_throttler = std::make_shared( + settings.s3_max_get_rps, settings.s3_max_get_burst ? settings.s3_max_get_burst : Throttler::default_burst_seconds * settings.s3_max_get_rps); + + if ((!if_changed || settings.s3_max_put_rps.changed || settings.s3_max_put_burst.changed) && settings.s3_max_put_rps) + put_request_throttler = std::make_shared( + settings.s3_max_put_rps, settings.s3_max_put_burst ? 
settings.s3_max_put_burst : Throttler::default_burst_seconds * settings.s3_max_put_rps); +} + +void S3Settings::RequestSettings::updateFromSettings(const Settings & settings) +{ + updateFromSettingsImpl(settings, true); + upload_settings.updateFromSettings(settings); +} + + void StorageS3Settings::loadFromConfig(const String & config_elem, const Poco::Util::AbstractConfiguration & config, const Settings & settings) { std::lock_guard lock(mutex); @@ -22,50 +230,13 @@ void StorageS3Settings::loadFromConfig(const String & config_elem, const Poco::U Poco::Util::AbstractConfiguration::Keys config_keys; config.keys(config_elem, config_keys); - auto get_string_for_key = [&](const String & key, const String & elem, bool with_default = true, const String & default_value = "") - { - return with_default ? config.getString(config_elem + "." + key + "." + elem, default_value) : config.getString(config_elem + "." + key + "." + elem); - }; - - auto get_uint_for_key = [&](const String & key, const String & elem, bool with_default = true, UInt64 default_value = 0) - { - return with_default ? config.getUInt64(config_elem + "." + key + "." + elem, default_value) : config.getUInt64(config_elem + "." + key + "." + elem); - }; - - - auto get_bool_for_key = [&](const String & key, const String & elem, bool with_default = true, bool default_value = false) - { - return with_default ? config.getBool(config_elem + "." + key + "." + elem, default_value) : config.getBool(config_elem + "." + key + "." + elem); - }; - - for (const String & key : config_keys) { if (config.has(config_elem + "." + key + ".endpoint")) { - auto endpoint = get_string_for_key(key, "endpoint", false); - + auto endpoint = config.getString(config_elem + "." + key + ".endpoint"); auto auth_settings = S3::AuthSettings::loadFromConfig(config_elem + "." + key, config); - - S3Settings::RequestSettings request_settings; - request_settings.max_single_read_retries = get_uint_for_key(key, "max_single_read_retries", true, settings.s3_max_single_read_retries); - request_settings.min_upload_part_size = get_uint_for_key(key, "min_upload_part_size", true, settings.s3_min_upload_part_size); - request_settings.max_upload_part_size = get_uint_for_key(key, "max_upload_part_size", true, S3Settings::RequestSettings::DEFAULT_MAX_UPLOAD_PART_SIZE); - request_settings.upload_part_size_multiply_factor = get_uint_for_key(key, "upload_part_size_multiply_factor", true, settings.s3_upload_part_size_multiply_factor); - request_settings.upload_part_size_multiply_parts_count_threshold = get_uint_for_key(key, "upload_part_size_multiply_parts_count_threshold", true, settings.s3_upload_part_size_multiply_parts_count_threshold); - request_settings.max_part_number = get_uint_for_key(key, "max_part_number", true, S3Settings::RequestSettings::DEFAULT_MAX_PART_NUMBER); - request_settings.max_single_part_upload_size = get_uint_for_key(key, "max_single_part_upload_size", true, settings.s3_max_single_part_upload_size); - request_settings.max_single_operation_copy_size = get_uint_for_key(key, "max_single_operation_copy_size", true, S3Settings::RequestSettings::DEFAULT_MAX_SINGLE_OPERATION_COPY_SIZE); - request_settings.max_connections = get_uint_for_key(key, "max_connections", true, settings.s3_max_connections); - request_settings.check_objects_after_upload = get_bool_for_key(key, "check_objects_after_upload", true, false); - - // NOTE: it would be better to reuse old throttlers to avoid losing token bucket state on every config reload, which could lead to exceeding limit for short time. 
But it is good enough unless very high `burst` values are used. - if (UInt64 max_get_rps = get_uint_for_key(key, "max_get_rps", true, settings.s3_max_get_rps)) - request_settings.get_request_throttler = std::make_shared( - max_get_rps, get_uint_for_key(key, "max_get_burst", true, settings.s3_max_get_burst ? settings.s3_max_get_burst : Throttler::default_burst_seconds * max_get_rps)); - if (UInt64 max_put_rps = get_uint_for_key(key, "max_put_rps", true, settings.s3_max_put_rps)) - request_settings.put_request_throttler = std::make_shared( - max_put_rps, get_uint_for_key(key, "max_put_burst", true, settings.s3_max_put_burst ? settings.s3_max_put_burst : Throttler::default_burst_seconds * max_put_rps)); + S3Settings::RequestSettings request_settings(config, config_elem + "." + key, settings); s3_settings.emplace(endpoint, S3Settings{std::move(auth_settings), std::move(request_settings)}); } @@ -88,53 +259,4 @@ S3Settings StorageS3Settings::getSettings(const String & endpoint) const return {}; } -S3Settings::RequestSettings::RequestSettings(const Settings & settings) -{ - max_single_read_retries = settings.s3_max_single_read_retries; - min_upload_part_size = settings.s3_min_upload_part_size; - upload_part_size_multiply_factor = settings.s3_upload_part_size_multiply_factor; - upload_part_size_multiply_parts_count_threshold = settings.s3_upload_part_size_multiply_parts_count_threshold; - max_single_part_upload_size = settings.s3_max_single_part_upload_size; - max_connections = settings.s3_max_connections; - check_objects_after_upload = settings.s3_check_objects_after_upload; - max_unexpected_write_error_retries = settings.s3_max_unexpected_write_error_retries; - if (settings.s3_max_get_rps) - get_request_throttler = std::make_shared( - settings.s3_max_get_rps, settings.s3_max_get_burst ? settings.s3_max_get_burst : Throttler::default_burst_seconds * settings.s3_max_get_rps); - if (settings.s3_max_put_rps) - put_request_throttler = std::make_shared( - settings.s3_max_put_rps, settings.s3_max_put_burst ? settings.s3_max_put_burst : Throttler::default_burst_seconds * settings.s3_max_put_rps); -} - -void S3Settings::RequestSettings::updateFromSettingsIfEmpty(const Settings & settings) -{ - if (!max_single_read_retries) - max_single_read_retries = settings.s3_max_single_read_retries; - if (!min_upload_part_size) - min_upload_part_size = settings.s3_min_upload_part_size; - if (!max_upload_part_size) - max_upload_part_size = DEFAULT_MAX_UPLOAD_PART_SIZE; - if (!upload_part_size_multiply_factor) - upload_part_size_multiply_factor = settings.s3_upload_part_size_multiply_factor; - if (!upload_part_size_multiply_parts_count_threshold) - upload_part_size_multiply_parts_count_threshold = settings.s3_upload_part_size_multiply_parts_count_threshold; - if (!max_part_number) - max_part_number = DEFAULT_MAX_PART_NUMBER; - if (!max_single_part_upload_size) - max_single_part_upload_size = settings.s3_max_single_part_upload_size; - if (!max_single_operation_copy_size) - max_single_operation_copy_size = DEFAULT_MAX_SINGLE_OPERATION_COPY_SIZE; - if (!max_connections) - max_connections = settings.s3_max_connections; - if (!max_unexpected_write_error_retries) - max_unexpected_write_error_retries = settings.s3_max_unexpected_write_error_retries; - check_objects_after_upload = settings.s3_check_objects_after_upload; - if (!get_request_throttler && settings.s3_max_get_rps) - get_request_throttler = std::make_shared( - settings.s3_max_get_rps, settings.s3_max_get_burst ? 
settings.s3_max_get_burst : Throttler::default_burst_seconds * settings.s3_max_get_rps); - if (!put_request_throttler && settings.s3_max_put_rps) - put_request_throttler = std::make_shared( - settings.s3_max_put_rps, settings.s3_max_put_burst ? settings.s3_max_put_burst : Throttler::default_burst_seconds * settings.s3_max_put_rps); -} - } diff --git a/src/Storages/StorageS3Settings.h b/src/Storages/StorageS3Settings.h index bf04dbe3a61..368fcfaf469 100644 --- a/src/Storages/StorageS3Settings.h +++ b/src/Storages/StorageS3Settings.h @@ -21,67 +21,78 @@ namespace DB { struct Settings; +class NamedCollection; struct S3Settings { struct RequestSettings { - size_t max_single_read_retries = 0; - size_t min_upload_part_size = 0; - size_t max_upload_part_size = 0; - size_t upload_part_size_multiply_factor = 0; - size_t upload_part_size_multiply_parts_count_threshold = 0; - size_t max_part_number = 0; - size_t max_single_part_upload_size = 0; - size_t max_single_operation_copy_size = 0; - size_t max_connections = 0; + struct PartUploadSettings + { + size_t min_upload_part_size = 16 * 1024 * 1024; + size_t max_upload_part_size = 5ULL * 1024 * 1024 * 1024; + size_t upload_part_size_multiply_factor = 2; + size_t upload_part_size_multiply_parts_count_threshold = 500; + size_t max_part_number = 10000; + size_t max_single_part_upload_size = 32 * 1024 * 1024; + size_t max_single_operation_copy_size = 5ULL * 1024 * 1024 * 1024; + + void updateFromSettings(const Settings & settings) { updateFromSettingsImpl(settings, true); } + void validate(); + + private: + PartUploadSettings() = default; + explicit PartUploadSettings(const Settings & settings); + explicit PartUploadSettings(const NamedCollection & collection); + PartUploadSettings( + const Poco::Util::AbstractConfiguration & config, + const String & config_prefix, + const Settings & settings, + String setting_name_prefix = {}); + + void updateFromSettingsImpl(const Settings & settings, bool if_changed); + + friend struct RequestSettings; + }; + + private: + PartUploadSettings upload_settings = {}; + + public: + size_t max_single_read_retries = 4; + size_t max_connections = 1024; bool check_objects_after_upload = false; - size_t max_unexpected_write_error_retries = 0; + size_t max_unexpected_write_error_retries = 4; ThrottlerPtr get_request_throttler; ThrottlerPtr put_request_throttler; + const PartUploadSettings & getUploadSettings() const { return upload_settings; } + RequestSettings() = default; explicit RequestSettings(const Settings & settings); + explicit RequestSettings(const NamedCollection & collection); - inline bool operator==(const RequestSettings & other) const - { - return max_single_read_retries == other.max_single_read_retries - && min_upload_part_size == other.min_upload_part_size - && max_upload_part_size == other.max_upload_part_size - && upload_part_size_multiply_factor == other.upload_part_size_multiply_factor - && upload_part_size_multiply_parts_count_threshold == other.upload_part_size_multiply_parts_count_threshold - && max_part_number == other.max_part_number - && max_single_part_upload_size == other.max_single_part_upload_size - && max_single_operation_copy_size == other.max_single_operation_copy_size - && max_connections == other.max_connections - && check_objects_after_upload == other.check_objects_after_upload - && max_unexpected_write_error_retries == other.max_unexpected_write_error_retries - && get_request_throttler == other.get_request_throttler - && put_request_throttler == other.put_request_throttler; - } + /// What's 
the setting_name_prefix, and why do we need it? + /// There are (at least) two config sections where s3 settings can be specified: + /// * settings for s3 disk (clickhouse/storage_configuration/disks) + /// * settings for s3 storage (clickhouse/s3), which are also used for backups + /// Even though settings are the same, in case of s3 disk they are prefixed with "s3_" + /// ("s3_max_single_part_upload_size"), but in case of s3 storage they are not + /// ( "max_single_part_upload_size"). Why this happened is a complete mystery to me. + RequestSettings( + const Poco::Util::AbstractConfiguration & config, + const String & config_prefix, + const Settings & settings, + String setting_name_prefix = {}); - static const constexpr UInt64 DEFAULT_SINGLE_READ_RETRIES = 4; - static const constexpr UInt64 DEFAULT_MIN_UPLOAD_PART_SIZE = 16 * 1024 * 1024; - static const constexpr UInt64 DEFAULT_MAX_UPLOAD_PART_SIZE = 5ULL * 1024 * 1024 * 1024; - static const constexpr UInt64 DEFAULT_UPLOAD_PART_SIZE_MULTIPLY_FACTOR = 2; - static const constexpr UInt64 DEFAULT_UPLOAD_PART_SIZE_MULTIPLY_PARTS_COUNT_THRESHOLD = 500; - static const constexpr UInt64 DEFAULT_MAX_PART_NUMBER = 10000; - static const constexpr UInt64 DEFAULT_MAX_SINGLE_PART_UPLOAD_SIZE = 32 * 1024 * 1024; - static const constexpr UInt64 DEFAULT_MAX_SINGLE_OPERATION_COPY_SIZE = 5ULL * 1024 * 1024 * 1024; - static const constexpr UInt64 DEFAULT_MAX_CONNECTIONS = 1024; - static const constexpr UInt64 DEFAULT_MAX_UNEXPECTED_WRITE_ERRORS_RETRIES = 4; + void updateFromSettings(const Settings & settings); - void setEmptyFieldsByDefault(); - void updateFromSettingsIfEmpty(const Settings & settings); + private: + void updateFromSettingsImpl(const Settings & settings, bool if_changed); }; S3::AuthSettings auth_settings; RequestSettings request_settings; - - inline bool operator==(const S3Settings & other) const - { - return auth_settings == other.auth_settings && request_settings == other.request_settings; - } }; /// Settings for the StorageS3. 
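A minimal sketch of the prefix mechanics described in the setting_name_prefix comment above, using a plain std::map as a stand-in for the Poco configuration API (all keys and identifiers here are illustrative, not the real config paths): one loader can serve both layouts, the disk section where names carry an "s3_" prefix and the s3/backup section where they do not, by concatenating an optional setting_name_prefix into the lookup key.

#include <cstdint>
#include <iostream>
#include <map>
#include <string>

using FakeConfig = std::map<std::string, std::string>;

/// Stand-in for config.getUInt64(config_prefix + "." + setting_name_prefix + name, default_value).
uint64_t getUInt64(const FakeConfig & config, const std::string & config_prefix,
                   const std::string & setting_name_prefix, const std::string & name, uint64_t default_value)
{
    auto it = config.find(config_prefix + "." + setting_name_prefix + name);
    return it == config.end() ? default_value : std::stoull(it->second);
}

int main()
{
    FakeConfig config = {
        {"disks.s3_disk.s3_max_single_part_upload_size", "33554432"},  /// disk section: "s3_" prefix
        {"s3.my_endpoint.max_single_part_upload_size", "33554432"},    /// s3/backup section: no prefix
    };

    std::cout << getUInt64(config, "disks.s3_disk", "s3_", "max_single_part_upload_size", 0) << '\n';
    std::cout << getUInt64(config, "s3.my_endpoint", "", "max_single_part_upload_size", 0) << '\n';
}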
@@ -97,28 +108,4 @@ private: std::map s3_settings; }; -inline void S3Settings::RequestSettings::setEmptyFieldsByDefault() -{ - if (!max_single_read_retries) - max_single_read_retries = DEFAULT_SINGLE_READ_RETRIES; - if (!min_upload_part_size) - min_upload_part_size = DEFAULT_MIN_UPLOAD_PART_SIZE; - if (!max_upload_part_size) - max_upload_part_size = DEFAULT_MAX_UPLOAD_PART_SIZE; - if (!upload_part_size_multiply_factor) - upload_part_size_multiply_factor = DEFAULT_UPLOAD_PART_SIZE_MULTIPLY_FACTOR; - if (!upload_part_size_multiply_parts_count_threshold) - upload_part_size_multiply_parts_count_threshold = DEFAULT_UPLOAD_PART_SIZE_MULTIPLY_PARTS_COUNT_THRESHOLD; - if (!max_part_number) - max_part_number = DEFAULT_MAX_PART_NUMBER; - if (!max_single_part_upload_size) - max_single_part_upload_size = DEFAULT_MAX_SINGLE_PART_UPLOAD_SIZE; - if (!max_single_operation_copy_size) - max_single_operation_copy_size = DEFAULT_MAX_SINGLE_OPERATION_COPY_SIZE; - if (!max_connections) - max_connections = DEFAULT_MAX_CONNECTIONS; - if (!max_unexpected_write_error_retries) - max_unexpected_write_error_retries = DEFAULT_MAX_UNEXPECTED_WRITE_ERRORS_RETRIES; -} - } diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 0f01dc4288c..d9690c29020 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -982,7 +982,7 @@ ASTs::iterator StorageURL::collectHeaders( { ASTs::iterator headers_it = url_function_args.end(); - for (auto arg_it = url_function_args.begin(); arg_it != url_function_args.end(); ++arg_it) + for (auto * arg_it = url_function_args.begin(); arg_it != url_function_args.end(); ++arg_it) { const auto * headers_ast_function = (*arg_it)->as(); if (headers_ast_function && headers_ast_function->name == "headers") @@ -1068,7 +1068,7 @@ URLBasedDataSourceConfiguration StorageURL::getConfiguration(ASTs & args, Contex if (args.empty() || args.size() > 3) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, bad_arguments_error_message); - auto header_it = collectHeaders(args, configuration, local_context); + auto * header_it = collectHeaders(args, configuration, local_context); if (header_it != args.end()) args.erase(header_it); diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index 4f3003e68b0..f040e94e141 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -82,6 +82,20 @@ bool hasJoin(const ASTSelectWithUnionQuery & ast) return false; } +/** There are no limits on the maximum size of the result for the view. + * Since the result of the view is not the result of the entire query. 
+ */ +ContextPtr getViewContext(ContextPtr context) +{ + auto view_context = Context::createCopy(context); + Settings view_settings = context->getSettings(); + view_settings.max_result_rows = 0; + view_settings.max_result_bytes = 0; + view_settings.extremes = false; + view_context->setSettings(view_settings); + return view_context; +} + } StorageView::StorageView( @@ -127,13 +141,13 @@ void StorageView::read( if (context->getSettingsRef().allow_experimental_analyzer) { - InterpreterSelectQueryAnalyzer interpreter(current_inner_query, options, context); + InterpreterSelectQueryAnalyzer interpreter(current_inner_query, options, getViewContext(context)); interpreter.addStorageLimits(*query_info.storage_limits); query_plan = std::move(interpreter).extractQueryPlan(); } else { - InterpreterSelectWithUnionQuery interpreter(current_inner_query, context, options, column_names); + InterpreterSelectWithUnionQuery interpreter(current_inner_query, getViewContext(context), options, column_names); interpreter.addStorageLimits(*query_info.storage_limits); interpreter.buildQueryPlan(query_plan); } diff --git a/src/Storages/System/StorageSystemFilesystemCache.cpp b/src/Storages/System/StorageSystemFilesystemCache.cpp index cd9324b3253..bec92a60436 100644 --- a/src/Storages/System/StorageSystemFilesystemCache.cpp +++ b/src/Storages/System/StorageSystemFilesystemCache.cpp @@ -24,7 +24,8 @@ NamesAndTypesList StorageSystemFilesystemCache::getNamesAndTypes() {"cache_hits", std::make_shared()}, {"references", std::make_shared()}, {"downloaded_size", std::make_shared()}, - {"persistent", std::make_shared>()} + {"persistent", std::make_shared>()}, + {"kind", std::make_shared()}, }; } @@ -45,8 +46,11 @@ void StorageSystemFilesystemCache::fillData(MutableColumns & res_columns, Contex for (const auto & file_segment : file_segments) { res_columns[0]->insert(cache_base_path); + + /// Do not use `file_segment->getPathInLocalCache` here because it will lead to nullptr dereference + /// (because file_segments in getSnapshot doesn't have `cache` field set) res_columns[1]->insert( - cache->getPathInLocalCache(file_segment->key(), file_segment->offset(), file_segment->isPersistent())); + cache->getPathInLocalCache(file_segment->key(), file_segment->offset(), file_segment->getKind())); const auto & range = file_segment->range(); res_columns[2]->insert(range.left); @@ -57,6 +61,7 @@ void StorageSystemFilesystemCache::fillData(MutableColumns & res_columns, Contex res_columns[7]->insert(file_segment->getRefCount()); res_columns[8]->insert(file_segment->getDownloadedSize()); res_columns[9]->insert(file_segment->isPersistent()); + res_columns[10]->insert(toString(file_segment->getKind())); } } } diff --git a/src/Storages/System/StorageSystemParts.cpp b/src/Storages/System/StorageSystemParts.cpp index b205b7c224d..f6854e7d5d0 100644 --- a/src/Storages/System/StorageSystemParts.cpp +++ b/src/Storages/System/StorageSystemParts.cpp @@ -234,9 +234,12 @@ void StorageSystemParts::processNextStorage( if (columns_mask[src_index++]) { - // The full path changes at clean up thread under deleting state, do not read it, avoid the race - if (part->isStoredOnDisk() && part_state != State::Deleting) + /// The full path changes at clean up thread, so do not read it if parts can be deleted, avoid the race. 
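/// Illustrative sketch, not part of the patch: the guard added below, in isolation. The cleanup
/// thread may rename or remove a part's directory once the part enters a state in which it can
/// be deleted, so the snapshot code reads the full path only for stable states and inserts a
/// default value otherwise. The enum and helper are simplified stand-ins for the real part states.
enum class PartStateSketch { Active, Outdated, Deleting, DeleteOnDestroy, Temporary };

inline bool isPathSafeToRead(PartStateSketch state)
{
    return state != PartStateSketch::Deleting
        && state != PartStateSketch::DeleteOnDestroy
        && state != PartStateSketch::Temporary;
}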
+ if (part->isStoredOnDisk() + && part_state != State::Deleting && part_state != State::DeleteOnDestroy && part_state != State::Temporary) + { columns[res_index++]->insert(part->getDataPartStorage().getFullPath()); + } else columns[res_index++]->insertDefault(); } diff --git a/src/Storages/System/StorageSystemPartsColumns.cpp b/src/Storages/System/StorageSystemPartsColumns.cpp index 65b5af0c8e9..00b958b015f 100644 --- a/src/Storages/System/StorageSystemPartsColumns.cpp +++ b/src/Storages/System/StorageSystemPartsColumns.cpp @@ -193,9 +193,12 @@ void StorageSystemPartsColumns::processNextStorage( columns[res_index++]->insert(part->getDataPartStorage().getDiskName()); if (columns_mask[src_index++]) { - // The full path changes at clean up thread under deleting state, do not read it, avoid the race - if (part_state != State::Deleting) + /// The full path changes at clean up thread, so do not read it if parts can be deleted, avoid the race. + if (part->isStoredOnDisk() + && part_state != State::Deleting && part_state != State::DeleteOnDestroy && part_state != State::Temporary) + { columns[res_index++]->insert(part->getDataPartStorage().getFullPath()); + } else columns[res_index++]->insertDefault(); } diff --git a/src/Storages/System/StorageSystemProcesses.cpp b/src/Storages/System/StorageSystemProcesses.cpp index 08d3666216f..213e3ed5dc0 100644 --- a/src/Storages/System/StorageSystemProcesses.cpp +++ b/src/Storages/System/StorageSystemProcesses.cpp @@ -119,7 +119,7 @@ void StorageSystemProcesses::fillData(MutableColumns & res_columns, ContextPtr c res_columns[i++]->insert(process.client_info.quota_key); res_columns[i++]->insert(process.client_info.distributed_depth); - res_columns[i++]->insert(process.elapsed_seconds); + res_columns[i++]->insert(static_cast(process.elapsed_microseconds) / 100000.0); res_columns[i++]->insert(process.is_cancelled); res_columns[i++]->insert(process.is_all_data_sent); res_columns[i++]->insert(process.read_rows); diff --git a/src/Storages/System/StorageSystemReplicas.cpp b/src/Storages/System/StorageSystemReplicas.cpp index 0f7877a6e41..363b47d96cb 100644 --- a/src/Storages/System/StorageSystemReplicas.cpp +++ b/src/Storages/System/StorageSystemReplicas.cpp @@ -153,7 +153,7 @@ Pipe StorageSystemReplicas::read( for (size_t i = 0, size = col_database->size(); i < size; ++i) { - StorageReplicatedMergeTree::Status status; + ReplicatedTableStatus status; dynamic_cast( *replicated_tables [(*col_database)[i].safeGet()] diff --git a/src/Storages/TTLDescription.cpp b/src/Storages/TTLDescription.cpp index 41c9c1996b1..2971d977099 100644 --- a/src/Storages/TTLDescription.cpp +++ b/src/Storages/TTLDescription.cpp @@ -61,7 +61,7 @@ void checkTTLExpression(const ExpressionActionsPtr & ttl_expression, const Strin { if (action.node->type == ActionsDAG::ActionType::FUNCTION) { - IFunctionBase & func = *action.node->function_base; + const IFunctionBase & func = *action.node->function_base; if (!func.isDeterministic()) throw Exception( "TTL expression cannot contain non-deterministic functions, " diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 2ada0fa3323..ffa04bcdd83 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -51,7 +51,7 @@ bool isValidFunction(const ASTPtr & expression, const std::function & is_constant, std::vector & result) +bool extractFunctions(const ASTPtr & expression, const std::function & is_constant, ASTs & result) { const auto * function = expression->as(); if (function && 
(function->name == "and" || function->name == "indexHint")) @@ -175,7 +175,7 @@ bool prepareFilterBlockWithQuery(const ASTPtr & query, ContextPtr context, Block }; /// Create an expression that evaluates the expressions in WHERE and PREWHERE, depending only on the existing columns. - std::vector functions; + ASTs functions; if (select.where()) unmodified &= extractFunctions(select.where(), is_constant, functions); if (select.prewhere()) diff --git a/src/TableFunctions/TableFunctionDeltaLake.cpp b/src/TableFunctions/TableFunctionDeltaLake.cpp index f831d4ae609..221a512172d 100644 --- a/src/TableFunctions/TableFunctionDeltaLake.cpp +++ b/src/TableFunctions/TableFunctionDeltaLake.cpp @@ -33,7 +33,7 @@ void TableFunctionDeltaLake::parseArgumentsImpl( if (args.empty() || args.size() > 6) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, error_message); - auto header_it = StorageURL::collectHeaders(args, base_configuration, context); + auto * header_it = StorageURL::collectHeaders(args, base_configuration, context); if (header_it != args.end()) args.erase(header_it); diff --git a/src/TableFunctions/TableFunctionHudi.cpp b/src/TableFunctions/TableFunctionHudi.cpp index f39f3b515ec..49d9e7da97d 100644 --- a/src/TableFunctions/TableFunctionHudi.cpp +++ b/src/TableFunctions/TableFunctionHudi.cpp @@ -33,7 +33,7 @@ void TableFunctionHudi::parseArgumentsImpl( if (args.empty() || args.size() > 6) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, error_message); - auto header_it = StorageURL::collectHeaders(args, base_configuration, context); + auto * header_it = StorageURL::collectHeaders(args, base_configuration, context); if (header_it != args.end()) args.erase(header_it); diff --git a/src/TableFunctions/TableFunctionMySQL.cpp b/src/TableFunctions/TableFunctionMySQL.cpp index ab1c23afa7a..a34e87ee313 100644 --- a/src/TableFunctions/TableFunctionMySQL.cpp +++ b/src/TableFunctions/TableFunctionMySQL.cpp @@ -45,7 +45,7 @@ void TableFunctionMySQL::parseArguments(const ASTPtr & ast_function, ContextPtr mysql_settings.connect_timeout = settings.external_storage_connect_timeout_sec; mysql_settings.read_write_timeout = settings.external_storage_rw_timeout_sec; - for (auto it = args.begin(); it != args.end(); ++it) + for (auto * it = args.begin(); it != args.end(); ++it) { const ASTSetQuery * settings_ast = (*it)->as(); if (settings_ast) diff --git a/src/TableFunctions/TableFunctionS3.cpp b/src/TableFunctions/TableFunctionS3.cpp index 23822486c29..562cb3460c6 100644 --- a/src/TableFunctions/TableFunctionS3.cpp +++ b/src/TableFunctions/TableFunctionS3.cpp @@ -40,7 +40,7 @@ void TableFunctionS3::parseArgumentsImpl(const String & error_message, ASTs & ar if (args.empty() || args.size() > 6) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, error_message); - auto header_it = StorageURL::collectHeaders(args, s3_configuration, context); + auto * header_it = StorageURL::collectHeaders(args, s3_configuration, context); if (header_it != args.end()) args.erase(header_it); diff --git a/src/TableFunctions/TableFunctionURL.cpp b/src/TableFunctions/TableFunctionURL.cpp index 888f3e7b93d..46e09b02901 100644 --- a/src/TableFunctions/TableFunctionURL.cpp +++ b/src/TableFunctions/TableFunctionURL.cpp @@ -71,7 +71,7 @@ void TableFunctionURL::parseArguments(const ASTPtr & ast_function, ContextPtr co auto * url_function_args_expr = assert_cast(args[0].get()); auto & url_function_args = url_function_args_expr->children; - auto headers_it = StorageURL::collectHeaders(url_function_args, 
configuration, context); + auto * headers_it = StorageURL::collectHeaders(url_function_args, configuration, context); /// ITableFunctionFileLike cannot parse headers argument, so remove it. if (headers_it != url_function_args.end()) url_function_args.erase(headers_it); diff --git a/tests/ci/ast_fuzzer_check.py b/tests/ci/ast_fuzzer_check.py index 01637f928c0..096edeed149 100644 --- a/tests/ci/ast_fuzzer_check.py +++ b/tests/ci/ast_fuzzer_check.py @@ -95,7 +95,7 @@ if __name__ == "__main__": ) logging.info("Going to run %s", run_command) - run_log_path = os.path.join(temp_path, "runlog.log") + run_log_path = os.path.join(temp_path, "run.log") with open(run_log_path, "w", encoding="utf-8") as log: with subprocess.Popen( run_command, shell=True, stderr=log, stdout=log @@ -113,7 +113,7 @@ if __name__ == "__main__": ) s3_prefix = f"{pr_info.number}/{pr_info.sha}/fuzzer_{check_name_lower}/" paths = { - "runlog.log": run_log_path, + "run.log": run_log_path, "main.log": os.path.join(workspace_path, "main.log"), "server.log.gz": os.path.join(workspace_path, "server.log.gz"), "fuzzer.log": os.path.join(workspace_path, "fuzzer.log"), @@ -124,20 +124,12 @@ if __name__ == "__main__": s3_helper = S3Helper() for f in paths: try: - paths[f] = s3_helper.upload_test_report_to_s3(paths[f], s3_prefix + "/" + f) + paths[f] = s3_helper.upload_test_report_to_s3(paths[f], s3_prefix + f) except Exception as ex: logging.info("Exception uploading file %s text %s", f, ex) paths[f] = "" report_url = GITHUB_RUN_URL - if paths["runlog.log"]: - report_url = paths["runlog.log"] - if paths["main.log"]: - report_url = paths["main.log"] - if paths["server.log.gz"]: - report_url = paths["server.log.gz"] - if paths["fuzzer.log"]: - report_url = paths["fuzzer.log"] if paths["report.html"]: report_url = paths["report.html"] diff --git a/tests/ci/build_check.py b/tests/ci/build_check.py index c9e8dac2c00..a718bd53418 100644 --- a/tests/ci/build_check.py +++ b/tests/ci/build_check.py @@ -122,7 +122,8 @@ def check_for_success_run( build_name: str, build_config: BuildConfig, ) -> None: - logged_prefix = os.path.join(S3_BUILDS_BUCKET, s3_prefix) + # the final empty argument is necessary for distinguish build and build_suffix + logged_prefix = os.path.join(S3_BUILDS_BUCKET, s3_prefix, "") logging.info("Checking for artifacts in %s", logged_prefix) try: # TODO: theoretically, it would miss performance artifact for pr==0, diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index bfc7e45812b..753da25f300 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -171,12 +171,12 @@ CI_CONFIG = { "tidy": "disable", "with_coverage": False, }, - "binary_amd64sse2": { - "compiler": "clang-15-amd64sse2", + "binary_amd64_compat": { + "compiler": "clang-15-amd64-compat", "build_type": "", "sanitizer": "", "package_type": "binary", - "static_binary_name": "amd64sse2", + "static_binary_name": "amd64compat", "libraries": "static", "tidy": "disable", "with_coverage": False, @@ -203,7 +203,7 @@ CI_CONFIG = { "binary_freebsd", "binary_darwin_aarch64", "binary_ppc64le", - "binary_amd64sse2", + "binary_amd64_compat", ], }, "tests_config": { diff --git a/tests/ci/codebrowser_check.py b/tests/ci/codebrowser_check.py index 412bcdf8818..a86749c794c 100644 --- a/tests/ci/codebrowser_check.py +++ b/tests/ci/codebrowser_check.py @@ -57,7 +57,7 @@ if __name__ == "__main__": logging.info("Going to run codebrowser: %s", run_command) - run_log_path = os.path.join(TEMP_PATH, "runlog.log") + run_log_path = os.path.join(TEMP_PATH, "run.log") with 
TeePopen(run_command, run_log_path) as process: retcode = process.wait() diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index 0618969f94c..034e0110e2f 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -476,7 +476,6 @@ def main(): url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [], NAME) print(f"::notice ::Report url: {url}") - print(f'::set-output name=url_output::"{url}"') if not args.reports: return diff --git a/tests/ci/docker_manifests_merge.py b/tests/ci/docker_manifests_merge.py index 2ba5a99de0a..14585159d47 100644 --- a/tests/ci/docker_manifests_merge.py +++ b/tests/ci/docker_manifests_merge.py @@ -208,7 +208,6 @@ def main(): url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [], NAME) print(f"::notice ::Report url: {url}") - print(f'::set-output name=url_output::"{url}"') if not args.reports: return diff --git a/tests/ci/docker_server.py b/tests/ci/docker_server.py index e0053f09664..fd28e5a1890 100644 --- a/tests/ci/docker_server.py +++ b/tests/ci/docker_server.py @@ -340,7 +340,6 @@ def main(): url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [], NAME) print(f"::notice ::Report url: {url}") - print(f'::set-output name=url_output::"{url}"') if not args.reports: return diff --git a/tests/ci/docs_check.py b/tests/ci/docs_check.py index c95770b646d..cac1c3aea7c 100644 --- a/tests/ci/docs_check.py +++ b/tests/ci/docs_check.py @@ -82,7 +82,7 @@ if __name__ == "__main__": f"{docker_image}" ) - run_log_path = os.path.join(test_output, "runlog.log") + run_log_path = os.path.join(test_output, "run.log") logging.info("Running command: '%s'", cmd) with TeePopen(cmd, run_log_path) as process: diff --git a/tests/ci/docs_release.py b/tests/ci/docs_release.py index 355e4af7426..f1f420318be 100644 --- a/tests/ci/docs_release.py +++ b/tests/ci/docs_release.py @@ -60,7 +60,7 @@ if __name__ == "__main__": else: user = f"{os.geteuid()}:{os.getegid()}" - run_log_path = os.path.join(test_output, "runlog.log") + run_log_path = os.path.join(test_output, "run.log") with SSHKey("ROBOT_CLICKHOUSE_SSH_KEY"): cmd = ( diff --git a/tests/ci/download_release_packets.py b/tests/ci/download_release_packages.py similarity index 61% rename from tests/ci/download_release_packets.py rename to tests/ci/download_release_packages.py index 0e0f1884fbc..26223de2f8a 100755 --- a/tests/ci/download_release_packets.py +++ b/tests/ci/download_release_packages.py @@ -15,23 +15,27 @@ CLICKHOUSE_TAGS_URL = "https://api.github.com/repos/ClickHouse/ClickHouse/tags" DOWNLOAD_PREFIX = ( "https://github.com/ClickHouse/ClickHouse/releases/download/v{version}-{type}/" ) -CLICKHOUSE_COMMON_STATIC_PACKET_NAME = "clickhouse-common-static_{version}_amd64.deb" -CLICKHOUSE_COMMON_STATIC_DBG_PACKET_NAME = ( +CLICKHOUSE_COMMON_STATIC_PACKAGE_NAME = "clickhouse-common-static_{version}_amd64.deb" +CLICKHOUSE_COMMON_STATIC_DBG_PACKAGE_NAME = ( "clickhouse-common-static-dbg_{version}_amd64.deb" ) -CLICKHOUSE_SERVER_PACKET_NAME = "clickhouse-server_{version}_amd64.deb" -CLICKHOUSE_SERVER_PACKET_FALLBACK = "clickhouse-server_{version}_all.deb" -CLICKHOUSE_CLIENT_PACKET_NAME = "clickhouse-client_{version}_amd64.deb" -CLICKHOUSE_CLIENT_PACKET_FALLBACK = "clickhouse-client_{version}_all.deb" +CLICKHOUSE_SERVER_PACKAGE_NAME = "clickhouse-server_{version}_amd64.deb" +CLICKHOUSE_SERVER_PACKAGE_FALLBACK = "clickhouse-server_{version}_all.deb" +CLICKHOUSE_CLIENT_PACKAGE_NAME = "clickhouse-client_{version}_amd64.deb" 
+CLICKHOUSE_CLIENT_PACKAGE_FALLBACK = "clickhouse-client_{version}_all.deb" -PACKETS_DIR = "previous_release_package_folder/" +PACKAGES_DIR = "previous_release_package_folder/" VERSION_PATTERN = r"((?:\d+\.)?(?:\d+\.)?(?:\d+\.)?\d+-[a-zA-Z]*)" -def download_packet(url, out_path, retries=10, backoff_factor=0.3): +def download_package(url, out_path, retries=10, backoff_factor=0.3): session = requests.Session() retry = Retry( - total=retries, read=retries, connect=retries, backoff_factor=backoff_factor + total=retries, + read=retries, + connect=retries, + backoff_factor=backoff_factor, + status_forcelist=[500, 502, 503, 504], ) adapter = HTTPAdapter(max_retries=retry) session.mount("http://", adapter) @@ -43,7 +47,7 @@ def download_packet(url, out_path, retries=10, backoff_factor=0.3): fd.write(response.content) -def download_packets(release, dest_path=PACKETS_DIR): +def download_packages(release, dest_path=PACKAGES_DIR): if not os.path.exists(dest_path): os.makedirs(dest_path) @@ -53,35 +57,35 @@ def download_packets(release, dest_path=PACKETS_DIR): return os.path.join(dest_path, pkg_name) for pkg in ( - CLICKHOUSE_COMMON_STATIC_PACKET_NAME, - CLICKHOUSE_COMMON_STATIC_DBG_PACKET_NAME, + CLICKHOUSE_COMMON_STATIC_PACKAGE_NAME, + CLICKHOUSE_COMMON_STATIC_DBG_PACKAGE_NAME, ): url = (DOWNLOAD_PREFIX + pkg).format(version=release.version, type=release.type) pkg_name = get_dest_path(pkg.format(version=release.version)) - download_packet(url, pkg_name) + download_package(url, pkg_name) for pkg, fallback in ( - (CLICKHOUSE_SERVER_PACKET_NAME, CLICKHOUSE_SERVER_PACKET_FALLBACK), - (CLICKHOUSE_CLIENT_PACKET_NAME, CLICKHOUSE_CLIENT_PACKET_FALLBACK), + (CLICKHOUSE_SERVER_PACKAGE_NAME, CLICKHOUSE_SERVER_PACKAGE_FALLBACK), + (CLICKHOUSE_CLIENT_PACKAGE_NAME, CLICKHOUSE_CLIENT_PACKAGE_FALLBACK), ): url = (DOWNLOAD_PREFIX + pkg).format(version=release.version, type=release.type) pkg_name = get_dest_path(pkg.format(version=release.version)) try: - download_packet(url, pkg_name) + download_package(url, pkg_name) except Exception: url = (DOWNLOAD_PREFIX + fallback).format( version=release.version, type=release.type ) pkg_name = get_dest_path(fallback.format(version=release.version)) - download_packet(url, pkg_name) + download_package(url, pkg_name) def download_last_release(dest_path): current_release = get_previous_release(None) - download_packets(current_release, dest_path=dest_path) + download_packages(current_release, dest_path=dest_path) if __name__ == "__main__": logging.basicConfig(level=logging.INFO) release = ReleaseInfo(input()) - download_packets(release) + download_packages(release) diff --git a/tests/ci/fast_test_check.py b/tests/ci/fast_test_check.py index 2a6a0d5fa57..0f4c1b19707 100644 --- a/tests/ci/fast_test_check.py +++ b/tests/ci/fast_test_check.py @@ -155,7 +155,7 @@ if __name__ == "__main__": if not os.path.exists(logs_path): os.makedirs(logs_path) - run_log_path = os.path.join(logs_path, "runlog.log") + run_log_path = os.path.join(logs_path, "run.log") with TeePopen(run_cmd, run_log_path, timeout=40 * 60) as process: retcode = process.wait() if retcode == 0: diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py index 87833d688af..a9c692db0d0 100644 --- a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -16,7 +16,7 @@ from s3_helper import S3Helper from get_robot_token import get_best_robot_token from pr_info import FORCE_TESTS_LABEL, PRInfo from build_download_helper import download_all_deb_packages -from download_release_packets 
import download_last_release +from download_release_packages import download_last_release from upload_result_helper import upload_results from docker_pull_helper import get_image_with_version from commit_status_helper import ( @@ -207,16 +207,16 @@ if __name__ == "__main__": args = parse_args() check_name = args.check_name kill_timeout = args.kill_timeout - validate_bugix_check = args.validate_bugfix + validate_bugfix_check = args.validate_bugfix flaky_check = "flaky" in check_name.lower() - run_changed_tests = flaky_check or validate_bugix_check + run_changed_tests = flaky_check or validate_bugfix_check gh = Github(get_best_robot_token(), per_page=100) - # For validate_bugix_check we need up to date information about labels, so pr_event_from_api is used + # For validate_bugfix_check we need up to date information about labels, so pr_event_from_api is used pr_info = PRInfo( - need_changed_files=run_changed_tests, pr_event_from_api=validate_bugix_check + need_changed_files=run_changed_tests, pr_event_from_api=validate_bugfix_check ) atexit.register(update_mergeable_check, gh, pr_info, check_name) @@ -224,7 +224,7 @@ if __name__ == "__main__": if not os.path.exists(temp_path): os.makedirs(temp_path) - if validate_bugix_check and "pr-bugfix" not in pr_info.labels: + if validate_bugfix_check and "pr-bugfix" not in pr_info.labels: if args.post_commit_status == "file": post_commit_status_to_file( os.path.join(temp_path, "post_commit_status.tsv"), @@ -256,7 +256,7 @@ if __name__ == "__main__": tests_to_run = get_tests_to_run(pr_info) if not tests_to_run: commit = get_commit(gh, pr_info.sha) - state = override_status("success", check_name, validate_bugix_check) + state = override_status("success", check_name, validate_bugfix_check) if args.post_commit_status == "commit_status": commit.create_status( context=check_name_with_group, @@ -279,7 +279,7 @@ if __name__ == "__main__": if not os.path.exists(packages_path): os.makedirs(packages_path) - if validate_bugix_check: + if validate_bugfix_check: download_last_release(packages_path) else: download_all_deb_packages(check_name, reports_path, packages_path) @@ -292,12 +292,12 @@ if __name__ == "__main__": if not os.path.exists(result_path): os.makedirs(result_path) - run_log_path = os.path.join(result_path, "runlog.log") + run_log_path = os.path.join(result_path, "run.log") additional_envs = get_additional_envs( check_name, run_by_hash_num, run_by_hash_total ) - if validate_bugix_check: + if validate_bugfix_check: additional_envs.append("GLOBAL_TAGS=no-random-settings") run_command = get_run_command( @@ -327,7 +327,7 @@ if __name__ == "__main__": state, description, test_results, additional_logs = process_results( result_path, server_log_path ) - state = override_status(state, check_name, invert=validate_bugix_check) + state = override_status(state, check_name, invert=validate_bugfix_check) ch_helper = ClickHouseHelper() mark_flaky_tests(ch_helper, check_name, test_results) diff --git a/tests/ci/integration_test_check.py b/tests/ci/integration_test_check.py index e61117a4b45..31fae9a578b 100644 --- a/tests/ci/integration_test_check.py +++ b/tests/ci/integration_test_check.py @@ -16,7 +16,7 @@ from s3_helper import S3Helper from get_robot_token import get_best_robot_token from pr_info import PRInfo from build_download_helper import download_all_deb_packages -from download_release_packets import download_last_release +from download_release_packages import download_last_release from upload_result_helper import upload_results from docker_pull_helper import 
get_images_with_versions from commit_status_helper import ( @@ -153,7 +153,7 @@ if __name__ == "__main__": args = parse_args() check_name = args.check_name - validate_bugix_check = args.validate_bugfix + validate_bugfix_check = args.validate_bugfix if "RUN_BY_HASH_NUM" in os.environ: run_by_hash_num = int(os.getenv("RUN_BY_HASH_NUM", "0")) @@ -171,13 +171,13 @@ if __name__ == "__main__": is_flaky_check = "flaky" in check_name - # For validate_bugix_check we need up to date information about labels, so pr_event_from_api is used + # For validate_bugfix_check we need up to date information about labels, so pr_event_from_api is used pr_info = PRInfo( - need_changed_files=is_flaky_check or validate_bugix_check, - pr_event_from_api=validate_bugix_check, + need_changed_files=is_flaky_check or validate_bugfix_check, + pr_event_from_api=validate_bugfix_check, ) - if validate_bugix_check and "pr-bugfix" not in pr_info.labels: + if validate_bugfix_check and "pr-bugfix" not in pr_info.labels: if args.post_commit_status == "file": post_commit_status_to_file( os.path.join(temp_path, "post_commit_status.tsv"), @@ -209,7 +209,7 @@ if __name__ == "__main__": if not os.path.exists(build_path): os.makedirs(build_path) - if validate_bugix_check: + if validate_bugfix_check: download_last_release(build_path) else: download_all_deb_packages(check_name, reports_path, build_path) @@ -252,7 +252,7 @@ if __name__ == "__main__": subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) state, description, test_results, additional_logs = process_results(result_path) - state = override_status(state, check_name, invert=validate_bugix_check) + state = override_status(state, check_name, invert=validate_bugfix_check) ch_helper = ClickHouseHelper() mark_flaky_tests(ch_helper, check_name, test_results) diff --git a/tests/ci/jepsen_check.py b/tests/ci/jepsen_check.py index 69964c0a0bc..3ddc0089791 100644 --- a/tests/ci/jepsen_check.py +++ b/tests/ci/jepsen_check.py @@ -251,7 +251,7 @@ if __name__ == "__main__": ) logging.info("Going to run jepsen: %s", cmd) - run_log_path = os.path.join(TEMP_PATH, "runlog.log") + run_log_path = os.path.join(TEMP_PATH, "run.log") with TeePopen(cmd, run_log_path) as process: retcode = process.wait() diff --git a/tests/ci/performance_comparison_check.py b/tests/ci/performance_comparison_check.py index acde5be5814..667c80110cd 100644 --- a/tests/ci/performance_comparison_check.py +++ b/tests/ci/performance_comparison_check.py @@ -176,7 +176,7 @@ if __name__ == "__main__": ) logging.info("Going to run command %s", run_command) - run_log_path = os.path.join(temp_path, "runlog.log") + run_log_path = os.path.join(temp_path, "run.log") popen_env = os.environ.copy() popen_env.update(env_extra) @@ -198,7 +198,7 @@ if __name__ == "__main__": "all-query-metrics.tsv": os.path.join( result_path, "report/all-query-metrics.tsv" ), - "runlog.log": run_log_path, + "run.log": run_log_path, } s3_prefix = f"{pr_info.number}/{pr_info.sha}/{check_name_prefix}/" @@ -253,8 +253,8 @@ if __name__ == "__main__": report_url = GITHUB_RUN_URL - if uploaded["runlog.log"]: - report_url = uploaded["runlog.log"] + if uploaded["run.log"]: + report_url = uploaded["run.log"] if uploaded["compare.log"]: report_url = uploaded["compare.log"] diff --git a/tests/ci/sqlancer_check.py b/tests/ci/sqlancer_check.py index 5e94969d4b1..ce6d89a7267 100644 --- a/tests/ci/sqlancer_check.py +++ b/tests/ci/sqlancer_check.py @@ -95,7 +95,7 @@ if __name__ == "__main__": run_command = get_run_command(build_url, workspace_path, 
docker_image) logging.info("Going to run %s", run_command) - run_log_path = os.path.join(workspace_path, "runlog.log") + run_log_path = os.path.join(workspace_path, "run.log") with open(run_log_path, "w", encoding="utf-8") as log: with subprocess.Popen( run_command, shell=True, stderr=log, stdout=log diff --git a/tests/ci/stress_check.py b/tests/ci/stress_check.py index b7f74c5aeb7..37277538867 100644 --- a/tests/ci/stress_check.py +++ b/tests/ci/stress_check.py @@ -34,6 +34,7 @@ def get_run_command( "docker run --cap-add=SYS_PTRACE " # a static link, don't use S3_URL or S3_DOWNLOAD "-e S3_URL='https://s3.amazonaws.com/clickhouse-datasets' " + f"-e DISABLE_BC_CHECK={os.environ.get('DISABLE_BC_CHECK', '0')} " # For dmesg and sysctl "--privileged " f"--volume={build_path}:/package_folder " @@ -138,7 +139,7 @@ if __name__ == "__main__": if not os.path.exists(result_path): os.makedirs(result_path) - run_log_path = os.path.join(temp_path, "runlog.log") + run_log_path = os.path.join(temp_path, "run.log") run_command = get_run_command( packages_path, result_path, repo_tests_path, server_log_path, docker_image diff --git a/tests/ci/unit_tests_check.py b/tests/ci/unit_tests_check.py index 4777296da18..7c4fa0e9fe4 100644 --- a/tests/ci/unit_tests_check.py +++ b/tests/ci/unit_tests_check.py @@ -140,7 +140,7 @@ if __name__ == "__main__": run_command = f"docker run --cap-add=SYS_PTRACE --volume={tests_binary_path}:/unit_tests_dbms --volume={test_output}:/test_output {docker_image}" - run_log_path = os.path.join(test_output, "runlog.log") + run_log_path = os.path.join(test_output, "run.log") logging.info("Going to run func tests: %s", run_command) diff --git a/tests/ci/workflow_approve_rerun_lambda/app.py b/tests/ci/workflow_approve_rerun_lambda/app.py index d285e29943d..86a16adc31a 100644 --- a/tests/ci/workflow_approve_rerun_lambda/app.py +++ b/tests/ci/workflow_approve_rerun_lambda/app.py @@ -25,7 +25,7 @@ MAX_RETRY = 5 # Number of times a check can re-run as a whole. 
# It is needed, because we are using AWS "spot" instances, that are terminated often -MAX_WORKFLOW_RERUN = 20 +MAX_WORKFLOW_RERUN = 30 WorkflowDescription = namedtuple( "WorkflowDescription", diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 20e63412d91..13669981daa 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -111,12 +111,21 @@ def clickhouse_execute_http( if default_format is not None: params["default_format"] = default_format - client.request( - "POST", - f"/?{base_args.client_options_query_str}{urllib.parse.urlencode(params)}", - ) - res = client.getresponse() - data = res.read() + for i in range(MAX_RETRIES): + try: + client.request( + "POST", + f"/?{base_args.client_options_query_str}{urllib.parse.urlencode(params)}", + ) + res = client.getresponse() + data = res.read() + break + except Exception as ex: + if i == MAX_RETRIES - 1: + raise ex + + sleep(i + 1) + if res.status != 200: raise HTTPError(data.decode(), res.status) @@ -456,6 +465,7 @@ class SettingsRandomizer: "merge_tree_coarse_index_granularity": lambda: random.randint(2, 32), "optimize_distinct_in_order": lambda: random.randint(0, 1), "optimize_sorting_by_input_stream_properties": lambda: random.randint(0, 1), + "enable_memory_bound_merging_of_aggregation_results": lambda: random.randint(0, 1), } @staticmethod @@ -1000,18 +1010,28 @@ class TestCase: seconds_left = max( args.timeout - (datetime.now() - start_time).total_seconds(), 20 ) + drop_database_query = "DROP DATABASE " + database + if args.replicated_database: + drop_database_query += " ON CLUSTER test_cluster_database_replicated" + try: - drop_database_query = "DROP DATABASE " + database - if args.replicated_database: - drop_database_query += " ON CLUSTER test_cluster_database_replicated" - clickhouse_execute( - args, - drop_database_query, - timeout=seconds_left, - settings={ - "log_comment": args.testcase_basename, - }, - ) + # It's possible to get an error "New table appeared in database being dropped or detached. Try again." 
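# Illustrative sketch, not part of the patch: the bounded-retry shape used below, in isolation.
# Retry only while the failure matches a known transient message and give up (re-raise) on
# anything else or once the attempts are exhausted; `execute` and `is_transient` are hypothetical
# stand-ins for clickhouse_execute and need_retry.
def retry_transient(execute, is_transient, attempts=60):
    for attempt in range(attempts):
        try:
            return execute()
        except Exception as e:
            if attempt + 1 < attempts and is_transient(str(e)):
                continue
            raise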
+ for _ in range(1, 60): + try: + clickhouse_execute( + args, + drop_database_query, + timeout=seconds_left, + settings={ + "log_comment": args.testcase_basename, + }, + ) + except HTTPError as e: + if need_retry(args, e.message, e.message, 0): + continue + raise + break + except socket.timeout: total_time = (datetime.now() - start_time).total_seconds() return ( @@ -1098,7 +1118,7 @@ class TestCase: args, self.get_description_from_exception_info(sys.exc_info()) ), ) - except (ConnectionRefusedError, ConnectionResetError): + except (ConnectionError, http.client.ImproperConnectionState): return TestResult( self.name, TestStatus.FAIL, @@ -1524,7 +1544,7 @@ def check_server_started(args): print(" OK") sys.stdout.flush() return True - except (ConnectionRefusedError, ConnectionResetError): + except (ConnectionError, http.client.ImproperConnectionState): print(".", end="") sys.stdout.flush() retry_count -= 1 @@ -1534,7 +1554,7 @@ def check_server_started(args): print("\nConnection timeout, will not retry") break except Exception as e: - print("\nUexpected exception, will not retry: ", str(e)) + print("\nUexpected exception, will not retry: ", type(e).__name__, ": ", str(e)) break print("\nAll connection tries failed") @@ -2260,7 +2280,7 @@ if __name__ == "__main__": if find_binary(args.binary + "-client"): args.client = args.binary + "-client" - print("Using " + args.client + " as client program (expecting split build)") + print("Using " + args.client + " as client program") elif find_binary(args.binary): args.client = args.binary + " client" diff --git a/tests/config/config.d/storage_conf.xml b/tests/config/config.d/storage_conf.xml index 8226d801cef..bc9269e6ec1 100644 --- a/tests/config/config.d/storage_conf.xml +++ b/tests/config/config.d/storage_conf.xml @@ -100,7 +100,7 @@ 22548578304 0 1 - 100 + 100 cache @@ -109,6 +109,15 @@ 1000 1 + + cache + s3_disk_6 + s3_cache_small_segment_size/ + 22548578304 + 10Ki + 0 + 1 + local @@ -234,6 +243,13 @@ + + +
+ s3_cache_small_segment_size +
+
+
diff --git a/tests/config/config.d/zookeeper_fault_injection.xml b/tests/config/config.d/zookeeper_fault_injection.xml index 45d3cc8193d..1f13155a130 100644 --- a/tests/config/config.d/zookeeper_fault_injection.xml +++ b/tests/config/config.d/zookeeper_fault_injection.xml @@ -4,7 +4,6 @@ localhost 9181 - + + + local + /tiny_local_cache/ + +
+ + + + +
+ tiny_local_cache +
+
+
+
+ + + tiny_local_cache + diff --git a/tests/integration/test_temporary_data_in_cache/test.py b/tests/integration/test_temporary_data_in_cache/test.py new file mode 100644 index 00000000000..0e8c7305405 --- /dev/null +++ b/tests/integration/test_temporary_data_in_cache/test.py @@ -0,0 +1,81 @@ +# pylint: disable=unused-argument +# pylint: disable=redefined-outer-name + +import pytest + +from helpers.cluster import ClickHouseCluster +from helpers.client import QueryRuntimeException + +cluster = ClickHouseCluster(__file__) + +node = cluster.add_instance( + "node", + main_configs=["configs/config.d/storage_configuration.xml"], + tmpfs=["/local_disk:size=50M", "/tiny_local_cache:size=12M"], +) + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def test_cache_evicted_by_temporary_data(start_cluster): + q = node.query + qi = lambda query: int(node.query(query).strip()) + + cache_size_initial = qi("SELECT sum(size) FROM system.filesystem_cache") + assert cache_size_initial == 0 + + free_space_initial = qi( + "SELECT free_space FROM system.disks WHERE name = 'tiny_local_cache_local_disk'" + ) + assert free_space_initial > 8 * 1024 * 1024 + + q( + "CREATE TABLE t1 (x UInt64) ENGINE = MergeTree ORDER BY x SETTINGS storage_policy = 'tiny_local_cache'" + ) + q("INSERT INTO t1 SELECT number FROM numbers(1024 * 1024)") + + # To be sure that nothing is reading the cache and entries for t1 can be evited + q("OPTIMIZE TABLE t1 FINAL") + q("SYSTEM STOP MERGES t1") + + # Read some data to fill the cache + q("SELECT sum(x) FROM t1") + + cache_size_with_t1 = qi("SELECT sum(size) FROM system.filesystem_cache") + assert cache_size_with_t1 > 8 * 1024 * 1024 + + # Almost all disk space is occupied by t1 cache + free_space_with_t1 = qi( + "SELECT free_space FROM system.disks WHERE name = 'tiny_local_cache_local_disk'" + ) + assert free_space_with_t1 < 4 * 1024 * 1024 + + # Try to sort the table, but fail because of lack of disk space + with pytest.raises(QueryRuntimeException) as exc: + q( + "SELECT ignore(*) FROM numbers(10 * 1024 * 1024) ORDER BY sipHash64(number)", + settings={ + "max_bytes_before_external_group_by": "4M", + "max_bytes_before_external_sort": "4M", + }, + ) + assert "Failed to reserve space for the file cache" in str(exc.value) + + # Some data evicted from cache by temporary data + cache_size_after_eviction = qi("SELECT sum(size) FROM system.filesystem_cache") + assert cache_size_after_eviction < cache_size_with_t1 + + # Disk space freed, at least 3 MB, because temporary data tried to write 4 MB + free_space_after_eviction = qi( + "SELECT free_space FROM system.disks WHERE name = 'tiny_local_cache_local_disk'" + ) + assert free_space_after_eviction > free_space_with_t1 + 3 * 1024 * 1024 + + q("DROP TABLE IF EXISTS t1") diff --git a/tests/integration/test_tmp_policy/test.py b/tests/integration/test_tmp_policy/test.py index c919d9a0c3d..870a70b127a 100644 --- a/tests/integration/test_tmp_policy/test.py +++ b/tests/integration/test_tmp_policy/test.py @@ -23,7 +23,7 @@ def start_cluster(): cluster.shutdown() -def test_different_versions(start_cluster): +def test_disk_selection(start_cluster): query = "SELECT count(ignore(*)) FROM (SELECT * FROM system.numbers LIMIT 1e7) GROUP BY number" settings = { "max_bytes_before_external_group_by": 1 << 20, diff --git a/tests/performance/explain_ast.xml b/tests/performance/explain_ast.xml index 0daa748de83..5bcdd96c10e 100644 --- a/tests/performance/explain_ast.xml +++ 
b/tests/performance/explain_ast.xml @@ -1,6 +1,6 @@ - - + + 0, c3, NULL)) AS c3_q, +quantiles(0.25, 0.5, 0.75)(if(c4 > 0, c4, NULL)) AS c4_q, +quantiles(0.25, 0.5, 0.75)(t.c17 / t.c19) AS c5_q, +quantiles(0.25, 0.5, 0.75)(c6) AS c6_q, +quantiles(0.25, 0.5, 0.75)(c7) AS c7_q, +quantiles(0.25, 0.5, 0.75)(c8) AS c8_q, +quantiles(0.25, 0.5, 0.75)(c9) AS c9_q, +quantiles(0.25, 0.5, 0.75)(c10) AS c10_q, +quantiles(0.25, 0.5, 0.75)(c11) AS c11_q, +quantiles(0.25, 0.5, 0.75)(c12) AS c12_q, +quantiles(0.25, 0.5, 0.75)(c13) AS c13_q, +quantiles(0.25, 0.5, 0.75)(if(t.c24 > 0, t.c24, NULL) / t.c19) AS c14_q, +quantiles(0.25, 0.5, 0.75)(if(t.c24 > 0, t.c24, NULL) / t.c17) AS c15_q, +quantiles(0.25, 0.5, 0.75)(t.c16) AS c16_q, +quantiles(0.25, 0.5, 0.75)(t.c17) AS c17_q, +quantiles(0.25, 0.5, 0.75)(if(t.c18 > 0, t.c18, NULL)) AS c18_q, +max(if(c3 > 0, c3, NULL)) AS c3_max, +min(if(c3 > 0, c3, NULL)) AS c3_min, +avg(if(c3 > 0, c3, NULL)) AS c3_avg, +max(if(c4 > 0, c4, NULL)) AS c4_max, +min(if(c4 > 0, c4, NULL)) AS c4_min, +avg(if(c4 > 0, c4, NULL)) AS c4_avg, +max(t.c17 / t.c19) AS c5_max, +min(t.c17 / t.c19) AS c5_min, +avg(t.c17 / t.c19) AS c5_avg, +max(if(c6 > 0, c6, NULL)) AS c6_max, +min(if(c6 > 0, c6, NULL)) AS c6_min, +avg(if(c6 > 0, c6, NULL)) AS c6_avg, +max(if(c7 > 0, c7, NULL)) AS c7_max, +min(if(c7 > 0, c7, NULL)) AS c7_min, +avg(if(c7 > 0, c7, NULL)) AS c7_avg, +max(if(c10 > 0, c10, NULL)) AS c10_max, +min(if(c10 > 0, c10, NULL)) AS c10_min, +avg(if(c10 > 0, c10, NULL)) AS c10_avg, +max(if(c8 > 0, c8, NULL)) AS c8_max, +min(if(c8 > 0, c8, NULL)) AS c8_min, +avg(if(c8 > 0, c8, NULL)) AS c8_avg, +max(if(c9 > 0, c9, NULL)) AS c9_max, +min(if(c9 > 0, c9, NULL)) AS c9_min, +avg(if(c9 > 0, c9, NULL)) AS c9_avg, +max(if(c11 > 0, c11, NULL)) AS c11_max, +min(if(c11 > 0, c11, NULL)) AS c11_min, +avg(if(c11 > 0, c11, NULL)) AS c11_avg, +max(if(c12 > 0, c12, NULL)) AS c12_max, +min(if(c12 > 0, c12, NULL)) AS c12_min, +avg(if(c12 > 0, c12, NULL)) AS c12_avg, +max(if(c13 > 0, c13, NULL)) AS c13_max, +min(if(c13 > 0, c13, NULL)) AS c13_min, +avg(if(c13 > 0, c13, NULL)) AS c13_avg, +max(if(t.c24 > 0, t.c24, NULL) / t.c19) AS c14_max, +min(if(t.c24 > 0, t.c24, NULL) / t.c19) AS c14_min, +avg(if(t.c24 > 0, t.c24, NULL) / t.c19) AS c14_avg, +max(if(t.c24 > 0, t.c24, NULL) / t.c17) AS c15_max, +min(if(t.c24 > 0, t.c24, NULL) / t.c17) AS c15_min, +avg(if(t.c24 > 0, t.c24, NULL) / t.c17) AS c15_avg, +max(t.c16) AS c16_max, +min(t.c16) AS c16_min, +avg(t.c16) AS c16_avg, +max(t.c17) AS c17_max, +min(t.c17) AS c17_min, +avg(t.c17) AS c17_avg, +max(if(t.c18 > 0, t.c18, NULL)) AS c18_max, +min(if(t.c18 > 0, t.c18, NULL)) AS c18_min, +avg(if(t.c18 > 0, t.c18, NULL)) AS c18_avg, +sum(t.c19) AS c19, +sum(if(t.c18 > 0, t.c18, NULL)) AS c18, +sum(t.c16) AS c16, +sum(c23) AS c23, +sum(t.c17) AS c17, +sum(if(t.c24 > 0, t.c24, NULL)) AS c24, +c24 / c19 AS c14, +c24 / c17 AS c15, +median(if(isNotNull(c29) AND (t.c22 > 0), c13 * (t.c22 / c29), NULL)) AS c21, +sum(c22) AS c22 +FROM +( +SELECT +c27, +c39 AS c1, +c29, +c19, +c23, +c17, +c16, +c18, +c22, +c24, +c3, +c4, +c8, +c9, +c10, +c11, +c12, +c13, +c6, +c7 +FROM +( +SELECT +c27, +uniqExact(c30, c31) AS c19, +uniqExact(c30, c31, c32) AS c23, +uniqExactIf(c30, c31, c33 IN ('c37', 'c38')) AS c17, +countIf(c33 IN ('c37', 'c38')) AS c16, +countIf(c33 = 'c39') AS c18, +coalesce(sumIf(c29, c33 = 'c39'), 0) AS c22, +coalesce(sumIf(c37, c33 = 'c39'), 0) AS c24, +if((c18 > 0) AND (c19 > 0), c18 / c19, NULL) AS c3, +if(c17 != 0, c18 / c17, NULL) AS c4, +coalesce(avgIf(c34, (c34 > 0) 
AND (c33 IN ('c37', 'c38'))), NULL) AS c8, +coalesce(avgIf(c35, (c35 > 0) AND (c33 IN ('c37', 'c38'))), NULL) AS c9, +coalesce(avgIf(c34, (c34 > 0) AND (c33 = 'c39')), NULL) AS c10, +coalesce(avgIf(c35, (c35 > 0) AND (c33 = 'c39')), NULL) AS c11, +coalesce(avgIf(c37, c33 = 'c39'), NULL) AS c12, +coalesce(avgIf(c37 / c34, (c34 > 0) AND (c33 = 'c39')), NULL) AS c13, +coalesce(avgIf(c37, (c37 > 0) AND (c33 IN ('c37', 'c38'))), NULL) AS c6, +coalesce(minIf(c37, (c37 > 0) AND (c33 IN ('c37', 'c38')) AND (c37 > (c36 / 2))), NULL) AS c7 +FROM +( +SELECT +c27, +c30, +c32, +c31, +NULL AS c29, +NULL AS c33, +NULL AS c37, +NULL AS c34, +NULL AS c35 +FROM +( +SELECT +c27, +c30, +c32, +c31 +FROM database.t1 +PREWHERE ((c32 >= parseDateTimeBestEffort('2020-01-01')) AND (c32 <= parseDateTimeBestEffort('2020-01-01 23:59:59'))) AND (c27 IN +( +SELECT comp_c27 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE 
(parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +)) +WHERE c61 = 0 +) AS table25 +UNION ALL +SELECT +c27, +c30, +c32, +c31, +c29, +c33, +c37, +c34, +c35 +FROM +( +SELECT +c27, +c30, +c32, +'c37' AS c33, +coalesce(c37 * joinGet('database.table18', 'c60', concat(c26, '_', 'CH')), 0) AS c37, +if(c53 > 0, c53, 2) AS c53, +c54, +if(c29 > 0, c29, 1) AS c29, +c55, +c56, +datediff('day', c55, c56) AS c34, +datediff('day', c32, c55) AS c35, +c31 +FROM database.table24 +PREWHERE ((c32 >= parseDateTimeBestEffort('2020-01-01')) AND (c32 <= parseDateTimeBestEffort('2020-01-01 23:59:59'))) AND (c27 IN +( +SELECT comp_c27 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) 
+) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +)) +WHERE (c61 = 0) AND (c37 < (666 * (1 / joinGet('database.table18', 'c60', concat(c26, '_', 'CH'))))) +) AS table23 +UNION ALL +SELECT +c27, +c30, +c32, +c31, +c29, +c33, +c37, +c34, +c35 +FROM +( +SELECT +c27, +c30, +c32, +'c39' AS c33, +coalesce(c37 * joinGet('database.table18', 'c60', concat(c26, '_', 'CH')), 0) AS c37, +if(c53 > 0, c53, 2) AS c53, +c54, +if(c29 > 0, c29, 1) AS c29, +c55, +c56, +datediff('day', c55, c56) AS c34, +datediff('day', c32, c55) AS c35, +c31 +FROM database.table22 +PREWHERE ((c32 >= parseDateTimeBestEffort('2020-01-01')) AND (c32 <= parseDateTimeBestEffort('2020-01-01 23:59:59'))) AND (c27 IN +( +SELECT comp_c27 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN 
c39 +WHERE isNotNull(c39) +) AS table4 +)) +WHERE (c61 = 0) AND (c37 < (666 * (1 / joinGet('database.table18', 'c60', concat(c26, '_', 'CH'))))) +) AS table21 +) AS table20 +ALL LEFT JOIN +( +SELECT +c27, +avgMerge(avg_c37) * joinGet('database.table18', 'c60', concat('USD', '_', 'CH')) AS c36 +FROM database.table19 +PREWHERE c27 IN +( +SELECT comp_c27 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +) +WHERE date > (now() - toIntervalMonth(3)) +GROUP BY c27 +) AS table17 USING (c27) +GROUP BY c27 +) AS table16 +ALL LEFT JOIN +( +SELECT +comp_c27 AS c27, +assumeNotNull(c39) AS c39, +c29 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, 
+min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +) USING (c27) +) AS t +ALL LEFT JOIN +( +SELECT +c1, +c2 +FROM +( +SELECT +c39 AS c1, +groupArray(comp_c27) AS c49, +multiIf(c1 = 'c58', if(length(c49) <= 2, 0, 1), c1 = 'c57', 1, if(length(c49) <= 3, 0, 1)) AS c2 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) 
AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +GROUP BY c39 +) AS table3 +) USING (c1) +GROUP BY +c1, +c2 +) AS table2 +ORDER BY c1 ASC +) AS table1 +UNION ALL +SELECT * +FROM +( +SELECT +c1, +c2, +c3_q[1] AS c3_q1, +c3_q[3] AS c3_q3, +c3_q[2] AS c3_median, +least(c3_max, c3_q3 + (1.5 * (c3_q3 - c3_q1))) AS c3_max, +greatest(c3_min, c3_q1 - (1.5 * (c3_q3 - c3_q1))) AS c3_min, +c3_avg, +c4_q[1] AS c4_q1, +c4_q[3] AS c4_q3, +c4_q[2] AS c4_median, +least(c4_max, c4_q3 + (1.5 * (c4_q3 - c4_q1))) AS c4_max, +greatest(c4_min, c4_q1 - (1.5 * (c4_q3 - c4_q1))) AS c4_min, +c4_avg, +c5_q[1] AS c5_q1, +c5_q[3] AS c5_q3, +c5_q[2] AS c5_median, +least(c5_max, c5_q3 + (1.5 * (c5_q3 - c5_q1))) AS c5_max, +greatest(c5_min, c5_q1 - (1.5 * (c5_q3 - c5_q1))) AS c5_min, +c5_avg, +c6_q[1] AS c6_q1, +c6_q[3] AS c6_q3, +c6_q[2] AS c6_median, +least(c6_max, c6_q3 + (1.5 * (c6_q3 - c6_q1))) AS c6_max, +greatest(c6_min, c6_q1 - (1.5 * (c6_q3 - c6_q1))) AS c6_min, +c6_avg, +c7_q[1] AS c7_q1, +c7_q[3] AS c7_q3, +c7_q[2] AS c7_median, +least(c7_max, c7_q3 + (1.5 * (c7_q3 - c7_q1))) AS c7_max, +greatest(c7_min, c7_q1 - (1.5 * (c7_q3 - c7_q1))) AS c7_min, +c7_avg, +c8_q[1] AS c8_q1, +c8_q[3] AS c8_q3, +c8_q[2] AS c8_median, +least(c8_max, c8_q3 + (1.5 * (c8_q3 - c8_q1))) AS c8_max, +greatest(c8_min, c8_q1 - (1.5 * (c8_q3 - c8_q1))) AS 
c8_min, +c8_avg, +c9_q[1] AS c9_q1, +c9_q[3] AS c9_q3, +c9_q[2] AS c9_median, +least(c9_max, c9_q3 + (1.5 * (c9_q3 - c9_q1))) AS c9_max, +greatest(c9_min, c9_q1 - (1.5 * (c9_q3 - c9_q1))) AS c9_min, +c9_avg, +c10_q[1] AS c10_q1, +c10_q[3] AS c10_q3, +c10_q[2] AS c10_median, +least(c10_max, c10_q3 + (1.5 * (c10_q3 - c10_q1))) AS c10_max, +greatest(c10_min, c10_q1 - (1.5 * (c10_q3 - c10_q1))) AS c10_min, +c10_avg, +c10_avg, +c11_q[1] AS c11_q1, +c11_q[3] AS c11_q3, +c11_q[2] AS c11_median, +least(c11_max, c11_q3 + (1.5 * (c11_q3 - c11_q1))) AS c11_max, +greatest(c11_min, c11_q1 - (1.5 * (c11_q3 - c11_q1))) AS c11_min, +c11_avg, +c12_q[1] AS c12_q1, +c12_q[3] AS c12_q3, +c12_q[2] AS c12_median, +least(c12_max, c12_q3 + (1.5 * (c12_q3 - c12_q1))) AS c12_max, +greatest(c12_min, c12_q1 - (1.5 * (c12_q3 - c12_q1))) AS c12_min, +c12_avg, +c13_q[1] AS c13_q1, +c13_q[3] AS c13_q3, +c13_q[2] AS c13_median, +least(c13_max, c13_q3 + (1.5 * (c13_q3 - c13_q1))) AS c13_max, +greatest(c13_min, c13_q1 - (1.5 * (c13_q3 - c13_q1))) AS c13_min, +c13_avg, +c14_q[1] AS c14_q1, +c14_q[3] AS c14_q3, +c14_q[2] AS c14_median, +least(c14_max, c14_q3 + (1.5 * (c14_q3 - c14_q1))) AS c14_max, +greatest(c14_min, c14_q1 - (1.5 * (c14_q3 - c14_q1))) AS c14_min, +c14_avg, +c15_q[1] AS c15_q1, +c15_q[3] AS c15_q3, +c15_q[2] AS c15_median, +least(c15_max, c15_q3 + (1.5 * (c15_q3 - c15_q1))) AS c15_max, +greatest(c15_min, c15_q1 - (1.5 * (c15_q3 - c15_q1))) AS c15_min, +c15_avg, +c16_q[1] AS c16_q1, +c16_q[3] AS c16_q3, +c16_q[2] AS c16_median, +least(toFloat64(c16_max), c16_q3 + (1.5 * (c16_q3 - c16_q1))) AS c16_max, +greatest(toFloat64(c16_min), c16_q1 - (1.5 * (c16_q3 - c16_q1))) AS c16_min, +c16_avg, +c17_q[1] AS c17_q1, +c17_q[3] AS c17_q3, +c17_q[2] AS c17_median, +least(toFloat64(c17_max), c17_q3 + (1.5 * (c17_q3 - c17_q1))) AS c17_max, +greatest(toFloat64(c17_min), c17_q1 - (1.5 * (c17_q3 - c17_q1))) AS c17_min, +c17_avg, +c18_q[1] AS c18_q1, +c18_q[3] AS c18_q3, +c18_q[2] AS c18_median, +least(toFloat64(c18_max), c18_q3 + (1.5 * (c18_q3 - c18_q1))) AS c18_max, +greatest(toFloat64(c18_min), c18_q1 - (1.5 * (c18_q3 - c18_q1))) AS c18_min, +c18_avg, +round(if(c19 != 0, c24 / c19, 0), 2) AS c20, +c21, +c22, +c23 AS c23, +c19 AS c19, +c16 AS c16, +c17 AS c17, +c18 AS c18, +round(c24, 2) AS c24, +round(if(c17 != 0, c24 / c17, 0), 2) AS c25, +'CH' AS c26 +FROM +( +SELECT +c1, +c2, +groupUniqArray(c27) AS c28, +groupUniqArrayIf(c27, isNotNull(c29)) AS c28_with_c29, +quantiles(0.25, 0.5, 0.75)(if(c3 > 0, c3, NULL)) AS c3_q, +quantiles(0.25, 0.5, 0.75)(if(c4 > 0, c4, NULL)) AS c4_q, +quantiles(0.25, 0.5, 0.75)(t.c17 / t.c19) AS c5_q, +quantiles(0.25, 0.5, 0.75)(c6) AS c6_q, +quantiles(0.25, 0.5, 0.75)(c7) AS c7_q, +quantiles(0.25, 0.5, 0.75)(c8) AS c8_q, +quantiles(0.25, 0.5, 0.75)(c9) AS c9_q, +quantiles(0.25, 0.5, 0.75)(c10) AS c10_q, +quantiles(0.25, 0.5, 0.75)(c11) AS c11_q, +quantiles(0.25, 0.5, 0.75)(c12) AS c12_q, +quantiles(0.25, 0.5, 0.75)(c13) AS c13_q, +quantiles(0.25, 0.5, 0.75)(if(t.c24 > 0, t.c24, NULL) / t.c19) AS c14_q, +quantiles(0.25, 0.5, 0.75)(if(t.c24 > 0, t.c24, NULL) / t.c17) AS c15_q, +quantiles(0.25, 0.5, 0.75)(t.c16) AS c16_q, +quantiles(0.25, 0.5, 0.75)(t.c17) AS c17_q, +quantiles(0.25, 0.5, 0.75)(if(t.c18 > 0, t.c18, NULL)) AS c18_q, +max(if(c3 > 0, c3, NULL)) AS c3_max, +min(if(c3 > 0, c3, NULL)) AS c3_min, +avg(if(c3 > 0, c3, NULL)) AS c3_avg, +max(if(c4 > 0, c4, NULL)) AS c4_max, +min(if(c4 > 0, c4, NULL)) AS c4_min, +avg(if(c4 > 0, c4, NULL)) AS c4_avg, +max(t.c17 / t.c19) AS c5_max, +min(t.c17 / 
t.c19) AS c5_min, +avg(t.c17 / t.c19) AS c5_avg, +max(if(c6 > 0, c6, NULL)) AS c6_max, +min(if(c6 > 0, c6, NULL)) AS c6_min, +avg(if(c6 > 0, c6, NULL)) AS c6_avg, +max(if(c7 > 0, c7, NULL)) AS c7_max, +min(if(c7 > 0, c7, NULL)) AS c7_min, +avg(if(c7 > 0, c7, NULL)) AS c7_avg, +max(if(c10 > 0, c10, NULL)) AS c10_max, +min(if(c10 > 0, c10, NULL)) AS c10_min, +avg(if(c10 > 0, c10, NULL)) AS c10_avg, +max(if(c8 > 0, c8, NULL)) AS c8_max, +min(if(c8 > 0, c8, NULL)) AS c8_min, +avg(if(c8 > 0, c8, NULL)) AS c8_avg, +max(if(c9 > 0, c9, NULL)) AS c9_max, +min(if(c9 > 0, c9, NULL)) AS c9_min, +avg(if(c9 > 0, c9, NULL)) AS c9_avg, +max(if(c11 > 0, c11, NULL)) AS c11_max, +min(if(c11 > 0, c11, NULL)) AS c11_min, +avg(if(c11 > 0, c11, NULL)) AS c11_avg, +max(if(c12 > 0, c12, NULL)) AS c12_max, +min(if(c12 > 0, c12, NULL)) AS c12_min, +avg(if(c12 > 0, c12, NULL)) AS c12_avg, +max(if(c13 > 0, c13, NULL)) AS c13_max, +min(if(c13 > 0, c13, NULL)) AS c13_min, +avg(if(c13 > 0, c13, NULL)) AS c13_avg, +max(if(t.c24 > 0, t.c24, NULL) / t.c19) AS c14_max, +min(if(t.c24 > 0, t.c24, NULL) / t.c19) AS c14_min, +avg(if(t.c24 > 0, t.c24, NULL) / t.c19) AS c14_avg, +max(if(t.c24 > 0, t.c24, NULL) / t.c17) AS c15_max, +min(if(t.c24 > 0, t.c24, NULL) / t.c17) AS c15_min, +avg(if(t.c24 > 0, t.c24, NULL) / t.c17) AS c15_avg, +max(t.c16) AS c16_max, +min(t.c16) AS c16_min, +avg(t.c16) AS c16_avg, +max(t.c17) AS c17_max, +min(t.c17) AS c17_min, +avg(t.c17) AS c17_avg, +max(if(t.c18 > 0, t.c18, NULL)) AS c18_max, +min(if(t.c18 > 0, t.c18, NULL)) AS c18_min, +avg(if(t.c18 > 0, t.c18, NULL)) AS c18_avg, +sum(t.c19) AS c19, +sum(if(t.c18 > 0, t.c18, NULL)) AS c18, +sum(t.c16) AS c16, +sum(c23) AS c23, +sum(t.c17) AS c17, +sum(if(t.c24 > 0, t.c24, NULL)) AS c24, +c24 / c19 AS c14, +c24 / c17 AS c15, +median(if(isNotNull(c29) AND (t.c22 > 0), c13 * (t.c22 / c29), NULL)) AS c21, +sum(c22) AS c22 +FROM +( +SELECT +c27, +c39 AS c1, +c29, +c19, +c23, +c17, +c16, +c18, +c22, +c24, +c3, +c4, +c8, +c9, +c10, +c11, +c12, +c13, +c6, +c7 +FROM +( +SELECT +c27, +uniqExact(c30, c31) AS c19, +uniqExact(c30, c31, c32) AS c23, +uniqExactIf(c30, c31, c33 IN ('c37', 'c38')) AS c17, +countIf(c33 IN ('c37', 'c38')) AS c16, +countIf(c33 = 'c39') AS c18, +coalesce(sumIf(c29, c33 = 'c39'), 0) AS c22, +coalesce(sumIf(c37, c33 = 'c39'), 0) AS c24, +if((c18 > 0) AND (c19 > 0), c18 / c19, NULL) AS c3, +if(c17 != 0, c18 / c17, NULL) AS c4, +coalesce(avgIf(c34, (c34 > 0) AND (c33 IN ('c37', 'c38'))), NULL) AS c8, +coalesce(avgIf(c35, (c35 > 0) AND (c33 IN ('c37', 'c38'))), NULL) AS c9, +coalesce(avgIf(c34, (c34 > 0) AND (c33 = 'c39')), NULL) AS c10, +coalesce(avgIf(c35, (c35 > 0) AND (c33 = 'c39')), NULL) AS c11, +coalesce(avgIf(c37, c33 = 'c39'), NULL) AS c12, +coalesce(avgIf(c37 / c34, (c34 > 0) AND (c33 = 'c39')), NULL) AS c13, +coalesce(avgIf(c37, (c37 > 0) AND (c33 IN ('c37', 'c38'))), NULL) AS c6, +coalesce(minIf(c37, (c37 > 0) AND (c33 IN ('c37', 'c38')) AND (c37 > (c36 / 2))), NULL) AS c7 +FROM +( +SELECT +c27, +c30, +c32, +c31, +NULL AS c29, +NULL AS c33, +NULL AS c37, +NULL AS c34, +NULL AS c35 +FROM +( +SELECT +c27, +c30, +c32, +c31 +FROM database.t1 +PREWHERE ((c32 >= parseDateTimeBestEffort('2020-01-01')) AND (c32 <= parseDateTimeBestEffort('2020-01-01 23:59:59'))) AND (c27 IN +( +SELECT comp_c27 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, 
+groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +)) +WHERE c61 = 0 +) AS table25 +UNION ALL +SELECT +c27, +c30, +c32, +c31, +c29, +c33, +c37, +c34, +c35 +FROM +( +SELECT +c27, +c30, +c32, +'c37' AS c33, +coalesce(c37 * joinGet('database.table18', 'c60', concat(c26, '_', 'CH')), 0) AS c37, +if(c53 > 0, c53, 2) AS c53, +c54, +if(c29 > 0, c29, 1) AS c29, +c55, +c56, +datediff('day', c55, c56) AS c34, +datediff('day', c32, c55) AS c35, +c31 +FROM database.table24 +PREWHERE ((c32 >= parseDateTimeBestEffort('2020-01-01')) AND (c32 <= parseDateTimeBestEffort('2020-01-01 23:59:59'))) AND (c27 IN +( +SELECT comp_c27 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS 
c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +)) +WHERE (c61 = 0) AND (c37 < (666 * (1 / joinGet('database.table18', 'c60', concat(c26, '_', 'CH'))))) +) AS table23 +UNION ALL +SELECT +c27, +c30, +c32, +c31, +c29, +c33, +c37, +c34, +c35 +FROM +( +SELECT +c27, +c30, +c32, +'c39' AS c33, +coalesce(c37 * joinGet('database.table18', 'c60', concat(c26, '_', 'CH')), 0) AS c37, +if(c53 > 0, c53, 2) AS c53, +c54, +if(c29 > 0, c29, 1) AS c29, +c55, +c56, +datediff('day', c55, c56) AS c34, +datediff('day', c32, c55) AS c35, +c31 +FROM database.table22 +PREWHERE ((c32 >= parseDateTimeBestEffort('2020-01-01')) AND (c32 <= parseDateTimeBestEffort('2020-01-01 23:59:59'))) AND (c27 IN +( +SELECT comp_c27 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, 
+any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +)) +WHERE (c61 = 0) AND (c37 < (666 * (1 / joinGet('database.table18', 'c60', concat(c26, '_', 'CH'))))) +) AS table21 +) AS table20 +ALL LEFT JOIN +( +SELECT +c27, +avgMerge(avg_c37) * joinGet('database.table18', 'c60', concat('USD', '_', 'CH')) AS c36 +FROM database.table19 +PREWHERE c27 IN +( +SELECT comp_c27 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS 
c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +) +WHERE date > (now() - toIntervalMonth(3)) +GROUP BY c27 +) AS table17 USING (c27) +GROUP BY c27 +) AS table16 +ALL LEFT JOIN +( +SELECT +comp_c27 AS c27, +assumeNotNull(c39) AS c39, +c29 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, 
+joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +) USING (c27) +) AS t +ALL LEFT JOIN +( +SELECT +c1, +c2 +FROM +( +SELECT +c39 AS c1, +groupArray(comp_c27) AS c49, +multiIf(c1 = 'c58', if(length(c49) <= 2, 0, 1), c1 = 'c57', 1, if(length(c49) <= 3, 0, 1)) AS c2 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, 
+arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +GROUP BY c39 +) AS table3 +) USING (c1) +GROUP BY +c1, +c2 +) AS table2 +ORDER BY c1 ASC +) AS table1 +UNION ALL +SELECT * +FROM +( +SELECT +c1, +c2, +c3_q[1] AS c3_q1, +c3_q[3] AS c3_q3, +c3_q[2] AS c3_median, +least(c3_max, c3_q3 + (1.5 * (c3_q3 - c3_q1))) AS c3_max, +greatest(c3_min, c3_q1 - (1.5 * (c3_q3 - c3_q1))) AS c3_min, +c3_avg, +c4_q[1] AS c4_q1, +c4_q[3] AS c4_q3, +c4_q[2] AS c4_median, +least(c4_max, c4_q3 + (1.5 * (c4_q3 - c4_q1))) AS c4_max, +greatest(c4_min, c4_q1 - (1.5 * (c4_q3 - c4_q1))) AS c4_min, +c4_avg, +c5_q[1] AS c5_q1, +c5_q[3] AS c5_q3, +c5_q[2] AS c5_median, +least(c5_max, c5_q3 + (1.5 * (c5_q3 - c5_q1))) AS c5_max, +greatest(c5_min, c5_q1 - (1.5 * (c5_q3 - c5_q1))) AS c5_min, +c5_avg, +c6_q[1] AS c6_q1, +c6_q[3] AS c6_q3, +c6_q[2] AS c6_median, +least(c6_max, c6_q3 + (1.5 * (c6_q3 - c6_q1))) AS c6_max, +greatest(c6_min, c6_q1 - (1.5 * (c6_q3 - c6_q1))) AS c6_min, +c6_avg, +c7_q[1] AS c7_q1, +c7_q[3] AS c7_q3, +c7_q[2] AS c7_median, +least(c7_max, c7_q3 + (1.5 * (c7_q3 - c7_q1))) AS c7_max, +greatest(c7_min, c7_q1 - (1.5 * (c7_q3 - c7_q1))) AS c7_min, +c7_avg, +c8_q[1] AS c8_q1, +c8_q[3] AS c8_q3, +c8_q[2] AS c8_median, +least(c8_max, c8_q3 + (1.5 * (c8_q3 - c8_q1))) AS c8_max, +greatest(c8_min, c8_q1 - (1.5 * (c8_q3 - c8_q1))) AS c8_min, +c8_avg, +c9_q[1] AS c9_q1, +c9_q[3] AS c9_q3, +c9_q[2] AS c9_median, +least(c9_max, c9_q3 + (1.5 * (c9_q3 - c9_q1))) AS c9_max, +greatest(c9_min, c9_q1 - (1.5 * (c9_q3 - c9_q1))) AS c9_min, +c9_avg, +c10_q[1] AS c10_q1, +c10_q[3] AS c10_q3, +c10_q[2] AS c10_median, +least(c10_max, c10_q3 + (1.5 * (c10_q3 - c10_q1))) AS c10_max, +greatest(c10_min, c10_q1 - (1.5 * (c10_q3 - c10_q1))) AS c10_min, +c10_avg, +c10_avg, +c11_q[1] AS c11_q1, +c11_q[3] AS c11_q3, +c11_q[2] AS c11_median, +least(c11_max, c11_q3 + (1.5 * (c11_q3 - c11_q1))) AS c11_max, +greatest(c11_min, c11_q1 - (1.5 * (c11_q3 - c11_q1))) AS c11_min, +c11_avg, +c12_q[1] AS c12_q1, +c12_q[3] AS c12_q3, +c12_q[2] AS c12_median, +least(c12_max, c12_q3 + (1.5 * (c12_q3 - c12_q1))) AS c12_max, +greatest(c12_min, c12_q1 - (1.5 * (c12_q3 - c12_q1))) AS c12_min, +c12_avg, +c13_q[1] AS c13_q1, +c13_q[3] AS c13_q3, +c13_q[2] AS c13_median, +least(c13_max, c13_q3 + (1.5 * (c13_q3 - c13_q1))) AS c13_max, +greatest(c13_min, c13_q1 - (1.5 * (c13_q3 - c13_q1))) AS c13_min, +c13_avg, +c14_q[1] 
AS c14_q1, +c14_q[3] AS c14_q3, +c14_q[2] AS c14_median, +least(c14_max, c14_q3 + (1.5 * (c14_q3 - c14_q1))) AS c14_max, +greatest(c14_min, c14_q1 - (1.5 * (c14_q3 - c14_q1))) AS c14_min, +c14_avg, +c15_q[1] AS c15_q1, +c15_q[3] AS c15_q3, +c15_q[2] AS c15_median, +least(c15_max, c15_q3 + (1.5 * (c15_q3 - c15_q1))) AS c15_max, +greatest(c15_min, c15_q1 - (1.5 * (c15_q3 - c15_q1))) AS c15_min, +c15_avg, +c16_q[1] AS c16_q1, +c16_q[3] AS c16_q3, +c16_q[2] AS c16_median, +least(toFloat64(c16_max), c16_q3 + (1.5 * (c16_q3 - c16_q1))) AS c16_max, +greatest(toFloat64(c16_min), c16_q1 - (1.5 * (c16_q3 - c16_q1))) AS c16_min, +c16_avg, +c17_q[1] AS c17_q1, +c17_q[3] AS c17_q3, +c17_q[2] AS c17_median, +least(toFloat64(c17_max), c17_q3 + (1.5 * (c17_q3 - c17_q1))) AS c17_max, +greatest(toFloat64(c17_min), c17_q1 - (1.5 * (c17_q3 - c17_q1))) AS c17_min, +c17_avg, +c18_q[1] AS c18_q1, +c18_q[3] AS c18_q3, +c18_q[2] AS c18_median, +least(toFloat64(c18_max), c18_q3 + (1.5 * (c18_q3 - c18_q1))) AS c18_max, +greatest(toFloat64(c18_min), c18_q1 - (1.5 * (c18_q3 - c18_q1))) AS c18_min, +c18_avg, +round(if(c19 != 0, c24 / c19, 0), 2) AS c20, +c21, +c22, +c23 AS c23, +c19 AS c19, +c16 AS c16, +c17 AS c17, +c18 AS c18, +round(c24, 2) AS c24, +round(if(c17 != 0, c24 / c17, 0), 2) AS c25, +'CH' AS c26 +FROM +( +SELECT +c1, +c2, +groupUniqArray(c27) AS c28, +groupUniqArrayIf(c27, isNotNull(c29)) AS c28_with_c29, +quantiles(0.25, 0.5, 0.75)(if(c3 > 0, c3, NULL)) AS c3_q, +quantiles(0.25, 0.5, 0.75)(if(c4 > 0, c4, NULL)) AS c4_q, +quantiles(0.25, 0.5, 0.75)(t.c17 / t.c19) AS c5_q, +quantiles(0.25, 0.5, 0.75)(c6) AS c6_q, +quantiles(0.25, 0.5, 0.75)(c7) AS c7_q, +quantiles(0.25, 0.5, 0.75)(c8) AS c8_q, +quantiles(0.25, 0.5, 0.75)(c9) AS c9_q, +quantiles(0.25, 0.5, 0.75)(c10) AS c10_q, +quantiles(0.25, 0.5, 0.75)(c11) AS c11_q, +quantiles(0.25, 0.5, 0.75)(c12) AS c12_q, +quantiles(0.25, 0.5, 0.75)(c13) AS c13_q, +quantiles(0.25, 0.5, 0.75)(if(t.c24 > 0, t.c24, NULL) / t.c19) AS c14_q, +quantiles(0.25, 0.5, 0.75)(if(t.c24 > 0, t.c24, NULL) / t.c17) AS c15_q, +quantiles(0.25, 0.5, 0.75)(t.c16) AS c16_q, +quantiles(0.25, 0.5, 0.75)(t.c17) AS c17_q, +quantiles(0.25, 0.5, 0.75)(if(t.c18 > 0, t.c18, NULL)) AS c18_q, +max(if(c3 > 0, c3, NULL)) AS c3_max, +min(if(c3 > 0, c3, NULL)) AS c3_min, +avg(if(c3 > 0, c3, NULL)) AS c3_avg, +max(if(c4 > 0, c4, NULL)) AS c4_max, +min(if(c4 > 0, c4, NULL)) AS c4_min, +avg(if(c4 > 0, c4, NULL)) AS c4_avg, +max(t.c17 / t.c19) AS c5_max, +min(t.c17 / t.c19) AS c5_min, +avg(t.c17 / t.c19) AS c5_avg, +max(if(c6 > 0, c6, NULL)) AS c6_max, +min(if(c6 > 0, c6, NULL)) AS c6_min, +avg(if(c6 > 0, c6, NULL)) AS c6_avg, +max(if(c7 > 0, c7, NULL)) AS c7_max, +min(if(c7 > 0, c7, NULL)) AS c7_min, +avg(if(c7 > 0, c7, NULL)) AS c7_avg, +max(if(c10 > 0, c10, NULL)) AS c10_max, +min(if(c10 > 0, c10, NULL)) AS c10_min, +avg(if(c10 > 0, c10, NULL)) AS c10_avg, +max(if(c8 > 0, c8, NULL)) AS c8_max, +min(if(c8 > 0, c8, NULL)) AS c8_min, +avg(if(c8 > 0, c8, NULL)) AS c8_avg, +max(if(c9 > 0, c9, NULL)) AS c9_max, +min(if(c9 > 0, c9, NULL)) AS c9_min, +avg(if(c9 > 0, c9, NULL)) AS c9_avg, +max(if(c11 > 0, c11, NULL)) AS c11_max, +min(if(c11 > 0, c11, NULL)) AS c11_min, +avg(if(c11 > 0, c11, NULL)) AS c11_avg, +max(if(c12 > 0, c12, NULL)) AS c12_max, +min(if(c12 > 0, c12, NULL)) AS c12_min, +avg(if(c12 > 0, c12, NULL)) AS c12_avg, +max(if(c13 > 0, c13, NULL)) AS c13_max, +min(if(c13 > 0, c13, NULL)) AS c13_min, +avg(if(c13 > 0, c13, NULL)) AS c13_avg, +max(if(t.c24 > 0, t.c24, NULL) / t.c19) AS c14_max, 
+min(if(t.c24 > 0, t.c24, NULL) / t.c19) AS c14_min, +avg(if(t.c24 > 0, t.c24, NULL) / t.c19) AS c14_avg, +max(if(t.c24 > 0, t.c24, NULL) / t.c17) AS c15_max, +min(if(t.c24 > 0, t.c24, NULL) / t.c17) AS c15_min, +avg(if(t.c24 > 0, t.c24, NULL) / t.c17) AS c15_avg, +max(t.c16) AS c16_max, +min(t.c16) AS c16_min, +avg(t.c16) AS c16_avg, +max(t.c17) AS c17_max, +min(t.c17) AS c17_min, +avg(t.c17) AS c17_avg, +max(if(t.c18 > 0, t.c18, NULL)) AS c18_max, +min(if(t.c18 > 0, t.c18, NULL)) AS c18_min, +avg(if(t.c18 > 0, t.c18, NULL)) AS c18_avg, +sum(t.c19) AS c19, +sum(if(t.c18 > 0, t.c18, NULL)) AS c18, +sum(t.c16) AS c16, +sum(c23) AS c23, +sum(t.c17) AS c17, +sum(if(t.c24 > 0, t.c24, NULL)) AS c24, +c24 / c19 AS c14, +c24 / c17 AS c15, +median(if(isNotNull(c29) AND (t.c22 > 0), c13 * (t.c22 / c29), NULL)) AS c21, +sum(c22) AS c22 +FROM +( +SELECT +c27, +c39 AS c1, +c29, +c19, +c23, +c17, +c16, +c18, +c22, +c24, +c3, +c4, +c8, +c9, +c10, +c11, +c12, +c13, +c6, +c7 +FROM +( +SELECT +c27, +uniqExact(c30, c31) AS c19, +uniqExact(c30, c31, c32) AS c23, +uniqExactIf(c30, c31, c33 IN ('c37', 'c38')) AS c17, +countIf(c33 IN ('c37', 'c38')) AS c16, +countIf(c33 = 'c39') AS c18, +coalesce(sumIf(c29, c33 = 'c39'), 0) AS c22, +coalesce(sumIf(c37, c33 = 'c39'), 0) AS c24, +if((c18 > 0) AND (c19 > 0), c18 / c19, NULL) AS c3, +if(c17 != 0, c18 / c17, NULL) AS c4, +coalesce(avgIf(c34, (c34 > 0) AND (c33 IN ('c37', 'c38'))), NULL) AS c8, +coalesce(avgIf(c35, (c35 > 0) AND (c33 IN ('c37', 'c38'))), NULL) AS c9, +coalesce(avgIf(c34, (c34 > 0) AND (c33 = 'c39')), NULL) AS c10, +coalesce(avgIf(c35, (c35 > 0) AND (c33 = 'c39')), NULL) AS c11, +coalesce(avgIf(c37, c33 = 'c39'), NULL) AS c12, +coalesce(avgIf(c37 / c34, (c34 > 0) AND (c33 = 'c39')), NULL) AS c13, +coalesce(avgIf(c37, (c37 > 0) AND (c33 IN ('c37', 'c38'))), NULL) AS c6, +coalesce(minIf(c37, (c37 > 0) AND (c33 IN ('c37', 'c38')) AND (c37 > (c36 / 2))), NULL) AS c7 +FROM +( +SELECT +c27, +c30, +c32, +c31, +NULL AS c29, +NULL AS c33, +NULL AS c37, +NULL AS c34, +NULL AS c35 +FROM +( +SELECT +c27, +c30, +c32, +c31 +FROM database.t1 +PREWHERE ((c32 >= parseDateTimeBestEffort('2020-01-01')) AND (c32 <= parseDateTimeBestEffort('2020-01-01 23:59:59'))) AND (c27 IN +( +SELECT comp_c27 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) 
AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +)) +WHERE c61 = 0 +) AS table25 +UNION ALL +SELECT +c27, +c30, +c32, +c31, +c29, +c33, +c37, +c34, +c35 +FROM +( +SELECT +c27, +c30, +c32, +'c37' AS c33, +coalesce(c37 * joinGet('database.table18', 'c60', concat(c26, '_', 'CH')), 0) AS c37, +if(c53 > 0, c53, 2) AS c53, +c54, +if(c29 > 0, c29, 1) AS c29, +c55, +c56, +datediff('day', c55, c56) AS c34, +datediff('day', c32, c55) AS c35, +c31 +FROM database.table24 +PREWHERE ((c32 >= parseDateTimeBestEffort('2020-01-01')) AND (c32 <= parseDateTimeBestEffort('2020-01-01 23:59:59'))) AND (c27 IN +( +SELECT comp_c27 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', 
assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +)) +WHERE (c61 = 0) AND (c37 < (666 * (1 / joinGet('database.table18', 'c60', concat(c26, '_', 'CH'))))) +) AS table23 +UNION ALL +SELECT +c27, +c30, +c32, +c31, +c29, +c33, +c37, +c34, +c35 +FROM +( +SELECT +c27, +c30, +c32, +'c39' AS c33, +coalesce(c37 * joinGet('database.table18', 'c60', concat(c26, '_', 'CH')), 0) AS c37, +if(c53 > 0, c53, 2) AS c53, +c54, +if(c29 > 0, c29, 1) AS c29, +c55, +c56, +datediff('day', c55, c56) AS c34, +datediff('day', c32, c55) AS c35, +c31 +FROM database.table22 +PREWHERE ((c32 >= parseDateTimeBestEffort('2020-01-01')) AND (c32 <= parseDateTimeBestEffort('2020-01-01 23:59:59'))) AND (c27 IN +( +SELECT comp_c27 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', 
assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +)) +WHERE (c61 = 0) AND (c37 < (666 * (1 / joinGet('database.table18', 'c60', concat(c26, '_', 'CH'))))) +) AS table21 +) AS table20 +ALL LEFT JOIN +( +SELECT +c27, +avgMerge(avg_c37) * joinGet('database.table18', 'c60', concat('USD', '_', 'CH')) AS c36 +FROM database.table19 +PREWHERE c27 IN +( +SELECT comp_c27 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', 
', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +) +WHERE date > (now() - toIntervalMonth(3)) +GROUP BY c27 +) AS table17 USING (c27) +GROUP BY c27 +) AS table16 +ALL LEFT JOIN +( +SELECT +comp_c27 AS c27, +assumeNotNull(c39) AS c39, +c29 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, 
+assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +) USING (c27) +) AS t +ALL LEFT JOIN +( +SELECT +c1, +c2 +FROM +( +SELECT +c39 AS c1, +groupArray(comp_c27) AS c49, +multiIf(c1 = 'c58', if(length(c49) <= 2, 0, 1), c1 = 'c57', 1, if(length(c49) <= 3, 0, 1)) AS c2 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS 
table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +GROUP BY c39 +) AS table3 +) USING (c1) +GROUP BY +c1, +c2 +) AS table2 +ORDER BY c1 ASC +) AS table1 +UNION ALL +SELECT * +FROM +( +SELECT +c1, +c2, +c3_q[1] AS c3_q1, +c3_q[3] AS c3_q3, +c3_q[2] AS c3_median, +least(c3_max, c3_q3 + (1.5 * (c3_q3 - c3_q1))) AS c3_max, +greatest(c3_min, c3_q1 - (1.5 * (c3_q3 - c3_q1))) AS c3_min, +c3_avg, +c4_q[1] AS c4_q1, +c4_q[3] AS c4_q3, +c4_q[2] AS c4_median, +least(c4_max, c4_q3 + (1.5 * (c4_q3 - c4_q1))) AS c4_max, +greatest(c4_min, c4_q1 - (1.5 * (c4_q3 - c4_q1))) AS c4_min, +c4_avg, +c5_q[1] AS c5_q1, +c5_q[3] AS c5_q3, +c5_q[2] AS c5_median, +least(c5_max, c5_q3 + (1.5 * (c5_q3 - c5_q1))) AS c5_max, +greatest(c5_min, c5_q1 - (1.5 * (c5_q3 - c5_q1))) AS c5_min, +c5_avg, +c6_q[1] AS c6_q1, +c6_q[3] AS c6_q3, +c6_q[2] AS c6_median, +least(c6_max, c6_q3 + (1.5 * (c6_q3 - c6_q1))) AS c6_max, +greatest(c6_min, c6_q1 - (1.5 * (c6_q3 - c6_q1))) AS c6_min, +c6_avg, +c7_q[1] AS c7_q1, +c7_q[3] AS c7_q3, +c7_q[2] AS c7_median, +least(c7_max, c7_q3 + (1.5 * (c7_q3 - c7_q1))) AS c7_max, +greatest(c7_min, c7_q1 - (1.5 * (c7_q3 - c7_q1))) AS c7_min, +c7_avg, +c8_q[1] AS c8_q1, +c8_q[3] AS c8_q3, +c8_q[2] AS c8_median, +least(c8_max, c8_q3 + (1.5 * (c8_q3 - c8_q1))) AS c8_max, +greatest(c8_min, c8_q1 - (1.5 * (c8_q3 - c8_q1))) AS c8_min, +c8_avg, +c9_q[1] AS c9_q1, +c9_q[3] AS c9_q3, +c9_q[2] AS c9_median, +least(c9_max, c9_q3 + (1.5 * (c9_q3 - c9_q1))) AS c9_max, +greatest(c9_min, c9_q1 - (1.5 * (c9_q3 - c9_q1))) AS c9_min, +c9_avg, +c10_q[1] AS c10_q1, +c10_q[3] AS c10_q3, +c10_q[2] AS c10_median, +least(c10_max, c10_q3 + (1.5 * (c10_q3 - c10_q1))) AS c10_max, +greatest(c10_min, c10_q1 - (1.5 * (c10_q3 - c10_q1))) AS c10_min, +c10_avg, +c10_avg, +c11_q[1] AS c11_q1, +c11_q[3] AS c11_q3, +c11_q[2] AS c11_median, +least(c11_max, c11_q3 + (1.5 * (c11_q3 - c11_q1))) AS c11_max, +greatest(c11_min, c11_q1 - (1.5 * (c11_q3 - c11_q1))) AS c11_min, +c11_avg, +c12_q[1] AS c12_q1, +c12_q[3] AS c12_q3, +c12_q[2] AS c12_median, +least(c12_max, c12_q3 + (1.5 * (c12_q3 - c12_q1))) AS c12_max, +greatest(c12_min, c12_q1 - (1.5 * (c12_q3 - c12_q1))) AS c12_min, +c12_avg, +c13_q[1] AS c13_q1, +c13_q[3] AS c13_q3, +c13_q[2] AS c13_median, +least(c13_max, c13_q3 + (1.5 * (c13_q3 - c13_q1))) AS c13_max, +greatest(c13_min, c13_q1 - (1.5 * (c13_q3 - c13_q1))) AS c13_min, +c13_avg, +c14_q[1] AS c14_q1, +c14_q[3] AS c14_q3, +c14_q[2] AS c14_median, +least(c14_max, c14_q3 + (1.5 * (c14_q3 - c14_q1))) AS c14_max, +greatest(c14_min, c14_q1 - (1.5 * (c14_q3 - c14_q1))) AS c14_min, +c14_avg, +c15_q[1] AS c15_q1, +c15_q[3] AS c15_q3, +c15_q[2] AS c15_median, +least(c15_max, c15_q3 + (1.5 * (c15_q3 - c15_q1))) AS c15_max, +greatest(c15_min, c15_q1 - (1.5 * (c15_q3 - c15_q1))) AS c15_min, +c15_avg, +c16_q[1] AS c16_q1, +c16_q[3] AS c16_q3, +c16_q[2] AS c16_median, +least(toFloat64(c16_max), c16_q3 + (1.5 * (c16_q3 - c16_q1))) AS c16_max, +greatest(toFloat64(c16_min), c16_q1 - (1.5 * (c16_q3 - c16_q1))) AS c16_min, +c16_avg, +c17_q[1] AS c17_q1, +c17_q[3] AS c17_q3, +c17_q[2] AS c17_median, +least(toFloat64(c17_max), c17_q3 + (1.5 * (c17_q3 - c17_q1))) AS c17_max, +greatest(toFloat64(c17_min), c17_q1 - (1.5 * (c17_q3 - c17_q1))) AS c17_min, +c17_avg, +c18_q[1] AS c18_q1, +c18_q[3] AS c18_q3, +c18_q[2] AS c18_median, +least(toFloat64(c18_max), c18_q3 + (1.5 * (c18_q3 - c18_q1))) AS c18_max, 
+greatest(toFloat64(c18_min), c18_q1 - (1.5 * (c18_q3 - c18_q1))) AS c18_min, +c18_avg, +round(if(c19 != 0, c24 / c19, 0), 2) AS c20, +c21, +c22, +c23 AS c23, +c19 AS c19, +c16 AS c16, +c17 AS c17, +c18 AS c18, +round(c24, 2) AS c24, +round(if(c17 != 0, c24 / c17, 0), 2) AS c25, +'CH' AS c26 +FROM +( +SELECT +c1, +c2, +groupUniqArray(c27) AS c28, +groupUniqArrayIf(c27, isNotNull(c29)) AS c28_with_c29, +quantiles(0.25, 0.5, 0.75)(if(c3 > 0, c3, NULL)) AS c3_q, +quantiles(0.25, 0.5, 0.75)(if(c4 > 0, c4, NULL)) AS c4_q, +quantiles(0.25, 0.5, 0.75)(t.c17 / t.c19) AS c5_q, +quantiles(0.25, 0.5, 0.75)(c6) AS c6_q, +quantiles(0.25, 0.5, 0.75)(c7) AS c7_q, +quantiles(0.25, 0.5, 0.75)(c8) AS c8_q, +quantiles(0.25, 0.5, 0.75)(c9) AS c9_q, +quantiles(0.25, 0.5, 0.75)(c10) AS c10_q, +quantiles(0.25, 0.5, 0.75)(c11) AS c11_q, +quantiles(0.25, 0.5, 0.75)(c12) AS c12_q, +quantiles(0.25, 0.5, 0.75)(c13) AS c13_q, +quantiles(0.25, 0.5, 0.75)(if(t.c24 > 0, t.c24, NULL) / t.c19) AS c14_q, +quantiles(0.25, 0.5, 0.75)(if(t.c24 > 0, t.c24, NULL) / t.c17) AS c15_q, +quantiles(0.25, 0.5, 0.75)(t.c16) AS c16_q, +quantiles(0.25, 0.5, 0.75)(t.c17) AS c17_q, +quantiles(0.25, 0.5, 0.75)(if(t.c18 > 0, t.c18, NULL)) AS c18_q, +max(if(c3 > 0, c3, NULL)) AS c3_max, +min(if(c3 > 0, c3, NULL)) AS c3_min, +avg(if(c3 > 0, c3, NULL)) AS c3_avg, +max(if(c4 > 0, c4, NULL)) AS c4_max, +min(if(c4 > 0, c4, NULL)) AS c4_min, +avg(if(c4 > 0, c4, NULL)) AS c4_avg, +max(t.c17 / t.c19) AS c5_max, +min(t.c17 / t.c19) AS c5_min, +avg(t.c17 / t.c19) AS c5_avg, +max(if(c6 > 0, c6, NULL)) AS c6_max, +min(if(c6 > 0, c6, NULL)) AS c6_min, +avg(if(c6 > 0, c6, NULL)) AS c6_avg, +max(if(c7 > 0, c7, NULL)) AS c7_max, +min(if(c7 > 0, c7, NULL)) AS c7_min, +avg(if(c7 > 0, c7, NULL)) AS c7_avg, +max(if(c10 > 0, c10, NULL)) AS c10_max, +min(if(c10 > 0, c10, NULL)) AS c10_min, +avg(if(c10 > 0, c10, NULL)) AS c10_avg, +max(if(c8 > 0, c8, NULL)) AS c8_max, +min(if(c8 > 0, c8, NULL)) AS c8_min, +avg(if(c8 > 0, c8, NULL)) AS c8_avg, +max(if(c9 > 0, c9, NULL)) AS c9_max, +min(if(c9 > 0, c9, NULL)) AS c9_min, +avg(if(c9 > 0, c9, NULL)) AS c9_avg, +max(if(c11 > 0, c11, NULL)) AS c11_max, +min(if(c11 > 0, c11, NULL)) AS c11_min, +avg(if(c11 > 0, c11, NULL)) AS c11_avg, +max(if(c12 > 0, c12, NULL)) AS c12_max, +min(if(c12 > 0, c12, NULL)) AS c12_min, +avg(if(c12 > 0, c12, NULL)) AS c12_avg, +max(if(c13 > 0, c13, NULL)) AS c13_max, +min(if(c13 > 0, c13, NULL)) AS c13_min, +avg(if(c13 > 0, c13, NULL)) AS c13_avg, +max(if(t.c24 > 0, t.c24, NULL) / t.c19) AS c14_max, +min(if(t.c24 > 0, t.c24, NULL) / t.c19) AS c14_min, +avg(if(t.c24 > 0, t.c24, NULL) / t.c19) AS c14_avg, +max(if(t.c24 > 0, t.c24, NULL) / t.c17) AS c15_max, +min(if(t.c24 > 0, t.c24, NULL) / t.c17) AS c15_min, +avg(if(t.c24 > 0, t.c24, NULL) / t.c17) AS c15_avg, +max(t.c16) AS c16_max, +min(t.c16) AS c16_min, +avg(t.c16) AS c16_avg, +max(t.c17) AS c17_max, +min(t.c17) AS c17_min, +avg(t.c17) AS c17_avg, +max(if(t.c18 > 0, t.c18, NULL)) AS c18_max, +min(if(t.c18 > 0, t.c18, NULL)) AS c18_min, +avg(if(t.c18 > 0, t.c18, NULL)) AS c18_avg, +sum(t.c19) AS c19, +sum(if(t.c18 > 0, t.c18, NULL)) AS c18, +sum(t.c16) AS c16, +sum(c23) AS c23, +sum(t.c17) AS c17, +sum(if(t.c24 > 0, t.c24, NULL)) AS c24, +c24 / c19 AS c14, +c24 / c17 AS c15, +median(if(isNotNull(c29) AND (t.c22 > 0), c13 * (t.c22 / c29), NULL)) AS c21, +sum(c22) AS c22 +FROM +( +SELECT +c27, +c39 AS c1, +c29, +c19, +c23, +c17, +c16, +c18, +c22, +c24, +c3, +c4, +c8, +c9, +c10, +c11, +c12, +c13, +c6, +c7 +FROM +( +SELECT +c27, +uniqExact(c30, c31) 
AS c19, +uniqExact(c30, c31, c32) AS c23, +uniqExactIf(c30, c31, c33 IN ('c37', 'c38')) AS c17, +countIf(c33 IN ('c37', 'c38')) AS c16, +countIf(c33 = 'c39') AS c18, +coalesce(sumIf(c29, c33 = 'c39'), 0) AS c22, +coalesce(sumIf(c37, c33 = 'c39'), 0) AS c24, +if((c18 > 0) AND (c19 > 0), c18 / c19, NULL) AS c3, +if(c17 != 0, c18 / c17, NULL) AS c4, +coalesce(avgIf(c34, (c34 > 0) AND (c33 IN ('c37', 'c38'))), NULL) AS c8, +coalesce(avgIf(c35, (c35 > 0) AND (c33 IN ('c37', 'c38'))), NULL) AS c9, +coalesce(avgIf(c34, (c34 > 0) AND (c33 = 'c39')), NULL) AS c10, +coalesce(avgIf(c35, (c35 > 0) AND (c33 = 'c39')), NULL) AS c11, +coalesce(avgIf(c37, c33 = 'c39'), NULL) AS c12, +coalesce(avgIf(c37 / c34, (c34 > 0) AND (c33 = 'c39')), NULL) AS c13, +coalesce(avgIf(c37, (c37 > 0) AND (c33 IN ('c37', 'c38'))), NULL) AS c6, +coalesce(minIf(c37, (c37 > 0) AND (c33 IN ('c37', 'c38')) AND (c37 > (c36 / 2))), NULL) AS c7 +FROM +( +SELECT +c27, +c30, +c32, +c31, +NULL AS c29, +NULL AS c33, +NULL AS c37, +NULL AS c34, +NULL AS c35 +FROM +( +SELECT +c27, +c30, +c32, +c31 +FROM database.t1 +PREWHERE ((c32 >= parseDateTimeBestEffort('2020-01-01')) AND (c32 <= parseDateTimeBestEffort('2020-01-01 23:59:59'))) AND (c27 IN +( +SELECT comp_c27 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT 
* +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +)) +WHERE c61 = 0 +) AS table25 +UNION ALL +SELECT +c27, +c30, +c32, +c31, +c29, +c33, +c37, +c34, +c35 +FROM +( +SELECT +c27, +c30, +c32, +'c37' AS c33, +coalesce(c37 * joinGet('database.table18', 'c60', concat(c26, '_', 'CH')), 0) AS c37, +if(c53 > 0, c53, 2) AS c53, +c54, +if(c29 > 0, c29, 1) AS c29, +c55, +c56, +datediff('day', c55, c56) AS c34, +datediff('day', c32, c55) AS c35, +c31 +FROM database.table24 +PREWHERE ((c32 >= parseDateTimeBestEffort('2020-01-01')) AND (c32 <= parseDateTimeBestEffort('2020-01-01 23:59:59'))) AND (c27 IN +( +SELECT comp_c27 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT 
+'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +)) +WHERE (c61 = 0) AND (c37 < (666 * (1 / joinGet('database.table18', 'c60', concat(c26, '_', 'CH'))))) +) AS table23 +UNION ALL +SELECT +c27, +c30, +c32, +c31, +c29, +c33, +c37, +c34, +c35 +FROM +( +SELECT +c27, +c30, +c32, +'c39' AS c33, +coalesce(c37 * joinGet('database.table18', 'c60', concat(c26, '_', 'CH')), 0) AS c37, +if(c53 > 0, c53, 2) AS c53, +c54, +if(c29 > 0, c29, 1) AS c29, +c55, +c56, +datediff('day', c55, c56) AS c34, +datediff('day', c32, c55) AS c35, +c31 +FROM database.table22 +PREWHERE ((c32 >= parseDateTimeBestEffort('2020-01-01')) AND (c32 <= parseDateTimeBestEffort('2020-01-01 23:59:59'))) AND (c27 IN +( +SELECT comp_c27 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT 
+'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +)) +WHERE (c61 = 0) AND (c37 < (666 * (1 / joinGet('database.table18', 'c60', concat(c26, '_', 'CH'))))) +) AS table21 +) AS table20 +ALL LEFT JOIN +( +SELECT +c27, +avgMerge(avg_c37) * joinGet('database.table18', 'c60', concat('USD', '_', 'CH')) AS c36 +FROM database.table19 +PREWHERE c27 IN +( +SELECT comp_c27 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= 
(parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +) +WHERE date > (now() - toIntervalMonth(3)) +GROUP BY c27 +) AS table17 USING (c27) +GROUP BY c27 +) AS table16 +ALL LEFT JOIN +( +SELECT +comp_c27 AS c27, +assumeNotNull(c39) AS c39, +c29 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +) USING (c27) +) AS t +ALL LEFT JOIN +( +SELECT +c1, +c2 +FROM +( +SELECT +c39 AS c1, +groupArray(comp_c27) AS c49, +multiIf(c1 = 'c58', if(length(c49) <= 2, 0, 1), c1 = 'c57', 1, if(length(c49) <= 3, 0, 1)) AS c2 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, 
+c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +GROUP BY c39 +) AS table3 +) USING (c1) +GROUP BY +c1, +c2 +) AS table2 +ORDER BY c1 ASC ) AS table1 FORMAT Null ]]> diff --git a/tests/performance/questdb_sum_float32.xml b/tests/performance/questdb_sum_float32.xml deleted file mode 100644 index 0b830857e62..00000000000 --- a/tests/performance/questdb_sum_float32.xml +++ /dev/null @@ -1,33 +0,0 @@ - - - 4 - 20G - 1 - 2000000000 - 10G - - - - - engine - - Memory - MergeTree ORDER BY tuple() - - - - type - - Float32 - Float32 NULL - - - - - CREATE TABLE `zz_{type}_{engine}` (x {type}) ENGINE {engine} - INSERT INTO `zz_{type}_{engine}` SELECT rand() FROM numbers(500000000) - - SELECT sum(x) FROM `zz_{type}_{engine}` - - DROP TABLE IF EXISTS `zz_{type}_{engine}` - diff --git a/tests/performance/questdb_sum_float64.xml b/tests/performance/questdb_sum_float64.xml deleted file mode 
100644 index fde475a1431..00000000000 --- a/tests/performance/questdb_sum_float64.xml +++ /dev/null @@ -1,33 +0,0 @@ - - - 4 - 20G - 1 - 2000000000 - 10G - - - - - engine - - Memory - MergeTree ORDER BY tuple() - - - - type - - Float64 - Float64 NULL - - - - - CREATE TABLE `zz_{type}_{engine}` (x {type}) ENGINE {engine} - INSERT INTO `zz_{type}_{engine}` SELECT rand() FROM numbers(500000000) - - SELECT sum(x) FROM `zz_{type}_{engine}` - - DROP TABLE IF EXISTS `zz_{type}_{engine}` - diff --git a/tests/performance/questdb_sum_int32.xml b/tests/performance/questdb_sum_int32.xml deleted file mode 100644 index ba1eed6b074..00000000000 --- a/tests/performance/questdb_sum_int32.xml +++ /dev/null @@ -1,34 +0,0 @@ - - - 4 - 20G - 1 - 2000000000 - 10G - - - - - engine - - Memory - MergeTree ORDER BY tuple() - - - - type - - Int32 - Int32 NULL - - - - - CREATE TABLE `zz_{type}_{engine}` (x {type}) ENGINE {engine} - INSERT INTO `zz_{type}_{engine}` SELECT rand() FROM numbers_mt(300000000) SETTINGS max_insert_threads = 8 - OPTIMIZE TABLE `zz_{type}_MergeTree ORDER BY tuple()` FINAL - - SELECT sum(x) FROM `zz_{type}_{engine}` - - DROP TABLE IF EXISTS `zz_{type}_{engine}` - diff --git a/tests/performance/schema_inference_text_formats.xml b/tests/performance/schema_inference_text_formats.xml new file mode 100644 index 00000000000..4a57e2a5eda --- /dev/null +++ b/tests/performance/schema_inference_text_formats.xml @@ -0,0 +1,23 @@ + + + + + format + + TabSeparated + CSV + Values + JSONEachRow + JSONCompactEachRow + + + + + +INSERT INTO function file(data.{format}) SELECT WatchID, Title, EventTime, RefererCategories, RefererRegions FROM test.hits LIMIT 25000 SETTINGS engine_file_truncate_on_insert=1 + +DESC file(data.{format}) SETTINGS schema_inference_use_cache_for_file=0 + +INSERT INTO FUNCTION file(data.{format}) SELECT * FROM numbers(0) SETTINGS engine_file_truncate_on_insert=1 + + diff --git a/tests/queries/0_stateless/00057_join_aliases.sql b/tests/queries/0_stateless/00057_join_aliases.sql index 481b0621ed7..b994e26a747 100644 --- a/tests/queries/0_stateless/00057_join_aliases.sql +++ b/tests/queries/0_stateless/00057_join_aliases.sql @@ -3,4 +3,5 @@ SELECT * FROM ( FROM system.numbers ANY LEFT JOIN (SELECT number / 3 AS n, number AS j1, 'Hello' AS j2 FROM system.numbers LIMIT 10) js2 USING n LIMIT 10 -) ORDER BY n; +) ORDER BY n +SETTINGS join_algorithm = 'hash'; -- the query does not finish with merge join diff --git a/tests/queries/0_stateless/00609_prewhere_and_default.sql b/tests/queries/0_stateless/00609_prewhere_and_default.sql index 7da809cd140..f1aa69c1320 100644 --- a/tests/queries/0_stateless/00609_prewhere_and_default.sql +++ b/tests/queries/0_stateless/00609_prewhere_and_default.sql @@ -3,11 +3,25 @@ create table `table_00609` (key UInt64, val UInt64) engine = MergeTree order by insert into `table_00609` select number, number / 8192 from system.numbers limit 100000; alter table `table_00609` add column def UInt64 default val + 1; select * from `table_00609` prewhere val > 2 format Null; +select * from `table_00609` prewhere val > 2 format Null SETTINGS max_block_size=100; +select * from `table_00609` prewhere val > 2 format Null SETTINGS max_block_size=1000; +select * from `table_00609` prewhere val > 2 format Null SETTINGS max_block_size=10000; +select * from `table_00609` prewhere val > 2 format Null SETTINGS max_block_size=20000; +select * from `table_00609` prewhere val > 2 format Null SETTINGS max_block_size=30000; +select * from `table_00609` prewhere val > 2 format Null SETTINGS 
max_block_size=40000; +select * from `table_00609` prewhere val > 2 format Null SETTINGS max_block_size=80000; drop table if exists `table_00609`; create table `table_00609` (key UInt64, val UInt64) engine = MergeTree order by key settings index_granularity=8192; insert into `table_00609` select number, number / 8192 from system.numbers limit 100000; alter table `table_00609` add column def UInt64; select * from `table_00609` prewhere val > 2 format Null; +select * from `table_00609` prewhere val > 2 format Null SETTINGS max_block_size=100; +select * from `table_00609` prewhere val > 2 format Null SETTINGS max_block_size=1000; +select * from `table_00609` prewhere val > 2 format Null SETTINGS max_block_size=10000; +select * from `table_00609` prewhere val > 2 format Null SETTINGS max_block_size=20000; +select * from `table_00609` prewhere val > 2 format Null SETTINGS max_block_size=30000; +select * from `table_00609` prewhere val > 2 format Null SETTINGS max_block_size=40000; +select * from `table_00609` prewhere val > 2 format Null SETTINGS max_block_size=80000; drop table if exists `table_00609`; diff --git a/tests/queries/0_stateless/00718_format_datetime.reference b/tests/queries/0_stateless/00718_format_datetime.reference index bc98dd59d5f..17937514396 100644 --- a/tests/queries/0_stateless/00718_format_datetime.reference +++ b/tests/queries/0_stateless/00718_format_datetime.reference @@ -34,3 +34,11 @@ no formatting pattern no formatting pattern -1100 +0300 +0530 +1234560 +000340 +2022-12-08 18:11:29.123400000 +2022-12-08 18:11:29.1 +2022-12-08 18:11:29.0 +2022-12-08 18:11:29.0 +2022-12-08 00:00:00.0 +2022-12-08 00:00:00.0 diff --git a/tests/queries/0_stateless/00718_format_datetime.sql b/tests/queries/0_stateless/00718_format_datetime.sql index deb5fb96c6c..f6fb2ce15bc 100644 --- a/tests/queries/0_stateless/00718_format_datetime.sql +++ b/tests/queries/0_stateless/00718_format_datetime.sql @@ -54,3 +54,13 @@ SELECT formatDateTime(toDateTime('2020-01-01 01:00:00', 'UTC'), '%z'); SELECT formatDateTime(toDateTime('2020-01-01 01:00:00', 'US/Samoa'), '%z'); SELECT formatDateTime(toDateTime('2020-01-01 01:00:00', 'Europe/Moscow'), '%z'); SELECT formatDateTime(toDateTime('1970-01-01 00:00:00', 'Asia/Kolkata'), '%z'); + +select formatDateTime(toDateTime64('2010-01-04 12:34:56.123456', 7), '%f'); +select formatDateTime(toDateTime64('2022-12-08 18:11:29.00034', 6, 'UTC'), '%f'); + +select formatDateTime(toDateTime64('2022-12-08 18:11:29.1234', 9, 'UTC'), '%F %T.%f'); +select formatDateTime(toDateTime64('2022-12-08 18:11:29.1234', 1, 'UTC'), '%F %T.%f'); +select formatDateTime(toDateTime64('2022-12-08 18:11:29.1234', 0, 'UTC'), '%F %T.%f'); +select formatDateTime(toDateTime('2022-12-08 18:11:29', 'UTC'), '%F %T.%f'); +select formatDateTime(toDate32('2022-12-08 18:11:29', 'UTC'), '%F %T.%f'); +select formatDateTime(toDate('2022-12-08 18:11:29', 'UTC'), '%F %T.%f'); diff --git a/tests/queries/0_stateless/00855_join_with_array_join.sql b/tests/queries/0_stateless/00855_join_with_array_join.sql index 05180573525..c1ea0bbb429 100644 --- a/tests/queries/0_stateless/00855_join_with_array_join.sql +++ b/tests/queries/0_stateless/00855_join_with_array_join.sql @@ -43,7 +43,7 @@ JOIN system.one AS y USING dummy; SELECT * FROM ( SELECT [toUInt32(dummy), toUInt32(dummy)] AS dummy FROM system.one ) AS x ARRAY JOIN dummy JOIN (select toInt32(dummy) as dummy from system.one ) AS y USING dummy; -SELECT dummy > 0, toTypeName(any(dummy)), any(toTypeName(dummy)) +SELECT dummy > 0, toTypeName(any(dummy)), 
any(toTypeName(dummy)) FROM ( SELECT [toUInt32(dummy), toUInt32(dummy)] AS dummy FROM system.one ) AS x ARRAY JOIN dummy JOIN ( SELECT toInt32(dummy) AS dummy FROM system.one ) AS y USING dummy GROUP BY (dummy > 0); diff --git a/tests/queries/0_stateless/01044_great_circle_angle.reference b/tests/queries/0_stateless/01044_great_circle_angle.reference index ebdeaa10067..c247e398824 100644 --- a/tests/queries/0_stateless/01044_great_circle_angle.reference +++ b/tests/queries/0_stateless/01044_great_circle_angle.reference @@ -29,24 +29,24 @@ ██████████████████████████████████▎ ████████████████████████████████████▏ ██████████████████████████████████████ -███████████████████████████████████████▊ -█████████████████████████████████████████▋ +███████████████████████████████████████▉ +█████████████████████████████████████████▊ ███████████████████████████████████████████▌ █████████████████████████████████████████████▍ ███████████████████████████████████████████████▏ -████████████████████████████████████████████████▊ +████████████████████████████████████████████████▉ ██████████████████████████████████████████████████▌ ████████████████████████████████████████████████████▏ -█████████████████████████████████████████████████████▊ +█████████████████████████████████████████████████████▉ ███████████████████████████████████████████████████████▍ █████████████████████████████████████████████████████████ ██████████████████████████████████████████████████████████▌ ████████████████████████████████████████████████████████████ █████████████████████████████████████████████████████████████▌ -██████████████████████████████████████████████████████████████▊ +██████████████████████████████████████████████████████████████▉ ████████████████████████████████████████████████████████████████▎ █████████████████████████████████████████████████████████████████▌ -██████████████████████████████████████████████████████████████████▋ +██████████████████████████████████████████████████████████████████▊ ████████████████████████████████████████████████████████████████████ █████████████████████████████████████████████████████████████████████▏ ██████████████████████████████████████████████████████████████████████▎ @@ -59,13 +59,13 @@ ████████████████████████████████████████████████████████████████████████████▍ █████████████████████████████████████████████████████████████████████████████ █████████████████████████████████████████████████████████████████████████████▌ -█████████████████████████████████████████████████████████████████████████████▊ +█████████████████████████████████████████████████████████████████████████████▉ ██████████████████████████████████████████████████████████████████████████████▎ ██████████████████████████████████████████████████████████████████████████████▋ -██████████████████████████████████████████████████████████████████████████████▋ -██████████████████████████████████████████████████████████████████████████████▊ -██████████████████████████████████████████████████████████████████████████████▊ ██████████████████████████████████████████████████████████████████████████████▊ +██████████████████████████████████████████████████████████████████████████████▉ +██████████████████████████████████████████████████████████████████████████████▉ +██████████████████████████████████████████████████████████████████████████████▉ ██████████████████████████████████████████████████████████████████████████████▋ ██████████████████████████████████████████████████████████████████████████████▍ 
██████████████████████████████████████████████████████████████████████████████ @@ -84,18 +84,18 @@ ██████████████████████████████████████████████████████████████▌ ████████████████████████████████████████████████████████████▍ ██████████████████████████████████████████████████████████▏ -███████████████████████████████████████████████████████▋ +███████████████████████████████████████████████████████▊ █████████████████████████████████████████████████████▏ ██████████████████████████████████████████████████▍ ███████████████████████████████████████████████▌ ████████████████████████████████████████████▌ █████████████████████████████████████████▎ -█████████████████████████████████████▊ +█████████████████████████████████████▉ ██████████████████████████████████▍ ██████████████████████████████▋ -██████████████████████████▋ -██████████████████████▋ +██████████████████████████▊ +██████████████████████▊ ██████████████████▌ ██████████████▏ █████████▋ -████▊ +████▉ diff --git a/tests/queries/0_stateless/01072_window_view_multiple_columns_groupby.sh b/tests/queries/0_stateless/01072_window_view_multiple_columns_groupby.sh index ccc4ed3e08d..15d4da504f1 100755 --- a/tests/queries/0_stateless/01072_window_view_multiple_columns_groupby.sh +++ b/tests/queries/0_stateless/01072_window_view_multiple_columns_groupby.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: no-random-settings, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -18,7 +19,7 @@ INSERT INTO mt VALUES ('test1', 'test2'); EOF while true; do - $CLICKHOUSE_CLIENT --query="SELECT count(*) FROM dst" | grep -q "1" && break || sleep .5 ||: + $CLICKHOUSE_CLIENT --query="SELECT count(*) FROM dst" | grep -q "1" && break || sleep .1 ||: done $CLICKHOUSE_CLIENT --query="SELECT colA, colB FROM dst" diff --git a/tests/queries/0_stateless/01092_memory_profiler.sql b/tests/queries/0_stateless/01092_memory_profiler.sql index 3869bf941c0..b69d3faf94e 100644 --- a/tests/queries/0_stateless/01092_memory_profiler.sql +++ b/tests/queries/0_stateless/01092_memory_profiler.sql @@ -6,8 +6,9 @@ SET memory_profiler_step = 1000000; SET memory_profiler_sample_probability = 1; SET log_queries = 1; -SELECT ignore(groupArray(number), 'test memory profiler') FROM numbers(10000000); +SELECT ignore(groupArray(number), 'test memory profiler') FROM numbers(10000000) SETTINGS log_comment = '01092_memory_profiler'; + SYSTEM FLUSH LOGS; -WITH addressToSymbol(arrayJoin(trace)) AS symbol SELECT count() > 0 FROM system.trace_log t WHERE event_date >= yesterday() AND trace_type = 'Memory' AND query_id = (SELECT query_id FROM system.query_log WHERE current_database = currentDatabase() AND event_date >= yesterday() AND query LIKE '%test memory profiler%' ORDER BY event_time DESC LIMIT 1); -WITH addressToSymbol(arrayJoin(trace)) AS symbol SELECT count() > 0 FROM system.trace_log t WHERE event_date >= yesterday() AND trace_type = 'MemoryPeak' AND query_id = (SELECT query_id FROM system.query_log WHERE current_database = currentDatabase() AND event_date >= yesterday() AND query LIKE '%test memory profiler%' ORDER BY event_time DESC LIMIT 1); -WITH addressToSymbol(arrayJoin(trace)) AS symbol SELECT count() > 0 FROM system.trace_log t WHERE event_date >= yesterday() AND trace_type = 'MemorySample' AND query_id = (SELECT query_id FROM system.query_log WHERE current_database = currentDatabase() AND event_date >= yesterday() AND query LIKE '%test memory profiler%' ORDER BY event_time DESC LIMIT 1); +WITH addressToSymbol(arrayJoin(trace)) AS 
symbol SELECT count() > 0 FROM system.trace_log t WHERE event_date >= yesterday() AND trace_type = 'Memory' AND query_id = (SELECT query_id FROM system.query_log WHERE current_database = currentDatabase() AND event_date >= yesterday() AND query LIKE '%test memory profiler%' AND has(used_table_functions, 'numbers') AND log_comment = '01092_memory_profiler' ORDER BY event_time DESC LIMIT 1); +WITH addressToSymbol(arrayJoin(trace)) AS symbol SELECT count() > 0 FROM system.trace_log t WHERE event_date >= yesterday() AND trace_type = 'MemoryPeak' AND query_id = (SELECT query_id FROM system.query_log WHERE current_database = currentDatabase() AND event_date >= yesterday() AND query LIKE '%test memory profiler%' AND has(used_table_functions, 'numbers') AND log_comment = '01092_memory_profiler' ORDER BY event_time DESC LIMIT 1); +WITH addressToSymbol(arrayJoin(trace)) AS symbol SELECT count() > 0 FROM system.trace_log t WHERE event_date >= yesterday() AND trace_type = 'MemorySample' AND query_id = (SELECT query_id FROM system.query_log WHERE current_database = currentDatabase() AND event_date >= yesterday() AND query LIKE '%test memory profiler%' AND has(used_table_functions, 'numbers') AND log_comment = '01092_memory_profiler' ORDER BY event_time DESC LIMIT 1); diff --git a/tests/queries/0_stateless/01097_one_more_range_reader_test_wide_part.reference b/tests/queries/0_stateless/01097_one_more_range_reader_test_wide_part.reference new file mode 100644 index 00000000000..b4dfe343bbe --- /dev/null +++ b/tests/queries/0_stateless/01097_one_more_range_reader_test_wide_part.reference @@ -0,0 +1,3 @@ +foo +foo +foo diff --git a/tests/queries/0_stateless/01097_one_more_range_reader_test_wide_part.sql b/tests/queries/0_stateless/01097_one_more_range_reader_test_wide_part.sql new file mode 100644 index 00000000000..244f58b6717 --- /dev/null +++ b/tests/queries/0_stateless/01097_one_more_range_reader_test_wide_part.sql @@ -0,0 +1,17 @@ +drop table if exists t; + +create table t (id UInt32, a Int) engine = MergeTree order by id settings min_bytes_for_wide_part=0; + +insert into t values (1, 0) (2, 1) (3, 0) (4, 0) (5, 0); +alter table t add column s String default 'foo'; +select s from t prewhere a = 1; + +drop table t; + +create table t (id UInt32, a Int) engine = MergeTree order by id settings min_bytes_for_wide_part=0; + +insert into t values (1, 1) (2, 1) (3, 0) (4, 0) (5, 0); +alter table t add column s String default 'foo'; +select s from t prewhere a = 1; + +drop table t; diff --git a/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh b/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh index 983cb515d8e..bbe3a5a51c0 100755 --- a/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh +++ b/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh @@ -64,7 +64,7 @@ function alter_table() if [ -z "$table" ]; then continue; fi $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=0 -q \ "alter table $table update n = n + (select max(n) from merge(REGEXP('${CLICKHOUSE_DATABASE}.*'), '.*')) where 1 settings allow_nondeterministic_mutations=1" \ - 2>&1| grep -Fa "Exception: " | grep -Fv "Cannot enqueue query" | grep -Fv "ZooKeeper session expired" | grep -Fv UNKNOWN_DATABASE | grep -Fv UNKNOWN_TABLE | grep -Fv TABLE_IS_READ_ONLY + 2>&1| grep -Fa "Exception: " | grep -Fv "Cannot enqueue query" | grep -Fv "ZooKeeper session expired" | grep -Fv UNKNOWN_DATABASE | grep -Fv UNKNOWN_TABLE | grep -Fv TABLE_IS_READ_ONLY | grep -Fv TABLE_IS_DROPPED sleep 
0.$RANDOM done } @@ -75,7 +75,7 @@ function insert() table=$($CLICKHOUSE_CLIENT -q "select database || '.' || name from system.tables where database like '${CLICKHOUSE_DATABASE}%' order by rand() limit 1") if [ -z "$table" ]; then continue; fi $CLICKHOUSE_CLIENT -q \ - "insert into $table values ($RANDOM)" 2>&1| grep -Fa "Exception: " | grep -Fv UNKNOWN_DATABASE | grep -Fv UNKNOWN_TABLE | grep -Fv TABLE_IS_READ_ONLY + "insert into $table values ($RANDOM)" 2>&1| grep -Fa "Exception: " | grep -Fv UNKNOWN_DATABASE | grep -Fv UNKNOWN_TABLE | grep -Fv TABLE_IS_READ_ONLY | grep -Fv TABLE_IS_DROPPED done } diff --git a/tests/queries/0_stateless/01159_combinators_with_parameters.reference b/tests/queries/0_stateless/01159_combinators_with_parameters.reference index cc0cb604bf3..c1edc826fcb 100644 --- a/tests/queries/0_stateless/01159_combinators_with_parameters.reference +++ b/tests/queries/0_stateless/01159_combinators_with_parameters.reference @@ -3,7 +3,6 @@ AggregateFunction(topKDistinct(10), String) AggregateFunction(topKForEach(10), Array(String)) AggregateFunction(topKIf(10), String, UInt8) AggregateFunction(topK(10), String) -AggregateFunction(topKOrNull(10), String) AggregateFunction(topKOrDefault(10), String) AggregateFunction(topKResample(10, 1, 2, 42), String, UInt64) AggregateFunction(topK(10), String) diff --git a/tests/queries/0_stateless/01159_combinators_with_parameters.sql b/tests/queries/0_stateless/01159_combinators_with_parameters.sql index 69508d8e304..8b2dbde6480 100644 --- a/tests/queries/0_stateless/01159_combinators_with_parameters.sql +++ b/tests/queries/0_stateless/01159_combinators_with_parameters.sql @@ -3,7 +3,7 @@ SELECT toTypeName(topKDistinctState(10)(toString(number))) FROM numbers(100); SELECT toTypeName(topKForEachState(10)([toString(number)])) FROM numbers(100); SELECT toTypeName(topKIfState(10)(toString(number), number % 2)) FROM numbers(100); SELECT toTypeName(topKMergeState(10)(state)) FROM (SELECT topKState(10)(toString(number)) as state FROM numbers(100)); -SELECT toTypeName(topKOrNullState(10)(toString(number))) FROM numbers(100); +SELECT toTypeName(topKOrNullState(10)(toString(number))) FROM numbers(100); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT toTypeName(topKOrDefaultState(10)(toString(number))) FROM numbers(100); SELECT toTypeName(topKResampleState(10, 1, 2, 42)(toString(number), number)) FROM numbers(100); SELECT toTypeName(topKState(10)(toString(number))) FROM numbers(100); diff --git a/tests/queries/0_stateless/01408_range_overflow.reference b/tests/queries/0_stateless/01408_range_overflow.reference index e69de29bb2d..4f31f0710ee 100644 --- a/tests/queries/0_stateless/01408_range_overflow.reference +++ b/tests/queries/0_stateless/01408_range_overflow.reference @@ -0,0 +1,5 @@ +[1025,9223372036854776832] +[1025,9223372036854776832] +[1025,9223372036854776832] +[1025,9223372036854776832] +[1025,9223372036854776832] diff --git a/tests/queries/0_stateless/01408_range_overflow.sql b/tests/queries/0_stateless/01408_range_overflow.sql index 1640798999c..2107e8c3f36 100644 --- a/tests/queries/0_stateless/01408_range_overflow.sql +++ b/tests/queries/0_stateless/01408_range_overflow.sql @@ -1,12 +1,13 @@ -- executeGeneric() -SELECT range(1025, 1048576 + 9223372036854775807, 9223372036854775807); -- { serverError 69; } -SELECT range(1025, 1048576 + (9223372036854775807 AS i), i); -- { serverError 69; } +SELECT range(1025, 1048576 + 9223372036854775807, 9223372036854775807); +SELECT range(1025, 1048576 + (9223372036854775807 AS i), i); +SELECT 
range(1025, 18446744073709551615, 1); -- { serverError 69; } -- executeConstStep() -SELECT range(number, 1048576 + 9223372036854775807, 9223372036854775807) FROM system.numbers LIMIT 1 OFFSET 1025; -- { serverError 69; } +SELECT range(number, 1048576 + 9223372036854775807, 9223372036854775807) FROM system.numbers LIMIT 1 OFFSET 1025; -- executeConstStartStep() -SELECT range(1025, number + 9223372036854775807, 9223372036854775807) FROM system.numbers LIMIT 1 OFFSET 1048576; -- { serverError 69; } +SELECT range(1025, number + 9223372036854775807, 9223372036854775807) FROM system.numbers LIMIT 1 OFFSET 1048576; -- executeConstStart() -SELECT range(1025, 1048576 + 9223372036854775807, number + 9223372036854775807) FROM system.numbers LIMIT 1; -- { serverError 69; } +SELECT range(1025, 1048576 + 9223372036854775807, number + 9223372036854775807) FROM system.numbers LIMIT 1; diff --git a/tests/queries/0_stateless/01459_manual_write_to_replicas_quorum_detach_attach.reference b/tests/queries/0_stateless/01459_manual_write_to_replicas_quorum_detach_attach.reference new file mode 100644 index 00000000000..0d6e68f032f --- /dev/null +++ b/tests/queries/0_stateless/01459_manual_write_to_replicas_quorum_detach_attach.reference @@ -0,0 +1,6 @@ +10 0 9 45 +10 0 9 45 +10 0 9 45 +10 0 9 45 +10 0 9 45 +10 0 9 45 diff --git a/tests/queries/0_stateless/01459_manual_write_to_replicas_quorum_detach_attach.sh b/tests/queries/0_stateless/01459_manual_write_to_replicas_quorum_detach_attach.sh new file mode 100755 index 00000000000..b97fcece267 --- /dev/null +++ b/tests/queries/0_stateless/01459_manual_write_to_replicas_quorum_detach_attach.sh @@ -0,0 +1,46 @@ +#!/usr/bin/env bash +# Tags: replica, no-replicated-database, no-parallel +# Tag no-replicated-database: Fails due to additional replicas or shards + +set -e + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +NUM_REPLICAS=6 + +for i in $(seq 1 $NUM_REPLICAS); do + $CLICKHOUSE_CLIENT -n -q " + DROP TABLE IF EXISTS r$i SYNC; + CREATE TABLE r$i (x UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/r', 'r$i') ORDER BY x; + " +done + +valid_exceptions_to_retry='Quorum for previous write has not been satisfied yet|Another quorum insert has been already started|Unexpected logical error while adding block' + +function thread { + for x in {0..9}; do + while true; do + $CLICKHOUSE_CLIENT --query "DETACH TABLE r$1" + $CLICKHOUSE_CLIENT --query "ATTACH TABLE r$1" + $CLICKHOUSE_CLIENT --insert_quorum 3 --insert_quorum_parallel 0 --insert_keeper_fault_injection_probability=0 --query "INSERT INTO r$1 SELECT $x" 2>&1 | grep -qE "$valid_exceptions_to_retry" || break + done + done +} + +for i in $(seq 1 $NUM_REPLICAS); do + thread $i & +done + +wait + +for i in $(seq 1 $NUM_REPLICAS); do + $CLICKHOUSE_CLIENT -n -q " + SYSTEM SYNC REPLICA r$i; + SELECT count(), min(x), max(x), sum(x) FROM r$i;" +done + +for i in $(seq 1 $NUM_REPLICAS); do + $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS r$i SYNC;" +done diff --git a/tests/queries/0_stateless/01502_long_log_tinylog_deadlock_race.sh b/tests/queries/0_stateless/01502_long_log_tinylog_deadlock_race.sh index 1087a7ed96b..3cf94a0b2bd 100755 --- a/tests/queries/0_stateless/01502_long_log_tinylog_deadlock_race.sh +++ b/tests/queries/0_stateless/01502_long_log_tinylog_deadlock_race.sh @@ -88,3 +88,9 @@ test_with_engine Log $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS t1" $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS t2" + +# It is not enough to kill the commands running the queries; the server might still be executing the queries themselves, +# so we also have to kill those queries to avoid the following error: +# Code: 219. DB::Exception: New table appeared in database being dropped or detached. Try again. (DATABASE_NOT_EMPTY) + +$CLICKHOUSE_CLIENT -q "KILL QUERY WHERE current_database = currentDatabase() SYNC FORMAT Null" diff --git a/tests/queries/0_stateless/01606_git_import.sh b/tests/queries/0_stateless/01606_git_import.sh index 585b39e21ab..c9aa2c7d82e 100755 --- a/tests/queries/0_stateless/01606_git_import.sh +++ b/tests/queries/0_stateless/01606_git_import.sh @@ -1,5 +1,6 @@ #!/usr/bin/env bash -# Tags: no-debug +# Tags: no-debug, no-tsan, no-msan, no-ubsan, no-asan +# ^ because inserting a 50 MB file can be slow.
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -126,4 +127,3 @@ DROP TABLE commits; DROP TABLE file_changes; DROP TABLE line_changes; " - diff --git a/tests/queries/0_stateless/01674_filter_by_uint8.reference b/tests/queries/0_stateless/01674_filter_by_uint8.reference index 6b522898280..435423ba455 100644 --- a/tests/queries/0_stateless/01674_filter_by_uint8.reference +++ b/tests/queries/0_stateless/01674_filter_by_uint8.reference @@ -2,7 +2,12 @@ 0 255 1 ['foo','bar'] 1 1 -2 ['foo','bar'] 2 1 -3 ['foo','bar'] 3 1 -4 ['foo','bar'] 4 1 -5 ['foo','bar'] 5 1 +2 ['foo','bar'] 2 2 +3 ['foo','bar'] 3 3 +4 ['foo','bar'] 4 4 +5 ['foo','bar'] 5 5 +1 ['foo','bar'] 1 1 +2 ['foo','bar'] 2 2 +3 ['foo','bar'] 3 3 +4 ['foo','bar'] 4 4 +5 ['foo','bar'] 5 5 diff --git a/tests/queries/0_stateless/01674_filter_by_uint8.sql b/tests/queries/0_stateless/01674_filter_by_uint8.sql index 960153d9c5a..0bf11cea59b 100644 --- a/tests/queries/0_stateless/01674_filter_by_uint8.sql +++ b/tests/queries/0_stateless/01674_filter_by_uint8.sql @@ -10,5 +10,6 @@ ENGINE = MergeTree ORDER BY u; INSERT INTO t_filter SELECT toString(number), ['foo', 'bar'], number, toUInt8(number) FROM numbers(1000); SELECT * FROM t_filter WHERE f LIMIT 5; +SELECT * FROM t_filter WHERE f != 0 LIMIT 5; DROP TABLE IF EXISTS t_filter; diff --git a/tests/queries/0_stateless/01906_lc_in_bug.reference b/tests/queries/0_stateless/01906_lc_in_bug.reference index 9fe1650abf0..adce940e346 100644 --- a/tests/queries/0_stateless/01906_lc_in_bug.reference +++ b/tests/queries/0_stateless/01906_lc_in_bug.reference @@ -1,2 +1,3 @@ 1 0 3 1 +0 diff --git a/tests/queries/0_stateless/01906_lc_in_bug.sql b/tests/queries/0_stateless/01906_lc_in_bug.sql index f8f41da31ae..581053e14e1 100644 --- a/tests/queries/0_stateless/01906_lc_in_bug.sql +++ b/tests/queries/0_stateless/01906_lc_in_bug.sql @@ -6,3 +6,8 @@ insert into tab values ('a'), ('bb'), ('a'), ('cc'); select count() as c, x in ('a', 'bb') as g from tab group by g order by c; drop table if exists tab; + +-- https://github.com/ClickHouse/ClickHouse/issues/44503 +CREATE TABLE test(key Int32) ENGINE = MergeTree ORDER BY (key); +insert into test select intDiv(number,100) from numbers(10000000); +SELECT COUNT() FROM test WHERE key <= 100000 AND (NOT (toLowCardinality('') IN (SELECT ''))); diff --git a/tests/queries/0_stateless/01956_fuse_quantile_optimization.reference b/tests/queries/0_stateless/01956_fuse_quantile_optimization.reference deleted file mode 100644 index dddab828a25..00000000000 --- a/tests/queries/0_stateless/01956_fuse_quantile_optimization.reference +++ /dev/null @@ -1,99 +0,0 @@ -2016-06-15 23:00:00 2016-06-15 23:00:00 -2016-06-15 23:00:00 2016-06-15 23:00:00 -2016-06-15 23:00:00 2016-06-15 23:00:00 -2016-06-15 23:00:00 2016-06-15 23:00:00 2016-06-15 23:00:00 -30000 30000 30000 -30000 30000 30000 -2016-06-15 23:00:16 2016-06-15 23:00:16 2016-06-15 23:00:16 -2016-06-15 23:00:16 2016-06-15 23:00:16 2016-06-15 23:00:16 -2016-04-02 17:23:12 2016-04-02 17:23:12 2016-04-02 17:23:12 ----------After fuse result----------- -quantile: -SELECT - quantiles(0.2, 0.3)(d)[1], - quantiles(0.2, 0.3)(d)[2] -FROM datetime -2016-06-15 23:00:00 2016-06-15 23:00:00 -quantileDeterministic: -SELECT - quantilesDeterministic(0.2, 0.5)(d, 1)[1], - quantilesDeterministic(0.2, 0.5)(d, 1)[2] -FROM datetime -2016-06-15 23:00:00 2016-06-15 23:00:00 -quantileExact: -SELECT - quantilesExact(0.2, 0.5)(d)[1], - quantilesExact(0.2, 0.5)(d)[2] -FROM datetime -2016-06-15 23:00:00 
2016-06-15 23:00:00 -quantileExactWeighted: -SELECT - quantilesExactWeighted(0.2, 0.4)(d, 1)[1], - quantilesExactWeighted(0.2, 0.4)(d, 1)[2], - quantileExactWeighted(0.3)(d, 2) -FROM datetime -2016-06-15 23:00:00 2016-06-15 23:00:00 2016-06-15 23:00:00 -quantileTiming: -SELECT - quantilesTiming(0.2, 0.3)(d)[1], - quantilesTiming(0.2, 0.3)(d)[2], - quantileTiming(0.2)(d + 1) -FROM datetime -30000 30000 30000 -quantileTimingWeighted: -SELECT - quantilesTimingWeighted(0.2, 0.3)(d, 1)[1], - quantilesTimingWeighted(0.2, 0.3)(d, 1)[2], - quantileTimingWeighted(0.2)(d, 2) -FROM datetime -30000 30000 30000 -quantileTDigest: -SELECT - quantilesTDigest(0.2, 0.3)(d)[1], - quantilesTDigest(0.2, 0.3)(d)[2], - quantileTDigest(0.2)(d + 1) -FROM datetime -2016-06-15 23:00:16 2016-06-15 23:00:16 2016-06-15 23:00:16 -quantileTDigestWeighted: -SELECT - quantilesTDigestWeighted(0.2, 0.3)(d, 1)[1], - quantilesTDigestWeighted(0.2, 0.3)(d, 1)[2], - quantileTDigestWeighted(0.4)(d, 2) -FROM datetime -2016-06-15 23:00:16 2016-06-15 23:00:16 2016-06-15 23:00:16 -quantileBFloat16: -SELECT - quantilesBFloat16(0.2, 0.3)(d)[1], - quantilesBFloat16(0.2, 0.3)(d)[2], - quantileBFloat16(0.4)(d + 1) -FROM datetime -2016-04-02 17:23:12 2016-04-02 17:23:12 2016-04-02 17:23:12 -quantileBFloat16Weighted: -SELECT - quantilesBFloat16Weighted(0.2, 0.3)(d, 1)[1], - quantilesBFloat16Weighted(0.2, 0.3)(d, 1)[2], - quantileBFloat16Weighted(0.2)(d, 2) -FROM datetime -2016-04-02 17:23:12 2016-04-02 17:23:12 2016-04-02 17:23:12 -SELECT - quantiles(0.2, 0.3, 0.2)(d)[1] AS k, - quantiles(0.2, 0.3, 0.2)(d)[2] -FROM datetime -ORDER BY quantiles(0.2, 0.3, 0.2)(d)[3] ASC -0 4 7.2 7.6 -1 5 8.2 8.6 -SELECT - b, - quantiles(0.5, 0.9, 0.95)(x)[1] AS a, - quantiles(0.5, 0.9, 0.95)(x)[2] AS y, - quantiles(0.5, 0.9, 0.95)(x)[3] -FROM -( - SELECT - number AS x, - number % 2 AS b - FROM numbers(10) -) -GROUP BY b -ORDER BY b ASC -1 1 1 diff --git a/tests/queries/0_stateless/01956_fuse_quantile_optimization.sql b/tests/queries/0_stateless/01956_fuse_quantile_optimization.sql deleted file mode 100644 index 1f08439c0b6..00000000000 --- a/tests/queries/0_stateless/01956_fuse_quantile_optimization.sql +++ /dev/null @@ -1,76 +0,0 @@ -DROP TABLE IF EXISTS datetime; -CREATE TABLE datetime (d DateTime('UTC')) ENGINE = Memory; -INSERT INTO datetime(d) VALUES(toDateTime('2016-06-15 23:00:00', 'UTC')) - -SET optimize_syntax_fuse_functions = true; - -SELECT quantile(0.2)(d), quantile(0.3)(d) FROM datetime; -SELECT quantileDeterministic(0.2)(d, 1), quantileDeterministic(0.5)(d, 1) FROM datetime; -SELECT quantileExact(0.2)(d), quantileExact(0.5)(d) FROM datetime; -SELECT quantileExactWeighted(0.2)(d, 1), quantileExactWeighted(0.4)(d, 1), quantileExactWeighted(0.3)(d, 2) FROM datetime; -SELECT quantileTiming(0.2)(d), quantileTiming(0.3)(d), quantileTiming(0.2)(d+1) FROM datetime; -SELECT quantileTimingWeighted(0.2)(d, 1), quantileTimingWeighted(0.3)(d, 1), quantileTimingWeighted(0.2)(d, 2) FROM datetime; -SELECT quantileTDigest(0.2)(d), quantileTDigest(0.3)(d), quantileTDigest(0.2)(d + 1) FROM datetime; -SELECT quantileTDigestWeighted(0.2)(d, 1), quantileTDigestWeighted(0.3)(d, 1), quantileTDigestWeighted(0.4)(d, 2) FROM datetime; -SELECT quantileBFloat16(0.2)(d), quantileBFloat16(0.3)(d), quantileBFloat16(0.4)(d + 1) FROM datetime; - - -SELECT '---------After fuse result-----------'; -SELECT 'quantile:'; -EXPLAIN SYNTAX SELECT quantile(0.2)(d), quantile(0.3)(d) FROM datetime; -SELECT quantile(0.2)(d), quantile(0.3)(d) FROM datetime; - -SELECT 'quantileDeterministic:'; 
-EXPLAIN SYNTAX SELECT quantileDeterministic(0.2)(d, 1), quantileDeterministic(0.5)(d, 1) FROM datetime; -SELECT quantileDeterministic(0.2)(d, 1), quantileDeterministic(0.5)(d, 1) FROM datetime; - -SELECT 'quantileExact:'; -EXPLAIN SYNTAX SELECT quantileExact(0.2)(d), quantileExact(0.5)(d) FROM datetime; -SELECT quantileExact(0.2)(d), quantileExact(0.5)(d) FROM datetime; - -SELECT 'quantileExactWeighted:'; -EXPLAIN SYNTAX SELECT quantileExactWeighted(0.2)(d, 1), quantileExactWeighted(0.4)(d, 1), quantileExactWeighted(0.3)(d, 2) FROM datetime; -SELECT quantileExactWeighted(0.2)(d, 1), quantileExactWeighted(0.4)(d, 1), quantileExactWeighted(0.3)(d, 2) FROM datetime; - -SELECT 'quantileTiming:'; -EXPLAIN SYNTAX SELECT quantileTiming(0.2)(d), quantileTiming(0.3)(d), quantileTiming(0.2)(d+1) FROM datetime; -SELECT quantileTiming(0.2)(d), quantileTiming(0.3)(d), quantileTiming(0.2)(d+1) FROM datetime; - -SELECT 'quantileTimingWeighted:'; -EXPLAIN SYNTAX SELECT quantileTimingWeighted(0.2)(d, 1), quantileTimingWeighted(0.3)(d, 1), quantileTimingWeighted(0.2)(d, 2) FROM datetime; -SELECT quantileTimingWeighted(0.2)(d, 1), quantileTimingWeighted(0.3)(d, 1), quantileTimingWeighted(0.2)(d, 2) FROM datetime; - -SELECT 'quantileTDigest:'; -EXPLAIN SYNTAX SELECT quantileTDigest(0.2)(d), quantileTDigest(0.3)(d), quantileTDigest(0.2)(d + 1) FROM datetime; -SELECT quantileTDigest(0.2)(d), quantileTDigest(0.3)(d), quantileTDigest(0.2)(d + 1) FROM datetime; - -SELECT 'quantileTDigestWeighted:'; -EXPLAIN SYNTAX SELECT quantileTDigestWeighted(0.2)(d, 1), quantileTDigestWeighted(0.3)(d, 1), quantileTDigestWeighted(0.4)(d, 2) FROM datetime; -SELECT quantileTDigestWeighted(0.2)(d, 1), quantileTDigestWeighted(0.3)(d, 1), quantileTDigestWeighted(0.4)(d, 2) FROM datetime; - -SELECT 'quantileBFloat16:'; -EXPLAIN SYNTAX SELECT quantileBFloat16(0.2)(d), quantileBFloat16(0.3)(d), quantileBFloat16(0.4)(d + 1) FROM datetime; -SELECT quantileBFloat16(0.2)(d), quantileBFloat16(0.3)(d), quantileBFloat16(0.4)(d + 1) FROM datetime; - -SELECT 'quantileBFloat16Weighted:'; -EXPLAIN SYNTAX SELECT quantileBFloat16Weighted(0.2)(d, 1), quantileBFloat16Weighted(0.3)(d, 1), quantileBFloat16Weighted(0.2)(d, 2) FROM datetime; -SELECT quantileBFloat16Weighted(0.2)(d, 1), quantileBFloat16Weighted(0.3)(d, 1), quantileBFloat16Weighted(0.2)(d, 2) FROM datetime; - -EXPLAIN SYNTAX SELECT quantile(0.2)(d) as k, quantile(0.3)(d) FROM datetime order by quantile(0.2)(d); - -SELECT b, quantile(0.5)(x) as a, quantile(0.9)(x) as y, quantile(0.95)(x) FROM (select number as x, number % 2 as b from numbers(10)) group by b order by b; -EXPLAIN SYNTAX SELECT b, quantile(0.5)(x) as a, quantile(0.9)(x) as y, quantile(0.95)(x) FROM (select number as x, number % 2 as b from numbers(10)) group by b order by b; - --- fuzzer -SELECT quantileDeterministic(0.99)(1023) FROM datetime FORMAT Null; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT quantileTiming(0.5)(NULL, NULL, quantileTiming(-inf)(NULL), NULL) FROM datetime FORMAT Null; -- { serverError ILLEGAL_AGGREGATION } -SELECT quantileTDigest(NULL)(NULL, quantileTDigest(3.14)(NULL, d + NULL), 2.), NULL FORMAT Null; -- { serverError ILLEGAL_AGGREGATION } -SELECT quantile(1, 0.3)(d), quantile(0.3)(d) FROM datetime; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT quantile(quantileDeterministic('', '2.47')('0.02', '0.2', NULL), 0.9)(d), quantile(0.3)(d) FROM datetime; -- { serverError ILLEGAL_AGGREGATION } -SELECT quantileTimingWeighted([[[[['-214748364.8'], NULL]], 
[[[quantileTimingWeighted([[[[['-214748364.8'], NULL], '-922337203.6854775808'], [[['-214748364.7']]], NULL]])([NULL], NULL), '-214748364.7']]], NULL]])([NULL], NULL); -- { serverError ILLEGAL_AGGREGATION } -SELECT quantileTimingWeighted([quantileTimingWeighted(0.5)(1, 1)])(1, 1); -- { serverError ILLEGAL_AGGREGATION } - -DROP TABLE datetime; - -SET optimize_syntax_fuse_functions = 1; -SELECT quantile(1 AS a), quantile(a AS b), quantile(b AS c); diff --git a/tests/queries/0_stateless/01961_roaring_memory_tracking.sql b/tests/queries/0_stateless/01961_roaring_memory_tracking.sql index 9e14bb9e138..85db40f1104 100644 --- a/tests/queries/0_stateless/01961_roaring_memory_tracking.sql +++ b/tests/queries/0_stateless/01961_roaring_memory_tracking.sql @@ -1,4 +1,4 @@ -- Tags: no-replicated-database -SET max_memory_usage = '100M'; +SET max_memory_usage = '75M'; SELECT cityHash64(rand() % 1000) as n, groupBitmapState(number) FROM numbers_mt(2000000000) GROUP BY n FORMAT Null; -- { serverError 241 } diff --git a/tests/queries/0_stateless/02000_join_on_const.sql b/tests/queries/0_stateless/02000_join_on_const.sql index 7496e754a0a..cab5a838250 100644 --- a/tests/queries/0_stateless/02000_join_on_const.sql +++ b/tests/queries/0_stateless/02000_join_on_const.sql @@ -43,14 +43,29 @@ SELECT * FROM t1 RIGHT JOIN t2 ON NULL ORDER BY t1.id NULLS FIRST, t2.id SETTING SELECT '- full -'; SELECT * FROM t1 FULL JOIN t2 ON NULL ORDER BY t1.id NULLS FIRST, t2.id SETTINGS join_use_nulls = 1; -SELECT * FROM t1 JOIN t2 ON 1 = 1 SETTINGS join_algorithm = 'full_sorting_merge'; -- { serverError 48 } -SELECT * FROM t1 JOIN t2 ON 1 = 1 SETTINGS join_algorithm = 'partial_merge'; -- { serverError 48 } -SELECT * FROM t1 JOIN t2 ON 1 = 1 SETTINGS join_algorithm = 'auto'; -- { serverError 48 } -SELECT * FROM t1 JOIN t2 ON NULL SETTINGS join_algorithm = 'full_sorting_merge'; -- { serverError 48 } -SELECT * FROM t1 JOIN t2 ON NULL SETTINGS join_algorithm = 'partial_merge'; -- { serverError 48 } -SELECT * FROM t1 LEFT JOIN t2 ON NULL SETTINGS join_algorithm = 'partial_merge'; -- { serverError 48 } -SELECT * FROM t1 RIGHT JOIN t2 ON NULL SETTINGS join_algorithm = 'auto'; -- { serverError 48 } -SELECT * FROM t1 FULL JOIN t2 ON NULL SETTINGS join_algorithm = 'partial_merge'; -- { serverError 48 } +-- In these cases we get AMBIGUOUS_COLUMN_NAME instead of INVALID_JOIN_ON_EXPRESSION, +-- because some function in the ON expression is not constant itself (only its result is constant) +SELECT * FROM t1 JOIN t2 ON 1 = 1 SETTINGS join_algorithm = 'full_sorting_merge'; -- { serverError AMBIGUOUS_COLUMN_NAME } +SELECT * FROM t1 JOIN t2 ON 1 = 1 SETTINGS join_algorithm = 'partial_merge'; -- { serverError AMBIGUOUS_COLUMN_NAME } +SELECT * FROM t1 JOIN t2 ON 1 = 1 SETTINGS join_algorithm = 'auto'; -- { serverError AMBIGUOUS_COLUMN_NAME } + +SELECT * FROM t1 JOIN t2 ON NULL SETTINGS join_algorithm = 'full_sorting_merge'; -- { serverError INVALID_JOIN_ON_EXPRESSION } +SELECT * FROM t1 JOIN t2 ON NULL SETTINGS join_algorithm = 'partial_merge'; -- { serverError INVALID_JOIN_ON_EXPRESSION } +SELECT * FROM t1 LEFT JOIN t2 ON NULL SETTINGS join_algorithm = 'partial_merge'; -- { serverError INVALID_JOIN_ON_EXPRESSION } +SELECT * FROM t1 RIGHT JOIN t2 ON NULL SETTINGS join_algorithm = 'auto'; -- { serverError INVALID_JOIN_ON_EXPRESSION } +SELECT * FROM t1 FULL JOIN t2 ON NULL SETTINGS join_algorithm = 'partial_merge'; -- { serverError INVALID_JOIN_ON_EXPRESSION } + +-- mixing of constant and non-constant expressions in ON is not allowed +SELECT * FROM t1 JOIN
t2 ON t1.id = t2.id AND 1 == 1; -- { serverError AMBIGUOUS_COLUMN_NAME } +SELECT * FROM t1 JOIN t2 ON t1.id = t2.id AND 1 == 2; -- { serverError AMBIGUOUS_COLUMN_NAME } + +SELECT * FROM t1 JOIN t2 ON t1.id = t2.id AND 1 != 1; -- { serverError INVALID_JOIN_ON_EXPRESSION } +SELECT * FROM t1 JOIN t2 ON t1.id = t2.id AND NULL; -- { serverError INVALID_JOIN_ON_EXPRESSION } +SELECT * FROM t1 JOIN t2 ON t1.id = t2.id AND 'aaa'; -- { serverError INVALID_JOIN_ON_EXPRESSION } +SELECT * FROM t1 JOIN t2 ON 'aaa'; -- { serverError INVALID_JOIN_ON_EXPRESSION } + +SELECT * FROM t1 JOIN t2 ON t1.id = t2.id AND 0; -- { serverError INVALID_JOIN_ON_EXPRESSION } +SELECT * FROM t1 JOIN t2 ON t1.id = t2.id AND 1; -- { serverError INVALID_JOIN_ON_EXPRESSION } DROP TABLE IF EXISTS t1; DROP TABLE IF EXISTS t2; diff --git a/tests/queries/0_stateless/02015_async_inserts_2.sh b/tests/queries/0_stateless/02015_async_inserts_2.sh index fd20f846897..8934dcc66e0 100755 --- a/tests/queries/0_stateless/02015_async_inserts_2.sh +++ b/tests/queries/0_stateless/02015_async_inserts_2.sh @@ -5,10 +5,10 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -url="${CLICKHOUSE_URL}&async_insert=1&wait_for_async_insert=1&max_insert_threads=0&group_by_two_level_threshold=100000&group_by_two_level_threshold_bytes=50000000&distributed_aggregation_memory_efficient=1&fsync_metadata=1&priority=1&output_format_parallel_formatting=0&input_format_parallel_parsing=0&min_chunk_bytes_for_parallel_parsing=4031398&max_read_buffer_size=554729&prefer_localhost_replica=0&max_block_size=51672&max_threads=20" +url="${CLICKHOUSE_URL}&async_insert=1&wait_for_async_insert=1&async_insert_busy_timeout_ms=600000&async_insert_max_query_number=3" -${CLICKHOUSE_CLIENT} --max_insert_threads=0 --group_by_two_level_threshold=100000 --group_by_two_level_threshold_bytes=50000000 --distributed_aggregation_memory_efficient=1 --fsync_metadata=1 --priority=1 --output_format_parallel_formatting=0 --input_format_parallel_parsing=0 --min_chunk_bytes_for_parallel_parsing=4031398 --max_read_buffer_size=554729 --prefer_localhost_replica=0 --max_block_size=51672 --max_threads=20 -q "DROP TABLE IF EXISTS async_inserts" -${CLICKHOUSE_CLIENT} --max_insert_threads=0 --group_by_two_level_threshold=100000 --group_by_two_level_threshold_bytes=50000000 --distributed_aggregation_memory_efficient=1 --fsync_metadata=1 --priority=1 --output_format_parallel_formatting=0 --input_format_parallel_parsing=0 --min_chunk_bytes_for_parallel_parsing=4031398 --max_read_buffer_size=554729 --prefer_localhost_replica=0 --max_block_size=51672 --max_threads=20 -q "CREATE TABLE async_inserts (id UInt32, s String) ENGINE = MergeTree ORDER BY id" +${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS async_inserts" +${CLICKHOUSE_CLIENT} -q "CREATE TABLE async_inserts (id UInt32, s String) ENGINE = MergeTree ORDER BY id" ${CLICKHOUSE_CURL} -sS "$url" -d 'INSERT INTO async_inserts FORMAT CSV 1,"a" @@ -23,7 +23,7 @@ ${CLICKHOUSE_CURL} -sS "$url" -d 'INSERT INTO async_inserts FORMAT CSV wait -${CLICKHOUSE_CLIENT} --max_insert_threads=0 --group_by_two_level_threshold=100000 --group_by_two_level_threshold_bytes=50000000 --distributed_aggregation_memory_efficient=1 --fsync_metadata=1 --priority=1 --output_format_parallel_formatting=0 --input_format_parallel_parsing=0 --min_chunk_bytes_for_parallel_parsing=4031398 --max_read_buffer_size=554729 --prefer_localhost_replica=0 --max_block_size=51672 --max_threads=20 -q "SELECT * FROM async_inserts ORDER 
BY id" -${CLICKHOUSE_CLIENT} --max_insert_threads=0 --group_by_two_level_threshold=100000 --group_by_two_level_threshold_bytes=50000000 --distributed_aggregation_memory_efficient=1 --fsync_metadata=1 --priority=1 --output_format_parallel_formatting=0 --input_format_parallel_parsing=0 --min_chunk_bytes_for_parallel_parsing=4031398 --max_read_buffer_size=554729 --prefer_localhost_replica=0 --max_block_size=51672 --max_threads=20 -q "SELECT name, rows, level FROM system.parts WHERE table = 'async_inserts' AND database = '$CLICKHOUSE_DATABASE' ORDER BY name" +${CLICKHOUSE_CLIENT} -q "SELECT * FROM async_inserts ORDER BY id" +${CLICKHOUSE_CLIENT} -q "SELECT name, rows, level FROM system.parts WHERE table = 'async_inserts' AND database = '$CLICKHOUSE_DATABASE' ORDER BY name" -${CLICKHOUSE_CLIENT} --max_insert_threads=0 --group_by_two_level_threshold=100000 --group_by_two_level_threshold_bytes=50000000 --distributed_aggregation_memory_efficient=1 --fsync_metadata=1 --priority=1 --output_format_parallel_formatting=0 --input_format_parallel_parsing=0 --min_chunk_bytes_for_parallel_parsing=4031398 --max_read_buffer_size=554729 --prefer_localhost_replica=0 --max_block_size=51672 --max_threads=20 -q "DROP TABLE async_inserts" +${CLICKHOUSE_CLIENT} -q "DROP TABLE async_inserts" diff --git a/tests/queries/0_stateless/02020_exponential_smoothing.reference b/tests/queries/0_stateless/02020_exponential_smoothing.reference index 95f5cb0c310..1e98bca8a23 100644 --- a/tests/queries/0_stateless/02020_exponential_smoothing.reference +++ b/tests/queries/0_stateless/02020_exponential_smoothing.reference @@ -31,14 +31,14 @@ exponentialMovingAverage 9 9 8.002 1 0 0.067 ███▎ 0 1 0.062 ███ -0 2 0.058 ██▊ +0 2 0.058 ██▉ 0 3 0.054 ██▋ 0 4 0.051 ██▌ 0 5 0.047 ██▎ 0 6 0.044 ██▏ 0 7 0.041 ██ -0 8 0.038 █▊ -0 9 0.036 █▋ +0 8 0.038 █▉ +0 9 0.036 █▊ 0 10 0.033 █▋ 0 11 0.031 █▌ 0 12 0.029 █▍ @@ -47,16 +47,16 @@ exponentialMovingAverage 0 15 0.024 █▏ 0 16 0.022 █ 0 17 0.021 █ -0 18 0.019 ▊ -0 19 0.018 ▊ -0 20 0.017 ▋ -0 21 0.016 ▋ +0 18 0.019 ▉ +0 19 0.018 ▉ +0 20 0.017 ▊ +0 21 0.016 ▊ 0 22 0.015 ▋ 0 23 0.014 ▋ 0 24 0.013 ▋ -1 25 0.079 ███▊ +1 25 0.079 ███▉ 1 26 0.14 ███████ -1 27 0.198 █████████▊ +1 27 0.198 █████████▉ 1 28 0.252 ████████████▌ 1 29 0.302 ███████████████ 1 30 0.349 █████████████████▍ @@ -64,68 +64,68 @@ exponentialMovingAverage 1 32 0.433 █████████████████████▋ 1 33 0.471 ███████████████████████▌ 1 34 0.506 █████████████████████████▎ -1 35 0.539 ██████████████████████████▊ +1 35 0.539 ██████████████████████████▉ 1 36 0.57 ████████████████████████████▌ -1 37 0.599 █████████████████████████████▊ +1 37 0.599 █████████████████████████████▉ 1 38 0.626 ███████████████████████████████▎ 1 39 0.651 ████████████████████████████████▌ 1 40 0.674 █████████████████████████████████▋ -1 41 0.696 ██████████████████████████████████▋ -1 42 0.716 ███████████████████████████████████▋ -1 43 0.735 ████████████████████████████████████▋ +1 41 0.696 ██████████████████████████████████▊ +1 42 0.716 ███████████████████████████████████▊ +1 43 0.735 ████████████████████████████████████▊ 1 44 0.753 █████████████████████████████████████▋ 1 45 0.77 ██████████████████████████████████████▍ 1 46 0.785 ███████████████████████████████████████▎ -1 47 0.8 ███████████████████████████████████████▊ +1 47 0.8 ███████████████████████████████████████▉ 1 48 0.813 ████████████████████████████████████████▋ 1 49 0.825 █████████████████████████████████████████▎ 1 0 0.5 █████████████████████████ 0 1 0.25 ████████████▌ 0 2 0.125 ██████▎ -0 3 0.062 ███ +0 3 0.062 
███▏ 0 4 0.031 █▌ -1 5 0.516 █████████████████████████▋ -0 6 0.258 ████████████▊ +1 5 0.516 █████████████████████████▊ +0 6 0.258 ████████████▉ 0 7 0.129 ██████▍ 0 8 0.064 ███▏ 0 9 0.032 █▌ -1 10 0.516 █████████████████████████▋ -0 11 0.258 ████████████▊ +1 10 0.516 █████████████████████████▊ +0 11 0.258 ████████████▉ 0 12 0.129 ██████▍ 0 13 0.065 ███▏ 0 14 0.032 █▌ -1 15 0.516 █████████████████████████▋ -0 16 0.258 ████████████▊ +1 15 0.516 █████████████████████████▊ +0 16 0.258 ████████████▉ 0 17 0.129 ██████▍ 0 18 0.065 ███▏ 0 19 0.032 █▌ -1 20 0.516 █████████████████████████▋ -0 21 0.258 ████████████▊ +1 20 0.516 █████████████████████████▊ +0 21 0.258 ████████████▉ 0 22 0.129 ██████▍ 0 23 0.065 ███▏ 0 24 0.032 █▌ -1 25 0.516 █████████████████████████▋ -0 26 0.258 ████████████▊ +1 25 0.516 █████████████████████████▊ +0 26 0.258 ████████████▉ 0 27 0.129 ██████▍ 0 28 0.065 ███▏ 0 29 0.032 █▌ -1 30 0.516 █████████████████████████▋ -0 31 0.258 ████████████▊ +1 30 0.516 █████████████████████████▊ +0 31 0.258 ████████████▉ 0 32 0.129 ██████▍ 0 33 0.065 ███▏ 0 34 0.032 █▌ -1 35 0.516 █████████████████████████▋ -0 36 0.258 ████████████▊ +1 35 0.516 █████████████████████████▊ +0 36 0.258 ████████████▉ 0 37 0.129 ██████▍ 0 38 0.065 ███▏ 0 39 0.032 █▌ -1 40 0.516 █████████████████████████▋ -0 41 0.258 ████████████▊ +1 40 0.516 █████████████████████████▊ +0 41 0.258 ████████████▉ 0 42 0.129 ██████▍ 0 43 0.065 ███▏ 0 44 0.032 █▌ -1 45 0.516 █████████████████████████▋ -0 46 0.258 ████████████▊ +1 45 0.516 █████████████████████████▊ +0 46 0.258 ████████████▉ 0 47 0.129 ██████▍ 0 48 0.065 ███▏ 0 49 0.032 █▌ @@ -170,15 +170,15 @@ exponentialTimeDecayedSum 0 7 0.497 ██▍ 0 8 0.449 ██▏ 0 9 0.407 ██ -0 10 0.368 █▋ +0 10 0.368 █▊ 0 11 0.333 █▋ 0 12 0.301 █▌ 0 13 0.273 █▎ 0 14 0.247 █▏ 0 15 0.223 █ 0 16 0.202 █ -0 17 0.183 ▊ -0 18 0.165 ▋ +0 17 0.183 ▉ +0 18 0.165 ▊ 0 19 0.15 ▋ 0 20 0.135 ▋ 0 21 0.122 ▌ @@ -186,80 +186,80 @@ exponentialTimeDecayedSum 0 23 0.1 ▌ 0 24 0.091 ▍ 1 25 1.082 █████▍ -1 26 1.979 █████████▊ -1 27 2.791 █████████████▊ +1 26 1.979 █████████▉ +1 27 2.791 █████████████▉ 1 28 3.525 █████████████████▋ -1 29 4.19 ████████████████████▊ -1 30 4.791 ███████████████████████▊ +1 29 4.19 ████████████████████▉ +1 30 4.791 ███████████████████████▉ 1 31 5.335 ██████████████████████████▋ 1 32 5.827 █████████████████████████████▏ 1 33 6.273 ███████████████████████████████▎ 1 34 6.676 █████████████████████████████████▍ 1 35 7.041 ███████████████████████████████████▏ -1 36 7.371 ████████████████████████████████████▋ +1 36 7.371 ████████████████████████████████████▊ 1 37 7.669 ██████████████████████████████████████▎ 1 38 7.939 ███████████████████████████████████████▋ -1 39 8.184 ████████████████████████████████████████▊ +1 39 8.184 ████████████████████████████████████████▉ 1 40 8.405 ██████████████████████████████████████████ 1 41 8.605 ███████████████████████████████████████████ -1 42 8.786 ███████████████████████████████████████████▊ -1 43 8.95 ████████████████████████████████████████████▋ +1 42 8.786 ███████████████████████████████████████████▉ +1 43 8.95 ████████████████████████████████████████████▊ 1 44 9.098 █████████████████████████████████████████████▍ 1 45 9.233 ██████████████████████████████████████████████▏ -1 46 9.354 ██████████████████████████████████████████████▋ +1 46 9.354 ██████████████████████████████████████████████▊ 1 47 9.464 ███████████████████████████████████████████████▎ -1 48 9.563 ███████████████████████████████████████████████▋ +1 48 9.563 
███████████████████████████████████████████████▊ 1 49 9.653 ████████████████████████████████████████████████▎ 1 0 1 ██████████████████████████████████████████████████ 0 1 0.368 ██████████████████▍ -0 2 0.135 ██████▋ +0 2 0.135 ██████▊ 0 3 0.05 ██▍ -0 4 0.018 ▊ +0 4 0.018 ▉ 1 5 1.007 ██████████████████████████████████████████████████ 0 6 0.37 ██████████████████▌ -0 7 0.136 ██████▋ +0 7 0.136 ██████▊ 0 8 0.05 ██▌ -0 9 0.018 ▊ +0 9 0.018 ▉ 1 10 1.007 ██████████████████████████████████████████████████ 0 11 0.37 ██████████████████▌ -0 12 0.136 ██████▋ +0 12 0.136 ██████▊ 0 13 0.05 ██▌ -0 14 0.018 ▊ +0 14 0.018 ▉ 1 15 1.007 ██████████████████████████████████████████████████ 0 16 0.37 ██████████████████▌ -0 17 0.136 ██████▋ +0 17 0.136 ██████▊ 0 18 0.05 ██▌ -0 19 0.018 ▊ +0 19 0.018 ▉ 1 20 1.007 ██████████████████████████████████████████████████ 0 21 0.37 ██████████████████▌ -0 22 0.136 ██████▋ +0 22 0.136 ██████▊ 0 23 0.05 ██▌ -0 24 0.018 ▊ +0 24 0.018 ▉ 1 25 1.007 ██████████████████████████████████████████████████ 0 26 0.37 ██████████████████▌ -0 27 0.136 ██████▋ +0 27 0.136 ██████▊ 0 28 0.05 ██▌ -0 29 0.018 ▊ +0 29 0.018 ▉ 1 30 1.007 ██████████████████████████████████████████████████ 0 31 0.37 ██████████████████▌ -0 32 0.136 ██████▋ +0 32 0.136 ██████▊ 0 33 0.05 ██▌ -0 34 0.018 ▊ +0 34 0.018 ▉ 1 35 1.007 ██████████████████████████████████████████████████ 0 36 0.37 ██████████████████▌ -0 37 0.136 ██████▋ +0 37 0.136 ██████▊ 0 38 0.05 ██▌ -0 39 0.018 ▊ +0 39 0.018 ▉ 1 40 1.007 ██████████████████████████████████████████████████ 0 41 0.37 ██████████████████▌ -0 42 0.136 ██████▋ +0 42 0.136 ██████▊ 0 43 0.05 ██▌ -0 44 0.018 ▊ +0 44 0.018 ▉ 1 45 1.007 ██████████████████████████████████████████████████ 0 46 0.37 ██████████████████▌ -0 47 0.136 ██████▋ +0 47 0.136 ██████▊ 0 48 0.05 ██▌ -0 49 0.018 ▊ +0 49 0.018 ▉ exponentialTimeDecayedMax 1 0 1 0 1 0.368 @@ -301,15 +301,15 @@ exponentialTimeDecayedMax 0 7 0.497 ██▍ 0 8 0.449 ██▏ 0 9 0.407 ██ -0 10 0.368 █▋ +0 10 0.368 █▊ 0 11 0.333 █▋ 0 12 0.301 █▌ 0 13 0.273 █▎ 0 14 0.247 █▏ 0 15 0.223 █ 0 16 0.202 █ -0 17 0.183 ▊ -0 18 0.165 ▋ +0 17 0.183 ▉ +0 18 0.165 ▊ 0 19 0.15 ▋ 0 20 0.135 ▋ 0 21 0.122 ▌ @@ -343,54 +343,54 @@ exponentialTimeDecayedMax 1 49 1 █████ 1 0 1 ██████████████████████████████████████████████████ 0 1 0.368 ██████████████████▍ -0 2 0.135 ██████▋ +0 2 0.135 ██████▊ 0 3 0.05 ██▍ -0 4 0.018 ▊ +0 4 0.018 ▉ 1 5 1 ██████████████████████████████████████████████████ 0 6 0.368 ██████████████████▍ -0 7 0.135 ██████▋ +0 7 0.135 ██████▊ 0 8 0.05 ██▍ -0 9 0.018 ▊ +0 9 0.018 ▉ 1 10 1 ██████████████████████████████████████████████████ 0 11 0.368 ██████████████████▍ -0 12 0.135 ██████▋ +0 12 0.135 ██████▊ 0 13 0.05 ██▍ -0 14 0.018 ▊ +0 14 0.018 ▉ 1 15 1 ██████████████████████████████████████████████████ 0 16 0.368 ██████████████████▍ -0 17 0.135 ██████▋ +0 17 0.135 ██████▊ 0 18 0.05 ██▍ -0 19 0.018 ▊ +0 19 0.018 ▉ 1 20 1 ██████████████████████████████████████████████████ 0 21 0.368 ██████████████████▍ -0 22 0.135 ██████▋ +0 22 0.135 ██████▊ 0 23 0.05 ██▍ -0 24 0.018 ▊ +0 24 0.018 ▉ 1 25 1 ██████████████████████████████████████████████████ 0 26 0.368 ██████████████████▍ -0 27 0.135 ██████▋ +0 27 0.135 ██████▊ 0 28 0.05 ██▍ -0 29 0.018 ▊ +0 29 0.018 ▉ 1 30 1 ██████████████████████████████████████████████████ 0 31 0.368 ██████████████████▍ -0 32 0.135 ██████▋ +0 32 0.135 ██████▊ 0 33 0.05 ██▍ -0 34 0.018 ▊ +0 34 0.018 ▉ 1 35 1 ██████████████████████████████████████████████████ 0 36 0.368 ██████████████████▍ -0 37 0.135 ██████▋ +0 37 0.135 ██████▊ 0 38 0.05 
██▍ -0 39 0.018 ▊ +0 39 0.018 ▉ 1 40 1 ██████████████████████████████████████████████████ 0 41 0.368 ██████████████████▍ -0 42 0.135 ██████▋ +0 42 0.135 ██████▊ 0 43 0.05 ██▍ -0 44 0.018 ▊ +0 44 0.018 ▉ 1 45 1 ██████████████████████████████████████████████████ 0 46 0.368 ██████████████████▍ -0 47 0.135 ██████▋ +0 47 0.135 ██████▊ 0 48 0.05 ██▍ -0 49 0.018 ▊ +0 49 0.018 ▉ exponentialTimeDecayedCount 1 0 1 0 1 1.368 @@ -428,19 +428,19 @@ exponentialTimeDecayedCount 0 3 3.038 ███████████████▏ 0 4 3.487 █████████████████▍ 0 5 3.855 ███████████████████▎ -0 6 4.156 ████████████████████▋ +0 6 4.156 ████████████████████▊ 0 7 4.403 ██████████████████████ 0 8 4.605 ███████████████████████ -0 9 4.77 ███████████████████████▋ +0 9 4.77 ███████████████████████▊ 0 10 4.905 ████████████████████████▌ 0 11 5.016 █████████████████████████ 0 12 5.107 █████████████████████████▌ -0 13 5.181 █████████████████████████▊ +0 13 5.181 █████████████████████████▉ 0 14 5.242 ██████████████████████████▏ 0 15 5.292 ██████████████████████████▍ 0 16 5.333 ██████████████████████████▋ -0 17 5.366 ██████████████████████████▋ -0 18 5.393 ██████████████████████████▊ +0 17 5.366 ██████████████████████████▊ +0 18 5.393 ██████████████████████████▉ 0 19 5.416 ███████████████████████████ 0 20 5.434 ███████████████████████████▏ 0 21 5.449 ███████████████████████████▏ @@ -473,11 +473,11 @@ exponentialTimeDecayedCount 1 48 5.516 ███████████████████████████▌ 1 49 5.516 ███████████████████████████▌ 1 0 1 ██▌ -0 1 1.905 ████▋ -0 2 2.724 ██████▋ +0 1 1.905 ████▊ +0 2 2.724 ██████▊ 0 3 3.464 ████████▋ 0 4 4.135 ██████████▎ -1 5 4.741 ███████████▋ +1 5 4.741 ███████████▊ 0 6 5.29 █████████████▏ 0 7 5.787 ██████████████▍ 0 8 6.236 ███████████████▌ @@ -485,23 +485,23 @@ exponentialTimeDecayedCount 1 10 7.01 █████████████████▌ 0 11 7.343 ██████████████████▎ 0 12 7.644 ███████████████████ -0 13 7.917 ███████████████████▋ +0 13 7.917 ███████████████████▊ 0 14 8.164 ████████████████████▍ -1 15 8.387 ████████████████████▊ +1 15 8.387 ████████████████████▉ 0 16 8.589 █████████████████████▍ -0 17 8.771 █████████████████████▊ +0 17 8.771 █████████████████████▉ 0 18 8.937 ██████████████████████▎ 0 19 9.086 ██████████████████████▋ 1 20 9.222 ███████████████████████ 0 21 9.344 ███████████████████████▎ 0 22 9.455 ███████████████████████▋ -0 23 9.555 ███████████████████████▊ +0 23 9.555 ███████████████████████▉ 0 24 9.646 ████████████████████████ 1 25 9.728 ████████████████████████▎ 0 26 9.802 ████████████████████████▌ 0 27 9.869 ████████████████████████▋ -0 28 9.93 ████████████████████████▋ -0 29 9.985 ████████████████████████▊ +0 28 9.93 ████████████████████████▊ +0 29 9.985 ████████████████████████▉ 1 30 10.035 █████████████████████████ 0 31 10.08 █████████████████████████▏ 0 32 10.121 █████████████████████████▎ @@ -511,12 +511,12 @@ exponentialTimeDecayedCount 0 36 10.249 █████████████████████████▌ 0 37 10.273 █████████████████████████▋ 0 38 10.296 █████████████████████████▋ -0 39 10.316 █████████████████████████▋ -1 40 10.334 █████████████████████████▋ -0 41 10.351 █████████████████████████▊ -0 42 10.366 █████████████████████████▊ -0 43 10.379 █████████████████████████▊ -0 44 10.392 █████████████████████████▊ +0 39 10.316 █████████████████████████▊ +1 40 10.334 █████████████████████████▊ +0 41 10.351 █████████████████████████▉ +0 42 10.366 █████████████████████████▉ +0 43 10.379 █████████████████████████▉ +0 44 10.392 █████████████████████████▉ 1 45 10.403 ██████████████████████████ 0 46 10.413 ██████████████████████████ 0 47 10.422 
██████████████████████████ @@ -554,13 +554,13 @@ exponentialTimeDecayedAvg 8 8 7.419 9 9 8.418 1 0 1 ██████████ -0 1 0.475 ████▋ +0 1 0.475 ████▊ 0 2 0.301 ███ 0 3 0.214 ██▏ 0 4 0.162 █▌ 0 5 0.128 █▎ 0 6 0.104 █ -0 7 0.086 ▋ +0 7 0.086 ▊ 0 8 0.072 ▋ 0 9 0.061 ▌ 0 10 0.052 ▌ @@ -580,42 +580,42 @@ exponentialTimeDecayedAvg 0 24 0.009 1 25 0.111 █ 1 26 0.202 ██ -1 27 0.283 ██▋ +1 27 0.283 ██▊ 1 28 0.355 ███▌ 1 29 0.42 ████▏ -1 30 0.477 ████▋ +1 30 0.477 ████▊ 1 31 0.529 █████▎ -1 32 0.576 █████▋ +1 32 0.576 █████▊ 1 33 0.618 ██████▏ 1 34 0.655 ██████▌ -1 35 0.689 ██████▊ +1 35 0.689 ██████▉ 1 36 0.719 ███████▏ 1 37 0.747 ███████▍ 1 38 0.771 ███████▋ -1 39 0.793 ███████▊ +1 39 0.793 ███████▉ 1 40 0.813 ████████▏ 1 41 0.831 ████████▎ 1 42 0.848 ████████▍ 1 43 0.862 ████████▌ -1 44 0.876 ████████▋ -1 45 0.888 ████████▊ -1 46 0.898 ████████▊ +1 44 0.876 ████████▊ +1 45 0.888 ████████▉ +1 46 0.898 ████████▉ 1 47 0.908 █████████ 1 48 0.917 █████████▏ 1 49 0.925 █████████▏ 1 0 1 ██████████████████████████████████████████████████ -0 1 0.498 █████████████████████████████████████████████████▋ +0 1 0.498 █████████████████████████████████████████████████▊ 0 2 0.33 █████████████████████████████████ 0 3 0.246 ████████████████████████▋ 0 4 0.196 ███████████████████▌ 1 5 0.333 █████████████████████████████████▎ 0 6 0.284 ████████████████████████████▍ -0 7 0.248 ████████████████████████▋ -0 8 0.219 █████████████████████▊ +0 7 0.248 ████████████████████████▊ +0 8 0.219 █████████████████████▉ 0 9 0.196 ███████████████████▌ 1 10 0.273 ███████████████████████████▎ -0 11 0.249 ████████████████████████▊ -0 12 0.229 ██████████████████████▋ +0 11 0.249 ████████████████████████▉ +0 12 0.229 ██████████████████████▊ 0 13 0.211 █████████████████████ 0 14 0.196 ███████████████████▌ 1 15 0.25 █████████████████████████ @@ -623,7 +623,7 @@ exponentialTimeDecayedAvg 0 17 0.22 ██████████████████████ 0 18 0.207 ████████████████████▋ 0 19 0.196 ███████████████████▌ -1 20 0.238 ███████████████████████▋ +1 20 0.238 ███████████████████████▊ 0 21 0.226 ██████████████████████▌ 0 22 0.215 █████████████████████▌ 0 23 0.205 ████████████████████▌ @@ -634,21 +634,21 @@ exponentialTimeDecayedAvg 0 28 0.204 ████████████████████▍ 0 29 0.196 ███████████████████▌ 1 30 0.226 ██████████████████████▌ -0 31 0.218 █████████████████████▋ +0 31 0.218 █████████████████████▊ 0 32 0.21 █████████████████████ 0 33 0.203 ████████████████████▎ 0 34 0.196 ███████████████████▌ 1 35 0.222 ██████████████████████▏ 0 36 0.215 █████████████████████▌ -0 37 0.209 ████████████████████▋ +0 37 0.209 ████████████████████▊ 0 38 0.202 ████████████████████▏ 0 39 0.196 ███████████████████▌ -1 40 0.22 █████████████████████▊ +1 40 0.22 █████████████████████▉ 0 41 0.213 █████████████████████▎ 0 42 0.207 ████████████████████▋ 0 43 0.202 ████████████████████▏ 0 44 0.196 ███████████████████▌ -1 45 0.218 █████████████████████▋ +1 45 0.218 █████████████████████▊ 0 46 0.212 █████████████████████▏ 0 47 0.206 ████████████████████▋ 0 48 0.201 ████████████████████ diff --git a/tests/queries/0_stateless/02102_row_binary_with_names_and_types.reference b/tests/queries/0_stateless/02102_row_binary_with_names_and_types.reference index 9011f20cd6a..4b057f29039 100644 --- a/tests/queries/0_stateless/02102_row_binary_with_names_and_types.reference +++ b/tests/queries/0_stateless/02102_row_binary_with_names_and_types.reference @@ -8,7 +8,7 @@ 1 default 1970-01-01 1 1970-01-01 1 1970-01-01 -OK +1 1 default 1970-01-01 -OK -OK +1 +1 diff --git 
a/tests/queries/0_stateless/02102_row_binary_with_names_and_types.sh b/tests/queries/0_stateless/02102_row_binary_with_names_and_types.sh index e7307ad3ad5..ba7aac94ddb 100755 --- a/tests/queries/0_stateless/02102_row_binary_with_names_and_types.sh +++ b/tests/queries/0_stateless/02102_row_binary_with_names_and_types.sh @@ -55,7 +55,7 @@ $CLICKHOUSE_CLIENT -q "SELECT * FROM test_02102" $CLICKHOUSE_CLIENT -q "TRUNCATE TABLE test_02102" -$CLICKHOUSE_CLIENT -q "SELECT toUInt32(1) AS x, [[1, 2, 3], [4, 5], []] as a FORMAT RowBinaryWithNames" | $CLICKHOUSE_CLIENT --input_format_skip_unknown_fields=1 --input_format_with_names_use_header=1 -q "INSERT INTO test_02102 FORMAT RowBinaryWithNames" 2>&1 | grep -F -q "CANNOT_SKIP_UNKNOWN_FIELD" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT toUInt32(1) AS x, [[1, 2, 3], [4, 5], []] as a FORMAT RowBinaryWithNames" | $CLICKHOUSE_CLIENT --input_format_skip_unknown_fields=1 --input_format_with_names_use_header=1 -q "INSERT INTO test_02102 FORMAT RowBinaryWithNames" 2>&1 | grep -F -c "CANNOT_SKIP_UNKNOWN_FIELD" $CLICKHOUSE_CLIENT -q "SELECT toUInt32(1) AS x, [[1, 2, 3], [4, 5], []] as a FORMAT RowBinaryWithNamesAndTypes" | $CLICKHOUSE_CLIENT --input_format_skip_unknown_fields=1 --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02102 FORMAT RowBinaryWithNamesAndTypes" @@ -63,9 +63,8 @@ $CLICKHOUSE_CLIENT -q "SELECT * FROM test_02102" $CLICKHOUSE_CLIENT -q "TRUNCATE TABLE test_02102" -$CLICKHOUSE_CLIENT -q "SELECT 'text' AS x, toDate('2020-01-01') AS y, toUInt32(1) AS z FORMAT RowBinaryWithNamesAndTypes" | $CLICKHOUSE_CLIENT --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02102 FORMAT RowBinaryWithNamesAndTypes" 2>&1 | grep -F -q "INCORRECT_DATA" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT 'text' AS x, toDate('2020-01-01') AS y, toUInt32(1) AS z FORMAT RowBinaryWithNamesAndTypes" | $CLICKHOUSE_CLIENT --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02102 FORMAT RowBinaryWithNamesAndTypes" 2>&1 | grep -F -c "INCORRECT_DATA" -$CLICKHOUSE_CLIENT -q "SELECT toUInt32(1) AS x, 'text' as z, toDate('2020-01-01') AS y FORMAT RowBinaryWithNamesAndTypes" | $CLICKHOUSE_CLIENT --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02102 FORMAT RowBinaryWithNamesAndTypes" 2>&1 | grep -F -q "INCORRECT_DATA" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT toUInt32(1) AS x, 'text' as z, toDate('2020-01-01') AS y FORMAT RowBinaryWithNamesAndTypes" | $CLICKHOUSE_CLIENT --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02102 FORMAT RowBinaryWithNamesAndTypes" 2>&1 | grep -F -c "INCORRECT_DATA" $CLICKHOUSE_CLIENT -q "DROP TABLE test_02102" - diff --git a/tests/queries/0_stateless/02149_schema_inference_formats_with_schema.reference b/tests/queries/0_stateless/02149_schema_inference_formats_with_schema.reference index b0ec4bef499..b881fce1539 100644 --- a/tests/queries/0_stateless/02149_schema_inference_formats_with_schema.reference +++ b/tests/queries/0_stateless/02149_schema_inference_formats_with_schema.reference @@ -24,12 +24,12 @@ fixed_string Nullable(String) Str: 0 100 Str: 1 200 array Array(Nullable(UInt64)) -tuple Tuple(Nullable(UInt64), Nullable(String)) +tuple Tuple(`1` Nullable(UInt64), `2` Nullable(String)) map Map(String, Nullable(UInt64)) [0,1] (0,'0') {'0':0} [1,2] (1,'1') {'1':1} -nested1 
Array(Tuple(Array(Nullable(UInt64)), Map(String, Nullable(UInt64)))) -nested2 Tuple(Tuple(Array(Array(Nullable(UInt64))), Map(UInt64, Array(Tuple(Nullable(UInt64), Nullable(String))))), Nullable(UInt8)) +nested1 Array(Tuple(`1` Array(Nullable(UInt64)), `2` Map(String, Nullable(UInt64)))) +nested2 Tuple(`1` Tuple(`1` Array(Array(Nullable(UInt64))), `2` Map(UInt64, Array(Tuple(`1` Nullable(UInt64), `2` Nullable(String))))), `2` Nullable(UInt8)) [([0,1],{'42':0}),([],{}),([42],{'42':42})] (([[0],[1],[]],{0:[(0,'42'),(1,'42')]}),42) [([1,2],{'42':1}),([],{}),([42],{'42':42})] (([[1],[2],[]],{1:[(1,'42'),(2,'42')]}),42) ArrowStream @@ -58,12 +58,12 @@ fixed_string Nullable(String) Str: 0 100 Str: 1 200 array Array(Nullable(UInt64)) -tuple Tuple(Nullable(UInt64), Nullable(String)) +tuple Tuple(`1` Nullable(UInt64), `2` Nullable(String)) map Map(String, Nullable(UInt64)) [0,1] (0,'0') {'0':0} [1,2] (1,'1') {'1':1} -nested1 Array(Tuple(Array(Nullable(UInt64)), Map(String, Nullable(UInt64)))) -nested2 Tuple(Tuple(Array(Array(Nullable(UInt64))), Map(UInt64, Array(Tuple(Nullable(UInt64), Nullable(String))))), Nullable(UInt8)) +nested1 Array(Tuple(`1` Array(Nullable(UInt64)), `2` Map(String, Nullable(UInt64)))) +nested2 Tuple(`1` Tuple(`1` Array(Array(Nullable(UInt64))), `2` Map(UInt64, Array(Tuple(`1` Nullable(UInt64), `2` Nullable(String))))), `2` Nullable(UInt8)) [([0,1],{'42':0}),([],{}),([42],{'42':42})] (([[0],[1],[]],{0:[(0,'42'),(1,'42')]}),42) [([1,2],{'42':1}),([],{}),([42],{'42':42})] (([[1],[2],[]],{1:[(1,'42'),(2,'42')]}),42) Parquet @@ -92,12 +92,12 @@ fixed_string Nullable(String) Str: 0 100 Str: 1 200 array Array(Nullable(UInt64)) -tuple Tuple(Nullable(UInt64), Nullable(String)) +tuple Tuple(`1` Nullable(UInt64), `2` Nullable(String)) map Map(String, Nullable(UInt64)) [0,1] (0,'0') {'0':0} [1,2] (1,'1') {'1':1} -nested1 Array(Tuple(Array(Nullable(UInt64)), Map(String, Nullable(UInt64)))) -nested2 Tuple(Tuple(Array(Array(Nullable(UInt64))), Map(UInt64, Array(Tuple(Nullable(UInt64), Nullable(String))))), Nullable(UInt8)) +nested1 Array(Tuple(`1` Array(Nullable(UInt64)), `2` Map(String, Nullable(UInt64)))) +nested2 Tuple(`1` Tuple(`1` Array(Array(Nullable(UInt64))), `2` Map(UInt64, Array(Tuple(`1` Nullable(UInt64), `2` Nullable(String))))), `2` Nullable(UInt8)) [([0,1],{'42':0}),([],{}),([42],{'42':42})] (([[0],[1],[]],{0:[(0,'42'),(1,'42')]}),42) [([1,2],{'42':1}),([],{}),([42],{'42':42})] (([[1],[2],[]],{1:[(1,'42'),(2,'42')]}),42) ORC @@ -126,12 +126,12 @@ fixed_string Nullable(String) Str: 0 100 Str: 1 200 array Array(Nullable(Int64)) -tuple Tuple(Nullable(Int64), Nullable(String)) +tuple Tuple(`1` Nullable(Int64), `2` Nullable(String)) map Map(String, Nullable(Int64)) [0,1] (0,'0') {'0':0} [1,2] (1,'1') {'1':1} -nested1 Array(Tuple(Array(Nullable(Int64)), Map(String, Nullable(Int64)))) -nested2 Tuple(Tuple(Array(Array(Nullable(Int64))), Map(Int64, Array(Tuple(Nullable(Int64), Nullable(String))))), Nullable(Int8)) +nested1 Array(Tuple(`1` Array(Nullable(Int64)), `2` Map(String, Nullable(Int64)))) +nested2 Tuple(`1` Tuple(`1` Array(Array(Nullable(Int64))), `2` Map(Int64, Array(Tuple(`1` Nullable(Int64), `2` Nullable(String))))), `2` Nullable(Int8)) [([0,1],{'42':0}),([],{}),([42],{'42':42})] (([[0],[1],[]],{0:[(0,'42'),(1,'42')]}),42) [([1,2],{'42':1}),([],{}),([42],{'42':42})] (([[1],[2],[]],{1:[(1,'42'),(2,'42')]}),42) Native diff --git a/tests/queries/0_stateless/02179_dict_reload_on_cluster.sql b/tests/queries/0_stateless/02179_dict_reload_on_cluster.sql index 
686025acbf8..f21fa16ba23 100644 --- a/tests/queries/0_stateless/02179_dict_reload_on_cluster.sql +++ b/tests/queries/0_stateless/02179_dict_reload_on_cluster.sql @@ -23,6 +23,7 @@ SELECT query_count FROM system.dictionaries WHERE database = 'dict_db_02179' AND SELECT 'SYSTEM RELOAD DICTIONARIES ON CLUSTER test_shard_localhost'; SET distributed_ddl_output_mode='throw'; +SYSTEM RELOAD DICTIONARIES ON CLUSTER; -- { clientError SYNTAX_ERROR } SYSTEM RELOAD DICTIONARIES ON CLUSTER test_shard_localhost; SET distributed_ddl_output_mode='none'; SELECT query_count FROM system.dictionaries WHERE database = 'dict_db_02179' AND name = 'dict'; diff --git a/tests/queries/0_stateless/02187_msg_pack_uuid.reference b/tests/queries/0_stateless/02187_msg_pack_uuid.reference index c567cc14ad2..68104e8f901 100644 --- a/tests/queries/0_stateless/02187_msg_pack_uuid.reference +++ b/tests/queries/0_stateless/02187_msg_pack_uuid.reference @@ -1,4 +1,4 @@ 5e7084e0-019f-461f-9e70-84e0019f561f 5e7084e0-019f-461f-9e70-84e0019f561f 5e7084e0-019f-461f-9e70-84e0019f561f -5e7084e0-019f-461f-9e70-84e0019f561f UUID +5e7084e0-019f-461f-9e70-84e0019f561f Nullable(UUID) diff --git a/tests/queries/0_stateless/02205_ephemeral_1.reference b/tests/queries/0_stateless/02205_ephemeral_1.reference index ba39033668f..7c034ca72ea 100644 --- a/tests/queries/0_stateless/02205_ephemeral_1.reference +++ b/tests/queries/0_stateless/02205_ephemeral_1.reference @@ -7,7 +7,7 @@ z UInt32 DEFAULT 5 7 5 21 5 x UInt32 DEFAULT y -y UInt32 EPHEMERAL 0 +y UInt32 EPHEMERAL defaultValueOfTypeName(\'UInt32\') z UInt32 DEFAULT 5 1 2 0 2 diff --git a/tests/queries/0_stateless/02207_subseconds_intervals.reference b/tests/queries/0_stateless/02207_subseconds_intervals.reference index f2e40137851..91f0ecb8606 100644 --- a/tests/queries/0_stateless/02207_subseconds_intervals.reference +++ b/tests/queries/0_stateless/02207_subseconds_intervals.reference @@ -60,3 +60,19 @@ test add[...]seconds() 2220-12-12 12:12:12.124 2220-12-12 12:12:12.121 2220-12-12 12:12:12.124456 +test subtract[...]seconds() +- test nanoseconds +2022-12-31 23:59:59.999999999 +2022-12-31 23:59:59.999999900 +2023-01-01 00:00:00.000000001 +2023-01-01 00:00:00.000000100 +- test microseconds +2022-12-31 23:59:59.999999 +2022-12-31 23:59:59.999900 +2023-01-01 00:00:00.000001 +2023-01-01 00:00:00.000100 +- test milliseconds +2022-12-31 23:59:59.999 +2022-12-31 23:59:59.900 +2023-01-01 00:00:00.001 +2023-01-01 00:00:00.100 diff --git a/tests/queries/0_stateless/02207_subseconds_intervals.sql b/tests/queries/0_stateless/02207_subseconds_intervals.sql index a7ce03d9330..c30b3c460dc 100644 --- a/tests/queries/0_stateless/02207_subseconds_intervals.sql +++ b/tests/queries/0_stateless/02207_subseconds_intervals.sql @@ -92,3 +92,22 @@ select addMilliseconds(toDateTime64('1930-12-12 12:12:12.123456', 6), 1); -- Bel select addMilliseconds(toDateTime64('2220-12-12 12:12:12.123', 3), 1); -- Above normal range, source scale matches result select addMilliseconds(toDateTime64('2220-12-12 12:12:12.12', 2), 1); -- Above normal range, source scale less than result select addMilliseconds(toDateTime64('2220-12-12 12:12:12.123456', 6), 1); -- Above normal range, source scale greater than result + +select 'test subtract[...]seconds()'; +select '- test nanoseconds'; +select subtractNanoseconds(toDateTime64('2023-01-01 00:00:00.0000000', 7, 'UTC'), 1); +select subtractNanoseconds(toDateTime64('2023-01-01 00:00:00.0000000', 7, 'UTC'), 100); +select subtractNanoseconds(toDateTime64('2023-01-01 00:00:00.0000000', 7, 
'UTC'), -1); +select subtractNanoseconds(toDateTime64('2023-01-01 00:00:00.0000000', 7, 'UTC'), -100); + +select '- test microseconds'; +select subtractMicroseconds(toDateTime64('2023-01-01 00:00:00.0000', 4, 'UTC'), 1); +select subtractMicroseconds(toDateTime64('2023-01-01 00:00:00.0000', 4, 'UTC'), 100); +select subtractMicroseconds(toDateTime64('2023-01-01 00:00:00.0000', 4, 'UTC'), -1); +select subtractMicroseconds(toDateTime64('2023-01-01 00:00:00.0000', 4, 'UTC'), -100); + +select '- test milliseconds'; +select subtractMilliseconds(toDateTime64('2023-01-01 00:00:00.0', 1, 'UTC'), 1); +select subtractMilliseconds(toDateTime64('2023-01-01 00:00:00.0', 1, 'UTC'), 100); +select subtractMilliseconds(toDateTime64('2023-01-01 00:00:00.0', 1, 'UTC'), -1); +select subtractMilliseconds(toDateTime64('2023-01-01 00:00:00.0', 1, 'UTC'), -100); diff --git a/tests/queries/0_stateless/02223_insert_select_schema_inference.sql b/tests/queries/0_stateless/02223_insert_select_schema_inference.sql index ff39ca83b9b..031ced1b299 100644 --- a/tests/queries/0_stateless/02223_insert_select_schema_inference.sql +++ b/tests/queries/0_stateless/02223_insert_select_schema_inference.sql @@ -1,5 +1,5 @@ drop table if exists test; create table test (x UInt32, y String, d Date) engine=Memory() as select number as x, toString(number) as y, toDate(number) as d from numbers(10); -insert into table function file('data.native.zst') select * from test; +insert into table function file('data.native.zst') select * from test settings engine_file_truncate_on_insert=1; desc file('data.native.zst'); select * from file('data.native.zst'); diff --git a/tests/queries/0_stateless/02226_filesystem_cache_profile_events.reference b/tests/queries/0_stateless/02226_filesystem_cache_profile_events.reference index a2c04163136..d895040ef59 100644 --- a/tests/queries/0_stateless/02226_filesystem_cache_profile_events.reference +++ b/tests/queries/0_stateless/02226_filesystem_cache_profile_events.reference @@ -3,16 +3,13 @@ Using storage policy: s3_cache 0 1 0 0 0 1 0 - Using storage policy: local_cache 1 0 1 0 1 0 0 0 1 0 - Using storage policy: azure_cache 1 0 1 0 1 0 0 0 1 0 - diff --git a/tests/queries/0_stateless/02226_filesystem_cache_profile_events.sh b/tests/queries/0_stateless/02226_filesystem_cache_profile_events.sh index 1807fd6bc8e..96e51a58cc4 100755 --- a/tests/queries/0_stateless/02226_filesystem_cache_profile_events.sh +++ b/tests/queries/0_stateless/02226_filesystem_cache_profile_events.sh @@ -7,15 +7,90 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CUR_DIR"/../shell_config.sh -TMP_PATH=${CLICKHOUSE_TEST_UNIQUE_NAME} -QUERIES_FILE=02226_filesystem_cache_profile_events.sh -TEST_FILE=$CUR_DIR/filesystem_cache_queries/$QUERIES_FILE -for storagePolicy in 's3_cache' 'local_cache' 'azure_cache'; do - echo "Using storage policy: $storagePolicy" - cat $TEST_FILE | sed -e "s/_storagePolicy/${storagePolicy}/" > $TMP_PATH - chmod +x $TMP_PATH - ./$TMP_PATH - rm $TMP_PATH - echo +for STORAGE_POLICY in 's3_cache' 'local_cache' 'azure_cache'; do + echo "Using storage policy: $STORAGE_POLICY" + + clickhouse client --multiquery --multiline --query """ + SET max_memory_usage='20G'; + SET enable_filesystem_cache_on_write_operations = 0; + + DROP TABLE IF EXISTS test_02226; + CREATE TABLE test_02226 (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='$STORAGE_POLICY'; + INSERT INTO test_02226 SELECT * FROM generateRandom('key UInt32, value String') LIMIT 10000; + + SET remote_filesystem_read_method='threadpool'; + """ + + query="SELECT * FROM test_02226 LIMIT 10" + + query_id=$(clickhouse client --query "select queryID() from ($query) limit 1" 2>&1) + + clickhouse client --multiquery --multiline --query """ + SYSTEM FLUSH LOGS; + SELECT ProfileEvents['CachedReadBufferReadFromSourceBytes'] > 0 as remote_fs_read, + ProfileEvents['CachedReadBufferReadFromCacheBytes'] > 0 as remote_fs_cache_read, + ProfileEvents['CachedReadBufferCacheWriteBytes'] > 0 as remote_fs_read_and_download + FROM system.query_log + WHERE query_id='$query_id' + AND type = 'QueryFinish' + AND current_database = currentDatabase() + ORDER BY query_start_time DESC + LIMIT 1; + """ + + clickhouse client --multiquery --multiline --query """ + set remote_filesystem_read_method = 'read'; + set local_filesystem_read_method = 'pread'; + """ + + query_id=$(clickhouse client --query "select queryID() from ($query) limit 1" 2>&1) + + clickhouse client --multiquery --multiline --query """ + SYSTEM FLUSH LOGS; + SELECT ProfileEvents['CachedReadBufferReadFromSourceBytes'] > 0 as remote_fs_read, + ProfileEvents['CachedReadBufferReadFromCacheBytes'] > 0 as remote_fs_cache_read, + ProfileEvents['CachedReadBufferCacheWriteBytes'] > 0 as remote_fs_read_and_download + FROM system.query_log + WHERE query_id='$query_id' + AND type = 'QueryFinish' + AND current_database = currentDatabase() + ORDER BY query_start_time DESC + LIMIT 1; + """ + + + clickhouse client --multiquery --multiline --query """ + set remote_filesystem_read_method='threadpool'; + """ + + clickhouse client --multiquery --multiline --query """ + SELECT * FROM test_02226 WHERE value LIKE '%abc%' ORDER BY value LIMIT 10 FORMAT Null; + + SET enable_filesystem_cache_on_write_operations = 1; + + TRUNCATE TABLE test_02226; + SELECT count() FROM test_02226; + + SYSTEM DROP FILESYSTEM CACHE; + + INSERT INTO test_02226 SELECT * FROM generateRandom('key UInt32, value String') LIMIT 10000; + """ + + query_id=$(clickhouse client --query "select queryID() from ($query) limit 1") + + clickhouse client --multiquery --multiline --query """ + SYSTEM FLUSH LOGS; + SELECT ProfileEvents['CachedReadBufferReadFromSourceBytes'] > 0 as remote_fs_read, + ProfileEvents['CachedReadBufferReadFromCacheBytes'] > 0 as remote_fs_cache_read, + ProfileEvents['CachedReadBufferCacheWriteBytes'] > 0 as remote_fs_read_and_download + FROM system.query_log + WHERE query_id='$query_id' + AND type = 'QueryFinish' + AND current_database = currentDatabase() + ORDER BY query_start_time DESC + LIMIT 1; + + DROP TABLE test_02226; + """ done diff --git 
a/tests/queries/0_stateless/02232_allow_only_replicated_engine.sh b/tests/queries/0_stateless/02232_allow_only_replicated_engine.sh index 3ff2dabfa43..193d5fdb6d5 100755 --- a/tests/queries/0_stateless/02232_allow_only_replicated_engine.sh +++ b/tests/queries/0_stateless/02232_allow_only_replicated_engine.sh @@ -16,3 +16,5 @@ ${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none -n --query "CREATE TABLE ${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --user "user_${CLICKHOUSE_DATABASE}" -n --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.tab_rmt (x UInt32) engine = ReplicatedMergeTree order by x;" ${CLICKHOUSE_CLIENT} --query "DROP DATABASE ${CLICKHOUSE_DATABASE}_db" ${CLICKHOUSE_CLIENT} -q "DROP USER user_${CLICKHOUSE_DATABASE}" + +${CLICKHOUSE_CLIENT} -q "drop table mute_stylecheck" diff --git a/tests/queries/0_stateless/02235_add_part_offset_virtual_column.reference b/tests/queries/0_stateless/02235_add_part_offset_virtual_column.reference index 0edbea64065..2455f50b7f2 100644 --- a/tests/queries/0_stateless/02235_add_part_offset_virtual_column.reference +++ b/tests/queries/0_stateless/02235_add_part_offset_virtual_column.reference @@ -32,9 +32,27 @@ 0 0 198401_1_1_1 1 1 198401_1_1_1 999998 999998 198401_1_1_1 +0 +1 +2 +0 foo +1 foo +2 foo SOME GRANULES FILTERED OUT 335872 166463369216 166463369216 34464 1510321840 1510321840 301408 164953047376 164953047376 +100000 +100001 +100002 +100000 foo +100001 foo +100002 foo PREWHERE 301408 164953047376 164953047376 +42 +10042 +20042 +42 foo +10042 foo +20042 foo diff --git a/tests/queries/0_stateless/02235_add_part_offset_virtual_column.sql b/tests/queries/0_stateless/02235_add_part_offset_virtual_column.sql index eb1f01e65f7..1de6447172d 100644 --- a/tests/queries/0_stateless/02235_add_part_offset_virtual_column.sql +++ b/tests/queries/0_stateless/02235_add_part_offset_virtual_column.sql @@ -24,6 +24,8 @@ INSERT INTO t_1 select rowNumberInAllBlocks(), *, '1984-01-01' from t_random_1 l OPTIMIZE TABLE t_1 FINAL; +ALTER TABLE t_1 ADD COLUMN foo String DEFAULT 'foo'; + SELECT COUNT(DISTINCT(_part)) FROM t_1; SELECT min(_part_offset), max(_part_offset) FROM t_1; @@ -37,13 +39,19 @@ SELECT order_0, _part_offset, _part FROM t_1 WHERE order_0 <= 1 OR (order_0 BETW SELECT order_0, _part_offset, computed FROM t_1 ORDER BY order_0, _part_offset, computed LIMIT 3; SELECT order_0, _part_offset, computed FROM t_1 ORDER BY order_0 DESC, _part_offset DESC, computed DESC LIMIT 3; SELECT order_0, _part_offset, _part FROM t_1 WHERE order_0 <= 1 OR order_0 >= 999998 ORDER BY order_0 LIMIT 3; +SELECT _part_offset FROM t_1 ORDER BY order_0 LIMIT 3; +SELECT _part_offset, foo FROM t_1 ORDER BY order_0 LIMIT 3; SELECT 'SOME GRANULES FILTERED OUT'; SELECT count(*), sum(_part_offset), sum(order_0) from t_1 where granule == 0; SELECT count(*), sum(_part_offset), sum(order_0) from t_1 where granule == 0 AND _part_offset < 100000; SELECT count(*), sum(_part_offset), sum(order_0) from t_1 where granule == 0 AND _part_offset >= 100000; +SELECT _part_offset FROM t_1 where granule == 0 AND _part_offset >= 100000 ORDER BY order_0 LIMIT 3; +SELECT _part_offset, foo FROM t_1 where granule == 0 AND _part_offset >= 100000 ORDER BY order_0 LIMIT 3; SELECT 'PREWHERE'; SELECT count(*), sum(_part_offset), sum(order_0) from t_1 prewhere granule == 0 where _part_offset >= 100000; SELECT count(*), sum(_part_offset), sum(order_0) from t_1 prewhere _part != '' where granule == 0; -- { serverError 10 } SELECT count(*), sum(_part_offset), sum(order_0) from t_1 prewhere _part_offset 
> 100000 where granule == 0; -- { serverError 10 } +SELECT _part_offset FROM t_1 PREWHERE order_0 % 10000 == 42 ORDER BY order_0 LIMIT 3; +SELECT _part_offset, foo FROM t_1 PREWHERE order_0 % 10000 == 42 ORDER BY order_0 LIMIT 3; diff --git a/tests/queries/0_stateless/02240_system_filesystem_cache_table.reference b/tests/queries/0_stateless/02240_system_filesystem_cache_table.reference index 6b96da0be59..d3be4855b36 100644 --- a/tests/queries/0_stateless/02240_system_filesystem_cache_table.reference +++ b/tests/queries/0_stateless/02240_system_filesystem_cache_table.reference @@ -1,70 +1,10 @@ Using storage policy: s3_cache --- { echo } - -SYSTEM DROP FILESYSTEM CACHE; -SET enable_filesystem_cache_on_write_operations=0; -DROP TABLE IF EXISTS test; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; -SYSTEM STOP MERGES test; -INSERT INTO test SELECT number, toString(number) FROM numbers(100); -SELECT * FROM test FORMAT Null; -SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size; 0 79 80 0 745 746 -SYSTEM DROP FILESYSTEM CACHE; -SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; -SELECT * FROM test FORMAT Null; -SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; 0 745 746 -SYSTEM DROP FILESYSTEM CACHE; -SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; -DROP TABLE IF EXISTS test; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_3', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; -INSERT INTO test SELECT number, toString(number) FROM numbers(100); -SELECT * FROM test FORMAT Null; -SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size; -SELECT * FROM test FORMAT Null; -SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size; 0 745 746 -SYSTEM DROP FILESYSTEM CACHE; -SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; -SELECT * FROM test FORMAT Null; -SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; -SYSTEM DROP FILESYSTEM CACHE; -SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; - Using storage policy: local_cache --- { echo } - -SYSTEM DROP FILESYSTEM CACHE; -SET enable_filesystem_cache_on_write_operations=0; -DROP TABLE IF EXISTS test; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='local_cache', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; -SYSTEM STOP MERGES test; -INSERT INTO test SELECT number, toString(number) FROM numbers(100); -SELECT * FROM test FORMAT Null; -SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size; 0 79 80 0 745 746 -SYSTEM DROP FILESYSTEM CACHE; -SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; -SELECT * FROM test FORMAT Null; -SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; 0 745 746 -SYSTEM DROP 
FILESYSTEM CACHE; -SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; -DROP TABLE IF EXISTS test; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='local_cache_3', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; -INSERT INTO test SELECT number, toString(number) FROM numbers(100); -SELECT * FROM test FORMAT Null; -SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size; -SELECT * FROM test FORMAT Null; -SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size; 0 745 746 -SYSTEM DROP FILESYSTEM CACHE; -SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; -SELECT * FROM test FORMAT Null; -SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; -SYSTEM DROP FILESYSTEM CACHE; -SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; - diff --git a/tests/queries/0_stateless/02240_system_filesystem_cache_table.sh b/tests/queries/0_stateless/02240_system_filesystem_cache_table.sh index 5f0412cd237..a487f3ca739 100755 --- a/tests/queries/0_stateless/02240_system_filesystem_cache_table.sh +++ b/tests/queries/0_stateless/02240_system_filesystem_cache_table.sh @@ -7,14 +7,36 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -TMP_PATH=${CLICKHOUSE_TEST_UNIQUE_NAME} -QUERIES_FILE=02240_system_filesystem_cache_table.queries -TEST_FILE=$CUR_DIR/filesystem_cache_queries/$QUERIES_FILE +for STORAGE_POLICY in 's3_cache' 'local_cache'; do + echo "Using storage policy: $STORAGE_POLICY" + ${CLICKHOUSE_CLIENT} --query "SYSTEM DROP FILESYSTEM CACHE" -for storagePolicy in 's3_cache' 'local_cache'; do - echo "Using storage policy: $storagePolicy" - cat $TEST_FILE | sed -e "s/_storagePolicy/${storagePolicy}/" > $TMP_PATH - ${CLICKHOUSE_CLIENT} --queries-file $TMP_PATH - rm $TMP_PATH - echo + ${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS test_02240_storage_policy" + ${CLICKHOUSE_CLIENT} --query "CREATE TABLE test_02240_storage_policy (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='${STORAGE_POLICY}', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false" + ${CLICKHOUSE_CLIENT} --query "SYSTEM STOP MERGES test_02240_storage_policy" + ${CLICKHOUSE_CLIENT} --enable_filesystem_cache_on_write_operations=0 --query "INSERT INTO test_02240_storage_policy SELECT number, toString(number) FROM numbers(100)" + ${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_02240_storage_policy FORMAT Null" + ${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size" + + ${CLICKHOUSE_CLIENT} --query "SYSTEM DROP FILESYSTEM CACHE" + ${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache" + ${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_02240_storage_policy FORMAT Null" + ${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache" + ${CLICKHOUSE_CLIENT} --query "SYSTEM DROP FILESYSTEM CACHE" + ${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM 
system.filesystem_cache" + + ${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS test_02240_storage_policy_3" + ${CLICKHOUSE_CLIENT} --query "CREATE TABLE test_02240_storage_policy_3 (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='${STORAGE_POLICY}_3', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false" + ${CLICKHOUSE_CLIENT} --enable_filesystem_cache_on_write_operations=0 --query "INSERT INTO test_02240_storage_policy_3 SELECT number, toString(number) FROM numbers(100)" + ${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_02240_storage_policy_3 FORMAT Null" + ${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size" + ${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_02240_storage_policy_3 FORMAT Null" + ${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size" + + ${CLICKHOUSE_CLIENT} --query "SYSTEM DROP FILESYSTEM CACHE" + ${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache" + ${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_02240_storage_policy_3 FORMAT Null" + ${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache" + ${CLICKHOUSE_CLIENT} --query "SYSTEM DROP FILESYSTEM CACHE" + ${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache" done diff --git a/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.reference b/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.reference index 9405b9eb614..bbca9bbbfee 100644 --- a/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.reference +++ b/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.reference @@ -1,252 +1,60 @@ Using storage policy: s3_cache --- { echo } - -SET enable_filesystem_cache_on_write_operations=1; -DROP TABLE IF EXISTS test; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; -SYSTEM STOP MERGES test; -SYSTEM DROP FILESYSTEM CACHE; -SELECT file_segment_range_begin, file_segment_range_end, size, state -FROM -( - SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path - FROM - ( - SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path - FROM system.remote_data_paths - ) AS data_paths - INNER JOIN - system.filesystem_cache AS caches - ON data_paths.cache_path = caches.cache_path -) -WHERE endsWith(local_path, 'data.bin') -FORMAT Vertical; -SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; 0 -SELECT count() FROM system.filesystem_cache; 0 -INSERT INTO test SELECT number, toString(number) FROM numbers(100); -SELECT file_segment_range_begin, file_segment_range_end, size, state -FROM -( - SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path - FROM - ( - SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path - FROM system.remote_data_paths - ) AS data_paths - INNER JOIN - system.filesystem_cache AS 
caches - ON data_paths.cache_path = caches.cache_path -) -WHERE endsWith(local_path, 'data.bin') -FORMAT Vertical; Row 1: ────── file_segment_range_begin: 0 file_segment_range_end: 745 size: 746 state: DOWNLOADED -SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; 7 -SELECT count() FROM system.filesystem_cache; 7 -SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0; 0 -SELECT * FROM test FORMAT Null; -SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0; 2 -SELECT * FROM test FORMAT Null; -SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0; 2 -SELECT count() size FROM system.filesystem_cache; 7 -SYSTEM DROP FILESYSTEM CACHE; -INSERT INTO test SELECT number, toString(number) FROM numbers(100, 200); -SELECT file_segment_range_begin, file_segment_range_end, size, state -FROM -( - SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path - FROM - ( - SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path - FROM system.remote_data_paths - ) AS data_paths - INNER JOIN - system.filesystem_cache AS caches - ON data_paths.cache_path = caches.cache_path -) -WHERE endsWith(local_path, 'data.bin') -FORMAT Vertical; Row 1: ────── file_segment_range_begin: 0 file_segment_range_end: 1659 size: 1660 state: DOWNLOADED -SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; 7 -SELECT count() FROM system.filesystem_cache; 7 -SELECT count() FROM system.filesystem_cache; 7 -INSERT INTO test SELECT number, toString(number) FROM numbers(100) SETTINGS enable_filesystem_cache_on_write_operations=0; -SELECT count() FROM system.filesystem_cache; 7 -INSERT INTO test SELECT number, toString(number) FROM numbers(100); -INSERT INTO test SELECT number, toString(number) FROM numbers(300, 10000); -SELECT count() FROM system.filesystem_cache; 21 -SYSTEM START MERGES test; -OPTIMIZE TABLE test FINAL; -SELECT count() FROM system.filesystem_cache; 31 -SET mutations_sync=2; -ALTER TABLE test UPDATE value = 'kek' WHERE key = 100; -SELECT count() FROM system.filesystem_cache; 38 -INSERT INTO test SELECT number, toString(number) FROM numbers(5000000); -SYSTEM FLUSH LOGS; -SELECT - query, ProfileEvents['RemoteFSReadBytes'] > 0 as remote_fs_read -FROM - system.query_log -WHERE - query LIKE 'SELECT number, toString(number) FROM numbers(5000000)%' - AND type = 'QueryFinish' - AND current_database = currentDatabase() -ORDER BY - query_start_time - DESC -LIMIT 1; -SELECT count() FROM test; 5010500 -SELECT count() FROM test WHERE value LIKE '%010%'; 18816 - Using storage policy: local_cache --- { echo } - -SET enable_filesystem_cache_on_write_operations=1; -DROP TABLE IF EXISTS test; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='local_cache', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; -SYSTEM STOP MERGES test; -SYSTEM DROP FILESYSTEM CACHE; -SELECT file_segment_range_begin, file_segment_range_end, size, state -FROM -( - SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path - FROM - ( - SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path - FROM system.remote_data_paths - ) AS 
data_paths - INNER JOIN - system.filesystem_cache AS caches - ON data_paths.cache_path = caches.cache_path -) -WHERE endsWith(local_path, 'data.bin') -FORMAT Vertical; -SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; 0 -SELECT count() FROM system.filesystem_cache; 0 -INSERT INTO test SELECT number, toString(number) FROM numbers(100); -SELECT file_segment_range_begin, file_segment_range_end, size, state -FROM -( - SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path - FROM - ( - SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path - FROM system.remote_data_paths - ) AS data_paths - INNER JOIN - system.filesystem_cache AS caches - ON data_paths.cache_path = caches.cache_path -) -WHERE endsWith(local_path, 'data.bin') -FORMAT Vertical; Row 1: ────── file_segment_range_begin: 0 file_segment_range_end: 745 size: 746 state: DOWNLOADED -SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; 7 -SELECT count() FROM system.filesystem_cache; 7 -SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0; 0 -SELECT * FROM test FORMAT Null; -SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0; 2 -SELECT * FROM test FORMAT Null; -SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0; 2 -SELECT count() size FROM system.filesystem_cache; 7 -SYSTEM DROP FILESYSTEM CACHE; -INSERT INTO test SELECT number, toString(number) FROM numbers(100, 200); -SELECT file_segment_range_begin, file_segment_range_end, size, state -FROM -( - SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path - FROM - ( - SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path - FROM system.remote_data_paths - ) AS data_paths - INNER JOIN - system.filesystem_cache AS caches - ON data_paths.cache_path = caches.cache_path -) -WHERE endsWith(local_path, 'data.bin') -FORMAT Vertical; Row 1: ────── file_segment_range_begin: 0 file_segment_range_end: 1659 size: 1660 state: DOWNLOADED -SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; 7 -SELECT count() FROM system.filesystem_cache; 7 -SELECT count() FROM system.filesystem_cache; 7 -INSERT INTO test SELECT number, toString(number) FROM numbers(100) SETTINGS enable_filesystem_cache_on_write_operations=0; -SELECT count() FROM system.filesystem_cache; 7 -INSERT INTO test SELECT number, toString(number) FROM numbers(100); -INSERT INTO test SELECT number, toString(number) FROM numbers(300, 10000); -SELECT count() FROM system.filesystem_cache; 21 -SYSTEM START MERGES test; -OPTIMIZE TABLE test FINAL; -SELECT count() FROM system.filesystem_cache; 31 -SET mutations_sync=2; -ALTER TABLE test UPDATE value = 'kek' WHERE key = 100; -SELECT count() FROM system.filesystem_cache; 38 -INSERT INTO test SELECT number, toString(number) FROM numbers(5000000); -SYSTEM FLUSH LOGS; -SELECT - query, ProfileEvents['RemoteFSReadBytes'] > 0 as remote_fs_read -FROM - system.query_log -WHERE - query LIKE 'SELECT number, toString(number) FROM numbers(5000000)%' - AND type = 'QueryFinish' - AND 
current_database = currentDatabase() -ORDER BY - query_start_time - DESC -LIMIT 1; -SELECT count() FROM test; 5010500 -SELECT count() FROM test WHERE value LIKE '%010%'; 18816 - diff --git a/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.sh b/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.sh index baf1fdf7fed..048fb792e6e 100755 --- a/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.sh +++ b/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.sh @@ -7,14 +7,121 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -TMP_PATH=${CLICKHOUSE_TEST_UNIQUE_NAME} -QUERIES_FILE=02241_filesystem_cache_on_write_operations.queries -TEST_FILE=$CUR_DIR/filesystem_cache_queries/$QUERIES_FILE +for STORAGE_POLICY in 's3_cache' 'local_cache'; do + echo "Using storage policy: $STORAGE_POLICY" -for storagePolicy in 's3_cache' 'local_cache'; do - echo "Using storage policy: $storagePolicy" - cat $TEST_FILE | sed -e "s/_storagePolicy/${storagePolicy}/" > $TMP_PATH - ${CLICKHOUSE_CLIENT} --queries-file $TMP_PATH - rm $TMP_PATH - echo + $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS test_02241" + $CLICKHOUSE_CLIENT --query "CREATE TABLE test_02241 (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='$STORAGE_POLICY', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false" + $CLICKHOUSE_CLIENT --query "SYSTEM STOP MERGES test_02241" + + $CLICKHOUSE_CLIENT --query "SYSTEM DROP FILESYSTEM CACHE" + + $CLICKHOUSE_CLIENT -n --query "SELECT file_segment_range_begin, file_segment_range_end, size, state + FROM + ( + SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path + FROM + ( + SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path + FROM system.remote_data_paths + ) AS data_paths + INNER JOIN + system.filesystem_cache AS caches + ON data_paths.cache_path = caches.cache_path + ) + WHERE endsWith(local_path, 'data.bin') + FORMAT Vertical" + + $CLICKHOUSE_CLIENT --query "SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path" + $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache" + + $CLICKHOUSE_CLIENT --enable_filesystem_cache_on_write_operations=1 --query "INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(100)" + + $CLICKHOUSE_CLIENT -n --query "SELECT file_segment_range_begin, file_segment_range_end, size, state + FROM + ( + SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path + FROM + ( + SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path + FROM system.remote_data_paths + ) AS data_paths + INNER JOIN + system.filesystem_cache AS caches + ON data_paths.cache_path = caches.cache_path + ) + WHERE endsWith(local_path, 'data.bin') + FORMAT Vertical" + + $CLICKHOUSE_CLIENT --query "SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path" + $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache" + + $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0" + + $CLICKHOUSE_CLIENT --query "SELECT * FROM 
test_02241 FORMAT Null" + $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0" + + $CLICKHOUSE_CLIENT --query "SELECT * FROM test_02241 FORMAT Null" + $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0" + + $CLICKHOUSE_CLIENT --query "SELECT count() size FROM system.filesystem_cache" + + $CLICKHOUSE_CLIENT --query "SYSTEM DROP FILESYSTEM CACHE" + + $CLICKHOUSE_CLIENT --enable_filesystem_cache_on_write_operations=1 --query "INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(100, 200)" + + $CLICKHOUSE_CLIENT -n --query "SELECT file_segment_range_begin, file_segment_range_end, size, state + FROM + ( + SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path + FROM + ( + SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path + FROM system.remote_data_paths + ) AS data_paths + INNER JOIN + system.filesystem_cache AS caches + ON data_paths.cache_path = caches.cache_path + ) + WHERE endsWith(local_path, 'data.bin') + FORMAT Vertical;" + + $CLICKHOUSE_CLIENT --query "SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path" + $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache" + + $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache" + $CLICKHOUSE_CLIENT --enable_filesystem_cache_on_write_operations=1 --query "INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(100) SETTINGS enable_filesystem_cache_on_write_operations=0" + $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache" + + $CLICKHOUSE_CLIENT --enable_filesystem_cache_on_write_operations=1 --query "INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(100)" + $CLICKHOUSE_CLIENT --enable_filesystem_cache_on_write_operations=1 --query "INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(300, 10000)" + $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache" + + $CLICKHOUSE_CLIENT --query "SYSTEM START MERGES test_02241" + + $CLICKHOUSE_CLIENT --enable_filesystem_cache_on_write_operations=1 --query "OPTIMIZE TABLE test_02241 FINAL" + $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache" + + $CLICKHOUSE_CLIENT --enable_filesystem_cache_on_write_operations=1 --mutations_sync=2 --query "ALTER TABLE test_02241 UPDATE value = 'kek' WHERE key = 100" + $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache" + + $CLICKHOUSE_CLIENT --enable_filesystem_cache_on_write_operations=1 --query "INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(5000000)" + + $CLICKHOUSE_CLIENT --query "SYSTEM FLUSH LOGS" + + $CLICKHOUSE_CLIENT -n --query "SELECT + query, ProfileEvents['RemoteFSReadBytes'] > 0 as remote_fs_read + FROM + system.query_log + WHERE + query LIKE 'SELECT number, toString(number) FROM numbers(5000000)%' + AND type = 'QueryFinish' + AND current_database = currentDatabase() + ORDER BY + query_start_time + DESC + LIMIT 1" + + $CLICKHOUSE_CLIENT --query "SELECT count() FROM test_02241" + $CLICKHOUSE_CLIENT --query "SELECT count() FROM test_02241 WHERE value LIKE '%010%'" done diff --git a/tests/queries/0_stateless/02242_arrow_orc_parquet_nullable_schema_inference.reference b/tests/queries/0_stateless/02242_arrow_orc_parquet_nullable_schema_inference.reference index 
debc5c58936..2ecce985eb4 100644 --- a/tests/queries/0_stateless/02242_arrow_orc_parquet_nullable_schema_inference.reference +++ b/tests/queries/0_stateless/02242_arrow_orc_parquet_nullable_schema_inference.reference @@ -2,7 +2,7 @@ Arrow x Nullable(UInt64) arr1 Array(Nullable(UInt64)) arr2 Array(Array(Nullable(String))) -arr3 Array(Tuple(Nullable(String), Nullable(UInt64))) +arr3 Array(Tuple(`1` Nullable(String), `2` Nullable(UInt64))) 0 [0,1] [[NULL,'String'],[NULL],[]] [(NULL,NULL),('String',NULL),(NULL,0)] \N [NULL,2] [[NULL,'String'],[NULL],[]] [(NULL,NULL),('String',NULL),(NULL,1)] 2 [2,3] [[NULL,'String'],[NULL],[]] [(NULL,NULL),('String',NULL),(NULL,2)] @@ -12,7 +12,7 @@ ArrowStream x Nullable(UInt64) arr1 Array(Nullable(UInt64)) arr2 Array(Array(Nullable(String))) -arr3 Array(Tuple(Nullable(String), Nullable(UInt64))) +arr3 Array(Tuple(`1` Nullable(String), `2` Nullable(UInt64))) 0 [0,1] [[NULL,'String'],[NULL],[]] [(NULL,NULL),('String',NULL),(NULL,0)] \N [NULL,2] [[NULL,'String'],[NULL],[]] [(NULL,NULL),('String',NULL),(NULL,1)] 2 [2,3] [[NULL,'String'],[NULL],[]] [(NULL,NULL),('String',NULL),(NULL,2)] @@ -22,7 +22,7 @@ Parquet x Nullable(UInt64) arr1 Array(Nullable(UInt64)) arr2 Array(Array(Nullable(String))) -arr3 Array(Tuple(Nullable(String), Nullable(UInt64))) +arr3 Array(Tuple(`1` Nullable(String), `2` Nullable(UInt64))) 0 [0,1] [[NULL,'String'],[NULL],[]] [(NULL,NULL),('String',NULL),(NULL,0)] \N [NULL,2] [[NULL,'String'],[NULL],[]] [(NULL,NULL),('String',NULL),(NULL,1)] 2 [2,3] [[NULL,'String'],[NULL],[]] [(NULL,NULL),('String',NULL),(NULL,2)] @@ -32,7 +32,7 @@ ORC x Nullable(Int64) arr1 Array(Nullable(Int64)) arr2 Array(Array(Nullable(String))) -arr3 Array(Tuple(Nullable(String), Nullable(Int64))) +arr3 Array(Tuple(`1` Nullable(String), `2` Nullable(Int64))) 0 [0,1] [[NULL,'String'],[NULL],[]] [(NULL,NULL),('String',NULL),(NULL,0)] \N [NULL,2] [[NULL,'String'],[NULL],[]] [(NULL,NULL),('String',NULL),(NULL,1)] 2 [2,3] [[NULL,'String'],[NULL],[]] [(NULL,NULL),('String',NULL),(NULL,2)] diff --git a/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.reference b/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.reference index 91587dc8e79..99f31df7def 100644 --- a/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.reference +++ b/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.reference @@ -1,42 +1,8 @@ Using storage policy: s3_cache --- { echo } - -SYSTEM DROP FILESYSTEM CACHE; -SET enable_filesystem_cache_log=1; -SET enable_filesystem_cache_on_write_operations=0; -DROP TABLE IF EXISTS test; -DROP TABLE IF EXISTS system.filesystem_cache_log; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; -SYSTEM STOP MERGES test; -INSERT INTO test SELECT number, toString(number) FROM numbers(100000); -SELECT 2240, 's3_cache', * FROM test FORMAT Null; -SYSTEM FLUSH LOGS; -SELECT file_segment_range, read_type FROM system.filesystem_cache_log WHERE query_id = (SELECT query_id from system.query_log where query LIKE '%SELECT 2240%s3_cache%' AND current_database = currentDatabase() AND type = 'QueryFinish' ORDER BY event_time desc LIMIT 1) ORDER BY file_segment_range, read_type; (0,519) READ_FROM_FS_AND_DOWNLOADED_TO_CACHE (0,808110) READ_FROM_FS_AND_DOWNLOADED_TO_CACHE -SELECT 2241, 's3_cache', * FROM test FORMAT Null; -SYSTEM FLUSH LOGS; -SELECT file_segment_range, read_type FROM 
system.filesystem_cache_log WHERE query_id = (SELECT query_id from system.query_log where query LIKE '%SELECT 2241%s3_cache%' AND current_database = currentDatabase() AND type = 'QueryFinish' ORDER BY event_time desc LIMIT 1) ORDER BY file_segment_range, read_type; (0,808110) READ_FROM_CACHE - Using storage policy: local_cache --- { echo } - -SYSTEM DROP FILESYSTEM CACHE; -SET enable_filesystem_cache_log=1; -SET enable_filesystem_cache_on_write_operations=0; -DROP TABLE IF EXISTS test; -DROP TABLE IF EXISTS system.filesystem_cache_log; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='local_cache', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; -SYSTEM STOP MERGES test; -INSERT INTO test SELECT number, toString(number) FROM numbers(100000); -SELECT 2240, 'local_cache', * FROM test FORMAT Null; -SYSTEM FLUSH LOGS; -SELECT file_segment_range, read_type FROM system.filesystem_cache_log WHERE query_id = (SELECT query_id from system.query_log where query LIKE '%SELECT 2240%local_cache%' AND current_database = currentDatabase() AND type = 'QueryFinish' ORDER BY event_time desc LIMIT 1) ORDER BY file_segment_range, read_type; (0,519) READ_FROM_FS_AND_DOWNLOADED_TO_CACHE (0,808110) READ_FROM_FS_AND_DOWNLOADED_TO_CACHE -SELECT 2241, 'local_cache', * FROM test FORMAT Null; -SYSTEM FLUSH LOGS; -SELECT file_segment_range, read_type FROM system.filesystem_cache_log WHERE query_id = (SELECT query_id from system.query_log where query LIKE '%SELECT 2241%local_cache%' AND current_database = currentDatabase() AND type = 'QueryFinish' ORDER BY event_time desc LIMIT 1) ORDER BY file_segment_range, read_type; (0,808110) READ_FROM_CACHE - diff --git a/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.sh b/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.sh index b5dbc5c9f78..4c92d1d2954 100755 --- a/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.sh +++ b/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.sh @@ -7,14 +7,23 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CUR_DIR"/../shell_config.sh -TMP_PATH=${CLICKHOUSE_TEST_UNIQUE_NAME} -QUERIES_FILE=02242_system_filesystem_cache_log_table.queries -TEST_FILE=$CUR_DIR/filesystem_cache_queries/$QUERIES_FILE +for STORAGE_POLICY in 's3_cache' 'local_cache'; do + echo "Using storage policy: $STORAGE_POLICY" + + $CLICKHOUSE_CLIENT --query "SYSTEM DROP FILESYSTEM CACHE" + + $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS test_2242" + $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS system.filesystem_cache_log" + $CLICKHOUSE_CLIENT --query "CREATE TABLE test_2242 (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='$STORAGE_POLICY', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false" + $CLICKHOUSE_CLIENT --query "SYSTEM STOP MERGES test_2242" + $CLICKHOUSE_CLIENT --enable_filesystem_cache_on_write_operations=0 --enable_filesystem_cache_log=1 --query "INSERT INTO test_2242 SELECT number, toString(number) FROM numbers(100000)" + + $CLICKHOUSE_CLIENT --enable_filesystem_cache_on_write_operations=0 --enable_filesystem_cache_log=1 --query "SELECT 2242, '$STORAGE_POLICY', * FROM test_2242 FORMAT Null" + $CLICKHOUSE_CLIENT --query "SYSTEM FLUSH LOGS" + $CLICKHOUSE_CLIENT --query "SELECT file_segment_range, read_type FROM system.filesystem_cache_log WHERE query_id = (SELECT query_id from system.query_log where query LIKE '%SELECT 2242%$STORAGE_POLICY%' AND current_database = currentDatabase() AND type = 'QueryFinish' ORDER BY event_time desc LIMIT 1) ORDER BY file_segment_range, read_type" + + $CLICKHOUSE_CLIENT --enable_filesystem_cache_on_write_operations=0 --enable_filesystem_cache_log=1 --query "SELECT 2243, '$STORAGE_POLICY', * FROM test_2242 FORMAT Null" + $CLICKHOUSE_CLIENT --query "SYSTEM FLUSH LOGS" + $CLICKHOUSE_CLIENT --query "SELECT file_segment_range, read_type FROM system.filesystem_cache_log WHERE query_id = (SELECT query_id from system.query_log where query LIKE '%SELECT 2243%$STORAGE_POLICY%' AND current_database = currentDatabase() AND type = 'QueryFinish' ORDER BY event_time desc LIMIT 1) ORDER BY file_segment_range, read_type" -for storagePolicy in 's3_cache' 'local_cache'; do - echo "Using storage policy: $storagePolicy" - cat $TEST_FILE | sed -e "s/_storagePolicy/${storagePolicy}/" > $TMP_PATH - ${CLICKHOUSE_CLIENT} --queries-file $TMP_PATH - rm $TMP_PATH - echo done diff --git a/tests/queries/0_stateless/02245_s3_support_read_nested_column.reference b/tests/queries/0_stateless/02245_s3_support_read_nested_column.reference index e9754463ba1..fb198bd8401 100644 --- a/tests/queries/0_stateless/02245_s3_support_read_nested_column.reference +++ b/tests/queries/0_stateless/02245_s3_support_read_nested_column.reference @@ -4,7 +4,8 @@ drop table if exists test_02245_s3_nested_parquet2; set input_format_parquet_import_nested = 1; create table test_02245_s3_nested_parquet1(a Int64, b Tuple(a Int64, b String)) engine=S3(s3_conn, filename='test_02245_s3_nested_parquet1_{_partition_id}', format='Parquet') partition by a; insert into test_02245_s3_nested_parquet1 values (1, (2, 'a')); -select a, b.a, b.b from s3(s3_conn, filename='test_02245_s3_nested_parquet1_*', format='Parquet'); -- { serverError 47 } +select a, b.a, b.b from s3(s3_conn, filename='test_02245_s3_nested_parquet1_*', format='Parquet'); +1 2 a create table test_02245_s3_nested_parquet2(a Int64, b Tuple(a Int64, b Tuple(c Int64, d String))) engine=S3(s3_conn, filename='test_02245_s3_nested_parquet2_{_partition_id}', format='Parquet') partition by a; insert into 
test_02245_s3_nested_parquet2 values (1, (2, (3, 'a'))); select a, b.a, b.b.c, b.b.d from s3(s3_conn, filename='test_02245_s3_nested_parquet2_*', format='Parquet', structure='a Int64, b Tuple(a Int64, b Tuple(c Int64, d String))'); @@ -14,7 +15,8 @@ drop table if exists test_02245_s3_nested_arrow2; set input_format_arrow_import_nested=1; create table test_02245_s3_nested_arrow1(a Int64, b Tuple(a Int64, b String)) engine=S3(s3_conn, filename='test_02245_s3_nested_arrow1_{_partition_id}', format='Arrow') partition by a; insert into test_02245_s3_nested_arrow1 values (1, (2, 'a')); -select a, b.a, b.b from s3(s3_conn, filename='test_02245_s3_nested_arrow1_*', format='Arrow'); -- { serverError 47 } +select a, b.a, b.b from s3(s3_conn, filename='test_02245_s3_nested_arrow1_*', format='Arrow'); +1 2 a create table test_02245_s3_nested_arrow2(a Int64, b Tuple(a Int64, b Tuple(c Int64, d String))) engine=S3(s3_conn, filename='test_02245_s3_nested_arrow2_{_partition_id}', format='Arrow') partition by a; insert into test_02245_s3_nested_arrow2 values (1, (2, (3, 'a'))); select a, b.a, b.b.c, b.b.d from s3(s3_conn, filename='test_02245_s3_nested_arrow2_*', format='Arrow', structure='a Int64, b Tuple(a Int64, b Tuple(c Int64, d String))'); @@ -24,7 +26,8 @@ drop table if exists test_02245_s3_nested_orc2; set input_format_orc_import_nested=1; create table test_02245_s3_nested_orc1(a Int64, b Tuple(a Int64, b String)) engine=S3(s3_conn, filename='test_02245_s3_nested_orc1_{_partition_id}', format='ORC') partition by a; insert into test_02245_s3_nested_orc1 values (1, (2, 'a')); -select a, b.a, b.b from s3(s3_conn, filename='test_02245_s3_nested_orc1_*', format='ORC'); -- { serverError 47 } +select a, b.a, b.b from s3(s3_conn, filename='test_02245_s3_nested_orc1_*', format='ORC'); +1 2 a create table test_02245_s3_nested_orc2(a Int64, b Tuple(a Int64, b Tuple(c Int64, d String))) engine=S3(s3_conn, filename='test_02245_s3_nested_orc2_{_partition_id}', format='ORC') partition by a; insert into test_02245_s3_nested_orc2 values (1, (2, (3, 'a'))); select a, b.a, b.b.c, b.b.d from s3(s3_conn, filename='test_02245_s3_nested_orc2_*', format='ORC', structure='a Int64, b Tuple(a Int64, b Tuple(c Int64, d String))'); diff --git a/tests/queries/0_stateless/02245_s3_support_read_nested_column.sql b/tests/queries/0_stateless/02245_s3_support_read_nested_column.sql index 14fc7cee7dc..08788306de7 100644 --- a/tests/queries/0_stateless/02245_s3_support_read_nested_column.sql +++ b/tests/queries/0_stateless/02245_s3_support_read_nested_column.sql @@ -8,7 +8,7 @@ set input_format_parquet_import_nested = 1; create table test_02245_s3_nested_parquet1(a Int64, b Tuple(a Int64, b String)) engine=S3(s3_conn, filename='test_02245_s3_nested_parquet1_{_partition_id}', format='Parquet') partition by a; insert into test_02245_s3_nested_parquet1 values (1, (2, 'a')); -select a, b.a, b.b from s3(s3_conn, filename='test_02245_s3_nested_parquet1_*', format='Parquet'); -- { serverError 47 } +select a, b.a, b.b from s3(s3_conn, filename='test_02245_s3_nested_parquet1_*', format='Parquet'); create table test_02245_s3_nested_parquet2(a Int64, b Tuple(a Int64, b Tuple(c Int64, d String))) engine=S3(s3_conn, filename='test_02245_s3_nested_parquet2_{_partition_id}', format='Parquet') partition by a; insert into test_02245_s3_nested_parquet2 values (1, (2, (3, 'a'))); @@ -22,7 +22,7 @@ set input_format_arrow_import_nested=1; create table test_02245_s3_nested_arrow1(a Int64, b Tuple(a Int64, b String)) engine=S3(s3_conn, 
filename='test_02245_s3_nested_arrow1_{_partition_id}', format='Arrow') partition by a; insert into test_02245_s3_nested_arrow1 values (1, (2, 'a')); -select a, b.a, b.b from s3(s3_conn, filename='test_02245_s3_nested_arrow1_*', format='Arrow'); -- { serverError 47 } +select a, b.a, b.b from s3(s3_conn, filename='test_02245_s3_nested_arrow1_*', format='Arrow'); create table test_02245_s3_nested_arrow2(a Int64, b Tuple(a Int64, b Tuple(c Int64, d String))) engine=S3(s3_conn, filename='test_02245_s3_nested_arrow2_{_partition_id}', format='Arrow') partition by a; insert into test_02245_s3_nested_arrow2 values (1, (2, (3, 'a'))); @@ -36,7 +36,7 @@ set input_format_orc_import_nested=1; create table test_02245_s3_nested_orc1(a Int64, b Tuple(a Int64, b String)) engine=S3(s3_conn, filename='test_02245_s3_nested_orc1_{_partition_id}', format='ORC') partition by a; insert into test_02245_s3_nested_orc1 values (1, (2, 'a')); -select a, b.a, b.b from s3(s3_conn, filename='test_02245_s3_nested_orc1_*', format='ORC'); -- { serverError 47 } +select a, b.a, b.b from s3(s3_conn, filename='test_02245_s3_nested_orc1_*', format='ORC'); create table test_02245_s3_nested_orc2(a Int64, b Tuple(a Int64, b Tuple(c Int64, d String))) engine=S3(s3_conn, filename='test_02245_s3_nested_orc2_{_partition_id}', format='ORC') partition by a; insert into test_02245_s3_nested_orc2 values (1, (2, (3, 'a'))); diff --git a/tests/queries/0_stateless/02267_file_globs_schema_inference.sh b/tests/queries/0_stateless/02267_file_globs_schema_inference.sh index 701e18a0259..d4b3b89b3ba 100755 --- a/tests/queries/0_stateless/02267_file_globs_schema_inference.sh +++ b/tests/queries/0_stateless/02267_file_globs_schema_inference.sh @@ -14,5 +14,5 @@ $CLICKHOUSE_CLIENT -q "select * from file('${CLICKHOUSE_TEST_UNIQUE_NAME}_data*. 
$CLICKHOUSE_CLIENT -q "insert into function file('${CLICKHOUSE_TEST_UNIQUE_NAME}_data4.jsonl', 'TSV') select 1 as x"; $CLICKHOUSE_CLIENT -q "insert into function file('${CLICKHOUSE_TEST_UNIQUE_NAME}_data1.jsonl', 'TSV') select [1,2,3] as x SETTINGS engine_file_truncate_on_insert = 1"; -$CLICKHOUSE_CLIENT -q "select * from file('${CLICKHOUSE_TEST_UNIQUE_NAME}_data*.jsonl') settings schema_inference_use_cache_for_file=0" 2>&1 | grep -F -q "INCORRECT_DATA" && echo "OK" || echo "FAIL"; +$CLICKHOUSE_CLIENT -q "select * from file('${CLICKHOUSE_TEST_UNIQUE_NAME}_data*.jsonl') settings schema_inference_use_cache_for_file=0" 2>&1 | grep -F -q "CANNOT_PARSE_INPUT_ASSERTION_FAILED" && echo "OK" || echo "FAIL"; diff --git a/tests/queries/0_stateless/02268_json_wrong_root_type_in_schema_inference.sql b/tests/queries/0_stateless/02268_json_wrong_root_type_in_schema_inference.sql index 7427426602a..5462d38f1a3 100644 --- a/tests/queries/0_stateless/02268_json_wrong_root_type_in_schema_inference.sql +++ b/tests/queries/0_stateless/02268_json_wrong_root_type_in_schema_inference.sql @@ -1,7 +1,7 @@ -- Tags: no-fasttest insert into function file('02268_data.jsonl', 'TSV') select 1; -select * from file('02268_data.jsonl'); --{serverError 117} +select * from file('02268_data.jsonl'); --{serverError CANNOT_PARSE_INPUT_ASSERTION_FAILED} insert into function file('02268_data.jsonCompactEachRow', 'TSV') select 1; -select * from file('02268_data.jsonCompactEachRow'); --{serverError 117} +select * from file('02268_data.jsonCompactEachRow'); --{serverError CANNOT_PARSE_INPUT_ASSERTION_FAILED} diff --git a/tests/queries/0_stateless/02269_insert_select_with_format_without_schema_inference.sql b/tests/queries/0_stateless/02269_insert_select_with_format_without_schema_inference.sql index 8ea9dba2696..82b433e5948 100644 --- a/tests/queries/0_stateless/02269_insert_select_with_format_without_schema_inference.sql +++ b/tests/queries/0_stateless/02269_insert_select_with_format_without_schema_inference.sql @@ -1,2 +1,2 @@ -insert into function file('02269_data', 'RowBinary') select 1; +insert into function file('02269_data', 'RowBinary') select 1 settings engine_file_truncate_on_insert=1; select * from file('02269_data', 'RowBinary', 'x UInt8'); diff --git a/tests/queries/0_stateless/02286_drop_filesystem_cache.reference b/tests/queries/0_stateless/02286_drop_filesystem_cache.reference index b37f87afc28..62907a7c81c 100644 --- a/tests/queries/0_stateless/02286_drop_filesystem_cache.reference +++ b/tests/queries/0_stateless/02286_drop_filesystem_cache.reference @@ -1,138 +1,22 @@ Using storage policy: s3_cache --- { echo } - -SET enable_filesystem_cache_on_write_operations=0; -DROP TABLE IF EXISTS test; -CREATE TABLE test (key UInt32, value String) -Engine=MergeTree() -ORDER BY key -SETTINGS storage_policy='s3_cache', min_bytes_for_wide_part = 10485760; -SYSTEM STOP MERGES; -SYSTEM DROP FILESYSTEM CACHE; -SELECT count() FROM system.filesystem_cache; 0 -INSERT INTO test SELECT number, toString(number) FROM numbers(100); -SELECT * FROM test FORMAT Null; -SELECT count() FROM system.filesystem_cache; 2 -SYSTEM DROP FILESYSTEM CACHE; -SELECT count() FROM system.filesystem_cache; 0 -SELECT * FROM test FORMAT Null; -SELECT count() FROM system.filesystem_cache; 1 -SYSTEM DROP FILESYSTEM CACHE './data'; -- { serverError 36 } -SELECT count() FROM system.filesystem_cache; 1 -SELECT * FROM test FORMAT Null; -SELECT count() FROM system.filesystem_cache; 1 -SELECT count() -FROM ( - SELECT - arrayJoin(cache_paths) AS cache_path, - 
local_path, - remote_path - FROM - system.remote_data_paths - ) AS data_paths -INNER JOIN system.filesystem_cache AS caches -ON data_paths.cache_path = caches.cache_path; 1 -DROP TABLE test NO DELAY; -SELECT count() FROM system.filesystem_cache; 0 -SELECT cache_path FROM system.filesystem_cache; -SELECT cache_path, local_path -FROM ( - SELECT - arrayJoin(cache_paths) AS cache_path, - local_path, - remote_path - FROM - system.remote_data_paths - ) AS data_paths -INNER JOIN system.filesystem_cache AS caches -ON data_paths.cache_path = caches.cache_path; -DROP TABLE IF EXISTS test2; -CREATE TABLE test2 (key UInt32, value String) -Engine=MergeTree() -ORDER BY key -SETTINGS storage_policy='s3_cache_2', min_bytes_for_wide_part = 10485760; -INSERT INTO test2 SELECT number, toString(number) FROM numbers(100); -SELECT * FROM test2 FORMAT Null; -SELECT count() FROM system.filesystem_cache; 2 -SYSTEM DROP FILESYSTEM CACHE 's3_cache_2/'; -SELECT count() FROM system.filesystem_cache; 0 - Using storage policy: local_cache --- { echo } - -SET enable_filesystem_cache_on_write_operations=0; -DROP TABLE IF EXISTS test; -CREATE TABLE test (key UInt32, value String) -Engine=MergeTree() -ORDER BY key -SETTINGS storage_policy='local_cache', min_bytes_for_wide_part = 10485760; -SYSTEM STOP MERGES; -SYSTEM DROP FILESYSTEM CACHE; -SELECT count() FROM system.filesystem_cache; 0 -INSERT INTO test SELECT number, toString(number) FROM numbers(100); -SELECT * FROM test FORMAT Null; -SELECT count() FROM system.filesystem_cache; 2 -SYSTEM DROP FILESYSTEM CACHE; -SELECT count() FROM system.filesystem_cache; 0 -SELECT * FROM test FORMAT Null; -SELECT count() FROM system.filesystem_cache; 1 -SYSTEM DROP FILESYSTEM CACHE './data'; -- { serverError 36 } -SELECT count() FROM system.filesystem_cache; 1 -SELECT * FROM test FORMAT Null; -SELECT count() FROM system.filesystem_cache; 1 -SELECT count() -FROM ( - SELECT - arrayJoin(cache_paths) AS cache_path, - local_path, - remote_path - FROM - system.remote_data_paths - ) AS data_paths -INNER JOIN system.filesystem_cache AS caches -ON data_paths.cache_path = caches.cache_path; 1 -DROP TABLE test NO DELAY; -SELECT count() FROM system.filesystem_cache; 0 -SELECT cache_path FROM system.filesystem_cache; -SELECT cache_path, local_path -FROM ( - SELECT - arrayJoin(cache_paths) AS cache_path, - local_path, - remote_path - FROM - system.remote_data_paths - ) AS data_paths -INNER JOIN system.filesystem_cache AS caches -ON data_paths.cache_path = caches.cache_path; -DROP TABLE IF EXISTS test2; -CREATE TABLE test2 (key UInt32, value String) -Engine=MergeTree() -ORDER BY key -SETTINGS storage_policy='local_cache_2', min_bytes_for_wide_part = 10485760; -INSERT INTO test2 SELECT number, toString(number) FROM numbers(100); -SELECT * FROM test2 FORMAT Null; -SELECT count() FROM system.filesystem_cache; 2 -SYSTEM DROP FILESYSTEM CACHE 'local_cache_2/'; -SELECT count() FROM system.filesystem_cache; 0 - diff --git a/tests/queries/0_stateless/02286_drop_filesystem_cache.sh b/tests/queries/0_stateless/02286_drop_filesystem_cache.sh index 30d04743b34..b563c487646 100755 --- a/tests/queries/0_stateless/02286_drop_filesystem_cache.sh +++ b/tests/queries/0_stateless/02286_drop_filesystem_cache.sh @@ -7,14 +7,78 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CUR_DIR"/../shell_config.sh -TMP_PATH=${CLICKHOUSE_TEST_UNIQUE_NAME} -QUERIES_FILE=02286_drop_filesystem_cache.queries -TEST_FILE=$CUR_DIR/filesystem_cache_queries/$QUERIES_FILE +for STORAGE_POLICY in 's3_cache' 'local_cache'; do + echo "Using storage policy: $STORAGE_POLICY" + $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS test_02286" -for storagePolicy in 's3_cache' 'local_cache'; do - echo "Using storage policy: $storagePolicy" - cat $TEST_FILE | sed -e "s/_storagePolicy/${storagePolicy}/" > $TMP_PATH - ${CLICKHOUSE_CLIENT} --queries-file $TMP_PATH - rm $TMP_PATH - echo + $CLICKHOUSE_CLIENT -n --query "CREATE TABLE test_02286 (key UInt32, value String) + Engine=MergeTree() + ORDER BY key + SETTINGS storage_policy='$STORAGE_POLICY', min_bytes_for_wide_part = 10485760" + + $CLICKHOUSE_CLIENT --query "SYSTEM STOP MERGES" + $CLICKHOUSE_CLIENT --query "SYSTEM DROP FILESYSTEM CACHE" + + $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache" + $CLICKHOUSE_CLIENT --enable_filesystem_cache_on_write_operations=0 --query "INSERT INTO test_02286 SELECT number, toString(number) FROM numbers(100)" + + $CLICKHOUSE_CLIENT --query "SELECT * FROM test_02286 FORMAT Null" + $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache" + + $CLICKHOUSE_CLIENT --query "SYSTEM DROP FILESYSTEM CACHE" + $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache" + + $CLICKHOUSE_CLIENT --query "SELECT * FROM test_02286 FORMAT Null" + $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache" + + $CLICKHOUSE_CLIENT --multiline --multiquery --query "SYSTEM DROP FILESYSTEM CACHE './data'; --{serverError 36}" + + $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache" + + $CLICKHOUSE_CLIENT --query "SELECT * FROM test_02286 FORMAT Null" + $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache" + + $CLICKHOUSE_CLIENT -n --query "SELECT count() + FROM ( + SELECT + arrayJoin(cache_paths) AS cache_path, + local_path, + remote_path + FROM + system.remote_data_paths + ) AS data_paths + INNER JOIN system.filesystem_cache AS caches + ON data_paths.cache_path = caches.cache_path" + + $CLICKHOUSE_CLIENT --query "DROP TABLE test_02286 NO DELAY" + $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache" + + $CLICKHOUSE_CLIENT --query "SELECT cache_path FROM system.filesystem_cache" + $CLICKHOUSE_CLIENT -n --query "SELECT cache_path, local_path + FROM ( + SELECT + arrayJoin(cache_paths) AS cache_path, + local_path, + remote_path + FROM + system.remote_data_paths + ) AS data_paths + INNER JOIN system.filesystem_cache AS caches + ON data_paths.cache_path = caches.cache_path" + + $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS test_022862" + + $CLICKHOUSE_CLIENT -n --query "CREATE TABLE test_022862 (key UInt32, value String) + Engine=MergeTree() + ORDER BY key + SETTINGS storage_policy='${STORAGE_POLICY}_2', min_bytes_for_wide_part = 10485760" + + $CLICKHOUSE_CLIENT --enable_filesystem_cache_on_write_operations=0 --query "INSERT INTO test_022862 SELECT number, toString(number) FROM numbers(100)" + $CLICKHOUSE_CLIENT --query "SELECT * FROM test_022862 FORMAT Null" + $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache" + + $CLICKHOUSE_CLIENT --query "SYSTEM DROP FILESYSTEM CACHE '${STORAGE_POLICY}_2/'" + $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache" + + $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS test_022862" done diff --git 
a/tests/queries/0_stateless/02287_ephemeral_format_crash.reference b/tests/queries/0_stateless/02287_ephemeral_format_crash.reference index e69de29bb2d..39bbe7c68eb 100644 --- a/tests/queries/0_stateless/02287_ephemeral_format_crash.reference +++ b/tests/queries/0_stateless/02287_ephemeral_format_crash.reference @@ -0,0 +1,2 @@ +CREATE TABLE default.test\n(\n `a` UInt8,\n `b` String EPHEMERAL\n)\nENGINE = Memory +CREATE TABLE default.test\n(\n `a` UInt8,\n `b` String EPHEMERAL 1 + 2\n)\nENGINE = Memory diff --git a/tests/queries/0_stateless/02287_ephemeral_format_crash.sql b/tests/queries/0_stateless/02287_ephemeral_format_crash.sql index 8fd9a4b4332..466532970ab 100644 --- a/tests/queries/0_stateless/02287_ephemeral_format_crash.sql +++ b/tests/queries/0_stateless/02287_ephemeral_format_crash.sql @@ -1,10 +1,13 @@ DROP TABLE IF EXISTS test; CREATE TABLE test(a UInt8, b String EPHEMERAL) Engine=Memory(); - +SHOW CREATE TABLE test; DROP TABLE test; -CREATE TABLE test(a UInt8, b EPHEMERAL String) Engine=Memory(); -- { clientError SYNTAX_ERROR } +CREATE TABLE test(a UInt8, b EPHEMERAL String) Engine=Memory(); -- { serverError UNKNOWN_IDENTIFIER } CREATE TABLE test(a UInt8, b EPHEMERAL 'a' String) Engine=Memory(); -- { clientError SYNTAX_ERROR } -CREATE TABLE test(a UInt8, b String EPHEMERAL test) Engine=Memory(); -- { clientError SYNTAX_ERROR } -CREATE TABLE test(a UInt8, b String EPHEMERAL 1+2) Engine=Memory(); -- { clientError SYNTAX_ERROR } +CREATE TABLE test(a UInt8, b String EPHEMERAL test) Engine=Memory(); -- { serverError UNKNOWN_IDENTIFIER } + +CREATE TABLE test(a UInt8, b String EPHEMERAL 1+2) Engine=Memory(); +SHOW CREATE TABLE test; +DROP TABLE test; diff --git a/tests/queries/0_stateless/02293_compatibility_ignore_auto_increment_in_create_table.reference b/tests/queries/0_stateless/02293_compatibility_ignore_auto_increment_in_create_table.reference index 2db591f7e5a..63d74e4ea1e 100644 --- a/tests/queries/0_stateless/02293_compatibility_ignore_auto_increment_in_create_table.reference +++ b/tests/queries/0_stateless/02293_compatibility_ignore_auto_increment_in_create_table.reference @@ -11,7 +11,7 @@ s String create table, several columns with different default specifiers di UInt8 DEFAULT 1 id Int32 -s String EPHEMERAL \'\' +s String EPHEMERAL defaultValueOfTypeName(\'String\') create table failed, column +type +DEFAULT +AUTO_INCREMENT create table failed, column -type +DEFAULT +AUTO_INCREMENT create table failed, column +type +AUTO_INCREMENT +DEFAULT diff --git a/tests/queries/0_stateless/02313_filesystem_cache_seeks.reference b/tests/queries/0_stateless/02313_filesystem_cache_seeks.reference index 73871f55856..062aac259a4 100644 --- a/tests/queries/0_stateless/02313_filesystem_cache_seeks.reference +++ b/tests/queries/0_stateless/02313_filesystem_cache_seeks.reference @@ -1,6 +1,3 @@ Using storage policy: s3_cache - Using storage policy: local_cache - Using storage policy: s3_cache_multi - diff --git a/tests/queries/0_stateless/02313_filesystem_cache_seeks.sh b/tests/queries/0_stateless/02313_filesystem_cache_seeks.sh index 7ed92cbf36d..f5de4346fd6 100755 --- a/tests/queries/0_stateless/02313_filesystem_cache_seeks.sh +++ b/tests/queries/0_stateless/02313_filesystem_cache_seeks.sh @@ -7,14 +7,31 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CUR_DIR"/../shell_config.sh -TMP_PATH=${CLICKHOUSE_TEST_UNIQUE_NAME} -QUERIES_FILE=02313_filesystem_cache_seeks.queries -TEST_FILE=$CUR_DIR/filesystem_cache_queries/$QUERIES_FILE -for storagePolicy in 's3_cache' 'local_cache' 's3_cache_multi'; do - echo "Using storage policy: $storagePolicy" - cat $TEST_FILE | sed -e "s/_storagePolicy/${storagePolicy}/" > $TMP_PATH - ${CLICKHOUSE_CLIENT} --queries-file $TMP_PATH - rm $TMP_PATH - echo +for STORAGE_POLICY in 's3_cache' 'local_cache' 's3_cache_multi'; do + echo "Using storage policy: $STORAGE_POLICY" + $CLICKHOUSE_CLIENT --query "SYSTEM DROP FILESYSTEM CACHE" + + $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS test_02313" + + $CLICKHOUSE_CLIENT --query "CREATE TABLE test_02313 (id Int32, val String) + ENGINE = MergeTree() + ORDER BY tuple() + SETTINGS storage_policy = '$STORAGE_POLICY'" + + $CLICKHOUSE_CLIENT --enable_filesystem_cache_on_write_operations=0 -n --query "INSERT INTO test_02313 + SELECT * FROM + generateRandom('id Int32, val String') + LIMIT 100000" + + $CLICKHOUSE_CLIENT --query "SELECT * FROM test_02313 WHERE val LIKE concat('%', randomPrintableASCII(3), '%') FORMAT Null" + $CLICKHOUSE_CLIENT --query "SELECT * FROM test_02313 WHERE val LIKE concat('%', randomPrintableASCII(3), '%') FORMAT Null" + $CLICKHOUSE_CLIENT --query "SELECT * FROM test_02313 WHERE val LIKE concat('%', randomPrintableASCII(3), '%') FORMAT Null" + $CLICKHOUSE_CLIENT --query "SELECT * FROM test_02313 WHERE val LIKE concat('%', randomPrintableASCII(3), '%') FORMAT Null" + $CLICKHOUSE_CLIENT --query "SELECT * FROM test_02313 WHERE val LIKE concat('%', randomPrintableASCII(3), '%') FORMAT Null" + $CLICKHOUSE_CLIENT --query "SELECT * FROM test_02313 WHERE val LIKE concat('%', randomPrintableASCII(3), '%') FORMAT Null" + $CLICKHOUSE_CLIENT --query "SELECT * FROM test_02313 WHERE val LIKE concat('%', randomPrintableASCII(3), '%') FORMAT Null" + + $CLICKHOUSE_CLIENT --query "DROP TABLE test_02313" + done diff --git a/tests/queries/0_stateless/02319_lightweight_delete_on_merge_tree.sql b/tests/queries/0_stateless/02319_lightweight_delete_on_merge_tree.sql index 67513a1cdff..c7f8b67e740 100644 --- a/tests/queries/0_stateless/02319_lightweight_delete_on_merge_tree.sql +++ b/tests/queries/0_stateless/02319_lightweight_delete_on_merge_tree.sql @@ -103,7 +103,7 @@ ALTER TABLE t_proj ADD PROJECTION p_1 (SELECT avg(a), avg(b), count()); INSERT INTO t_proj SELECT number + 1, number + 1 FROM numbers(1000); -DELETE FROM t_proj WHERE a < 100; -- { serverError NOT_IMPLEMENTED } +DELETE FROM t_proj WHERE a < 100; -- { serverError BAD_ARGUMENTS } SELECT avg(a), avg(b), count() FROM t_proj; diff --git a/tests/queries/0_stateless/02343_aggregation_pipeline.sql b/tests/queries/0_stateless/02343_aggregation_pipeline.sql index 85e9fd1be1e..b018cf21f91 100644 --- a/tests/queries/0_stateless/02343_aggregation_pipeline.sql +++ b/tests/queries/0_stateless/02343_aggregation_pipeline.sql @@ -1,3 +1,6 @@ +-- produces different pipeline if enabled +set enable_memory_bound_merging_of_aggregation_results = 0; + set max_threads = 16; set prefer_localhost_replica = 1; set optimize_aggregation_in_order = 0; diff --git a/tests/queries/0_stateless/02344_show_caches.reference b/tests/queries/0_stateless/02344_show_caches.reference index 68882f63e1f..2ee4f902ba1 100644 --- a/tests/queries/0_stateless/02344_show_caches.reference +++ b/tests/queries/0_stateless/02344_show_caches.reference @@ -5,6 +5,7 @@ s3_cache_3 s3_cache_multi s3_cache_4 s3_cache_5 +s3_cache_small_segment_size local_cache 
s3_cache_6 s3_cache_small diff --git a/tests/queries/0_stateless/02361_fsync_profile_events.sh b/tests/queries/0_stateless/02361_fsync_profile_events.sh index d54da9a49e5..85f82c59c71 100755 --- a/tests/queries/0_stateless/02361_fsync_profile_events.sh +++ b/tests/queries/0_stateless/02361_fsync_profile_events.sh @@ -45,13 +45,16 @@ for i in {1..100}; do # Non retriable errors if [[ $FileSync -ne 7 ]]; then + echo "FileSync: $FileSync != 7" >&2 exit 2 fi # Check that all files was synced if [[ $FileSync -ne $FileOpen ]]; then + echo "$FileSync (FileSync) != $FileOpen (FileOpen)" >&2 exit 3 fi if [[ $DirectorySync -ne 2 ]]; then + echo "DirectorySync: $DirectorySync != 2" >&2 exit 4 fi diff --git a/tests/queries/0_stateless/02416_json_tuple_to_array_schema_inference.reference b/tests/queries/0_stateless/02416_json_tuple_to_array_schema_inference.reference new file mode 100644 index 00000000000..f44e051e6bf --- /dev/null +++ b/tests/queries/0_stateless/02416_json_tuple_to_array_schema_inference.reference @@ -0,0 +1,3 @@ +x Array(Array(Nullable(Int64))) +x Tuple(Array(Array(Nullable(Int64))), Nullable(Int64)) +x Map(String, Array(Nullable(Int64))) diff --git a/tests/queries/0_stateless/02416_json_tuple_to_array_schema_inference.sql b/tests/queries/0_stateless/02416_json_tuple_to_array_schema_inference.sql new file mode 100644 index 00000000000..ae3142f0b21 --- /dev/null +++ b/tests/queries/0_stateless/02416_json_tuple_to_array_schema_inference.sql @@ -0,0 +1,4 @@ +desc format(JSONEachRow, '{"x" : [[42, null], [24, null]]}'); +desc format(JSONEachRow, '{"x" : [[[42, null], []], 24]}'); +desc format(JSONEachRow, '{"x" : {"key" : [42, null]}}'); + diff --git a/tests/queries/0_stateless/02447_drop_database_replica.reference b/tests/queries/0_stateless/02447_drop_database_replica.reference new file mode 100644 index 00000000000..1d65fe66c6e --- /dev/null +++ b/tests/queries/0_stateless/02447_drop_database_replica.reference @@ -0,0 +1,15 @@ +t +1 +2 +2 +2 +2 +2 +2 +rdb_default 1 1 +rdb_default 1 2 +2 +2 +2 +t +rdb_default_3 1 1 diff --git a/tests/queries/0_stateless/02447_drop_database_replica.sh b/tests/queries/0_stateless/02447_drop_database_replica.sh new file mode 100755 index 00000000000..4bfd6243c2e --- /dev/null +++ b/tests/queries/0_stateless/02447_drop_database_replica.sh @@ -0,0 +1,47 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +db="rdb_$CLICKHOUSE_DATABASE" + +$CLICKHOUSE_CLIENT -q "system flush logs" +$CLICKHOUSE_CLIENT --allow_experimental_database_replicated=1 -q "create database $db engine=Replicated('/test/$CLICKHOUSE_DATABASE/rdb', 's1', 'r1')" +$CLICKHOUSE_CLIENT --distributed_ddl_output_mode=none -q "create table $db.t as system.query_log" # Suppress style check: current_database=$CLICKHOUSE_DATABASE +$CLICKHOUSE_CLIENT -q "show tables from $db" + +$CLICKHOUSE_CLIENT -q "system drop database replica 's1|r1' from table t" 2>&1| grep -Fac "SYNTAX_ERROR" +$CLICKHOUSE_CLIENT -q "system drop database replica 's1|r1' from database $db" 2>&1| grep -Fac "There is a local database" +$CLICKHOUSE_CLIENT -q "system drop database replica 's1|r1' from zkpath '/test/$CLICKHOUSE_DATABASE/rdb'" 2>&1| grep -Fac "There is a local database" +$CLICKHOUSE_CLIENT -q "system drop database replica 's1|r1' from zkpath '/test/$CLICKHOUSE_DATABASE/rdb/'" 2>&1| grep -Fac "There is a local database" + +$CLICKHOUSE_CLIENT -q "system drop database replica 's1|r1' from zkpath '/test/$CLICKHOUSE_DATABASE/'" 2>&1| grep -Fac "does not look like a path of Replicated database" +$CLICKHOUSE_CLIENT -q "system drop database replica 's2|r1' from zkpath '/test/$CLICKHOUSE_DATABASE/rdb'" 2>&1| grep -Fac "does not exist" +$CLICKHOUSE_CLIENT -q "system drop database replica 's2/r1' from zkpath '/test/$CLICKHOUSE_DATABASE/rdb'" 2>&1| grep -Fac "Invalid replica name" + +db2="${db}_2" +$CLICKHOUSE_CLIENT --allow_experimental_database_replicated=1 -q "create database $db2 engine=Replicated('/test/$CLICKHOUSE_DATABASE/rdb', 's1', 'r2')" +$CLICKHOUSE_CLIENT -q "system sync database replica $db" +$CLICKHOUSE_CLIENT -q "select cluster, shard_num, replica_num from system.clusters where cluster='$db' order by shard_num, replica_num" +$CLICKHOUSE_CLIENT -q "system drop database replica 's1|r1' from database $db2" 2>&1| grep -Fac "is active, cannot drop it" + +$CLICKHOUSE_CLIENT -q "detach database $db2" +$CLICKHOUSE_CLIENT -q "system drop database replica 's1|r2' from database $db" +$CLICKHOUSE_CLIENT -q "attach database $db2" 2>/dev/null +$CLICKHOUSE_CLIENT --distributed_ddl_output_mode=none -q "create table $db2.t2 as system.query_log" 2>&1| grep -Fac "Database is in readonly mode" # Suppress style check: current_database=$CLICKHOUSE_DATABASE + +$CLICKHOUSE_CLIENT -q "detach database $db" +$CLICKHOUSE_CLIENT -q "system drop database replica 's1|r1' from zkpath '/test/$CLICKHOUSE_DATABASE/rdb/'" +$CLICKHOUSE_CLIENT -q "attach database $db" 2>/dev/null +$CLICKHOUSE_CLIENT --distributed_ddl_output_mode=none -q "create table $db.t2 as system.query_log" 2>&1| grep -Fac "Database is in readonly mode" # Suppress style check: current_database=$CLICKHOUSE_DATABASE +$CLICKHOUSE_CLIENT -q "show tables from $db" + +db3="${db}_3" +$CLICKHOUSE_CLIENT --allow_experimental_database_replicated=1 -q "create database $db3 engine=Replicated('/test/$CLICKHOUSE_DATABASE/rdb', 's1', 'r1')" +$CLICKHOUSE_CLIENT -q "system sync database replica $db3" +$CLICKHOUSE_CLIENT -q "select cluster, shard_num, replica_num from system.clusters where cluster='$db3'" + +$CLICKHOUSE_CLIENT -q "drop database $db" +$CLICKHOUSE_CLIENT -q "drop database $db2" +$CLICKHOUSE_CLIENT -q "drop database $db3" diff --git a/tests/queries/0_stateless/02448_clone_replica_lost_part.sql b/tests/queries/0_stateless/02448_clone_replica_lost_part.sql index 67a329ee1f0..4befe952a14 100644 --- a/tests/queries/0_stateless/02448_clone_replica_lost_part.sql +++ 
b/tests/queries/0_stateless/02448_clone_replica_lost_part.sql @@ -120,9 +120,14 @@ insert into rmt1 values (100); insert into rmt2 values (100); insert into rmt1 values (200); insert into rmt2 values (200); + +-- otherwise we can get exception on drop part +system sync replica rmt2; +system sync replica rmt1; + detach table rmt1; --- create a gap in block numbers buy dropping part +-- create a gap in block numbers by dropping part insert into rmt2 values (300); alter table rmt2 drop part 'all_19_19_0'; -- remove 200 insert into rmt2 values (400); diff --git a/tests/queries/0_stateless/02460_prewhere_row_level_policy.reference b/tests/queries/0_stateless/02460_prewhere_row_level_policy.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02460_prewhere_row_level_policy.sql b/tests/queries/0_stateless/02460_prewhere_row_level_policy.sql new file mode 100644 index 00000000000..fc98fa773b4 --- /dev/null +++ b/tests/queries/0_stateless/02460_prewhere_row_level_policy.sql @@ -0,0 +1,9 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/40956#issuecomment-1262096612 +DROP TABLE IF EXISTS row_level_policy_prewhere; +DROP ROW POLICY IF EXISTS row_level_policy_prewhere_policy0 ON row_level_policy_prewhere; + +CREATE TABLE row_level_policy_prewhere (x Int16, y String) ENGINE = MergeTree ORDER BY x; +INSERT INTO row_level_policy_prewhere(y, x) VALUES ('A',1), ('B',2), ('C',3); +CREATE ROW POLICY row_level_policy_prewhere_policy0 ON row_level_policy_prewhere FOR SELECT USING x >= 0 TO default; +SELECT * FROM row_level_policy_prewhere PREWHERE y = 'foo'; +DROP TABLE row_level_policy_prewhere; diff --git a/tests/queries/0_stateless/02461_prewhere_row_level_policy_lightweight_delete.reference.j2 b/tests/queries/0_stateless/02461_prewhere_row_level_policy_lightweight_delete.reference.j2 new file mode 100644 index 00000000000..ca7b300e00e --- /dev/null +++ b/tests/queries/0_stateless/02461_prewhere_row_level_policy_lightweight_delete.reference.j2 @@ -0,0 +1,29 @@ +{% for index_granularity in [999, 1000, 1001, 9999, 10000, 10001] -%} +-- { echoOn } + +SELECT count() FROM url_na_log; +130000 +SELECT rows FROM system.parts WHERE database = currentDatabase() AND table = 'url_na_log' AND active; +130000 +SELECT count() FROM url_na_log PREWHERE DateVisit >= '2022-08-10' AND DateVisit <= '2022-08-20' WHERE SiteId = 209 SETTINGS max_block_size = 200000, max_threads = 1; +110000 +-- Delete more than a half rows (60K) from the range 2022-08-10 .. 2022-08-20 +-- There should be 50K rows remaining in this range +DELETE FROM url_na_log WHERE SiteId = 209 AND DateVisit >= '2022-08-13' AND DateVisit <= '2022-08-18'; +SELECT count() FROM url_na_log; +70000 +SELECT rows FROM system.parts WHERE database = currentDatabase() AND table = 'url_na_log' AND active; +130000 +SELECT count() FROM url_na_log PREWHERE DateVisit >= '2022-08-10' AND DateVisit <= '2022-08-20' WHERE SiteId = 209 SETTINGS max_block_size = 200000, max_threads = 1; +50000 +-- Hide more than a half of remaining rows (30K) from the range 2022-08-10 .. 
2022-08-20 using row policy +-- Now this range should have 20K rows left +CREATE ROW POLICY url_na_log_policy0 ON url_na_log FOR SELECT USING DateVisit < '2022-08-11' or DateVisit > '2022-08-19' TO default; +SELECT count() FROM url_na_log; +40000 +SELECT rows FROM system.parts WHERE database = currentDatabase() AND table = 'url_na_log' AND active; +130000 +SELECT count() FROM url_na_log PREWHERE DateVisit >= '2022-08-10' AND DateVisit <= '2022-08-20' WHERE SiteId = 209 SETTINGS max_block_size = 200000, max_threads = 1; +20000 +DROP ROW POLICY url_na_log_policy0 ON url_na_log; +{% endfor -%} diff --git a/tests/queries/0_stateless/02461_prewhere_row_level_policy_lightweight_delete.sql.j2 b/tests/queries/0_stateless/02461_prewhere_row_level_policy_lightweight_delete.sql.j2 new file mode 100644 index 00000000000..e1ec348e6ac --- /dev/null +++ b/tests/queries/0_stateless/02461_prewhere_row_level_policy_lightweight_delete.sql.j2 @@ -0,0 +1,59 @@ +{% for index_granularity in [999, 1000, 1001, 9999, 10000, 10001] %} + +DROP TABLE IF EXISTS url_na_log; + +CREATE TABLE url_na_log(SiteId UInt32, DateVisit Date, PRIMARY KEY (SiteId)) +ENGINE = MergeTree() +ORDER BY (SiteId, DateVisit) +SETTINGS index_granularity = {{ index_granularity }}, min_bytes_for_wide_part = 0; + +-- Insert some data to have 110K rows in the range 2022-08-10 .. 2022-08-20 and some more rows before and after that range +insert into url_na_log select 209, '2022-08-09' from numbers(10000); +insert into url_na_log select 209, '2022-08-10' from numbers(10000); +insert into url_na_log select 209, '2022-08-11' from numbers(10000); +insert into url_na_log select 209, '2022-08-12' from numbers(10000); +insert into url_na_log select 209, '2022-08-13' from numbers(10000); +insert into url_na_log select 209, '2022-08-14' from numbers(10000); +insert into url_na_log select 209, '2022-08-15' from numbers(10000); +insert into url_na_log select 209, '2022-08-16' from numbers(10000); +insert into url_na_log select 209, '2022-08-17' from numbers(10000); +insert into url_na_log select 209, '2022-08-18' from numbers(10000); +insert into url_na_log select 209, '2022-08-19' from numbers(10000); +insert into url_na_log select 209, '2022-08-20' from numbers(10000); +insert into url_na_log select 209, '2022-08-21' from numbers(10000); + + +SET mutations_sync=2; +SET allow_experimental_lightweight_delete=1; + +OPTIMIZE TABLE url_na_log FINAL; + +-- { echoOn } + +SELECT count() FROM url_na_log; +SELECT rows FROM system.parts WHERE database = currentDatabase() AND table = 'url_na_log' AND active; +SELECT count() FROM url_na_log PREWHERE DateVisit >= '2022-08-10' AND DateVisit <= '2022-08-20' WHERE SiteId = 209 SETTINGS max_block_size = 200000, max_threads = 1; + + +-- Delete more than a half rows (60K) from the range 2022-08-10 .. 2022-08-20 +-- There should be 50K rows remaining in this range +DELETE FROM url_na_log WHERE SiteId = 209 AND DateVisit >= '2022-08-13' AND DateVisit <= '2022-08-18'; + +SELECT count() FROM url_na_log; +SELECT rows FROM system.parts WHERE database = currentDatabase() AND table = 'url_na_log' AND active; +SELECT count() FROM url_na_log PREWHERE DateVisit >= '2022-08-10' AND DateVisit <= '2022-08-20' WHERE SiteId = 209 SETTINGS max_block_size = 200000, max_threads = 1; + + +-- Hide more than a half of remaining rows (30K) from the range 2022-08-10 .. 
2022-08-20 using row policy +-- Now this range should have 20K rows left +CREATE ROW POLICY url_na_log_policy0 ON url_na_log FOR SELECT USING DateVisit < '2022-08-11' or DateVisit > '2022-08-19' TO default; + +SELECT count() FROM url_na_log; +SELECT rows FROM system.parts WHERE database = currentDatabase() AND table = 'url_na_log' AND active; +SELECT count() FROM url_na_log PREWHERE DateVisit >= '2022-08-10' AND DateVisit <= '2022-08-20' WHERE SiteId = 209 SETTINGS max_block_size = 200000, max_threads = 1; + +DROP ROW POLICY url_na_log_policy0 ON url_na_log; + +-- { echoOff } + +{% endfor %} diff --git a/tests/queries/0_stateless/02473_multistep_prewhere.python b/tests/queries/0_stateless/02473_multistep_prewhere.python new file mode 100644 index 00000000000..a12656f636b --- /dev/null +++ b/tests/queries/0_stateless/02473_multistep_prewhere.python @@ -0,0 +1,148 @@ +#!/usr/bin/env python3 +import requests +import os +import sys + +CURDIR = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, os.path.join(CURDIR, 'helpers')) + +from pure_http_client import ClickHouseClient + + +class Tester: + ''' + - Creates test table + - Deletes the specified range of rows + - Masks another range using row-level policy + - Runs some read queries and checks that the results are as expected + ''' + def __init__(self, session, url, index_granularity, total_rows): + self.session = session + self.url = url + self.index_granularity = index_granularity + self.total_rows = total_rows + self.reported_errors = set() + self.repro_queries = [] + + def report_error(self): + print('Repro steps:', '\n\n\t'.join(self.repro_queries)) + exit(1) + + def query(self, query_text, include_in_repro_steps = True, expected_data = None): + self.repro_queries.append(query_text) + resp = self.session.post(self.url, data=query_text) + if resp.status_code != 200: + # Group similar errors + error = resp.text[0:40] + if error not in self.reported_errors: + self.reported_errors.add(error) + print('Code:', resp.status_code) + print('Result:', resp.text) + self.report_error() + + result = resp.text + # Check that the result is as expected + if ((not expected_data is None) and (int(result) != len(expected_data))): + print('Expected {} rows, got {}'.format(len(expected_data), result)) + print('Expected data:' + str(expected_data)) + self.report_error() + + if not include_in_repro_steps: + self.repro_queries.pop() + + + def check_data(self, all_data, delete_range_start, delete_range_end, row_level_policy_range_start, row_level_policy_range_end): + all_data_after_delete = all_data[ + ~((all_data.a == 0) & + (all_data.b > delete_range_start) & + (all_data.b <= delete_range_end))] + all_data_after_row_policy = all_data_after_delete[ + (all_data_after_delete.b <= row_level_policy_range_start) | + (all_data_after_delete.b > row_level_policy_range_end)] + + for to_select in ['count()', 'sum(d)']: # Test reading with and without column with default value + self.query('SELECT {} FROM tab_02473;'.format(to_select), False, all_data_after_row_policy) + + delta = 10 + for query_range_start in [0, delta]: + for query_range_end in [self.total_rows - delta]: #, self.total_rows]: + expected = all_data_after_row_policy[ + (all_data_after_row_policy.a == 0) & + (all_data_after_row_policy.b > query_range_start) & + (all_data_after_row_policy.b <= query_range_end)] + self.query('SELECT {} from tab_02473 PREWHERE b > {} AND b <= {} WHERE a == 0;'.format( + to_select, query_range_start, query_range_end), False, expected) + + expected = all_data_after_row_policy[ + 
(all_data_after_row_policy.a == 0) & + (all_data_after_row_policy.c > query_range_start) & + (all_data_after_row_policy.c <= query_range_end)] + self.query('SELECT {} from tab_02473 PREWHERE c > {} AND c <= {} WHERE a == 0;'.format( + to_select, query_range_start, query_range_end), False, expected) + + expected = all_data_after_row_policy[ + (all_data_after_row_policy.a == 0) & + ((all_data_after_row_policy.c <= query_range_start) | + (all_data_after_row_policy.c > query_range_end))] + self.query('SELECT {} from tab_02473 PREWHERE c <= {} OR c > {} WHERE a == 0;'.format( + to_select, query_range_start, query_range_end), False, expected) + + + def run_test(self, delete_range_start, delete_range_end, row_level_policy_range_start, row_level_policy_range_end): + self.repro_queries = [] + + self.query(''' + CREATE TABLE tab_02473 (a Int8, b Int32, c Int32, PRIMARY KEY (a)) + ENGINE = MergeTree() ORDER BY (a, b) + SETTINGS min_bytes_for_wide_part = 0, index_granularity = {};'''.format(self.index_granularity)) + + self.query('INSERT INTO tab_02473 select 0, number+1, number+1 FROM numbers({});'.format(self.total_rows)) + + client = ClickHouseClient() + all_data = client.query_return_df("SELECT a, b, c, 1 as d FROM tab_02473 FORMAT TabSeparatedWithNames;") + + self.query('OPTIMIZE TABLE tab_02473 FINAL SETTINGS mutations_sync=2;') + + # After all data has been written add a column with default value + self.query('ALTER TABLE tab_02473 ADD COLUMN d Int64 DEFAULT 1;') + + self.check_data(all_data, -100, -100, -100, -100) + + self.query('DELETE FROM tab_02473 WHERE a = 0 AND b > {} AND b <= {};'.format( + delete_range_start, delete_range_end)) + + self.check_data(all_data, delete_range_start, delete_range_end, -100, -100) + + self.query('CREATE ROW POLICY policy_tab_02473 ON tab_02473 FOR SELECT USING b <= {} OR b > {} TO default;'.format( + row_level_policy_range_start, row_level_policy_range_end)) + + self.check_data(all_data, delete_range_start, delete_range_end, row_level_policy_range_start, row_level_policy_range_end) + + self.query('DROP POLICY policy_tab_02473 ON tab_02473;') + + self.query('DROP TABLE tab_02473;') + + + +def main(): + # Set mutations to synchronous mode and enable lightweight DELETE's + url = os.environ['CLICKHOUSE_URL'] + '&mutations_sync=2&allow_experimental_lightweight_delete=1&max_threads=1' + + default_index_granularity = 10; + total_rows = 8 * default_index_granularity + step = default_index_granularity + session = requests.Session() + for index_granularity in [default_index_granularity-1, default_index_granularity]: # [default_index_granularity-1, default_index_granularity+1, default_index_granularity]: + tester = Tester(session, url, index_granularity, total_rows) + # Test combinations of ranges of various size masked by lightweight DELETES + # along with ranges of various size masked by row-level policies + for delete_range_start in range(0, total_rows, 3 * step): + for delete_range_end in range(delete_range_start + 3 * step, total_rows, 2 * step): + for row_level_policy_range_start in range(0, total_rows, 3 * step): + for row_level_policy_range_end in range(row_level_policy_range_start + 3 * step, total_rows, 2 * step): + tester.run_test(delete_range_start, delete_range_end, row_level_policy_range_start, row_level_policy_range_end) + + +if __name__ == "__main__": + main() + diff --git a/tests/queries/0_stateless/02473_multistep_prewhere.reference b/tests/queries/0_stateless/02473_multistep_prewhere.reference new file mode 100644 index 00000000000..e69de29bb2d diff 
--git a/tests/queries/0_stateless/02473_multistep_prewhere.sh b/tests/queries/0_stateless/02473_multistep_prewhere.sh new file mode 100755 index 00000000000..bbb411b0a32 --- /dev/null +++ b/tests/queries/0_stateless/02473_multistep_prewhere.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash +# Tags: long + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# We should have correct env vars from shell_config.sh to run this test + +python3 "$CURDIR"/02473_multistep_prewhere.python + diff --git a/tests/queries/0_stateless/02475_bson_each_row_format.reference b/tests/queries/0_stateless/02475_bson_each_row_format.reference index b4a841ed3eb..5922167dc97 100644 --- a/tests/queries/0_stateless/02475_bson_each_row_format.reference +++ b/tests/queries/0_stateless/02475_bson_each_row_format.reference @@ -233,11 +233,11 @@ Schema inference x Nullable(Int32) x Nullable(Int64) x Nullable(Int64) -FAIL +OK x Array(Nullable(Int32)) x Array(Nullable(Int64)) x Array(Nullable(Int64)) -FAIL +OK OK OK OK diff --git a/tests/queries/0_stateless/02475_bson_each_row_format.sh b/tests/queries/0_stateless/02475_bson_each_row_format.sh index 6de33b38183..b4efea7e326 100755 --- a/tests/queries/0_stateless/02475_bson_each_row_format.sh +++ b/tests/queries/0_stateless/02475_bson_each_row_format.sh @@ -164,7 +164,7 @@ $CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow) select $CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" $CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow) select number::Int64 as x from numbers(2)" $CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" -$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow) select number::UInt64 as x from numbers(2)" +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow) select toString(number) as x from numbers(2)" $CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" 2>&1 | grep -q -F "TYPE_MISMATCH" && echo "OK" || echo "FAIL" $CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow) select [number::Bool] as x from numbers(2) settings engine_file_truncate_on_insert=1" @@ -174,7 +174,7 @@ $CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow) select $CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" $CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow) select [number::Int64] as x from numbers(2)" $CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" -$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow) select [number::UInt64] as x from numbers(2)" +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow) select [toString(number)] as x from numbers(2)" $CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" 2>&1 | grep -q -F "TYPE_MISMATCH" && echo "OK" || echo "FAIL" $CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow) select [] as x from numbers(2) settings engine_file_truncate_on_insert=1" diff --git a/tests/queries/0_stateless/02476_fix_lambda_parsing.reference b/tests/queries/0_stateless/02476_fix_lambda_parsing.reference index 18cb46ce23c..de508c7a0d3 100644 --- a/tests/queries/0_stateless/02476_fix_lambda_parsing.reference +++ b/tests/queries/0_stateless/02476_fix_lambda_parsing.reference @@ -1,8 +1,4 @@ SELECT f(x, y -> z) -SELECT f(x, y -> z) -SELECT f((x, y) -> z) SELECT f((x, y) -> z) SELECT f((x, y) -> z) SELECT f(x, (x, y) -> z) -SELECT f(x, (x, y) -> z) -CREATE FUNCTION func 
AS x -> plus(x, (x -> ('2' + 2)) -> plus(1), 1) diff --git a/tests/queries/0_stateless/02476_fix_lambda_parsing.sh b/tests/queries/0_stateless/02476_fix_lambda_parsing.sh index 641ef59a170..d47588c100c 100755 --- a/tests/queries/0_stateless/02476_fix_lambda_parsing.sh +++ b/tests/queries/0_stateless/02476_fix_lambda_parsing.sh @@ -8,14 +8,7 @@ set -e format="$CLICKHOUSE_FORMAT" -echo "SELECT f(x, tuple(y) -> z)" | $format echo "SELECT f(x, (y) -> z)" | $format - echo "SELECT f(x, y -> z)" | $format echo "SELECT f((x, y) -> z)" | $format -echo "SELECT f(tuple(x, y) -> z)" | $format - echo "SELECT f(x, (x, y) -> z)" | $format -echo "SELECT f(x, tuple(x, y) -> z)" | $format - -echo "CREATE FUNCTION func AS x -> plus(x, (x -> ('2' + 2)) -> plus(1), 1)" | $format | $format diff --git a/tests/queries/0_stateless/02479_mysql_connect_to_self.sql b/tests/queries/0_stateless/02479_mysql_connect_to_self.sql index a7aa6a96c1d..7ff5b3e3382 100644 --- a/tests/queries/0_stateless/02479_mysql_connect_to_self.sql +++ b/tests/queries/0_stateless/02479_mysql_connect_to_self.sql @@ -1,3 +1,4 @@ -- Tags: no-fasttest SELECT * FROM mysql('127.0.0.1:9004', system, one, 'default', '') +SETTINGS send_logs_level = 'fatal'; -- failed connection tries are ok, if it succeeded after retry. diff --git a/tests/queries/0_stateless/02481_array_join_with_map.reference b/tests/queries/0_stateless/02481_array_join_with_map.reference new file mode 100644 index 00000000000..81fa77358db --- /dev/null +++ b/tests/queries/0_stateless/02481_array_join_with_map.reference @@ -0,0 +1,33 @@ +Hello 1 (1,'1') +Hello 2 (2,'2') +World 3 (3,'3') +World 4 (4,'4') +World 5 (5,'5') +Hello 1 (1,'1') +Hello 2 (2,'2') +World 3 (3,'3') +World 4 (4,'4') +World 5 (5,'5') +Goodbye 0 (0,'') +Hello (1,'1') +Hello (2,'2') +World (3,'3') +World (4,'4') +World (5,'5') +Hello (1,'1') +Hello (2,'2') +World (3,'3') +World (4,'4') +World (5,'5') +Goodbye (0,'') +Hello (1,'1') (1,'1') +Hello (2,'2') (0,'') +World (3,'3') (3,'3') +World (4,'4') (4,'4') +World (5,'5') (0,'') +Hello (1,'1') (1,'1') +Hello (2,'2') (0,'') +World (3,'3') (3,'3') +World (4,'4') (4,'4') +World (5,'5') (0,'') +Goodbye (0,'') (0,'') diff --git a/tests/queries/0_stateless/02481_array_join_with_map.sql b/tests/queries/0_stateless/02481_array_join_with_map.sql new file mode 100644 index 00000000000..564b99e6e47 --- /dev/null +++ b/tests/queries/0_stateless/02481_array_join_with_map.sql @@ -0,0 +1,25 @@ +DROP TABLE IF EXISTS arrays_test; + +CREATE TABLE arrays_test +( + s String, + arr1 Array(UInt8), + map1 Map(UInt8, String), + map2 Map(UInt8, String) +) ENGINE = Memory; + +INSERT INTO arrays_test +VALUES ('Hello', [1,2], map(1, '1', 2, '2'), map(1, '1')), ('World', [3,4,5], map(3, '3', 4, '4', 5, '5'), map(3, '3', 4, '4')), ('Goodbye', [], map(), map()); + + +select s, arr1, map1 from arrays_test array join arr1, map1 settings enable_unaligned_array_join = 1; + +select s, arr1, map1 from arrays_test left array join arr1, map1 settings enable_unaligned_array_join = 1; + +select s, map1 from arrays_test array join map1; + +select s, map1 from arrays_test left array join map1; + +select s, map1, map2 from arrays_test array join map1, map2 settings enable_unaligned_array_join = 1; + +select s, map1, map2 from arrays_test left array join map1, map2 settings enable_unaligned_array_join = 1; diff --git a/tests/queries/0_stateless/02481_async_insert_dedup.python b/tests/queries/0_stateless/02481_async_insert_dedup.python index 404165941b9..16808aeb7a2 100644 --- 
a/tests/queries/0_stateless/02481_async_insert_dedup.python +++ b/tests/queries/0_stateless/02481_async_insert_dedup.python @@ -104,13 +104,17 @@ while (True): result = result.split() err = False errMsg = "" - for i in range(total_number): - expect = str(i+1) - real = result[i] - if expect != real: - err = True - errMsg = "error, {} is not equal to {} for {}-th elements, total rows is {}".format(real, expect, i, len(result)) - break + if len(result) != total_number: + err = True + errMsg = f"the size of result is {len(result)}. we expect {total_number}." + else: + for i in range(total_number): + expect = str(i+1) + real = result[i] + if expect != real: + err = True + errMsg = f"error, real value {real} is not equal to expect value {expect} for {i}-th elements" + break # retry several times to get stable results. if err and retry >= 5: print (errMsg, flush=True) diff --git a/tests/queries/0_stateless/02481_prewhere_filtered_rows_div_by_zero.reference b/tests/queries/0_stateless/02481_prewhere_filtered_rows_div_by_zero.reference new file mode 100644 index 00000000000..bb8ce4a8396 --- /dev/null +++ b/tests/queries/0_stateless/02481_prewhere_filtered_rows_div_by_zero.reference @@ -0,0 +1,76 @@ +-- { echoOn } +CREATE TABLE test_filter(a Int32, b Int32, c Int32) ENGINE = MergeTree() ORDER BY a SETTINGS index_granularity = 3; +INSERT INTO test_filter SELECT number, number+1, (number/2 + 1) % 2 FROM numbers(15); +SELECT _part_offset, intDiv(_part_offset, 3) as granule, * FROM test_filter ORDER BY _part_offset; +0 0 0 1 1 +1 0 1 2 1 +2 0 2 3 0 +3 1 3 4 0 +4 1 4 5 1 +5 1 5 6 1 +6 2 6 7 0 +7 2 7 8 0 +8 2 8 9 1 +9 3 9 10 1 +10 3 10 11 0 +11 3 11 12 0 +12 4 12 13 1 +13 4 13 14 1 +14 4 14 15 0 +-- Check that division by zero occurs on some rows +SELECT intDiv(b, c) FROM test_filter; -- { serverError ILLEGAL_DIVISION } +-- Filter out those rows using WHERE or PREWHERE +SELECT intDiv(b, c) FROM test_filter WHERE c != 0; +1 +2 +5 +6 +9 +10 +13 +14 +SELECT intDiv(b, c) FROM test_filter PREWHERE c != 0; +1 +2 +5 +6 +9 +10 +13 +14 +SELECT intDiv(b, c) FROM test_filter PREWHERE c != 0 WHERE b%2 != 0; +1 +5 +9 +13 +SET mutations_sync = 2, allow_experimental_lightweight_delete = 1; +-- Delete all rows where division by zero could occur +DELETE FROM test_filter WHERE c = 0; +-- Test that now division by zero doesn't occur without explicit condition +SELECT intDiv(b, c) FROM test_filter; +1 +2 +5 +6 +9 +10 +13 +14 +SELECT * FROM test_filter PREWHERE intDiv(b, c) > 0; +0 1 1 +1 2 1 +4 5 1 +5 6 1 +8 9 1 +9 10 1 +12 13 1 +13 14 1 +SELECT * FROM test_filter PREWHERE b != 0 WHERE intDiv(b, c) > 0; +0 1 1 +1 2 1 +4 5 1 +5 6 1 +8 9 1 +9 10 1 +12 13 1 +13 14 1 diff --git a/tests/queries/0_stateless/02481_prewhere_filtered_rows_div_by_zero.sql b/tests/queries/0_stateless/02481_prewhere_filtered_rows_div_by_zero.sql new file mode 100644 index 00000000000..94ffb1b8730 --- /dev/null +++ b/tests/queries/0_stateless/02481_prewhere_filtered_rows_div_by_zero.sql @@ -0,0 +1,28 @@ +DROP TABLE IF EXISTS test_filter; + +-- { echoOn } +CREATE TABLE test_filter(a Int32, b Int32, c Int32) ENGINE = MergeTree() ORDER BY a SETTINGS index_granularity = 3; + +INSERT INTO test_filter SELECT number, number+1, (number/2 + 1) % 2 FROM numbers(15); + +SELECT _part_offset, intDiv(_part_offset, 3) as granule, * FROM test_filter ORDER BY _part_offset; + +-- Check that division by zero occurs on some rows +SELECT intDiv(b, c) FROM test_filter; -- { serverError ILLEGAL_DIVISION } +-- Filter out those rows using WHERE or PREWHERE +SELECT intDiv(b, c) 
FROM test_filter WHERE c != 0; +SELECT intDiv(b, c) FROM test_filter PREWHERE c != 0; +SELECT intDiv(b, c) FROM test_filter PREWHERE c != 0 WHERE b%2 != 0; + + +SET mutations_sync = 2, allow_experimental_lightweight_delete = 1; + +-- Delete all rows where division by zero could occur +DELETE FROM test_filter WHERE c = 0; +-- Test that now division by zero doesn't occur without explicit condition +SELECT intDiv(b, c) FROM test_filter; +SELECT * FROM test_filter PREWHERE intDiv(b, c) > 0; +SELECT * FROM test_filter PREWHERE b != 0 WHERE intDiv(b, c) > 0; + +-- { echoOff } +DROP TABLE test_filter; diff --git a/tests/queries/0_stateless/02482_capnp_list_of_structs.reference b/tests/queries/0_stateless/02482_capnp_list_of_structs.reference new file mode 100644 index 00000000000..002eae70f97 --- /dev/null +++ b/tests/queries/0_stateless/02482_capnp_list_of_structs.reference @@ -0,0 +1,4 @@ +[(1,3),(2,4)] +[1,2] [3,4] +[1,2] [3,4] +[1,2] diff --git a/tests/queries/0_stateless/02482_capnp_list_of_structs.sh b/tests/queries/0_stateless/02482_capnp_list_of_structs.sh new file mode 100755 index 00000000000..091bd4dba2a --- /dev/null +++ b/tests/queries/0_stateless/02482_capnp_list_of_structs.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-parallel + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') +touch $USER_FILES_PATH/data.capnp + +SCHEMADIR=$(clickhouse-client --query "select * from file('data.capnp', 'CapnProto', 'val1 char') settings format_schema='nonexist:Message'" 2>&1 | grep Exception | grep -oP "file \K.*(?=/nonexist.capnp)") +CLIENT_SCHEMADIR=$CURDIR/format_schemas +SERVER_SCHEMADIR=test_02482 +mkdir -p $SCHEMADIR/$SERVER_SCHEMADIR +cp -r $CLIENT_SCHEMADIR/02482_* $SCHEMADIR/$SERVER_SCHEMADIR/ + + +$CLICKHOUSE_CLIENT -q "insert into function file(02482_data.capnp, auto, 'nested Nested(x Int64, y Int64)') select [1,2], [3,4] settings format_schema='$SERVER_SCHEMADIR/02482_list_of_structs.capnp:Nested', engine_file_truncate_on_insert=1" +$CLICKHOUSE_CLIENT -q "select * from file(02482_data.capnp) settings format_schema='$SERVER_SCHEMADIR/02482_list_of_structs.capnp:Nested'" +$CLICKHOUSE_CLIENT -q "select * from file(02482_data.capnp, auto, 'nested Nested(x Int64, y Int64)') settings format_schema='$SERVER_SCHEMADIR/02482_list_of_structs.capnp:Nested'" +$CLICKHOUSE_CLIENT -q "select * from file(02482_data.capnp, auto, '\`nested.x\` Array(Int64), \`nested.y\` Array(Int64)') settings format_schema='$SERVER_SCHEMADIR/02482_list_of_structs.capnp:Nested'" +$CLICKHOUSE_CLIENT -q "select * from file(02482_data.capnp, auto, '\`nested.x\` Array(Int64)') settings format_schema='$SERVER_SCHEMADIR/02482_list_of_structs.capnp:Nested'" + +rm $USER_FILES_PATH/data.capnp +rm $USER_FILES_PATH/02482_data.capnp diff --git a/tests/queries/0_stateless/02482_execute_functions_before_sorting_bug.reference b/tests/queries/0_stateless/02482_execute_functions_before_sorting_bug.reference new file mode 100644 index 00000000000..a7eb5000556 --- /dev/null +++ b/tests/queries/0_stateless/02482_execute_functions_before_sorting_bug.reference @@ -0,0 +1,6 @@ +10000000001 +10000000002 +100000000010000000000 +100000000010000000000 +14 +15 diff --git a/tests/queries/0_stateless/02482_execute_functions_before_sorting_bug.sql 
b/tests/queries/0_stateless/02482_execute_functions_before_sorting_bug.sql new file mode 100644 index 00000000000..f1a17df5fe5 --- /dev/null +++ b/tests/queries/0_stateless/02482_execute_functions_before_sorting_bug.sql @@ -0,0 +1,9 @@ +set allow_suspicious_low_cardinality_types=1; +drop table if exists test; +create table test (x LowCardinality(Int32)) engine=Memory; +insert into test select 1; +insert into test select 2; +select x + 1e10 from test order by 1e10, x; +select x + (1e10 + 1e20) from test order by (1e10 + 1e20), x; +select x + (pow(2, 2) + pow(3, 2)) from test order by (pow(2,2) + pow(3, 2)), x; +drop table test; diff --git a/tests/queries/0_stateless/02482_value_block_assert.reference b/tests/queries/0_stateless/02482_value_block_assert.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02482_value_block_assert.sql b/tests/queries/0_stateless/02482_value_block_assert.sql new file mode 100644 index 00000000000..8684776f45f --- /dev/null +++ b/tests/queries/0_stateless/02482_value_block_assert.sql @@ -0,0 +1,24 @@ +SET allow_suspicious_low_cardinality_types=1; +CREATE TABLE range_key_dictionary_source_table__fuzz_323 +( + `key` UInt256, + `start_date` Int8, + `end_date` LowCardinality(UInt256), + `value` Tuple(UInt8, Array(DateTime), Decimal(9, 1), Array(Int16), Array(UInt8)), + `value_nullable` UUID +) +ENGINE = TinyLog; +INSERT INTO range_key_dictionary_source_table__fuzz_323 FORMAT Values +(1, toDate('2019-05-20'), toDate('2019-05-20'), 'First', 'First'); -- { clientError CANNOT_PARSE_INPUT_ASSERTION_FAILED } + + +CREATE TABLE complex_key_dictionary_source_table__fuzz_267 +( + `id` Decimal(38, 30), + `id_key` Array(UUID), + `value` Array(Nullable(DateTime64(3))), + `value_nullable` Nullable(UUID) +) +ENGINE = TinyLog; +INSERT INTO complex_key_dictionary_source_table__fuzz_267 FORMAT Values +(1, 'key', 'First', 'First'); -- { clientError CANNOT_READ_ARRAY_FROM_TEXT } diff --git a/tests/queries/0_stateless/02482_value_block_parsing.reference b/tests/queries/0_stateless/02482_value_block_parsing.reference new file mode 100644 index 00000000000..e8183f05f5d --- /dev/null +++ b/tests/queries/0_stateless/02482_value_block_parsing.reference @@ -0,0 +1,3 @@ +1 +1 +1 diff --git a/tests/queries/0_stateless/02482_value_block_parsing.sh b/tests/queries/0_stateless/02482_value_block_parsing.sh new file mode 100755 index 00000000000..b74d3f395f0 --- /dev/null +++ b/tests/queries/0_stateless/02482_value_block_parsing.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --query=" + CREATE TABLE simple_key_dictionary_source_table__fuzz_48 + ( + id Nullable(Int8), + value Array(Date), + value_nullable UUID + ) + ENGINE = TinyLog;" + +echo "INSERT INTO simple_key_dictionary_source_table__fuzz_48 FORMAT Values (null, [], '61f0c404-5cb3-11e7-907b-a6006ad3dba0') +( -- Bu " | ${CLICKHOUSE_CURL} -s "${CLICKHOUSE_URL}" --data-binary @- -v 2>&1 | grep -c 'X-ClickHouse-Exception-Code: 62' + + +echo "INSERT INTO simple_key_dictionary_source_table__fuzz_48 FORMAT Values + (!Invalid" | ${CLICKHOUSE_CURL} -s "${CLICKHOUSE_URL}" --data-binary @- -v 2>&1 | grep -c 'X-ClickHouse-Exception-Code: 62' + +echo "INSERT INTO simple_key_dictionary_source_table__fuzz_48 FORMAT Values (null, [], '61f0c404-5cb3-11e7-907b-a6006ad3dba0') + ,(null, [], '61f0c404-5cb3-11e7-907b-a6006ad3dba0'), + (!!!!!!3adas + )" | ${CLICKHOUSE_CURL} -s "${CLICKHOUSE_URL}" --data-binary @- -v 2>&1 | grep -c 'X-ClickHouse-Exception-Code: 62' diff --git a/tests/queries/0_stateless/02483_capnp_decimals.reference b/tests/queries/0_stateless/02483_capnp_decimals.reference new file mode 100644 index 00000000000..9885da95ce2 --- /dev/null +++ b/tests/queries/0_stateless/02483_capnp_decimals.reference @@ -0,0 +1,2 @@ +4242424242 42420 +4242.424242 42.42 diff --git a/tests/queries/0_stateless/02483_capnp_decimals.sh b/tests/queries/0_stateless/02483_capnp_decimals.sh new file mode 100755 index 00000000000..bdfa9dac3d5 --- /dev/null +++ b/tests/queries/0_stateless/02483_capnp_decimals.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-parallel + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') +touch $USER_FILES_PATH/data.capnp + +SCHEMADIR=$(clickhouse-client --query "select * from file('data.capnp', 'CapnProto', 'val1 char') settings format_schema='nonexist:Message'" 2>&1 | grep Exception | grep -oP "file \K.*(?=/nonexist.capnp)") +CLIENT_SCHEMADIR=$CURDIR/format_schemas +SERVER_SCHEMADIR=test_02483 +mkdir -p $SCHEMADIR/$SERVER_SCHEMADIR +cp -r $CLIENT_SCHEMADIR/02483_* $SCHEMADIR/$SERVER_SCHEMADIR/ + + +$CLICKHOUSE_CLIENT -q "insert into function file(02483_data.capnp, auto, 'decimal32 Decimal32(3), decimal64 Decimal64(6)') select 42.42, 4242.424242 settings format_schema='$SERVER_SCHEMADIR/02483_decimals.capnp:Message', engine_file_truncate_on_insert=1" +$CLICKHOUSE_CLIENT -q "select * from file(02483_data.capnp) settings format_schema='$SERVER_SCHEMADIR/02483_decimals.capnp:Message'" +$CLICKHOUSE_CLIENT -q "select * from file(02483_data.capnp, auto, 'decimal64 Decimal64(6), decimal32 Decimal32(3)') settings format_schema='$SERVER_SCHEMADIR/02483_decimals.capnp:Message'" + +rm $USER_FILES_PATH/data.capnp +rm $USER_FILES_PATH/02483_data.capnp + diff --git a/tests/queries/0_stateless/02483_elapsed_time.reference b/tests/queries/0_stateless/02483_elapsed_time.reference new file mode 100644 index 00000000000..0fc2ca00f45 --- /dev/null +++ b/tests/queries/0_stateless/02483_elapsed_time.reference @@ -0,0 +1,14 @@ +1 1 +Greater (Ok) +Greater (Ok) +Row 1: +────── +type: QueryFinish +elapsed_more_than_one_second: 1 +end_minus_start_more_than_a_second: 1 + +Row 2: +────── +type: QueryFinish +elapsed_more_than_one_second: 1 +end_minus_start_more_than_a_second: 1 diff --git 
a/tests/queries/0_stateless/02483_elapsed_time.sh b/tests/queries/0_stateless/02483_elapsed_time.sh new file mode 100755 index 00000000000..724bd1d297b --- /dev/null +++ b/tests/queries/0_stateless/02483_elapsed_time.sh @@ -0,0 +1,69 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + + +# The following query fails during query interpretation so it throws an ExceptionBeforeStart +EXCEPTION_BEFORE_START_QUERY="WITH + ( + SELECT sleepEachRow(1) + ) AS sub + SELECT * + FROM + ( + SELECT * + FROM system.numbers + WHERE number IN (sub) + ) + SETTINGS enable_global_with_statement = 0" + + +# For this query the system.query_log needs to show ExceptionBeforeStart and elapsed seconds >= 1.0 +QUERY_ID="${CLICKHOUSE_DATABASE}_$(date +%s)_02883_q1" +${CLICKHOUSE_CLIENT} -m --query "$EXCEPTION_BEFORE_START_QUERY" --query_id="$QUERY_ID" >/dev/null 2>&1 + +${CLICKHOUSE_CLIENT} --query "SYSTEM FLUSH LOGS" +${CLICKHOUSE_CLIENT} --query "SELECT type == 'ExceptionBeforeStart' as expected_type, query_duration_ms >= 1000 as elapsed_more_than_one_second FROM system.query_log WHERE query_id='$QUERY_ID'" + +# Now we test with a query that will take 1+ seconds. The CLI should show that as part of the output format +OK_QUERY_JSON=" +WITH ( + SELECT sleepEachRow(1.0) + ) AS sub +SELECT * +FROM +( + SELECT * + FROM system.one +) +FORMAT JSON +SETTINGS enable_global_with_statement = 1" +QUERY_ID_2="${CLICKHOUSE_DATABASE}_$(date +%s)_02883_q2" +${CLICKHOUSE_CLIENT} --query "$OK_QUERY_JSON" --query_id="${QUERY_ID_2}" | grep elapsed | awk '{ if($2 >= 1.0) { print "Greater (Ok)" } else { print "Smaller than expected: " $2 } }' + +OK_QUERY_XML=" +WITH ( + SELECT sleepEachRow(1.0) + ) AS sub +SELECT * +FROM +( + SELECT * + FROM system.one +) +FORMAT XML +SETTINGS enable_global_with_statement = 1" +QUERY_ID_3="${CLICKHOUSE_DATABASE}_$(date +%s)_02883_q3" +${CLICKHOUSE_CLIENT} --query "$OK_QUERY_XML" --query_id="${QUERY_ID_3}" | grep elapsed | awk -F '[<>]' '{ if($3 >= 1.0) { print "Greater (Ok)" } else { print "Smaller than expected: " $3 } }' + +${CLICKHOUSE_CLIENT} --query "SYSTEM FLUSH LOGS" +${CLICKHOUSE_CLIENT} --query " + SELECT + type, + query_duration_ms >= 1000 as elapsed_more_than_one_second, + (toDecimal64(event_time_microseconds, 6) - toDecimal64(query_start_time_microseconds, 6)) > 1.0 AS end_minus_start_more_than_a_second + FROM system.query_log + WHERE type='QueryFinish' AND (query_id='$QUERY_ID_2' OR query_id='${QUERY_ID_3}') + FORMAT Vertical" diff --git a/tests/queries/0_stateless/02494_array_function_range.reference b/tests/queries/0_stateless/02494_array_function_range.reference new file mode 100644 index 00000000000..133d78d4a37 --- /dev/null +++ b/tests/queries/0_stateless/02494_array_function_range.reference @@ -0,0 +1,19 @@ +1 +1 +1 +[-1,0] +[-1] +[] +[5,4,3,2,1] +[5,4,3,2,1,0] +[1] +[-5,-4,-3,-2,-1,0,1,2,3,4] +[-4,-3,-2,-1,0,1,2,3,4,5] +[-3,-2,-1,0,1,2,3,4,5,6] +[-2,-1,0,1,2,3,4,5,6,7] +[-1,0,1,2,3,4,5,6,7,8] +[0,1,2,3,4,5,6,7,8,9] +[1,2,3,4,5,6,7,8,9,10] +[2,3,4,5,6,7,8,9,10,11] +[3,4,5,6,7,8,9,10,11,12] +[4,5,6,7,8,9,10,11,12,13] diff --git a/tests/queries/0_stateless/02494_array_function_range.sql b/tests/queries/0_stateless/02494_array_function_range.sql new file mode 100644 index 00000000000..bd945d55254 --- /dev/null +++ b/tests/queries/0_stateless/02494_array_function_range.sql @@ -0,0 +1,10 @@ +SELECT range(100) == range(0, 100) and range(0, 100) == range(0, 100, 1); +SELECT range(100) == 
range(cast('100', 'Int8')) and range(100) == range(cast('100', 'Int16')) and range(100) == range(cast('100', 'Int32')) and range(100) == range(cast('100', 'Int64')); +SELECT range(cast('100', 'Int8')) == range(0, cast('100', 'Int8')) and range(0, cast('100', 'Int8')) == range(0, cast('100', 'Int8'), 1) and range(0, cast('100', 'Int8')) == range(0, cast('100', 'Int8'), cast('1', 'Int8')); +SELECT range(-1, 1); +SELECT range(-1, 1, 2); +SELECT range(1,1); +SELECT range(5, 0, -1); +SELECT range(5, -1, -1); +SELECT range(1, 257, 65535); +SELECT range(cast(number - 5, 'Int8'), cast(number + 5, 'Int8')) from system.numbers limit 10; \ No newline at end of file diff --git a/tests/queries/0_stateless/02494_combinators_with_null_argument.reference b/tests/queries/0_stateless/02494_combinators_with_null_argument.reference new file mode 100644 index 00000000000..a891c305dde --- /dev/null +++ b/tests/queries/0_stateless/02494_combinators_with_null_argument.reference @@ -0,0 +1,18 @@ +-- { echoOn } + +select sumIf(1, NULL); +0 +select sumIf(NULL, 1); +\N +select sumIf(NULL, NULL); +\N +select countIf(1, NULL); +0 +select countIf(NULL, 1); +0 +select countIf(1, NULL); +0 +select sumArray([NULL, NULL]); +\N +select countArray([NULL, NULL]); +0 diff --git a/tests/queries/0_stateless/02494_combinators_with_null_argument.sql b/tests/queries/0_stateless/02494_combinators_with_null_argument.sql new file mode 100644 index 00000000000..e18fd741aab --- /dev/null +++ b/tests/queries/0_stateless/02494_combinators_with_null_argument.sql @@ -0,0 +1,11 @@ +-- { echoOn } + +select sumIf(1, NULL); +select sumIf(NULL, 1); +select sumIf(NULL, NULL); +select countIf(1, NULL); +select countIf(NULL, 1); +select countIf(1, NULL); +select sumArray([NULL, NULL]); +select countArray([NULL, NULL]); + diff --git a/tests/queries/0_stateless/02494_parser_string_binary_literal.reference b/tests/queries/0_stateless/02494_parser_string_binary_literal.reference new file mode 100644 index 00000000000..4fbadddcd21 --- /dev/null +++ b/tests/queries/0_stateless/02494_parser_string_binary_literal.reference @@ -0,0 +1,24 @@ + +1 +0 +10 +1 + +1 +0 +10 +1 + +1 +0 +10 +1 + +1 +0 +10 +1 +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/02494_parser_string_binary_literal.sql b/tests/queries/0_stateless/02494_parser_string_binary_literal.sql new file mode 100644 index 00000000000..ebfe2a198b5 --- /dev/null +++ b/tests/queries/0_stateless/02494_parser_string_binary_literal.sql @@ -0,0 +1,29 @@ +select b''; +select b'0' == '\0'; +select b'00110000'; -- 0 +select b'0011000100110000'; -- 10 +select b'111001101011010110001011111010001010111110010101' == '测试'; + +select B''; +select B'0' == '\0'; +select B'00110000'; -- 0 +select B'0011000100110000'; -- 10 +select B'111001101011010110001011111010001010111110010101' == '测试'; + +select x''; +select x'0' == '\0'; +select x'30'; -- 0 +select x'3130'; -- 10 +select x'e6b58be8af95' == '测试'; + +select X''; +select X'0' == '\0'; +select X'30'; -- 0 +select X'3130'; -- 10 +select X'e6b58be8af95' == '测试'; + + +select x'' == b''; +select x'0' == b'0'; +select X'' == X''; +select X'0' == X'0'; diff --git a/tests/queries/0_stateless/02495_parser_string_binary_literal.reference b/tests/queries/0_stateless/02495_parser_string_binary_literal.reference new file mode 100644 index 00000000000..0f91f17602d --- /dev/null +++ b/tests/queries/0_stateless/02495_parser_string_binary_literal.reference @@ -0,0 +1,6 @@ +Syntax error +Syntax error +Syntax error +Syntax error +Syntax error +Syntax error diff --git 
a/tests/queries/0_stateless/02495_parser_string_binary_literal.sh b/tests/queries/0_stateless/02495_parser_string_binary_literal.sh new file mode 100755 index 00000000000..88998b06a01 --- /dev/null +++ b/tests/queries/0_stateless/02495_parser_string_binary_literal.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + + +$CLICKHOUSE_CLIENT --query="SELECT b '0';" 2>&1 | grep -o 'Syntax error' +$CLICKHOUSE_CLIENT --query="SELECT x 'a'" 2>&1 | grep -o 'Syntax error' + +$CLICKHOUSE_CLIENT --query="SELECT b'3';" 2>&1 | grep -o 'Syntax error' +$CLICKHOUSE_CLIENT --query="SELECT x'k'" 2>&1 | grep -o 'Syntax error' + +$CLICKHOUSE_CLIENT --query="SELECT b'1" 2>&1 | grep -o 'Syntax error' +$CLICKHOUSE_CLIENT --query="SELECT x'a" 2>&1 | grep -o 'Syntax error' diff --git a/tests/queries/0_stateless/02495_sum_if_to_count_if_bug.reference b/tests/queries/0_stateless/02495_sum_if_to_count_if_bug.reference new file mode 100644 index 00000000000..4bda3243d2e --- /dev/null +++ b/tests/queries/0_stateless/02495_sum_if_to_count_if_bug.reference @@ -0,0 +1,3 @@ +1024 +0 +1024 diff --git a/tests/queries/0_stateless/02495_sum_if_to_count_if_bug.sql b/tests/queries/0_stateless/02495_sum_if_to_count_if_bug.sql new file mode 100644 index 00000000000..0791b374668 --- /dev/null +++ b/tests/queries/0_stateless/02495_sum_if_to_count_if_bug.sql @@ -0,0 +1,4 @@ +select sum(if((number % NULL) = 2, 0, 1)) FROM numbers(1024) settings optimize_rewrite_sum_if_to_count_if=0; +select sum(if((number % NULL) = 2, 0, 1)) FROM numbers(1024) settings optimize_rewrite_sum_if_to_count_if=1, allow_experimental_analyzer=0; +select sum(if((number % NULL) = 2, 0, 1)) FROM numbers(1024) settings optimize_rewrite_sum_if_to_count_if=1, allow_experimental_analyzer=1; + diff --git a/tests/queries/0_stateless/02497_if_transform_strings_to_enum.reference b/tests/queries/0_stateless/02497_if_transform_strings_to_enum.reference index 06863f1858b..c6265e195c4 100644 --- a/tests/queries/0_stateless/02497_if_transform_strings_to_enum.reference +++ b/tests/queries/0_stateless/02497_if_transform_strings_to_enum.reference @@ -19,7 +19,7 @@ QUERY id: 0 FUNCTION id: 2, function_name: toString, function_type: ordinary, result_type: String ARGUMENTS LIST id: 3, nodes: 1 - FUNCTION id: 4, function_name: transform, function_type: ordinary, result_type: String + FUNCTION id: 4, function_name: transform, function_type: ordinary, result_type: Enum8(\'censor.net\' = 1, \'google\' = 2, \'other\' = 3, \'yahoo\' = 4) ARGUMENTS LIST id: 5, nodes: 4 COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7 @@ -59,7 +59,7 @@ QUERY id: 0 FUNCTION id: 2, function_name: toString, function_type: ordinary, result_type: String ARGUMENTS LIST id: 3, nodes: 1 - FUNCTION id: 4, function_name: if, function_type: ordinary, result_type: String + FUNCTION id: 4, function_name: if, function_type: ordinary, result_type: Enum8(\'censor.net\' = 1, \'google\' = 2) ARGUMENTS LIST id: 5, nodes: 3 FUNCTION id: 6, function_name: greater, function_type: ordinary, result_type: UInt8 @@ -105,7 +105,7 @@ QUERY id: 0 FUNCTION id: 4, function_name: toString, function_type: ordinary, result_type: String ARGUMENTS LIST id: 5, nodes: 1 - FUNCTION id: 6, function_name: transform, function_type: ordinary, result_type: String + FUNCTION id: 6, function_name: transform, function_type: ordinary, result_type: Enum8(\'censor.net\' = 1, \'google\' = 2, \'other\' = 3, \'yahoo\' = 4) 
ARGUMENTS LIST id: 7, nodes: 4 COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 9 @@ -149,7 +149,7 @@ QUERY id: 0 FUNCTION id: 4, function_name: toString, function_type: ordinary, result_type: String ARGUMENTS LIST id: 5, nodes: 1 - FUNCTION id: 6, function_name: if, function_type: ordinary, result_type: String + FUNCTION id: 6, function_name: if, function_type: ordinary, result_type: Enum8(\'censor.net\' = 1, \'google\' = 2) ARGUMENTS LIST id: 7, nodes: 3 FUNCTION id: 8, function_name: greater, function_type: ordinary, result_type: UInt8 @@ -204,7 +204,7 @@ QUERY id: 0 FUNCTION id: 5, function_name: toString, function_type: ordinary, result_type: String ARGUMENTS LIST id: 6, nodes: 1 - FUNCTION id: 7, function_name: if, function_type: ordinary, result_type: String + FUNCTION id: 7, function_name: if, function_type: ordinary, result_type: Enum8(\'censor.net\' = 1, \'google\' = 2) ARGUMENTS LIST id: 8, nodes: 3 FUNCTION id: 9, function_name: greater, function_type: ordinary, result_type: UInt8 @@ -258,7 +258,7 @@ QUERY id: 0 FUNCTION id: 5, function_name: toString, function_type: ordinary, result_type: String ARGUMENTS LIST id: 6, nodes: 1 - FUNCTION id: 7, function_name: transform, function_type: ordinary, result_type: String + FUNCTION id: 7, function_name: transform, function_type: ordinary, result_type: Enum8(\'censor.net\' = 1, \'google\' = 2, \'other\' = 3, \'yahoo\' = 4) ARGUMENTS LIST id: 8, nodes: 4 COLUMN id: 9, column_name: number, result_type: UInt64, source_id: 10 @@ -301,7 +301,7 @@ QUERY id: 0 FUNCTION id: 2, function_name: toString, function_type: ordinary, result_type: String ARGUMENTS LIST id: 3, nodes: 1 - FUNCTION id: 4, function_name: if, function_type: ordinary, result_type: String + FUNCTION id: 4, function_name: if, function_type: ordinary, result_type: Enum8(\'censor.net\' = 1, \'google\' = 2) ARGUMENTS LIST id: 5, nodes: 3 FUNCTION id: 6, function_name: greater, function_type: ordinary, result_type: UInt8 @@ -322,7 +322,7 @@ QUERY id: 0 FUNCTION id: 2, function_name: toString, function_type: ordinary, result_type: String ARGUMENTS LIST id: 3, nodes: 1 - FUNCTION id: 4, function_name: if, function_type: ordinary, result_type: String + FUNCTION id: 4, function_name: if, function_type: ordinary, result_type: Enum8(\'censor.net\' = 1, \'google\' = 2) ARGUMENTS LIST id: 5, nodes: 3 FUNCTION id: 6, function_name: greater, function_type: ordinary, result_type: UInt8 @@ -368,7 +368,7 @@ QUERY id: 0 FUNCTION id: 2, function_name: toString, function_type: ordinary, result_type: String ARGUMENTS LIST id: 3, nodes: 1 - FUNCTION id: 4, function_name: transform, function_type: ordinary, result_type: String + FUNCTION id: 4, function_name: transform, function_type: ordinary, result_type: Enum8(\'censor.net\' = 1, \'google\' = 2, \'other\' = 3, \'yahoo\' = 4) ARGUMENTS LIST id: 5, nodes: 4 COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7 @@ -386,7 +386,7 @@ QUERY id: 0 FUNCTION id: 2, function_name: toString, function_type: ordinary, result_type: String ARGUMENTS LIST id: 3, nodes: 1 - FUNCTION id: 4, function_name: transform, function_type: ordinary, result_type: String + FUNCTION id: 4, function_name: transform, function_type: ordinary, result_type: Enum8(\'censor.net\' = 1, \'google\' = 2, \'other\' = 3, \'yahoo\' = 4) ARGUMENTS LIST id: 5, nodes: 4 COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7 diff --git a/tests/queries/0_stateless/02497_schema_inference_nulls.reference 
b/tests/queries/0_stateless/02497_schema_inference_nulls.reference new file mode 100644 index 00000000000..42dea6294e2 --- /dev/null +++ b/tests/queries/0_stateless/02497_schema_inference_nulls.reference @@ -0,0 +1,48 @@ +JSONEachRow +x Array(Nullable(Int64)) +x Array(Nullable(Int64)) +x Array(Nullable(Int64)) +x Array(Nullable(Int64)) +x Tuple(Nullable(String), Nullable(Int64)) +x Tuple(Nullable(String), Nullable(Int64)) +x Map(String, Nullable(Int64)) +x Map(String, Nullable(Int64)) +x Array(Nullable(Int64)) +x Array(Array(Nullable(Int64))) +x Array(Map(String, Nullable(Int64))) +x Array(Array(Nullable(String))) +x Array(Int64) +x Array(Nullable(Int64)) +x Array(Int64) +x Array(Nullable(Int64)) +JSONCompactEachRow +c1 Array(Nullable(Int64)) +c1 Array(Nullable(Int64)) +c1 Array(Nullable(Int64)) +c1 Array(Nullable(Int64)) +c1 Tuple(Nullable(String), Nullable(Int64)) +c1 Tuple(Nullable(String), Nullable(Int64)) +c1 Map(String, Nullable(Int64)) +c1 Map(String, Nullable(Int64)) +c1 Array(Nullable(Int64)) +c1 Array(Array(Nullable(Int64))) +c1 Array(Map(String, Nullable(Int64))) +c1 Array(Array(Nullable(String))) +c1 Array(Int64) +c1 Array(Nullable(Int64)) +c1 Array(Int64) +c1 Array(Nullable(Int64)) +CSV +c1 Array(Nullable(Int64)) +c1 Array(Nullable(Int64)) +c1 Array(Nullable(Int64)) +c1 Array(Nullable(Int64)) +c1 Map(String, Nullable(Int64)) +c1 Map(String, Nullable(Int64)) +c1 Array(Array(Nullable(Int64))) +c1 Array(Map(String, Nullable(Int64))) +c1 Array(Array(Nullable(String))) +c1 Array(Int64) +c1 Array(Nullable(Int64)) +c1 Array(Int64) +c1 Array(Nullable(Int64)) diff --git a/tests/queries/0_stateless/02497_schema_inference_nulls.sql b/tests/queries/0_stateless/02497_schema_inference_nulls.sql new file mode 100644 index 00000000000..60cdaedcbd9 --- /dev/null +++ b/tests/queries/0_stateless/02497_schema_inference_nulls.sql @@ -0,0 +1,63 @@ +select 'JSONEachRow'; +set schema_inference_make_columns_nullable=1; +desc format(JSONEachRow, '{"x" : 1234}, {"x" : "String"}') settings input_format_json_try_infer_numbers_from_strings=1; -- { serverError TYPE_MISMATCH } +desc format(JSONEachRow, '{"x" : [null, 1]}'); +desc format(JSONEachRow, '{"x" : [null, 1]}, {"x" : []}'); +desc format(JSONEachRow, '{"x" : [null, 1]}, {"x" : [null]}'); +desc format(JSONEachRow, '{"x" : [null, 1]}, {"x" : [1, null]}'); +desc format(JSONEachRow, '{"x" : [null, 1]}, {"x" : ["abc", 1]}'); +desc format(JSONEachRow, '{"x" : [null, 1]}, {"x" : ["abc", null]}'); +desc format(JSONEachRow, '{"x" : {}}, {"x" : {"a" : 1}}'); +desc format(JSONEachRow, '{"x" : {"a" : null}}, {"x" : {"b" : 1}}'); +desc format(JSONEachRow, '{"x" : null}, {"x" : [1, 2]}'); +desc format(JSONEachRow, '{"x" : [[], [null], [1, 2, 3]]}'); +desc format(JSONEachRow, '{"x" : [{"a" : null}, {"b" : 1}]}'); +desc format(JSONEachRow, '{"x" : [["2020-01-01", null, "1234"], ["abcd"]]}'); + +set schema_inference_make_columns_nullable=0; +desc format(JSONEachRow, '{"x" : [1, 2]}'); +desc format(JSONEachRow, '{"x" : [null, 1]}'); +desc format(JSONEachRow, '{"x" : [1, 2]}, {"x" : [3]}'); +desc format(JSONEachRow, '{"x" : [1, 2]}, {"x" : [null]}'); + +select 'JSONCompactEachRow'; +set schema_inference_make_columns_nullable=1; +desc format(JSONCompactEachRow, '[1234], ["String"]') settings input_format_json_try_infer_numbers_from_strings=1; -- { serverError TYPE_MISMATCH } +desc format(JSONCompactEachRow, '[[null, 1]]'); +desc format(JSONCompactEachRow, '[[null, 1]], [[]]'); +desc format(JSONCompactEachRow, '[[null, 1]], [[null]]'); +desc format(JSONCompactEachRow, 
'[[null, 1]], [[1, null]]'); +desc format(JSONCompactEachRow, '[[null, 1]], [["abc", 1]]'); +desc format(JSONCompactEachRow, '[[null, 1]], [["abc", null]]'); +desc format(JSONCompactEachRow, '[{}], [{"a" : 1}]'); +desc format(JSONCompactEachRow, '[{"a" : null}], [{"b" : 1}]'); +desc format(JSONCompactEachRow, '[null], [[1, 2]]'); +desc format(JSONCompactEachRow, '[[[], [null], [1, 2, 3]]]'); +desc format(JSONCompactEachRow, '[[{"a" : null}, {"b" : 1}]]'); +desc format(JSONCompactEachRow, '[[["2020-01-01", null, "1234"], ["abcd"]]]'); + +set schema_inference_make_columns_nullable=0; +desc format(JSONCompactEachRow, '[[1, 2]]'); +desc format(JSONCompactEachRow, '[[null, 1]]'); +desc format(JSONCompactEachRow, '[[1, 2]], [[3]]'); +desc format(JSONCompactEachRow, '[[1, 2]], [[null]]'); + + +select 'CSV'; +set schema_inference_make_columns_nullable=1; +desc format(CSV, '"[null, 1]"'); +desc format(CSV, '"[null, 1]"\n"[]"'); +desc format(CSV, '"[null, 1]"\n"[null]"'); +desc format(CSV, '"[null, 1]"\n"[1, null]"'); +desc format(CSV, '"{}"\n"{\'a\' : 1}"'); +desc format(CSV, '"{\'a\' : null}"\n"{\'b\' : 1}"'); +desc format(CSV, '"[[], [null], [1, 2, 3]]"'); +desc format(CSV, '"[{\'a\' : null}, {\'b\' : 1}]"'); +desc format(CSV, '"[[\'2020-01-01\', null, \'1234\'], [\'abcd\']]"'); + +set schema_inference_make_columns_nullable=0; +desc format(CSV, '"[1,2]"'); +desc format(CSV, '"[NULL, 1]"'); +desc format(CSV, '"[1, 2]"\n"[3]"'); +desc format(CSV, '"[1, 2]"\n"[null]"'); + diff --git a/tests/queries/0_stateless/02498_random_string_in_json_schema_inference.reference b/tests/queries/0_stateless/02498_random_string_in_json_schema_inference.reference new file mode 100644 index 00000000000..ab45d56f303 --- /dev/null +++ b/tests/queries/0_stateless/02498_random_string_in_json_schema_inference.reference @@ -0,0 +1 @@ +s Nullable(String) diff --git a/tests/queries/0_stateless/02498_random_string_in_json_schema_inference.sh b/tests/queries/0_stateless/02498_random_string_in_json_schema_inference.sh new file mode 100755 index 00000000000..f9319af4fcb --- /dev/null +++ b/tests/queries/0_stateless/02498_random_string_in_json_schema_inference.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL -q "select randomString(100) as s format JSONEachRow" | $CLICKHOUSE_LOCAL -q "desc test" --table='test' --input-format='JSONEachRow' diff --git a/tests/queries/0_stateless/02498_storage_join_key_positions.sql.j2 b/tests/queries/0_stateless/02498_storage_join_key_positions.sql.j2 index 697f37fd535..e2dad61a93e 100644 --- a/tests/queries/0_stateless/02498_storage_join_key_positions.sql.j2 +++ b/tests/queries/0_stateless/02498_storage_join_key_positions.sql.j2 @@ -33,24 +33,34 @@ SELECT * FROM (SELECT key3 AS c, key1 AS a, key2 AS b FROM t1) AS t1 ALL INNER J SELECT * FROM (SELECT key3 AS c, key1 AS a, key2 AS b FROM t1) AS t1 ALL INNER JOIN tj ON t1.a = tj.key1 AND t1.b = tj.key2 AND t1.c = tj.key3 ORDER BY t1.a; SELECT * FROM (SELECT key3 AS c, key1 AS a, key2 AS b FROM t1) AS t1 ALL INNER JOIN tj ON t1.c = tj.key3 AND t1.a = tj.key1 AND t1.b = tj.key2 ORDER BY t1.a; --- TODO (vdimir): uncomment after https://github.com/ClickHouse/ClickHouse/pull/44016 --- SELECT * FROM t1 ALL INNER JOIN tj ON t1.key1 = tj.key1 AND t1.key3 = tj.key3 AND t1.key2 = tj.key2 AND 1; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } --- SELECT * FROM t1 ALL INNER JOIN tj ON t1.key1 = tj.key1 AND t1.key3 = tj.key3 AND t1.key2 = tj.key2 AND 0; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } --- SELECT * FROM t1 ALL INNER JOIN tj ON t1.key1 = tj.key1 AND t1.key3 = tj.key3 AND t1.key2 = tj.key2 AND 1 == 1; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } --- SELECT * FROM t1 ALL INNER JOIN tj ON t1.key1 = tj.key1 AND t1.key3 = tj.key3 AND t1.key2 = tj.key2 AND 1 > 1; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } +{% set expected_error = 'INCOMPATIBLE_TYPE_OF_JOIN' if use_analyzer else 'INVALID_JOIN_ON_EXPRESSION' %} + +SELECT * FROM t1 ALL INNER JOIN tj ON t1.key1 = tj.key1 AND t1.key3 = tj.key3 AND t1.key2 = tj.key2 AND 1; -- { serverError {{ expected_error }} } +SELECT * FROM t1 ALL INNER JOIN tj ON t1.key1 = tj.key1 AND t1.key3 = tj.key3 AND t1.key2 = tj.key2 AND 0; -- { serverError {{ expected_error }} } +SELECT * FROM t1 ALL INNER JOIN tj ON t1.key1 = tj.key1 AND t1.key3 = tj.key3 AND t1.key2 = tj.key2 AND 1 > 1; -- { serverError {{ expected_error }} } SELECT '--- incompatible ---'; -SELECT * FROM t1 ALL INNER JOIN tj ON 1; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } -SELECT * FROM t1 ALL INNER JOIN tj ON 0; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } -SELECT * FROM t1 ALL INNER JOIN tj ON NULL; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } -SELECT * FROM t1 ALL INNER JOIN tj ON 1 == 1; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } -SELECT * FROM t1 ALL INNER JOIN tj ON 1 != 1; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } +SELECT * FROM t1 ALL INNER JOIN tj ON 1; -- { serverError {{ expected_error }} } +SELECT * FROM t1 ALL INNER JOIN tj ON 0; -- { serverError {{ expected_error }} } +SELECT * FROM t1 ALL INNER JOIN tj ON NULL; -- { serverError {{ expected_error }} } +SELECT * FROM t1 ALL INNER JOIN tj ON 1 != 1; -- { serverError {{ expected_error }} } + +{% set expected_error = 'INCOMPATIBLE_TYPE_OF_JOIN' if use_analyzer else 'AMBIGUOUS_COLUMN_NAME' %} + +-- Here is another error code because equality is handled differently in CollectJoinOnKeysVisitor. +-- We can change the error code, but it will become inconsistent for other cases +-- where we actually expect AMBIGUOUS_COLUMN_NAME instead of INVALID_JOIN_ON_EXPRESSION/INCOMPATIBLE_TYPE_OF_JOIN. 
+-- These checks are more reliable after switching to the new analyzer: they return INCOMPATIBLE_TYPE_OF_JOIN, consistent with the cases above. +SELECT * FROM t1 ALL INNER JOIN tj ON t1.key1 = tj.key1 AND t1.key3 = tj.key3 AND t1.key2 = tj.key2 AND 1 == 1; -- { serverError {{ expected_error }} } +SELECT * FROM t1 ALL INNER JOIN tj ON 1 == 1; -- { serverError {{ expected_error }} } +SELECT * FROM t1 ALL INNER JOIN tj ON 1 == 2; -- { serverError {{ expected_error }} } + +{% set expected_error = 'UNKNOWN_IDENTIFIER' if use_analyzer else 'INCOMPATIBLE_TYPE_OF_JOIN' %} + +SELECT * FROM t1 ALL INNER JOIN tj USING (key1, key2, attr); -- { serverError {{ expected_error }} } +SELECT * FROM t1 ALL INNER JOIN tj USING (key1, key2, key3, attr); -- { serverError {{ expected_error }} } SELECT * FROM t1 ALL INNER JOIN tj USING (key2, key3); -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } -SELECT * FROM t1 ALL INNER JOIN tj USING (key1, key2, attr) SETTINGS allow_experimental_analyzer = 0; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } -SELECT * FROM t1 ALL INNER JOIN tj USING (key1, key2, attr) SETTINGS allow_experimental_analyzer = 1; -- { serverError UNKNOWN_IDENTIFIER } -SELECT * FROM t1 ALL INNER JOIN tj USING (key1, key2, key3, attr) SETTINGS allow_experimental_analyzer = 0; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } -SELECT * FROM t1 ALL INNER JOIN tj USING (key1, key2, key3, attr) SETTINGS allow_experimental_analyzer = 1; -- { serverError UNKNOWN_IDENTIFIER } SELECT * FROM t1 ALL INNER JOIN tj ON t1.key1 = tj.attr; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } SELECT * FROM t1 ALL INNER JOIN tj ON t1.key1 = tj.key1; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } SELECT * FROM t1 ALL INNER JOIN tj ON t1.key1 = tj.key1 AND t1.key2 = tj.key2 AND t1.key3 = tj.attr; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } diff --git a/tests/queries/0_stateless/02499_analyzer_set_index.reference new file mode 100644 index 00000000000..6ed281c757a --- /dev/null +++ b/tests/queries/0_stateless/02499_analyzer_set_index.reference @@ -0,0 +1,2 @@ +1 +1 diff --git a/tests/queries/0_stateless/02499_analyzer_set_index.sql new file mode 100644 index 00000000000..f90ae61541f --- /dev/null +++ b/tests/queries/0_stateless/02499_analyzer_set_index.sql @@ -0,0 +1,18 @@ +SET allow_experimental_analyzer = 1; + +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + id UInt64, + value String, + INDEX value_idx (value) TYPE set(1000) GRANULARITY 1 +) ENGINE=MergeTree ORDER BY id; + +INSERT INTO test_table SELECT number, toString(number) FROM numbers(10); + +SELECT count() FROM test_table WHERE value = '1' SETTINGS force_data_skipping_indices = 'value_idx'; + +SELECT count() FROM test_table AS t1 INNER JOIN (SELECT number AS id FROM numbers(10)) AS t2 ON t1.id = t2.id +WHERE t1.value = '1' SETTINGS force_data_skipping_indices = 'value_idx'; + +DROP TABLE test_table; diff --git a/tests/queries/0_stateless/02499_escaped_quote_schema_inference.reference new file mode 100644 index 00000000000..768063eb8f4 --- /dev/null +++ b/tests/queries/0_stateless/02499_escaped_quote_schema_inference.reference @@ -0,0 +1,2 @@ +c1 Array(Nullable(String)) +c1 Nullable(String) diff --git a/tests/queries/0_stateless/02499_escaped_quote_schema_inference.sql b/tests/queries/0_stateless/02499_escaped_quote_schema_inference.sql new file mode 100644 index
00000000000..34c523387ee --- /dev/null +++ b/tests/queries/0_stateless/02499_escaped_quote_schema_inference.sql @@ -0,0 +1,2 @@ +desc format(CSV, '"[\'abc\\\'\']"'); +desc format(Values, '(\'abc\\\'\')'); diff --git a/tests/queries/0_stateless/02499_read_json_objects_as_strings.reference b/tests/queries/0_stateless/02499_read_json_objects_as_strings.reference index 4042c1f4389..a78bd4be3e8 100644 --- a/tests/queries/0_stateless/02499_read_json_objects_as_strings.reference +++ b/tests/queries/0_stateless/02499_read_json_objects_as_strings.reference @@ -1,3 +1,6 @@ x Nullable(String) abc {"a" : 10, "b" : "abc"} +x Nullable(String) +{"a" : "b"} +{"a" : 1, "b" : [1,2,3]} diff --git a/tests/queries/0_stateless/02499_read_json_objects_as_strings.sql b/tests/queries/0_stateless/02499_read_json_objects_as_strings.sql index bb4c9e9da0f..12d709bdde1 100644 --- a/tests/queries/0_stateless/02499_read_json_objects_as_strings.sql +++ b/tests/queries/0_stateless/02499_read_json_objects_as_strings.sql @@ -2,3 +2,5 @@ set input_format_json_read_objects_as_strings=1; desc format(JSONEachRow, '{"x" : "abc"}, {"x" : {"a" : 10, "b" : "abc"}}'); select * from format(JSONEachRow, '{"x" : "abc"}, {"x" : {"a" : 10, "b" : "abc"}}'); +desc format(JSONEachRow, '{"x" : {"a" : "b"}}, {"x" : {"a" : 1, "b" : [1,2,3]}}'); +select * from format(JSONEachRow, '{"x" : {"a" : "b"}}, {"x" : {"a" : 1, "b" : [1,2,3]}}'); diff --git a/tests/queries/0_stateless/02500_bson_read_object_id.reference b/tests/queries/0_stateless/02500_bson_read_object_id.reference new file mode 100644 index 00000000000..860d79a30da --- /dev/null +++ b/tests/queries/0_stateless/02500_bson_read_object_id.reference @@ -0,0 +1,6 @@ +_id Nullable(FixedString(12)) +name Nullable(String) +email Nullable(String) +movie_id Nullable(FixedString(12)) +text Nullable(String) +date Nullable(DateTime64(6, \'UTC\')) diff --git a/tests/queries/0_stateless/02500_bson_read_object_id.sh b/tests/queries/0_stateless/02500_bson_read_object_id.sh new file mode 100755 index 00000000000..015b5402fa4 --- /dev/null +++ b/tests/queries/0_stateless/02500_bson_read_object_id.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL -q "desc file('$CURDIR/data_bson/comments.bson')" +$CLICKHOUSE_LOCAL -q "select _id from file('$CURDIR/data_bson/comments.bson') format Null" + diff --git a/tests/queries/0_stateless/02500_numbers_inference.reference b/tests/queries/0_stateless/02500_numbers_inference.reference new file mode 100644 index 00000000000..bff7211f66a --- /dev/null +++ b/tests/queries/0_stateless/02500_numbers_inference.reference @@ -0,0 +1,20 @@ +x Nullable(Float64) +x Nullable(Float64) +x Nullable(Int64) +x Nullable(Int64) +x Nullable(Float64) +x Nullable(Float64) +x Array(Nullable(Float64)) +x Array(Nullable(Float64)) +x Array(Nullable(Float64)) +x Array(Nullable(Float64)) +c1 Nullable(Float64) +c1 Nullable(Float64) +c1 Nullable(Int64) +c1 Nullable(Int64) +c1 Nullable(Float64) +c1 Nullable(Float64) +c1 Array(Nullable(Float64)) +c1 Array(Nullable(Float64)) +c1 Array(Nullable(Float64)) +c1 Array(Nullable(Float64)) diff --git a/tests/queries/0_stateless/02500_numbers_inference.sh b/tests/queries/0_stateless/02500_numbers_inference.sh new file mode 100755 index 00000000000..ce9cd5bdc9f --- /dev/null +++ b/tests/queries/0_stateless/02500_numbers_inference.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL -q "desc format(JSONEachRow, '{\"x\" : 1.2}')"; +echo '{"x" : 1.2}' | $CLICKHOUSE_LOCAL --input-format='JSONEachRow' --table='test' -q "desc test"; +$CLICKHOUSE_LOCAL -q "desc format(JSONEachRow, '{\"x\" : 1}')"; +echo '{"x" : 1}' | $CLICKHOUSE_LOCAL --input-format='JSONEachRow' --table='test' -q "desc test"; +$CLICKHOUSE_LOCAL -q "desc format(JSONEachRow, '{\"x\" : 1e10}')"; +echo '{"x" : 1e10}' | $CLICKHOUSE_LOCAL --input-format='JSONEachRow' --table='test' -q "desc test"; +$CLICKHOUSE_LOCAL -q "desc format(JSONEachRow, '{\"x\" : [1, 42.42, 1, 1e10]}')"; +echo '{"x" : [1, 42.42, 1, 1e10]}' | $CLICKHOUSE_LOCAL --input-format='JSONEachRow' --table='test' -q "desc test"; +$CLICKHOUSE_LOCAL -q "desc format(JSONEachRow, '{\"x\" : [1, 42.42, false]}')"; +echo '{"x" : [1, 42.42, false]}' | $CLICKHOUSE_LOCAL --input-format='JSONEachRow' --table='test' -q "desc test"; + +$CLICKHOUSE_LOCAL -q "desc format(TSV, '1.2')"; +echo '1.2' | $CLICKHOUSE_LOCAL --input-format='TSV' --table='test' -q "desc test"; +$CLICKHOUSE_LOCAL -q "desc format(TSV, '1')"; +echo '1' | $CLICKHOUSE_LOCAL --input-format='TSV' --table='test' -q "desc test"; +$CLICKHOUSE_LOCAL -q "desc format(TSV, '1e10')"; +echo '1e10' | $CLICKHOUSE_LOCAL --input-format='TSV' --table='test' -q "desc test"; +$CLICKHOUSE_LOCAL -q "desc format(TSV, '[1, 42.42, 1, 1e10]')"; +echo '[1, 42.42, 1, 1e10]' | $CLICKHOUSE_LOCAL --input-format='TSV' --table='test' -q "desc test"; +$CLICKHOUSE_LOCAL -q "desc format(TSV, '[1, 42.42, false]')"; +echo '[1, 42.42, false]' | $CLICKHOUSE_LOCAL --input-format='TSV' --table='test' -q "desc test"; + diff --git a/tests/queries/0_stateless/02501_deep_recusion_schema_inference.reference b/tests/queries/0_stateless/02501_deep_recusion_schema_inference.reference new file mode 100644 index 00000000000..d86bac9de59 --- /dev/null +++ b/tests/queries/0_stateless/02501_deep_recusion_schema_inference.reference @@ -0,0 +1 @@ +OK diff --git a/tests/queries/0_stateless/02501_deep_recusion_schema_inference.sh b/tests/queries/0_stateless/02501_deep_recusion_schema_inference.sh new file mode 100755 index 00000000000..96142432557 --- /dev/null +++ 
b/tests/queries/0_stateless/02501_deep_recusion_schema_inference.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash +# Tags: no-parallel + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL -q "select repeat('[', 10000) || '1,2,3' || repeat(']', 10000)" > 02501_deep_nested_array.tsv +$CLICKHOUSE_LOCAL -q "desc file(02501_deep_nested_array.tsv)" 2>&1 | grep -q -F "TOO_DEEP_RECURSION" && echo "OK" || echo "FAIL" +rm 02501_deep_nested_array.tsv + diff --git a/tests/queries/0_stateless/02501_limits_on_result_for_view.reference b/tests/queries/0_stateless/02501_limits_on_result_for_view.reference new file mode 100644 index 00000000000..0691f67b202 --- /dev/null +++ b/tests/queries/0_stateless/02501_limits_on_result_for_view.reference @@ -0,0 +1 @@ +52 diff --git a/tests/queries/0_stateless/02501_limits_on_result_for_view.sql b/tests/queries/0_stateless/02501_limits_on_result_for_view.sql new file mode 100644 index 00000000000..17e6024d973 --- /dev/null +++ b/tests/queries/0_stateless/02501_limits_on_result_for_view.sql @@ -0,0 +1,25 @@ +DROP TABLE IF EXISTS 02501_test; +DROP TABLE IF EXISTS 02501_dist; +DROP VIEW IF EXISTS 02501_view; + + +-- create local table +CREATE TABLE 02501_test(`a` UInt64) ENGINE = Memory; + +-- create dist table +CREATE TABLE 02501_dist(`a` UInt64) ENGINE = Distributed(test_cluster_two_shards, currentDatabase(), 02501_test); + +-- create view +CREATE VIEW 02501_view(`a` UInt64) AS SELECT a FROM 02501_dist; + +-- insert data +insert into 02501_test values(5),(6),(7),(8); + +-- test +SELECT * from 02501_view settings max_result_rows = 1; -- { serverError 396 } +SELECT sum(a) from 02501_view settings max_result_rows = 1; + + +DROP TABLE IF EXISTS 02501_test; +DROP TABLE IF EXISTS 02501_dist; +DROP VIEW IF EXISTS 02501_view; \ No newline at end of file diff --git a/tests/queries/0_stateless/02502_bad_values_schema_inference.reference b/tests/queries/0_stateless/02502_bad_values_schema_inference.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02502_bad_values_schema_inference.sql b/tests/queries/0_stateless/02502_bad_values_schema_inference.sql new file mode 100644 index 00000000000..4c796842c0d --- /dev/null +++ b/tests/queries/0_stateless/02502_bad_values_schema_inference.sql @@ -0,0 +1,2 @@ +desc format(Values, '(\'abc)'); -- { serverError CANNOT_PARSE_INPUT_ASSERTION_FAILED } + diff --git a/tests/queries/0_stateless/02503_bad_compatibility_setting.reference b/tests/queries/0_stateless/02503_bad_compatibility_setting.reference new file mode 100644 index 00000000000..5b7d2a449a0 --- /dev/null +++ b/tests/queries/0_stateless/02503_bad_compatibility_setting.reference @@ -0,0 +1 @@ + 0 diff --git a/tests/queries/0_stateless/02503_bad_compatibility_setting.sql b/tests/queries/0_stateless/02503_bad_compatibility_setting.sql new file mode 100644 index 00000000000..178c6a87531 --- /dev/null +++ b/tests/queries/0_stateless/02503_bad_compatibility_setting.sql @@ -0,0 +1,3 @@ +set compatibility='a.a'; -- { serverError BAD_ARGUMENTS } +select value, changed from system.settings where name = 'compatibility' + diff --git a/tests/queries/0_stateless/02503_cache_on_write_with_small_segment_size.reference b/tests/queries/0_stateless/02503_cache_on_write_with_small_segment_size.reference new file mode 100644 index 00000000000..1823b83ae28 --- /dev/null +++ b/tests/queries/0_stateless/02503_cache_on_write_with_small_segment_size.reference @@ -0,0 +1,3 
@@ +0 +83 +100000 diff --git a/tests/queries/0_stateless/02503_cache_on_write_with_small_segment_size.sh b/tests/queries/0_stateless/02503_cache_on_write_with_small_segment_size.sh new file mode 100755 index 00000000000..918adc12de6 --- /dev/null +++ b/tests/queries/0_stateless/02503_cache_on_write_with_small_segment_size.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash +# Tags: no-parallel, no-fasttest, no-s3-storage, no-random-settings + +CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=none + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +function random { + cat /dev/urandom | LC_ALL=C tr -dc 'a-zA-Z' | fold -w ${1:-8} | head -n 1 +} + +${CLICKHOUSE_CLIENT} --multiline --multiquery -q " +drop table if exists ttt; +create table ttt (id Int32, value String) engine=MergeTree() order by tuple() settings storage_policy='s3_cache_small_segment_size', min_bytes_for_wide_part=0; +insert into ttt select number, toString(number) from numbers(100000) settings throw_on_error_from_cache_on_write_operations = 1; +" + +query_id=$(random 8) + +${CLICKHOUSE_CLIENT} --query_id "$query_id" -q " +select * from ttt format Null settings enable_filesystem_cache_log=1; +" +${CLICKHOUSE_CLIENT} --query_id "$query_id" -q " system flush logs" + +${CLICKHOUSE_CLIENT} -q " +select count() from system.filesystem_cache_log where query_id = '$query_id' AND read_type != 'READ_FROM_CACHE'; +" +${CLICKHOUSE_CLIENT} -q " +select count() from system.filesystem_cache_log where query_id = '$query_id' AND read_type == 'READ_FROM_CACHE'; +" + +${CLICKHOUSE_CLIENT} --multiline --multiquery -q " +select count() from ttt; +drop table ttt no delay; +" diff --git a/tests/queries/0_stateless/02503_in_lc_const_args_bug.reference b/tests/queries/0_stateless/02503_in_lc_const_args_bug.reference new file mode 100644 index 00000000000..8baef1b4abc --- /dev/null +++ b/tests/queries/0_stateless/02503_in_lc_const_args_bug.reference @@ -0,0 +1 @@ +abc diff --git a/tests/queries/0_stateless/02503_in_lc_const_args_bug.sql b/tests/queries/0_stateless/02503_in_lc_const_args_bug.sql new file mode 100644 index 00000000000..6756e381586 --- /dev/null +++ b/tests/queries/0_stateless/02503_in_lc_const_args_bug.sql @@ -0,0 +1,2 @@ +SELECT substr(toLowCardinality('abc'), 1 in 1) AS x GROUP BY x; + diff --git a/tests/queries/0_stateless/02503_insert_storage_snapshot.reference b/tests/queries/0_stateless/02503_insert_storage_snapshot.reference new file mode 100644 index 00000000000..4e07416f18a --- /dev/null +++ b/tests/queries/0_stateless/02503_insert_storage_snapshot.reference @@ -0,0 +1 @@ +all_1_1_0 1 1 diff --git a/tests/queries/0_stateless/02503_insert_storage_snapshot.sh b/tests/queries/0_stateless/02503_insert_storage_snapshot.sh new file mode 100755 index 00000000000..af2952839df --- /dev/null +++ b/tests/queries/0_stateless/02503_insert_storage_snapshot.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash + +set -e + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS t_insert_storage_snapshot" +$CLICKHOUSE_CLIENT -q "CREATE TABLE t_insert_storage_snapshot (a UInt64) ENGINE = MergeTree ORDER BY a" +$CLICKHOUSE_CLIENT -q "INSERT INTO t_insert_storage_snapshot VALUES (1)" + +query_id="$CLICKHOUSE_DATABASE-$RANDOM" +$CLICKHOUSE_CLIENT --query_id $query_id -q "INSERT INTO t_insert_storage_snapshot SELECT sleep(1) FROM numbers(1000) SETTINGS max_block_size = 1" 2>/dev/null & + +$CLICKHOUSE_CLIENT -q "SELECT name, active, refcount FROM system.parts WHERE database = '$CLICKHOUSE_DATABASE' AND table = 't_insert_storage_snapshot'" +$CLICKHOUSE_CLIENT -q "KILL QUERY WHERE query_id = '$query_id' SYNC" >/dev/null + +wait + +$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS t_insert_storage_snapshot" diff --git a/tests/queries/0_stateless/02503_join_switch_alias_fuzz.reference b/tests/queries/0_stateless/02503_join_switch_alias_fuzz.reference new file mode 100644 index 00000000000..af591cd7818 --- /dev/null +++ b/tests/queries/0_stateless/02503_join_switch_alias_fuzz.reference @@ -0,0 +1 @@ +1 \N 1 \N diff --git a/tests/queries/0_stateless/02503_join_switch_alias_fuzz.sql b/tests/queries/0_stateless/02503_join_switch_alias_fuzz.sql new file mode 100644 index 00000000000..28d64bf3881 --- /dev/null +++ b/tests/queries/0_stateless/02503_join_switch_alias_fuzz.sql @@ -0,0 +1,4 @@ +SELECT * FROM (SELECT 1 AS id, '' AS test) AS a +LEFT JOIN (SELECT test, 1 AS id, NULL AS test) AS b ON b.id = a.id +SETTINGS join_algorithm = 'auto', max_rows_in_join = 1, allow_experimental_analyzer = 1 +; diff --git a/tests/queries/0_stateless/02503_mysql_compat_utc_timestamp.reference b/tests/queries/0_stateless/02503_mysql_compat_utc_timestamp.reference new file mode 100644 index 00000000000..573541ac970 --- /dev/null +++ b/tests/queries/0_stateless/02503_mysql_compat_utc_timestamp.reference @@ -0,0 +1 @@ +0 diff --git a/tests/queries/0_stateless/02503_mysql_compat_utc_timestamp.sql b/tests/queries/0_stateless/02503_mysql_compat_utc_timestamp.sql new file mode 100644 index 00000000000..d6716f272c6 --- /dev/null +++ b/tests/queries/0_stateless/02503_mysql_compat_utc_timestamp.sql @@ -0,0 +1,2 @@ +-- PowerBI is doing this query. It should work at least somehow, not necessarily in the same way as in MySQL. 
+SELECT TIMEDIFF(NOW(), UTC_TIMESTAMP()) DIV 600; diff --git a/tests/queries/0_stateless/02504_bar_fractions.reference b/tests/queries/0_stateless/02504_bar_fractions.reference new file mode 100644 index 00000000000..2a7b46069df --- /dev/null +++ b/tests/queries/0_stateless/02504_bar_fractions.reference @@ -0,0 +1,20 @@ +0 +0.125 ▏ E2968F +0.25 ▎ ▏ E2968E E2968F +0.375 ▍ ▎ E2968D E2968E +0.5 ▌ ▍ E2968C E2968D +0.625 ▋ ▌ E2968B E2968C +0.75 ▊ ▋ E2968A E2968B +0.875 ▉ ▊ E29689 E2968A +1 █ ▉ E29688 E29689 +1.125 █▏ █ E29688E2968F E29688 +1.25 █▎ █▏ E29688E2968E E29688E2968F +1.375 █▍ █▎ E29688E2968D E29688E2968E +1.5 █▌ █▍ E29688E2968C E29688E2968D +1.625 █▋ █▌ E29688E2968B E29688E2968C +1.75 █▊ █▋ E29688E2968A E29688E2968B +1.875 █▉ █▊ E29688E29689 E29688E2968A +2 ██ █▉ E29688E29688 E29688E29689 +2.125 ██▏ ██ E29688E29688E2968F E29688E29688 +2.25 ██▎ ██▏ E29688E29688E2968E E29688E29688E2968F +2.375 ██▍ ██▎ E29688E29688E2968D E29688E29688E2968E diff --git a/tests/queries/0_stateless/02504_bar_fractions.sql b/tests/queries/0_stateless/02504_bar_fractions.sql new file mode 100644 index 00000000000..d182bced55e --- /dev/null +++ b/tests/queries/0_stateless/02504_bar_fractions.sql @@ -0,0 +1,7 @@ +SELECT + number / 8 AS width, + bar(width, 0, 3, 3) AS b, + bar(width - 0.001, 0, 3, 3) AS `b_minus`, + hex(b), + hex(b_minus) +FROM numbers(20); diff --git a/tests/queries/0_stateless/02504_explain_ast_insert.reference b/tests/queries/0_stateless/02504_explain_ast_insert.reference new file mode 100644 index 00000000000..1c149a0f2f4 --- /dev/null +++ b/tests/queries/0_stateless/02504_explain_ast_insert.reference @@ -0,0 +1,4 @@ +InsertQuery (children 1) + Identifier test +InsertQuery (children 1) + Identifier test diff --git a/tests/queries/0_stateless/02504_explain_ast_insert.sql b/tests/queries/0_stateless/02504_explain_ast_insert.sql new file mode 100644 index 00000000000..fc50feebaa4 --- /dev/null +++ b/tests/queries/0_stateless/02504_explain_ast_insert.sql @@ -0,0 +1,2 @@ +explain ast insert into test values balabala; +explain ast insert into test format TabSeparated balabala; \ No newline at end of file diff --git a/tests/queries/0_stateless/02504_parse_datetime_best_effort_calebeaires.reference b/tests/queries/0_stateless/02504_parse_datetime_best_effort_calebeaires.reference new file mode 100644 index 00000000000..f0fc06bc742 --- /dev/null +++ b/tests/queries/0_stateless/02504_parse_datetime_best_effort_calebeaires.reference @@ -0,0 +1 @@ +2148 1969-01-01 2105 2105 1969-01-01 10:42:00.000 diff --git a/tests/queries/0_stateless/02504_parse_datetime_best_effort_calebeaires.sql b/tests/queries/0_stateless/02504_parse_datetime_best_effort_calebeaires.sql new file mode 100644 index 00000000000..e551ec51524 --- /dev/null +++ b/tests/queries/0_stateless/02504_parse_datetime_best_effort_calebeaires.sql @@ -0,0 +1,5 @@ +CREATE TEMPORARY TABLE my_table (col_date Date, col_date32 Date32, col_datetime DateTime('UTC'), col_datetime32 DateTime32('UTC'), col_datetime64 DateTime64); +insert into `my_table` (`col_date`, `col_date32`, `col_datetime`, `col_datetime32`, `col_datetime64`) values (parseDateTime64BestEffort('1969-01-01'), '1969-01-01', parseDateTime64BestEffort('1969-01-01 10:42:00'), parseDateTime64BestEffort('1969-01-01 10:42:00'), parseDateTime64BestEffort('1969-01-01 10:42:00')); + +-- The values for Date32 and DateTime64 will be year 1969, while the values of Date, DateTime will contain a value affected by implementation-defined overflow and can be arbitrary. 
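+-- A rough sketch of the wraparound behind the observed values (assuming Date is stored as an unsigned 16-bit day count
+-- and DateTime as an unsigned 32-bit second count since 1970-01-01): '1969-01-01' is -365 days, which wraps to
+-- 65536 - 365 = 65171 days, i.e. a year around 2148; '1969-01-01 10:42:00' is roughly -31.5 million seconds,
+-- which wraps to roughly 4.26 billion seconds, i.e. a year around 2105.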
+SELECT toYear(col_date), col_date32, toYear(col_datetime), toYear(col_datetime32), col_datetime64 FROM my_table; diff --git a/tests/queries/0_stateless/02505_forbid_paths_in_datetime_timezone.reference b/tests/queries/0_stateless/02505_forbid_paths_in_datetime_timezone.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02505_forbid_paths_in_datetime_timezone.sql b/tests/queries/0_stateless/02505_forbid_paths_in_datetime_timezone.sql new file mode 100644 index 00000000000..b08abcb8a19 --- /dev/null +++ b/tests/queries/0_stateless/02505_forbid_paths_in_datetime_timezone.sql @@ -0,0 +1,6 @@ +select toDateTime(0, '/abc'); -- { serverError POCO_EXCEPTION } +select toDateTime(0, './abc'); -- { serverError POCO_EXCEPTION } +select toDateTime(0, '../abc'); -- { serverError POCO_EXCEPTION } +select toDateTime(0, '~/abc'); -- { serverError POCO_EXCEPTION } +select toDateTime(0, 'abc/../../cba'); -- { serverError POCO_EXCEPTION } + diff --git a/tests/queries/0_stateless/02506_date_time64_floating_point_negative_value.reference b/tests/queries/0_stateless/02506_date_time64_floating_point_negative_value.reference new file mode 100644 index 00000000000..b5d0547dc4a --- /dev/null +++ b/tests/queries/0_stateless/02506_date_time64_floating_point_negative_value.reference @@ -0,0 +1,4 @@ +-3600001 +-1 +1970-01-01 00:59:59.999 +1969-12-31 23:59:59.999 diff --git a/tests/queries/0_stateless/02506_date_time64_floating_point_negative_value.sql b/tests/queries/0_stateless/02506_date_time64_floating_point_negative_value.sql new file mode 100644 index 00000000000..dd663c7806e --- /dev/null +++ b/tests/queries/0_stateless/02506_date_time64_floating_point_negative_value.sql @@ -0,0 +1,4 @@ +select toUnixTimestamp64Milli(toDateTime64('1969-12-31 23:59:59.999', 3, 'Europe/Amsterdam')); +select toUnixTimestamp64Milli(toDateTime64('1969-12-31 23:59:59.999', 3, 'UTC')); +select fromUnixTimestamp64Milli(toInt64(-1), 'Europe/Amsterdam'); +select fromUnixTimestamp64Milli(toInt64(-1), 'UTC'); diff --git a/tests/queries/0_stateless/02507_to_unix_timestamp_overflow.reference b/tests/queries/0_stateless/02507_to_unix_timestamp_overflow.reference new file mode 100644 index 00000000000..f6e8cd50296 --- /dev/null +++ b/tests/queries/0_stateless/02507_to_unix_timestamp_overflow.reference @@ -0,0 +1 @@ +-1293882467 diff --git a/tests/queries/0_stateless/02507_to_unix_timestamp_overflow.sql b/tests/queries/0_stateless/02507_to_unix_timestamp_overflow.sql new file mode 100644 index 00000000000..42479f6dbec --- /dev/null +++ b/tests/queries/0_stateless/02507_to_unix_timestamp_overflow.sql @@ -0,0 +1,2 @@ +SELECT toUnixTimestamp(toDateTime64('1928-12-31 12:12:12.123', 3, 'UTC')); -- { serverError DECIMAL_OVERFLOW } +SELECT toInt64(toDateTime64('1928-12-31 12:12:12.123', 3, 'UTC')); diff --git a/tests/queries/0_stateless/02508_bad_graphite.reference b/tests/queries/0_stateless/02508_bad_graphite.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02508_bad_graphite.sql b/tests/queries/0_stateless/02508_bad_graphite.sql new file mode 100644 index 00000000000..a0ca9dcf690 --- /dev/null +++ b/tests/queries/0_stateless/02508_bad_graphite.sql @@ -0,0 +1,6 @@ +DROP TABLE IF EXISTS test_graphite; +create table test_graphite (key UInt32, Path String, Time DateTime('UTC'), Value UInt8, Version UInt32, col UInt64) + engine = GraphiteMergeTree('graphite_rollup') order by key; + +INSERT INTO test_graphite (key) VALUES (0); -- { serverError 
BAD_ARGUMENTS } +DROP TABLE test_graphite; diff --git a/tests/queries/0_stateless/02508_index_analysis_to_date_timezone.reference b/tests/queries/0_stateless/02508_index_analysis_to_date_timezone.reference new file mode 100644 index 00000000000..28c3774e947 --- /dev/null +++ b/tests/queries/0_stateless/02508_index_analysis_to_date_timezone.reference @@ -0,0 +1,11 @@ +4c36abda-8bd8-11eb-8204-005056aa8bf6 2021-03-24 01:04:27 1 +4c408902-8bd8-11eb-8204-005056aa8bf6 2021-03-24 01:04:27 1 +4c5bf20a-8bd8-11eb-8204-005056aa8bf6 2021-03-24 01:04:27 1 +4c61623a-8bd8-11eb-8204-005056aa8bf6 2021-03-24 01:04:27 1 +4c6efab2-8bd8-11eb-a952-005056aa8bf6 2021-03-24 01:04:27 1 +--- +4c36abda-8bd8-11eb-8204-005056aa8bf6 2021-03-24 01:04:27 1 +4c408902-8bd8-11eb-8204-005056aa8bf6 2021-03-24 01:04:27 1 +4c5bf20a-8bd8-11eb-8204-005056aa8bf6 2021-03-24 01:04:27 1 +4c61623a-8bd8-11eb-8204-005056aa8bf6 2021-03-24 01:04:27 1 +4c6efab2-8bd8-11eb-a952-005056aa8bf6 2021-03-24 01:04:27 1 diff --git a/tests/queries/0_stateless/02508_index_analysis_to_date_timezone.sql b/tests/queries/0_stateless/02508_index_analysis_to_date_timezone.sql new file mode 100644 index 00000000000..a7e4f6e7a0e --- /dev/null +++ b/tests/queries/0_stateless/02508_index_analysis_to_date_timezone.sql @@ -0,0 +1,10 @@ +DROP TABLE IF EXISTS table; +CREATE TABLE table (uid UUID, date DateTime('Asia/Kamchatka')) ENGINE = MergeTree ORDER BY date; + +INSERT INTO `table` VALUES ('4c36abda-8bd8-11eb-8204-005056aa8bf6', '2021-03-24 01:04:27'), ('4c408902-8bd8-11eb-8204-005056aa8bf6', '2021-03-24 01:04:27'), ('4c5bf20a-8bd8-11eb-8204-005056aa8bf6', '2021-03-24 01:04:27'), ('4c61623a-8bd8-11eb-8204-005056aa8bf6', '2021-03-24 01:04:27'), ('4c6efab2-8bd8-11eb-a952-005056aa8bf6', '2021-03-24 01:04:27'); + +SELECT uid, date, toDate(date) = toDate('2021-03-24') AS res FROM table WHERE res = 1 ORDER BY uid, date; +SELECT '---'; +SELECT uid, date, toDate(date) = toDate('2021-03-24') AS res FROM table WHERE toDate(date) = toDate('2021-03-24') ORDER BY uid, date; + +DROP TABLE table; diff --git a/tests/queries/0_stateless/02509_h3_arguments.reference b/tests/queries/0_stateless/02509_h3_arguments.reference new file mode 100644 index 00000000000..3054598cf87 --- /dev/null +++ b/tests/queries/0_stateless/02509_h3_arguments.reference @@ -0,0 +1,23 @@ +583031433791012863 +583031433791012863 +587531185127686143 +614047082918969343 +614047153853038591 +614048195802038271 +614054260742553599 +614054419345965055 +614552348391374847 +614553222213795839 +614554538768072703 +614555412668088319 +614790495813500927 +614047082918969343 +614047153853038591 +614048195802038271 +614054260742553599 +614054419345965055 +614552348391374847 +614553222213795839 +614554538768072703 +614555412668088319 +614790495813500927 diff --git a/tests/queries/0_stateless/02509_h3_arguments.sql b/tests/queries/0_stateless/02509_h3_arguments.sql new file mode 100644 index 00000000000..b5b8b9497f9 --- /dev/null +++ b/tests/queries/0_stateless/02509_h3_arguments.sql @@ -0,0 +1,13 @@ +-- Tags: no-fasttest + +select h3ToParent(641573946153969375, 1); +select h3ToParent(641573946153969375, arrayJoin([1,2])); + +DROP TABLE IF EXISTS data_table; + +CREATE TABLE data_table (id UInt64, longitude Float64, latitude Float64) ENGINE=MergeTree ORDER BY id; +INSERT INTO data_table SELECT number, number, number FROM numbers(10); +SELECT geoToH3(longitude, latitude, toUInt8(8)) AS h3Index FROM data_table ORDER BY 1; +SELECT geoToH3(longitude, latitude, toUInt8(longitude - longitude + 8)) AS h3Index FROM data_table 
ORDER BY 1; + +DROP TABLE data_table; diff --git a/tests/queries/0_stateless/02510_group_by_prewhere_null.reference b/tests/queries/0_stateless/02510_group_by_prewhere_null.reference new file mode 100644 index 00000000000..d2bd2bb4dc6 --- /dev/null +++ b/tests/queries/0_stateless/02510_group_by_prewhere_null.reference @@ -0,0 +1 @@ +1 6 diff --git a/tests/queries/0_stateless/02510_group_by_prewhere_null.sql b/tests/queries/0_stateless/02510_group_by_prewhere_null.sql new file mode 100644 index 00000000000..90a638d0b5c --- /dev/null +++ b/tests/queries/0_stateless/02510_group_by_prewhere_null.sql @@ -0,0 +1,25 @@ +DROP TABLE IF EXISTS table1; + +create table table1 ( + col1 Int32, + col2 Int32 +) +ENGINE = MergeTree +partition by tuple() +order by col1; + +INSERT INTO table1 VALUES (1, 2), (1, 4); + +with NULL as pid +select a.col1, sum(a.col2) as summ +from table1 a +prewhere (pid is null or a.col2 = pid) +group by a.col1; + +with 123 as pid +select a.col1, sum(a.col2) as summ +from table1 a +prewhere (pid is null or a.col2 = pid) +group by a.col1; + +DROP TABLE table1; diff --git a/tests/queries/0_stateless/02510_orc_map_indexes.reference b/tests/queries/0_stateless/02510_orc_map_indexes.reference new file mode 100644 index 00000000000..8aa75d1e92d --- /dev/null +++ b/tests/queries/0_stateless/02510_orc_map_indexes.reference @@ -0,0 +1,3 @@ +0 {0:0} Hello +1 {1:1} Hello +2 {2:2} Hello diff --git a/tests/queries/0_stateless/02510_orc_map_indexes.sh b/tests/queries/0_stateless/02510_orc_map_indexes.sh new file mode 100755 index 00000000000..77fe30f48de --- /dev/null +++ b/tests/queries/0_stateless/02510_orc_map_indexes.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +# Tags: no-parallel, no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL -q "select number, map(number, number) as map, 'Hello' as str from numbers(3) format ORC" | $CLICKHOUSE_LOCAL --input-format=ORC -q "select * from table"; + diff --git a/tests/queries/0_stateless/02511_complex_literals_as_aggregate_function_parameters.reference b/tests/queries/0_stateless/02511_complex_literals_as_aggregate_function_parameters.reference new file mode 100644 index 00000000000..ab6afce21ef --- /dev/null +++ b/tests/queries/0_stateless/02511_complex_literals_as_aggregate_function_parameters.reference @@ -0,0 +1,4 @@ +AggregateFunction(1, sumMapFiltered([1, 2]), Array(UInt8), Array(UInt8)) +02010A00000000000000020A00000000000000 +02010A00000000000000020A00000000000000 +([1,2],[20,20]) diff --git a/tests/queries/0_stateless/02511_complex_literals_as_aggregate_function_parameters.sql b/tests/queries/0_stateless/02511_complex_literals_as_aggregate_function_parameters.sql new file mode 100644 index 00000000000..92b5f0143ed --- /dev/null +++ b/tests/queries/0_stateless/02511_complex_literals_as_aggregate_function_parameters.sql @@ -0,0 +1,4 @@ +SELECT toTypeName(sumMapFilteredState([1, 2])([1, 2, 3], [10, 10, 10])); +SELECT hex(sumMapFilteredState([1, 2])([1, 2, 3], [10, 10, 10])); +SELECT hex(unhex('02010A00000000000000020A00000000000000')::AggregateFunction(1, sumMapFiltered([1, 2]), Array(UInt8), Array(UInt8))); +SELECT sumMapFilteredMerge([1, 2])(*) FROM remote('127.0.0.{1,2}', view(SELECT sumMapFilteredState([1, 2])([1, 2, 3], [10, 10, 10]))); diff --git a/tests/queries/0_stateless/02511_parquet_orc_missing_columns.reference b/tests/queries/0_stateless/02511_parquet_orc_missing_columns.reference new file mode 100644 index 00000000000..d5318a96f1a --- /dev/null +++ b/tests/queries/0_stateless/02511_parquet_orc_missing_columns.reference @@ -0,0 +1,8 @@ +Hello +Hello +Hello +6 6 +Hello +Hello +Hello +6 6 diff --git a/tests/queries/0_stateless/02511_parquet_orc_missing_columns.sh b/tests/queries/0_stateless/02511_parquet_orc_missing_columns.sh new file mode 100755 index 00000000000..455dccafbb9 --- /dev/null +++ b/tests/queries/0_stateless/02511_parquet_orc_missing_columns.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash +#Tags: no-fasttest, no-parallel + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL -q "select number as x from numbers(3) format Parquet" > 02511_data1.parquet +$CLICKHOUSE_LOCAL -q "select y from file(02511_data1.parquet, auto, 'x UInt64, y String default \'Hello\'') settings input_format_parquet_allow_missing_columns=1" +$CLICKHOUSE_LOCAL -q "select number as x, 'Hello' as y from numbers(3) format Parquet" > 02511_data2.parquet +$CLICKHOUSE_LOCAL -q "select count(*), count(y) from file('02511_data*.parquet', auto, 'x UInt64, y String') settings input_format_parquet_allow_missing_columns=1" + +$CLICKHOUSE_LOCAL -q "select number as x from numbers(3) format ORC" > 02511_data1.orc +$CLICKHOUSE_LOCAL -q "select y from file(02511_data1.orc, auto, 'x UInt64, y String default \'Hello\'') settings input_format_orc_allow_missing_columns=1" +$CLICKHOUSE_LOCAL -q "select number as x, 'Hello' as y from numbers(3) format ORC" > 02511_data2.orc +$CLICKHOUSE_LOCAL -q "select count(*), count(y) from file('02511_data*.orc', auto, 'x UInt64, y String') settings input_format_orc_allow_missing_columns=1" + +rm 02511_data* + diff --git a/tests/queries/0_stateless/02512_array_join_name_resolution.reference b/tests/queries/0_stateless/02512_array_join_name_resolution.reference new file mode 100644 index 00000000000..62263461e0d --- /dev/null +++ b/tests/queries/0_stateless/02512_array_join_name_resolution.reference @@ -0,0 +1,2 @@ +Hello test +World test diff --git a/tests/queries/0_stateless/02512_array_join_name_resolution.sql b/tests/queries/0_stateless/02512_array_join_name_resolution.sql new file mode 100644 index 00000000000..5bcea967791 --- /dev/null +++ b/tests/queries/0_stateless/02512_array_join_name_resolution.sql @@ -0,0 +1,19 @@ +DROP TABLE IF EXISTS x; +CREATE TABLE x ( `arr.key` Array(String), `arr.value` Array(String), `n` String ) ENGINE = Memory; +INSERT INTO x VALUES (['Hello', 'World'], ['abc', 'def'], 'test'); + +SELECT + key, + any(toString(n)) +FROM +( + SELECT + arr.key AS key, + n + FROM x + ARRAY JOIN arr +) +GROUP BY key +ORDER BY key; + +DROP TABLE x; diff --git a/tests/queries/0_stateless/02513_analyzer_sort_msan.reference b/tests/queries/0_stateless/02513_analyzer_sort_msan.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02513_analyzer_sort_msan.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02513_analyzer_sort_msan.sql b/tests/queries/0_stateless/02513_analyzer_sort_msan.sql new file mode 100644 index 00000000000..e5beccaff2a --- /dev/null +++ b/tests/queries/0_stateless/02513_analyzer_sort_msan.sql @@ -0,0 +1,8 @@ +DROP TABLE IF EXISTS products; + +SET allow_experimental_analyzer = 1; + +CREATE TABLE products (`price` UInt32) ENGINE = Memory; +INSERT INTO products VALUES (1); + +SELECT rank() OVER (ORDER BY price) AS rank FROM products ORDER BY rank; diff --git a/tests/queries/0_stateless/02513_csv_bool_allow_crlf.reference b/tests/queries/0_stateless/02513_csv_bool_allow_crlf.reference new file mode 100644 index 00000000000..da29283aaa4 --- /dev/null +++ b/tests/queries/0_stateless/02513_csv_bool_allow_crlf.reference @@ -0,0 +1,2 @@ +true +false diff --git a/tests/queries/0_stateless/02513_csv_bool_allow_crlf.sh b/tests/queries/0_stateless/02513_csv_bool_allow_crlf.sh new file mode 100755 index 00000000000..ef75514cac6 --- /dev/null +++ b/tests/queries/0_stateless/02513_csv_bool_allow_crlf.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +echo -ne "True\r\nFalse\r\n" | $CLICKHOUSE_LOCAL --structure='x Bool' --input-format=CSV -q "select * from table"; diff --git a/tests/queries/0_stateless/02513_date_string_comparison.reference b/tests/queries/0_stateless/02513_date_string_comparison.reference new file mode 100644 index 00000000000..931f2f594f3 --- /dev/null +++ b/tests/queries/0_stateless/02513_date_string_comparison.reference @@ -0,0 +1,27 @@ +Date +2 +2 +2 +2 +DateTime +3 +3 +3 +3 +3 +Date String +2 +2 +2 +DateTime String +3 +3 +3 +Date LC +2 +2 +2 +DateTime LC +3 +3 +3 diff --git a/tests/queries/0_stateless/02513_date_string_comparison.sql b/tests/queries/0_stateless/02513_date_string_comparison.sql new file mode 100644 index 00000000000..40bc8070987 --- /dev/null +++ b/tests/queries/0_stateless/02513_date_string_comparison.sql @@ -0,0 +1,65 @@ +CREATE TABLE datetime_date_table ( + col_date Date, + col_datetime DateTime, + col_datetime64 DateTime64(3), + col_date_string String, + col_datetime_string String, + col_datetime64_string DateTime64, + col_date_lc LowCardinality(String), + col_datetime_lc LowCardinality(String), + col_datetime64_lc LowCardinality(String), + PRIMARY KEY col_date +) ENGINE = MergeTree; + +INSERT INTO datetime_date_table VALUES ('2020-03-04', '2020-03-04 10:23:45', '2020-03-04 10:23:45.123', '2020-03-04', '2020-03-04 10:23:45', '2020-03-04 10:23:45.123', '2020-03-04', '2020-03-04 10:23:45', '2020-03-04 10:23:45.123'); +INSERT INTO datetime_date_table VALUES ('2020-03-05', '2020-03-05 12:23:45', '2020-03-05 12:23:45.123', '2020-03-05', '2020-03-05 12:23:45', '2020-03-05 12:23:45.123', '2020-03-05', '2020-03-05 12:23:45', '2020-03-05 12:23:45.123'); +INSERT INTO datetime_date_table VALUES ('2020-04-05', '2020-04-05 00:10:45', '2020-04-05 00:10:45.123', '2020-04-05', '2020-04-05 00:10:45', '2020-04-05 00:10:45.123', '2020-04-05', '2020-04-05 00:10:45', '2020-04-05 00:10:45.123'); + +SELECT 'Date'; +SELECT count() FROM datetime_date_table WHERE col_date > '2020-03-04'; +SELECT count() FROM datetime_date_table WHERE col_date > '2020-03-04'::Date; +SELECT count() FROM datetime_date_table WHERE col_date > '2020-03-04 10:20:45'; -- { serverError TYPE_MISMATCH } +SELECT count() FROM datetime_date_table WHERE col_date > '2020-03-04 10:20:45'::DateTime; +SELECT count() FROM datetime_date_table WHERE col_date > '2020-03-04 10:20:45.100'; -- { serverError TYPE_MISMATCH } +SELECT count() FROM datetime_date_table WHERE col_date > '2020-03-04 10:20:45.100'::DateTime64(3); + +SELECT 'DateTime'; +SELECT count() FROM datetime_date_table WHERE col_datetime > '2020-03-04'; +SELECT count() FROM datetime_date_table WHERE col_datetime > '2020-03-04'::Date; +SELECT count() FROM datetime_date_table WHERE col_datetime > '2020-03-04 10:20:45'; +SELECT count() FROM datetime_date_table WHERE col_datetime > '2020-03-04 10:20:45'::DateTime; +SELECT count() FROM datetime_date_table WHERE col_datetime > '2020-03-04 10:20:45.100'; -- { serverError TYPE_MISMATCH } +SELECT count() FROM datetime_date_table WHERE col_datetime > '2020-03-04 10:20:45.100'::DateTime64(3); + +SELECT 'Date String'; +SELECT count() FROM datetime_date_table WHERE col_date_string > '2020-03-04'; +SELECT count() FROM datetime_date_table WHERE col_date_string > '2020-03-04'::Date; -- { serverError NO_COMMON_TYPE } +SELECT count() FROM datetime_date_table WHERE col_date_string > '2020-03-04 10:20:45'; +SELECT count() FROM datetime_date_table WHERE col_date_string > '2020-03-04 10:20:45'::DateTime; -- { serverError NO_COMMON_TYPE } 
+SELECT count() FROM datetime_date_table WHERE col_date_string > '2020-03-04 10:20:45.100'; +SELECT count() FROM datetime_date_table WHERE col_date_string > '2020-03-04 10:20:45.100'::DateTime64(3); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +SELECT 'DateTime String'; +SELECT count() FROM datetime_date_table WHERE col_datetime_string > '2020-03-04'; +SELECT count() FROM datetime_date_table WHERE col_datetime_string > '2020-03-04'::Date; -- { serverError NO_COMMON_TYPE } +SELECT count() FROM datetime_date_table WHERE col_datetime_string > '2020-03-04 10:20:45'; +SELECT count() FROM datetime_date_table WHERE col_datetime_string > '2020-03-04 10:20:45'::DateTime; -- { serverError NO_COMMON_TYPE } +SELECT count() FROM datetime_date_table WHERE col_datetime_string > '2020-03-04 10:20:45.100'; +SELECT count() FROM datetime_date_table WHERE col_datetime_string > '2020-03-04 10:20:45.100'::DateTime64(3); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +SELECT 'Date LC'; +SELECT count() FROM datetime_date_table WHERE col_date_lc > '2020-03-04'; +SELECT count() FROM datetime_date_table WHERE col_date_lc > '2020-03-04'::Date; -- { serverError NO_COMMON_TYPE } +SELECT count() FROM datetime_date_table WHERE col_date_lc > '2020-03-04 10:20:45'; +SELECT count() FROM datetime_date_table WHERE col_date_lc > '2020-03-04 10:20:45'::DateTime; -- { serverError NO_COMMON_TYPE } +SELECT count() FROM datetime_date_table WHERE col_date_lc > '2020-03-04 10:20:45.100'; +SELECT count() FROM datetime_date_table WHERE col_date_lc > '2020-03-04 10:20:45.100'::DateTime64(3); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +SELECT 'DateTime LC'; +SELECT count() FROM datetime_date_table WHERE col_datetime_lc > '2020-03-04'; +SELECT count() FROM datetime_date_table WHERE col_datetime_lc > '2020-03-04'::Date; -- { serverError NO_COMMON_TYPE } +SELECT count() FROM datetime_date_table WHERE col_datetime_lc > '2020-03-04 10:20:45'; +SELECT count() FROM datetime_date_table WHERE col_datetime_lc > '2020-03-04 10:20:45'::DateTime; -- { serverError NO_COMMON_TYPE } +SELECT count() FROM datetime_date_table WHERE col_datetime_lc > '2020-03-04 10:20:45.100'; +SELECT count() FROM datetime_date_table WHERE col_datetime_lc > '2020-03-04 10:20:45.100'::DateTime64(3); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + diff --git a/tests/queries/0_stateless/02513_insert_without_materialized_columns.reference b/tests/queries/0_stateless/02513_insert_without_materialized_columns.reference new file mode 100644 index 00000000000..6ed281c757a --- /dev/null +++ b/tests/queries/0_stateless/02513_insert_without_materialized_columns.reference @@ -0,0 +1,2 @@ +1 +1 diff --git a/tests/queries/0_stateless/02513_insert_without_materialized_columns.sh b/tests/queries/0_stateless/02513_insert_without_materialized_columns.sh new file mode 100755 index 00000000000..3faa404917d --- /dev/null +++ b/tests/queries/0_stateless/02513_insert_without_materialized_columns.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash +# Tags: no-parallel + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + + +FILE_NAME="${CLICKHOUSE_DATABASE}_test.native.zstd" + +${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS test" + +${CLICKHOUSE_CLIENT} --query "CREATE TABLE test (a Int64, b Int64 MATERIALIZED a) ENGINE = MergeTree() PRIMARY KEY tuple()" + +${CLICKHOUSE_CLIENT} --query "INSERT INTO test VALUES (1)" + +${CLICKHOUSE_CLIENT} --query "SELECT * FROM test" + +${CLICKHOUSE_CLIENT} --query "SELECT * FROM test INTO OUTFILE '${CLICKHOUSE_TMP}/${FILE_NAME}' FORMAT Native" + +${CLICKHOUSE_CLIENT} --query "TRUNCATE TABLE test" + +${CLICKHOUSE_CLIENT} --query "INSERT INTO test FROM INFILE '${CLICKHOUSE_TMP}/${FILE_NAME}'" + +${CLICKHOUSE_CLIENT} --query "SELECT * FROM test" + +${CLICKHOUSE_CLIENT} --query "DROP TABLE test" + +rm -f "${CLICKHOUSE_TMP}/${FILE_NAME}" diff --git a/tests/queries/0_stateless/02513_prewhere_combine_step_filters.reference b/tests/queries/0_stateless/02513_prewhere_combine_step_filters.reference new file mode 100644 index 00000000000..85adb1850d4 --- /dev/null +++ b/tests/queries/0_stateless/02513_prewhere_combine_step_filters.reference @@ -0,0 +1,110 @@ +-- { echoOn } +SELECT * FROM table_02513; +143001 +143002 +143003 +143004 +143005 +143006 +143007 +143008 +143009 +143011 +143012 +143013 +143014 +143015 +143016 +143017 +143018 +143019 +SELECT * FROM table_02513 WHERE n%11; +143001 +143002 +143003 +143004 +143005 +143006 +143007 +143008 +143009 +143012 +143013 +143014 +143015 +143016 +143017 +143018 +143019 +SELECT * FROM table_02513 PREWHERE n%11; +143001 +143002 +143003 +143004 +143005 +143006 +143007 +143008 +143009 +143012 +143013 +143014 +143015 +143016 +143017 +143018 +143019 +SELECT * FROM table_02513 WHERE n%11 AND n%13; +143001 +143002 +143003 +143004 +143005 +143006 +143007 +143008 +143009 +143012 +143014 +143015 +143016 +143017 +143018 +143019 +SELECT * FROM table_02513 PREWHERE n%11 WHERE n%13; +143001 +143002 +143003 +143004 +143005 +143006 +143007 +143008 +143009 +143012 +143014 +143015 +143016 +143017 +143018 +143019 +SELECT * FROM table_02513 WHERE n%143011; -- { serverError ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER } +SELECT * FROM table_02513 PREWHERE n%143011; -- { serverError ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER } +SELECT * FROM table_02513 WHERE n%143011 AND n%13; +143001 +143002 +143003 +143004 +143005 +143006 +143007 +143008 +143009 +143012 +143014 +143015 +143016 +143017 +143018 +143019 +SELECT * FROM table_02513 PREWHERE n%143011 WHERE n%13; -- { serverError ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER } diff --git a/tests/queries/0_stateless/02513_prewhere_combine_step_filters.sql b/tests/queries/0_stateless/02513_prewhere_combine_step_filters.sql new file mode 100644 index 00000000000..771893ce674 --- /dev/null +++ b/tests/queries/0_stateless/02513_prewhere_combine_step_filters.sql @@ -0,0 +1,26 @@ +DROP TABLE IF EXISTS table_02513; + +CREATE TABLE table_02513 (n UInt64) ENGINE=MergeTree() ORDER BY tuple() SETTINGS index_granularity=100; + +INSERT INTO table_02513 SELECT number+11*13*1000 FROM numbers(20); + +SET allow_experimental_lightweight_delete=1; +SET mutations_sync=2; +SET max_threads=1; + +DELETE FROM table_02513 WHERE n%10=0; + +-- { echoOn } +SELECT * FROM table_02513; +SELECT * FROM table_02513 WHERE n%11; +SELECT * FROM table_02513 PREWHERE n%11; +SELECT * FROM table_02513 WHERE n%11 AND n%13; +SELECT * FROM table_02513 PREWHERE n%11 WHERE n%13; + +SELECT * FROM table_02513 WHERE n%143011; -- { serverError ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER } +SELECT * FROM table_02513 PREWHERE n%143011; -- { serverError 
ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER } +SELECT * FROM table_02513 WHERE n%143011 AND n%13; +SELECT * FROM table_02513 PREWHERE n%143011 WHERE n%13; -- { serverError ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER } +-- { echoOff } + +DROP TABLE table_02513; diff --git a/tests/queries/0_stateless/02514_bad_index_granularity.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02514_bad_index_granularity.sql new file mode 100644 index 00000000000..975af2d0728 --- /dev/null +++ b/tests/queries/0_stateless/02514_bad_index_granularity.sql @@ -0,0 +1,7 @@ +CREATE TABLE t +( + id Int64, + d String, + p Map(String, String) +) +ENGINE = ReplacingMergeTree order by id settings index_granularity = 0; -- { serverError BAD_ARGUMENTS } diff --git a/tests/queries/0_stateless/02514_database_replicated_no_arguments_for_rmt.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02514_database_replicated_no_arguments_for_rmt.sh new file mode 100755 index 00000000000..ee51640488e --- /dev/null +++ b/tests/queries/0_stateless/02514_database_replicated_no_arguments_for_rmt.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash +# Tags: replica, no-replicated-database +# I don't understand why this test fails in the ReplicatedDatabase run, +# but too much magic is included in it, so I just disabled it for the ReplicatedDatabase run because +# here we explicitly create it and check that everything is alright. + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} -q "create table mute_stylecheck (x UInt32) engine = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/root', '1') order by x" + +${CLICKHOUSE_CLIENT} -q "CREATE USER user_${CLICKHOUSE_DATABASE} settings database_replicated_allow_replicated_engine_arguments=0" +${CLICKHOUSE_CLIENT} -q "GRANT CREATE TABLE ON ${CLICKHOUSE_DATABASE}_db.* TO user_${CLICKHOUSE_DATABASE}" +${CLICKHOUSE_CLIENT} --allow_experimental_database_replicated=1 --query "CREATE DATABASE ${CLICKHOUSE_DATABASE}_db engine = Replicated('/clickhouse/databases/${CLICKHOUSE_TEST_ZOOKEEPER_PREFIX}/${CLICKHOUSE_DATABASE}_db', '{shard}', '{replica}')" +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --user "user_${CLICKHOUSE_DATABASE}" -n --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.tab_rmt_ok (x UInt32) engine = ReplicatedMergeTree order by x;" +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --user "user_${CLICKHOUSE_DATABASE}" -n --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.tab_rmt_fail (x UInt32) engine = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/root/{shard}', '{replica}') order by x; -- { serverError 80 }" +${CLICKHOUSE_CLIENT} --query "DROP DATABASE ${CLICKHOUSE_DATABASE}_db" +${CLICKHOUSE_CLIENT} -q "DROP USER user_${CLICKHOUSE_DATABASE}" + +${CLICKHOUSE_CLIENT} -q "drop table mute_stylecheck" diff --git a/tests/queries/0_stateless/02514_if_with_lazy_low_cardinality.reference b/tests/queries/0_stateless/02514_if_with_lazy_low_cardinality.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02514_if_with_lazy_low_cardinality.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02514_if_with_lazy_low_cardinality.sql b/tests/queries/0_stateless/02514_if_with_lazy_low_cardinality.sql new file mode 100644 index 00000000000..80e3c0a9ece --- /dev/null +++ b/tests/queries/0_stateless/02514_if_with_lazy_low_cardinality.sql @@ -0,0 +1,5 @@ +create table if not exists t (`arr.key` Array(LowCardinality(String)), `arr.value` Array(LowCardinality(String))) engine = Memory; +insert into t (`arr.key`, `arr.value`) values (['a'], ['b']); +select if(true, if(lowerUTF8(arr.key) = 'a', 1, 2), 3) as x from t left array join arr; +drop table t; + diff --git a/tests/queries/0_stateless/02514_null_dictionary_source.reference b/tests/queries/0_stateless/02514_null_dictionary_source.reference new file mode 100644 index 00000000000..bb08ece2bcf --- /dev/null +++ b/tests/queries/0_stateless/02514_null_dictionary_source.reference @@ -0,0 +1,4 @@ +0 \N 111 0 111 +123 \N 111 123 111 +\N \N 111 +77 diff --git a/tests/queries/0_stateless/02514_null_dictionary_source.sql b/tests/queries/0_stateless/02514_null_dictionary_source.sql new file mode 100644 index 00000000000..74fb57707ff --- /dev/null +++ b/tests/queries/0_stateless/02514_null_dictionary_source.sql @@ -0,0 +1,48 @@ +-- Tags: no-parallel + +DROP DICTIONARY IF EXISTS null_dict; +CREATE DICTIONARY null_dict ( + id UInt64, + val UInt8, + default_val UInt8 DEFAULT 123, + nullable_val Nullable(UInt8) +) +PRIMARY KEY id +SOURCE(NULL()) +LAYOUT(FLAT()) +LIFETIME(0); + +SELECT + dictGet('null_dict', 'val', 1337), + dictGetOrNull('null_dict', 'val', 1337), + dictGetOrDefault('null_dict', 'val', 1337, 111), + dictGetUInt8('null_dict', 'val', 1337), + dictGetUInt8OrDefault('null_dict', 'val', 1337, 111); + +SELECT + dictGet('null_dict', 'default_val', 1337), + dictGetOrNull('null_dict', 'default_val', 1337), + 
dictGetOrDefault('null_dict', 'default_val', 1337, 111), + dictGetUInt8('null_dict', 'default_val', 1337), + dictGetUInt8OrDefault('null_dict', 'default_val', 1337, 111); + +SELECT + dictGet('null_dict', 'nullable_val', 1337), + dictGetOrNull('null_dict', 'nullable_val', 1337), + dictGetOrDefault('null_dict', 'nullable_val', 1337, 111); + +SELECT val, nullable_val FROM null_dict; + +DROP DICTIONARY IF EXISTS null_ip_dict; +CREATE DICTIONARY null_ip_dict ( + network String, + val UInt8 DEFAULT 77 +) +PRIMARY KEY network +SOURCE(NULL()) +LAYOUT(IP_TRIE()) +LIFETIME(0); + +SELECT dictGet('null_ip_dict', 'val', toIPv4('127.0.0.1')); + +SELECT network, val FROM null_ip_dict; diff --git a/tests/queries/0_stateless/02514_tsv_zero_started_number.reference b/tests/queries/0_stateless/02514_tsv_zero_started_number.reference new file mode 100644 index 00000000000..829ab6bc4d0 --- /dev/null +++ b/tests/queries/0_stateless/02514_tsv_zero_started_number.reference @@ -0,0 +1 @@ +Nullable(String) 0123 diff --git a/tests/queries/0_stateless/02514_tsv_zero_started_number.sql b/tests/queries/0_stateless/02514_tsv_zero_started_number.sql new file mode 100644 index 00000000000..d2058ea8f94 --- /dev/null +++ b/tests/queries/0_stateless/02514_tsv_zero_started_number.sql @@ -0,0 +1,2 @@ +select toTypeName(*), * from format(TSV, '0123'); + diff --git a/tests/queries/0_stateless/02515_analyzer_null_for_empty.reference b/tests/queries/0_stateless/02515_analyzer_null_for_empty.reference new file mode 100644 index 00000000000..13e4ff9b55a --- /dev/null +++ b/tests/queries/0_stateless/02515_analyzer_null_for_empty.reference @@ -0,0 +1 @@ +92233720368547758.06 diff --git a/tests/queries/0_stateless/02515_analyzer_null_for_empty.sql b/tests/queries/0_stateless/02515_analyzer_null_for_empty.sql new file mode 100644 index 00000000000..de21e9b475e --- /dev/null +++ b/tests/queries/0_stateless/02515_analyzer_null_for_empty.sql @@ -0,0 +1,4 @@ +SET allow_experimental_analyzer = 1; +SET aggregate_functions_null_for_empty = 1; + +SELECT max(aggr) FROM (SELECT max('92233720368547758.06') AS aggr FROM system.one); diff --git a/tests/queries/0_stateless/02515_and_or_if_multiif_not_return_lc.reference b/tests/queries/0_stateless/02515_and_or_if_multiif_not_return_lc.reference new file mode 100644 index 00000000000..805bbdf7a59 --- /dev/null +++ b/tests/queries/0_stateless/02515_and_or_if_multiif_not_return_lc.reference @@ -0,0 +1,4 @@ +UInt8 +UInt8 +UInt8 +UInt8 diff --git a/tests/queries/0_stateless/02515_and_or_if_multiif_not_return_lc.sql b/tests/queries/0_stateless/02515_and_or_if_multiif_not_return_lc.sql new file mode 100644 index 00000000000..0ccccd4d9a7 --- /dev/null +++ b/tests/queries/0_stateless/02515_and_or_if_multiif_not_return_lc.sql @@ -0,0 +1,5 @@ +select toTypeName(if(toLowCardinality(number % 2), 1, 2)) from numbers(1); +select toTypeName(multiIf(toLowCardinality(number % 2), 1, 1, 2, 3)) from numbers(1); +select toTypeName(toLowCardinality(number % 2) and 2) from numbers(1); +select toTypeName(toLowCardinality(number % 2) or 2) from numbers(1); + diff --git a/tests/queries/0_stateless/02515_fix_any_parsing.reference b/tests/queries/0_stateless/02515_fix_any_parsing.reference new file mode 100644 index 00000000000..427fa0c4442 --- /dev/null +++ b/tests/queries/0_stateless/02515_fix_any_parsing.reference @@ -0,0 +1,2 @@ +SELECT any(0) = any(1) +SELECT any((NULL + NULL) = 0.0001), '1', NULL + -2147483647, any(NULL), (NULL + NULL) = 1000.0001, (NULL + NULL) = ((NULL + 10.0001) = (NULL, (NULL + 0.9999) = any(inf, 0., 
NULL, (NULL + 1.0001) = '214748364.6')), (NULL + NULL) = (NULL + nan)) diff --git a/tests/queries/0_stateless/02515_fix_any_parsing.sh b/tests/queries/0_stateless/02515_fix_any_parsing.sh new file mode 100755 index 00000000000..ed7316bdbb8 --- /dev/null +++ b/tests/queries/0_stateless/02515_fix_any_parsing.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +set -e + +format="$CLICKHOUSE_FORMAT --oneline" + +echo "SELECT any(0) = any(1)" | $format +echo "SELECT any((NULL + NULL) = 0.0001), '1', NULL + -2147483647, any(NULL), (NULL + NULL) = 1000.0001, (NULL + NULL) = ((NULL + 10.0001) = (NULL, (NULL + 0.9999) = any(inf, 0., NULL, (NULL + 1.0001) = '214748364.6')), (NULL + NULL) = (NULL + nan))" | $format | $format diff --git a/tests/queries/0_stateless/02515_projections_with_totals.reference b/tests/queries/0_stateless/02515_projections_with_totals.reference new file mode 100644 index 00000000000..c6359cae032 --- /dev/null +++ b/tests/queries/0_stateless/02515_projections_with_totals.reference @@ -0,0 +1,3 @@ +0 + +0 diff --git a/tests/queries/0_stateless/02515_projections_with_totals.sql b/tests/queries/0_stateless/02515_projections_with_totals.sql new file mode 100644 index 00000000000..4d43d5381da --- /dev/null +++ b/tests/queries/0_stateless/02515_projections_with_totals.sql @@ -0,0 +1,6 @@ +DROP TABLE IF EXISTS t; +CREATE TABLE t (x UInt8, PROJECTION p (SELECT x GROUP BY x)) ENGINE = MergeTree ORDER BY (); +INSERT INTO t VALUES (0); +SET group_by_overflow_mode = 'any', max_rows_to_group_by = 1000, totals_mode = 'after_having_auto'; +SELECT x FROM t GROUP BY x WITH TOTALS; +DROP TABLE t; diff --git a/tests/queries/0_stateless/02515_tuple_lambda_parsing.reference b/tests/queries/0_stateless/02515_tuple_lambda_parsing.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02515_tuple_lambda_parsing.sql b/tests/queries/0_stateless/02515_tuple_lambda_parsing.sql new file mode 100644 index 00000000000..4ec49f30e39 --- /dev/null +++ b/tests/queries/0_stateless/02515_tuple_lambda_parsing.sql @@ -0,0 +1,7 @@ +explain ast select tuple(a) -> f(a); -- { clientError SYNTAX_ERROR } +explain ast select tuple(a, b) -> f(a); -- { clientError SYNTAX_ERROR } +explain ast select (tuple(a)) -> f(a); -- { clientError SYNTAX_ERROR } +explain ast select (f(a)) -> f(a); -- { clientError SYNTAX_ERROR } +explain ast select (a::UInt64) -> f(a); -- { clientError SYNTAX_ERROR } +explain ast select (1) -> f(a); -- { clientError SYNTAX_ERROR } +explain ast select (1::UInt64) -> f(a); -- { clientError SYNTAX_ERROR } diff --git a/tests/queries/0_stateless/02516_join_with_totals_and_subquery_bug.reference b/tests/queries/0_stateless/02516_join_with_totals_and_subquery_bug.reference new file mode 100644 index 00000000000..fd0b223f8e5 --- /dev/null +++ b/tests/queries/0_stateless/02516_join_with_totals_and_subquery_bug.reference @@ -0,0 +1,7 @@ +1 +1 + +0 +\N + +100000000000000000000 diff --git a/tests/queries/0_stateless/02516_join_with_totals_and_subquery_bug.sql b/tests/queries/0_stateless/02516_join_with_totals_and_subquery_bug.sql new file mode 100644 index 00000000000..b6e60aa2e1f --- /dev/null +++ b/tests/queries/0_stateless/02516_join_with_totals_and_subquery_bug.sql @@ -0,0 +1,53 @@ +SELECT * +FROM +( + SELECT 1 AS a +) AS t1 +INNER JOIN +( + SELECT 1 AS a + GROUP BY 1 + WITH TOTALS + UNION ALL + SELECT 1 + GROUP BY 1 + WITH TOTALS +) AS t2 USING 
(a); + +SELECT a +FROM +( + SELECT + NULL AS a, + NULL AS b, + NULL AS c + UNION ALL + SELECT + 100000000000000000000., + NULL, + NULL + WHERE 0 + GROUP BY + GROUPING SETS ((NULL)) + WITH TOTALS +) AS js1 +ALL LEFT JOIN +( + SELECT + NULL AS a, + 2147483647 AS d + GROUP BY + NULL, + '214748364.8' + WITH CUBE + WITH TOTALS + UNION ALL + SELECT + 2147483646, + NULL + GROUP BY + base58Encode(materialize(NULL)), + NULL + WITH TOTALS +) AS js2 USING (a) +ORDER BY b ASC NULLS FIRST; diff --git a/tests/queries/0_stateless/02516_projections_with_rollup.reference b/tests/queries/0_stateless/02516_projections_with_rollup.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02516_projections_with_rollup.sql b/tests/queries/0_stateless/02516_projections_with_rollup.sql new file mode 100644 index 00000000000..e670fbb7827 --- /dev/null +++ b/tests/queries/0_stateless/02516_projections_with_rollup.sql @@ -0,0 +1,120 @@ +DROP TABLE IF EXISTS video_log; +DROP TABLE IF EXISTS video_log_result__fuzz_0; +DROP TABLE IF EXISTS rng; + +CREATE TABLE video_log +( + `datetime` DateTime, + `user_id` UInt64, + `device_id` UInt64, + `domain` LowCardinality(String), + `bytes` UInt64, + `duration` UInt64 +) +ENGINE = MergeTree +PARTITION BY toDate(datetime) +ORDER BY (user_id, device_id); + +CREATE TABLE video_log_result__fuzz_0 +( + `hour` Nullable(DateTime), + `sum_bytes` UInt64, + `avg_duration` Float64 +) +ENGINE = MergeTree +PARTITION BY toDate(hour) +ORDER BY sum_bytes +SETTINGS allow_nullable_key = 1; + +CREATE TABLE rng +( + `user_id_raw` UInt64, + `device_id_raw` UInt64, + `domain_raw` UInt64, + `bytes_raw` UInt64, + `duration_raw` UInt64 +) +ENGINE = GenerateRandom(1024); + +INSERT INTO video_log SELECT + toUnixTimestamp('2022-07-22 01:00:00') + (rowNumberInAllBlocks() / 20000), + user_id_raw % 100000000 AS user_id, + device_id_raw % 200000000 AS device_id, + domain_raw % 100, + (bytes_raw % 1024) + 128, + (duration_raw % 300) + 100 +FROM rng +LIMIT 1728000; + +INSERT INTO video_log SELECT + toUnixTimestamp('2022-07-22 01:00:00') + (rowNumberInAllBlocks() / 20000), + user_id_raw % 100000000 AS user_id, + 100 AS device_id, + domain_raw % 100, + (bytes_raw % 1024) + 128, + (duration_raw % 300) + 100 +FROM rng +LIMIT 10; + +ALTER TABLE video_log + ADD PROJECTION p_norm + ( + SELECT + datetime, + device_id, + bytes, + duration + ORDER BY device_id + ); + +ALTER TABLE video_log + MATERIALIZE PROJECTION p_norm +SETTINGS mutations_sync = 1; + +ALTER TABLE video_log + ADD PROJECTION p_agg + ( + SELECT + toStartOfHour(datetime) AS hour, + domain, + sum(bytes), + avg(duration) + GROUP BY + hour, + domain + ); + +ALTER TABLE video_log + MATERIALIZE PROJECTION p_agg +SETTINGS mutations_sync = 1; + +-- We are not interested in the result of this query, but it should not produce a logical error. 
+SELECT + avg_duration1, + avg_duration1 = avg_duration2 +FROM +( + SELECT + sum(bytes), + hour, + toStartOfHour(datetime) AS hour, + avg(duration) AS avg_duration1 + FROM video_log + GROUP BY hour + WITH ROLLUP + WITH TOTALS +) +LEFT JOIN +( + SELECT + hour, + sum_bytes AS sum_bytes2, + avg_duration AS avg_duration2 + FROM video_log_result__fuzz_0 +) USING (hour) +SETTINGS joined_subquery_requires_alias = 0 +FORMAT Null; + +DROP TABLE video_log; +DROP TABLE video_log_result__fuzz_0; +DROP TABLE rng; diff --git a/tests/queries/0_stateless/02517_avro_bool_type.reference b/tests/queries/0_stateless/02517_avro_bool_type.reference new file mode 100644 index 00000000000..c383ecf3857 --- /dev/null +++ b/tests/queries/0_stateless/02517_avro_bool_type.reference @@ -0,0 +1 @@ +true Bool diff --git a/tests/queries/0_stateless/02517_avro_bool_type.sh b/tests/queries/0_stateless/02517_avro_bool_type.sh new file mode 100755 index 00000000000..a26dfbd06ea --- /dev/null +++ b/tests/queries/0_stateless/02517_avro_bool_type.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL -q "select true::Bool as b format Avro" | $CLICKHOUSE_LOCAL --table=test --input-format=Avro -q "select b, toTypeName(b) from test"; + diff --git a/tests/queries/0_stateless/02517_executable_pool_bad_input_query.reference b/tests/queries/0_stateless/02517_executable_pool_bad_input_query.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02517_executable_pool_bad_input_query.sql b/tests/queries/0_stateless/02517_executable_pool_bad_input_query.sql new file mode 100644 index 00000000000..c016c93b672 --- /dev/null +++ b/tests/queries/0_stateless/02517_executable_pool_bad_input_query.sql @@ -0,0 +1,4 @@ +CREATE TABLE test_table (value String) ENGINE=ExecutablePool('nonexist.py', 'TabSeparated', (foobar)); -- {serverError BAD_ARGUMENTS} +CREATE TABLE test_table (value String) ENGINE=ExecutablePool('nonexist.py', 'TabSeparated', '(SELECT 1)'); -- {serverError BAD_ARGUMENTS} +CREATE TABLE test_table (value String) ENGINE=ExecutablePool('nonexist.py', 'TabSeparated', [1,2,3]); -- {serverError BAD_ARGUMENTS} + diff --git a/tests/queries/0_stateless/02517_fuse_bug_44712.reference b/tests/queries/0_stateless/02517_fuse_bug_44712.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02517_fuse_bug_44712.sql b/tests/queries/0_stateless/02517_fuse_bug_44712.sql new file mode 100644 index 00000000000..894bf9e06d5 --- /dev/null +++ b/tests/queries/0_stateless/02517_fuse_bug_44712.sql @@ -0,0 +1,10 @@ +DROP TABLE IF EXISTS fuse_tbl__fuzz_35; + +CREATE TABLE fuse_tbl__fuzz_35 (`a` UInt8, `b` Nullable(Int16)) ENGINE = Log; +INSERT INTO fuse_tbl__fuzz_35 SELECT number, number + 1 FROM numbers(1000); + +set allow_experimental_analyzer = 0, optimize_syntax_fuse_functions = 1, optimize_fuse_sum_count_avg = 1; + +SELECT quantile(0.5)(b), quantile(0.9)(b) FROM (SELECT x + 2147483648 AS b FROM (SELECT quantile(0.5)(b) AS x FROM fuse_tbl__fuzz_35) GROUP BY x) FORMAT Null; + +DROP TABLE IF EXISTS fuse_tbl__fuzz_35; diff --git a/tests/queries/0_stateless/02518_delete_on_materialized_view.reference b/tests/queries/0_stateless/02518_delete_on_materialized_view.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02518_delete_on_materialized_view.sql 
b/tests/queries/0_stateless/02518_delete_on_materialized_view.sql new file mode 100644 index 00000000000..73abca4ea53 --- /dev/null +++ b/tests/queries/0_stateless/02518_delete_on_materialized_view.sql @@ -0,0 +1,14 @@ +DROP TABLE IF EXISTS kek; +DROP TABLE IF EXISTS kekv; + +CREATE TABLE kek (a UInt32) ENGINE = MergeTree ORDER BY a; +CREATE MATERIALIZED VIEW kekv ENGINE = MergeTree ORDER BY tuple() AS SELECT * FROM kek; + +INSERT INTO kek VALUES (1); +DELETE FROM kekv WHERE a = 1; -- { serverError BAD_ARGUMENTS} + +SET allow_experimental_lightweight_delete=1; +DELETE FROM kekv WHERE a = 1; -- { serverError BAD_ARGUMENTS} + +DROP TABLE IF EXISTS kek; +DROP TABLE IF EXISTS kekv; diff --git a/tests/queries/0_stateless/02519_monotonicity_fuzz.reference b/tests/queries/0_stateless/02519_monotonicity_fuzz.reference new file mode 100644 index 00000000000..9459d4ba2a0 --- /dev/null +++ b/tests/queries/0_stateless/02519_monotonicity_fuzz.reference @@ -0,0 +1 @@ +1.1 diff --git a/tests/queries/0_stateless/02519_monotonicity_fuzz.sql b/tests/queries/0_stateless/02519_monotonicity_fuzz.sql new file mode 100644 index 00000000000..4a0860702bb --- /dev/null +++ b/tests/queries/0_stateless/02519_monotonicity_fuzz.sql @@ -0,0 +1,5 @@ +DROP TABLE IF EXISTS t; +CREATE TABLE t (x Decimal(18, 3)) ENGINE = MergeTree ORDER BY x; +INSERT INTO t VALUES (1.1); +SELECT * FROM t WHERE toUInt64(x) = 1; +DROP TABLE t; diff --git a/tests/queries/0_stateless/add-test b/tests/queries/0_stateless/add-test index 2173a4d8cc2..39f6742f71c 100755 --- a/tests/queries/0_stateless/add-test +++ b/tests/queries/0_stateless/add-test @@ -25,4 +25,7 @@ fi set -x touch ${TESTS_PATH}/${NEW_TEST_NO}_${FILENAME}.${FILEEXT} +if [[ $FILEEXT == "sh" ]] ; then + chmod +x ${TESTS_PATH}/${NEW_TEST_NO}_${FILENAME}.${FILEEXT} +fi touch ${TESTS_PATH}/${NEW_TEST_NO}_${FILENAME}.reference diff --git a/tests/queries/0_stateless/data_bson/comments.bson b/tests/queries/0_stateless/data_bson/comments.bson new file mode 100644 index 00000000000..9aa4b6e6562 Binary files /dev/null and b/tests/queries/0_stateless/data_bson/comments.bson differ diff --git a/tests/queries/0_stateless/filesystem_cache_queries/02226_filesystem_cache_profile_events.sh b/tests/queries/0_stateless/filesystem_cache_queries/02226_filesystem_cache_profile_events.sh deleted file mode 100755 index ab8511d85b3..00000000000 --- a/tests/queries/0_stateless/filesystem_cache_queries/02226_filesystem_cache_profile_events.sh +++ /dev/null @@ -1,85 +0,0 @@ -#!/usr/bin/env bash -# Tags: no-fasttest, no-parallel, no-s3-storage, no-random-settings, no-cpu-aarch64, no-replicated-database - -clickhouse client --multiquery --multiline --query """ -SET max_memory_usage='20G'; -SET enable_filesystem_cache_on_write_operations = 0; - -DROP TABLE IF EXISTS test; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='_storagePolicy'; -INSERT INTO test SELECT * FROM generateRandom('key UInt32, value String') LIMIT 10000; - -SET remote_filesystem_read_method='threadpool'; -""" - -query="SELECT * FROM test LIMIT 10" - -query_id=$(clickhouse client --query "select queryID() from ($query) limit 1" 2>&1) - -clickhouse client --multiquery --multiline --query """ -SYSTEM FLUSH LOGS; -SELECT ProfileEvents['CachedReadBufferReadFromSourceBytes'] > 0 as remote_fs_read, - ProfileEvents['CachedReadBufferReadFromCacheBytes'] > 0 as remote_fs_cache_read, - ProfileEvents['CachedReadBufferCacheWriteBytes'] > 0 as remote_fs_read_and_download -FROM system.query_log -WHERE 
query_id='$query_id' -AND type = 'QueryFinish' -AND current_database = currentDatabase() -ORDER BY query_start_time DESC -LIMIT 1; -""" - -clickhouse client --multiquery --multiline --query """ -set remote_filesystem_read_method = 'read'; -set local_filesystem_read_method = 'pread'; -""" - -query_id=$(clickhouse client --query "select queryID() from ($query) limit 1" 2>&1) - -clickhouse client --multiquery --multiline --query """ -SYSTEM FLUSH LOGS; -SELECT ProfileEvents['CachedReadBufferReadFromSourceBytes'] > 0 as remote_fs_read, - ProfileEvents['CachedReadBufferReadFromCacheBytes'] > 0 as remote_fs_cache_read, - ProfileEvents['CachedReadBufferCacheWriteBytes'] > 0 as remote_fs_read_and_download -FROM system.query_log -WHERE query_id='$query_id' -AND type = 'QueryFinish' -AND current_database = currentDatabase() -ORDER BY query_start_time DESC -LIMIT 1; -""" - - -clickhouse client --multiquery --multiline --query """ -set remote_filesystem_read_method='threadpool'; -""" - -clickhouse client --multiquery --multiline --query """ -SELECT * FROM test WHERE value LIKE '%abc%' ORDER BY value LIMIT 10 FORMAT Null; - -SET enable_filesystem_cache_on_write_operations = 1; - -TRUNCATE TABLE test; -SELECT count() FROM test; - -SYSTEM DROP FILESYSTEM CACHE; - -INSERT INTO test SELECT * FROM generateRandom('key UInt32, value String') LIMIT 10000; -""" - -query_id=$(clickhouse client --query "select queryID() from ($query) limit 1") - -clickhouse client --multiquery --multiline --query """ -SYSTEM FLUSH LOGS; -SELECT ProfileEvents['CachedReadBufferReadFromSourceBytes'] > 0 as remote_fs_read, - ProfileEvents['CachedReadBufferReadFromCacheBytes'] > 0 as remote_fs_cache_read, - ProfileEvents['CachedReadBufferCacheWriteBytes'] > 0 as remote_fs_read_and_download -FROM system.query_log -WHERE query_id='$query_id' -AND type = 'QueryFinish' -AND current_database = currentDatabase() -ORDER BY query_start_time DESC -LIMIT 1; - -DROP TABLE test; -""" diff --git a/tests/queries/0_stateless/filesystem_cache_queries/02240_system_filesystem_cache_table.queries b/tests/queries/0_stateless/filesystem_cache_queries/02240_system_filesystem_cache_table.queries deleted file mode 100644 index 228dccfcb5b..00000000000 --- a/tests/queries/0_stateless/filesystem_cache_queries/02240_system_filesystem_cache_table.queries +++ /dev/null @@ -1,33 +0,0 @@ --- { echo } - -SYSTEM DROP FILESYSTEM CACHE; -SET enable_filesystem_cache_on_write_operations=0; -DROP TABLE IF EXISTS test; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='_storagePolicy', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; -SYSTEM STOP MERGES test; -INSERT INTO test SELECT number, toString(number) FROM numbers(100); - -SELECT * FROM test FORMAT Null; -SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size; - -SYSTEM DROP FILESYSTEM CACHE; -SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; -SELECT * FROM test FORMAT Null; -SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; -SYSTEM DROP FILESYSTEM CACHE; -SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; - -DROP TABLE IF EXISTS test; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='_storagePolicy_3', min_bytes_for_wide_part = 10485760, compress_marks=false, 
compress_primary_key=false; -INSERT INTO test SELECT number, toString(number) FROM numbers(100); -SELECT * FROM test FORMAT Null; -SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size; -SELECT * FROM test FORMAT Null; -SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size; - -SYSTEM DROP FILESYSTEM CACHE; -SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; -SELECT * FROM test FORMAT Null; -SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; -SYSTEM DROP FILESYSTEM CACHE; -SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; diff --git a/tests/queries/0_stateless/filesystem_cache_queries/02241_filesystem_cache_on_write_operations.queries b/tests/queries/0_stateless/filesystem_cache_queries/02241_filesystem_cache_on_write_operations.queries deleted file mode 100644 index bd185942e6c..00000000000 --- a/tests/queries/0_stateless/filesystem_cache_queries/02241_filesystem_cache_on_write_operations.queries +++ /dev/null @@ -1,115 +0,0 @@ --- { echo } - -SET enable_filesystem_cache_on_write_operations=1; - -DROP TABLE IF EXISTS test; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='_storagePolicy', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; -SYSTEM STOP MERGES test; -SYSTEM DROP FILESYSTEM CACHE; -SELECT file_segment_range_begin, file_segment_range_end, size, state -FROM -( - SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path - FROM - ( - SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path - FROM system.remote_data_paths - ) AS data_paths - INNER JOIN - system.filesystem_cache AS caches - ON data_paths.cache_path = caches.cache_path -) -WHERE endsWith(local_path, 'data.bin') -FORMAT Vertical; - -SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; -SELECT count() FROM system.filesystem_cache; - -INSERT INTO test SELECT number, toString(number) FROM numbers(100); - -SELECT file_segment_range_begin, file_segment_range_end, size, state -FROM -( - SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path - FROM - ( - SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path - FROM system.remote_data_paths - ) AS data_paths - INNER JOIN - system.filesystem_cache AS caches - ON data_paths.cache_path = caches.cache_path -) -WHERE endsWith(local_path, 'data.bin') -FORMAT Vertical; - -SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; -SELECT count() FROM system.filesystem_cache; - -SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0; - -SELECT * FROM test FORMAT Null; -SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0; - -SELECT * FROM test FORMAT Null; -SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0; - -SELECT count() size FROM system.filesystem_cache; - -SYSTEM DROP FILESYSTEM CACHE; - -INSERT INTO test SELECT number, toString(number) FROM numbers(100, 200); - -SELECT 
file_segment_range_begin, file_segment_range_end, size, state -FROM -( - SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path - FROM - ( - SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path - FROM system.remote_data_paths - ) AS data_paths - INNER JOIN - system.filesystem_cache AS caches - ON data_paths.cache_path = caches.cache_path -) -WHERE endsWith(local_path, 'data.bin') -FORMAT Vertical; - -SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; -SELECT count() FROM system.filesystem_cache; - -SELECT count() FROM system.filesystem_cache; -INSERT INTO test SELECT number, toString(number) FROM numbers(100) SETTINGS enable_filesystem_cache_on_write_operations=0; -SELECT count() FROM system.filesystem_cache; - -INSERT INTO test SELECT number, toString(number) FROM numbers(100); -INSERT INTO test SELECT number, toString(number) FROM numbers(300, 10000); -SELECT count() FROM system.filesystem_cache; - -SYSTEM START MERGES test; - -OPTIMIZE TABLE test FINAL; -SELECT count() FROM system.filesystem_cache; - -SET mutations_sync=2; -ALTER TABLE test UPDATE value = 'kek' WHERE key = 100; -SELECT count() FROM system.filesystem_cache; - -INSERT INTO test SELECT number, toString(number) FROM numbers(5000000); -SYSTEM FLUSH LOGS; -SELECT - query, ProfileEvents['RemoteFSReadBytes'] > 0 as remote_fs_read -FROM - system.query_log -WHERE - query LIKE 'SELECT number, toString(number) FROM numbers(5000000)%' - AND type = 'QueryFinish' - AND current_database = currentDatabase() -ORDER BY - query_start_time - DESC -LIMIT 1; - -SELECT count() FROM test; -SELECT count() FROM test WHERE value LIKE '%010%'; diff --git a/tests/queries/0_stateless/filesystem_cache_queries/02242_system_filesystem_cache_log_table.queries b/tests/queries/0_stateless/filesystem_cache_queries/02242_system_filesystem_cache_log_table.queries deleted file mode 100644 index 56a8710cc93..00000000000 --- a/tests/queries/0_stateless/filesystem_cache_queries/02242_system_filesystem_cache_log_table.queries +++ /dev/null @@ -1,19 +0,0 @@ --- { echo } - -SYSTEM DROP FILESYSTEM CACHE; -SET enable_filesystem_cache_log=1; -SET enable_filesystem_cache_on_write_operations=0; - -DROP TABLE IF EXISTS test; -DROP TABLE IF EXISTS system.filesystem_cache_log; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='_storagePolicy', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; -SYSTEM STOP MERGES test; -INSERT INTO test SELECT number, toString(number) FROM numbers(100000); - -SELECT 2240, '_storagePolicy', * FROM test FORMAT Null; -SYSTEM FLUSH LOGS; -SELECT file_segment_range, read_type FROM system.filesystem_cache_log WHERE query_id = (SELECT query_id from system.query_log where query LIKE '%SELECT 2240%_storagePolicy%' AND current_database = currentDatabase() AND type = 'QueryFinish' ORDER BY event_time desc LIMIT 1) ORDER BY file_segment_range, read_type; - -SELECT 2241, '_storagePolicy', * FROM test FORMAT Null; -SYSTEM FLUSH LOGS; -SELECT file_segment_range, read_type FROM system.filesystem_cache_log WHERE query_id = (SELECT query_id from system.query_log where query LIKE '%SELECT 2241%_storagePolicy%' AND current_database = currentDatabase() AND type = 'QueryFinish' ORDER BY event_time desc LIMIT 1) ORDER BY file_segment_range, read_type; diff --git 
a/tests/queries/0_stateless/filesystem_cache_queries/02286_drop_filesystem_cache.queries b/tests/queries/0_stateless/filesystem_cache_queries/02286_drop_filesystem_cache.queries deleted file mode 100644 index 96774db32ed..00000000000 --- a/tests/queries/0_stateless/filesystem_cache_queries/02286_drop_filesystem_cache.queries +++ /dev/null @@ -1,71 +0,0 @@ --- { echo } - -SET enable_filesystem_cache_on_write_operations=0; - -DROP TABLE IF EXISTS test; - -CREATE TABLE test (key UInt32, value String) -Engine=MergeTree() -ORDER BY key -SETTINGS storage_policy='_storagePolicy', min_bytes_for_wide_part = 10485760; - -SYSTEM STOP MERGES; -SYSTEM DROP FILESYSTEM CACHE; - -SELECT count() FROM system.filesystem_cache; -INSERT INTO test SELECT number, toString(number) FROM numbers(100); - -SELECT * FROM test FORMAT Null; -SELECT count() FROM system.filesystem_cache; - -SYSTEM DROP FILESYSTEM CACHE; -SELECT count() FROM system.filesystem_cache; - -SELECT * FROM test FORMAT Null; -SELECT count() FROM system.filesystem_cache; - -SYSTEM DROP FILESYSTEM CACHE './data'; -- { serverError 36 } -SELECT count() FROM system.filesystem_cache; - -SELECT * FROM test FORMAT Null; -SELECT count() FROM system.filesystem_cache; -SELECT count() -FROM ( - SELECT - arrayJoin(cache_paths) AS cache_path, - local_path, - remote_path - FROM - system.remote_data_paths - ) AS data_paths -INNER JOIN system.filesystem_cache AS caches -ON data_paths.cache_path = caches.cache_path; - -DROP TABLE test NO DELAY; -SELECT count() FROM system.filesystem_cache; -SELECT cache_path FROM system.filesystem_cache; -SELECT cache_path, local_path -FROM ( - SELECT - arrayJoin(cache_paths) AS cache_path, - local_path, - remote_path - FROM - system.remote_data_paths - ) AS data_paths -INNER JOIN system.filesystem_cache AS caches -ON data_paths.cache_path = caches.cache_path; - -DROP TABLE IF EXISTS test2; - -CREATE TABLE test2 (key UInt32, value String) -Engine=MergeTree() -ORDER BY key -SETTINGS storage_policy='_storagePolicy_2', min_bytes_for_wide_part = 10485760; - -INSERT INTO test2 SELECT number, toString(number) FROM numbers(100); -SELECT * FROM test2 FORMAT Null; -SELECT count() FROM system.filesystem_cache; - -SYSTEM DROP FILESYSTEM CACHE '_storagePolicy_2/'; -SELECT count() FROM system.filesystem_cache; diff --git a/tests/queries/0_stateless/filesystem_cache_queries/02313_filesystem_cache_seeks.queries b/tests/queries/0_stateless/filesystem_cache_queries/02313_filesystem_cache_seeks.queries deleted file mode 100644 index 7f343fb83bd..00000000000 --- a/tests/queries/0_stateless/filesystem_cache_queries/02313_filesystem_cache_seeks.queries +++ /dev/null @@ -1,24 +0,0 @@ -SYSTEM DROP FILESYSTEM CACHE; -SET send_logs_level = 'fatal'; -- Ignore retriable errors like "AWSClient: Failed to make request" - -DROP TABLE IF EXISTS test_02313; -CREATE TABLE test_02313 (id Int32, val String) -ENGINE = MergeTree() -ORDER BY tuple() -SETTINGS storage_policy = '_storagePolicy'; - -INSERT INTO test_02313 - SELECT * FROM - generateRandom('id Int32, val String') - LIMIT 100000 -SETTINGS enable_filesystem_cache_on_write_operations = 0; - -SELECT * FROM test_02313 WHERE val LIKE concat('%', randomPrintableASCII(3), '%') FORMAT Null; -SELECT * FROM test_02313 WHERE val LIKE concat('%', randomPrintableASCII(3), '%') FORMAT Null; -SELECT * FROM test_02313 WHERE val LIKE concat('%', randomPrintableASCII(3), '%') FORMAT Null; -SELECT * FROM test_02313 WHERE val LIKE concat('%', randomPrintableASCII(3), '%') FORMAT Null; -SELECT * FROM test_02313 WHERE val LIKE 
concat('%', randomPrintableASCII(3), '%') FORMAT Null; -SELECT * FROM test_02313 WHERE val LIKE concat('%', randomPrintableASCII(3), '%') FORMAT Null; -SELECT * FROM test_02313 WHERE val LIKE concat('%', randomPrintableASCII(3), '%') FORMAT Null; - -DROP TABLE test_02313; diff --git a/tests/queries/0_stateless/format_schemas/02482_list_of_structs.capnp b/tests/queries/0_stateless/format_schemas/02482_list_of_structs.capnp new file mode 100644 index 00000000000..b203b5b1bdf --- /dev/null +++ b/tests/queries/0_stateless/format_schemas/02482_list_of_structs.capnp @@ -0,0 +1,11 @@ +@0xb6ecde1cd54a101d; + +struct Nested { + nested @0 :List(MyField); +} + +struct MyField { + x @0 :Int64; + y @1 :Int64; +} + diff --git a/tests/queries/0_stateless/format_schemas/02483_decimals.capnp b/tests/queries/0_stateless/format_schemas/02483_decimals.capnp new file mode 100644 index 00000000000..eff4d488420 --- /dev/null +++ b/tests/queries/0_stateless/format_schemas/02483_decimals.capnp @@ -0,0 +1,7 @@ +@0xb6acde1cd54a101d; + +struct Message { + decimal64 @0 :Int64; + decimal32 @1 :Int32; +} + diff --git a/tests/queries/1_stateful/00047_bar.reference b/tests/queries/1_stateful/00047_bar.reference index c038f59946e..86f7ca3a9b6 100644 --- a/tests/queries/1_stateful/00047_bar.reference +++ b/tests/queries/1_stateful/00047_bar.reference @@ -2,21 +2,21 @@ 732797 475698 ████████████████████████████████████████████████████████████████████████▋ 598875 337212 ███████████████████████████████████████████████████▌ 792887 252197 ██████████████████████████████████████▌ -3807842 196036 █████████████████████████████▊ +3807842 196036 █████████████████████████████▉ 25703952 147211 ██████████████████████▌ -716829 90109 █████████████▋ +716829 90109 █████████████▊ 59183 85379 █████████████ -33010362 77807 ███████████▊ -800784 77492 ███████████▋ +33010362 77807 ███████████▉ +800784 77492 ███████████▊ 20810645 73213 ███████████▏ 25843850 68945 ██████████▌ 23447120 67570 ██████████▎ -14739804 64174 █████████▋ +14739804 64174 █████████▊ 32077710 60456 █████████▏ -22446879 58389 ████████▊ +22446879 58389 ████████▉ 170282 57017 ████████▋ 11482817 52345 ████████ -63469 52142 ███████▊ +63469 52142 ███████▉ 29103473 47758 ███████▎ 10136747 44080 ██████▋ 27528801 43395 ██████▋ @@ -27,12 +27,12 @@ 28600281 32776 █████ 32046685 28788 ████▍ 10130880 26603 ████ -8676831 25733 ███▊ -53230 25595 ███▊ -20271226 25585 ███▊ -17420663 25496 ███▊ -631207 25270 ███▋ -633130 24744 ███▋ +8676831 25733 ███▉ +53230 25595 ███▉ +20271226 25585 ███▉ +17420663 25496 ███▉ +631207 25270 ███▊ +633130 24744 ███▊ 14324015 23349 ███▌ 8537965 21270 ███▎ 11285298 20825 ███▏ @@ -41,9 +41,9 @@ 16368233 19897 ███ 81602 19724 ███ 62896 19717 ███ -12967664 19402 ██▊ -15996597 18557 ██▋ -4379238 18370 ██▋ +12967664 19402 ██▉ +15996597 18557 ██▊ +4379238 18370 ██▊ 90982 17443 ██▋ 18211045 17390 ██▋ 14625884 17302 ██▋ @@ -68,19 +68,19 @@ 125776 13308 ██ 11312316 13181 ██ 32667326 13181 ██ -28628973 12922 █▊ -122804 12520 █▊ -12322758 12352 █▊ -1301819 12283 █▊ -10769545 12183 █▋ -21566939 12170 █▋ -28905364 12158 █▋ -4250765 12049 █▋ -15009727 11818 █▋ -12761932 11733 █▋ -26995888 11658 █▋ -12759346 11514 █▋ -1507911 11452 █▋ +28628973 12922 █▉ +122804 12520 █▉ +12322758 12352 █▉ +1301819 12283 █▉ +10769545 12183 █▊ +21566939 12170 █▊ +28905364 12158 █▊ +4250765 12049 █▊ +15009727 11818 █▊ +12761932 11733 █▊ +26995888 11658 █▊ +12759346 11514 █▊ +1507911 11452 █▊ 968488 11444 █▋ 15736172 11358 █▋ 54310 11193 █▋ @@ -102,21 +102,21 @@ 732797 475698 
████████████████████████████████████████████████████████████████████████▋ 598875 337212 ███████████████████████████████████████████████████▌ 792887 252197 ██████████████████████████████████████▌ -3807842 196036 █████████████████████████████▊ +3807842 196036 █████████████████████████████▉ 25703952 147211 ██████████████████████▌ -716829 90109 █████████████▋ +716829 90109 █████████████▊ 59183 85379 █████████████ -33010362 77807 ███████████▊ -800784 77492 ███████████▋ +33010362 77807 ███████████▉ +800784 77492 ███████████▊ 20810645 73213 ███████████▏ 25843850 68945 ██████████▌ 23447120 67570 ██████████▎ -14739804 64174 █████████▋ +14739804 64174 █████████▊ 32077710 60456 █████████▏ -22446879 58389 ████████▊ +22446879 58389 ████████▉ 170282 57017 ████████▋ 11482817 52345 ████████ -63469 52142 ███████▊ +63469 52142 ███████▉ 29103473 47758 ███████▎ 10136747 44080 ██████▋ 27528801 43395 ██████▋ @@ -127,12 +127,12 @@ 28600281 32776 █████ 32046685 28788 ████▍ 10130880 26603 ████ -8676831 25733 ███▊ -53230 25595 ███▊ -20271226 25585 ███▊ -17420663 25496 ███▊ -631207 25270 ███▋ -633130 24744 ███▋ +8676831 25733 ███▉ +53230 25595 ███▉ +20271226 25585 ███▉ +17420663 25496 ███▉ +631207 25270 ███▊ +633130 24744 ███▊ 14324015 23349 ███▌ 8537965 21270 ███▎ 11285298 20825 ███▏ @@ -141,9 +141,9 @@ 16368233 19897 ███ 81602 19724 ███ 62896 19717 ███ -12967664 19402 ██▊ -15996597 18557 ██▋ -4379238 18370 ██▋ +12967664 19402 ██▉ +15996597 18557 ██▊ +4379238 18370 ██▊ 90982 17443 ██▋ 18211045 17390 ██▋ 14625884 17302 ██▋ @@ -168,19 +168,19 @@ 125776 13308 ██ 11312316 13181 ██ 32667326 13181 ██ -28628973 12922 █▊ -122804 12520 █▊ -12322758 12352 █▊ -1301819 12283 █▊ -10769545 12183 █▋ -21566939 12170 █▋ -28905364 12158 █▋ -4250765 12049 █▋ -15009727 11818 █▋ -12761932 11733 █▋ -26995888 11658 █▋ -12759346 11514 █▋ -1507911 11452 █▋ +28628973 12922 █▉ +122804 12520 █▉ +12322758 12352 █▉ +1301819 12283 █▉ +10769545 12183 █▊ +21566939 12170 █▊ +28905364 12158 █▊ +4250765 12049 █▊ +15009727 11818 █▊ +12761932 11733 █▊ +26995888 11658 █▊ +12759346 11514 █▊ +1507911 11452 █▊ 968488 11444 █▋ 15736172 11358 █▋ 54310 11193 █▋ diff --git a/tests/queries/1_stateful/00062_loyalty.reference b/tests/queries/1_stateful/00062_loyalty.reference index 605e4881dd4..f6451faa815 100644 --- a/tests/queries/1_stateful/00062_loyalty.reference +++ b/tests/queries/1_stateful/00062_loyalty.reference @@ -1,12 +1,12 @@ -10 5604 ███████████████████████████████████████████████████████████████████████████████▎ --9 603 ██████████████████████████████████████████████████████████▊ +-9 603 ██████████████████████████████████████████████████████████▉ -8 236 ██████████████████████████████████████████████████▎ -7 133 █████████████████████████████████████████████ -6 123 ████████████████████████████████████████████▎ --5 105 ██████████████████████████████████████████▊ +-5 105 ██████████████████████████████████████████▉ 5 82 ████████████████████████████████████████▋ 6 91 █████████████████████████████████████████▌ 7 102 ██████████████████████████████████████████▌ 8 156 ██████████████████████████████████████████████▍ 9 222 █████████████████████████████████████████████████▋ -10 4291 ████████████████████████████████████████████████████████████████████████████▊ +10 4291 ████████████████████████████████████████████████████████████████████████████▉ diff --git a/tests/queries/1_stateful/00063_loyalty_joins.reference b/tests/queries/1_stateful/00063_loyalty_joins.reference index e4c3619bf5a..f925b457c6a 100644 --- 
a/tests/queries/1_stateful/00063_loyalty_joins.reference +++ b/tests/queries/1_stateful/00063_loyalty_joins.reference @@ -37,15 +37,15 @@ 8 74083 9 145771 10 1244506 --10 2932018 ███████████████████████████████████████████████████████████████████████████████▊ +-10 2932018 ███████████████████████████████████████████████████████████████████████████████▉ -9 472052 ██████████████████████████████████████████████████████████████████████ -8 136048 ███████████████████████████████████████████████████████████████▍ -7 73688 ████████████████████████████████████████████████████████████ -6 56766 ██████████████████████████████████████████████████████████▋ -5 55691 ██████████████████████████████████████████████████████████▌ 5 47082 █████████████████████████████████████████████████████████▋ -6 32860 ███████████████████████████████████████████████████████▋ +6 32860 ███████████████████████████████████████████████████████▊ 7 52819 ██████████████████████████████████████████████████████████▎ 8 74083 ████████████████████████████████████████████████████████████▏ -9 145771 ███████████████████████████████████████████████████████████████▋ +9 145771 ███████████████████████████████████████████████████████████████▊ 10 1244506 ███████████████████████████████████████████████████████████████████████████▎ diff --git a/tests/queries/1_stateful/00172_parallel_join.reference.j2 b/tests/queries/1_stateful/00172_hits_joins.reference.j2 similarity index 99% rename from tests/queries/1_stateful/00172_parallel_join.reference.j2 rename to tests/queries/1_stateful/00172_hits_joins.reference.j2 index 1a43f1fb6ef..c357ede4c2c 100644 --- a/tests/queries/1_stateful/00172_parallel_join.reference.j2 +++ b/tests/queries/1_stateful/00172_hits_joins.reference.j2 @@ -1,4 +1,4 @@ -{% for join_algorithm in ['hash', 'parallel_hash', 'full_sorting_merge', 'grace_hash'] -%} +{% for join_algorithm in ['hash', 'parallel_hash', 'full_sorting_merge'] -%} --- {{ join_algorithm }} --- 2014-03-17 1406958 265108 2014-03-19 1405797 261624 diff --git a/tests/queries/1_stateful/00172_parallel_join.sql.j2 b/tests/queries/1_stateful/00172_hits_joins.sql.j2 similarity index 99% rename from tests/queries/1_stateful/00172_parallel_join.sql.j2 rename to tests/queries/1_stateful/00172_hits_joins.sql.j2 index ff077f43874..07ea899f536 100644 --- a/tests/queries/1_stateful/00172_parallel_join.sql.j2 +++ b/tests/queries/1_stateful/00172_hits_joins.sql.j2 @@ -1,4 +1,4 @@ -{% for join_algorithm in ['hash', 'parallel_hash', 'full_sorting_merge', 'grace_hash'] -%} +{% for join_algorithm in ['hash', 'parallel_hash', 'full_sorting_merge'] -%} SET max_bytes_in_join = '{% if join_algorithm == 'grace_hash' %}20K{% else %}0{% endif %}'; diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 161c0f60eee..761034ac8dc 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -33,6 +33,7 @@ CustomSeparatedWithNames CustomSeparatedWithNamesAndTypes DBMSs DateTime +DateTimes DockerHub Doxygen Encodings @@ -55,6 +56,7 @@ IPv IntN Integrations JSONAsString +JSONAsObject JSONColumns JSONColumnsWithMetadata JSONCompact @@ -175,6 +177,7 @@ Werror Woboq WriteBuffer WriteBuffers +WithNamesAndTypes XCode YAML YYYY @@ -251,6 +254,7 @@ datafiles dataset datasets datetime +datetimes dbms ddl deallocation @@ -365,6 +369,7 @@ mysqldump mysqljs noop nullable +nullability num obfuscator odbc @@ -434,6 +439,7 @@ rowbinary rowbinarywithnames rowbinarywithnamesandtypes 
rsync +runnable runningAccumulate runtime russian diff --git a/utils/check-style/check-style b/utils/check-style/check-style index 5e5631e7e58..3db014a4026 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -292,6 +292,7 @@ std_cerr_cout_excludes=( # IProcessor::dump() src/Processors/IProcessor.cpp src/Client/ClientBase.cpp + src/Client/LineReader.cpp src/Client/QueryFuzzer.cpp src/Client/Suggest.cpp src/Bridge/IBridge.cpp diff --git a/utils/check-style/check-submodules b/utils/check-style/check-submodules new file mode 100755 index 00000000000..815e6c13c0f --- /dev/null +++ b/utils/check-style/check-submodules @@ -0,0 +1,20 @@ +#!/usr/bin/env bash + +# The script checks if all submodules defined in $GIT_ROOT/.gitmodules exist in $GIT_ROOT/contrib + +set -e + +GIT_ROOT=$(git rev-parse --show-cdup) +GIT_ROOT=${GIT_ROOT:-.} + +cd "$GIT_ROOT" + +# Remove keys for submodule.*.path parameters, the values are separated by \0 +# and check if the directory exists +git config --file .gitmodules --null --get-regexp path | sed -z 's|.*\n||' | \ + xargs -P100 -0 --no-run-if-empty -I{} bash -c 'if ! test -d {}; then echo Directory for submodule {} is not found; exit 1; fi' 2>&1 + + +# And check that the submodule is fine +git config --file .gitmodules --null --get-regexp path | sed -z 's|.*\n||' | \ + xargs -P100 -0 --no-run-if-empty -I{} git submodule status -q {} 2>&1 diff --git a/utils/zookeeper-cli/CMakeLists.txt b/utils/zookeeper-cli/CMakeLists.txt index edccb69755e..be8cf81320c 100644 --- a/utils/zookeeper-cli/CMakeLists.txt +++ b/utils/zookeeper-cli/CMakeLists.txt @@ -1,2 +1,4 @@ -clickhouse_add_executable(clickhouse-zookeeper-cli zookeeper-cli.cpp) +clickhouse_add_executable(clickhouse-zookeeper-cli + zookeeper-cli.cpp + ${ClickHouse_SOURCE_DIR}/src/Client/LineReader.cpp) target_link_libraries(clickhouse-zookeeper-cli PRIVATE clickhouse_common_zookeeper_no_log) diff --git a/utils/zookeeper-cli/zookeeper-cli.cpp b/utils/zookeeper-cli/zookeeper-cli.cpp index bfcdb0a90de..fe11c66ea9c 100644 --- a/utils/zookeeper-cli/zookeeper-cli.cpp +++ b/utils/zookeeper-cli/zookeeper-cli.cpp @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include #include @@ -70,7 +70,7 @@ int main(int argc, char ** argv) Poco::Logger::root().setLevel("trace"); zkutil::ZooKeeper zk{zkutil::ZooKeeperArgs(argv[1])}; - LineReader lr({}, false, {"\\"}, {}); + DB::LineReader lr({}, false, {"\\"}, {}); do {