diff --git a/.clang-tidy b/.clang-tidy index 706730c464d..33d7a66d14c 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -1,3 +1,6 @@ +# Enable all checks + disable selected checks. Feel free to remove disabled checks from below list if +# a) the new check is not controversial (this includes many checks in readability-* and google-*) or +# b) not too noisy (checks with > 100 new warnings are considered noisy, this includes e.g. cppcoreguidelines-*). Checks: '*, -abseil-*, @@ -9,10 +12,8 @@ Checks: '*, -bugprone-easily-swappable-parameters, -bugprone-exception-escape, -bugprone-implicit-widening-of-multiplication-result, - -bugprone-lambda-function-name, -bugprone-narrowing-conversions, -bugprone-not-null-terminated-result, - -bugprone-unhandled-self-assignment, -cert-dcl16-c, -cert-err58-cpp, @@ -27,7 +28,31 @@ Checks: '*, -clang-analyzer-security.insecureAPI.bzero, -clang-analyzer-security.insecureAPI.strcpy, - -cppcoreguidelines-*, + -cppcoreguidelines-avoid-c-arrays, + -cppcoreguidelines-avoid-goto, + -cppcoreguidelines-avoid-magic-numbers, + -cppcoreguidelines-avoid-non-const-global-variables, + -cppcoreguidelines-explicit-virtual-functions, + -cppcoreguidelines-init-variables, + -cppcoreguidelines-interfaces-global-init, + -cppcoreguidelines-macro-usage, + -cppcoreguidelines-narrowing-conversions, + -cppcoreguidelines-no-malloc, + -cppcoreguidelines-non-private-member-variables-in-classes, + -cppcoreguidelines-owning-memory, + -cppcoreguidelines-prefer-member-initializer, + -cppcoreguidelines-pro-bounds-array-to-pointer-decay, + -cppcoreguidelines-pro-bounds-constant-array-index, + -cppcoreguidelines-pro-bounds-pointer-arithmetic, + -cppcoreguidelines-pro-type-const-cast, + -cppcoreguidelines-pro-type-cstyle-cast, + -cppcoreguidelines-pro-type-member-init, + -cppcoreguidelines-pro-type-reinterpret-cast, + -cppcoreguidelines-pro-type-static-cast-downcast, + -cppcoreguidelines-pro-type-union-access, + -cppcoreguidelines-pro-type-vararg, + -cppcoreguidelines-slicing, + -cppcoreguidelines-special-member-functions, -concurrency-mt-unsafe, @@ -47,7 +72,6 @@ Checks: '*, -hicpp-braces-around-statements, -hicpp-explicit-conversions, -hicpp-function-size, - -hicpp-invalid-access-moved, -hicpp-member-init, -hicpp-move-const-arg, -hicpp-multiway-paths-covered, @@ -60,10 +84,10 @@ Checks: '*, -hicpp-uppercase-literal-suffix, -hicpp-use-auto, -hicpp-use-emplace, - -hicpp-use-noexcept, - -hicpp-use-override, -hicpp-vararg, + -linuxkernel-*, + -llvm-*, -llvmlibc-*, @@ -86,7 +110,6 @@ Checks: '*, -performance-inefficient-string-concatenation, -performance-no-int-to-ptr, - -performance-type-promotion-in-math-fn, -performance-unnecessary-value-param, -portability-simd-intrinsics, @@ -95,6 +118,7 @@ Checks: '*, -readability-else-after-return, -readability-function-cognitive-complexity, -readability-function-size, + -readability-identifier-length, -readability-implicit-bool-conversion, -readability-isolate-declaration, -readability-magic-numbers, diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 731119a9957..fd6e6546824 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,3 +1,9 @@ + ### Changelog category (leave one): - New Feature - Improvement diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml index 0e490e9b1ab..57474c3d9dd 100644 --- a/.github/workflows/backport_branches.yml +++ b/.github/workflows/backport_branches.yml @@ -131,7 +131,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=package_release EOF - name: Download changed images @@ -151,7 +150,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -177,7 +176,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=package_aarch64 EOF - name: Download changed images @@ -197,7 +195,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -223,7 +221,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=package_asan EOF - name: Download changed images @@ -243,7 +240,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -269,7 +266,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=package_tsan EOF - name: Download changed images @@ -289,7 +285,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -315,7 +311,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=package_debug EOF - name: Download changed images @@ -335,7 +330,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 diff --git a/.github/workflows/docs_release.yml b/.github/workflows/docs_release.yml index b697fb78738..aed691844da 100644 --- a/.github/workflows/docs_release.yml +++ b/.github/workflows/docs_release.yml @@ -7,16 +7,17 @@ env: concurrency: group: master-release cancel-in-progress: true -on: # yamllint disable-line rule:truthy +'on': push: branches: - master paths: - - 'docs/**' - - 'website/**' - - 'benchmark/**' - - 'docker/**' - '.github/**' + - 'benchmark/**' + 
- 'docker/docs/release/**' + - 'docs/**' + - 'utils/list-versions/version_date.tsv' + - 'website/**' workflow_dispatch: jobs: DockerHubPushAarch64: diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 9241c4c6c8c..2af54da5e16 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -199,7 +199,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=package_release EOF - name: Download changed images @@ -213,7 +212,7 @@ jobs: - name: Check out repository code uses: actions/checkout@v2 with: - fetch-depth: 0 # otherwise we will have no info about contributors + fetch-depth: 0 # For a proper version and performance artifacts - name: Build run: | git -C "$GITHUB_WORKSPACE" submodule sync --recursive @@ -221,7 +220,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -247,7 +246,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=package_aarch64 EOF - name: Download changed images @@ -258,7 +256,7 @@ jobs: - name: Check out repository code uses: actions/checkout@v2 with: - fetch-depth: 0 # otherwise we will have no info about contributors + fetch-depth: 0 # For a proper version and performance artifacts - name: Build run: | git -C "$GITHUB_WORKSPACE" submodule sync --recursive @@ -266,7 +264,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts uses: actions/upload-artifact@v2 with: @@ -280,54 +278,6 @@ jobs: # shellcheck disable=SC2046 docker rm -f $(docker ps -a -q) ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" - BuilderPerformance: - needs: DockerHubPush - runs-on: [self-hosted, builder] - steps: - - name: Set envs - run: | - cat >> "$GITHUB_ENV" << 'EOF' - TEMP_PATH=${{runner.temp}}/build_check - IMAGES_PATH=${{runner.temp}}/images_path - REPO_COPY=${{runner.temp}}/build_check/ClickHouse - CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) - BUILD_NAME=performance - EOF - - name: Download changed images - uses: actions/download-artifact@v2 - with: - name: changed_images - path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - - name: Check out repository code - uses: actions/checkout@v2 - with: - fetch-depth: 0 # is needed for ancestor commit search - - name: Build - run: | - git -C "$GITHUB_WORKSPACE" submodule sync --recursive - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 - sudo rm -fr "$TEMP_PATH" - mkdir -p "$TEMP_PATH" - cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" - - name: Upload build URLs to artifacts - if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 - with: - name: ${{ env.BUILD_URLS }} - 
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - - name: Cleanup - if: always() - run: | - # shellcheck disable=SC2046 - docker kill $(docker ps -q) ||: - # shellcheck disable=SC2046 - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderBinRelease: needs: [DockerHubPush] runs-on: [self-hosted, builder] @@ -339,7 +289,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=binary_release EOF - name: Download changed images @@ -361,7 +310,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -387,7 +336,6 @@ jobs: # IMAGES_PATH=${{runner.temp}}/images_path # REPO_COPY=${{runner.temp}}/build_check/ClickHouse # CACHES_PATH=${{runner.temp}}/../ccaches - # CHECK_NAME=ClickHouse build check (actions) # BUILD_NAME=binary_gcc # EOF # - name: Download changed images @@ -407,7 +355,7 @@ jobs: # sudo rm -fr "$TEMP_PATH" # mkdir -p "$TEMP_PATH" # cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - # cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + # cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" # - name: Upload build URLs to artifacts # if: ${{ success() || failure() }} # uses: actions/upload-artifact@v2 @@ -433,7 +381,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=package_asan EOF - name: Download changed images @@ -453,7 +400,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -479,7 +426,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=package_ubsan EOF - name: Download changed images @@ -499,7 +445,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -525,7 +471,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=package_tsan EOF - name: Download changed images @@ -545,7 +490,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -571,7 +516,6 @@ jobs: 
IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=package_msan EOF - name: Download changed images @@ -591,7 +535,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -617,7 +561,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=package_debug EOF - name: Download changed images @@ -637,7 +580,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -666,7 +609,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=binary_splitted EOF - name: Download changed images @@ -686,7 +628,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -712,7 +654,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=binary_tidy EOF - name: Download changed images @@ -732,7 +673,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -758,7 +699,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=binary_darwin EOF - name: Download changed images @@ -780,7 +720,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -806,7 +746,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=binary_aarch64 EOF - name: Download changed images @@ -828,7 +767,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py 
"$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -854,7 +793,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=binary_freebsd EOF - name: Download changed images @@ -876,7 +814,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -902,7 +840,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=binary_darwin_aarch64 EOF - name: Download changed images @@ -924,7 +861,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -950,7 +887,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=binary_ppc64le EOF - name: Download changed images @@ -972,7 +908,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -1002,7 +938,7 @@ jobs: - name: Check out repository code uses: actions/checkout@v2 with: - fetch-depth: 0 # otherwise we will have no version info + fetch-depth: 0 # It MUST BE THE SAME for all dependencies and the job itself - name: Check docker clickhouse/clickhouse-server building run: | cd "$GITHUB_WORKSPACE/tests/ci" @@ -2969,7 +2905,7 @@ jobs: #################################### PERFORMANCE TESTS ###################################### ############################################################################################# PerformanceComparison0: - needs: [BuilderPerformance] + needs: [BuilderDebRelease] runs-on: [self-hosted, stress-tester] steps: - name: Set envs @@ -3007,7 +2943,7 @@ jobs: docker rm -f $(docker ps -a -q) ||: sudo rm -fr "$TEMP_PATH" PerformanceComparison1: - needs: [BuilderPerformance] + needs: [BuilderDebRelease] runs-on: [self-hosted, stress-tester] steps: - name: Set envs @@ -3045,7 +2981,7 @@ jobs: docker rm -f $(docker ps -a -q) ||: sudo rm -fr "$TEMP_PATH" PerformanceComparison2: - needs: [BuilderPerformance] + needs: [BuilderDebRelease] runs-on: [self-hosted, stress-tester] steps: - name: Set envs @@ -3083,7 +3019,7 @@ jobs: docker rm -f $(docker ps -a -q) ||: sudo rm -fr "$TEMP_PATH" PerformanceComparison3: - needs: [BuilderPerformance] + needs: [BuilderDebRelease] runs-on: [self-hosted, stress-tester] steps: - name: Set envs diff --git a/.github/workflows/pull_request.yml 
b/.github/workflows/pull_request.yml index d6619a938fa..6482ddebe06 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -260,7 +260,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=package_release EOF - name: Download changed images @@ -273,6 +272,8 @@ jobs: sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 + with: + fetch-depth: 0 # for performance artifact - name: Build run: | git -C "$GITHUB_WORKSPACE" submodule sync --recursive @@ -280,7 +281,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -295,54 +296,6 @@ jobs: # shellcheck disable=SC2046 docker rm -f $(docker ps -a -q) ||: sudo rm -fr "$TEMP_PATH" - BuilderPerformance: - needs: [DockerHubPush, FastTest] - runs-on: [self-hosted, builder] - steps: - - name: Set envs - run: | - cat >> "$GITHUB_ENV" << 'EOF' - TEMP_PATH=${{runner.temp}}/build_check - IMAGES_PATH=${{runner.temp}}/images_path - REPO_COPY=${{runner.temp}}/build_check/ClickHouse - CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) - BUILD_NAME=performance - EOF - - name: Download changed images - uses: actions/download-artifact@v2 - with: - name: changed_images - path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - - name: Check out repository code - uses: actions/checkout@v2 - with: - fetch-depth: 0 # is needed for ancestor commit search - - name: Build - run: | - git -C "$GITHUB_WORKSPACE" submodule sync --recursive - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 - sudo rm -fr "$TEMP_PATH" - mkdir -p "$TEMP_PATH" - cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" - - name: Upload build URLs to artifacts - if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 - with: - name: ${{ env.BUILD_URLS }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - - name: Cleanup - if: always() - run: | - # shellcheck disable=SC2046 - docker kill $(docker ps -q) ||: - # shellcheck disable=SC2046 - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderBinRelease: needs: [DockerHubPush, FastTest] runs-on: [self-hosted, builder] @@ -354,7 +307,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=binary_release EOF - name: Download changed images @@ -374,7 +326,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -400,7 +352,6 @@ jobs: # IMAGES_PATH=${{runner.temp}}/images_path # REPO_COPY=${{runner.temp}}/build_check/ClickHouse # 
CACHES_PATH=${{runner.temp}}/../ccaches - # CHECK_NAME=ClickHouse build check (actions) # BUILD_NAME=binary_gcc # EOF # - name: Download changed images @@ -420,7 +371,7 @@ jobs: # sudo rm -fr "$TEMP_PATH" # mkdir -p "$TEMP_PATH" # cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - # cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + # cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" # - name: Upload build URLs to artifacts # if: ${{ success() || failure() }} # uses: actions/upload-artifact@v2 @@ -446,7 +397,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=package_aarch64 EOF - name: Download changed images @@ -459,6 +409,8 @@ jobs: sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code uses: actions/checkout@v2 + with: + fetch-depth: 0 # for performance artifact - name: Build run: | git -C "$GITHUB_WORKSPACE" submodule sync --recursive @@ -466,7 +418,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -492,7 +444,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=package_asan EOF - name: Download changed images @@ -512,7 +463,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -538,7 +489,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=package_ubsan EOF - name: Download changed images @@ -558,7 +508,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -584,7 +534,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=package_tsan EOF - name: Download changed images @@ -604,7 +553,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -630,7 +579,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=package_msan 
EOF - name: Download changed images @@ -650,7 +598,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -676,7 +624,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=package_debug EOF - name: Download changed images @@ -696,7 +643,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -725,7 +672,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=binary_splitted EOF - name: Download changed images @@ -745,7 +691,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -771,7 +717,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=binary_tidy EOF - name: Download changed images @@ -791,7 +736,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -817,7 +762,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=binary_darwin EOF - name: Download changed images @@ -837,7 +781,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -863,7 +807,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=binary_aarch64 EOF - name: Download changed images @@ -883,7 +826,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 
@@ -909,7 +852,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=binary_freebsd EOF - name: Download changed images @@ -929,7 +871,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -955,7 +897,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=binary_darwin_aarch64 EOF - name: Download changed images @@ -975,7 +916,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -1001,7 +942,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=binary_ppc64le EOF - name: Download changed images @@ -1021,7 +961,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -1051,7 +991,7 @@ jobs: - name: Check out repository code uses: actions/checkout@v2 with: - fetch-depth: 1 # It MUST BE THE SAME for all dependencies and the job itself + fetch-depth: 0 # It MUST BE THE SAME for all dependencies and the job itself - name: Check docker clickhouse/clickhouse-server building run: | cd "$GITHUB_WORKSPACE/tests/ci" @@ -3180,7 +3120,7 @@ jobs: #################################### PERFORMANCE TESTS ###################################### ############################################################################################# PerformanceComparison0: - needs: [BuilderPerformance] + needs: [BuilderDebRelease] runs-on: [self-hosted, stress-tester] steps: - name: Set envs @@ -3218,7 +3158,7 @@ jobs: docker rm -f $(docker ps -a -q) ||: sudo rm -fr "$TEMP_PATH" PerformanceComparison1: - needs: [BuilderPerformance] + needs: [BuilderDebRelease] runs-on: [self-hosted, stress-tester] steps: - name: Set envs @@ -3256,7 +3196,7 @@ jobs: docker rm -f $(docker ps -a -q) ||: sudo rm -fr "$TEMP_PATH" PerformanceComparison2: - needs: [BuilderPerformance] + needs: [BuilderDebRelease] runs-on: [self-hosted, stress-tester] steps: - name: Set envs @@ -3294,7 +3234,7 @@ jobs: docker rm -f $(docker ps -a -q) ||: sudo rm -fr "$TEMP_PATH" PerformanceComparison3: - needs: [BuilderPerformance] + needs: [BuilderDebRelease] runs-on: [self-hosted, stress-tester] steps: - name: Set envs diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index 4d409a98c4f..c16a4a6a568 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -122,7 +122,6 @@ 
jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=package_release EOF - name: Download changed images @@ -144,7 +143,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -170,7 +169,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=package_aarch64 EOF - name: Download changed images @@ -189,7 +187,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts uses: actions/upload-artifact@v2 with: @@ -214,7 +212,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=package_asan EOF - name: Download changed images @@ -234,7 +231,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -260,7 +257,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=package_ubsan EOF - name: Download changed images @@ -280,7 +276,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -306,7 +302,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=package_tsan EOF - name: Download changed images @@ -326,7 +321,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -352,7 +347,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=package_msan EOF - name: Download changed images @@ -372,7 +366,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" 
"$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 @@ -398,7 +392,6 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - CHECK_NAME=ClickHouse build check (actions) BUILD_NAME=package_debug EOF - name: Download changed images @@ -418,7 +411,7 @@ jobs: sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 diff --git a/.github/workflows/tags_stable.yml b/.github/workflows/tags_stable.yml index 9c55c619039..0e0eefb4a35 100644 --- a/.github/workflows/tags_stable.yml +++ b/.github/workflows/tags_stable.yml @@ -32,7 +32,7 @@ jobs: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | ./utils/list-versions/list-versions.sh > ./utils/list-versions/version_date.tsv - GID=$(id -d "${UID}") + GID=$(id -g "${UID}") docker run -u "${UID}:${GID}" -e PYTHONUNBUFFERED=1 \ --volume="${GITHUB_WORKSPACE}:/ClickHouse" clickhouse/style-test \ /ClickHouse/utils/changelog/changelog.py -vv --gh-user-or-token="$GITHUB_TOKEN" \ diff --git a/.gitmodules b/.gitmodules index ba97de232e7..0177ebd2367 100644 --- a/.gitmodules +++ b/.gitmodules @@ -268,6 +268,9 @@ [submodule "contrib/eigen"] path = contrib/eigen url = https://github.com/eigen-mirror/eigen +[submodule "contrib/hashidsxx"] + path = contrib/hashidsxx + url = https://github.com/schoentoon/hashidsxx.git [submodule "contrib/nats-io"] path = contrib/nats-io url = https://github.com/ClickHouse/nats.c.git diff --git a/CHANGELOG.md b/CHANGELOG.md index 3c855995d6c..e92f93b54ac 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,11 +1,174 @@ ### Table of Contents +**[ClickHouse release v22.5, 2022-05-19](#225)**
**[ClickHouse release v22.4, 2022-04-20](#224)**
**[ClickHouse release v22.3-lts, 2022-03-17](#223)**
**[ClickHouse release v22.2, 2022-02-17](#222)**
**[ClickHouse release v22.1, 2022-01-18](#221)**
**[Changelog for 2021](https://github.com/ClickHouse/ClickHouse/blob/master/docs/en/whats-new/changelog/2021.md)**
-### ClickHouse release master FIXME as compared to v22.3.3.44-lts +### ClickHouse release 22.5, 2022-05-19 + +#### Upgrade Notes + +* Now, background merges, mutations and `OPTIMIZE` will not increment `SelectedRows` and `SelectedBytes` metrics. They (still) will increment `MergedRows` and `MergedUncompressedBytes` as it was before. This only affects the metric values, and makes them better. This change does not introduce any incompatibility, but you may wonder about the changes of metrics, so we put in this category. [#37040](https://github.com/ClickHouse/ClickHouse/pull/37040) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Updated the BoringSSL module to the official FIPS compliant version. This makes ClickHouse FIPS compliant. [#35914](https://github.com/ClickHouse/ClickHouse/pull/35914) ([Meena-Renganathan](https://github.com/Meena-Renganathan)). The ciphers `aes-192-cfb128` and `aes-256-cfb128` were removed, because they are not included in the FIPS certified version of BoringSSL. +* `max_memory_usage` setting is removed from the default user profile in `users.xml`. This enables flexible memory limits for queries instead of the old rigid limit of 10 GB. +* Disable `log_query_threads` setting by default. It controls the logging of statistics about every thread participating in query execution. After supporting asynchronous reads, the total number of distinct thread ids became too large, and logging into the `query_thread_log` has become too heavy. [#37077](https://github.com/ClickHouse/ClickHouse/pull/37077) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove function `groupArraySorted` which has a bug. [#36822](https://github.com/ClickHouse/ClickHouse/pull/36822) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### New Feature + +* Enable memory overcommit by default. [#35921](https://github.com/ClickHouse/ClickHouse/pull/35921) ([Dmitry Novik](https://github.com/novikd)). +* Add support of GROUPING SETS in GROUP BY clause. This implementation supports a parallel processing of grouping sets. [#33631](https://github.com/ClickHouse/ClickHouse/pull/33631) ([Dmitry Novik](https://github.com/novikd)). +* Added `system.certificates` table. [#37142](https://github.com/ClickHouse/ClickHouse/pull/37142) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Adds `h3Line`, `h3Distance` and `h3HexRing` functions. [#37030](https://github.com/ClickHouse/ClickHouse/pull/37030) ([Bharat Nallan](https://github.com/bharatnc)). +* New single binary based diagnostics tool (clickhouse-diagnostics). [#36705](https://github.com/ClickHouse/ClickHouse/pull/36705) ([Dale McDiarmid](https://github.com/gingerwizard)). +* Add output format `Prometheus` [#36051](https://github.com/ClickHouse/ClickHouse/issues/36051). [#36206](https://github.com/ClickHouse/ClickHouse/pull/36206) ([Vladimir C](https://github.com/vdimir)). +* Add `MySQLDump` input format. It reads all data from INSERT queries belonging to one table in dump. If there are more than one table, by default it reads data from the first one. [#36667](https://github.com/ClickHouse/ClickHouse/pull/36667) ([Kruglov Pavel](https://github.com/Avogar)). +* Show the `total_rows` and `total_bytes` fields in `system.tables` for temporary tables. [#36401](https://github.com/ClickHouse/ClickHouse/issues/36401). [#36439](https://github.com/ClickHouse/ClickHouse/pull/36439) ([xiedeyantu](https://github.com/xiedeyantu)). 
+* Allow to override `parts_to_delay_insert` and `parts_to_throw_insert` with query-level settings. If they are defined, they will override table-level settings. [#36371](https://github.com/ClickHouse/ClickHouse/pull/36371) ([Memo](https://github.com/Joeywzr)). + +#### Experimental Feature + +* Implemented L1, L2, Linf, Cosine distance functions for arrays and L1, L2, Linf norm functions for arrays. + [#37033](https://github.com/ClickHouse/ClickHouse/pull/37033) ([qieqieplus](https://github.com/qieqieplus)). Caveat: the functions will be renamed. +* Improve the `WATCH` query in WindowView: 1. Reduce the latency of providing query results by calling the `fire_condition` signal. 2. Makes the cancel query operation(ctrl-c) faster, by checking `isCancelled()` more frequently. [#37226](https://github.com/ClickHouse/ClickHouse/pull/37226) ([vxider](https://github.com/Vxider)). +* Introspection for remove filesystem cache. [#36802](https://github.com/ClickHouse/ClickHouse/pull/36802) ([Han Shukai](https://github.com/KinderRiven)). +* Added new hash function `wyHash64` for SQL. [#36467](https://github.com/ClickHouse/ClickHouse/pull/36467) ([olevino](https://github.com/olevino)). +* Improvement for replicated databases: Added `SYSTEM SYNC DATABASE REPLICA` query which allows to sync tables metadata inside Replicated database, because currently synchronisation is asynchronous. [#35944](https://github.com/ClickHouse/ClickHouse/pull/35944) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Improvement for remote filesystem cache: Better read from cache. [#37054](https://github.com/ClickHouse/ClickHouse/pull/37054) ([Kseniia Sumarokova](https://github.com/kssenii)). Improve `SYSTEM DROP FILESYSTEM CACHE` query: `` option and `FORCE` option. [#36639](https://github.com/ClickHouse/ClickHouse/pull/36639) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Improvement for semistructured data: Allow to cast columns of type `Object(...)` to `Object(Nullable(...))`. [#36564](https://github.com/ClickHouse/ClickHouse/pull/36564) ([awakeljw](https://github.com/awakeljw)). +* Improvement for parallel replicas: We create a local interpreter if we want to execute query on localhost replica. But for when executing query on multiple replicas we rely on the fact that a connection exists so replicas can talk to coordinator. It is now improved and localhost replica can talk to coordinator directly in the same process. [#36281](https://github.com/ClickHouse/ClickHouse/pull/36281) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). + +#### Performance Improvement + +* Improve performance of `avg`, `sum` aggregate functions if used without GROUP BY expression. [#37257](https://github.com/ClickHouse/ClickHouse/pull/37257) ([Maksim Kita](https://github.com/kitaisreal)). +* Improve performance of unary arithmetic functions (`bitCount`, `bitNot`, `abs`, `intExp2`, `intExp10`, `negate`, `roundAge`, `roundDuration`, `roundToExp2`, `sign`) using dynamic dispatch. [#37289](https://github.com/ClickHouse/ClickHouse/pull/37289) ([Maksim Kita](https://github.com/kitaisreal)). +* Improve performance of ORDER BY, MergeJoin, insertion into MergeTree using JIT compilation of sort columns comparator. [#34469](https://github.com/ClickHouse/ClickHouse/pull/34469) ([Maksim Kita](https://github.com/kitaisreal)). +* Change structure of `system.asynchronous_metric_log`. It will take about 10 times less space. This closes [#36357](https://github.com/ClickHouse/ClickHouse/issues/36357). 
The field `event_time_microseconds` was removed, because it is useless. [#36360](https://github.com/ClickHouse/ClickHouse/pull/36360) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Load marks for only necessary columns when reading wide parts. [#36879](https://github.com/ClickHouse/ClickHouse/pull/36879) ([Anton Kozlov](https://github.com/tonickkozlov)). +* Improves performance of file descriptor cache by narrowing mutex scopes. [#36682](https://github.com/ClickHouse/ClickHouse/pull/36682) ([Anton Kozlov](https://github.com/tonickkozlov)). +* Improve performance of reading from storage `File` and table functions `file` in case when path has globs and matched directory contains large number of files. [#36647](https://github.com/ClickHouse/ClickHouse/pull/36647) ([Anton Popov](https://github.com/CurtizJ)). +* Apply parallel parsing for input format `HiveText`, which can speed up HiveText parsing by 2x when reading local file. [#36650](https://github.com/ClickHouse/ClickHouse/pull/36650) ([李扬](https://github.com/taiyang-li)). +* The default `HashJoin` is not thread safe for inserting right table's rows and runs in a single thread. When the right table is large, the join process is too slow with low cpu utilization. [#36415](https://github.com/ClickHouse/ClickHouse/pull/36415) ([lgbo](https://github.com/lgbo-ustc)). +* Allow to rewrite `select countDistinct(a) from t` to `select count(1) from (select a from t groupBy a)`. [#35993](https://github.com/ClickHouse/ClickHouse/pull/35993) ([zhanglistar](https://github.com/zhanglistar)). +* Transform OR LIKE chain to multiMatchAny. Will enable once we have more confidence it works. [#34932](https://github.com/ClickHouse/ClickHouse/pull/34932) ([Daniel Kutenin](https://github.com/danlark1)). +* Improve performance of some functions with inlining. [#34544](https://github.com/ClickHouse/ClickHouse/pull/34544) ([Daniel Kutenin](https://github.com/danlark1)). +* Add a branch to avoid unnecessary memcpy in readBig. It improves performance somewhat. [#36095](https://github.com/ClickHouse/ClickHouse/pull/36095) ([jasperzhu](https://github.com/jinjunzh)). +* Implement partial GROUP BY key for optimize_aggregation_in_order. [#35111](https://github.com/ClickHouse/ClickHouse/pull/35111) ([Azat Khuzhin](https://github.com/azat)). + +#### Improvement + +* Show names of erroneous files in case of parsing errors while executing table functions `file`, `s3` and `url`. [#36314](https://github.com/ClickHouse/ClickHouse/pull/36314) ([Anton Popov](https://github.com/CurtizJ)). +* Allowed to increase the number of threads for executing background operations (merges, mutations, moves and fetches) at runtime if they are specified at top level config. [#36425](https://github.com/ClickHouse/ClickHouse/pull/36425) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Now date time conversion functions that generate time before 1970-01-01 00:00:00 with partial hours/minutes timezones will be saturated to zero instead of overflow. This is the continuation of https://github.com/ClickHouse/ClickHouse/pull/29953 which addresses https://github.com/ClickHouse/ClickHouse/pull/29953#discussion_r800550280 . Mark as improvement because it's implementation defined behavior (and very rare case) and we are allowed to break it. [#36656](https://github.com/ClickHouse/ClickHouse/pull/36656) ([Amos Bird](https://github.com/amosbird)). +* Add a warning if someone runs clickhouse-server with log level "test".
The log level "test" was added recently and cannot be used in production due to inevitable, unavoidable, fatal and life-threatening performance degradation. [#36824](https://github.com/ClickHouse/ClickHouse/pull/36824) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Parse collations in CREATE TABLE, throw exception or ignore. closes [#35892](https://github.com/ClickHouse/ClickHouse/issues/35892). [#36271](https://github.com/ClickHouse/ClickHouse/pull/36271) ([yuuch](https://github.com/yuuch)). +* Option `compatibility_ignore_auto_increment_in_create_table` allows ignoring `AUTO_INCREMENT` keyword in a column declaration to simplify migration from MySQL. [#37178](https://github.com/ClickHouse/ClickHouse/pull/37178) ([Igor Nikonov](https://github.com/devcrafter)). +* Add aliases `JSONLines` and `NDJSON` for `JSONEachRow`. Closes [#36303](https://github.com/ClickHouse/ClickHouse/issues/36303). [#36327](https://github.com/ClickHouse/ClickHouse/pull/36327) ([flynn](https://github.com/ucasfl)). +* Limit the max partitions could be queried for each hive table. Avoid resource overruns. [#37281](https://github.com/ClickHouse/ClickHouse/pull/37281) ([lgbo](https://github.com/lgbo-ustc)). +* Added implicit cast for `h3kRing` function second argument to improve usability. Closes [#35432](https://github.com/ClickHouse/ClickHouse/issues/35432). [#37189](https://github.com/ClickHouse/ClickHouse/pull/37189) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix progress indication for `INSERT SELECT` in `clickhouse-local` for any query and for file progress in client, more correct file progress. [#37075](https://github.com/ClickHouse/ClickHouse/pull/37075) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix bug which can lead to forgotten outdated parts in MergeTree table engines family in case of filesystem failures during parts removal. Before fix they will be removed only after first server restart. [#37014](https://github.com/ClickHouse/ClickHouse/pull/37014) ([alesapin](https://github.com/alesapin)). +* Implemented a new mode of handling row policies which can be enabled in the main configuration which enables users without permissive row policies to read rows. [#36997](https://github.com/ClickHouse/ClickHouse/pull/36997) ([Vitaly Baranov](https://github.com/vitlibar)). +* Play UI: Nullable numbers will be aligned to the right in table cells. This closes [#36982](https://github.com/ClickHouse/ClickHouse/issues/36982). [#36988](https://github.com/ClickHouse/ClickHouse/pull/36988) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Play UI: If there is one row in result and more than a few columns, display the result vertically. Continuation of [#36811](https://github.com/ClickHouse/ClickHouse/issues/36811). [#36842](https://github.com/ClickHouse/ClickHouse/pull/36842) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Cleanup CSS in Play UI. The pixels are more evenly placed. Better usability for long content in table cells. [#36569](https://github.com/ClickHouse/ClickHouse/pull/36569) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Finalize write buffers in case of exception to avoid doing it in destructors. Hope it fixes: [#36907](https://github.com/ClickHouse/ClickHouse/issues/36907). [#36979](https://github.com/ClickHouse/ClickHouse/pull/36979) ([Kruglov Pavel](https://github.com/Avogar)). 
+* After [#36425](https://github.com/ClickHouse/ClickHouse/issues/36425) settings like `background_fetches_pool_size` became obsolete and can appear in top level config, but ClickHouse throws an exception like `Error updating configuration from '/etc/clickhouse-server/config.xml' config.: Code: 137. DB::Exception: A setting 'background_fetches_pool_size' appeared at top level in config /etc/clickhouse-server/config.xml.` This is fixed. [#36917](https://github.com/ClickHouse/ClickHouse/pull/36917) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Add extra diagnostic info (if applicable) when sending exception to other server. [#36872](https://github.com/ClickHouse/ClickHouse/pull/36872) ([tavplubix](https://github.com/tavplubix)). +* Allow to execute hash functions with arguments of type `Array(Tuple(..))`. [#36812](https://github.com/ClickHouse/ClickHouse/pull/36812) ([Anton Popov](https://github.com/CurtizJ)). +* Added `user_defined_path` config setting. [#36753](https://github.com/ClickHouse/ClickHouse/pull/36753) ([Maksim Kita](https://github.com/kitaisreal)). +* Allow cluster macro in `s3Cluster` table function. [#36726](https://github.com/ClickHouse/ClickHouse/pull/36726) ([Vadim Volodin](https://github.com/PolyProgrammist)). +* Properly cancel INSERT queries in `clickhouse-client`/`clickhouse-local`. [#36710](https://github.com/ClickHouse/ClickHouse/pull/36710) ([Azat Khuzhin](https://github.com/azat)). +* Allow to cancel a query while still keeping a decent query id in `MySQLHandler`. [#36699](https://github.com/ClickHouse/ClickHouse/pull/36699) ([Amos Bird](https://github.com/amosbird)). +* Add `is_all_data_sent` column into `system.processes`, and improve internal testing hardening check based on it. [#36649](https://github.com/ClickHouse/ClickHouse/pull/36649) ([Azat Khuzhin](https://github.com/azat)). +* The metrics about time spent reading from s3 are now calculated correctly. Close [#35483](https://github.com/ClickHouse/ClickHouse/issues/35483). [#36572](https://github.com/ClickHouse/ClickHouse/pull/36572) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow file descriptors in table function file if it is run in clickhouse-local. [#36562](https://github.com/ClickHouse/ClickHouse/pull/36562) ([wuxiaobai24](https://github.com/wuxiaobai24)). +* Allow names of tuple elements that start from digits. [#36544](https://github.com/ClickHouse/ClickHouse/pull/36544) ([Anton Popov](https://github.com/CurtizJ)). +* Now clickhouse-benchmark can read authentication info from environment variables. [#36497](https://github.com/ClickHouse/ClickHouse/pull/36497) ([Anton Kozlov](https://github.com/tonickkozlov)). +* `clickhouse-keeper` improvement: add support for force recovery which allows you to reconfigure cluster without quorum. [#36258](https://github.com/ClickHouse/ClickHouse/pull/36258) ([Antonio Andelic](https://github.com/antonio2368)). +* Improve schema inference for JSON objects. [#36207](https://github.com/ClickHouse/ClickHouse/pull/36207) ([Kruglov Pavel](https://github.com/Avogar)). +* Refactor code around schema inference with globs. Try next file from glob only if it makes sense (previously we tried next file in case of any error). Also it fixes [#36317](https://github.com/ClickHouse/ClickHouse/issues/36317). [#36205](https://github.com/ClickHouse/ClickHouse/pull/36205) ([Kruglov Pavel](https://github.com/Avogar)).
+* Add a separate `CLUSTER` grant (and an `access_control_improvements.on_cluster_queries_require_cluster_grant` configuration directive, defaulting to `false` for backward compatibility). [#35767](https://github.com/ClickHouse/ClickHouse/pull/35767) ([Azat Khuzhin](https://github.com/azat)).
+* If the required amount of memory becomes available before the selected query is stopped, all waiting queries continue execution. Now we don't stop any query if memory is freed before the moment when the selected query learns about the cancellation. [#35637](https://github.com/ClickHouse/ClickHouse/pull/35637) ([Dmitry Novik](https://github.com/novikd)).
+* Nullable detection in Protobuf. In proto3, default values are not sent on the wire. This makes it non-trivial to distinguish between null and default values for Nullable columns. A standard way to deal with this problem is to use Google wrappers to nest the target value within an inner message (see https://github.com/protocolbuffers/protobuf/blob/master/src/google/protobuf/wrappers.proto). In this case, a missing field is interpreted as a null value, a field with a missing value is interpreted as the default value, and a field with a regular value is interpreted as a regular value. However, ClickHouse interprets Google wrappers as nested columns. We propose to introduce special behaviour to detect Google wrappers and interpret them as in the description above. For example, to serialize values for a Nullable column `test`, we would use `google.protobuf.StringValue test` in our .proto schema (see the sketch below). Note that these types are so-called "well-known types" in Protobuf, implemented in the library itself. [#35149](https://github.com/ClickHouse/ClickHouse/pull/35149) ([Jakub Kuklis](https://github.com/jkuklis)).
+* Added support for specifying `content_type` in predefined and static HTTP handler config. [#34916](https://github.com/ClickHouse/ClickHouse/pull/34916) ([Roman Nikonov](https://github.com/nic11)).
+* Warn properly if `clickhouse-client --file` is used without a preceding `--external`. Close [#34747](https://github.com/ClickHouse/ClickHouse/issues/34747). [#34765](https://github.com/ClickHouse/ClickHouse/pull/34765) ([李扬](https://github.com/taiyang-li)).
+* Improve the MySQL database engine to be compatible with the `binary(0)` data type. [#37232](https://github.com/ClickHouse/ClickHouse/pull/37232) ([zzsmdfj](https://github.com/zzsmdfj)).
+* Improve the JSON report of clickhouse-benchmark. [#36473](https://github.com/ClickHouse/ClickHouse/pull/36473) ([Tian Xinhui](https://github.com/xinhuitian)).
+* The server might refuse to start if it cannot resolve the hostname of an external ClickHouse dictionary. This is fixed. Fixes [#36451](https://github.com/ClickHouse/ClickHouse/issues/36451). [#36463](https://github.com/ClickHouse/ClickHouse/pull/36463) ([tavplubix](https://github.com/tavplubix)).
+
+#### Build/Testing/Packaging Improvement
+
+* Now `clickhouse-keeper` for the `x86_64` architecture is statically linked with [musl](https://musl.libc.org/) and doesn't depend on any system libraries. [#31833](https://github.com/ClickHouse/ClickHouse/pull/31833) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* ClickHouse builds for the `PowerPC64LE` architecture are now available in the universal installation script `curl https://clickhouse.com/ | sh` and by direct link `https://builds.clickhouse.com/master/powerpc64le/clickhouse`. [#37095](https://github.com/ClickHouse/ClickHouse/pull/37095) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
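A minimal sketch of the Google-wrapper convention described in the Protobuf entry above; the table and message names are hypothetical, only `google.protobuf.StringValue` comes from the entry itself:

```sql
-- .proto side (shown as comments, since this is a SQL sketch):
--   import "google/protobuf/wrappers.proto";
--   message Row { google.protobuf.StringValue test = 1; }
--
-- ClickHouse side: with wrapper detection, a message without the `test` field reads as NULL,
-- a wrapper present but without an inner value reads as the default value,
-- and a wrapper carrying a value reads as that value.
CREATE TABLE proto_demo (test Nullable(String)) ENGINE = MergeTree ORDER BY tuple();
```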
+* Limit PowerPC code generation to Power8 for better compatibility. This closes [#36025](https://github.com/ClickHouse/ClickHouse/issues/36025). [#36529](https://github.com/ClickHouse/ClickHouse/pull/36529) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Simplify the performance test. This will give us a chance to use it. [#36769](https://github.com/ClickHouse/ClickHouse/pull/36769) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fail performance comparison on errors in the report. [#34797](https://github.com/ClickHouse/ClickHouse/pull/34797) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Add ZSTD support for Arrow. This fixes [#35283](https://github.com/ClickHouse/ClickHouse/issues/35283). [#35486](https://github.com/ClickHouse/ClickHouse/pull/35486) ([Sean Lafferty](https://github.com/seanlaff)).
+
+#### Bug Fix
+
+* Extract the `Version ID`, if present, from the URI and add it to the AWS HTTP request. Closes [#31221](https://github.com/ClickHouse/ClickHouse/issues/31221). - [x] Extract `Version ID` from URI if present and reassemble without it. - [x] Configure `AWS HTTP URI` object with request. - [x] Unit Tests: [`gtest_s3_uri`](https://github.com/ClickHouse/ClickHouse/blob/2340a6c6849ebc05a8efbf97ba8de3ff9dc0eff4/src/IO/tests/gtest_s3_uri.cpp) - [x] Drop instrumentation commit. [#34571](https://github.com/ClickHouse/ClickHouse/pull/34571) ([Saad Ur Rahman](https://github.com/surahman)).
+* Fix the `system.opentelemetry_span_log` `attribute.values` alias to point to values instead of keys. [#37275](https://github.com/ClickHouse/ClickHouse/pull/37275) ([Aleksandr Razumov](https://github.com/ernado)).
+* Fix `Nullable(String)` to `Nullable(Bool/IPv4/IPv6)` conversion (illustrated below). Closes [#37221](https://github.com/ClickHouse/ClickHouse/issues/37221). [#37270](https://github.com/ClickHouse/ClickHouse/pull/37270) ([Kruglov Pavel](https://github.com/Avogar)).
+* Experimental feature: Fix execution of mutations in tables that contain columns of type `Object`. Using subcolumns of type `Object` in the `WHERE` expression of `UPDATE` or `DELETE` queries is not allowed yet, nor is manipulating (`DROP`, `MODIFY`) separate subcolumns. Fixes [#37205](https://github.com/ClickHouse/ClickHouse/issues/37205). [#37266](https://github.com/ClickHouse/ClickHouse/pull/37266) ([Anton Popov](https://github.com/CurtizJ)).
+* Kafka does not need `group.id` on the producer stage. In the console log you can find a warning that describes this issue: ``` 2022.05.15 17:59:13.270227 [ 137 ] {} StorageKafka (topic-name): [rdk:CONFWARN] [thrd:app]: Configuration property group.id is a consumer property and will be ignored by this producer instance ```. [#37228](https://github.com/ClickHouse/ClickHouse/pull/37228) ([Mark Andreev](https://github.com/mrk-andreev)).
+* Experimental feature (WindowView): Update `max_fired_watermark` after blocks have actually fired, to avoid deleting data that hasn't been fired yet. [#37225](https://github.com/ClickHouse/ClickHouse/pull/37225) ([vxider](https://github.com/Vxider)).
+* Fix "Cannot create column of type Set" for distributed queries with LIMIT BY. [#37193](https://github.com/ClickHouse/ClickHouse/pull/37193) ([Azat Khuzhin](https://github.com/azat)).
+* Experimental feature: Now a WindowView `WATCH EVENTS` query will not be terminated due to the nonempty Chunk created in `WindowViewSource.h:58`. [#37182](https://github.com/ClickHouse/ClickHouse/pull/37182) ([vxider](https://github.com/Vxider)).
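The `Nullable(String)` conversion fix above targets casts of the following shape (values are purely illustrative):

```sql
-- Both casts go through the Nullable(String) -> Nullable(T) path mentioned in the entry.
SELECT
    CAST(CAST('true' AS Nullable(String)) AS Nullable(Bool))      AS b,
    CAST(CAST('127.0.0.1' AS Nullable(String)) AS Nullable(IPv4)) AS ip;
```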
+* Enable `enable_global_with_statement` for subqueries, close [#37141](https://github.com/ClickHouse/ClickHouse/issues/37141). [#37166](https://github.com/ClickHouse/ClickHouse/pull/37166) ([Vladimir C](https://github.com/vdimir)). +* Fix implicit cast for optimize_skip_unused_shards_rewrite_in. [#37153](https://github.com/ClickHouse/ClickHouse/pull/37153) ([Azat Khuzhin](https://github.com/azat)). +* The ILIKE function on FixedString columns could have returned wrong results (i.e. match less than it should). [#37117](https://github.com/ClickHouse/ClickHouse/pull/37117) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix `GROUP BY` `AggregateFunction` (i.e. you `GROUP BY` by the column that has `AggregateFunction` type). [#37093](https://github.com/ClickHouse/ClickHouse/pull/37093) ([Azat Khuzhin](https://github.com/azat)). +* Experimental feature: Fix optimize_aggregation_in_order with prefix GROUP BY and *Array aggregate functions. [#37050](https://github.com/ClickHouse/ClickHouse/pull/37050) ([Azat Khuzhin](https://github.com/azat)). +* Fixed performance degradation of some INSERT SELECT queries with implicit aggregation. Fixes [#36792](https://github.com/ClickHouse/ClickHouse/issues/36792). [#37047](https://github.com/ClickHouse/ClickHouse/pull/37047) ([tavplubix](https://github.com/tavplubix)). +* Experimental feature: Fix in-order `GROUP BY` (`optimize_aggregation_in_order=1`) with `*Array` (`groupArrayArray`/...) aggregate functions. [#37046](https://github.com/ClickHouse/ClickHouse/pull/37046) ([Azat Khuzhin](https://github.com/azat)). +* Fix LowCardinality->ArrowDictionary invalid output when type of indexes is not UInt8. Closes [#36832](https://github.com/ClickHouse/ClickHouse/issues/36832). [#37043](https://github.com/ClickHouse/ClickHouse/pull/37043) ([Kruglov Pavel](https://github.com/Avogar)). +* Fixed problem with infs in `quantileTDigest`. Fixes [#32107](https://github.com/ClickHouse/ClickHouse/issues/32107). [#37021](https://github.com/ClickHouse/ClickHouse/pull/37021) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Fix sending external tables data in HedgedConnections with max_parallel_replicas != 1. [#36981](https://github.com/ClickHouse/ClickHouse/pull/36981) ([Kruglov Pavel](https://github.com/Avogar)). +* Fixed logical error on `TRUNCATE` query in `Replicated` database. Fixes [#33747](https://github.com/ClickHouse/ClickHouse/issues/33747). [#36976](https://github.com/ClickHouse/ClickHouse/pull/36976) ([tavplubix](https://github.com/tavplubix)). +* Experimental feature: Fix stuck when dropping source table in WindowView. Closes [#35678](https://github.com/ClickHouse/ClickHouse/issues/35678). [#36967](https://github.com/ClickHouse/ClickHouse/pull/36967) ([vxider](https://github.com/Vxider)). +* Experimental feature (rocksdb cache): Fix issue: [#36671](https://github.com/ClickHouse/ClickHouse/issues/36671). [#36929](https://github.com/ClickHouse/ClickHouse/pull/36929) ([李扬](https://github.com/taiyang-li)). +* Experimental feature: Fix bugs when using multiple columns in WindowView by adding converting actions to make it possible to call`writeIntoWindowView` with a slightly different schema. [#36928](https://github.com/ClickHouse/ClickHouse/pull/36928) ([vxider](https://github.com/Vxider)). +* Fix bug in clickhouse-keeper which can lead to corrupted compressed log files in case of small load and restarts. [#36910](https://github.com/ClickHouse/ClickHouse/pull/36910) ([alesapin](https://github.com/alesapin)). 
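For context on the `GROUP BY` `AggregateFunction` fix above, this is the shape of query it covers — grouping directly by a column whose type is an aggregate state (table and column names are hypothetical):

```sql
-- `uniq_state` is assumed to have type AggregateFunction(uniq, UInt64),
-- e.g. in an AggregatingMergeTree table.
SELECT uniq_state, count()
FROM agg_table
GROUP BY uniq_state;
```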
+* Fix incorrect query result when doing constant aggregation. This fixes [#36728](https://github.com/ClickHouse/ClickHouse/issues/36728) . [#36888](https://github.com/ClickHouse/ClickHouse/pull/36888) ([Amos Bird](https://github.com/amosbird)). +* Experimental feature: Fix `current_size` count in cache. [#36887](https://github.com/ClickHouse/ClickHouse/pull/36887) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Experimental feature: Fix fire in window view with hop window [#34044](https://github.com/ClickHouse/ClickHouse/issues/34044). [#36861](https://github.com/ClickHouse/ClickHouse/pull/36861) ([vxider](https://github.com/Vxider)). +* Experimental feature: Fix incorrect cast in cached buffer from remote fs. [#36809](https://github.com/ClickHouse/ClickHouse/pull/36809) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix creation of tables with `flatten_nested = 0`. Previously unflattened `Nested` columns could be flattened after server restart. [#36803](https://github.com/ClickHouse/ClickHouse/pull/36803) ([Anton Popov](https://github.com/CurtizJ)). +* Fix some issues with async reads from remote filesystem which happened when reading low cardinality. [#36763](https://github.com/ClickHouse/ClickHouse/pull/36763) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Experimental feature: Fix insertion to columns of type `Object` from multiple files, e.g. via table function `file` with globs. [#36762](https://github.com/ClickHouse/ClickHouse/pull/36762) ([Anton Popov](https://github.com/CurtizJ)). +* Fix timeouts in Hedged requests. Connection hang right after sending remote query could lead to eternal waiting. [#36749](https://github.com/ClickHouse/ClickHouse/pull/36749) ([Kruglov Pavel](https://github.com/Avogar)). +* Experimental feature: Fix a bug of `groupBitmapAndState`/`groupBitmapOrState`/`groupBitmapXorState` on distributed table. [#36739](https://github.com/ClickHouse/ClickHouse/pull/36739) ([Zhang Yifan](https://github.com/zhangyifan27)). +* Experimental feature: During the [test](https://s3.amazonaws.com/clickhouse-test-reports/36376/1cb1c7275cb53769ab826772db9b71361bb3e413/stress_test__thread__actions_/clickhouse-server.clean.log) in [PR](https://github.com/ClickHouse/ClickHouse/pull/36376), I found that the one cache class was initialized twice, it throws a exception. Although the cause of this problem is not clear, there should be code logic of repeatedly loading disk in ClickHouse, so we need to make special judgment for this situation. [#36737](https://github.com/ClickHouse/ClickHouse/pull/36737) ([Han Shukai](https://github.com/KinderRiven)). +* Fix vertical merges in wide parts. Previously an exception `There is no column` can be thrown during merge. [#36707](https://github.com/ClickHouse/ClickHouse/pull/36707) ([Anton Popov](https://github.com/CurtizJ)). +* Fix server reload on port change (do not wait for current connections from query context). [#36700](https://github.com/ClickHouse/ClickHouse/pull/36700) ([Azat Khuzhin](https://github.com/azat)). +* Experimental feature: In the previous [PR](https://github.com/ClickHouse/ClickHouse/pull/36376), I found that testing (stateless tests, flaky check (address, actions)) is timeout. Moreover, testing locally can also trigger unstable system deadlocks. This problem still exists when using the latest source code of master. [#36697](https://github.com/ClickHouse/ClickHouse/pull/36697) ([Han Shukai](https://github.com/KinderRiven)). +* Experimental feature: Fix server restart if cache configuration changed. 
[#36685](https://github.com/ClickHouse/ClickHouse/pull/36685) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix possible heap-use-after-free in schema inference. Closes [#36661](https://github.com/ClickHouse/ClickHouse/issues/36661). [#36679](https://github.com/ClickHouse/ClickHouse/pull/36679) ([Kruglov Pavel](https://github.com/Avogar)). +* Fixed parsing of query settings in `CREATE` query when engine is not specified. Fixes https://github.com/ClickHouse/ClickHouse/pull/34187#issuecomment-1103812419. [#36642](https://github.com/ClickHouse/ClickHouse/pull/36642) ([tavplubix](https://github.com/tavplubix)). +* Experimental feature: Fix merges of wide parts with type `Object`. [#36637](https://github.com/ClickHouse/ClickHouse/pull/36637) ([Anton Popov](https://github.com/CurtizJ)). +* Fix format crash when default expression follow EPHEMERAL not literal. Closes [#36618](https://github.com/ClickHouse/ClickHouse/issues/36618). [#36633](https://github.com/ClickHouse/ClickHouse/pull/36633) ([flynn](https://github.com/ucasfl)). +* Fix `Missing column` exception which could happen while using `INTERPOLATE` with `ENGINE = MergeTree` table. [#36549](https://github.com/ClickHouse/ClickHouse/pull/36549) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix potential error with literals in `WHERE` for join queries. Close [#36279](https://github.com/ClickHouse/ClickHouse/issues/36279). [#36542](https://github.com/ClickHouse/ClickHouse/pull/36542) ([Vladimir C](https://github.com/vdimir)). +* Fix offset update ReadBufferFromEncryptedFile, which could cause undefined behaviour. [#36493](https://github.com/ClickHouse/ClickHouse/pull/36493) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix hostname sanity checks for Keeper cluster configuration. Add `keeper_server.host_checks_enabled` config to enable/disable those checks. [#36492](https://github.com/ClickHouse/ClickHouse/pull/36492) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix usage of executable user defined functions in GROUP BY. Before executable user defined functions cannot be used as expressions in GROUP BY. Closes [#36448](https://github.com/ClickHouse/ClickHouse/issues/36448). [#36486](https://github.com/ClickHouse/ClickHouse/pull/36486) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix possible exception with unknown packet from server in client. [#36481](https://github.com/ClickHouse/ClickHouse/pull/36481) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Experimental feature (please never use `system.session_log`, it is going to be removed): Add missing enum values in system.session_log table. Closes [#36474](https://github.com/ClickHouse/ClickHouse/issues/36474). [#36480](https://github.com/ClickHouse/ClickHouse/pull/36480) ([Memo](https://github.com/Joeywzr)). +* Fix bug in s3Cluster schema inference that let to the fact that not all data was read in the select from s3Cluster. The bug appeared in https://github.com/ClickHouse/ClickHouse/pull/35544. [#36434](https://github.com/ClickHouse/ClickHouse/pull/36434) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix nullptr dereference in JOIN and COLUMNS matcher. This fixes [#36416](https://github.com/ClickHouse/ClickHouse/issues/36416). This is for https://github.com/ClickHouse/ClickHouse/pull/36417. [#36430](https://github.com/ClickHouse/ClickHouse/pull/36430) ([Amos Bird](https://github.com/amosbird)). +* Fix dictionary reload for `ClickHouseDictionarySource` if it contains scalar subqueries. 
[#36390](https://github.com/ClickHouse/ClickHouse/pull/36390) ([lthaooo](https://github.com/lthaooo)). +* Fix assertion in JOIN, close [#36199](https://github.com/ClickHouse/ClickHouse/issues/36199). [#36201](https://github.com/ClickHouse/ClickHouse/pull/36201) ([Vladimir C](https://github.com/vdimir)). +* Queries with aliases inside special operators returned parsing error (was broken in 22.1). Example: `SELECT substring('test' AS t, 1, 1)`. [#36167](https://github.com/ClickHouse/ClickHouse/pull/36167) ([Maksim Kita](https://github.com/kitaisreal)). +* Experimental feature: Fix insertion of complex JSONs with nested arrays to columns of type `Object`. [#36077](https://github.com/ClickHouse/ClickHouse/pull/36077) ([Anton Popov](https://github.com/CurtizJ)). +* Fix ALTER DROP COLUMN of nested column with compact parts (i.e. `ALTER TABLE x DROP COLUMN n`, when there is column `n.d`). [#35797](https://github.com/ClickHouse/ClickHouse/pull/35797) ([Azat Khuzhin](https://github.com/azat)). +* Fix substring function range error length when `offset` and `length` is negative constant and `s` is not constant. [#33861](https://github.com/ClickHouse/ClickHouse/pull/33861) ([RogerYK](https://github.com/RogerYK)). + + +### ClickHouse release 22.4, 2022-04-19 #### Backward Incompatible Change diff --git a/base/base/JSON.cpp b/base/base/JSON.cpp index 745fd821b83..54074f20f16 100644 --- a/base/base/JSON.cpp +++ b/base/base/JSON.cpp @@ -12,7 +12,7 @@ #define JSON_MAX_DEPTH 100 -POCO_IMPLEMENT_EXCEPTION(JSONException, Poco::Exception, "JSONException") // NOLINT(cert-err60-cpp, modernize-use-noexcept) +POCO_IMPLEMENT_EXCEPTION(JSONException, Poco::Exception, "JSONException") // NOLINT(cert-err60-cpp, modernize-use-noexcept, hicpp-use-noexcept) /// Прочитать беззнаковое целое в простом формате из не-0-terminated строки. diff --git a/base/base/ReplxxLineReader.cpp b/base/base/ReplxxLineReader.cpp index 9ea53bb132b..0569567d4f8 100644 --- a/base/base/ReplxxLineReader.cpp +++ b/base/base/ReplxxLineReader.cpp @@ -378,4 +378,4 @@ void ReplxxLineReader::enableBracketedPaste() { bracketed_paste_enabled = true; rx.enable_bracketed_paste(); -}; +} diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index caf6f217f6a..210c927b2fd 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -2,11 +2,11 @@ # NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION, # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes. 
-SET(VERSION_REVISION 54462) +SET(VERSION_REVISION 54463) SET(VERSION_MAJOR 22) -SET(VERSION_MINOR 5) +SET(VERSION_MINOR 6) SET(VERSION_PATCH 1) -SET(VERSION_GITHASH 77a82cc090dd5dba2d995946e82a12a2cadaaff3) -SET(VERSION_DESCRIBE v22.5.1.1-testing) -SET(VERSION_STRING 22.5.1.1) +SET(VERSION_GITHASH df0cb0620985eb5ec59760cc76f7736e5b6209bb) +SET(VERSION_DESCRIBE v22.6.1.1-testing) +SET(VERSION_STRING 22.6.1.1) # end of autochange diff --git a/cmake/ccache.cmake b/cmake/ccache.cmake index 23c2ada513d..f0769f337d0 100644 --- a/cmake/ccache.cmake +++ b/cmake/ccache.cmake @@ -1,56 +1,53 @@ +# Setup integration with ccache to speed up builds, see https://ccache.dev/ + if (CMAKE_CXX_COMPILER_LAUNCHER MATCHES "ccache" OR CMAKE_C_COMPILER_LAUNCHER MATCHES "ccache") - set(COMPILER_MATCHES_CCACHE 1) -else() - set(COMPILER_MATCHES_CCACHE 0) -endif() - -if ((ENABLE_CCACHE OR NOT DEFINED ENABLE_CCACHE) AND NOT COMPILER_MATCHES_CCACHE) - find_program (CCACHE_FOUND ccache) - if (CCACHE_FOUND) - set(ENABLE_CCACHE_BY_DEFAULT 1) - else() - set(ENABLE_CCACHE_BY_DEFAULT 0) - endif() -endif() - -if (NOT CCACHE_FOUND AND NOT DEFINED ENABLE_CCACHE AND NOT COMPILER_MATCHES_CCACHE) - message(WARNING "CCache is not found. We recommend setting it up if you build ClickHouse from source often. " - "Setting it up will significantly reduce compilation time for 2nd and consequent builds") -endif() - -# https://ccache.dev/ -option(ENABLE_CCACHE "Speedup re-compilations using ccache (external tool)" ${ENABLE_CCACHE_BY_DEFAULT}) - -if (NOT ENABLE_CCACHE) + # custom compiler launcher already defined, most likely because cmake was invoked with like "-DCMAKE_CXX_COMPILER_LAUNCHER=ccache" or + # via environment variable --> respect setting and trust that the launcher was specified correctly + message(STATUS "Using custom C compiler launcher: ${CMAKE_C_COMPILER_LAUNCHER}") + message(STATUS "Using custom C++ compiler launcher: ${CMAKE_CXX_COMPILER_LAUNCHER}") return() endif() -if (CCACHE_FOUND AND NOT COMPILER_MATCHES_CCACHE) - execute_process(COMMAND ${CCACHE_FOUND} "-V" OUTPUT_VARIABLE CCACHE_VERSION) - string(REGEX REPLACE "ccache version ([0-9\\.]+).*" "\\1" CCACHE_VERSION ${CCACHE_VERSION}) +option(ENABLE_CCACHE "Speedup re-compilations using ccache (external tool)" ON) - if (CCACHE_VERSION VERSION_GREATER "3.2.0" OR NOT CMAKE_CXX_COMPILER_ID STREQUAL "Clang") - message(STATUS "Using ccache: ${CCACHE_FOUND} (version ${CCACHE_VERSION})") - set(LAUNCHER ${CCACHE_FOUND}) +if (NOT ENABLE_CCACHE) + message(STATUS "Using ccache: no (disabled via configuration)") + return() +endif() - # debian (debhelpers) set SOURCE_DATE_EPOCH environment variable, that is - # filled from the debian/changelog or current time. - # - # - 4.0+ ccache always includes this environment variable into the hash - # of the manifest, which do not allow to use previous cache, - # - 4.2+ ccache ignores SOURCE_DATE_EPOCH for every file w/o __DATE__/__TIME__ - # - # Exclude SOURCE_DATE_EPOCH env for ccache versions between [4.0, 4.2). - if (CCACHE_VERSION VERSION_GREATER_EQUAL "4.0" AND CCACHE_VERSION VERSION_LESS "4.2") - message(STATUS "Ignore SOURCE_DATE_EPOCH for ccache") - set(LAUNCHER env -u SOURCE_DATE_EPOCH ${CCACHE_FOUND}) - endif() +find_program (CCACHE_EXECUTABLE ccache) - set (CMAKE_CXX_COMPILER_LAUNCHER ${LAUNCHER} ${CMAKE_CXX_COMPILER_LAUNCHER}) - set (CMAKE_C_COMPILER_LAUNCHER ${LAUNCHER} ${CMAKE_C_COMPILER_LAUNCHER}) - else () - message(${RECONFIGURE_MESSAGE_LEVEL} "Using ccache: No. 
Found ${CCACHE_FOUND} (version ${CCACHE_VERSION}) but disabled because of bug: https://bugzilla.samba.org/show_bug.cgi?id=8118") - endif () -elseif (NOT CCACHE_FOUND AND NOT COMPILER_MATCHES_CCACHE) - message (${RECONFIGURE_MESSAGE_LEVEL} "Using ccache: No") -endif () +if (NOT CCACHE_EXECUTABLE) + message(${RECONFIGURE_MESSAGE_LEVEL} "Using ccache: no (Could not find find ccache. To significantly reduce compile times for the 2nd, 3rd, etc. build, it is highly recommended to install ccache. To suppress this message, run cmake with -DENABLE_CCACHE=0)") + return() +endif() + +execute_process(COMMAND ${CCACHE_EXECUTABLE} "-V" OUTPUT_VARIABLE CCACHE_VERSION) +string(REGEX REPLACE "ccache version ([0-9\\.]+).*" "\\1" CCACHE_VERSION ${CCACHE_VERSION}) + +set (CCACHE_MINIMUM_VERSION 3.3) + +if (CCACHE_VERSION VERSION_LESS_EQUAL ${CCACHE_MINIMUM_VERSION}) + message(${RECONFIGURE_MESSAGE_LEVEL} "Using ccache: no (found ${CCACHE_EXECUTABLE} (version ${CCACHE_VERSION}), the minimum required version is ${CCACHE_MINIMUM_VERSION}") + return() +endif() + +message(STATUS "Using ccache: ${CCACHE_EXECUTABLE} (version ${CCACHE_VERSION})") +set(LAUNCHER ${CCACHE_EXECUTABLE}) + +# Work around a well-intended but unfortunate behavior of ccache 4.0 & 4.1 with +# environment variable SOURCE_DATE_EPOCH. This variable provides an alternative +# to source-code embedded timestamps (__DATE__/__TIME__) and therefore helps with +# reproducible builds (*). SOURCE_DATE_EPOCH is set automatically by the +# distribution, e.g. Debian. Ccache 4.0 & 4.1 incorporate SOURCE_DATE_EPOCH into +# the hash calculation regardless they contain timestamps or not. This invalidates +# the cache whenever SOURCE_DATE_EPOCH changes. As a fix, ignore SOURCE_DATE_EPOCH. +# +# (*) https://reproducible-builds.org/specs/source-date-epoch/ +if (CCACHE_VERSION VERSION_GREATER_EQUAL "4.0" AND CCACHE_VERSION VERSION_LESS "4.2") + message(STATUS "Ignore SOURCE_DATE_EPOCH for ccache 4.0 / 4.1") + set(LAUNCHER env -u SOURCE_DATE_EPOCH ${CCACHE_EXECUTABLE}) +endif() + +set (CMAKE_CXX_COMPILER_LAUNCHER ${LAUNCHER} ${CMAKE_CXX_COMPILER_LAUNCHER}) +set (CMAKE_C_COMPILER_LAUNCHER ${LAUNCHER} ${CMAKE_C_COMPILER_LAUNCHER}) diff --git a/cmake/cpu_features.cmake b/cmake/cpu_features.cmake index 37e6c356265..fd4cc51b6f9 100644 --- a/cmake/cpu_features.cmake +++ b/cmake/cpu_features.cmake @@ -32,7 +32,8 @@ elseif (ARCH_AARCH64) set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=armv8-a+crc") elseif (ARCH_PPC64LE) - set (COMPILER_FLAGS "${COMPILER_FLAGS} -maltivec -mcpu=power8 -DNO_WARN_X86_INTRINSICS") + # Note that gcc and clang have support for x86 SSE2 intrinsics when building for PowerPC + set (COMPILER_FLAGS "${COMPILER_FLAGS} -maltivec -mcpu=power8 -D__SSE2__=1 -DNO_WARN_X86_INTRINSICS") elseif (ARCH_AMD64) set (TEST_FLAG "-mssse3") diff --git a/cmake/dbms_glob_sources.cmake b/cmake/dbms_glob_sources.cmake index 0f5c6106b70..01c4a8b16e9 100644 --- a/cmake/dbms_glob_sources.cmake +++ b/cmake/dbms_glob_sources.cmake @@ -1,5 +1,5 @@ macro(add_glob cur_list) - file(GLOB __tmp RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ${ARGN}) + file(GLOB __tmp CONFIGURE_DEPENDS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ${ARGN}) list(APPEND ${cur_list} ${__tmp}) endmacro() diff --git a/cmake/generate_code.cmake b/cmake/generate_code.cmake deleted file mode 100644 index 8eb9da24d1d..00000000000 --- a/cmake/generate_code.cmake +++ /dev/null @@ -1,5 +0,0 @@ -function(generate_code TEMPLATE_FILE) - foreach(NAME IN LISTS ARGN) - configure_file (${TEMPLATE_FILE}.cpp.in 
${CMAKE_CURRENT_BINARY_DIR}/generated/${TEMPLATE_FILE}_${NAME}.cpp) - endforeach() -endfunction() diff --git a/cmake/git_status.cmake b/cmake/git_status.cmake index feab4acef1d..c1047c0ccbf 100644 --- a/cmake/git_status.cmake +++ b/cmake/git_status.cmake @@ -1,17 +1,22 @@ # Print the status of the git repository (if git is available). # This is useful for troubleshooting build failure reports + find_package(Git) if (Git_FOUND) + execute_process( COMMAND ${GIT_EXECUTABLE} rev-parse HEAD WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} OUTPUT_VARIABLE GIT_COMMIT_ID OUTPUT_STRIP_TRAILING_WHITESPACE) + message(STATUS "HEAD's commit hash ${GIT_COMMIT_ID}") + execute_process( COMMAND ${GIT_EXECUTABLE} status WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} OUTPUT_STRIP_TRAILING_WHITESPACE) + else() - message(STATUS "The git program could not be found.") + message(STATUS "Git could not be found.") endif() diff --git a/cmake/strip_binary.cmake b/cmake/strip_binary.cmake index 2d6a3888503..be23a4c1c30 100644 --- a/cmake/strip_binary.cmake +++ b/cmake/strip_binary.cmake @@ -27,7 +27,7 @@ macro(clickhouse_strip_binary) ) install(PROGRAMS ${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET} DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) - install(FILES ${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug DESTINATION ${CMAKE_INSTALL_LIBDIR}/debug/${CMAKE_INSTALL_FULL_BINDIR}/${STRIP_TARGET}.debug COMPONENT clickhouse) + install(FILES ${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug DESTINATION ${CMAKE_INSTALL_LIBDIR}/debug/${CMAKE_INSTALL_FULL_BINDIR} COMPONENT clickhouse) endmacro() diff --git a/cmake/warnings.cmake b/cmake/warnings.cmake index e79771d2e6f..4b8f83df090 100644 --- a/cmake/warnings.cmake +++ b/cmake/warnings.cmake @@ -9,11 +9,6 @@ set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra") -# Add some warnings that are not available even with -Wall -Wextra -Wpedantic. -# Intended for exploration of new compiler warnings that may be found useful. -# Applies to clang only -option (WEVERYTHING "Enable -Weverything option with some exceptions." ON) - # Control maximum size of stack frames. It can be important if the code is run in fibers with small stack size. # Only in release build because debug has too large stack frames. if ((NOT CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG") AND (NOT SANITIZE) AND (NOT CMAKE_CXX_COMPILER_ID MATCHES "AppleClang")) @@ -21,81 +16,42 @@ if ((NOT CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG") AND (NOT SANITIZE) AND (NOT CMAKE endif () if (COMPILER_CLANG) + # Add some warnings that are not available even with -Wall -Wextra -Wpedantic. + # We want to get everything out of the compiler for code quality. 
+ add_warning(everything) + add_warning(pedantic) no_warning(vla-extension) no_warning(zero-length-array) no_warning(c11-extensions) no_warning(unused-command-line-argument) - - if (WEVERYTHING) - add_warning(everything) - no_warning(c++98-compat-pedantic) - no_warning(c++98-compat) - no_warning(c99-extensions) - no_warning(conversion) - no_warning(ctad-maybe-unsupported) # clang 9+, linux-only - no_warning(deprecated-dynamic-exception-spec) - no_warning(disabled-macro-expansion) - no_warning(documentation-unknown-command) - no_warning(double-promotion) - no_warning(exit-time-destructors) - no_warning(float-equal) - no_warning(global-constructors) - no_warning(missing-prototypes) - no_warning(missing-variable-declarations) - no_warning(nested-anon-types) - no_warning(packed) - no_warning(padded) - no_warning(return-std-move-in-c++11) # clang 7+ - no_warning(shift-sign-overflow) - no_warning(sign-conversion) - no_warning(switch-enum) - no_warning(undefined-func-template) - no_warning(unused-template) - no_warning(vla) - no_warning(weak-template-vtables) - no_warning(weak-vtables) - - # TODO Enable conversion, sign-conversion, double-promotion warnings. - else () - add_warning(comma) - add_warning(conditional-uninitialized) - add_warning(covered-switch-default) - add_warning(deprecated) - add_warning(embedded-directive) - add_warning(empty-init-stmt) # linux-only - add_warning(extra-semi-stmt) # linux-only - add_warning(extra-semi) - add_warning(gnu-case-range) - add_warning(inconsistent-missing-destructor-override) - add_warning(newline-eof) - add_warning(old-style-cast) - add_warning(range-loop-analysis) - add_warning(redundant-parens) - add_warning(reserved-id-macro) - add_warning(shadow-field) - add_warning(shadow-uncaptured-local) - add_warning(shadow) - add_warning(string-plus-int) - add_warning(undef) - add_warning(unreachable-code-return) - add_warning(unreachable-code) - add_warning(unused-exception-parameter) - add_warning(unused-macros) - add_warning(unused-member-function) - add_warning(unneeded-internal-declaration) - add_warning(implicit-int-float-conversion) - add_warning(no-delete-null-pointer-checks) - add_warning(anon-enum-enum-conversion) - add_warning(assign-enum) - add_warning(bitwise-op-parentheses) - add_warning(int-in-bool-context) - add_warning(sometimes-uninitialized) - add_warning(tautological-bitwise-compare) - - # XXX: libstdc++ has some of these for 3way compare - add_warning(zero-as-null-pointer-constant) - endif () + no_warning(c++98-compat-pedantic) + no_warning(c++98-compat) + no_warning(c99-extensions) + no_warning(conversion) + no_warning(ctad-maybe-unsupported) # clang 9+, linux-only + no_warning(deprecated-dynamic-exception-spec) + no_warning(disabled-macro-expansion) + no_warning(documentation-unknown-command) + no_warning(double-promotion) + no_warning(exit-time-destructors) + no_warning(float-equal) + no_warning(global-constructors) + no_warning(missing-prototypes) + no_warning(missing-variable-declarations) + no_warning(nested-anon-types) + no_warning(packed) + no_warning(padded) + no_warning(return-std-move-in-c++11) # clang 7+ + no_warning(shift-sign-overflow) + no_warning(sign-conversion) + no_warning(switch-enum) + no_warning(undefined-func-template) + no_warning(unused-template) + no_warning(vla) + no_warning(weak-template-vtables) + no_warning(weak-vtables) + # TODO Enable conversion, sign-conversion, double-promotion warnings. 
elseif (COMPILER_GCC) # Add compiler options only to c++ compiler function(add_cxx_compile_options option) diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 01b0e8de5c9..5daa1e71870 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -141,6 +141,7 @@ add_contrib (libpq-cmake libpq) add_contrib (nuraft-cmake NuRaft) add_contrib (fast_float-cmake fast_float) add_contrib (datasketches-cpp-cmake datasketches-cpp) +add_contrib (hashidsxx-cmake hashidsxx) option(ENABLE_NLP "Enable NLP functions support" ${ENABLE_LIBRARIES}) if (ENABLE_NLP) diff --git a/contrib/eigen-cmake/CMakeLists.txt b/contrib/eigen-cmake/CMakeLists.txt index 6bdf3ab7c35..a37d341109c 100644 --- a/contrib/eigen-cmake/CMakeLists.txt +++ b/contrib/eigen-cmake/CMakeLists.txt @@ -2,22 +2,15 @@ set(EIGEN_LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/eigen") add_library (_eigen INTERFACE) -option (ENABLE_MKL "Build Eigen with Intel MKL" OFF) -if (ENABLE_MKL) - set(MKL_THREADING sequential) - set(MKL_INTERFACE lp64) - find_package(MKL REQUIRED) - if (MKL_FOUND) - message("MKL INCLUDE: ${MKL_INCLUDE}") - message("MKL LIBRARIES: ${MKL_LIBRARIES}") - target_compile_definitions(_eigen INTERFACE EIGEN_USE_MKL_ALL) - target_include_directories(_eigen INTERFACE ${MKL_INCLUDE}) - target_link_libraries(_eigen INTERFACE ${MKL_LIBRARIES}) - endif() -endif() - # Only include MPL2 code from Eigen library target_compile_definitions(_eigen INTERFACE EIGEN_MPL2_ONLY) +# Clang by default mimics gcc 4.2.1 compatibility but Eigen checks __GNUC__ version to enable +# a workaround for bug https://gcc.gnu.org/bugzilla/show_bug.cgi?id=72867 fixed in 6.3 +# So we fake gcc > 6.3 when building with clang +if (COMPILER_CLANG AND ARCH_PPC64LE) + target_compile_options(_eigen INTERFACE -fgnuc-version=6.4) +endif() + target_include_directories (_eigen SYSTEM INTERFACE ${EIGEN_LIBRARY_DIR}) add_library(ch_contrib::eigen ALIAS _eigen) diff --git a/contrib/hashidsxx b/contrib/hashidsxx new file mode 160000 index 00000000000..783f6911ccf --- /dev/null +++ b/contrib/hashidsxx @@ -0,0 +1 @@ +Subproject commit 783f6911ccfdaca83e3cfac084c4aad888a80cee diff --git a/contrib/hashidsxx-cmake/CMakeLists.txt b/contrib/hashidsxx-cmake/CMakeLists.txt new file mode 100644 index 00000000000..17f3888bd94 --- /dev/null +++ b/contrib/hashidsxx-cmake/CMakeLists.txt @@ -0,0 +1,14 @@ +set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/hashidsxx") + +set (SRCS + "${LIBRARY_DIR}/hashids.cpp" +) + +set (HDRS + "${LIBRARY_DIR}/hashids.h" +) + +add_library(_hashidsxx ${SRCS} ${HDRS}) +target_include_directories(_hashidsxx SYSTEM PUBLIC "${LIBRARY_DIR}") + +add_library(ch_contrib::hashidsxx ALIAS _hashidsxx) diff --git a/docker/docs/release/Dockerfile b/docker/docs/release/Dockerfile new file mode 100644 index 00000000000..024cf8e6cc6 --- /dev/null +++ b/docker/docs/release/Dockerfile @@ -0,0 +1,45 @@ +# rebuild in #33610 +# docker build -t clickhouse/docs-release . 
+FROM ubuntu:20.04 + +# ARG for quick switch to a given ubuntu mirror +ARG apt_archive="http://archive.ubuntu.com" +RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list + +ENV LANG=C.UTF-8 + +RUN apt-get update \ + && DEBIAN_FRONTEND=noninteractive apt-get install --yes --no-install-recommends \ + wget \ + bash \ + python \ + curl \ + python3-requests \ + sudo \ + git \ + openssl \ + python3-pip \ + software-properties-common \ + fonts-arphic-ukai \ + fonts-arphic-uming \ + fonts-ipafont-mincho \ + fonts-ipafont-gothic \ + fonts-unfonts-core \ + xvfb \ + ssh-client \ + && apt-get autoremove --yes \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +RUN pip3 install --ignore-installed --upgrade setuptools pip virtualenv + +# We create the most popular default 1000:1000 ubuntu user to not have ssh issues when running with UID==1000 +RUN useradd --create-home --uid 1000 --user-group ubuntu \ + && ssh-keyscan -t rsa github.com >> /etc/ssh/ssh_known_hosts + +COPY run.sh / + +ENV REPO_PATH=/repo_path +ENV OUTPUT_PATH=/output_path + +CMD ["/bin/bash", "/run.sh"] diff --git a/docker/docs/release/run.sh b/docker/docs/release/run.sh new file mode 100644 index 00000000000..e5a9f2101aa --- /dev/null +++ b/docker/docs/release/run.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash +set -euo pipefail + +cd "$REPO_PATH/docs/tools" +if ! [ -d venv ]; then + mkdir -p venv + virtualenv -p "$(which python3)" venv + source venv/bin/activate + python3 -m pip install --ignore-installed -r requirements.txt +fi +source venv/bin/activate +./release.sh 2>&1 | tee "$OUTPUT_PATH/output.log" diff --git a/docker/images.json b/docker/images.json index bdef55e0efc..181452f17bc 100644 --- a/docker/images.json +++ b/docker/images.json @@ -1,8 +1,4 @@ { - "docker/packager/deb": { - "name": "clickhouse/deb-builder", - "dependent": [] - }, "docker/packager/binary": { "name": "clickhouse/binary-builder", "dependent": [ @@ -150,5 +146,9 @@ "name": "clickhouse/docs-builder", "dependent": [ ] + }, + "docker/docs/release": { + "name": "clickhouse/docs-release", + "dependent": [] } } diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile index a57a734e3df..d4e824838c2 100644 --- a/docker/packager/binary/Dockerfile +++ b/docker/packager/binary/Dockerfile @@ -97,12 +97,15 @@ RUN add-apt-repository ppa:ubuntu-toolchain-r/test --yes \ # Architecture of the image when BuildKit/buildx is used ARG TARGETARCH -ARG NFPM_VERSION=2.15.0 +ARG NFPM_VERSION=2.15.1 RUN arch=${TARGETARCH:-amd64} \ && curl -Lo /tmp/nfpm.deb "https://github.com/goreleaser/nfpm/releases/download/v${NFPM_VERSION}/nfpm_${arch}.deb" \ && dpkg -i /tmp/nfpm.deb \ && rm /tmp/nfpm.deb +RUN mkdir /workdir && chmod 777 /workdir +WORKDIR /workdir + COPY build.sh / CMD ["bash", "-c", "/build.sh 2>&1"] diff --git a/docker/packager/binary/build.sh b/docker/packager/binary/build.sh index c893263ef09..b8d11e9c293 100755 --- a/docker/packager/binary/build.sh +++ b/docker/packager/binary/build.sh @@ -1,18 +1,18 @@ #!/usr/bin/env bash +set -x -e exec &> >(ts) -set -x -e cache_status () { ccache --show-config ||: ccache --show-stats ||: } -git config --global --add safe.directory /build +[ -O /build ] || git config --global --add safe.directory /build -mkdir -p build/cmake/toolchain/darwin-x86_64 -tar xJf MacOSX11.0.sdk.tar.xz -C build/cmake/toolchain/darwin-x86_64 --strip-components=1 -ln -sf darwin-x86_64 build/cmake/toolchain/darwin-aarch64 +mkdir -p /build/cmake/toolchain/darwin-x86_64 +tar xJf /MacOSX11.0.sdk.tar.xz -C 
/build/cmake/toolchain/darwin-x86_64 --strip-components=1 +ln -sf darwin-x86_64 /build/cmake/toolchain/darwin-aarch64 # Uncomment to debug ccache. Don't put ccache log in /output right away, or it # will be confusingly packed into the "performance" package. @@ -20,8 +20,8 @@ ln -sf darwin-x86_64 build/cmake/toolchain/darwin-aarch64 # export CCACHE_DEBUG=1 -mkdir -p build/build_docker -cd build/build_docker +mkdir -p /build/build_docker +cd /build/build_docker rm -f CMakeCache.txt # Read cmake arguments into array (possibly empty) read -ra CMAKE_FLAGS <<< "${CMAKE_FLAGS:-}" @@ -61,10 +61,10 @@ fi if [ "coverity" == "$COMBINED_OUTPUT" ] then - mkdir -p /opt/cov-analysis + mkdir -p /workdir/cov-analysis - wget --post-data "token=$COVERITY_TOKEN&project=ClickHouse%2FClickHouse" -qO- https://scan.coverity.com/download/linux64 | tar xz -C /opt/cov-analysis --strip-components 1 - export PATH=$PATH:/opt/cov-analysis/bin + wget --post-data "token=$COVERITY_TOKEN&project=ClickHouse%2FClickHouse" -qO- https://scan.coverity.com/download/linux64 | tar xz -C /workdir/cov-analysis --strip-components 1 + export PATH=$PATH:/workdir/cov-analysis/bin cov-configure --config ./coverity.config --template --comptype clangcc --compiler "$CC" SCAN_WRAPPER="cov-build --config ./coverity.config --dir cov-int" fi @@ -89,16 +89,36 @@ mv ./src/unit_tests_dbms /output ||: # may not exist for some binary builds find . -name '*.so' -print -exec mv '{}' /output \; find . -name '*.so.*' -print -exec mv '{}' /output \; -# Different files for performance test. -if [ "performance" == "$COMBINED_OUTPUT" ] -then - cp -r ../tests/performance /output - cp -r ../tests/config/top_level_domains /output - cp -r ../docker/test/performance-comparison/config /output ||: - rm /output/unit_tests_dbms ||: - rm /output/clickhouse-odbc-bridge ||: +prepare_combined_output () { + local OUTPUT + OUTPUT="$1" - cp -r ../docker/test/performance-comparison /output/scripts ||: + mkdir -p "$OUTPUT"/config + cp /build/programs/server/config.xml "$OUTPUT"/config + cp /build/programs/server/users.xml "$OUTPUT"/config + cp -r --dereference /build/programs/server/config.d "$OUTPUT"/config +} + +# Different files for performance test. +if [ "$WITH_PERFORMANCE" == 1 ] +then + PERF_OUTPUT=/workdir/performance/output + mkdir -p "$PERF_OUTPUT" + cp -r ../tests/performance "$PERF_OUTPUT" + cp -r ../tests/config/top_level_domains "$PERF_OUTPUT" + cp -r ../docker/test/performance-comparison/config "$PERF_OUTPUT" ||: + for SRC in /output/clickhouse*; do + # Copy all clickhouse* files except packages and bridges + [[ "$SRC" != *.* ]] && [[ "$SRC" != *-bridge ]] && \ + cp -d "$SRC" "$PERF_OUTPUT" + done + if [ -x "$PERF_OUTPUT"/clickhouse-keeper ]; then + # Replace standalone keeper by symlink + ln -sf clickhouse "$PERF_OUTPUT"/clickhouse-keeper + fi + + cp -r ../docker/test/performance-comparison "$PERF_OUTPUT"/scripts ||: + prepare_combined_output "$PERF_OUTPUT" # We have to know the revision that corresponds to this binary build. # It is not the nominal SHA from pull/*/head, but the pull/*/merge, which is @@ -111,22 +131,23 @@ then # for a given nominal SHA, but it is not accessible outside Yandex. # This is why we add this repository snapshot from CI to the performance test # package. 
- mkdir /output/ch - git -C /output/ch init --bare - git -C /output/ch remote add origin /build - git -C /output/ch fetch --no-tags --depth 50 origin HEAD:pr - git -C /output/ch fetch --no-tags --depth 50 origin master:master - git -C /output/ch reset --soft pr - git -C /output/ch log -5 + mkdir "$PERF_OUTPUT"/ch + git -C "$PERF_OUTPUT"/ch init --bare + git -C "$PERF_OUTPUT"/ch remote add origin /build + git -C "$PERF_OUTPUT"/ch fetch --no-tags --depth 50 origin HEAD:pr + git -C "$PERF_OUTPUT"/ch fetch --no-tags --depth 50 origin master:master + git -C "$PERF_OUTPUT"/ch reset --soft pr + git -C "$PERF_OUTPUT"/ch log -5 + ( + cd "$PERF_OUTPUT"/.. + tar -cv -I pigz -f /output/performance.tgz output + ) fi # May be set for split build or for performance test. if [ "" != "$COMBINED_OUTPUT" ] then - mkdir -p /output/config - cp ../programs/server/config.xml /output/config - cp ../programs/server/users.xml /output/config - cp -r --dereference ../programs/server/config.d /output/config + prepare_combined_output /output tar -cv -I pigz -f "$COMBINED_OUTPUT.tgz" /output rm -r /output/* mv "$COMBINED_OUTPUT.tgz" /output @@ -138,13 +159,6 @@ then mv "coverity-scan.tgz" /output fi -# Also build fuzzers if any sanitizer specified -# if [ -n "$SANITIZER" ] -# then -# # Currently we are in build/build_docker directory -# ../docker/packager/other/fuzzer.sh -# fi - cache_status if [ "${CCACHE_DEBUG:-}" == "1" ] @@ -159,3 +173,5 @@ then # files in place, and will fail because this directory is not writable. tar -cv -I pixz -f /output/ccache.log.txz "$CCACHE_LOGFILE" fi + +ls -l /output diff --git a/docker/packager/deb/Dockerfile b/docker/packager/deb/Dockerfile deleted file mode 100644 index 76a5f1d91c0..00000000000 --- a/docker/packager/deb/Dockerfile +++ /dev/null @@ -1,81 +0,0 @@ -# rebuild in #33610 -# docker build -t clickhouse/deb-builder . -FROM ubuntu:20.04 - -# ARG for quick switch to a given ubuntu mirror -ARG apt_archive="http://archive.ubuntu.com" -RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list - -ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=13 - -RUN apt-get update \ - && apt-get install ca-certificates lsb-release wget gnupg apt-transport-https \ - --yes --no-install-recommends --verbose-versions \ - && export LLVM_PUBKEY_HASH="bda960a8da687a275a2078d43c111d66b1c6a893a3275271beedf266c1ff4a0cdecb429c7a5cccf9f486ea7aa43fd27f" \ - && wget -nv -O /tmp/llvm-snapshot.gpg.key https://apt.llvm.org/llvm-snapshot.gpg.key \ - && echo "${LLVM_PUBKEY_HASH} /tmp/llvm-snapshot.gpg.key" | sha384sum -c \ - && apt-key add /tmp/llvm-snapshot.gpg.key \ - && export CODENAME="$(lsb_release --codename --short | tr 'A-Z' 'a-z')" \ - && echo "deb [trusted=yes] http://apt.llvm.org/${CODENAME}/ llvm-toolchain-${CODENAME}-${LLVM_VERSION} main" >> \ - /etc/apt/sources.list - -# initial packages -RUN apt-get update \ - && apt-get install \ - bash \ - fakeroot \ - ccache \ - curl \ - software-properties-common \ - --yes --no-install-recommends - -# Architecture of the image when BuildKit/buildx is used -ARG TARGETARCH - -# Special dpkg-deb (https://github.com/ClickHouse-Extras/dpkg) version which is able -# to compress files using pigz (https://zlib.net/pigz/) instead of gzip. 
-# Significantly increase deb packaging speed and compatible with old systems -RUN arch=${TARGETARCH:-amd64} \ - && curl -Lo /usr/bin/dpkg-deb https://github.com/ClickHouse-Extras/dpkg/releases/download/1.21.1-clickhouse/dpkg-deb-${arch} - -RUN apt-get update \ - && apt-get install \ - alien \ - clang-${LLVM_VERSION} \ - clang-tidy-${LLVM_VERSION} \ - cmake \ - debhelper \ - devscripts \ - gdb \ - git \ - gperf \ - lld-${LLVM_VERSION} \ - llvm-${LLVM_VERSION} \ - llvm-${LLVM_VERSION}-dev \ - moreutils \ - ninja-build \ - perl \ - pigz \ - pixz \ - pkg-config \ - tzdata \ - --yes --no-install-recommends - -# NOTE: Seems like gcc-11 is too new for ubuntu20 repository -RUN add-apt-repository ppa:ubuntu-toolchain-r/test --yes \ - && apt-get update \ - && apt-get install gcc-11 g++-11 --yes - - -# These symlinks are required: -# /usr/bin/ld.lld: by gcc to find lld compiler -# /usr/bin/aarch64-linux-gnu-obj*: for debug symbols stripping -RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld \ - && ln -sf /usr/lib/llvm-${LLVM_VERSION}/bin/llvm-objcopy /usr/bin/aarch64-linux-gnu-strip \ - && ln -sf /usr/lib/llvm-${LLVM_VERSION}/bin/llvm-objcopy /usr/bin/aarch64-linux-gnu-objcopy \ - && ln -sf /usr/lib/llvm-${LLVM_VERSION}/bin/llvm-objdump /usr/bin/aarch64-linux-gnu-objdump - - -COPY build.sh / - -CMD ["/bin/bash", "/build.sh"] diff --git a/docker/packager/deb/build.sh b/docker/packager/deb/build.sh deleted file mode 100755 index e1272317c8a..00000000000 --- a/docker/packager/deb/build.sh +++ /dev/null @@ -1,58 +0,0 @@ -#!/usr/bin/env bash - -set -x -e - -# Uncomment to debug ccache. -# export CCACHE_LOGFILE=/build/ccache.log -# export CCACHE_DEBUG=1 - -ccache --show-config ||: -ccache --show-stats ||: -ccache --zero-stats ||: - -read -ra ALIEN_PKGS <<< "${ALIEN_PKGS:-}" -build/release "${ALIEN_PKGS[@]}" | ts '%Y-%m-%d %H:%M:%S' -mv /*.deb /output -mv -- *.changes /output -mv -- *.buildinfo /output -mv /*.rpm /output ||: # if exists -mv /*.tgz /output ||: # if exists - -if [ -n "$BINARY_OUTPUT" ] && { [ "$BINARY_OUTPUT" = "programs" ] || [ "$BINARY_OUTPUT" = "tests" ] ;} -then - echo "Place $BINARY_OUTPUT to output" - mkdir /output/binary ||: # if exists - mv /build/obj-*/programs/clickhouse* /output/binary - - if [ "$BINARY_OUTPUT" = "tests" ] - then - mv /build/obj-*/src/unit_tests_dbms /output/binary - fi -fi - -# Also build fuzzers if any sanitizer specified -# if [ -n "$SANITIZER" ] -# then -# # Script is supposed that we are in build directory. -# mkdir -p build/build_docker -# cd build/build_docker -# # Launching build script -# ../docker/packager/other/fuzzer.sh -# cd -# fi - -ccache --show-config ||: -ccache --show-stats ||: - -if [ "${CCACHE_DEBUG:-}" == "1" ] -then - find /build -name '*.ccache-*' -print0 \ - | tar -c -I pixz -f /output/ccache-debug.txz --null -T - -fi - -if [ -n "$CCACHE_LOGFILE" ] -then - # Compress the log as well, or else the CI will try to compress all log - # files in place, and will fail because this directory is not writable. - tar -cv -I pixz -f /output/ccache.log.txz "$CCACHE_LOGFILE" -fi diff --git a/docker/packager/other/fuzzer.sh b/docker/packager/other/fuzzer.sh deleted file mode 100755 index ac820d9e689..00000000000 --- a/docker/packager/other/fuzzer.sh +++ /dev/null @@ -1,36 +0,0 @@ -#!/usr/bin/env bash - -# This script is responsible for building all fuzzers, and copy them to output directory -# as an archive. -# Script is supposed that we are in build directory. 
- -set -x -e - -printenv - -# Delete previous cache, because we add a new flags -DENABLE_FUZZING=1 and -DFUZZER=libfuzzer -rm -f CMakeCache.txt -read -ra CMAKE_FLAGS <<< "${CMAKE_FLAGS:-}" -# Hope, that the most part of files will be in cache, so we just link new executables -# Please, add or change flags directly in cmake -cmake --debug-trycompile --verbose=1 -DCMAKE_VERBOSE_MAKEFILE=1 -LA -DCMAKE_C_COMPILER="$CC" -DCMAKE_CXX_COMPILER="$CXX" \ - -DSANITIZE="$SANITIZER" -DENABLE_FUZZING=1 -DFUZZER='libfuzzer' -DENABLE_PROTOBUF=1 "${CMAKE_FLAGS[@]}" .. - -FUZZER_TARGETS=$(find ../src -name '*_fuzzer.cpp' -execdir basename {} .cpp ';' | tr '\n' ' ') - -NUM_JOBS=$(($(nproc || grep -c ^processor /proc/cpuinfo))) - -mkdir -p /output/fuzzers -for FUZZER_TARGET in $FUZZER_TARGETS -do - # shellcheck disable=SC2086 # No quotes because I want it to expand to nothing if empty. - ninja $NINJA_FLAGS $FUZZER_TARGET -j $NUM_JOBS - # Find this binary in build directory and strip it - FUZZER_PATH=$(find ./src -name "$FUZZER_TARGET") - strip --strip-unneeded "$FUZZER_PATH" - mv "$FUZZER_PATH" /output/fuzzers -done - - -tar -zcvf /output/fuzzers.tar.gz /output/fuzzers -rm -rf /output/fuzzers diff --git a/docker/packager/packager b/docker/packager/packager index b7ffdd698a4..578b5a38bfb 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -5,8 +5,10 @@ import os import argparse import logging import sys +from typing import List SCRIPT_PATH = os.path.realpath(__file__) +IMAGE_TYPE = "binary" def check_image_exists_locally(image_name): @@ -38,8 +40,40 @@ def build_image(image_name, filepath): ) +def pre_build(repo_path: str, env_variables: List[str]): + if "WITH_PERFORMANCE=1" in env_variables: + current_branch = subprocess.check_output( + "git branch --show-current", shell=True, encoding="utf-8" + ).strip() + is_shallow = ( + subprocess.check_output( + "git rev-parse --is-shallow-repository", shell=True, encoding="utf-8" + ) + == "true\n" + ) + if is_shallow: + # I've spent quite some time on looking around the problem, and my + # conclusion is: in the current state the easiest way to go is to force + # unshallow repository for performance artifacts. 
+ # To change it we need to rework our performance tests docker image + raise Exception("shallow repository is not suitable for performance builds") + if current_branch != "master": + cmd = ( + f"git -C {repo_path} fetch --no-recurse-submodules " + "--no-tags origin master:master" + ) + logging.info("Getting master branch for performance artifact: ''%s'", cmd) + subprocess.check_call(cmd, shell=True) + + def run_docker_image_with_env( - image_name, output, env_variables, ch_root, ccache_dir, docker_image_version + image_name, + as_root, + output, + env_variables, + ch_root, + ccache_dir, + docker_image_version, ): env_part = " -e ".join(env_variables) if env_part: @@ -50,8 +84,13 @@ def run_docker_image_with_env( else: interactive = "" + if as_root: + user = "0:0" + else: + user = f"{os.geteuid()}:{os.getegid()}" + cmd = ( - f"docker run --network=host --rm --volume={output}:/output " + f"docker run --network=host --user={user} --rm --volume={output}:/output " f"--volume={ch_root}:/build --volume={ccache_dir}:/ccache {env_part} " f"{interactive} {image_name}:{docker_image_version}" ) @@ -75,7 +114,6 @@ def parse_env_variables( compiler, sanitizer, package_type, - image_type, cache, distcc_hosts, split_binary, @@ -153,7 +191,7 @@ def parse_env_variables( cxx = cc.replace("gcc", "g++").replace("clang", "clang++") - if image_type == "deb": + if package_type == "deb": result.append("MAKE_DEB=true") cmake_flags.append("-DENABLE_TESTS=0") cmake_flags.append("-DENABLE_UTILS=0") @@ -165,6 +203,7 @@ def parse_env_variables( cmake_flags.append("-DCMAKE_INSTALL_LOCALSTATEDIR=/var") if is_release_build(build_type, package_type, sanitizer, split_binary): cmake_flags.append("-DINSTALL_STRIPPED_BINARIES=ON") + result.append("WITH_PERFORMANCE=1") if is_cross_arm: cmake_flags.append("-DBUILD_STANDALONE_KEEPER=1") else: @@ -176,10 +215,7 @@ def parse_env_variables( cmake_flags.append(f"-DCMAKE_CXX_COMPILER={cxx}") # Create combined output archive for split build and for performance tests. - if package_type == "performance": - result.append("COMBINED_OUTPUT=performance") - cmake_flags.append("-DENABLE_TESTS=0") - elif package_type == "coverity": + if package_type == "coverity": result.append("COMBINED_OUTPUT=coverity") result.append('COVERITY_TOKEN="$COVERITY_TOKEN"') elif split_binary: @@ -258,27 +294,30 @@ def parse_env_variables( return result +def dir_name(name: str) -> str: + if not os.path.isabs(name): + name = os.path.abspath(os.path.join(os.getcwd(), name)) + return name + + if __name__ == "__main__": logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s") parser = argparse.ArgumentParser( formatter_class=argparse.ArgumentDefaultsHelpFormatter, description="ClickHouse building script using prebuilt Docker image", ) - # 'performance' creates a combined .tgz with server - # and configs to be used for performance test. 
parser.add_argument( "--package-type", - choices=["deb", "binary", "performance", "coverity"], + choices=["deb", "binary", "coverity"], required=True, ) parser.add_argument( "--clickhouse-repo-path", - default=os.path.join( - os.path.dirname(os.path.abspath(__file__)), os.pardir, os.pardir - ), + default=os.path.join(os.path.dirname(SCRIPT_PATH), os.pardir, os.pardir), + type=dir_name, help="ClickHouse git repository", ) - parser.add_argument("--output-dir", required=True) + parser.add_argument("--output-dir", type=dir_name, required=True) parser.add_argument("--build-type", choices=("debug", ""), default="") parser.add_argument( "--compiler", @@ -315,6 +354,7 @@ if __name__ == "__main__": parser.add_argument( "--ccache_dir", default=os.getenv("HOME", "") + "/.ccache", + type=dir_name, help="a directory with ccache", ) parser.add_argument("--distcc-hosts", nargs="+") @@ -330,39 +370,28 @@ if __name__ == "__main__": parser.add_argument( "--docker-image-version", default="latest", help="docker image tag to use" ) + parser.add_argument( + "--as-root", action="store_true", help="if the container should run as root" + ) args = parser.parse_args() - if not os.path.isabs(args.output_dir): - args.output_dir = os.path.abspath(os.path.join(os.getcwd(), args.output_dir)) - image_type = ( - "binary" - if args.package_type in ("performance", "coverity") - else args.package_type - ) - image_name = "clickhouse/binary-builder" + image_name = f"clickhouse/{IMAGE_TYPE}-builder" - if not os.path.isabs(args.clickhouse_repo_path): - ch_root = os.path.abspath(os.path.join(os.getcwd(), args.clickhouse_repo_path)) - else: - ch_root = args.clickhouse_repo_path + ch_root = args.clickhouse_repo_path - if args.additional_pkgs and image_type != "deb": + if args.additional_pkgs and args.package_type != "deb": raise Exception("Can build additional packages only in deb build") - if args.with_binaries != "" and image_type != "deb": + if args.with_binaries != "" and args.package_type != "deb": raise Exception("Can add additional binaries only in deb build") - if args.with_binaries != "" and image_type == "deb": + if args.with_binaries != "" and args.package_type == "deb": logging.info("Should place %s to output", args.with_binaries) - dockerfile = os.path.join(ch_root, "docker/packager", image_type, "Dockerfile") + dockerfile = os.path.join(ch_root, "docker/packager", IMAGE_TYPE, "Dockerfile") image_with_version = image_name + ":" + args.docker_image_version - if ( - image_type != "freebsd" - and not check_image_exists_locally(image_name) - or args.force_build_image - ): + if not check_image_exists_locally(image_name) or args.force_build_image: if not pull_image(image_with_version) or args.force_build_image: build_image(image_with_version, dockerfile) env_prepared = parse_env_variables( @@ -370,7 +399,6 @@ if __name__ == "__main__": args.compiler, args.sanitizer, args.package_type, - image_type, args.cache, args.distcc_hosts, args.split_binary, @@ -383,8 +411,10 @@ if __name__ == "__main__": args.with_binaries, ) + pre_build(args.clickhouse_repo_path, env_prepared) run_docker_image_with_env( image_name, + args.as_root, args.output_dir, env_prepared, ch_root, diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index 6f651a254c4..3a660d9cf15 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -178,7 +178,7 @@ function clone_submodules contrib/replxx contrib/wyhash contrib/eigen - contrib/nats-io + contrib/hashidsxx ) git submodule sync diff --git 
index ae9e677713f..463c08c5304 100755
--- a/docker/test/performance-comparison/download.sh
+++ b/docker/test/performance-comparison/download.sh
@@ -3,6 +3,7 @@ set -ex
 set -o pipefail
 trap "exit" INT TERM
 trap 'kill $(jobs -pr) ||:' EXIT
+BUILD_NAME=${BUILD_NAME:-package_release}
 
 mkdir db0 ||:
 mkdir left ||:
@@ -26,7 +27,10 @@ function download
 {
     # Historically there were various paths for the performance test package.
     # Test all of them.
-    declare -a urls_to_try=("https://s3.amazonaws.com/clickhouse-builds/$left_pr/$left_sha/performance/performance.tgz")
+    declare -a urls_to_try=(
+        "https://s3.amazonaws.com/clickhouse-builds/$left_pr/$left_sha/$BUILD_NAME/performance.tgz"
+        "https://s3.amazonaws.com/clickhouse-builds/$left_pr/$left_sha/performance/performance.tgz"
+    )
 
     for path in "${urls_to_try[@]}"
     do
@@ -41,7 +45,7 @@ function download
     # download anything, for example in some manual runs. In this case, SHAs are not set.
     if ! [ "$left_sha" = "$right_sha" ]
     then
-        wget -nv -nd -c "$left_path" -O- | tar -C left --strip-components=1 -zxv &
+        wget -nv -nd -c "$left_path" -O- | tar -C left --no-same-owner --strip-components=1 -zxv &
     elif [ "$right_sha" != "" ]
     then
         mkdir left ||:
diff --git a/docker/test/performance-comparison/entrypoint.sh b/docker/test/performance-comparison/entrypoint.sh
index cc6e55dac1f..4b9a66d2195 100755
--- a/docker/test/performance-comparison/entrypoint.sh
+++ b/docker/test/performance-comparison/entrypoint.sh
@@ -5,6 +5,7 @@ CHPC_CHECK_START_TIMESTAMP="$(date +%s)"
 export CHPC_CHECK_START_TIMESTAMP
 
 S3_URL=${S3_URL:="https://clickhouse-builds.s3.amazonaws.com"}
+BUILD_NAME=${BUILD_NAME:-package_release}
 COMMON_BUILD_PREFIX="/clickhouse_build_check"
 
 if [[ $S3_URL == *"s3.amazonaws.com"* ]]; then
@@ -64,7 +65,12 @@ function find_reference_sha
     # Historically there were various path for the performance test package,
     # test all of them.
     unset found
-    declare -a urls_to_try=("https://s3.amazonaws.com/clickhouse-builds/0/$REF_SHA/performance/performance.tgz")
+    declare -a urls_to_try=(
+        "https://s3.amazonaws.com/clickhouse-builds/0/$REF_SHA/$BUILD_NAME/performance.tgz"
+        # FIXME: the following link is left there for backward compatibility.
+        # We should remove it after 2022-11-01
+        "https://s3.amazonaws.com/clickhouse-builds/0/$REF_SHA/performance/performance.tgz"
+    )
     for path in "${urls_to_try[@]}"
     do
         if curl_with_retry "$path"
@@ -88,13 +94,13 @@ chmod 777 workspace output
 cd workspace
 
 # Download the package for the version we are going to test.
-if curl_with_retry "$S3_URL/$PR_TO_TEST/$SHA_TO_TEST$COMMON_BUILD_PREFIX/performance/performance.tgz"
+if curl_with_retry "$S3_URL/$PR_TO_TEST/$SHA_TO_TEST$COMMON_BUILD_PREFIX/$BUILD_NAME/performance.tgz"
 then
-    right_path="$S3_URL/$PR_TO_TEST/$SHA_TO_TEST$COMMON_BUILD_PREFIX/performance/performance.tgz"
+    right_path="$S3_URL/$PR_TO_TEST/$SHA_TO_TEST$COMMON_BUILD_PREFIX/$BUILD_NAME/performance.tgz"
 fi
 
 mkdir right
-wget -nv -nd -c "$right_path" -O- | tar -C right --strip-components=1 -zxv
+wget -nv -nd -c "$right_path" -O- | tar -C right --no-same-owner --strip-components=1 -zxv
 
 # Find reference revision if not specified explicitly
 if [ "$REF_SHA" == "" ]; then find_reference_sha; fi
@@ -155,7 +161,7 @@ ulimit -c unlimited
 cat /proc/sys/kernel/core_pattern
 
 # Start the main comparison script.
-{ \
+{
     time ../download.sh "$REF_PR" "$REF_SHA" "$PR_TO_TEST" "$SHA_TO_TEST" && \
     time stage=configure "$script_path"/compare.sh ; \
 } 2>&1 | ts "$(printf '%%Y-%%m-%%d %%H:%%M:%%S\t')" | tee compare.log
@@ -178,4 +184,6 @@ ls -lath
     report analyze benchmark metrics \
     ./*.core.dmp ./*.core
 
-cp compare.log /output
+# If the files aren't the same, copy it
+cmp --silent compare.log /output/compare.log || \
+    cp compare.log /output
diff --git a/docs/changelogs/v22.1.1.2542-prestable.md b/docs/changelogs/v22.1.1.2542-prestable.md
index b552da5cfb8..f6418c5c3b9 100644
--- a/docs/changelogs/v22.1.1.2542-prestable.md
+++ b/docs/changelogs/v22.1.1.2542-prestable.md
@@ -178,6 +178,19 @@
 * Fix segfault in Avro that appears after the second insert into file. [#33566](https://github.com/ClickHouse/ClickHouse/pull/33566) ([Kruglov Pavel](https://github.com/Avogar)).
 * Fix wrong database for JOIN w/o explicit database in distributed queries (Fixes: [#10471](https://github.com/ClickHouse/ClickHouse/issues/10471)). [#33611](https://github.com/ClickHouse/ClickHouse/pull/33611) ([Azat Khuzhin](https://github.com/azat)).
 
+#### Bug Fix (user-visible misbehaviour in official stable or prestable release):
+
+* Fix possible crash (or incorrect result) in case of `LowCardinality` arguments of window function. Fixes [#31114](https://github.com/ClickHouse/ClickHouse/issues/31114). [#31888](https://github.com/ClickHouse/ClickHouse/pull/31888) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+
+#### Bug Fix (v21.9.4.35-stable)
+
+* Fix [#32964](https://github.com/ClickHouse/ClickHouse/issues/32964). [#32965](https://github.com/ClickHouse/ClickHouse/pull/32965) ([save-my-heart](https://github.com/save-my-heart)).
+
+#### NO CL CATEGORY
+
+* Fix Regular Expression while key path search. [#33023](https://github.com/ClickHouse/ClickHouse/pull/33023) ([mreddy017](https://github.com/mreddy017)).
+* - Allow to split GraphiteMergeTree rollup rules for plain/tagged metrics (optional rule_type field). [#33494](https://github.com/ClickHouse/ClickHouse/pull/33494) ([Michail Safronov](https://github.com/msaf1980)).
+
 #### NO CL ENTRY
 
 * NO CL ENTRY: 'Update CHANGELOG.md'. [#32472](https://github.com/ClickHouse/ClickHouse/pull/32472) ([Rich Raposa](https://github.com/rfraposa)).
@@ -198,19 +211,6 @@
 * NO CL ENTRY: 'Added Superwall to adopters list'. [#33573](https://github.com/ClickHouse/ClickHouse/pull/33573) ([Justin Hilliard](https://github.com/jahilliard)).
 * NO CL ENTRY: 'Revert "Ignore parse failure of opentelemetry header"'. [#33594](https://github.com/ClickHouse/ClickHouse/pull/33594) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
 
-#### Bug Fix (user-visible misbehaviour in official stable or prestable release):
-
-* Fix possible crash (or incorrect result) in case of `LowCardinality` arguments of window function. Fixes [#31114](https://github.com/ClickHouse/ClickHouse/issues/31114). [#31888](https://github.com/ClickHouse/ClickHouse/pull/31888) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
-
-#### NO CL CATEGORY
-
-* Fix Regular Expression while key path search. [#33023](https://github.com/ClickHouse/ClickHouse/pull/33023) ([mreddy017](https://github.com/mreddy017)).
-* - Allow to split GraphiteMergeTree rollup rules for plain/tagged metrics (optional rule_type field). [#33494](https://github.com/ClickHouse/ClickHouse/pull/33494) ([Michail Safronov](https://github.com/msaf1980)).
-
-#### Bug Fix (v21.9.4.35-stable)
-
-* Fix [#32964](https://github.com/ClickHouse/ClickHouse/issues/32964).
[#32965](https://github.com/ClickHouse/ClickHouse/pull/32965) ([save-my-heart](https://github.com/save-my-heart)). - #### New Feature / New Tool * Tool for collecting diagnostics data. [#33175](https://github.com/ClickHouse/ClickHouse/pull/33175) ([Alexander Burmak](https://github.com/Alex-Burmak)). diff --git a/docs/changelogs/v22.2.1.2139-prestable.md b/docs/changelogs/v22.2.1.2139-prestable.md index 187a2a26a66..26ac24a7778 100644 --- a/docs/changelogs/v22.2.1.2139-prestable.md +++ b/docs/changelogs/v22.2.1.2139-prestable.md @@ -38,6 +38,7 @@ #### Improvement * Now ReplicatedMergeTree can recover data when some of its disks are broken. [#13544](https://github.com/ClickHouse/ClickHouse/pull/13544) ([Amos Bird](https://github.com/amosbird)). +* Dynamic reload of server TLS certificates on config reload. Closes [#15764](https://github.com/ClickHouse/ClickHouse/issues/15764). [#15765](https://github.com/ClickHouse/ClickHouse/pull/15765) ([johnskopis](https://github.com/johnskopis)). * Merge [#15765](https://github.com/ClickHouse/ClickHouse/issues/15765) (Dynamic reload of server TLS certificates on config reload) cc @johnskopis. [#31257](https://github.com/ClickHouse/ClickHouse/pull/31257) ([Filatenkov Artur](https://github.com/FArthur-cmd)). * Added `UUID` data type support for functions `hex`, `bin`. [#32170](https://github.com/ClickHouse/ClickHouse/pull/32170) ([Frank Chen](https://github.com/FrankChen021)). * Support `optimize_read_in_order` if prefix of sorting key is already sorted. E.g. if we have sorting key `ORDER BY (a, b)` in table and query with `WHERE a = const ORDER BY b` clauses, now it will be applied reading in order of sorting key instead of full sort. [#32748](https://github.com/ClickHouse/ClickHouse/pull/32748) ([Anton Popov](https://github.com/CurtizJ)). @@ -194,15 +195,16 @@ * Fixed the assertion in case of using `allow_experimental_parallel_reading_from_replicas` with `max_parallel_replicas` equals to 1. This fixes [#34525](https://github.com/ClickHouse/ClickHouse/issues/34525). [#34613](https://github.com/ClickHouse/ClickHouse/pull/34613) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). * - Add Debug workflow to get variables for all actions on demand - Fix lack of pr_info.number for some edge case. [#34644](https://github.com/ClickHouse/ClickHouse/pull/34644) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +#### NO CL CATEGORY + +* Reverting to previous docker images, will take a closer look at failing tests from [#34373](https://github.com/ClickHouse/ClickHouse/issues/34373). [#34413](https://github.com/ClickHouse/ClickHouse/pull/34413) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + #### NO CL ENTRY +* NO CL ENTRY: 'Switch gosu to su-exec'. [#33563](https://github.com/ClickHouse/ClickHouse/pull/33563) ([Anselmo D. Adams](https://github.com/anselmodadams)). * NO CL ENTRY: 'Revert "Additionally check remote_fs_execute_merges_on_single_replica_time_threshold inside ReplicatedMergeTreeQueue"'. [#34201](https://github.com/ClickHouse/ClickHouse/pull/34201) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * NO CL ENTRY: 'Revert "Add func tests run with s3"'. [#34211](https://github.com/ClickHouse/ClickHouse/pull/34211) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * NO CL ENTRY: 'Revert "Add pool to WriteBufferFromS3"'. [#34212](https://github.com/ClickHouse/ClickHouse/pull/34212) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * NO CL ENTRY: 'Add support agreement page and snippets.'. 
[#34512](https://github.com/ClickHouse/ClickHouse/pull/34512) ([Tom Risse](https://github.com/flickerbox-tom)). * NO CL ENTRY: 'Add Gigasheet to adopters'. [#34589](https://github.com/ClickHouse/ClickHouse/pull/34589) ([Brian Hunter](https://github.com/bjhunter)). -#### NO CL CATEGORY - -* Reverting to previous docker images, will take a closer look at failing tests from [#34373](https://github.com/ClickHouse/ClickHouse/issues/34373). [#34413](https://github.com/ClickHouse/ClickHouse/pull/34413) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). - diff --git a/docs/changelogs/v22.3.2.2-lts.md b/docs/changelogs/v22.3.2.2-lts.md index fc37facc7af..ef45265c7bd 100644 --- a/docs/changelogs/v22.3.2.2-lts.md +++ b/docs/changelogs/v22.3.2.2-lts.md @@ -1,6 +1,2 @@ ### ClickHouse release v22.3.2.2-lts FIXME as compared to v22.3.1.1262-prestable -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) - -* Fix bug in S3 zero-copy replication which can lead to errors like `Found parts with the same min block and with the same max block as the missing part` after concurrent fetch/drop table. [#35348](https://github.com/ClickHouse/ClickHouse/pull/35348) ([alesapin](https://github.com/alesapin)). - diff --git a/docs/changelogs/v22.3.4.20-lts.md b/docs/changelogs/v22.3.4.20-lts.md index e746a8e3e0b..4bb4f1bf0f4 100644 --- a/docs/changelogs/v22.3.4.20-lts.md +++ b/docs/changelogs/v22.3.4.20-lts.md @@ -8,7 +8,6 @@ * Backported in [#36244](https://github.com/ClickHouse/ClickHouse/issues/36244): Fix usage of quota with asynchronous inserts. [#35645](https://github.com/ClickHouse/ClickHouse/pull/35645) ([Anton Popov](https://github.com/CurtizJ)). * Backported in [#36240](https://github.com/ClickHouse/ClickHouse/issues/36240): Fix possible loss of subcolumns in type `Object`. [#35682](https://github.com/ClickHouse/ClickHouse/pull/35682) ([Anton Popov](https://github.com/CurtizJ)). * Backported in [#36242](https://github.com/ClickHouse/ClickHouse/issues/36242): Fix possible `Can't adjust last granule` exception while reading subcolumns of type `Object`. [#35687](https://github.com/ClickHouse/ClickHouse/pull/35687) ([Anton Popov](https://github.com/CurtizJ)). -* Backported in [#35938](https://github.com/ClickHouse/ClickHouse/issues/35938): Avoid processing per-column TTL multiple times. [#35820](https://github.com/ClickHouse/ClickHouse/pull/35820) ([Azat Khuzhin](https://github.com/azat)). * Backported in [#36147](https://github.com/ClickHouse/ClickHouse/issues/36147): Fix reading from `Kafka` tables when `kafka_num_consumers > 1` and `kafka_thread_per_consumer = 0`. Returns parallel & multithreaded reading, accidentally broken in 21.11. Closes [#35153](https://github.com/ClickHouse/ClickHouse/issues/35153). [#35973](https://github.com/ClickHouse/ClickHouse/pull/35973) ([filimonov](https://github.com/filimonov)). * Backported in [#36276](https://github.com/ClickHouse/ClickHouse/issues/36276): Fix reading of empty arrays in reverse order (in queries with descending sorting by prefix of primary key). [#36215](https://github.com/ClickHouse/ClickHouse/pull/36215) ([Anton Popov](https://github.com/CurtizJ)). 
diff --git a/docs/changelogs/v22.3.6.5-lts.md b/docs/changelogs/v22.3.6.5-lts.md index 70a81bbe9ad..16cf390c703 100644 --- a/docs/changelogs/v22.3.6.5-lts.md +++ b/docs/changelogs/v22.3.6.5-lts.md @@ -2,6 +2,5 @@ #### Bug Fix (user-visible misbehaviour in official stable or prestable release) -* Backported in [#36525](https://github.com/ClickHouse/ClickHouse/issues/36525): Queries with aliases inside special operators returned parsing error (was broken in 22.1). Example: `SELECT substring('test' AS t, 1, 1)`. [#36167](https://github.com/ClickHouse/ClickHouse/pull/36167) ([Maksim Kita](https://github.com/kitaisreal)). * Backported in [#36795](https://github.com/ClickHouse/ClickHouse/issues/36795): Fix vertical merges in wide parts. Previously an exception `There is no column` can be thrown during merge. [#36707](https://github.com/ClickHouse/ClickHouse/pull/36707) ([Anton Popov](https://github.com/CurtizJ)). diff --git a/docs/changelogs/v22.4.1.2305-prestable.md b/docs/changelogs/v22.4.1.2305-prestable.md index ffd11c7d9c4..eb1ed6decd3 100644 --- a/docs/changelogs/v22.4.1.2305-prestable.md +++ b/docs/changelogs/v22.4.1.2305-prestable.md @@ -150,6 +150,10 @@ * Check a number of required reports in BuilderSpecialReport. [#36413](https://github.com/ClickHouse/ClickHouse/pull/36413) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). * Add a labeling for `Revert` PRs. [#36422](https://github.com/ClickHouse/ClickHouse/pull/36422) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +#### Bug Fix (prestable release) + +* call RemoteQueryExecutor with original_query instead of an rewritten query, elimate the AMBIGUOUS_COLUMN_NAME exception. [#35748](https://github.com/ClickHouse/ClickHouse/pull/35748) ([lgbo](https://github.com/lgbo-ustc)). + #### Bug Fix (user-visible misbehaviour in official stable or prestable release) * Disallow ALTER TTL for engines that does not support it, to avoid breaking ATTACH TABLE (closes [#33344](https://github.com/ClickHouse/ClickHouse/issues/33344)). [#33391](https://github.com/ClickHouse/ClickHouse/pull/33391) ([zhongyuankai](https://github.com/zhongyuankai)). @@ -158,7 +162,6 @@ * Fix mutations in tables with enabled sparse columns. [#35284](https://github.com/ClickHouse/ClickHouse/pull/35284) ([Anton Popov](https://github.com/CurtizJ)). * Fix schema inference for TSKV format while using small max_read_buffer_size. [#35332](https://github.com/ClickHouse/ClickHouse/pull/35332) ([Kruglov Pavel](https://github.com/Avogar)). * Fix partition pruning in case of comparison with constant in `WHERE`. If column and constant had different types, overflow was possible. Query could return an incorrect empty result. This fixes [#35304](https://github.com/ClickHouse/ClickHouse/issues/35304). [#35334](https://github.com/ClickHouse/ClickHouse/pull/35334) ([Amos Bird](https://github.com/amosbird)). -* Fix bug in S3 zero-copy replication which can lead to errors like `Found parts with the same min block and with the same max block as the missing part` after concurrent fetch/drop table. [#35348](https://github.com/ClickHouse/ClickHouse/pull/35348) ([alesapin](https://github.com/alesapin)). * Fix issue with non-existing directory https://github.com/ClickHouse/ClickHouse/runs/5588046879?check_suite_focus=true. [#35376](https://github.com/ClickHouse/ClickHouse/pull/35376) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). * Fix possible deadlock in cache. [#35378](https://github.com/ClickHouse/ClickHouse/pull/35378) ([Kseniia Sumarokova](https://github.com/kssenii)). 
* Fix wrong assets path in release workflow. [#35379](https://github.com/ClickHouse/ClickHouse/pull/35379) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). @@ -233,7 +236,3 @@ * NO CL ENTRY: 'Revert "clang-tidy report issues with Medium priority"'. [#35941](https://github.com/ClickHouse/ClickHouse/pull/35941) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * NO CL ENTRY: 'Revert "Fix crash in ParallelReadBuffer"'. [#36210](https://github.com/ClickHouse/ClickHouse/pull/36210) ([Alexander Tokmakov](https://github.com/tavplubix)). -#### Bug Fix (prestable release) - -* call RemoteQueryExecutor with original_query instead of an rewritten query, elimate the AMBIGUOUS_COLUMN_NAME exception. [#35748](https://github.com/ClickHouse/ClickHouse/pull/35748) ([lgbo](https://github.com/lgbo-ustc)). - diff --git a/docs/changelogs/v22.4.2.1-stable.md b/docs/changelogs/v22.4.2.1-stable.md index c40bf8d92c9..cd7ee75997c 100644 --- a/docs/changelogs/v22.4.2.1-stable.md +++ b/docs/changelogs/v22.4.2.1-stable.md @@ -1,6 +1,2 @@ ### ClickHouse release v22.4.2.1-stable FIXME as compared to v22.4.1.2305-prestable -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) - -* Fix projection analysis which might lead to wrong query result when IN subquery is used. This fixes [#35336](https://github.com/ClickHouse/ClickHouse/issues/35336). [#35631](https://github.com/ClickHouse/ClickHouse/pull/35631) ([Amos Bird](https://github.com/amosbird)). - diff --git a/docs/changelogs/v22.4.4.7-stable.md b/docs/changelogs/v22.4.4.7-stable.md index 794082328df..9004ce2f6bc 100644 --- a/docs/changelogs/v22.4.4.7-stable.md +++ b/docs/changelogs/v22.4.4.7-stable.md @@ -3,6 +3,5 @@ #### Bug Fix (user-visible misbehaviour in official stable or prestable release) * Backported in [#36524](https://github.com/ClickHouse/ClickHouse/issues/36524): Queries with aliases inside special operators returned parsing error (was broken in 22.1). Example: `SELECT substring('test' AS t, 1, 1)`. [#36167](https://github.com/ClickHouse/ClickHouse/pull/36167) ([Maksim Kita](https://github.com/kitaisreal)). -* Backported in [#36582](https://github.com/ClickHouse/ClickHouse/issues/36582): Fix nullptr dereference in JOIN and COLUMNS matcher. This fixes [#36416](https://github.com/ClickHouse/ClickHouse/issues/36416) . This is for https://github.com/ClickHouse/ClickHouse/pull/36417. [#36430](https://github.com/ClickHouse/ClickHouse/pull/36430) ([Amos Bird](https://github.com/amosbird)). * Backported in [#36673](https://github.com/ClickHouse/ClickHouse/issues/36673): Fix merges of wide parts with type `Object`. [#36637](https://github.com/ClickHouse/ClickHouse/pull/36637) ([Anton Popov](https://github.com/CurtizJ)). diff --git a/docs/changelogs/v22.4.5.9-stable.md b/docs/changelogs/v22.4.5.9-stable.md index 63dfd117816..ab43bf3eade 100644 --- a/docs/changelogs/v22.4.5.9-stable.md +++ b/docs/changelogs/v22.4.5.9-stable.md @@ -2,7 +2,6 @@ #### Bug Fix (user-visible misbehaviour in official stable or prestable release) -* Backported in [#36524](https://github.com/ClickHouse/ClickHouse/issues/36524): Queries with aliases inside special operators returned parsing error (was broken in 22.1). Example: `SELECT substring('test' AS t, 1, 1)`. [#36167](https://github.com/ClickHouse/ClickHouse/pull/36167) ([Maksim Kita](https://github.com/kitaisreal)). 
* Backported in [#36635](https://github.com/ClickHouse/ClickHouse/issues/36635): Fix `Missing column` exception which could happen while using `INTERPOLATE` with `ENGINE = MergeTree` table. [#36549](https://github.com/ClickHouse/ClickHouse/pull/36549) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). * Backported in [#36794](https://github.com/ClickHouse/ClickHouse/issues/36794): Fix vertical merges in wide parts. Previously an exception `There is no column` can be thrown during merge. [#36707](https://github.com/ClickHouse/ClickHouse/pull/36707) ([Anton Popov](https://github.com/CurtizJ)). * Backported in [#36926](https://github.com/ClickHouse/ClickHouse/issues/36926): Fix bug in clickhouse-keeper which can lead to corrupted compressed log files in case of small load and restarts. [#36910](https://github.com/ClickHouse/ClickHouse/pull/36910) ([alesapin](https://github.com/alesapin)). diff --git a/docs/changelogs/v22.5.1.2079-stable.md b/docs/changelogs/v22.5.1.2079-stable.md new file mode 100644 index 00000000000..aab8266c115 --- /dev/null +++ b/docs/changelogs/v22.5.1.2079-stable.md @@ -0,0 +1,182 @@ +### ClickHouse release v22.5.1.2079-stable FIXME as compared to v22.4.1.2305-prestable + +#### Backward Incompatible Change +* Updated the BoringSSL module to the official FIPS compliant version. This makes ClickHouse FIPS compliant. [#35914](https://github.com/ClickHouse/ClickHouse/pull/35914) ([Meena-Renganathan](https://github.com/Meena-Renganathan)). +* Now, background merges, mutations and `OPTIMIZE` will not increment `SelectedRows` and `SelectedBytes` metrics. They (still) will increment `MergedRows` and `MergedUncompressedBytes` as it was before. [#37040](https://github.com/ClickHouse/ClickHouse/pull/37040) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + +#### New Feature +* add implementation of MeiliSearch storage and table function. [#33332](https://github.com/ClickHouse/ClickHouse/pull/33332) ([Mikhail Artemenko](https://github.com/Michicosun)). +* Add support of GROUPING SETS in GROUP BY clause. Follow up after [#33186](https://github.com/ClickHouse/ClickHouse/issues/33186). This implementation supports a parallel processing of grouping sets. [#33631](https://github.com/ClickHouse/ClickHouse/pull/33631) ([Dmitry Novik](https://github.com/novikd)). +* According to the design mentioned at :[#19627](https://github.com/ClickHouse/ClickHouse/issues/19627)#issuecomment-1068772646. [#35318](https://github.com/ClickHouse/ClickHouse/pull/35318) ([徐炘](https://github.com/weeds085490)). +* Added `SYSTEM SYNC DATABASE REPLICA` query which allows to sync tables metadata inside Replicated database, because currently synchronisation is asynchronous. [#35944](https://github.com/ClickHouse/ClickHouse/pull/35944) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* - Add output format Prometheus, [#36051](https://github.com/ClickHouse/ClickHouse/issues/36051). [#36206](https://github.com/ClickHouse/ClickHouse/pull/36206) ([Vladimir C](https://github.com/vdimir)). +* Parse collations in CREATE TABLE, throw exception or ignore. closes [#35892](https://github.com/ClickHouse/ClickHouse/issues/35892). [#36271](https://github.com/ClickHouse/ClickHouse/pull/36271) ([yuuch](https://github.com/yuuch)). +* Add aliases JSONLines and NDJSON for JSONEachRow. Closes [#36303](https://github.com/ClickHouse/ClickHouse/issues/36303). [#36327](https://github.com/ClickHouse/ClickHouse/pull/36327) ([flynn](https://github.com/ucasfl)). 
+* Set parts_to_delay_insert and parts_to_throw_insert as query-level settings. If they are defined, they can override table-level settings. [#36371](https://github.com/ClickHouse/ClickHouse/pull/36371) ([Memo](https://github.com/Joeywzr)). +* temporary table can show total rows and total bytes. [#36401](https://github.com/ClickHouse/ClickHouse/issues/36401). [#36439](https://github.com/ClickHouse/ClickHouse/pull/36439) ([xiedeyantu](https://github.com/xiedeyantu)). +* Added new hash function - wyHash64. [#36467](https://github.com/ClickHouse/ClickHouse/pull/36467) ([olevino](https://github.com/olevino)). +* Window function nth_value was added. [#36601](https://github.com/ClickHouse/ClickHouse/pull/36601) ([Nikolay](https://github.com/ndchikin)). +* Add MySQLDump input format. It reads all data from INSERT queries belonging to one table in dump. If there are more than one table, by default it reads data from the first one. [#36667](https://github.com/ClickHouse/ClickHouse/pull/36667) ([Kruglov Pavel](https://github.com/Avogar)). +* New single binary based diagnostics tool. [#36705](https://github.com/ClickHouse/ClickHouse/pull/36705) ([Dale McDiarmid](https://github.com/gingerwizard)). +* **Description:** It is used to count the system table of a request for remote file access, which can help users analyze the causes of performance fluctuations in the scenario of separation of storage and computer. The current system table structure is as follows. When a query reads a segment of a remote file, a record is generated. Read type include **READ_FROM_FS_AND_DOWNLOADED_TO_CACHE、READ_FROM_CACHE、READ_FROM_FS_BYPASSING_CACHE**, which used to indicate whether the query accesses the segment from the cache or from a remote file. [#36802](https://github.com/ClickHouse/ClickHouse/pull/36802) ([Han Shukai](https://github.com/KinderRiven)). +* Adds `h3Line`, `h3Distance` and `h3HexRing` functions. [#37030](https://github.com/ClickHouse/ClickHouse/pull/37030) ([Bharat Nallan](https://github.com/bharatnc)). +* Related issue - [#35101](https://github.com/ClickHouse/ClickHouse/issues/35101). [#37033](https://github.com/ClickHouse/ClickHouse/pull/37033) ([qieqieplus](https://github.com/qieqieplus)). +* Added system.certificates table. [#37142](https://github.com/ClickHouse/ClickHouse/pull/37142) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). + +#### Performance Improvement +* Improve performance or ORDER BY, MergeJoin, insertion into MergeTree using JIT compilation of sort columns comparator. [#34469](https://github.com/ClickHouse/ClickHouse/pull/34469) ([Maksim Kita](https://github.com/kitaisreal)). +* First commit is to increase the inline threshold. Next commits will improve queries by inlining for those who have shown better performance. This way we will not increase the compile time and binary size and optimize the program. [#34544](https://github.com/ClickHouse/ClickHouse/pull/34544) ([Daniel Kutenin](https://github.com/danlark1)). +* Transform OR LIKE chain to multiMatchAny. Will enable once we have more confidence it works. [#34932](https://github.com/ClickHouse/ClickHouse/pull/34932) ([Daniel Kutenin](https://github.com/danlark1)). +* Rewrite 'select countDistinct(a) from t' to 'select count(1) from (select a from t groupBy a)'. [#35993](https://github.com/ClickHouse/ClickHouse/pull/35993) ([zhanglistar](https://github.com/zhanglistar)). +* Change structure of `system.asynchronous_metric_log`. It will take about 10 times less space. 
This closes [#36357](https://github.com/ClickHouse/ClickHouse/issues/36357). The field `event_time_microseconds` was removed, because it is useless. [#36360](https://github.com/ClickHouse/ClickHouse/pull/36360) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The default `HashJoin` is not thread safe for inserting right table's rows and run it in a single thread. When the right table is large, the join process is too slow with low cpu utilization. [#36415](https://github.com/ClickHouse/ClickHouse/pull/36415) ([lgbo](https://github.com/lgbo-ustc)). +* Improve performance of reading from storage `File` and table functions `file` in case when path has globs and matched directory contains large number of files. [#36647](https://github.com/ClickHouse/ClickHouse/pull/36647) ([Anton Popov](https://github.com/CurtizJ)). +* Appy parallel parsing for input format `HiveText`, which can speed up HiveText parsing by 2x when reading local file. [#36650](https://github.com/ClickHouse/ClickHouse/pull/36650) ([李扬](https://github.com/taiyang-li)). +* Improves performance of file descriptor cache by narrowing mutex scopes. [#36682](https://github.com/ClickHouse/ClickHouse/pull/36682) ([Anton Kozlov](https://github.com/tonickkozlov)). +* This PR improves the `WATCH` query in WindowView: 1. Reduce the latency of providing query results by calling the `fire_condition` signal. 2. Makes the cancel query operation(ctrl-c) faster, by checking `isCancelled()` more frequently. [#37226](https://github.com/ClickHouse/ClickHouse/pull/37226) ([vxider](https://github.com/Vxider)). +* Improve performance of `avg`, `sum` aggregate functions if used without GROUP BY expression. [#37257](https://github.com/ClickHouse/ClickHouse/pull/37257) ([Maksim Kita](https://github.com/kitaisreal)). +* Improve performance of unary arithmetic functions (`bitCount`, `bitNot`, `abs`, `intExp2`, `intExp10`, `negate`, `roundAge`, `roundDuration`, `roundToExp2`, `sign`) using dynamic dispatch. [#37289](https://github.com/ClickHouse/ClickHouse/pull/37289) ([Maksim Kita](https://github.com/kitaisreal)). + +#### Improvement +* Remind properly if use clickhouse-client --file without preceeding --external. Close [#34747](https://github.com/ClickHouse/ClickHouse/issues/34747). [#34765](https://github.com/ClickHouse/ClickHouse/pull/34765) ([李扬](https://github.com/taiyang-li)). +* Added support for specifying `content_type` in predefined and static HTTP handler config. [#34916](https://github.com/ClickHouse/ClickHouse/pull/34916) ([Roman Nikonov](https://github.com/nic11)). +* Implement partial GROUP BY key for optimize_aggregation_in_order. [#35111](https://github.com/ClickHouse/ClickHouse/pull/35111) ([Azat Khuzhin](https://github.com/azat)). +* Nullables detection in protobuf using Google wrappers. [#35149](https://github.com/ClickHouse/ClickHouse/pull/35149) ([Jakub Kuklis](https://github.com/jkuklis)). +* If the required amount of memory is available before the selected query stopped, all waiting queries continue execution. Now we don't stop any query if memory is freed before the moment when the selected query knows about the cancellation. [#35637](https://github.com/ClickHouse/ClickHouse/pull/35637) ([Dmitry Novik](https://github.com/novikd)). +* Enable memory overcommit by default. [#35921](https://github.com/ClickHouse/ClickHouse/pull/35921) ([Dmitry Novik](https://github.com/novikd)). +* - Add branch to avoid unnecessary memcpy in readbig. 
[#36095](https://github.com/ClickHouse/ClickHouse/pull/36095) ([jasperzhu](https://github.com/jinjunzh)). +* Refactor code around schema inference with globs. Try next file from glob only if it makes sense (previously we tried next file in case of any error). Also it fixes [#36317](https://github.com/ClickHouse/ClickHouse/issues/36317). [#36205](https://github.com/ClickHouse/ClickHouse/pull/36205) ([Kruglov Pavel](https://github.com/Avogar)). +* Improve schema inference for json objects. [#36207](https://github.com/ClickHouse/ClickHouse/pull/36207) ([Kruglov Pavel](https://github.com/Avogar)). +* Add support for force recovery which allows you to reconfigure cluster without quorum. [#36258](https://github.com/ClickHouse/ClickHouse/pull/36258) ([Antonio Andelic](https://github.com/antonio2368)). +* We create a local interpreter if we want to execute query on localhost replica. But for when executing query on multiple replicas we rely on the fact that a connection exists so replicas can talk to coordinator. It is now improved and localhost replica can talk to coordinator directly in the same process. [#36281](https://github.com/ClickHouse/ClickHouse/pull/36281) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Show names of erroneous files in case of parsing errors while executing table functions `file`, `s3` and `url`. [#36314](https://github.com/ClickHouse/ClickHouse/pull/36314) ([Anton Popov](https://github.com/CurtizJ)). +* Allowed to increase the number of threads for executing background operations (merges, mutations, moves and fetches) at runtime if they are specified at top level config. [#36425](https://github.com/ClickHouse/ClickHouse/pull/36425) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* clickhouse-benchmark can read auth from environment variables. [#36497](https://github.com/ClickHouse/ClickHouse/pull/36497) ([Anton Kozlov](https://github.com/tonickkozlov)). +* Allow names of tuple elements that start from digits. [#36544](https://github.com/ClickHouse/ClickHouse/pull/36544) ([Anton Popov](https://github.com/CurtizJ)). +* Allow file descriptors in table function file if it is run in clickhouse-local. [#36562](https://github.com/ClickHouse/ClickHouse/pull/36562) ([wuxiaobai24](https://github.com/wuxiaobai24)). +* Allow to cast columns of type `Object(...)` to `Object(Nullable(...))`. [#36564](https://github.com/ClickHouse/ClickHouse/pull/36564) ([awakeljw](https://github.com/awakeljw)). +* Cleanup CSS in Play UI. The pixels are more evenly placed. Better usability for long content in table cells. [#36569](https://github.com/ClickHouse/ClickHouse/pull/36569) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The metrics about time spent reading from s3 now calculated correctly. Close [#35483](https://github.com/ClickHouse/ClickHouse/issues/35483). [#36572](https://github.com/ClickHouse/ClickHouse/pull/36572) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Improve `SYSTEM DROP FILESYSTEM CACHE` query: `` option and `FORCE` option. [#36639](https://github.com/ClickHouse/ClickHouse/pull/36639) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add `is_all_data_sent` column into `system.processes`, and improve internal testing hardening check based on it. [#36649](https://github.com/ClickHouse/ClickHouse/pull/36649) ([Azat Khuzhin](https://github.com/azat)). +* Now date time conversion functions that generates time before 1970-01-01 00:00:00 with partial hours/minutes timezones will be saturated to zero instead of overflow. 
This is the continuation of https://github.com/ClickHouse/ClickHouse/pull/29953 which addresses https://github.com/ClickHouse/ClickHouse/pull/29953#discussion_r800550280 . Mark as improvement because it's implementation defined behavior (and very rare case) and we are allowed to break it. [#36656](https://github.com/ClickHouse/ClickHouse/pull/36656) ([Amos Bird](https://github.com/amosbird)). +* Allow to cancel query while still keep decent query id in MySQLHandler. [#36699](https://github.com/ClickHouse/ClickHouse/pull/36699) ([Amos Bird](https://github.com/amosbird)). +* Properly cancel INSERT queries in `clickhouse-client`/`clickhouse-local`. [#36710](https://github.com/ClickHouse/ClickHouse/pull/36710) ([Azat Khuzhin](https://github.com/azat)). +* Allow cluster macro in s3Cluster table function. [#36726](https://github.com/ClickHouse/ClickHouse/pull/36726) ([Vadim Volodin](https://github.com/PolyProgrammist)). +* Added `user_defined_path` config setting. [#36753](https://github.com/ClickHouse/ClickHouse/pull/36753) ([Maksim Kita](https://github.com/kitaisreal)). +* Allow to execute hash functions with arguments of type `Array(Tuple(..))`. [#36812](https://github.com/ClickHouse/ClickHouse/pull/36812) ([Anton Popov](https://github.com/CurtizJ)). +* Add warning if someone running clickhouse-server with log level "test". The log level "test" was added recently and cannot be used in production due to inevitable, unavoidable, fatal and life-threatening performance degradation. [#36824](https://github.com/ClickHouse/ClickHouse/pull/36824) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Play UI: If there is one row in result and more than a few columns, display the result vertically. Continuation of [#36811](https://github.com/ClickHouse/ClickHouse/issues/36811). [#36842](https://github.com/ClickHouse/ClickHouse/pull/36842) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add extra diagnostic info (if applicable) when sending exception to other server. [#36872](https://github.com/ClickHouse/ClickHouse/pull/36872) ([Alexander Tokmakov](https://github.com/tavplubix)). +* After [#36425](https://github.com/ClickHouse/ClickHouse/issues/36425) settings like `background_fetches_pool_size` became obsolete and can appear in top level config, but clickhouse throws and exception like `Error updating configuration from '/etc/clickhouse-server/config.xml' config.: Code: 137. DB::Exception: A setting 'background_fetches_pool_size' appeared at top level in config /etc/clickhouse-server/config.xml.` This is fixed. [#36917](https://github.com/ClickHouse/ClickHouse/pull/36917) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Finalize write buffers in case of exception to avoid doing it in destructors. Hope it fixes: [#36907](https://github.com/ClickHouse/ClickHouse/issues/36907). [#36979](https://github.com/ClickHouse/ClickHouse/pull/36979) ([Kruglov Pavel](https://github.com/Avogar)). +* Play UI: Nullable numbers will be aligned to the right in table cells. This closes [#36982](https://github.com/ClickHouse/ClickHouse/issues/36982). [#36988](https://github.com/ClickHouse/ClickHouse/pull/36988) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Implemented a new mode of handling row policies which can be enabled in the main configuration which enables users without permissive row policies to read rows. [#36997](https://github.com/ClickHouse/ClickHouse/pull/36997) ([Vitaly Baranov](https://github.com/vitlibar)). 
+* Fix bug which can lead to forgotten outdated parts in MergeTree table engines family in case of filesystem failures during parts removal. Before fix they will be removed only after first server restart. [#37014](https://github.com/ClickHouse/ClickHouse/pull/37014) ([alesapin](https://github.com/alesapin)). +* Modify query div in play.html to be extendable beyond 200px height. In case of very long queries it is helpful to extend the textarea element, only today, since the div is fixed height, the extended textarea hides the data div underneath. With this fix, extending the textarea element will push the data div down/up such the extended textarea won't hide it. [#37051](https://github.com/ClickHouse/ClickHouse/pull/37051) ([guyco87](https://github.com/guyco87)). +* Better read from cache. [#37054](https://github.com/ClickHouse/ClickHouse/pull/37054) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix progress indication for `INSERT SELECT` in clickhouse-local for any query and for file progress in client, more correct file progress. [#37075](https://github.com/ClickHouse/ClickHouse/pull/37075) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Disable `log_query_threads` setting by default. It controls the logging of statistics about every thread participating in query execution. After supporting asynchronous reads, the total number of distinct thread ids became too large, and logging into the `query_thread_log` has become too heavy. [#37077](https://github.com/ClickHouse/ClickHouse/pull/37077) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Option `compatibility_ignore_auto_increment_in_create_table` allows ignoring `AUTO_INCREMENT` keyword in a column declaration to simplify migration from MySQL. [#37178](https://github.com/ClickHouse/ClickHouse/pull/37178) ([Igor Nikonov](https://github.com/devcrafter)). +* Added implicit cast for `h3kRing` function second argument to improve usability. Closes [#35432](https://github.com/ClickHouse/ClickHouse/issues/35432). [#37189](https://github.com/ClickHouse/ClickHouse/pull/37189) ([Maksim Kita](https://github.com/kitaisreal)). +* Limit the max partitions could be queried for each hive table. Avoid resource overruns. [#37281](https://github.com/ClickHouse/ClickHouse/pull/37281) ([lgbo](https://github.com/lgbo-ustc)). + +#### Bug Fix +* Extracts Version ID if present from the URI and adds a request to the AWS HTTP URI. Closes [#31221](https://github.com/ClickHouse/ClickHouse/issues/31221). - [x] Extract `Version ID` from URI if present and reassemble without it. - [x] Configure `AWS HTTP URI` object with request. - [x] Unit Tests: [`gtest_s3_uri`](https://github.com/ClickHouse/ClickHouse/blob/2340a6c6849ebc05a8efbf97ba8de3ff9dc0eff4/src/IO/tests/gtest_s3_uri.cpp) - [x] Drop instrumentation commit. [#34571](https://github.com/ClickHouse/ClickHouse/pull/34571) ([Saad Ur Rahman](https://github.com/surahman)). + +#### Build/Testing/Packaging Improvement +* Now `clickhouse-keeper` for the `x86_64` architecture is statically linked with [musl](https://musl.libc.org/) and doesn't depend on any system libraries. [#31833](https://github.com/ClickHouse/ClickHouse/pull/31833) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fail performance comparison on errors in the report. [#34797](https://github.com/ClickHouse/ClickHouse/pull/34797) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Check out the most of build jobs with depth=1. [#36091](https://github.com/ClickHouse/ClickHouse/pull/36091) ([Mikhail f. 
Shiryaev](https://github.com/Felixoid)). +* Bump minizip-ng to a sane version, or else old git won't be able to address dangling remote ref. [#35656](https://github.com/ClickHouse/ClickHouse/issues/35656). [#36295](https://github.com/ClickHouse/ClickHouse/pull/36295) ([Amos Bird](https://github.com/amosbird)). +* Use consistent `force tests` label in CI. [#36496](https://github.com/ClickHouse/ClickHouse/pull/36496) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Limit PowerPC code generation to Power8 for better compatibility. This closes [#36025](https://github.com/ClickHouse/ClickHouse/issues/36025). [#36529](https://github.com/ClickHouse/ClickHouse/pull/36529) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* - More robust handling of unknown architectures in CMake. [#36614](https://github.com/ClickHouse/ClickHouse/pull/36614) ([Robert Schulze](https://github.com/rschu1ze)). +* Simplify performance test. This will give a chance for us to use it. [#36769](https://github.com/ClickHouse/ClickHouse/pull/36769) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix checking for rabbitmq liveness in tests. Fixed incorrect import. [#36938](https://github.com/ClickHouse/ClickHouse/pull/36938) ([tchepavel](https://github.com/tchepavel)). +* ClickHouse builds for `PowerPC64LE` architecture are now available in universal installation script `curl https://clickhouse.com/ | sh` and by direct link `https://builds.clickhouse.com/master/powerpc64le/clickhouse`. [#37095](https://github.com/ClickHouse/ClickHouse/pull/37095) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* - Make cmake build scripts a bit more robust. [#37169](https://github.com/ClickHouse/ClickHouse/pull/37169) ([Robert Schulze](https://github.com/rschu1ze)). + +#### Bug Fix (user-visible misbehavior in official stable or prestable release) + +* The ilike() function on FixedString columns could have returned wrong results (i.e. match less than it should). [#37117](https://github.com/ClickHouse/ClickHouse/pull/37117) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix implicit cast for optimize_skip_unused_shards_rewrite_in. [#37153](https://github.com/ClickHouse/ClickHouse/pull/37153) ([Azat Khuzhin](https://github.com/azat)). +* Enable `enable_global_with_statement` for subqueries, close [#37141](https://github.com/ClickHouse/ClickHouse/issues/37141). [#37166](https://github.com/ClickHouse/ClickHouse/pull/37166) ([Vladimir C](https://github.com/vdimir)). +* Now WindowView `WATCH EVENTS` query will not be terminated due to the nonempty Chunk created in `WindowViewSource.h:58`. [#37182](https://github.com/ClickHouse/ClickHouse/pull/37182) ([vxider](https://github.com/Vxider)). +* Fix "Cannot create column of type Set" for distributed queries with LIMIT BY. [#37193](https://github.com/ClickHouse/ClickHouse/pull/37193) ([Azat Khuzhin](https://github.com/azat)). +* Fix possible overflow during `OvercommitRatio` comparison. cc @tavplubix. [#37197](https://github.com/ClickHouse/ClickHouse/pull/37197) ([Dmitry Novik](https://github.com/novikd)). +* Update `max_fired_watermark ` after blocks **actually** fired, in case delete data that hasn't been fired yet. [#37225](https://github.com/ClickHouse/ClickHouse/pull/37225) ([vxider](https://github.com/Vxider)). +* Kafka does not need `group.id` on producer stage. 
In console log you can find Warning that describe this issue: ``` 2022.05.15 17:59:13.270227 [ 137 ] {} StorageKafka (topic-name): [rdk:CONFWARN] [thrd:app]: Configuration property group.id is a consumer property and will be ignored by this producer instance ```. [#37228](https://github.com/ClickHouse/ClickHouse/pull/37228) ([Mark Andreev](https://github.com/mrk-andreev)). +* fix MySQL database engine to compatible with binary(0) dataType. [#37232](https://github.com/ClickHouse/ClickHouse/pull/37232) ([zzsmdfj](https://github.com/zzsmdfj)). +* Fix execution of mutations in tables, in which there exist columns of type `Object`. Using subcolumns of type `Object` in `WHERE` expression of `UPDATE` or `DELETE` queries is now allowed yet, as well as manipulating (`DROP`, `MODIFY`) of separate subcolumns. Fixes [#37205](https://github.com/ClickHouse/ClickHouse/issues/37205). [#37266](https://github.com/ClickHouse/ClickHouse/pull/37266) ([Anton Popov](https://github.com/CurtizJ)). +* Fix Nullable(String) to Nullable(Bool/IPv4/IPv6) conversion Closes [#37221](https://github.com/ClickHouse/ClickHouse/issues/37221). [#37270](https://github.com/ClickHouse/ClickHouse/pull/37270) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix system.opentelemetry_span_log attribute.values alias to values instead of keys. [#37275](https://github.com/ClickHouse/ClickHouse/pull/37275) ([Aleksandr Razumov](https://github.com/ernado)). +* Fix possible deadlock in OvercommitTracker during logging. cc @alesapin @tavplubix Fixes [#37272](https://github.com/ClickHouse/ClickHouse/issues/37272). [#37299](https://github.com/ClickHouse/ClickHouse/pull/37299) ([Dmitry Novik](https://github.com/novikd)). + +#### Bug Fix (user-visible misbehaviour in official stable or prestable release) + +* - fix substring function range error length when `offset` and `length` is negative constant and `s` is not constant. [#33861](https://github.com/ClickHouse/ClickHouse/pull/33861) ([RogerYK](https://github.com/RogerYK)). +* Accidentally ZSTD support for Arrow was not being built. This fixes [#35283](https://github.com/ClickHouse/ClickHouse/issues/35283). [#35486](https://github.com/ClickHouse/ClickHouse/pull/35486) ([Sean Lafferty](https://github.com/seanlaff)). +* Fix ALTER DROP COLUMN of nested column with compact parts (i.e. `ALTER TABLE x DROP COLUMN n`, when there is column `n.d`). [#35797](https://github.com/ClickHouse/ClickHouse/pull/35797) ([Azat Khuzhin](https://github.com/azat)). +* Fix insertion of complex JSONs with nested arrays to columns of type `Object`. [#36077](https://github.com/ClickHouse/ClickHouse/pull/36077) ([Anton Popov](https://github.com/CurtizJ)). +* Queries with aliases inside special operators returned parsing error (was broken in 22.1). Example: `SELECT substring('test' AS t, 1, 1)`. [#36167](https://github.com/ClickHouse/ClickHouse/pull/36167) ([Maksim Kita](https://github.com/kitaisreal)). +* - Fix assertion in JOIN, close [#36199](https://github.com/ClickHouse/ClickHouse/issues/36199). [#36201](https://github.com/ClickHouse/ClickHouse/pull/36201) ([Vladimir C](https://github.com/vdimir)). +* Fix dictionary reload for `ClickHouseDictionarySource` if it contains scalar subqueries. [#36390](https://github.com/ClickHouse/ClickHouse/pull/36390) ([lthaooo](https://github.com/lthaooo)). +* Fix nullptr dereference in JOIN and COLUMNS matcher. This fixes [#36416](https://github.com/ClickHouse/ClickHouse/issues/36416) . This is for https://github.com/ClickHouse/ClickHouse/pull/36417. 
[#36430](https://github.com/ClickHouse/ClickHouse/pull/36430) ([Amos Bird](https://github.com/amosbird)). +* Fix bug in s3Cluster schema inference that let to the fact that not all data was read in the select from s3Cluster. The bug appeared in https://github.com/ClickHouse/ClickHouse/pull/35544. [#36434](https://github.com/ClickHouse/ClickHouse/pull/36434) ([Kruglov Pavel](https://github.com/Avogar)). +* Server might fail to start if it cannot resolve hostname of external ClickHouse dictionary. It's fixed. Fixes [#36451](https://github.com/ClickHouse/ClickHouse/issues/36451). [#36463](https://github.com/ClickHouse/ClickHouse/pull/36463) ([Alexander Tokmakov](https://github.com/tavplubix)). +* This code segment can prove bug. ``` int main() { RangeGenerator g{1230, 100}; std::cout << g.totalRanges() << std::endl; int count = 0; while(g.nextRange()) ++count; std::cout << "count:" << count << std::endl; return 0; }. [#36469](https://github.com/ClickHouse/ClickHouse/pull/36469) ([李扬](https://github.com/taiyang-li)). +* Fix clickhouse-benchmark json report results. [#36473](https://github.com/ClickHouse/ClickHouse/pull/36473) ([Tian Xinhui](https://github.com/xinhuitian)). +* Add missing enum values in system.session_log table. Closes [#36474](https://github.com/ClickHouse/ClickHouse/issues/36474). [#36480](https://github.com/ClickHouse/ClickHouse/pull/36480) ([Memo](https://github.com/Joeywzr)). +* Fix possible exception with unknown packet from server in client. [#36481](https://github.com/ClickHouse/ClickHouse/pull/36481) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix usage of executable user defined functions in GROUP BY. Before executable user defined functions cannot be used as expressions in GROUP BY. Closes [#36448](https://github.com/ClickHouse/ClickHouse/issues/36448). [#36486](https://github.com/ClickHouse/ClickHouse/pull/36486) ([Maksim Kita](https://github.com/kitaisreal)). +* close [#33906](https://github.com/ClickHouse/ClickHouse/issues/33906). [#36489](https://github.com/ClickHouse/ClickHouse/pull/36489) ([awakeljw](https://github.com/awakeljw)). +* Fix hostname sanity checks for Keeper cluster configuration. Add `keeper_server.host_checks_enabled` config to enable/disable those checks. [#36492](https://github.com/ClickHouse/ClickHouse/pull/36492) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix offset update ReadBufferFromEncryptedFile, which could cause undefined behaviour. [#36493](https://github.com/ClickHouse/ClickHouse/pull/36493) ([Kseniia Sumarokova](https://github.com/kssenii)). +* - Fix potential error with literals in `WHERE` for join queries. Close [#36279](https://github.com/ClickHouse/ClickHouse/issues/36279). [#36542](https://github.com/ClickHouse/ClickHouse/pull/36542) ([Vladimir C](https://github.com/vdimir)). +* Fix `Missing column` exception which could happen while using `INTERPOLATE` with `ENGINE = MergeTree` table. [#36549](https://github.com/ClickHouse/ClickHouse/pull/36549) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix format crash when default expression follow EPHEMERAL not literal. Closes [#36618](https://github.com/ClickHouse/ClickHouse/issues/36618). [#36633](https://github.com/ClickHouse/ClickHouse/pull/36633) ([flynn](https://github.com/ucasfl)). +* Fix merges of wide parts with type `Object`. [#36637](https://github.com/ClickHouse/ClickHouse/pull/36637) ([Anton Popov](https://github.com/CurtizJ)). +* Fixed parsing of query settings in `CREATE` query when engine is not specified. 
Fixes https://github.com/ClickHouse/ClickHouse/pull/34187#issuecomment-1103812419. [#36642](https://github.com/ClickHouse/ClickHouse/pull/36642) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix possible heap-use-after-free in schema inference. Closes [#36661](https://github.com/ClickHouse/ClickHouse/issues/36661). [#36679](https://github.com/ClickHouse/ClickHouse/pull/36679) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix server restart if cache configuration changed. [#36685](https://github.com/ClickHouse/ClickHouse/pull/36685) ([Kseniia Sumarokova](https://github.com/kssenii)). +* In the previous [PR](https://github.com/ClickHouse/ClickHouse/pull/36376), I found that testing **(stateless tests, flaky check (address, actions))** is timeout. Moreover, testing locally can also trigger unstable system deadlocks. This problem still exists when using the latest source code of master. [#36697](https://github.com/ClickHouse/ClickHouse/pull/36697) ([Han Shukai](https://github.com/KinderRiven)). +* Fix server reload on port change (do not wait for current connections from query context). [#36700](https://github.com/ClickHouse/ClickHouse/pull/36700) ([Azat Khuzhin](https://github.com/azat)). +* Fix vertical merges in wide parts. Previously an exception `There is no column` can be thrown during merge. [#36707](https://github.com/ClickHouse/ClickHouse/pull/36707) ([Anton Popov](https://github.com/CurtizJ)). +* During the [test](https://s3.amazonaws.com/clickhouse-test-reports/36376/1cb1c7275cb53769ab826772db9b71361bb3e413/stress_test__thread__actions_/clickhouse-server.clean.log) in [PR](https://github.com/ClickHouse/ClickHouse/pull/36376), I found that the one cache class was initialized twice, it throws a exception. Although the cause of this problem is not clear, there should be code logic of repeatedly loading disk in ClickHouse, so we need to make special judgment for this situation. [#36737](https://github.com/ClickHouse/ClickHouse/pull/36737) ([Han Shukai](https://github.com/KinderRiven)). +* Fix a bug of `groupBitmapAndState`/`groupBitmapOrState`/`groupBitmapXorState` on distributed table. [#36739](https://github.com/ClickHouse/ClickHouse/pull/36739) ([Zhang Yifan](https://github.com/zhangyifan27)). +* Fix timeouts in Hedged requests. Connection hang right after sending remote query could lead to eternal waiting. [#36749](https://github.com/ClickHouse/ClickHouse/pull/36749) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix insertion to columns of type `Object` from multiple files, e.g. via table function `file` with globs. [#36762](https://github.com/ClickHouse/ClickHouse/pull/36762) ([Anton Popov](https://github.com/CurtizJ)). +* Fix some issues with async reads from remote filesystem which happened when reading low cardinality. [#36763](https://github.com/ClickHouse/ClickHouse/pull/36763) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix creation of tables with `flatten_nested = 0`. Previously unflattened `Nested` columns could be flattened after server restart. [#36803](https://github.com/ClickHouse/ClickHouse/pull/36803) ([Anton Popov](https://github.com/CurtizJ)). +* Fix incorrect cast in cached buffer from remote fs. [#36809](https://github.com/ClickHouse/ClickHouse/pull/36809) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Remove function `groupArraySorted` which has a bug. [#36822](https://github.com/ClickHouse/ClickHouse/pull/36822) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Fix fire in window view with hop window [#34044](https://github.com/ClickHouse/ClickHouse/issues/34044). [#36861](https://github.com/ClickHouse/ClickHouse/pull/36861) ([vxider](https://github.com/Vxider)). +* Fix `current_size` count in cache. [#36887](https://github.com/ClickHouse/ClickHouse/pull/36887) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix incorrect query result when doing constant aggregation. This fixes [#36728](https://github.com/ClickHouse/ClickHouse/issues/36728) . [#36888](https://github.com/ClickHouse/ClickHouse/pull/36888) ([Amos Bird](https://github.com/amosbird)). +* Fix bug in clickhouse-keeper which can lead to corrupted compressed log files in case of small load and restarts. [#36910](https://github.com/ClickHouse/ClickHouse/pull/36910) ([alesapin](https://github.com/alesapin)). +* Fix bugs when using multiple columns in WindowView by adding converting actions to make it possible to call`writeIntoWindowView` with a slightly different schema. [#36928](https://github.com/ClickHouse/ClickHouse/pull/36928) ([vxider](https://github.com/Vxider)). +* Fix issue: [#36671](https://github.com/ClickHouse/ClickHouse/issues/36671). [#36929](https://github.com/ClickHouse/ClickHouse/pull/36929) ([李扬](https://github.com/taiyang-li)). +* Fix stuck when dropping source table in WindowView. Closes [#35678](https://github.com/ClickHouse/ClickHouse/issues/35678). [#36967](https://github.com/ClickHouse/ClickHouse/pull/36967) ([vxider](https://github.com/Vxider)). +* Fixed logical error on `TRUNCATE` query in `Replicated` database. Fixes [#33747](https://github.com/ClickHouse/ClickHouse/issues/33747). [#36976](https://github.com/ClickHouse/ClickHouse/pull/36976) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix sending external tables data in HedgedConnections with max_parallel_replicas != 1. [#36981](https://github.com/ClickHouse/ClickHouse/pull/36981) ([Kruglov Pavel](https://github.com/Avogar)). +* Fixed problem with infs in `quantileTDigest`. Fixes [#32107](https://github.com/ClickHouse/ClickHouse/issues/32107). [#37021](https://github.com/ClickHouse/ClickHouse/pull/37021) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Fix LowCardinality->ArrowDictionary invalid output when type of indexes is not UInt8. Closes [#36832](https://github.com/ClickHouse/ClickHouse/issues/36832). [#37043](https://github.com/ClickHouse/ClickHouse/pull/37043) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix in-order `GROUP BY` (`optimize_aggregation_in_order=1`) with `*Array` (`groupArrayArray`/...) aggregate functions. [#37046](https://github.com/ClickHouse/ClickHouse/pull/37046) ([Azat Khuzhin](https://github.com/azat)). +* Fixed performance degradation of some INSERT SELECT queries with implicit aggregation. Fixes [#36792](https://github.com/ClickHouse/ClickHouse/issues/36792). [#37047](https://github.com/ClickHouse/ClickHouse/pull/37047) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix optimize_aggregation_in_order with prefix GROUP BY and *Array aggregate functions. [#37050](https://github.com/ClickHouse/ClickHouse/pull/37050) ([Azat Khuzhin](https://github.com/azat)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Revert "Minor refactor to prefer C++ Standard Algorithms"'. [#36511](https://github.com/ClickHouse/ClickHouse/pull/36511) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Strict taskstats parser"'. [#36591](https://github.com/ClickHouse/ClickHouse/pull/36591) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* NO CL ENTRY: 'Revert "Translate docs/zh/sql-reference/data-types/map.md"'. [#36594](https://github.com/ClickHouse/ClickHouse/pull/36594) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Update setting.md"'. [#36595](https://github.com/ClickHouse/ClickHouse/pull/36595) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Documentation: Add a missing **ESTIMATE** in explain syntax'. [#36717](https://github.com/ClickHouse/ClickHouse/pull/36717) ([小蝌蚪](https://github.com/kayhaw)). +* NO CL ENTRY: '[Snyk] Security upgrade numpy from 1.16.6 to 1.22.2'. [#36729](https://github.com/ClickHouse/ClickHouse/pull/36729) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* NO CL ENTRY: 'Translate playground.md to Chinese'. [#36821](https://github.com/ClickHouse/ClickHouse/pull/36821) ([小蝌蚪](https://github.com/kayhaw)). +* NO CL ENTRY: 'Revert "Memory overcommit: continue query execution if memory is available"'. [#36858](https://github.com/ClickHouse/ClickHouse/pull/36858) ([alesapin](https://github.com/alesapin)). +* NO CL ENTRY: 'Revert "Revert "Memory overcommit: continue query execution if memory is available""'. [#36859](https://github.com/ClickHouse/ClickHouse/pull/36859) ([Dmitry Novik](https://github.com/novikd)). +* NO CL ENTRY: 'Revert "BLAKE3 hash function documentation"'. [#37092](https://github.com/ClickHouse/ClickHouse/pull/37092) ([Rich Raposa](https://github.com/rfraposa)). +* NO CL ENTRY: 'Revert "Remove height restrictions from the query div in play web tool."'. [#37261](https://github.com/ClickHouse/ClickHouse/pull/37261) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + diff --git a/docs/en/development/cmake-in-clickhouse.md b/docs/en/development/cmake-in-clickhouse.md index 14b98b136b3..65d280df902 100644 --- a/docs/en/development/cmake-in-clickhouse.md +++ b/docs/en/development/cmake-in-clickhouse.md @@ -420,12 +420,6 @@ Note that ClickHouse uses forks of these libraries, see https://github.com/Click Using system libs can cause a lot of warnings in includes (on macro expansion). -WEVERYTHING -ON -Enable -Weverything option with some exceptions. -Add some warnings that are not available even with -Wall -Wextra -Wpedantic. Intended for exploration of new compiler warnings that may be found useful. Applies to clang only - - WITH_COVERAGE OFF Profile the resulting binary/binaries diff --git a/docs/en/development/tests.md b/docs/en/development/tests.md index 4aba0506c2e..f9c7ae37157 100644 --- a/docs/en/development/tests.md +++ b/docs/en/development/tests.md @@ -12,7 +12,7 @@ Functional tests are the most simple and convenient to use. Most of ClickHouse f Each functional test sends one or multiple queries to the running ClickHouse server and compares the result with reference. -Tests are located in `queries` directory. There are two subdirectories: `stateless` and `stateful`. Stateless tests run queries without any preloaded test data - they often create small synthetic datasets on the fly, within the test itself. Stateful tests require preloaded test data from CLickHouse and it is available to general public. +Tests are located in `queries` directory. There are two subdirectories: `stateless` and `stateful`. Stateless tests run queries without any preloaded test data - they often create small synthetic datasets on the fly, within the test itself. Stateful tests require preloaded test data from ClickHouse and it is available to general public. Each test can be one of two types: `.sql` and `.sh`. 
`.sql` test is the simple SQL script that is piped to `clickhouse-client --multiquery`. `.sh` test is a script that is run by itself. SQL tests are generally preferable to `.sh` tests. You should use `.sh` tests only when you have to test some feature that cannot be exercised from pure SQL, such as piping some input data into `clickhouse-client` or testing `clickhouse-local`. diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 1029cceb28a..d59b07b5dd6 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -669,6 +669,7 @@ Storage policies configuration markup: disk_name_from_disks_configuration 1073741824 + round_robin @@ -695,6 +696,8 @@ Tags: - `max_data_part_size_bytes` — the maximum size of a part that can be stored on any of the volume’s disks. If the a size of a merged part estimated to be bigger than `max_data_part_size_bytes` then this part will be written to a next volume. Basically this feature allows to keep new/small parts on a hot (SSD) volume and move them to a cold (HDD) volume when they reach large size. Do not use this setting if your policy has only one volume. - `move_factor` — when the amount of available space gets lower than this factor, data automatically starts to move on the next volume if any (by default, 0.1). ClickHouse sorts existing parts by size from largest to smallest (in descending order) and selects parts with the total size that is sufficient to meet the `move_factor` condition. If the total size of all parts is insufficient, all parts will be moved. - `prefer_not_to_merge` — Disables merging of data parts on this volume. When this setting is enabled, merging data on this volume is not allowed. This allows controlling how ClickHouse works with slow disks. +- `perform_ttl_move_on_insert` — Disables TTL move on data part INSERT. By default, if an inserted data part has already expired according to the TTL move rule, it immediately goes to the volume/disk declared in that rule. This can significantly slow down inserts when the destination volume/disk is slow (e.g. S3). +- `load_balancing` - Policy for disk balancing, `round_robin` or `least_used`. Cofiguration examples: @@ -724,7 +727,7 @@ Cofiguration examples: 0.2 - +
jbod1 diff --git a/docs/en/getting-started/install.md b/docs/en/getting-started/install.md index f31a78bc1c4..12775749a25 100644 --- a/docs/en/getting-started/install.md +++ b/docs/en/getting-started/install.md @@ -238,7 +238,7 @@ To start the server as a daemon, run: $ sudo clickhouse start ``` -There are also another ways to run ClickHouse: +There are also other ways to run ClickHouse: ``` bash $ sudo service clickhouse-server start diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index e382bbcddd8..31f948cbb00 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -31,8 +31,11 @@ The supported formats are: | [JSON](#json) | ✗ | ✔ | | [JSONAsString](#jsonasstring) | ✔ | ✗ | | [JSONStrings](#jsonstrings) | ✗ | ✔ | +| [JSONColumns](#jsoncolumns) | ✔ | ✔ | +| [JSONColumnsWithMetadata](#jsoncolumnswithmetadata) | ✗ | ✔ | | [JSONCompact](#jsoncompact) | ✗ | ✔ | | [JSONCompactStrings](#jsoncompactstrings) | ✗ | ✔ | +| [JSONCompactColumns](#jsoncompactcolumns) | ✔ | ✔ | | [JSONEachRow](#jsoneachrow) | ✔ | ✔ | | [JSONEachRowWithProgress](#jsoneachrowwithprogress) | ✗ | ✔ | | [JSONStringsEachRow](#jsonstringseachrow) | ✔ | ✔ | @@ -400,6 +403,8 @@ Both data output and parsing are supported in this format. For parsing, any orde Parsing allows the presence of the additional field `tskv` without the equal sign or a value. This field is ignored. +During import, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#settings-input-format-skip-unknown-fields) is set to 1. + ## CSV {#csv} Comma Separated Values format ([RFC](https://tools.ietf.org/html/rfc4180)). @@ -459,15 +464,15 @@ SELECT SearchPhrase, count() AS c FROM test.hits GROUP BY SearchPhrase WITH TOTA "meta": [ { - "name": "'hello'", + "name": "num", + "type": "Int32" + }, + { + "name": "str", "type": "String" }, { - "name": "multiply(42, number)", - "type": "UInt64" - }, - { - "name": "range(5)", + "name": "arr", "type": "Array(UInt8)" } ], @@ -475,25 +480,32 @@ SELECT SearchPhrase, count() AS c FROM test.hits GROUP BY SearchPhrase WITH TOTA "data": [ { - "'hello'": "hello", - "multiply(42, number)": "0", - "range(5)": [0,1,2,3,4] + "num": 42, + "str": "hello", + "arr": [0,1] }, { - "'hello'": "hello", - "multiply(42, number)": "42", - "range(5)": [0,1,2,3,4] + "num": 43, + "str": "hello", + "arr": [0,1,2] }, { - "'hello'": "hello", - "multiply(42, number)": "84", - "range(5)": [0,1,2,3,4] + "num": 44, + "str": "hello", + "arr": [0,1,2,3] } ], "rows": 3, - "rows_before_limit_at_least": 3 + "rows_before_limit_at_least": 3, + + "statistics": + { + "elapsed": 0.001137687, + "rows_read": 3, + "bytes_read": 24 + } } ``` @@ -528,15 +540,15 @@ Example: "meta": [ { - "name": "'hello'", + "name": "num", + "type": "Int32" + }, + { + "name": "str", "type": "String" }, { - "name": "multiply(42, number)", - "type": "UInt64" - }, - { - "name": "range(5)", + "name": "arr", "type": "Array(UInt8)" } ], @@ -544,25 +556,95 @@ Example: "data": [ { - "'hello'": "hello", - "multiply(42, number)": "0", - "range(5)": "[0,1,2,3,4]" + "num": "42", + "str": "hello", + "arr": "[0,1]" }, { - "'hello'": "hello", - "multiply(42, number)": "42", - "range(5)": "[0,1,2,3,4]" + "num": "43", + "str": "hello", + "arr": "[0,1,2]" }, { - "'hello'": "hello", - "multiply(42, number)": "84", - "range(5)": "[0,1,2,3,4]" + "num": "44", + "str": "hello", + "arr": "[0,1,2,3]" } ], "rows": 3, - "rows_before_limit_at_least": 3 + "rows_before_limit_at_least": 3, + + 
"statistics": + { + "elapsed": 0.001403233, + "rows_read": 3, + "bytes_read": 24 + } +} +``` + +## JSONColumns {#jsoncolumns} + +In this format, all data is represented as a single JSON Object. +Note that JSONColumns output format buffers all data in memory to output it as a single block and it can lead to high memory consumption. + +Example: +```json +{ + "num": [42, 43, 44], + "str": ["hello", "hello", "hello"], + "arr": [[0,1], [0,1,2], [0,1,2,3]] +} +``` + +During import, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#settings-input-format-skip-unknown-fields) is set to 1. +Columns that are not present in the block will be filled with default values (you can use [input_format_defaults_for_omitted_fields](../operations/settings/settings.md#session_settings-input_format_defaults_for_omitted_fields) setting here) + + +## JSONColumnsWithMetadata {#jsoncolumnsmonoblock} + +Differs from JSONColumns output format in that it also outputs some metadata and statistics (similar to JSON output format). +This format buffers all data in memory and then outputs them as a single block, so, it can lead to high memory consumption. + +Example: +```json +{ + "meta": + [ + { + "name": "num", + "type": "Int32" + }, + { + "name": "str", + "type": "String" + }, + + { + "name": "arr", + "type": "Array(UInt8)" + } + ], + + "data": + { + "num": [42, 43, 44], + "str": ["hello", "hello", "hello"], + "arr": [[0,1], [0,1,2], [0,1,2,3]] + }, + + "rows": 3, + + "rows_before_limit_at_least": 3, + + "statistics": + { + "elapsed": 0.000272376, + "rows_read": 3, + "bytes_read": 24 + } } ``` @@ -618,71 +700,101 @@ Result: Differs from JSON only in that data rows are output in arrays, not in objects. +Examples: + +1) JSONCompact: +```json +{ + "meta": + [ + { + "name": "num", + "type": "Int32" + }, + { + "name": "str", + "type": "String" + }, + { + "name": "arr", + "type": "Array(UInt8)" + } + ], + + "data": + [ + [42, "hello", [0,1]], + [43, "hello", [0,1,2]], + [44, "hello", [0,1,2,3]] + ], + + "rows": 3, + + "rows_before_limit_at_least": 3, + + "statistics": + { + "elapsed": 0.001222069, + "rows_read": 3, + "bytes_read": 24 + } +} +``` + +2) JSONCompactStrings +```json +{ + "meta": + [ + { + "name": "num", + "type": "Int32" + }, + { + "name": "str", + "type": "String" + }, + { + "name": "arr", + "type": "Array(UInt8)" + } + ], + + "data": + [ + ["42", "hello", "[0,1]"], + ["43", "hello", "[0,1,2]"], + ["44", "hello", "[0,1,2,3]"] + ], + + "rows": 3, + + "rows_before_limit_at_least": 3, + + "statistics": + { + "elapsed": 0.001572097, + "rows_read": 3, + "bytes_read": 24 + } +} +``` + +## JSONCompactColumns {#jsoncompactcolumns} + +In this format, all data is represented as a single JSON Array. 
+Note that JSONCompactColumns output format buffers all data in memory to output it as a single block and it can lead to high memory consumption + Example: - -``` -// JSONCompact -{ - "meta": - [ - { - "name": "'hello'", - "type": "String" - }, - { - "name": "multiply(42, number)", - "type": "UInt64" - }, - { - "name": "range(5)", - "type": "Array(UInt8)" - } - ], - - "data": - [ - ["hello", "0", [0,1,2,3,4]], - ["hello", "42", [0,1,2,3,4]], - ["hello", "84", [0,1,2,3,4]] - ], - - "rows": 3, - - "rows_before_limit_at_least": 3 -} +```json +[ + [42, 43, 44], + ["hello", "hello", "hello"], + [[0,1], [0,1,2], [0,1,2,3]] +] ``` -``` -// JSONCompactStrings -{ - "meta": - [ - { - "name": "'hello'", - "type": "String" - }, - { - "name": "multiply(42, number)", - "type": "UInt64" - }, - { - "name": "range(5)", - "type": "Array(UInt8)" - } - ], - - "data": - [ - ["hello", "0", "[0,1,2,3,4]"], - ["hello", "42", "[0,1,2,3,4]"], - ["hello", "84", "[0,1,2,3,4]"] - ], - - "rows": 3, - - "rows_before_limit_at_least": 3 -} -``` +Columns that are not present in the block will be filled with default values (you can use [input_format_defaults_for_omitted_fields](../operations/settings/settings.md#session_settings-input_format_defaults_for_omitted_fields) setting here) ## JSONEachRow {#jsoneachrow} ## JSONStringsEachRow {#jsonstringseachrow} @@ -699,15 +811,17 @@ When using these formats, ClickHouse outputs rows as separated, newline-delimite When inserting the data, you should provide a separate JSON value for each row. +In JSONEachRow/JSONStringsEachRow input formats columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#settings-input-format-skip-unknown-fields) is set to 1. + ## JSONEachRowWithProgress {#jsoneachrowwithprogress} ## JSONStringsEachRowWithProgress {#jsonstringseachrowwithprogress} Differs from `JSONEachRow`/`JSONStringsEachRow` in that ClickHouse will also yield progress information as JSON values. ```json -{"row":{"'hello'":"hello","multiply(42, number)":"0","range(5)":[0,1,2,3,4]}} -{"row":{"'hello'":"hello","multiply(42, number)":"42","range(5)":[0,1,2,3,4]}} -{"row":{"'hello'":"hello","multiply(42, number)":"84","range(5)":[0,1,2,3,4]}} +{"row":{"num":42,"str":"hello","arr":[0,1]}} +{"row":{"num":43,"str":"hello","arr":[0,1,2]}} +{"row":{"num":44,"str":"hello","arr":[0,1,2,3]}} {"progress":{"read_rows":"3","read_bytes":"24","written_rows":"0","written_bytes":"0","total_rows_to_read":"3"}} ``` @@ -728,11 +842,11 @@ Differs from `JSONCompactStringsEachRow` in that in that it also prints the head Differs from `JSONCompactStringsEachRow` in that it also prints two header rows with column names and types, similar to [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes). ```json -["'hello'", "multiply(42, number)", "range(5)"] -["String", "UInt64", "Array(UInt8)"] -["hello", "0", [0,1,2,3,4]] -["hello", "42", [0,1,2,3,4]] -["hello", "84", [0,1,2,3,4]] +["num", "str", "arr"] +["Int32", "String", "Array(UInt8)"] +[42, "hello", [0,1]] +[43, "hello", [0,1,2]] +[44, "hello", [0,1,2,3]] ``` ### Inserting Data {#inserting-data} diff --git a/docs/en/operations/system-tables/tables.md b/docs/en/operations/system-tables/tables.md index 8286d51aed6..6cf1490f14e 100644 --- a/docs/en/operations/system-tables/tables.md +++ b/docs/en/operations/system-tables/tables.md @@ -12,11 +12,13 @@ Columns: - `name` ([String](../../sql-reference/data-types/string.md)) — Table name. 
+- `uuid` ([UUID](../../sql-reference/data-types/uuid.md)) — Table uuid (Atomic database). + - `engine` ([String](../../sql-reference/data-types/string.md)) — Table engine name (without parameters). - `is_temporary` ([UInt8](../../sql-reference/data-types/int-uint.md)) - Flag that indicates whether the table is temporary. -- `data_path` ([String](../../sql-reference/data-types/string.md)) - Path to the table data in the file system. +- `data_paths` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Paths to the table data in the file systems. - `metadata_path` ([String](../../sql-reference/data-types/string.md)) - Path to the table metadata in the file system. @@ -60,6 +62,14 @@ Columns: - `has_own_data` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether the table itself stores some data on disk or only accesses some other source. +- `loading_dependencies_database` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Database loading dependencies (list of objects which should be loaded before the current object). + +- `loading_dependencies_table` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Table loading dependencies (list of objects which should be loaded before the current object). + +- `loading_dependent_database` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Dependent loading database. + +- `loading_dependent_table` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Dependent loading table. + The `system.tables` table is used in `SHOW TABLES` query implementation. **Example** @@ -95,6 +105,10 @@ lifetime_rows: ᴺᵁᴸᴸ lifetime_bytes: ᴺᵁᴸᴸ comment: has_own_data: 0 +loading_dependencies_database: [] +loading_dependencies_table: [] +loading_dependent_database: [] +loading_dependent_table: [] Row 2: ────── @@ -122,4 +136,8 @@ lifetime_rows: ᴺᵁᴸᴸ lifetime_bytes: ᴺᵁᴸᴸ comment: has_own_data: 0 +loading_dependencies_database: [] +loading_dependencies_table: [] +loading_dependent_database: [] +loading_dependent_table: [] ``` diff --git a/docs/en/sql-reference/data-types/boolean.md b/docs/en/sql-reference/data-types/boolean.md index a21b1c41a21..7b61579c4a9 100644 --- a/docs/en/sql-reference/data-types/boolean.md +++ b/docs/en/sql-reference/data-types/boolean.md @@ -3,10 +3,38 @@ sidebar_position: 43 sidebar_label: Boolean --- -# Boolean Values {#boolean-values} +# Boolean Values bool (boolean) {#boolean-values} -Since https://github.com/ClickHouse/ClickHouse/commit/4076ae77b46794e73594a9f400200088ed1e7a6e , there be a separate type for boolean values. +Type `bool` is stored as UInt8. Possible values `true` (1), `false` (0). -For versions before that, there is no separate type for boolean values. Use UInt8 type, restricted to the values 0 or 1. 
+ +```sql +select true as col, toTypeName(col); +┌─col──┬─toTypeName(true)─┐ +│ true │ Bool │ +└──────┴──────────────────┘ + +select true == 1 as col, toTypeName(col); +┌─col─┬─toTypeName(equals(true, 1))─┐ +│ 1 │ UInt8 │ +└─────┴─────────────────────────────┘ +``` + +```sql +CREATE TABLE test_bool +( + `A` Int64, + `B` Bool +) +ENGINE = Memory; + +INSERT INTO test_bool VALUES (1, true),(2,0); + +SELECT * FROM test_bool; +┌─A─┬─B─────┐ +│ 1 │ true │ +│ 2 │ false │ +└───┴───────┘ +``` [Original article](https://clickhouse.com/docs/en/data_types/boolean/) diff --git a/docs/en/sql-reference/functions/geo/h3.md b/docs/en/sql-reference/functions/geo/h3.md index 50115dd4d75..f4d199e6729 100644 --- a/docs/en/sql-reference/functions/geo/h3.md +++ b/docs/en/sql-reference/functions/geo/h3.md @@ -1026,4 +1026,119 @@ Result: │ 41162 │ └─────────────┘ ``` + +## h3Line {#h3line} + +Returns the line of indices between the two indices that are provided. + +**Syntax** + +``` sql +h3Line(start,end) +``` + +**Parameter** + +- `start` — Hexagon index number that represents a starting point. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `end` — Hexagon index number that represents an ending point. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). + +**Returned value** + +Array of h3 indexes representing the line of indices between the two provided indices: + +Type: [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). + +**Example** + +Query: + +``` sql + SELECT h3Line(590080540275638271,590103561300344831) as indexes; +``` + +Result: + +``` text +┌─indexes────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ [590080540275638271,590080471556161535,590080883873021951,590106516237844479,590104385934065663,590103630019821567,590103561300344831] │ +└────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +``` + +## h3Distance {#h3distance} + +Returns the distance in grid cells between the two indices that are provided. + +**Syntax** + +``` sql +h3Distance(start,end) +``` + +**Parameter** + +- `start` — Hexagon index number that represents a starting point. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `end` — Hexagon index number that represents an ending point. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). + +**Returned value** + +- Number of grid cells. + +Type: [Int64](../../../sql-reference/data-types/int-uint.md). + +Returns a negative number if finding the distance fails. + +**Example** + +Query: + +``` sql + SELECT h3Distance(590080540275638271,590103561300344831) as distance; +``` + +Result: + +``` text +┌─distance─┐ +│ 7 │ +└──────────┘ +``` + +## h3HexRing {#h3hexring} + +Returns the indexes of the hexagonal ring centered at the provided origin h3Index and length k. + +Returns 0 if no pentagonal distortion was encountered. + +**Syntax** + +``` sql +h3HexRing(index, k) +``` + +**Parameter** + +- `index` — Hexagon index number that represents the origin. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `k` — Distance. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). + +**Returned values** + +- Array of H3 indexes. + +Type: [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). 
+ +**Example** + +Query: + +``` sql + SELECT h3HexRing(590080540275638271, toUInt16(1)) AS hexRing; +``` + +Result: + +``` text +┌─hexRing─────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ [590080815153545215,590080471556161535,590080677714591743,590077585338138623,590077447899185151,590079509483487231] │ +└─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +``` [Original article](https://clickhouse.com/docs/en/sql-reference/functions/geo/h3) diff --git a/docs/en/sql-reference/functions/random-functions.md b/docs/en/sql-reference/functions/random-functions.md index 5e20a93da1f..3931898f081 100644 --- a/docs/en/sql-reference/functions/random-functions.md +++ b/docs/en/sql-reference/functions/random-functions.md @@ -96,10 +96,14 @@ SELECT fuzzBits(materialize('abacaba'), 0.1) FROM numbers(3) ``` -\`\`\` text -┌─fuzzBits(materialize(‘abacaba’), 0.1)─┐ -│ abaaaja │ -│ a\*cjab+ │ -│ aeca2A │ -└───────────────────────────────────────┘ +Result: + +``` text +┌─fuzzBits(materialize('abacaba'), 0.1)─┐ +│ abaaaja │ +│ a*cjab+ │ +│ aeca2A │ +└───────────────────────────────────────┘ +``` + diff --git a/docs/en/sql-reference/functions/uuid-functions.md b/docs/en/sql-reference/functions/uuid-functions.md index d23b505a93f..08f281ba281 100644 --- a/docs/en/sql-reference/functions/uuid-functions.md +++ b/docs/en/sql-reference/functions/uuid-functions.md @@ -11,10 +11,16 @@ The functions for working with UUID are listed below. Generates the [UUID](../data-types/uuid.md) of [version 4](https://tools.ietf.org/html/rfc4122#section-4.4). +**Syntax** + ``` sql -generateUUIDv4() +generateUUIDv4([x]) ``` +**Arguments** + +- `x` — [Expression](../../sql-reference/syntax.md#syntax-expressions) resulting in any of the [supported data types](../../sql-reference/data-types/index.md#data_types). The resulting value is discarded, but the expression itself if used for bypassing [common subexpression elimination](../../sql-reference/functions/index.md#common-subexpression-elimination) if the function is called multiple times in one query. Optional parameter. + **Returned value** The UUID type value. @@ -37,6 +43,15 @@ SELECT * FROM t_uuid └──────────────────────────────────────┘ ``` +**Usage example if it is needed to generate multiple values in one row** + +```sql +SELECT generateUUIDv4(1), generateUUIDv4(2) +┌─generateUUIDv4(1)────────────────────┬─generateUUIDv4(2)────────────────────┐ +│ 2d49dc6e-ddce-4cd0-afb8-790956df54c1 │ 8abf8c13-7dea-4fdf-af3e-0e18767770e6 │ +└──────────────────────────────────────┴──────────────────────────────────────┘ +``` + ## empty {#empty} Checks whether the input UUID is empty. diff --git a/docs/en/sql-reference/functions/ym-dict-functions.md b/docs/en/sql-reference/functions/ym-dict-functions.md index 85215957443..4fc727844e7 100644 --- a/docs/en/sql-reference/functions/ym-dict-functions.md +++ b/docs/en/sql-reference/functions/ym-dict-functions.md @@ -105,7 +105,7 @@ Example: `regionToCountry(toUInt32(213)) = 225` converts Moscow (213) to Russia Converts a region to a continent. In every other way, this function is the same as ‘regionToCity’. Example: `regionToContinent(toUInt32(213)) = 10001` converts Moscow (213) to Eurasia (10001). 
-### regionToTopContinent (#regiontotopcontinent) {#regiontotopcontinent-regiontotopcontinent} +### regionToTopContinent(id\[, geobase\]) {#regiontotopcontinentid-geobase} Finds the highest continent in the hierarchy for the region. diff --git a/docs/en/sql-reference/statements/create/user.md b/docs/en/sql-reference/statements/create/user.md index 0aad0961a8b..34f0a13147c 100644 --- a/docs/en/sql-reference/statements/create/user.md +++ b/docs/en/sql-reference/statements/create/user.md @@ -29,12 +29,14 @@ There are multiple ways of user identification: - `IDENTIFIED WITH no_password` - `IDENTIFIED WITH plaintext_password BY 'qwerty'` - `IDENTIFIED WITH sha256_password BY 'qwerty'` or `IDENTIFIED BY 'password'` -- `IDENTIFIED WITH sha256_hash BY 'hash'` +- `IDENTIFIED WITH sha256_hash BY 'hash'` or `IDENTIFIED WITH sha256_hash BY 'hash' SALT 'salt'` - `IDENTIFIED WITH double_sha1_password BY 'qwerty'` - `IDENTIFIED WITH double_sha1_hash BY 'hash'` - `IDENTIFIED WITH ldap SERVER 'server_name'` - `IDENTIFIED WITH kerberos` or `IDENTIFIED WITH kerberos REALM 'realm'` +For identification with sha256_hash using `SALT`, the hash must be calculated from the concatenation of 'password' and 'salt'. + ## User Host {#user-host} User host is a host from which a connection to ClickHouse server could be established. The host can be specified in the `HOST` query section in the following ways: diff --git a/docs/en/sql-reference/statements/grant.md b/docs/en/sql-reference/statements/grant.md index 1ee330061b5..b60114e10c5 100644 --- a/docs/en/sql-reference/statements/grant.md +++ b/docs/en/sql-reference/statements/grant.md @@ -170,6 +170,7 @@ Hierarchy of privileges: - `SYSTEM FLUSH` - `SYSTEM FLUSH DISTRIBUTED` - `SYSTEM FLUSH LOGS` + - `CLUSTER` (see also `access_control_improvements.on_cluster_queries_require_cluster_grant` configuration directive) - [INTROSPECTION](#grant-introspection) - `addressToLine` - `addressToLineWithInlines` diff --git a/docs/ru/faq/use-cases/time-series.md b/docs/ru/faq/use-cases/time-series.md index d2229c30359..d7525214cba 100644 --- a/docs/ru/faq/use-cases/time-series.md +++ b/docs/ru/faq/use-cases/time-series.md @@ -5,7 +5,7 @@ sidebar_position: 101 # Можно ли использовать ClickHouse как базу данных временных рядов? {#can-i-use-clickhouse-as-a-time-series-database} -ClickHouse — это универсальное решение для [OLAP](../../faq/general/olap.md) операций, в то время как существует много специализированных СУБД временных рядов. Однако [высокая скорость выполнения запросов](../../faq/general/why-clickhouse-is-so-fast.md) позволяет CLickHouse во многих случаях "побеждать" специализированные аналоги. В подтверждение этому есть много примеров с конкретными показателями производительности, так что мы не будем останавливаться на этом подробно. Лучше рассмотрим те возможности ClickHouse, которые стоит использовать. +ClickHouse — это универсальное решение для [OLAP](../../faq/general/olap.md) операций, в то время как существует много специализированных СУБД временных рядов. Однако [высокая скорость выполнения запросов](../../faq/general/why-clickhouse-is-so-fast.md) позволяет ClickHouse во многих случаях "побеждать" специализированные аналоги. В подтверждение этому есть много примеров с конкретными показателями производительности, так что мы не будем останавливаться на этом подробно. Лучше рассмотрим те возможности ClickHouse, которые стоит использовать.
Во-первых, есть **[специальные кодеки](../../sql-reference/statements/create/table.md#create-query-specialized-codecs)**, которые составляют типичные временные ряды. Это могут быть либо стандартные алгоритмы, такие как `DoubleDelta` или `Gorilla`, либо специфические для ClickHouse, например `T64`. diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index ebcb8855238..9470e3e0f9b 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -2432,7 +2432,7 @@ SELECT idx, i FROM null_in WHERE i IN (1, NULL) SETTINGS transform_null_in = 1; Разрешает или запрещает использование типа данных `LowCardinality` с форматом данных [Native](../../interfaces/formats.md#native). -Если использование типа `LowCardinality` ограничено, сервер CLickHouse преобразует столбцы `LowCardinality` в обычные столбцы для запросов `SELECT`, а обычные столбцы - в столбцы `LowCardinality` для запросов `INSERT`. +Если использование типа `LowCardinality` ограничено, сервер ClickHouse преобразует столбцы `LowCardinality` в обычные столбцы для запросов `SELECT`, а обычные столбцы - в столбцы `LowCardinality` для запросов `INSERT`. В основном настройка используется для сторонних клиентов, не поддерживающих тип данных `LowCardinality`. diff --git a/docs/ru/operations/system-tables/tables.md b/docs/ru/operations/system-tables/tables.md index bf47051442e..ae5ca586a88 100644 --- a/docs/ru/operations/system-tables/tables.md +++ b/docs/ru/operations/system-tables/tables.md @@ -12,11 +12,13 @@ - `name` ([String](../../sql-reference/data-types/string.md)) — имя таблицы. +- `uuid` ([UUID](../../sql-reference/data-types/uuid.md)) — Uuid таблицы (Atomic database). + - `engine` ([String](../../sql-reference/data-types/string.md)) — движок таблицы (без параметров). - `is_temporary` ([UInt8](../../sql-reference/data-types/int-uint.md)) — флаг, указывающий на то, временная это таблица или нет. -- `data_path` ([String](../../sql-reference/data-types/string.md)) — путь к данным таблицы в файловой системе. +- `data_paths` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — пути к данным таблицы в файловых системах. - `metadata_path` ([String](../../sql-reference/data-types/string.md)) — путь к табличным метаданным в файловой системе. @@ -60,6 +62,14 @@ - `has_own_data` ([UInt8](../../sql-reference/data-types/int-uint.md)) — флаг, показывающий хранит ли таблица сама какие-то данные на диске или только обращается к какому-то другому источнику. +- `loading_dependencies_database` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - базы данных необходимые для загрузки объекта. + +- `loading_dependencies_table` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - таблицы необходимые для загрузки объекта. + +- `loading_dependent_database` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - базы данных, которым объект необходим для загрузки. + +- `loading_dependent_table` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - таблицы, которым объект необходим для загрузки. + Таблица `system.tables` используется при выполнении запроса `SHOW TABLES`. 
**Пример** @@ -95,6 +105,10 @@ lifetime_rows: ᴺᵁᴸᴸ lifetime_bytes: ᴺᵁᴸᴸ comment: has_own_data: 0 +loading_dependencies_database: [] +loading_dependencies_table: [] +loading_dependent_database: [] +loading_dependent_table: [] Row 2: ────── @@ -122,4 +136,8 @@ lifetime_rows: ᴺᵁᴸᴸ lifetime_bytes: ᴺᵁᴸᴸ comment: has_own_data: 0 +loading_dependencies_database: [] +loading_dependencies_table: [] +loading_dependent_database: [] +loading_dependent_table: [] ``` diff --git a/docs/ru/sql-reference/data-types/boolean.md b/docs/ru/sql-reference/data-types/boolean.md index 2e18f1a35fe..621d3550eaa 100644 --- a/docs/ru/sql-reference/data-types/boolean.md +++ b/docs/ru/sql-reference/data-types/boolean.md @@ -3,7 +3,35 @@ sidebar_position: 43 sidebar_label: "Булевы значения" --- -# Булевы значения {#bulevy-znacheniia} +# Булевы значения bool (boolean) {#bulevy-znacheniia} -Отдельного типа для булевых значений нет. Для них используется тип UInt8, в котором используются только значения 0 и 1. +Тип `bool` хранится как UInt8. Значения `true` (1), `false` (0). +```sql +select true as col, toTypeName(col); +┌─col──┬─toTypeName(true)─┐ +│ true │ Bool │ +└──────┴──────────────────┘ + +select true == 1 as col, toTypeName(col); +┌─col─┬─toTypeName(equals(true, 1))─┐ +│ 1 │ UInt8 │ +└─────┴─────────────────────────────┘ +``` + +```sql +CREATE TABLE test_bool +( + `A` Int64, + `B` Bool +) +ENGINE = Memory; + +INSERT INTO test_bool VALUES (1, true),(2,0); + +SELECT * FROM test_bool; +┌─A─┬─B─────┐ +│ 1 │ true │ +│ 2 │ false │ +└───┴───────┘ +``` diff --git a/docs/ru/sql-reference/functions/uuid-functions.md b/docs/ru/sql-reference/functions/uuid-functions.md index babeb0d2693..554e78002b8 100644 --- a/docs/ru/sql-reference/functions/uuid-functions.md +++ b/docs/ru/sql-reference/functions/uuid-functions.md @@ -9,10 +9,16 @@ sidebar_label: "Функции для работы с UUID" Генерирует идентификатор [UUID версии 4](https://tools.ietf.org/html/rfc4122#section-4.4). +**Синтаксис** + ``` sql -generateUUIDv4() +generateUUIDv4([x]) ``` +**Аргументы** + +- `x` — [выражение](../syntax.md#syntax-expressions), возвращающее значение одного из [поддерживаемых типов данных](../data-types/index.md#data_types). Значение используется, чтобы избежать [склейки одинаковых выражений](index.md#common-subexpression-elimination), если функция вызывается несколько раз в одном запросе. Необязательный параметр. + **Возвращаемое значение** Значение типа [UUID](../../sql-reference/functions/uuid-functions.md). @@ -35,6 +41,15 @@ SELECT * FROM t_uuid └──────────────────────────────────────┘ ``` +**Пример использования, для генерации нескольких значений в одной строке** + +```sql +SELECT generateUUIDv4(1), generateUUIDv4(2) +┌─generateUUIDv4(1)────────────────────┬─generateUUIDv4(2)────────────────────┐ +│ 2d49dc6e-ddce-4cd0-afb8-790956df54c1 │ 8abf8c13-7dea-4fdf-af3e-0e18767770e6 │ +└──────────────────────────────────────┴──────────────────────────────────────┘ +``` + ## empty {#empty} Проверяет, является ли входной UUID пустым. 
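The optional dummy argument documented for `generateUUIDv4` in the hunks above exists only to defeat common subexpression elimination. A minimal SQL sketch of that behavior, restating the explanation from those documentation changes (the returned UUIDs are random, so the comments only describe the expected relationship between columns):

```sql
-- Identical expressions are computed once per query (common subexpression
-- elimination), so both columns receive the same UUID:
SELECT generateUUIDv4() AS a, generateUUIDv4() AS b;

-- Distinct dummy arguments keep the two calls separate, so two independent
-- UUIDs are generated in one row:
SELECT generateUUIDv4(1) AS a, generateUUIDv4(2) AS b;
```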
diff --git a/docs/ru/sql-reference/statements/create/user.md b/docs/ru/sql-reference/statements/create/user.md index 78c481e8eb7..d7da1748821 100644 --- a/docs/ru/sql-reference/statements/create/user.md +++ b/docs/ru/sql-reference/statements/create/user.md @@ -29,12 +29,14 @@ CREATE USER [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1] - `IDENTIFIED WITH no_password` - `IDENTIFIED WITH plaintext_password BY 'qwerty'` - `IDENTIFIED WITH sha256_password BY 'qwerty'` or `IDENTIFIED BY 'password'` -- `IDENTIFIED WITH sha256_hash BY 'hash'` +- `IDENTIFIED WITH sha256_hash BY 'hash'` or `IDENTIFIED WITH sha256_hash BY 'hash' SALT 'salt'` - `IDENTIFIED WITH double_sha1_password BY 'qwerty'` - `IDENTIFIED WITH double_sha1_hash BY 'hash'` - `IDENTIFIED WITH ldap SERVER 'server_name'` - `IDENTIFIED WITH kerberos` or `IDENTIFIED WITH kerberos REALM 'realm'` +Для идентификации с sha256_hash используя `SALT` - хэш должен быть вычислен от конкатенации 'password' и 'salt'. + ## Пользовательский хост Пользовательский хост — это хост, с которого можно установить соединение с сервером ClickHouse. Хост задается в секции `HOST` следующими способами: diff --git a/docs/tools/blog.py b/docs/tools/blog.py deleted file mode 100644 index 9bb6beae972..00000000000 --- a/docs/tools/blog.py +++ /dev/null @@ -1,113 +0,0 @@ -#!/usr/bin/env python3 -import datetime -import logging -import os -import time - -import nav # monkey patches mkdocs - -import mkdocs.commands -from mkdocs import config -from mkdocs import exceptions - -import mdx_clickhouse -import redirects - -import util - - -def build_for_lang(lang, args): - logging.info(f"Building {lang} blog") - - try: - theme_cfg = { - "name": None, - "custom_dir": os.path.join(os.path.dirname(__file__), "..", args.theme_dir), - "language": lang, - "direction": "ltr", - "static_templates": ["404.html"], - "extra": { - "now": int( - time.mktime(datetime.datetime.now().timetuple()) - ) # TODO better way to avoid caching - }, - } - - # the following list of languages is sorted according to - # https://en.wikipedia.org/wiki/List_of_languages_by_total_number_of_speakers - languages = {"en": "English"} - - site_names = {"en": "ClickHouse Blog"} - - assert len(site_names) == len(languages) - - site_dir = os.path.join(args.blog_output_dir, lang) - - plugins = ["macros"] - if args.htmlproofer: - plugins.append("htmlproofer") - - website_url = "https://clickhouse.com" - site_name = site_names.get(lang, site_names["en"]) - blog_nav, post_meta = nav.build_blog_nav(lang, args) - raw_config = dict( - site_name=site_name, - site_url=f"{website_url}/blog/{lang}/", - docs_dir=os.path.join(args.blog_dir, lang), - site_dir=site_dir, - strict=True, - theme=theme_cfg, - nav=blog_nav, - copyright="©2016–2022 ClickHouse, Inc.", - use_directory_urls=True, - repo_name="ClickHouse/ClickHouse", - repo_url="https://github.com/ClickHouse/ClickHouse/", - edit_uri=f"edit/master/website/blog/{lang}", - markdown_extensions=mdx_clickhouse.MARKDOWN_EXTENSIONS, - plugins=plugins, - extra=dict( - now=datetime.datetime.now().isoformat(), - rev=args.rev, - rev_short=args.rev_short, - rev_url=args.rev_url, - website_url=website_url, - events=args.events, - languages=languages, - includes_dir=os.path.join(os.path.dirname(__file__), "..", "_includes"), - is_blog=True, - post_meta=post_meta, - today=datetime.date.today().isoformat(), - ), - ) - - cfg = config.load_config(**raw_config) - mkdocs.commands.build.build(cfg) - - redirects.build_blog_redirects(args) - - env = util.init_jinja2_env(args) - with 
open( - os.path.join(args.website_dir, "templates", "blog", "rss.xml"), "rb" - ) as f: - rss_template_string = f.read().decode("utf-8").strip() - rss_template = env.from_string(rss_template_string) - with open(os.path.join(args.blog_output_dir, lang, "rss.xml"), "w") as f: - f.write(rss_template.render({"config": raw_config})) - - logging.info(f"Finished building {lang} blog") - - except exceptions.ConfigurationError as e: - raise SystemExit("\n" + str(e)) - - -def build_blog(args): - tasks = [] - for lang in args.blog_lang.split(","): - if lang: - tasks.append( - ( - lang, - args, - ) - ) - util.run_function_in_parallel(build_for_lang, tasks, threads=False) diff --git a/docs/tools/build.py b/docs/tools/build.py index f084a8e5c0c..3756cf66794 100755 --- a/docs/tools/build.py +++ b/docs/tools/build.py @@ -1,144 +1,17 @@ #!/usr/bin/env python3 import argparse -import datetime import logging import os import shutil import subprocess import sys -import time -import jinja2 import livereload -import markdown.util -import nav # monkey patches mkdocs - -from mkdocs import config -from mkdocs import exceptions -import mkdocs.commands.build - -import blog -import mdx_clickhouse import redirects -import util import website -from cmake_in_clickhouse_generator import generate_cmake_flags_files - - -class ClickHouseMarkdown(markdown.extensions.Extension): - class ClickHousePreprocessor(markdown.util.Processor): - def run(self, lines): - for line in lines: - if "" not in line: - yield line - - def extendMarkdown(self, md): - md.preprocessors.register( - self.ClickHousePreprocessor(), "clickhouse_preprocessor", 31 - ) - - -markdown.extensions.ClickHouseMarkdown = ClickHouseMarkdown - - -def build_for_lang(lang, args): - logging.info(f"Building {lang} docs") - - try: - theme_cfg = { - "name": None, - "custom_dir": os.path.join(os.path.dirname(__file__), "..", args.theme_dir), - "language": lang, - "direction": "rtl" if lang == "fa" else "ltr", - "static_templates": ["404.html"], - "extra": { - "now": int( - time.mktime(datetime.datetime.now().timetuple()) - ) # TODO better way to avoid caching - }, - } - - # the following list of languages is sorted according to - # https://en.wikipedia.org/wiki/List_of_languages_by_total_number_of_speakers - languages = {"en": "English", "zh": "中文", "ru": "Русский", "ja": "日本語"} - - site_names = { - "en": "ClickHouse %s Documentation", - "zh": "ClickHouse文档 %s", - "ru": "Документация ClickHouse %s", - "ja": "ClickHouseドキュメント %s", - } - - assert len(site_names) == len(languages) - - site_dir = os.path.join(args.docs_output_dir, lang) - - plugins = ["macros"] - if args.htmlproofer: - plugins.append("htmlproofer") - - website_url = "https://clickhouse.com" - site_name = site_names.get(lang, site_names["en"]) % "" - site_name = site_name.replace(" ", " ") - - raw_config = dict( - site_name=site_name, - site_url=f"{website_url}/docs/{lang}/", - docs_dir=os.path.join(args.docs_dir, lang), - site_dir=site_dir, - strict=True, - theme=theme_cfg, - copyright="©2016–2022 ClickHouse, Inc.", - use_directory_urls=True, - repo_name="ClickHouse/ClickHouse", - repo_url="https://github.com/ClickHouse/ClickHouse/", - edit_uri=f"edit/master/docs/{lang}", - markdown_extensions=mdx_clickhouse.MARKDOWN_EXTENSIONS, - plugins=plugins, - extra=dict( - now=datetime.datetime.now().isoformat(), - rev=args.rev, - rev_short=args.rev_short, - rev_url=args.rev_url, - website_url=website_url, - events=args.events, - languages=languages, - includes_dir=os.path.join(os.path.dirname(__file__), "..", 
"_includes"), - is_blog=False, - ), - ) - - raw_config["nav"] = nav.build_docs_nav(lang, args) - - cfg = config.load_config(**raw_config) - - if not args.skip_multi_page: - mkdocs.commands.build.build(cfg) - - mdx_clickhouse.PatchedMacrosPlugin.disabled = False - - logging.info(f"Finished building {lang} docs") - - except exceptions.ConfigurationError as e: - raise SystemExit("\n" + str(e)) - - -def build_docs(args): - tasks = [] - for lang in args.lang.split(","): - if lang: - tasks.append( - ( - lang, - args, - ) - ) - util.run_function_in_parallel(build_for_lang, tasks, threads=False) - redirects.build_docs_redirects(args) - def build(args): if os.path.exists(args.output_dir): @@ -147,14 +20,6 @@ def build(args): if not args.skip_website: website.build_website(args) - if not args.skip_docs: - generate_cmake_flags_files() - - build_docs(args) - - if not args.skip_blog: - blog.build_blog(args) - if not args.skip_website: website.process_benchmark_results(args) website.minify_website(args) @@ -171,20 +36,14 @@ if __name__ == "__main__": arg_parser = argparse.ArgumentParser() arg_parser.add_argument("--lang", default="en,ru,zh,ja") - arg_parser.add_argument("--blog-lang", default="en") - arg_parser.add_argument("--docs-dir", default=".") arg_parser.add_argument("--theme-dir", default=website_dir) arg_parser.add_argument("--website-dir", default=website_dir) arg_parser.add_argument("--src-dir", default=src_dir) - arg_parser.add_argument("--blog-dir", default=os.path.join(website_dir, "blog")) arg_parser.add_argument("--output-dir", default="build") arg_parser.add_argument("--nav-limit", type=int, default="0") arg_parser.add_argument("--skip-multi-page", action="store_true") arg_parser.add_argument("--skip-website", action="store_true") - arg_parser.add_argument("--skip-blog", action="store_true") - arg_parser.add_argument("--skip-docs", action="store_true") arg_parser.add_argument("--htmlproofer", action="store_true") - arg_parser.add_argument("--no-docs-macros", action="store_true") arg_parser.add_argument("--livereload", type=int, default="0") arg_parser.add_argument("--verbose", action="store_true") @@ -196,11 +55,6 @@ if __name__ == "__main__": logging.getLogger("MARKDOWN").setLevel(logging.INFO) - args.docs_output_dir = os.path.join(os.path.abspath(args.output_dir), "docs") - args.blog_output_dir = os.path.join(os.path.abspath(args.output_dir), "blog") - - from github import get_events - args.rev = ( subprocess.check_output("git rev-parse HEAD", shell=True) .decode("utf-8") @@ -212,9 +66,6 @@ if __name__ == "__main__": .strip() ) args.rev_url = f"https://github.com/ClickHouse/ClickHouse/commit/{args.rev}" - args.events = get_events(args) - - from build import build build(args) @@ -223,9 +74,6 @@ if __name__ == "__main__": new_args = sys.executable + " " + " ".join(new_args) server = livereload.Server() - server.watch( - args.docs_dir + "**/*", livereload.shell(new_args, cwd="tools", shell=True) - ) server.watch( args.website_dir + "**/*", livereload.shell(new_args, cwd="tools", shell=True), diff --git a/docs/tools/cmake_in_clickhouse_generator.py b/docs/tools/cmake_in_clickhouse_generator.py deleted file mode 100644 index 9bbc94fd206..00000000000 --- a/docs/tools/cmake_in_clickhouse_generator.py +++ /dev/null @@ -1,181 +0,0 @@ -import re -import os -from typing import TextIO, List, Tuple, Optional, Dict - -# name, default value, description -Entity = Tuple[str, str, str] - -# https://regex101.com/r/R6iogw/12 -cmake_option_regex: str = ( - 
r"^\s*option\s*\(([A-Z_0-9${}]+)\s*(?:\"((?:.|\n)*?)\")?\s*(.*)?\).*$" -) - -ch_master_url: str = "https://github.com/clickhouse/clickhouse/blob/master/" - -name_str: str = '[`{name}`](' + ch_master_url + "{path}#L{line})" -default_anchor_str: str = "[`{name}`](#{anchor})" - -comment_var_regex: str = r"\${(.+)}" -comment_var_replace: str = "`\\1`" - -table_header: str = """ -| Name | Default value | Description | Comment | -|------|---------------|-------------|---------| -""" - -# Needed to detect conditional variables (those which are defined twice) -# name -> (path, values) -entities: Dict[str, Tuple[str, str]] = {} - - -def make_anchor(t: str) -> str: - return "".join( - ["-" if i == "_" else i.lower() for i in t if i.isalpha() or i == "_"] - ) - - -def process_comment(comment: str) -> str: - return re.sub(comment_var_regex, comment_var_replace, comment, flags=re.MULTILINE) - - -def build_entity(path: str, entity: Entity, line_comment: Tuple[int, str]) -> None: - (line, comment) = line_comment - (name, description, default) = entity - - if name in entities: - return - - if len(default) == 0: - formatted_default: str = "`OFF`" - elif default[0] == "$": - formatted_default: str = "`{}`".format(default[2:-1]) - else: - formatted_default: str = "`" + default + "`" - - formatted_name: str = name_str.format( - anchor=make_anchor(name), name=name, path=path, line=line - ) - - formatted_description: str = "".join(description.split("\n")) - - formatted_comment: str = process_comment(comment) - - formatted_entity: str = "| {} | {} | {} | {} |".format( - formatted_name, formatted_default, formatted_description, formatted_comment - ) - - entities[name] = path, formatted_entity - - -def process_file(root_path: str, file_path: str, file_name: str) -> None: - with open(os.path.join(file_path, file_name), "r") as cmake_file: - contents: str = cmake_file.read() - - def get_line_and_comment(target: str) -> Tuple[int, str]: - contents_list: List[str] = contents.split("\n") - comment: str = "" - - for n, line in enumerate(contents_list): - if "option" not in line.lower() or target not in line: - continue - - for maybe_comment_line in contents_list[n - 1 :: -1]: - if not re.match("\s*#\s*", maybe_comment_line): - break - - comment = re.sub("\s*#\s*", "", maybe_comment_line) + " " + comment - - # line numbering starts with 1 - return n + 1, comment - - matches: Optional[List[Entity]] = re.findall( - cmake_option_regex, contents, re.MULTILINE - ) - - file_rel_path_with_name: str = os.path.join( - file_path[len(root_path) :], file_name - ) - if file_rel_path_with_name.startswith("/"): - file_rel_path_with_name = file_rel_path_with_name[1:] - - if matches: - for entity in matches: - build_entity( - file_rel_path_with_name, entity, get_line_and_comment(entity[0]) - ) - - -def process_folder(root_path: str, name: str) -> None: - for root, _, files in os.walk(os.path.join(root_path, name)): - for f in files: - if f == "CMakeLists.txt" or ".cmake" in f: - process_file(root_path, root, f) - - -def generate_cmake_flags_files() -> None: - root_path: str = os.path.join(os.path.dirname(__file__), "..", "..") - - output_file_name: str = os.path.join( - root_path, "docs/en/development/cmake-in-clickhouse.md" - ) - header_file_name: str = os.path.join( - root_path, "docs/_includes/cmake_in_clickhouse_header.md" - ) - footer_file_name: str = os.path.join( - root_path, "docs/_includes/cmake_in_clickhouse_footer.md" - ) - - process_file(root_path, root_path, "CMakeLists.txt") - process_file(root_path, os.path.join(root_path, 
"programs"), "CMakeLists.txt") - - process_folder(root_path, "base") - process_folder(root_path, "cmake") - process_folder(root_path, "src") - - with open(output_file_name, "w") as f: - with open(header_file_name, "r") as header: - f.write(header.read()) - - sorted_keys: List[str] = sorted(entities.keys()) - ignored_keys: List[str] = [] - - f.write("### ClickHouse modes\n" + table_header) - - for k in sorted_keys: - if k.startswith("ENABLE_CLICKHOUSE_"): - f.write(entities[k][1] + "\n") - ignored_keys.append(k) - - f.write( - "\n### External libraries\nNote that ClickHouse uses forks of these libraries, see https://github.com/ClickHouse-Extras.\n" - + table_header - ) - - for k in sorted_keys: - if k.startswith("ENABLE_") and ".cmake" in entities[k][0]: - f.write(entities[k][1] + "\n") - ignored_keys.append(k) - - f.write("\n\n### Other flags\n" + table_header) - - for k in sorted(set(sorted_keys).difference(set(ignored_keys))): - f.write(entities[k][1] + "\n") - - with open(footer_file_name, "r") as footer: - f.write(footer.read()) - - other_languages = [ - "docs/ja/development/cmake-in-clickhouse.md", - "docs/zh/development/cmake-in-clickhouse.md", - "docs/ru/development/cmake-in-clickhouse.md", - ] - - for lang in other_languages: - other_file_name = os.path.join(root_path, lang) - if os.path.exists(other_file_name): - os.unlink(other_file_name) - os.symlink(output_file_name, other_file_name) - - -if __name__ == "__main__": - generate_cmake_flags_files() diff --git a/docs/tools/easy_diff.py b/docs/tools/easy_diff.py deleted file mode 100755 index 14e3ca91776..00000000000 --- a/docs/tools/easy_diff.py +++ /dev/null @@ -1,186 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -import os, sys -import argparse -import subprocess -import contextlib -from git import cmd -from tempfile import NamedTemporaryFile - -SCRIPT_DESCRIPTION = """ - usage: ./easy_diff.py language/document path - - Show the difference between a language document and an English document. - - This script is based on the assumption that documents in other languages are fully synchronized with the en document at a commit. - - For example: - Execute: - ./easy_diff.py --no-pager zh/data_types - Output: - Need translate document:~/ClickHouse/docs/en/data_types/uuid.md - Need link document:~/ClickHouse/docs/en/data_types/decimal.md to ~/ClickHouse/docs/zh/data_types/decimal.md - diff --git a/docs/en/data_types/domains/ipv6.md b/docs/en/data_types/domains/ipv6.md - index 1bfbe3400b..e2abaff017 100644 - --- a/docs/en/data_types/domains/ipv6.md - +++ b/docs/en/data_types/domains/ipv6.md - @@ -4,13 +4,13 @@ - - ### Basic Usage - - -``` sql - +```sql - CREATE TABLE hits (url String, from IPv6) ENGINE = MergeTree() ORDER BY url; - - DESCRIBE TABLE hits; - ``` - - -``` - +```text - ┌─name─┬─type───┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┐ - │ url │ String │ │ │ │ │ - │ from │ IPv6 │ │ │ │ │ - @@ -19,19 +19,19 @@ DESCRIBE TABLE hits; - - OR you can use `IPv6` domain as a key: - - -``` sql - +```sql - CREATE TABLE hits (url String, from IPv6) ENGINE = MergeTree() ORDER BY from; - ... 
MORE - - OPTIONS: - -h, --help show this help message and exit - --no-pager use stdout as difference result output -""" - -SCRIPT_PATH = os.path.abspath(__file__) -CLICKHOUSE_REPO_HOME = os.path.join(os.path.dirname(SCRIPT_PATH), "..", "..") -SCRIPT_COMMAND_EXECUTOR = cmd.Git(CLICKHOUSE_REPO_HOME) - -SCRIPT_COMMAND_PARSER = argparse.ArgumentParser(add_help=False) -SCRIPT_COMMAND_PARSER.add_argument("path", type=bytes, nargs="?", default=None) -SCRIPT_COMMAND_PARSER.add_argument("--no-pager", action="store_true", default=False) -SCRIPT_COMMAND_PARSER.add_argument("-h", "--help", action="store_true", default=False) - - -def execute(commands): - return SCRIPT_COMMAND_EXECUTOR.execute(commands) - - -def get_hash(file_name): - return execute(["git", "log", "-n", "1", '--pretty=format:"%H"', file_name]) - - -def diff_file(reference_file, working_file, out): - if not os.path.exists(reference_file): - raise RuntimeError( - "reference file [" + os.path.abspath(reference_file) + "] is not exists." - ) - - if os.path.islink(working_file): - out.writelines(["Need translate document:" + os.path.abspath(reference_file)]) - elif not os.path.exists(working_file): - out.writelines( - [ - "Need link document " - + os.path.abspath(reference_file) - + " to " - + os.path.abspath(working_file) - ] - ) - elif get_hash(working_file) != get_hash(reference_file): - out.writelines( - [ - ( - execute( - [ - "git", - "diff", - get_hash(working_file).strip('"'), - reference_file, - ] - ).encode("utf-8") - ) - ] - ) - - return 0 - - -def diff_directory(reference_directory, working_directory, out): - if not os.path.isdir(reference_directory): - return diff_file(reference_directory, working_directory, out) - - for list_item in os.listdir(reference_directory): - working_item = os.path.join(working_directory, list_item) - reference_item = os.path.join(reference_directory, list_item) - if ( - diff_file(reference_item, working_item, out) - if os.path.isfile(reference_item) - else diff_directory(reference_item, working_item, out) != 0 - ): - return 1 - - return 0 - - -def find_language_doc(custom_document, other_language="en", children=[]): - if len(custom_document) == 0: - raise RuntimeError( - "The " - + os.path.join(custom_document, *children) - + " is not in docs directory." 
- ) - - if os.path.samefile(os.path.join(CLICKHOUSE_REPO_HOME, "docs"), custom_document): - return os.path.join(CLICKHOUSE_REPO_HOME, "docs", other_language, *children[1:]) - children.insert(0, os.path.split(custom_document)[1]) - return find_language_doc( - os.path.split(custom_document)[0], other_language, children - ) - - -class ToPager: - def __init__(self, temp_named_file): - self.temp_named_file = temp_named_file - - def writelines(self, lines): - self.temp_named_file.writelines(lines) - - def close(self): - self.temp_named_file.flush() - git_pager = execute(["git", "var", "GIT_PAGER"]) - subprocess.check_call([git_pager, self.temp_named_file.name]) - self.temp_named_file.close() - - -class ToStdOut: - def writelines(self, lines): - self.system_stdout_stream.writelines(lines) - - def close(self): - self.system_stdout_stream.flush() - - def __init__(self, system_stdout_stream): - self.system_stdout_stream = system_stdout_stream - - -if __name__ == "__main__": - arguments = SCRIPT_COMMAND_PARSER.parse_args() - if arguments.help or not arguments.path: - sys.stdout.write(SCRIPT_DESCRIPTION) - sys.exit(0) - - working_language = os.path.join(CLICKHOUSE_REPO_HOME, "docs", arguments.path) - with contextlib.closing( - ToStdOut(sys.stdout) - if arguments.no_pager - else ToPager(NamedTemporaryFile("r+")) - ) as writer: - exit( - diff_directory( - find_language_doc(working_language), working_language, writer - ) - ) diff --git a/docs/tools/github.py b/docs/tools/github.py deleted file mode 100644 index 3a6f155e25d..00000000000 --- a/docs/tools/github.py +++ /dev/null @@ -1,41 +0,0 @@ -import collections -import copy -import io -import logging -import os -import random -import sys -import tarfile -import time - -import requests - -import util - - -def get_events(args): - events = [] - skip = True - with open(os.path.join(args.docs_dir, "..", "README.md")) as f: - for line in f: - if skip: - if "Upcoming Events" in line: - skip = False - else: - if not line: - continue - line = line.strip().split("](") - if len(line) == 2: - tail = line[1].split(") ") - events.append( - { - "signup_link": tail[0], - "event_name": line[0].replace("* [", ""), - "event_date": tail[1].replace("on ", "").replace(".", ""), - } - ) - return events - - -if __name__ == "__main__": - logging.basicConfig(level=logging.DEBUG, stream=sys.stderr) diff --git a/docs/tools/nav.py b/docs/tools/nav.py deleted file mode 100644 index e3df85bbe4e..00000000000 --- a/docs/tools/nav.py +++ /dev/null @@ -1,190 +0,0 @@ -import collections -import datetime -import hashlib -import logging -import os - -import mkdocs.structure.nav - -import util - - -def find_first_header(content): - for line in content.split("\n"): - if line.startswith("#"): - no_hash = line.lstrip("#") - return no_hash.split("{", 1)[0].strip() - - -def build_nav_entry(root, args): - if root.endswith("images"): - return None, None, None - result_items = [] - index_meta, index_content = util.read_md_file(os.path.join(root, "index.md")) - current_title = index_meta.get("toc_folder_title", index_meta.get("toc_title")) - current_title = current_title or index_meta.get( - "title", find_first_header(index_content) - ) - for filename in os.listdir(root): - path = os.path.join(root, filename) - if os.path.isdir(path): - prio, title, payload = build_nav_entry(path, args) - if title and payload: - result_items.append((prio, title, payload)) - elif filename.endswith(".md"): - path = os.path.join(root, filename) - - meta = "" - content = "" - - try: - meta, content = 
util.read_md_file(path) - except: - print("Error in file: {}".format(path)) - raise - - path = path.split("/", 2)[-1] - title = meta.get("toc_title", find_first_header(content)) - if title: - title = title.strip().rstrip(".") - else: - title = meta.get("toc_folder_title", "hidden") - prio = meta.get("toc_priority", 9999) - logging.debug(f"Nav entry: {prio}, {title}, {path}") - if meta.get("toc_hidden") or not content.strip(): - title = "hidden" - if title == "hidden": - title = "hidden-" + hashlib.sha1(content.encode("utf-8")).hexdigest() - if args.nav_limit and len(result_items) >= args.nav_limit: - break - result_items.append((prio, title, path)) - result_items = sorted(result_items, key=lambda x: (x[0], x[1])) - result = collections.OrderedDict([(item[1], item[2]) for item in result_items]) - if index_meta.get("toc_hidden_folder"): - current_title += "|hidden-folder" - return index_meta.get("toc_priority", 10000), current_title, result - - -def build_docs_nav(lang, args): - docs_dir = os.path.join(args.docs_dir, lang) - _, _, nav = build_nav_entry(docs_dir, args) - result = [] - index_key = None - for key, value in list(nav.items()): - if key and value: - if value == "index.md": - index_key = key - continue - result.append({key: value}) - if args.nav_limit and len(result) >= args.nav_limit: - break - if index_key: - key = list(result[0].keys())[0] - result[0][key][index_key] = "index.md" - result[0][key].move_to_end(index_key, last=False) - return result - - -def build_blog_nav(lang, args): - blog_dir = os.path.join(args.blog_dir, lang) - years = sorted(os.listdir(blog_dir), reverse=True) - result_nav = [{"hidden": "index.md"}] - post_meta = collections.OrderedDict() - for year in years: - year_dir = os.path.join(blog_dir, year) - if not os.path.isdir(year_dir): - continue - result_nav.append({year: collections.OrderedDict()}) - posts = [] - post_meta_items = [] - for post in os.listdir(year_dir): - post_path = os.path.join(year_dir, post) - if not post.endswith(".md"): - raise RuntimeError( - f"Unexpected non-md file in posts folder: {post_path}" - ) - meta, _ = util.read_md_file(post_path) - post_date = meta["date"] - post_title = meta["title"] - if datetime.date.fromisoformat(post_date) > datetime.date.today(): - continue - posts.append( - ( - post_date, - post_title, - os.path.join(year, post), - ) - ) - if post_title in post_meta: - raise RuntimeError(f"Duplicate post title: {post_title}") - if not post_date.startswith(f"{year}-"): - raise RuntimeError( - f"Post date {post_date} doesn't match the folder year {year}: {post_title}" - ) - post_url_part = post.replace(".md", "") - post_meta_items.append( - ( - post_date, - { - "date": post_date, - "title": post_title, - "image": meta.get("image"), - "url": f"/blog/{lang}/{year}/{post_url_part}/", - }, - ) - ) - for _, title, path in sorted(posts, reverse=True): - result_nav[-1][year][title] = path - for _, post_meta_item in sorted( - post_meta_items, reverse=True, key=lambda item: item[0] - ): - post_meta[post_meta_item["title"]] = post_meta_item - return result_nav, post_meta - - -def _custom_get_navigation(files, config): - nav_config = config["nav"] or mkdocs.structure.nav.nest_paths( - f.src_path for f in files.documentation_pages() - ) - items = mkdocs.structure.nav._data_to_navigation(nav_config, files, config) - if not isinstance(items, list): - items = [items] - - pages = mkdocs.structure.nav._get_by_type(items, mkdocs.structure.nav.Page) - - mkdocs.structure.nav._add_previous_and_next_links(pages) - 
mkdocs.structure.nav._add_parent_links(items) - - missing_from_config = [ - file for file in files.documentation_pages() if file.page is None - ] - if missing_from_config: - files._files = [ - file for file in files._files if file not in missing_from_config - ] - - links = mkdocs.structure.nav._get_by_type(items, mkdocs.structure.nav.Link) - for link in links: - scheme, netloc, path, params, query, fragment = mkdocs.structure.nav.urlparse( - link.url - ) - if scheme or netloc: - mkdocs.structure.nav.log.debug( - "An external link to '{}' is included in " - "the 'nav' configuration.".format(link.url) - ) - elif link.url.startswith("/"): - mkdocs.structure.nav.log.debug( - "An absolute path to '{}' is included in the 'nav' configuration, " - "which presumably points to an external resource.".format(link.url) - ) - else: - msg = ( - "A relative path to '{}' is included in the 'nav' configuration, " - "which is not found in the documentation files".format(link.url) - ) - mkdocs.structure.nav.log.warning(msg) - return mkdocs.structure.nav.Navigation(items, pages) - - -mkdocs.structure.nav.get_navigation = _custom_get_navigation diff --git a/docs/tools/redirects.py b/docs/tools/redirects.py index 5d222376683..1b5490a040f 100644 --- a/docs/tools/redirects.py +++ b/docs/tools/redirects.py @@ -27,45 +27,6 @@ def write_redirect_html(out_path, to_url): ) -def build_redirect_html(args, base_prefix, lang, output_dir, from_path, to_path): - out_path = os.path.join( - output_dir, - lang, - from_path.replace("/index.md", "/index.html").replace(".md", "/index.html"), - ) - target_path = to_path.replace("/index.md", "/").replace(".md", "/") - - if target_path[0:7] != "http://" and target_path[0:8] != "https://": - to_url = f"/{base_prefix}/{lang}/{target_path}" - else: - to_url = target_path - - to_url = to_url.strip() - write_redirect_html(out_path, to_url) - - -def build_docs_redirects(args): - with open(os.path.join(args.docs_dir, "redirects.txt"), "r") as f: - for line in f: - for lang in args.lang.split(","): - from_path, to_path = line.split(" ", 1) - build_redirect_html( - args, "docs", lang, args.docs_output_dir, from_path, to_path - ) - - -def build_blog_redirects(args): - for lang in args.blog_lang.split(","): - redirects_path = os.path.join(args.blog_dir, lang, "redirects.txt") - if os.path.exists(redirects_path): - with open(redirects_path, "r") as f: - for line in f: - from_path, to_path = line.split(" ", 1) - build_redirect_html( - args, "blog", lang, args.blog_output_dir, from_path, to_path - ) - - def build_static_redirects(args): for static_redirect in [ ("benchmark.html", "/benchmark/dbms/"), diff --git a/docs/tools/requirements.txt b/docs/tools/requirements.txt index dd641c13629..b6f2d4549e5 100644 --- a/docs/tools/requirements.txt +++ b/docs/tools/requirements.txt @@ -1,39 +1,32 @@ Babel==2.9.1 -backports-abc==0.5 -backports.functools-lru-cache==1.6.1 -beautifulsoup4==4.9.1 -certifi==2020.4.5.2 -chardet==3.0.4 -click==7.1.2 -closure==20191111 -cssmin==0.2.0 -future==0.18.2 -htmlmin==0.1.12 -idna==2.10 Jinja2==3.0.3 -jinja2-highlight==0.6.1 -jsmin==3.0.0 -livereload==2.6.3 Markdown==3.3.2 -MarkupSafe==2.1.0 -mkdocs==1.3.0 -mkdocs-htmlproofer-plugin==0.0.3 -mkdocs-macros-plugin==0.4.20 -nltk==3.7 -nose==1.3.7 -protobuf==3.14.0 -numpy==1.21.2 -pymdown-extensions==8.0 -python-slugify==4.0.1 +MarkupSafe==2.1.1 +MarkupSafe==2.1.1 PyYAML==6.0 -repackage==0.7.3 -requests==2.25.1 -singledispatch==3.4.0.3 +Pygments>=2.12.0 +beautifulsoup4==4.9.1 +click==7.1.2 +ghp_import==2.1.0 
+importlib_metadata==4.11.4 +jinja2-highlight==0.6.1 +livereload==2.6.3 +mergedeep==1.3.4 +mkdocs-macros-plugin==0.4.20 +mkdocs-macros-test==0.1.0 +mkdocs-material==8.2.15 +mkdocs==1.3.0 +mkdocs_material_extensions==1.0.3 +packaging==21.3 +pygments==2.12.0 +pymdown_extensions==9.4 +pyparsing==3.0.9 +python-slugify==4.0.1 +python_dateutil==2.8.2 +pytz==2022.1 six==1.15.0 -soupsieve==2.0.1 +soupsieve==2.3.2 termcolor==1.1.0 +text_unidecode==1.3 tornado==6.1 -Unidecode==1.1.1 -urllib3>=1.26.8 -Pygments>=2.11.2 - +zipp==3.8.0 diff --git a/docs/tools/util.py b/docs/tools/util.py index ec670725122..a5ebb1b11b2 100644 --- a/docs/tools/util.py +++ b/docs/tools/util.py @@ -124,7 +124,7 @@ def init_jinja2_env(args): env = jinja2.Environment( loader=jinja2.FileSystemLoader( - [args.website_dir, os.path.join(args.docs_dir, "_includes")] + [args.website_dir, os.path.join(args.src_dir, "docs", "_includes")] ), extensions=["jinja2.ext.i18n", "jinja2_highlight.HighlightExtension"], ) diff --git a/docs/tools/webpack.config.js b/docs/tools/webpack.config.js deleted file mode 100644 index e0dea964101..00000000000 --- a/docs/tools/webpack.config.js +++ /dev/null @@ -1,81 +0,0 @@ -const path = require('path'); -const jsPath = path.resolve(__dirname, '../../website/src/js'); -const scssPath = path.resolve(__dirname, '../../website/src/scss'); - -console.log(path.resolve(__dirname, 'node_modules/bootstrap', require('bootstrap/package.json').sass)); - -module.exports = { - - mode: ('development' === process.env.NODE_ENV) && 'development' || 'production', - - ...(('development' === process.env.NODE_ENV) && { - watch: true, - }), - - entry: [ - path.resolve(scssPath, 'bootstrap.scss'), - path.resolve(scssPath, 'main.scss'), - path.resolve(jsPath, 'main.js'), - ], - - output: { - path: path.resolve(__dirname, '../../website'), - filename: 'js/main.js', - }, - - resolve: { - alias: { - bootstrap: path.resolve(__dirname, 'node_modules/bootstrap', require('bootstrap/package.json').sass), - }, - }, - - module: { - rules: [{ - test: /\.js$/, - exclude: /(node_modules)/, - use: [{ - loader: 'babel-loader', - options: { - presets: ['@babel/preset-env'], - }, - }], - }, { - test: /\.scss$/, - use: [{ - loader: 'file-loader', - options: { - sourceMap: true, - outputPath: (url, entryPath, context) => { - if (0 === entryPath.indexOf(scssPath)) { - const outputFile = entryPath.slice(entryPath.lastIndexOf('/') + 1, -5) - const outputPath = entryPath.slice(0, entryPath.lastIndexOf('/')).slice(scssPath.length + 1) - return `./css/${outputPath}/${outputFile}.css` - } - return `./css/${url}` - }, - }, - }, { - loader: 'postcss-loader', - options: { - options: {}, - plugins: () => ([ - require('autoprefixer'), - ('production' === process.env.NODE_ENV) && require('cssnano'), - ].filter(plugin => plugin)), - } - }, { - loader: 'sass-loader', - options: { - implementation: require('sass'), - implementation: require('sass'), - sourceMap: ('development' === process.env.NODE_ENV), - sassOptions: { - importer: require('node-sass-glob-importer')(), - precision: 10, - }, - }, - }], - }], - }, - -}; diff --git a/docs/zh/sql-reference/functions/encoding-functions.md b/docs/zh/sql-reference/functions/encoding-functions.md index f1152965d2d..b9a3cbf0550 100644 --- a/docs/zh/sql-reference/functions/encoding-functions.md +++ b/docs/zh/sql-reference/functions/encoding-functions.md @@ -68,12 +68,306 @@ SELECT char(0xE4, 0xBD, 0xA0, 0xE5, 0xA5, 0xBD) AS hello; ## hex {#hex} -接受`String`,`unsigned 
integer`,`Date`或`DateTime`类型的参数。返回包含参数的十六进制表示的字符串。使用大写字母`A-F`。不使用`0x`前缀或`h`后缀。对于字符串,所有字节都简单地编码为两个十六进制数字。数字转换为大端(«易阅读»)格式。对于数字,去除其中较旧的零,但仅限整个字节。例如,`hex(1)='01'`。 `Date`被编码为自Unix时间开始以来的天数。 `DateTime`编码为自Unix时间开始以来的秒数。 +返回包含参数的十六进制表示的字符串。 -## unhex(str) {#unhexstr} +别名为: `HEX`。 -接受包含任意数量的十六进制数字的字符串,并返回包含相应字节的字符串。支持大写和小写字母A-F。十六进制数字的数量不必是偶数。如果是奇数,则最后一位数被解释为00-0F字节的低位。如果参数字符串包含除十六进制数字以外的任何内容,则返回一些实现定义的结果(不抛出异常)。 -如果要将结果转换为数字,可以使用«reverse»和«reinterpretAsType»函数。 +**语法** + +``` sql +hex(arg) +``` + +该函数使用大写字母`A-F`,不使用任何前缀(如`0x`)或后缀(如`h`) + +对于整数参数,它从高到低(大端或“人类可读”顺序)打印十六进制数字(“半字节”)。它从左侧第一个非零字节开始(省略前导零字节),但即使前导数字为零,也始终打印每个字节的两个数字。 + +类型为[Date](../../sql-reference/data-types/date.md)和[DateTime](../../sql-reference/data-types/datetime.md)的值将被格式化为相应的整数(日期为 Epoch 以来的天数,DateTime 为 Unix Timestamp 的值)。 + +对于[String](../../sql-reference/data-types/string.md)和[FixedString](../../sql-reference/data-types/fixedstring.md),所有字节都被简单地编码为两个十六进制数字。零字节不会被省略。 + +类型为[Float](../../sql-reference/data-types/float.md)和[Decimal](../../sql-reference/data-types/decimal.md)的值被编码为它们在内存中的表示。由于我们支持小端架构,它们以小端编码。零前导尾随字节不会被省略。 + +类型为[UUID](../data-types/uuid.md)的值被编码为大端顺序字符串。 + +**参数** + +- `arg` — 要转换为十六进制的值。类型为[String](../../sql-reference/data-types/string.md),[UInt](../../sql-reference/data-types/int-uint.md),[Float](../../sql-reference/data-types/float.md),[Decimal](../../sql-reference/data-types/decimal.md),[Date](../../sql-reference/data-types/date.md)或者[DateTime](../../sql-reference/data-types/datetime.md)。 + +**返回值** + +- 具有参数的十六进制表示的字符串。 + +类型为:[String](../../sql-reference/data-types/string.md)。 + +**示例** + +查询语句: + +``` sql +SELECT hex(1); +``` + +结果: + +``` text +01 +``` + +查询语句: + +``` sql +SELECT hex(toFloat32(number)) AS hex_presentation FROM numbers(15, 2); +``` + +结果: + +``` text +┌─hex_presentation─┐ +│ 00007041 │ +│ 00008041 │ +└──────────────────┘ +``` + +查询语句: + +``` sql +SELECT hex(toFloat64(number)) AS hex_presentation FROM numbers(15, 2); +``` + +结果: + +``` text +┌─hex_presentation─┐ +│ 0000000000002E40 │ +│ 0000000000003040 │ +└──────────────────┘ +``` + +查询语句: + +``` sql +SELECT lower(hex(toUUID('61f0c404-5cb3-11e7-907b-a6006ad3dba0'))) as uuid_hex +``` + +结果: + +``` text +┌─uuid_hex─────────────────────────┐ +│ 61f0c4045cb311e7907ba6006ad3dba0 │ +└──────────────────────────────────┘ +``` + +## unhex {#unhexstr} + +执行[hex](#hex)函数的相反操作。它将每对十六进制数字(在参数中)解释为一个数字,并将其转换为该数字表示的字节。返回值是一个二进制字符串 (BLOB)。 + +如果要将结果转换为数字,可以使用 [reverse](../../sql-reference/functions/string-functions.md#reverse) 和 [reinterpretAs<Type>](../../sql-reference/functions/type-conversion-functions.md#type-conversion-functions) 函数。 + +:::注意 +如果从 `clickhouse-client` 中调用 `unhex`,二进制字符串将使用 UTF-8 显示。 +::: + +别名为:`UNHEX`。 + +**语法** + +``` sql +unhex(arg) +``` + +**参数** + +- `arg` — 包含任意数量的十六进制数字的字符串。类型为:[String](../../sql-reference/data-types/string.md)。 + +支持大写和小写字母A-F。十六进制数字的数量不必是偶数。如果是奇数,则最后一位数被解释为00-0F字节的低位。如果参数字符串包含除十六进制数字以外的任何内容,则返回一些实现定义的结果(不抛出异常)。对于数字参数, unhex()不执行 hex(N) 的倒数。 + +**返回值** + +- 二进制字符串 (BLOB)。 + +类型为: [String](../../sql-reference/data-types/string.md)。 + +**示例** + +查询语句: +``` sql +SELECT unhex('303132'), UNHEX('4D7953514C'); +``` + +结果: +``` text +┌─unhex('303132')─┬─unhex('4D7953514C')─┐ +│ 012 │ MySQL │ +└─────────────────┴─────────────────────┘ +``` + +查询语句: + +``` sql +SELECT reinterpretAsUInt64(reverse(unhex('FFF'))) AS num; +``` + +结果: + +``` text +┌──num─┐ +│ 4095 │ +└──────┘ +``` + +## bin {#bin} + +返回一个包含参数二进制表示的字符串。 + +**语法** + +``` sql +bin(arg) +``` + +别名为: `BIN`。 + +对于整数参数,它从最高有效到最低有效(大端或“人类可读”顺序)打印 bin 
数字。它从最重要的非零字节开始(省略前导零字节),但如果前导数字为零,则始终打印每个字节的八位数字。 + +类型为[Date](../../sql-reference/data-types/date.md)和[DateTime](../../sql-reference/data-types/datetime.md)的值被格式化为相应的整数(`Date` 为 Epoch 以来的天数,`DateTime` 为 Unix Timestamp 的值)。 + +对于[String](../../sql-reference/data-types/string.md)和[FixedString](../../sql-reference/data-types/fixedstring.md),所有字节都被简单地编码为八个二进制数。零字节不会被省略。 + +类型为[Float](../../sql-reference/data-types/float.md)和[Decimal](../../sql-reference/data-types/decimal.md)的值被编码为它们在内存中的表示。由于我们支持小端架构,它们以小端编码。零前导尾随字节不会被省略。 + +类型为[UUID](../data-types/uuid.md)的值被编码为大端顺序字符串。 + +**参数** + +- `arg` — 要转换为二进制的值。类型为[String](../../sql-reference/data-types/string.md),[FixedString](../../sql-reference/data-types/fixedstring.md),[UInt](../../sql-reference/data-types/int-uint.md),[Float](../../sql-reference/data-types/float.md),[Decimal](../../sql-reference/data-types/decimal.md),[Date](../../sql-reference/data-types/date.md)或者[DateTime](../../sql-reference/data-types/datetime.md)。 + +**返回值** + +- 具有参数的二进制表示的字符串。 + +类型为: [String](../../sql-reference/data-types/string.md)。 + +**示例** + +查询语句: + +``` sql +SELECT bin(14); +``` + +结果: + +``` text +┌─bin(14)──┐ +│ 00001110 │ +└──────────┘ +``` + +查询语句: + +``` sql +SELECT bin(toFloat32(number)) AS bin_presentation FROM numbers(15, 2); +``` + +结果: + +``` text +┌─bin_presentation─────────────────┐ +│ 00000000000000000111000001000001 │ +│ 00000000000000001000000001000001 │ +└──────────────────────────────────┘ +``` + +查询语句: + +``` sql +SELECT bin(toFloat64(number)) AS bin_presentation FROM numbers(15, 2); +``` + +结果: + +``` text +┌─bin_presentation─────────────────────────────────────────────────┐ +│ 0000000000000000000000000000000000000000000000000010111001000000 │ +│ 0000000000000000000000000000000000000000000000000011000001000000 │ +└──────────────────────────────────────────────────────────────────┘ +``` + +查询语句: + +``` sql +SELECT bin(toUUID('61f0c404-5cb3-11e7-907b-a6006ad3dba0')) as bin_uuid +``` + +结果: + +``` text +┌─bin_uuid─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ 01100001111100001100010000000100010111001011001100010001111001111001000001111011101001100000000001101010110100111101101110100000 │ +└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +``` + + +## unbin {#unbinstr} + +将每对二进制数字(在参数中)解释为一个数字,并将其转换为该数字表示的字节。这些函数执行与 [bin](#bin) 相反的操作。 + +**语法** + +``` sql +unbin(arg) +``` + +别名为: `UNBIN`。 + +对于数字参数,`unbin()` 不会返回 `bin()` 的倒数。如果要将结果转换为数字,可以使用[reverse](../../sql-reference/functions/string-functions.md#reverse) 和 [reinterpretAs<Type>](../../sql-reference/functions/type-conversion-functions.md#reinterpretasuint8163264) 函数。 + +:::note +如果从 `clickhouse-client` 中调用 `unbin`,则使用 UTF-8 显示二进制字符串。 +::: + +支持二进制数字`0`和`1`。二进制位数不必是八的倍数。如果参数字符串包含二进制数字以外的任何内容,则返回一些实现定义的结果(不抛出异常)。 + +**参数** + +- `arg` — 包含任意数量的二进制数字的字符串。类型为[String](../../sql-reference/data-types/string.md)。 + +**返回值** + +- 二进制字符串 (BLOB)。 + +类型为:[String](../../sql-reference/data-types/string.md)。 + +**示例** + +查询语句: + +``` sql +SELECT UNBIN('001100000011000100110010'), UNBIN('0100110101111001010100110101000101001100'); +``` + +结果: + +``` text +┌─unbin('001100000011000100110010')─┬─unbin('0100110101111001010100110101000101001100')─┐ +│ 012 │ MySQL │ +└───────────────────────────────────┴───────────────────────────────────────────────────┘ +``` + +查询语句: + +``` sql +SELECT reinterpretAsUInt64(reverse(unbin('1110'))) AS num; +``` + +结果: + +``` 
text +┌─num─┐ +│ 14 │ +└─────┘ +``` ## UUIDStringToNum(str) {#uuidstringtonumstr} @@ -91,4 +385,55 @@ SELECT char(0xE4, 0xBD, 0xA0, 0xE5, 0xA5, 0xBD) AS hello; 接受一个整数。返回一个UInt64类型数组,其中包含一组2的幂列表,其列表中的所有值相加等于这个整数。数组中的数字按升序排列。 +## bitPositionsToArray(num) {#bitpositionstoarraynum} + +接受整数并将其转换为无符号整数。返回一个 `UInt64` 数字数组,其中包含 `arg` 中等于 `1` 的位的位置列表,按升序排列。 + +**语法** + +```sql +bitPositionsToArray(arg) +``` + +**参数** + +- `arg` — 整数值。类型为[Int/UInt](../../sql-reference/data-types/int-uint.md)。 + +**返回值** + +- 包含等于 `1` 的位位置列表的数组,按升序排列。 + +类型为: [Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md))。 + +**示例** + +查询语句: + +``` sql +SELECT bitPositionsToArray(toInt8(1)) AS bit_positions; +``` + +结果: + +``` text +┌─bit_positions─┐ +│ [0] │ +└───────────────┘ +``` + +查询语句: + +``` sql +SELECT bitPositionsToArray(toInt8(-1)) AS bit_positions; +``` + +结果: + +``` text +┌─bit_positions─────┐ +│ [0,1,2,3,4,5,6,7] │ +└───────────────────┘ +``` + + [来源文章](https://clickhouse.com/docs/en/query_language/functions/encoding_functions/) diff --git a/programs/bash-completion/completions/clickhouse-bootstrap b/programs/bash-completion/completions/clickhouse-bootstrap index 98fcd68db16..8684f122503 100644 --- a/programs/bash-completion/completions/clickhouse-bootstrap +++ b/programs/bash-completion/completions/clickhouse-bootstrap @@ -34,6 +34,12 @@ CLICKHOUSE_QueryProcessingStage=( with_mergeable_state_after_aggregation_and_limit ) +CLICKHOUSE_QueryKind=( + initial_query + secondary_query + no_query +) + CLICKHOUSE_Format=( CapnProto PostgreSQLWire @@ -124,6 +130,10 @@ function _complete_for_clickhouse_generic_bin_impl() COMPREPLY=( $(compgen -W "${CLICKHOUSE_QueryProcessingStage[*]}" -- "$cur") ) return 1 ;; + --query_kind) + COMPREPLY=( $(compgen -W "${CLICKHOUSE_QueryKind[*]}" -- "$cur") ) + return 1 + ;; --send_logs_level) COMPREPLY=( $(compgen -W "${CLICKHOUSE_logs_level[*]}" -- "$cur") ) return 1 diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 4e4e0cc07f5..cbbf195a68c 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -1038,6 +1038,7 @@ void Client::processConfig() ClientInfo & client_info = global_context->getClientInfo(); client_info.setInitialQuery(); client_info.quota_key = config().getString("quota_key", ""); + client_info.query_kind = query_kind; } diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index f3fa7ff2bfa..4f3b92bbcf0 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -544,8 +544,7 @@ void LocalServer::processConfig() if (uncompressed_cache_size) global_context->setUncompressedCache(uncompressed_cache_size); - /// Size of cache for marks (index of MergeTree family of tables). It is necessary. - /// Specify default value for mark_cache_size explicitly! + /// Size of cache for marks (index of MergeTree family of tables). size_t mark_cache_size = config().getUInt64("mark_cache_size", 5368709120); if (mark_cache_size) global_context->setMarkCache(mark_cache_size); @@ -555,8 +554,7 @@ void LocalServer::processConfig() if (index_uncompressed_cache_size) global_context->setIndexUncompressedCache(index_uncompressed_cache_size); - /// Size of cache for index marks (index of MergeTree skip indices). It is necessary. - /// Specify default value for index_mark_cache_size explicitly! + /// Size of cache for index marks (index of MergeTree skip indices). 
size_t index_mark_cache_size = config().getUInt64("index_mark_cache_size", 0); if (index_mark_cache_size) global_context->setIndexMarkCache(index_mark_cache_size); @@ -626,6 +624,7 @@ void LocalServer::processConfig() ClientInfo & client_info = global_context->getClientInfo(); client_info.setInitialQuery(); + client_info.query_kind = query_kind; } diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 752ff51ba4f..defc66b0ed9 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1351,8 +1351,8 @@ int Server::main(const std::vector & /*args*/) settings.async_insert_max_data_size, AsynchronousInsertQueue::Timeout{.busy = settings.async_insert_busy_timeout_ms, .stale = settings.async_insert_stale_timeout_ms})); - /// Size of cache for marks (index of MergeTree family of tables). It is mandatory. - size_t mark_cache_size = config().getUInt64("mark_cache_size"); + /// Size of cache for marks (index of MergeTree family of tables). + size_t mark_cache_size = config().getUInt64("mark_cache_size", 5368709120); if (!mark_cache_size) LOG_ERROR(log, "Too low mark cache size will lead to severe performance degradation."); if (mark_cache_size > max_cache_size) @@ -1368,8 +1368,7 @@ int Server::main(const std::vector & /*args*/) if (index_uncompressed_cache_size) global_context->setIndexUncompressedCache(index_uncompressed_cache_size); - /// Size of cache for index marks (index of MergeTree skip indices). It is necessary. - /// Specify default value for index_mark_cache_size explicitly! + /// Size of cache for index marks (index of MergeTree skip indices). size_t index_mark_cache_size = config().getUInt64("index_mark_cache_size", 0); if (index_mark_cache_size) global_context->setIndexMarkCache(index_mark_cache_size); diff --git a/programs/server/config.xml b/programs/server/config.xml index bd54051be19..203684a9e00 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -365,6 +365,59 @@ /var/lib/clickhouse/ + + + + /var/lib/clickhouse/tmp/ @@ -551,6 +604,9 @@ if this setting is true the user B will see all rows, and if this setting is false the user B will see no rows. By default this setting is false for compatibility with earlier access configurations. 
--> false + + false diff --git a/programs/server/embedded.xml b/programs/server/embedded.xml index ba0df99dfe0..2b6c4d9f770 100644 --- a/programs/server/embedded.xml +++ b/programs/server/embedded.xml @@ -13,7 +13,6 @@ ./ 8589934592 - 5368709120 true diff --git a/programs/static-files-disk-uploader/static-files-disk-uploader.cpp b/programs/static-files-disk-uploader/static-files-disk-uploader.cpp index 20307c0ccd3..a10c25c3342 100644 --- a/programs/static-files-disk-uploader/static-files-disk-uploader.cpp +++ b/programs/static-files-disk-uploader/static-files-disk-uploader.cpp @@ -71,7 +71,7 @@ void processFile(const fs::path & file_path, const fs::path & dst_path, bool tes dst_buf->next(); dst_buf->finalize(); } -}; +} void processTableFiles(const fs::path & data_path, fs::path dst_path, bool test_mode, bool link) diff --git a/src/Access/AccessControl.cpp b/src/Access/AccessControl.cpp index 263b8d0604f..d74695e645e 100644 --- a/src/Access/AccessControl.cpp +++ b/src/Access/AccessControl.cpp @@ -163,6 +163,10 @@ void AccessControl::setUpFromMainConfig(const Poco::Util::AbstractConfiguration "access_control_improvements.users_without_row_policies_can_read_rows", false /* false because we need to be compatible with earlier access configurations */)); + setOnClusterQueriesRequireClusterGrant(config_.getBool( + "access_control_improvements.on_cluster_queries_require_cluster_grant", + false /* false because we need to be compatible with earlier access configurations */)); + addStoragesFromMainConfig(config_, config_path_, get_zookeeper_function_); } diff --git a/src/Access/AccessControl.h b/src/Access/AccessControl.h index 954fec8b97f..4ee29aa20c7 100644 --- a/src/Access/AccessControl.h +++ b/src/Access/AccessControl.h @@ -131,6 +131,10 @@ public: void setEnabledUsersWithoutRowPoliciesCanReadRows(bool enable) { users_without_row_policies_can_read_rows = enable; } bool isEnabledUsersWithoutRowPoliciesCanReadRows() const { return users_without_row_policies_can_read_rows; } + /// Require CLUSTER grant for ON CLUSTER queries. + void setOnClusterQueriesRequireClusterGrant(bool enable) { on_cluster_queries_require_cluster_grant = enable; } + bool doesOnClusterQueriesRequireClusterGrant() const { return on_cluster_queries_require_cluster_grant; } + UUID authenticate(const Credentials & credentials, const Poco::Net::IPAddress & address) const; void setExternalAuthenticatorsConfig(const Poco::Util::AbstractConfiguration & config); @@ -188,6 +192,7 @@ private: std::atomic_bool allow_plaintext_password = true; std::atomic_bool allow_no_password = true; std::atomic_bool users_without_row_policies_can_read_rows = false; + std::atomic_bool on_cluster_queries_require_cluster_grant = false; }; } diff --git a/src/Access/Common/AccessType.h b/src/Access/Common/AccessType.h index 82dc04db684..8c10fd7e150 100644 --- a/src/Access/Common/AccessType.h +++ b/src/Access/Common/AccessType.h @@ -188,6 +188,8 @@ enum class AccessType M(HIVE, "", GLOBAL, SOURCES) \ M(SOURCES, "", GROUP, ALL) \ \ + M(CLUSTER, "", GLOBAL, ALL) /* ON CLUSTER queries */ \ + \ M(ALL, "ALL PRIVILEGES", GROUP, NONE) /* full access */ \ M(NONE, "USAGE, NO PRIVILEGES", GROUP, NONE) /* no access */ diff --git a/src/Access/ContextAccess.cpp b/src/Access/ContextAccess.cpp index aa4ed6cb41f..28926310c20 100644 --- a/src/Access/ContextAccess.cpp +++ b/src/Access/ContextAccess.cpp @@ -359,7 +359,7 @@ std::shared_ptr ContextAccess::getAccessRightsWithImplicit() template -bool ContextAccess::checkAccessImplHelper(const AccessFlags & flags, const Args &... 
args) const +bool ContextAccess::checkAccessImplHelper(AccessFlags flags, const Args &... args) const { auto access_granted = [&] { @@ -379,6 +379,9 @@ bool ContextAccess::checkAccessImplHelper(const AccessFlags & flags, const Args return false; }; + if (flags & AccessType::CLUSTER && !access_control->doesOnClusterQueriesRequireClusterGrant()) + flags &= ~AccessType::CLUSTER; + if (!flags || is_full_access) return access_granted(); diff --git a/src/Access/ContextAccess.h b/src/Access/ContextAccess.h index 44073320a4c..5742b6a3222 100644 --- a/src/Access/ContextAccess.h +++ b/src/Access/ContextAccess.h @@ -179,7 +179,7 @@ private: bool checkAccessImpl(const AccessRightsElements & elements) const; template - bool checkAccessImplHelper(const AccessFlags & flags, const Args &... args) const; + bool checkAccessImplHelper(AccessFlags flags, const Args &... args) const; template bool checkAccessImplHelper(const AccessRightsElement & element) const; diff --git a/src/AggregateFunctions/AggregateFunctionMannWhitney.h b/src/AggregateFunctions/AggregateFunctionMannWhitney.h index 887769dfbf5..089f70cd26b 100644 --- a/src/AggregateFunctions/AggregateFunctionMannWhitney.h +++ b/src/AggregateFunctions/AggregateFunctionMannWhitney.h @@ -245,4 +245,4 @@ public: }; -}; +} diff --git a/src/AggregateFunctions/AggregateFunctionMeanZTest.h b/src/AggregateFunctions/AggregateFunctionMeanZTest.h index e4be2503d87..7fecff591e6 100644 --- a/src/AggregateFunctions/AggregateFunctionMeanZTest.h +++ b/src/AggregateFunctions/AggregateFunctionMeanZTest.h @@ -136,4 +136,4 @@ public: } }; -}; +} diff --git a/src/AggregateFunctions/AggregateFunctionRankCorrelation.h b/src/AggregateFunctions/AggregateFunctionRankCorrelation.h index 733416d4721..a9bf8254f35 100644 --- a/src/AggregateFunctions/AggregateFunctionRankCorrelation.h +++ b/src/AggregateFunctions/AggregateFunctionRankCorrelation.h @@ -102,4 +102,4 @@ public: }; -}; +} diff --git a/src/AggregateFunctions/AggregateFunctionTTest.h b/src/AggregateFunctions/AggregateFunctionTTest.h index 4c939121a72..7ef5cfce9c9 100644 --- a/src/AggregateFunctions/AggregateFunctionTTest.h +++ b/src/AggregateFunctions/AggregateFunctionTTest.h @@ -234,4 +234,4 @@ public: } }; -}; +} diff --git a/src/Backups/BackupUtils.cpp b/src/Backups/BackupUtils.cpp index 33312e8b44b..270dad2d594 100644 --- a/src/Backups/BackupUtils.cpp +++ b/src/Backups/BackupUtils.cpp @@ -366,7 +366,7 @@ namespace { if (info.zk_path.empty()) { - for (auto & [relative_path, backup_entry] : info.data) + for (const auto & [relative_path, backup_entry] : info.data) res.emplace_back(info.data_path + relative_path, backup_entry); return; } @@ -374,7 +374,7 @@ namespace Strings data_paths = backup_coordination->getReplicatedTableDataPaths(info.zk_path); Strings part_names = backup_coordination->getReplicatedTablePartNames(backup_settings.host_id, info.table_name, info.zk_path); std::unordered_set part_names_set{part_names.begin(), part_names.end()}; - for (auto & [relative_path, backup_entry] : info.data) + for (const auto & [relative_path, backup_entry] : info.data) { size_t slash_pos = relative_path.find('/'); if (slash_pos != String::npos) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index dad1c9e6bc8..60596aea609 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -37,18 +37,6 @@ if (USE_DEBUG_HELPERS) add_compile_options($<$:${INCLUDE_DEBUG_HELPERS}>) endif () -if (COMPILER_GCC) - # If we leave this optimization enabled, gcc-7 replaces a pair of SSE intrinsics (16 byte load, store) with a call to memcpy. 
- # It leads to slow code. This is compiler bug. It looks like this: - # - # (gdb) bt - #0 memcpy (destination=0x7faa6e9f1638, source=0x7faa81d9e9a8, size=16) at ../libs/libmemcpy/memcpy.h:11 - #1 0x0000000005341c5f in _mm_storeu_si128 (__B=..., __P=) at /usr/lib/gcc/x86_64-linux-gnu/7/include/emmintrin.h:720 - #2 memcpySmallAllowReadWriteOverflow15Impl (n=, src=, dst=) at ../src/Common/memcpySmall.h:37 - - add_definitions ("-fno-tree-loop-distribute-patterns") -endif () - # ClickHouse developers may use platform-dependent code under some macro (e.g. `#ifdef ENABLE_MULTITARGET`). # If turned ON, this option defines such macro. # See `src/Common/TargetSpecific.h` @@ -570,7 +558,7 @@ include ("${ClickHouse_SOURCE_DIR}/cmake/add_check.cmake") if (ENABLE_TESTS) macro (grep_gtest_sources BASE_DIR DST_VAR) # Cold match files that are not in tests/ directories - file(GLOB_RECURSE "${DST_VAR}" RELATIVE "${BASE_DIR}" "gtest*.cpp") + file(GLOB_RECURSE "${DST_VAR}" CONFIGURE_DEPENDS RELATIVE "${BASE_DIR}" "gtest*.cpp") endmacro() # attach all dbms gtest sources diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 35ef55a1387..9cc31df0b43 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -119,6 +119,17 @@ namespace ProfileEvents namespace DB { +static ClientInfo::QueryKind parseQueryKind(const String & query_kind) +{ + if (query_kind == "initial_query") + return ClientInfo::QueryKind::INITIAL_QUERY; + if (query_kind == "secondary_query") + return ClientInfo::QueryKind::SECONDARY_QUERY; + if (query_kind == "no_query") + return ClientInfo::QueryKind::NO_QUERY; + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown query kind {}", query_kind); +} + static void incrementProfileEventsBlock(Block & dst, const Block & src) { if (!dst) @@ -2125,6 +2136,7 @@ void ClientBase::init(int argc, char ** argv) ("query,q", po::value(), "query") ("stage", po::value()->default_value("complete"), "Request query processing up to specified stage: complete,fetch_columns,with_mergeable_state,with_mergeable_state_after_aggregation,with_mergeable_state_after_aggregation_and_limit") + ("query_kind", po::value()->default_value("initial_query"), "One of initial_query/secondary_query/no_query") ("query_id", po::value(), "query_id") ("progress", "print progress of queries execution") @@ -2255,6 +2267,7 @@ void ClientBase::init(int argc, char ** argv) server_logs_file = options["server_logs_file"].as(); query_processing_stage = QueryProcessingStage::fromString(options["stage"].as()); + query_kind = parseQueryKind(options["query_kind"].as()); profile_events.print = options.count("print-profile-events"); profile_events.delay_ms = options["profile-events-delay-ms"].as(); diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index d373ce5f60b..d11977e984a 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -256,6 +256,7 @@ protected: } profile_events; QueryProcessingStage::Enum query_processing_stage; + ClientInfo::QueryKind query_kind; bool fake_drop = false; diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index e53d55f6964..50413b45c6b 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -93,37 +93,58 @@ void Connection::connect(const ConnectionTimeouts & timeouts) { try { - if (connected) - disconnect(); - LOG_TRACE(log_wrapper.get(), "Connecting. Database: {}. User: {}{}{}", default_database.empty() ? "(not specified)" : default_database, user, static_cast(secure) ? ". Secure" : "", static_cast(compression) ? "" : ". 
Uncompressed"); - if (static_cast(secure)) - { -#if USE_SSL - socket = std::make_unique(); - - /// we resolve the ip when we open SecureStreamSocket, so to make Server Name Indication (SNI) - /// work we need to pass host name separately. It will be send into TLS Hello packet to let - /// the server know which host we want to talk with (single IP can process requests for multiple hosts using SNI). - static_cast(socket.get())->setPeerHostName(host); -#else - throw Exception{"tcp_secure protocol is disabled because poco library was built without NetSSL support.", ErrorCodes::SUPPORT_IS_DISABLED}; -#endif - } - else - { - socket = std::make_unique(); - } - - current_resolved_address = DNSResolver::instance().resolveAddress(host, port); - + auto addresses = DNSResolver::instance().resolveAddressList(host, port); const auto & connection_timeout = static_cast(secure) ? timeouts.secure_connection_timeout : timeouts.connection_timeout; - socket->connect(*current_resolved_address, connection_timeout); + + for (auto it = addresses.begin(); it != addresses.end();) + { + if (connected) + disconnect(); + + if (static_cast(secure)) + { +#if USE_SSL + socket = std::make_unique(); + + /// we resolve the ip when we open SecureStreamSocket, so to make Server Name Indication (SNI) + /// work we need to pass host name separately. It will be send into TLS Hello packet to let + /// the server know which host we want to talk with (single IP can process requests for multiple hosts using SNI). + static_cast(socket.get())->setPeerHostName(host); +#else + throw Exception{"tcp_secure protocol is disabled because poco library was built without NetSSL support.", ErrorCodes::SUPPORT_IS_DISABLED}; +#endif + } + else + { + socket = std::make_unique(); + } + + try + { + socket->connect(*it, connection_timeout); + current_resolved_address = *it; + break; + } + catch (Poco::Net::NetException &) + { + if (++it == addresses.end()) + throw; + continue; + } + catch (Poco::TimeoutException &) + { + if (++it == addresses.end()) + throw; + continue; + } + } + socket->setReceiveTimeout(timeouts.receive_timeout); socket->setSendTimeout(timeouts.send_timeout); socket->setNoDelay(true); diff --git a/src/Client/HedgedConnections.cpp b/src/Client/HedgedConnections.cpp index 954396af0fa..9f0ead79981 100644 --- a/src/Client/HedgedConnections.cpp +++ b/src/Client/HedgedConnections.cpp @@ -100,7 +100,11 @@ void HedgedConnections::sendExternalTablesData(std::vector & if (data.size() != size()) throw Exception("Mismatch between replicas and data sources", ErrorCodes::MISMATCH_REPLICAS_DATA_SOURCES); - auto send_external_tables_data = [&data](ReplicaState & replica) { replica.connection->sendExternalTablesData(data[0]); }; + auto send_external_tables_data = [&](ReplicaState & replica) + { + size_t offset = fd_to_replica_location[replica.packet_receiver->getFileDescriptor()].offset; + replica.connection->sendExternalTablesData(data[offset]); + }; for (auto & offset_state : offset_states) for (auto & replica : offset_state.replicas) @@ -341,7 +345,7 @@ HedgedConnections::ReplicaLocation HedgedConnections::getReadyReplicaLocation(As else throw Exception("Unknown event from epoll", ErrorCodes::LOGICAL_ERROR); } -}; +} bool HedgedConnections::resumePacketReceiver(const HedgedConnections::ReplicaLocation & location) { diff --git a/src/Client/LocalConnection.cpp b/src/Client/LocalConnection.cpp index 77519423763..0707b0bcdc0 100644 --- a/src/Client/LocalConnection.cpp +++ b/src/Client/LocalConnection.cpp @@ -73,11 +73,15 @@ void 
LocalConnection::sendQuery( const String & query_id, UInt64 stage, const Settings *, - const ClientInfo *, + const ClientInfo * client_info, bool, std::function process_progress_callback) { - query_context = session.makeQueryContext(); + /// Suggestion comes without client_info. + if (client_info) + query_context = session.makeQueryContext(*client_info); + else + query_context = session.makeQueryContext(); query_context->setCurrentQueryId(query_id); if (send_progress) { diff --git a/src/Common/COW.h b/src/Common/COW.h index f958fe71824..f772acd84e0 100644 --- a/src/Common/COW.h +++ b/src/Common/COW.h @@ -219,7 +219,7 @@ protected: /// Get internal immutable ptr. Does not change internal use counter. immutable_ptr detach() && { return std::move(value); } - operator bool() const { return value != nullptr; } /// NOLINT + explicit operator bool() const { return value != nullptr; } bool operator! () const { return value == nullptr; } bool operator== (const chameleon_ptr & rhs) const { return value == rhs.value; } diff --git a/src/Common/Config/AbstractConfigurationComparison.cpp b/src/Common/Config/AbstractConfigurationComparison.cpp index ea0b3be4b98..711c754743d 100644 --- a/src/Common/Config/AbstractConfigurationComparison.cpp +++ b/src/Common/Config/AbstractConfigurationComparison.cpp @@ -18,7 +18,7 @@ namespace result += '.'; result += subkey; return result; - }; + } } diff --git a/src/Common/Config/configReadClient.cpp b/src/Common/Config/configReadClient.cpp index e7bc0b72814..e5308bc3bc7 100644 --- a/src/Common/Config/configReadClient.cpp +++ b/src/Common/Config/configReadClient.cpp @@ -14,7 +14,7 @@ bool safeFsExists(const String & path) { std::error_code ec; return fs::exists(path, ec); -}; +} bool configReadClient(Poco::Util::LayeredConfiguration & config, const std::string & home_path) { diff --git a/src/Common/DNSResolver.cpp b/src/Common/DNSResolver.cpp index d757ec2ae2a..0616e324b73 100644 --- a/src/Common/DNSResolver.cpp +++ b/src/Common/DNSResolver.cpp @@ -83,25 +83,8 @@ static void splitHostAndPort(const std::string & host_and_port, std::string & ou throw Exception("Port must be numeric", ErrorCodes::BAD_ARGUMENTS); } -static DNSResolver::IPAddresses resolveIPAddressImpl(const std::string & host) +static DNSResolver::IPAddresses hostByName(const std::string & host) { - Poco::Net::IPAddress ip; - - /// NOTE: - /// - Poco::Net::DNS::resolveOne(host) doesn't work for IP addresses like 127.0.0.2 - /// - Poco::Net::IPAddress::tryParse() expect hex string for IPv6 (without brackets) - if (host.starts_with('[')) - { - assert(host.ends_with(']')); - if (Poco::Net::IPAddress::tryParse(host.substr(1, host.size() - 2), ip)) - return DNSResolver::IPAddresses(1, ip); - } - else - { - if (Poco::Net::IPAddress::tryParse(host, ip)) - return DNSResolver::IPAddresses(1, ip); - } - /// Family: AF_UNSPEC /// AI_ALL is required for checking if client is allowed to connect from an address auto flags = Poco::Net::DNS::DNS_HINT_AI_V4MAPPED | Poco::Net::DNS::DNS_HINT_AI_ALL; @@ -131,6 +114,30 @@ static DNSResolver::IPAddresses resolveIPAddressImpl(const std::string & host) return addresses; } +static DNSResolver::IPAddresses resolveIPAddressImpl(const std::string & host) +{ + Poco::Net::IPAddress ip; + + /// NOTE: + /// - Poco::Net::DNS::resolveOne(host) doesn't work for IP addresses like 127.0.0.2 + /// - Poco::Net::IPAddress::tryParse() expect hex string for IPv6 (without brackets) + if (host.starts_with('[')) + { + assert(host.ends_with(']')); + if (Poco::Net::IPAddress::tryParse(host.substr(1, 
host.size() - 2), ip)) + return DNSResolver::IPAddresses(1, ip); + } + else + { + if (Poco::Net::IPAddress::tryParse(host, ip)) + return DNSResolver::IPAddresses(1, ip); + } + + DNSResolver::IPAddresses addresses = hostByName(host); + + return addresses; +} + static String reverseResolveImpl(const Poco::Net::IPAddress & address) { Poco::Net::SocketAddress sock_addr(address, 0); @@ -208,6 +215,26 @@ Poco::Net::SocketAddress DNSResolver::resolveAddress(const std::string & host, U return Poco::Net::SocketAddress(impl->cache_host(host).front(), port); } +std::vector DNSResolver::resolveAddressList(const std::string & host, UInt16 port) +{ + if (Poco::Net::IPAddress ip; Poco::Net::IPAddress::tryParse(host, ip)) + return std::vector{{ip, port}}; + + std::vector addresses; + + if (!impl->disable_cache) + addToNewHosts(host); + + std::vector ips = impl->disable_cache ? hostByName(host) : impl->cache_host(host); + auto ips_end = std::unique(ips.begin(), ips.end()); + + addresses.reserve(ips_end - ips.begin()); + for (auto ip = ips.begin(); ip != ips_end; ++ip) + addresses.emplace_back(*ip, port); + + return addresses; +} + String DNSResolver::reverseResolve(const Poco::Net::IPAddress & address) { if (impl->disable_cache) diff --git a/src/Common/DNSResolver.h b/src/Common/DNSResolver.h index 3f1773d4050..fdd9799f96f 100644 --- a/src/Common/DNSResolver.h +++ b/src/Common/DNSResolver.h @@ -34,6 +34,8 @@ public: Poco::Net::SocketAddress resolveAddress(const std::string & host, UInt16 port); + std::vector resolveAddressList(const std::string & host, UInt16 port); + /// Accepts host IP and resolves its host name String reverseResolve(const Poco::Net::IPAddress & address); diff --git a/src/Common/Exception.h b/src/Common/Exception.h index b2fc369237e..086b64bf5f9 100644 --- a/src/Common/Exception.h +++ b/src/Common/Exception.h @@ -48,8 +48,8 @@ public: Exception * clone() const override { return new Exception(*this); } void rethrow() const override { throw *this; } - const char * name() const throw() override { return "DB::Exception"; } - const char * what() const throw() override { return message().data(); } + const char * name() const noexcept override { return "DB::Exception"; } + const char * what() const noexcept override { return message().data(); } /// Add something to the existing message. 
template @@ -77,7 +77,7 @@ private: #endif bool remote = false; - const char * className() const throw() override { return "DB::Exception"; } + const char * className() const noexcept override { return "DB::Exception"; } }; @@ -102,8 +102,8 @@ private: int saved_errno; std::optional path; - const char * name() const throw() override { return "DB::ErrnoException"; } - const char * className() const throw() override { return "DB::ErrnoException"; } + const char * name() const noexcept override { return "DB::ErrnoException"; } + const char * className() const noexcept override { return "DB::ErrnoException"; } }; @@ -143,8 +143,8 @@ private: String file_name; mutable std::string formatted_message; - const char * name() const throw() override { return "DB::ParsingException"; } - const char * className() const throw() override { return "DB::ParsingException"; } + const char * name() const noexcept override { return "DB::ParsingException"; } + const char * className() const noexcept override { return "DB::ParsingException"; } }; diff --git a/src/Common/HashTable/StringHashTable.h b/src/Common/HashTable/StringHashTable.h index 7e259d66cd0..6a8bdc06218 100644 --- a/src/Common/HashTable/StringHashTable.h +++ b/src/Common/HashTable/StringHashTable.h @@ -169,7 +169,7 @@ struct StringHashTableLookupResult auto & operator*() const { return *this; } auto * operator->() { return this; } auto * operator->() const { return this; } - operator bool() const { return mapped_ptr; } /// NOLINT + explicit operator bool() const { return mapped_ptr; } friend bool operator==(const StringHashTableLookupResult & a, const std::nullptr_t &) { return !a.mapped_ptr; } friend bool operator==(const std::nullptr_t &, const StringHashTableLookupResult & b) { return !b.mapped_ptr; } friend bool operator!=(const StringHashTableLookupResult & a, const std::nullptr_t &) { return a.mapped_ptr; } diff --git a/src/Common/MemoryTracker.cpp b/src/Common/MemoryTracker.cpp index 735dcf91a36..0e7803aaa71 100644 --- a/src/Common/MemoryTracker.cpp +++ b/src/Common/MemoryTracker.cpp @@ -310,8 +310,11 @@ void MemoryTracker::free(Int64 size) accounted_size += new_amount; } } - if (auto * overcommit_tracker_ptr = overcommit_tracker.load(std::memory_order_relaxed); overcommit_tracker_ptr) - overcommit_tracker_ptr->tryContinueQueryExecutionAfterFree(accounted_size); + if (!OvercommitTrackerBlockerInThread::isBlocked()) + { + if (auto * overcommit_tracker_ptr = overcommit_tracker.load(std::memory_order_relaxed); overcommit_tracker_ptr) + overcommit_tracker_ptr->tryContinueQueryExecutionAfterFree(accounted_size); + } if (auto * loaded_next = parent.load(std::memory_order_relaxed)) loaded_next->free(size); diff --git a/src/Common/NetException.h b/src/Common/NetException.h index 019a12f23b9..712893ed83b 100644 --- a/src/Common/NetException.h +++ b/src/Common/NetException.h @@ -22,8 +22,8 @@ public: void rethrow() const override { throw *this; } private: - const char * name() const throw() override { return "DB::NetException"; } - const char * className() const throw() override { return "DB::NetException"; } + const char * name() const noexcept override { return "DB::NetException"; } + const char * className() const noexcept override { return "DB::NetException"; } }; } diff --git a/src/Common/OvercommitTracker.cpp b/src/Common/OvercommitTracker.cpp index 37e1bb4a65f..dbacc0d81a4 100644 --- a/src/Common/OvercommitTracker.cpp +++ b/src/Common/OvercommitTracker.cpp @@ -192,3 +192,5 @@ void GlobalOvercommitTracker::pickQueryToExcludeImpl() 
current_ratio.committed, current_ratio.soft_limit); picked_tracker = query_tracker; } + +thread_local size_t OvercommitTrackerBlockerInThread::counter = 0; diff --git a/src/Common/OvercommitTracker.h b/src/Common/OvercommitTracker.h index fec52b261a7..37de75f4848 100644 --- a/src/Common/OvercommitTracker.h +++ b/src/Common/OvercommitTracker.h @@ -154,3 +154,18 @@ private: DB::ProcessList * process_list; Poco::Logger * logger = &Poco::Logger::get("GlobalOvercommitTracker"); }; + +// This class is used to disallow tracking during logging to avoid deadlocks. +struct OvercommitTrackerBlockerInThread +{ + OvercommitTrackerBlockerInThread() { ++counter; } + ~OvercommitTrackerBlockerInThread() { --counter; } + + OvercommitTrackerBlockerInThread(OvercommitTrackerBlockerInThread const &) = delete; + OvercommitTrackerBlockerInThread & operator=(OvercommitTrackerBlockerInThread const &) = delete; + + static bool isBlocked() { return counter > 0; } + +private: + static thread_local size_t counter; +}; diff --git a/src/Common/SensitiveDataMasker.h b/src/Common/SensitiveDataMasker.h index edd9f10ca91..adb6f5d51e1 100644 --- a/src/Common/SensitiveDataMasker.h +++ b/src/Common/SensitiveDataMasker.h @@ -69,4 +69,4 @@ public: size_t rulesCount() const; }; -}; +} diff --git a/src/Common/TargetSpecific.h b/src/Common/TargetSpecific.h index 2b81ee2fcb3..89c0f467fe3 100644 --- a/src/Common/TargetSpecific.h +++ b/src/Common/TargetSpecific.h @@ -276,7 +276,7 @@ DECLARE_AVX512F_SPECIFIC_CODE( \ FUNCTION_HEADER \ \ - AVX2_FUNCTION_SPECIFIC_ATTRIBUTE \ + SSE42_FUNCTION_SPECIFIC_ATTRIBUTE \ name##SSE42 \ FUNCTION_BODY \ \ diff --git a/src/Common/ZooKeeper/IKeeper.h b/src/Common/ZooKeeper/IKeeper.h index 74b45d411b0..1e79468b7e3 100644 --- a/src/Common/ZooKeeper/IKeeper.h +++ b/src/Common/ZooKeeper/IKeeper.h @@ -401,8 +401,8 @@ public: Exception(const Error code_, const std::string & path); /// NOLINT Exception(const Exception & exc); - const char * name() const throw() override { return "Coordination::Exception"; } - const char * className() const throw() override { return "Coordination::Exception"; } + const char * name() const noexcept override { return "Coordination::Exception"; } + const char * className() const noexcept override { return "Coordination::Exception"; } Exception * clone() const override { return new Exception(*this); } const Error code; diff --git a/src/Common/tests/gtest_lru_file_cache.cpp b/src/Common/tests/gtest_lru_file_cache.cpp index 24e69259241..36137e02a84 100644 --- a/src/Common/tests/gtest_lru_file_cache.cpp +++ b/src/Common/tests/gtest_lru_file_cache.cpp @@ -32,7 +32,7 @@ void assertRange( ASSERT_EQ(range.left, expected_range.left); ASSERT_EQ(range.right, expected_range.right); ASSERT_EQ(file_segment->state(), expected_state); -}; +} void printRanges(const auto & segments) { diff --git a/src/Common/tests/gtest_sensitive_data_masker.cpp b/src/Common/tests/gtest_sensitive_data_masker.cpp index 7ebf141d961..b9ee9025c03 100644 --- a/src/Common/tests/gtest_sensitive_data_masker.cpp +++ b/src/Common/tests/gtest_sensitive_data_masker.cpp @@ -22,7 +22,7 @@ extern const int CANNOT_COMPILE_REGEXP; extern const int NO_ELEMENTS_IN_CONFIG; extern const int INVALID_CONFIG_PARAMETER; } -}; +} TEST(Common, SensitiveDataMasker) diff --git a/src/Compression/CompressedWriteBuffer.cpp b/src/Compression/CompressedWriteBuffer.cpp index 2ade1ef0949..93f163dc1af 100644 --- a/src/Compression/CompressedWriteBuffer.cpp +++ b/src/Compression/CompressedWriteBuffer.cpp @@ -1,11 +1,11 @@ #include #include -#include 
#include +#include -#include "CompressedWriteBuffer.h" #include +#include "CompressedWriteBuffer.h" namespace DB @@ -22,14 +22,29 @@ void CompressedWriteBuffer::nextImpl() if (!offset()) return; + UInt32 compressed_size = 0; size_t decompressed_size = offset(); UInt32 compressed_reserve_size = codec->getCompressedReserveSize(decompressed_size); - compressed_buffer.resize(compressed_reserve_size); - UInt32 compressed_size = codec->compress(working_buffer.begin(), decompressed_size, compressed_buffer.data()); - CityHash_v1_0_2::uint128 checksum = CityHash_v1_0_2::CityHash128(compressed_buffer.data(), compressed_size); - out.write(reinterpret_cast(&checksum), CHECKSUM_SIZE); - out.write(compressed_buffer.data(), compressed_size); + if (out.available() > compressed_reserve_size + CHECKSUM_SIZE) + { + char * out_checksum_ptr = out.position(); + char * out_compressed_ptr = out.position() + CHECKSUM_SIZE; + compressed_size = codec->compress(working_buffer.begin(), decompressed_size, out_compressed_ptr); + + CityHash_v1_0_2::uint128 checksum = CityHash_v1_0_2::CityHash128(out_compressed_ptr, compressed_size); + memcpy(out_checksum_ptr, reinterpret_cast(&checksum), CHECKSUM_SIZE); + out.position() += CHECKSUM_SIZE + compressed_size; + } + else + { + compressed_buffer.resize(compressed_reserve_size); + compressed_size = codec->compress(working_buffer.begin(), decompressed_size, compressed_buffer.data()); + + CityHash_v1_0_2::uint128 checksum = CityHash_v1_0_2::CityHash128(compressed_buffer.data(), compressed_size); + out.write(reinterpret_cast(&checksum), CHECKSUM_SIZE); + out.write(compressed_buffer.data(), compressed_size); + } } CompressedWriteBuffer::~CompressedWriteBuffer() @@ -37,10 +52,7 @@ CompressedWriteBuffer::~CompressedWriteBuffer() finalize(); } -CompressedWriteBuffer::CompressedWriteBuffer( - WriteBuffer & out_, - CompressionCodecPtr codec_, - size_t buf_size) +CompressedWriteBuffer::CompressedWriteBuffer(WriteBuffer & out_, CompressionCodecPtr codec_, size_t buf_size) : BufferWithOwnMemory(buf_size), out(out_), codec(std::move(codec_)) { } diff --git a/src/Compression/tests/gtest_compressionCodec.cpp b/src/Compression/tests/gtest_compressionCodec.cpp index 2d26cfcd5e1..77050908265 100644 --- a/src/Compression/tests/gtest_compressionCodec.cpp +++ b/src/Compression/tests/gtest_compressionCodec.cpp @@ -790,7 +790,7 @@ std::vector generatePyramidOfSequences(const size_t sequences } return sequences; -}; +} // helper macro to produce human-friendly sequence name from generator #define G(generator) generator, #generator diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index 6961f31ed20..1f089ba2cb7 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -315,6 +316,22 @@ void KeeperServer::startup(const Poco::Util::AbstractConfiguration & config, boo state_manager->loadLogStore(state_machine->last_commit_index() + 1, coordination_settings->reserved_log_items); + auto log_store = state_manager->load_log_store(); + auto next_log_idx = log_store->next_slot(); + if (next_log_idx > 0 && next_log_idx > state_machine->last_commit_index()) + { + auto log_entries = log_store->log_entries(state_machine->last_commit_index() + 1, next_log_idx); + + auto idx = state_machine->last_commit_index() + 1; + for (const auto & entry : *log_entries) + { + if (entry && entry->get_val_type() == nuraft::log_val_type::app_log) + state_machine->preprocess(idx, 
entry->get_buf()); + + ++idx; + } + } + loadLatestConfig(); last_local_config = state_manager->parseServersConfiguration(config, true).cluster_config; diff --git a/src/Coordination/KeeperStateMachine.cpp b/src/Coordination/KeeperStateMachine.cpp index be7110fa841..fa3a5195226 100644 --- a/src/Coordination/KeeperStateMachine.cpp +++ b/src/Coordination/KeeperStateMachine.cpp @@ -44,7 +44,6 @@ namespace else /// backward compatibility request_for_session.time = std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()).count(); - return request_for_session; } } @@ -114,6 +113,21 @@ void KeeperStateMachine::init() storage = std::make_unique(coordination_settings->dead_session_check_period_ms.totalMilliseconds(), superdigest); } +nuraft::ptr KeeperStateMachine::pre_commit(uint64_t log_idx, nuraft::buffer & data) +{ + preprocess(log_idx, data); + return nullptr; +} + +void KeeperStateMachine::preprocess(const uint64_t log_idx, nuraft::buffer & data) +{ + auto request_for_session = parseRequest(data); + if (request_for_session.request->getOpNum() == Coordination::OpNum::SessionID) + return; + std::lock_guard lock(storage_and_responses_lock); + storage->preprocessRequest(request_for_session.request, request_for_session.session_id, request_for_session.time, log_idx); +} + nuraft::ptr KeeperStateMachine::commit(const uint64_t log_idx, nuraft::buffer & data) { auto request_for_session = parseRequest(data); @@ -182,6 +196,12 @@ void KeeperStateMachine::commit_config(const uint64_t /* log_idx */, nuraft::ptr cluster_config = ClusterConfig::deserialize(*tmp); } +void KeeperStateMachine::rollback(uint64_t log_idx, nuraft::buffer & /*data*/) +{ + std::lock_guard lock(storage_and_responses_lock); + storage->rollbackRequest(log_idx); +} + nuraft::ptr KeeperStateMachine::last_snapshot() { /// Just return the latest snapshot. 
@@ -343,7 +363,7 @@ void KeeperStateMachine::processReadRequest(const KeeperStorage::RequestForSessi { /// Pure local request, just process it with storage std::lock_guard lock(storage_and_responses_lock); - auto responses = storage->processRequest(request_for_session.request, request_for_session.session_id, request_for_session.time, std::nullopt); + auto responses = storage->processRequest(request_for_session.request, request_for_session.session_id, request_for_session.time, std::nullopt, true /*check_acl*/, true /*is_local*/); for (const auto & response : responses) if (!responses_queue.push(response)) throw Exception(ErrorCodes::SYSTEM_ERROR, "Could not push response with session id {} into responses queue", response.session_id); diff --git a/src/Coordination/KeeperStateMachine.h b/src/Coordination/KeeperStateMachine.h index 73578e6a2ba..aed96a59c13 100644 --- a/src/Coordination/KeeperStateMachine.h +++ b/src/Coordination/KeeperStateMachine.h @@ -27,16 +27,16 @@ public: /// Read state from the latest snapshot void init(); - /// Currently not supported - nuraft::ptr pre_commit(const uint64_t /*log_idx*/, nuraft::buffer & /*data*/) override { return nullptr; } + void preprocess(uint64_t log_idx, nuraft::buffer & data); + + nuraft::ptr pre_commit(uint64_t log_idx, nuraft::buffer & data) override; nuraft::ptr commit(const uint64_t log_idx, nuraft::buffer & data) override; /// NOLINT /// Save new cluster config to our snapshot (copy of the config stored in StateManager) void commit_config(const uint64_t log_idx, nuraft::ptr & new_conf) override; /// NOLINT - /// Currently not supported - void rollback(const uint64_t /*log_idx*/, nuraft::buffer & /*data*/) override {} + void rollback(uint64_t log_idx, nuraft::buffer & data) override; uint64_t last_commit_index() override { return last_committed_idx; } diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index f58776cf843..6c0699be95c 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -1,19 +1,21 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "Common/ZooKeeper/ZooKeeperConstants.h" +#include +#include +#include #include +#include namespace DB { @@ -49,37 +51,10 @@ String getSHA1(const String & userdata) String generateDigest(const String & userdata) { std::vector user_password; - boost::split(user_password, userdata, [](char c) { return c == ':'; }); + boost::split(user_password, userdata, [](char character) { return character == ':'; }); return user_password[0] + ":" + base64Encode(getSHA1(userdata)); } -bool checkACL(int32_t permission, const Coordination::ACLs & node_acls, const std::vector & session_auths) -{ - if (node_acls.empty()) - return true; - - for (const auto & session_auth : session_auths) - if (session_auth.scheme == "super") - return true; - - for (const auto & node_acl : node_acls) - { - if (node_acl.permissions & permission) - { - if (node_acl.scheme == "world" && node_acl.id == "anyone") - return true; - - for (const auto & session_auth : session_auths) - { - if (node_acl.scheme == session_auth.scheme && node_acl.id == session_auth.id) - return true; - } - } - } - - return false; -} - bool fixupACL( const std::vector & request_acls, const std::vector & current_ids, @@ -122,11 +97,12 @@ bool fixupACL( return valid_found; } 
-KeeperStorage::ResponsesForSessions processWatchesImpl(const String & path, KeeperStorage::Watches & watches, KeeperStorage::Watches & list_watches, Coordination::Event event_type) +KeeperStorage::ResponsesForSessions processWatchesImpl( + const String & path, KeeperStorage::Watches & watches, KeeperStorage::Watches & list_watches, Coordination::Event event_type) { KeeperStorage::ResponsesForSessions result; - auto it = watches.find(path); - if (it != watches.end()) + auto watch_it = watches.find(path); + if (watch_it != watches.end()) { std::shared_ptr watch_response = std::make_shared(); watch_response->path = path; @@ -134,10 +110,10 @@ KeeperStorage::ResponsesForSessions processWatchesImpl(const String & path, Keep watch_response->zxid = -1; watch_response->type = event_type; watch_response->state = Coordination::State::CONNECTED; - for (auto watcher_session : it->second) + for (auto watcher_session : watch_it->second) result.push_back(KeeperStorage::ResponseForSession{watcher_session, watch_response}); - watches.erase(it); + watches.erase(watch_it); } auto parent_path = parentPath(path); @@ -156,10 +132,11 @@ KeeperStorage::ResponsesForSessions processWatchesImpl(const String & path, Keep for (const auto & path_to_check : paths_to_check_for_list_watches) { - it = list_watches.find(path_to_check); - if (it != list_watches.end()) + watch_it = list_watches.find(path_to_check); + if (watch_it != list_watches.end()) { - std::shared_ptr watch_list_response = std::make_shared(); + std::shared_ptr watch_list_response + = std::make_shared(); watch_list_response->path = path_to_check; watch_list_response->xid = Coordination::WATCH_XID; watch_list_response->zxid = -1; @@ -169,14 +146,15 @@ KeeperStorage::ResponsesForSessions processWatchesImpl(const String & path, Keep watch_list_response->type = Coordination::Event::DELETED; watch_list_response->state = Coordination::State::CONNECTED; - for (auto watcher_session : it->second) + for (auto watcher_session : watch_it->second) result.push_back(KeeperStorage::ResponseForSession{watcher_session, watch_list_response}); - list_watches.erase(it); + list_watches.erase(watch_it); } } return result; } + } void KeeperStorage::Node::setData(String new_data) @@ -198,24 +176,322 @@ void KeeperStorage::Node::removeChild(StringRef child_path) } KeeperStorage::KeeperStorage(int64_t tick_time_ms, const String & superdigest_) - : session_expiry_queue(tick_time_ms) - , superdigest(superdigest_) + : session_expiry_queue(tick_time_ms), superdigest(superdigest_) { container.insert("/", Node()); } -using Undo = std::function; +template +struct Overloaded : Ts... +{ + using Ts::operator()...; +}; + +// explicit deduction guide +// https://en.cppreference.com/w/cpp/language/class_template_argument_deduction +template +Overloaded(Ts...) 
-> Overloaded; + +std::shared_ptr KeeperStorage::UncommittedState::getNode(StringRef path) +{ + std::shared_ptr node{nullptr}; + + if (auto maybe_node_it = storage.container.find(path); maybe_node_it != storage.container.end()) + { + const auto & committed_node = maybe_node_it->value; + node = std::make_shared(); + node->stat = committed_node.stat; + node->seq_num = committed_node.seq_num; + node->setData(committed_node.getData()); + } + + applyDeltas( + path, + Overloaded{ + [&](const CreateNodeDelta & create_delta) + { + assert(!node); + node = std::make_shared(); + node->stat = create_delta.stat; + node->setData(create_delta.data); + }, + [&](const RemoveNodeDelta & /*remove_delta*/) + { + assert(node); + node = nullptr; + }, + [&](const UpdateNodeDelta & update_delta) + { + assert(node); + update_delta.update_fn(*node); + }, + [&](auto && /*delta*/) {}, + }); + + return node; +} + +bool KeeperStorage::UncommittedState::hasNode(StringRef path) const +{ + bool exists = storage.container.contains(std::string{path}); + applyDeltas( + path, + Overloaded{ + [&](const CreateNodeDelta & /*create_delta*/) + { + assert(!exists); + exists = true; + }, + [&](const RemoveNodeDelta & /*remove_delta*/) + { + assert(exists); + exists = false; + }, + [&](auto && /*delta*/) {}, + }); + + return exists; +} + +Coordination::ACLs KeeperStorage::UncommittedState::getACLs(StringRef path) const +{ + std::optional acl_id; + if (auto maybe_node_it = storage.container.find(path); maybe_node_it != storage.container.end()) + acl_id.emplace(maybe_node_it->value.acl_id); + + const Coordination::ACLs * acls{nullptr}; + applyDeltas( + path, + Overloaded{ + [&](const CreateNodeDelta & create_delta) + { + assert(!acl_id); + acls = &create_delta.acls; + }, + [&](const RemoveNodeDelta & /*remove_delta*/) + { + assert(acl_id || acls); + acl_id.reset(); + acls = nullptr; + }, + [&](const SetACLDelta & set_acl_delta) + { + assert(acl_id || acls); + acls = &set_acl_delta.acls; + }, + [&](auto && /*delta*/) {}, + }); + + if (acls) + return *acls; + + return acl_id ? storage.acl_map.convertNumber(*acl_id) : Coordination::ACLs{}; +} + +namespace +{ + +[[noreturn]] void onStorageInconsistency() +{ + LOG_ERROR(&Poco::Logger::get("KeeperStorage"), "Inconsistency found between uncommitted and committed data. Keeper will terminate to avoid undefined behaviour."); + std::terminate(); +} + +} + +Coordination::Error KeeperStorage::commit(int64_t commit_zxid, int64_t session_id) +{ + // Deltas are added with increasing ZXIDs + // If there are no deltas for the commit_zxid (e.g. 
read requests), we instantly return + // on first delta + for (auto & delta : uncommitted_state.deltas) + { + if (delta.zxid > commit_zxid) + break; + + bool finish_subdelta = false; + auto result = std::visit( + [&, &path = delta.path](DeltaType & operation) -> Coordination::Error + { + if constexpr (std::same_as) + { + if (!createNode( + path, + std::move(operation.data), + operation.stat, + operation.is_sequental, + operation.is_ephemeral, + std::move(operation.acls), + session_id)) + onStorageInconsistency(); + + return Coordination::Error::ZOK; + } + else if constexpr (std::same_as) + { + auto node_it = container.find(path); + if (node_it == container.end()) + onStorageInconsistency(); + + if (operation.version != -1 && operation.version != node_it->value.stat.version) + onStorageInconsistency(); + + container.updateValue(path, operation.update_fn); + return Coordination::Error::ZOK; + } + else if constexpr (std::same_as) + { + if (!removeNode(path, operation.version)) + onStorageInconsistency(); + + return Coordination::Error::ZOK; + } + else if constexpr (std::same_as) + { + auto node_it = container.find(path); + if (node_it == container.end()) + onStorageInconsistency(); + + if (operation.version != -1 && operation.version != node_it->value.stat.aversion) + onStorageInconsistency(); + + acl_map.removeUsage(node_it->value.acl_id); + + uint64_t acl_id = acl_map.convertACLs(operation.acls); + acl_map.addUsage(acl_id); + + container.updateValue(path, [acl_id](KeeperStorage::Node & node) { node.acl_id = acl_id; }); + + return Coordination::Error::ZOK; + } + else if constexpr (std::same_as) + return operation.error; + else if constexpr (std::same_as) + { + finish_subdelta = true; + return Coordination::Error::ZOK; + } + else if constexpr (std::same_as) + { + session_and_auth[operation.session_id].emplace_back(std::move(operation.auth_id)); + return Coordination::Error::ZOK; + } + else + { + // shouldn't be called in any process functions + onStorageInconsistency(); + } + }, + delta.operation); + + if (result != Coordination::Error::ZOK) + return result; + + if (finish_subdelta) + return Coordination::Error::ZOK; + } + + return Coordination::Error::ZOK; +} + +bool KeeperStorage::createNode( + const std::string & path, + String data, + const Coordination::Stat & stat, + bool is_sequental, + bool is_ephemeral, + Coordination::ACLs node_acls, + int64_t session_id) +{ + auto parent_path = parentPath(path); + auto node_it = container.find(parent_path); + + if (node_it == container.end()) + return false; + + if (node_it->value.stat.ephemeralOwner != 0) + return false; + + if (container.contains(path)) + return false; + + KeeperStorage::Node created_node; + + uint64_t acl_id = acl_map.convertACLs(node_acls); + acl_map.addUsage(acl_id); + + created_node.acl_id = acl_id; + created_node.stat = stat; + created_node.setData(std::move(data)); + created_node.is_sequental = is_sequental; + auto [map_key, _] = container.insert(path, created_node); + /// Take child path from key owned by map. 
+ auto child_path = getBaseName(map_key->getKey()); + container.updateValue(parent_path, [child_path](KeeperStorage::Node & parent) { parent.addChild(child_path); }); + + if (is_ephemeral) + ephemerals[session_id].emplace(path); + + return true; +}; + +bool KeeperStorage::removeNode(const std::string & path, int32_t version) +{ + auto node_it = container.find(path); + if (node_it == container.end()) + return false; + + if (version != -1 && version != node_it->value.stat.version) + return false; + + if (node_it->value.stat.numChildren) + return false; + + auto prev_node = node_it->value; + if (prev_node.stat.ephemeralOwner != 0) + { + auto ephemerals_it = ephemerals.find(prev_node.stat.ephemeralOwner); + ephemerals_it->second.erase(path); + if (ephemerals_it->second.empty()) + ephemerals.erase(ephemerals_it); + } + + acl_map.removeUsage(prev_node.acl_id); + + container.updateValue( + parentPath(path), + [child_basename = getBaseName(node_it->key)](KeeperStorage::Node & parent) { parent.removeChild(child_basename); }); + + container.erase(path); + return true; +} + struct KeeperStorageRequestProcessor { Coordination::ZooKeeperRequestPtr zk_request; - explicit KeeperStorageRequestProcessor(const Coordination::ZooKeeperRequestPtr & zk_request_) - : zk_request(zk_request_) - {} - virtual std::pair process(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time) const = 0; - virtual KeeperStorage::ResponsesForSessions processWatches(KeeperStorage::Watches & /*watches*/, KeeperStorage::Watches & /*list_watches*/) const { return {}; } - virtual bool checkAuth(KeeperStorage & /*storage*/, int64_t /*session_id*/) const { return true; } + explicit KeeperStorageRequestProcessor(const Coordination::ZooKeeperRequestPtr & zk_request_) : zk_request(zk_request_) { } + virtual Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time) const = 0; + virtual std::vector + preprocess(KeeperStorage & /*storage*/, int64_t /*zxid*/, int64_t /*session_id*/, int64_t /*time*/) const + { + return {}; + } + + // process the request using locally committed data + virtual Coordination::ZooKeeperResponsePtr + processLocal(KeeperStorage & /*storage*/, int64_t /*zxid*/, int64_t /*session_id*/, int64_t /*time*/) const + { + throw Exception{DB::ErrorCodes::LOGICAL_ERROR, "Cannot process the request locally"}; + } + + virtual KeeperStorage::ResponsesForSessions + processWatches(KeeperStorage::Watches & /*watches*/, KeeperStorage::Watches & /*list_watches*/) const + { + return {}; + } + virtual bool checkAuth(KeeperStorage & /*storage*/, int64_t /*session_id*/, bool /*is_local*/) const { return true; } virtual ~KeeperStorageRequestProcessor() = default; }; @@ -223,331 +499,328 @@ struct KeeperStorageRequestProcessor struct KeeperStorageHeartbeatRequestProcessor final : public KeeperStorageRequestProcessor { using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; - std::pair process(KeeperStorage & /* storage */, int64_t /* zxid */, int64_t /* session_id */, int64_t /* time */) const override + Coordination::ZooKeeperResponsePtr + process(KeeperStorage & /* storage */, int64_t /* zxid */, int64_t /* session_id */, int64_t /* time */) const override { - return {zk_request->makeResponse(), {}}; + return zk_request->makeResponse(); } }; struct KeeperStorageSyncRequestProcessor final : public KeeperStorageRequestProcessor { using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; - std::pair process(KeeperStorage & /* storage */, int64_t 
/* zxid */, int64_t /* session_id */, int64_t /* time */) const override + Coordination::ZooKeeperResponsePtr + process(KeeperStorage & /* storage */, int64_t /* zxid */, int64_t /* session_id */, int64_t /* time */) const override { auto response = zk_request->makeResponse(); dynamic_cast(*response).path = dynamic_cast(*zk_request).path; - return {response, {}}; - } -}; - -struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestProcessor -{ - using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; - - KeeperStorage::ResponsesForSessions processWatches(KeeperStorage::Watches & watches, KeeperStorage::Watches & list_watches) const override - { - return processWatchesImpl(zk_request->getPath(), watches, list_watches, Coordination::Event::CREATED); - } - - bool checkAuth(KeeperStorage & storage, int64_t session_id) const override - { - auto & container = storage.container; - auto path = zk_request->getPath(); - auto parent_path = parentPath(path); - - auto it = container.find(parent_path); - if (it == container.end()) - return true; - - const auto & node_acls = storage.acl_map.convertNumber(it->value.acl_id); - if (node_acls.empty()) - return true; - - const auto & session_auths = storage.session_and_auth[session_id]; - return checkACL(Coordination::ACL::Create, node_acls, session_auths); - } - - std::pair process(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time) const override - { - auto & container = storage.container; - auto & ephemerals = storage.ephemerals; - - Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); - Undo undo; - Coordination::ZooKeeperCreateResponse & response = dynamic_cast(*response_ptr); - Coordination::ZooKeeperCreateRequest & request = dynamic_cast(*zk_request); - - auto parent_path = parentPath(request.path); - auto it = container.find(parent_path); - - if (it == container.end()) - { - response.error = Coordination::Error::ZNONODE; - return { response_ptr, undo }; - } - else if (it->value.stat.ephemeralOwner != 0) - { - response.error = Coordination::Error::ZNOCHILDRENFOREPHEMERALS; - return { response_ptr, undo }; - } - std::string path_created = request.path; - if (request.is_sequential) - { - auto seq_num = it->value.seq_num; - - std::stringstream seq_num_str; // STYLE_CHECK_ALLOW_STD_STRING_STREAM - seq_num_str.exceptions(std::ios::failbit); - seq_num_str << std::setw(10) << std::setfill('0') << seq_num; - - path_created += seq_num_str.str(); - } - if (container.contains(path_created)) - { - response.error = Coordination::Error::ZNODEEXISTS; - return { response_ptr, undo }; - } - if (getBaseName(path_created).size == 0) - { - response.error = Coordination::Error::ZBADARGUMENTS; - return { response_ptr, undo }; - } - - auto & session_auth_ids = storage.session_and_auth[session_id]; - - KeeperStorage::Node created_node; - - Coordination::ACLs node_acls; - if (!fixupACL(request.acls, session_auth_ids, node_acls)) - { - response.error = Coordination::Error::ZINVALIDACL; - return {response_ptr, {}}; - } - - uint64_t acl_id = storage.acl_map.convertACLs(node_acls); - storage.acl_map.addUsage(acl_id); - - created_node.acl_id = acl_id; - created_node.stat.czxid = zxid; - created_node.stat.mzxid = zxid; - created_node.stat.pzxid = zxid; - created_node.stat.ctime = time; - created_node.stat.mtime = time; - created_node.stat.numChildren = 0; - created_node.stat.dataLength = request.data.length(); - created_node.stat.ephemeralOwner = request.is_ephemeral ? 
session_id : 0; - created_node.is_sequental = request.is_sequential; - created_node.setData(std::move(request.data)); - - auto [map_key, _] = container.insert(path_created, created_node); - /// Take child path from key owned by map. - auto child_path = getBaseName(map_key->getKey()); - - int32_t parent_cversion = request.parent_cversion; - int64_t prev_parent_zxid; - int32_t prev_parent_cversion; - container.updateValue(parent_path, [child_path, zxid, &prev_parent_zxid, - parent_cversion, &prev_parent_cversion] (KeeperStorage::Node & parent) - { - parent.addChild(child_path); - prev_parent_cversion = parent.stat.cversion; - prev_parent_zxid = parent.stat.pzxid; - - /// Increment sequential number even if node is not sequential - ++parent.seq_num; - - if (parent_cversion == -1) - ++parent.stat.cversion; - else if (parent_cversion > parent.stat.cversion) - parent.stat.cversion = parent_cversion; - - if (zxid > parent.stat.pzxid) - parent.stat.pzxid = zxid; - ++parent.stat.numChildren; - }); - - response.path_created = path_created; - - if (request.is_ephemeral) - ephemerals[session_id].emplace(path_created); - - undo = [&storage, prev_parent_zxid, prev_parent_cversion, session_id, path_created, is_ephemeral = request.is_ephemeral, parent_path, child_path, acl_id] - { - storage.acl_map.removeUsage(acl_id); - - if (is_ephemeral) - storage.ephemerals[session_id].erase(path_created); - - storage.container.updateValue(parent_path, [child_path, prev_parent_zxid, prev_parent_cversion] (KeeperStorage::Node & undo_parent) - { - --undo_parent.stat.numChildren; - --undo_parent.seq_num; - undo_parent.stat.cversion = prev_parent_cversion; - undo_parent.stat.pzxid = prev_parent_zxid; - undo_parent.removeChild(child_path); - }); - - storage.container.erase(path_created); - }; - - response.error = Coordination::Error::ZOK; - return { response_ptr, undo }; - } -}; - -struct KeeperStorageGetRequestProcessor final : public KeeperStorageRequestProcessor -{ - - bool checkAuth(KeeperStorage & storage, int64_t session_id) const override - { - auto & container = storage.container; - auto it = container.find(zk_request->getPath()); - if (it == container.end()) - return true; - - const auto & node_acls = storage.acl_map.convertNumber(it->value.acl_id); - if (node_acls.empty()) - return true; - - const auto & session_auths = storage.session_and_auth[session_id]; - return checkACL(Coordination::ACL::Read, node_acls, session_auths); - } - - using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; - std::pair process(KeeperStorage & storage, int64_t /* zxid */, int64_t /* session_id */, int64_t /* time */) const override - { - auto & container = storage.container; - Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); - Coordination::ZooKeeperGetResponse & response = dynamic_cast(*response_ptr); - Coordination::ZooKeeperGetRequest & request = dynamic_cast(*zk_request); - - auto it = container.find(request.path); - if (it == container.end()) - { - response.error = Coordination::Error::ZNONODE; - } - else - { - response.stat = it->value.stat; - response.data = it->value.getData(); - response.error = Coordination::Error::ZOK; - } - - return { response_ptr, {} }; + return response; } }; namespace { - /// Garbage required to apply log to "fuzzy" zookeeper snapshot - void updateParentPzxid(const std::string & child_path, int64_t zxid, KeeperStorage::Container & container) + + Coordination::ACLs getNodeACLs(KeeperStorage & storage, StringRef path, bool is_local) { - auto parent_path = 
parentPath(child_path); - auto parent_it = container.find(parent_path); - if (parent_it != container.end()) + if (is_local) { - container.updateValue(parent_path, [zxid](KeeperStorage::Node & parent) - { - if (parent.stat.pzxid < zxid) - parent.stat.pzxid = zxid; - }); + auto node_it = storage.container.find(path); + if (node_it == storage.container.end()) + return {}; + + return storage.acl_map.convertNumber(node_it->value.acl_id); + } + + return storage.uncommitted_state.getACLs(path); + } + +} +bool KeeperStorage::checkACL(StringRef path, int32_t permission, int64_t session_id, bool is_local) +{ + const auto node_acls = getNodeACLs(*this, path, is_local); + if (node_acls.empty()) + return true; + + if (uncommitted_state.hasACL(session_id, is_local, [](const auto & auth_id) { return auth_id.scheme == "super"; })) + return true; + + + for (const auto & node_acl : node_acls) + { + if (node_acl.permissions & permission) + { + if (node_acl.scheme == "world" && node_acl.id == "anyone") + return true; + + if (uncommitted_state.hasACL( + session_id, + is_local, + [&](const auto & auth_id) { return auth_id.scheme == node_acl.scheme && auth_id.id == node_acl.id; })) + return true; } } + + return false; } -struct KeeperStorageRemoveRequestProcessor final : public KeeperStorageRequestProcessor + +struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestProcessor { - bool checkAuth(KeeperStorage & storage, int64_t session_id) const override + using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; + + KeeperStorage::ResponsesForSessions + processWatches(KeeperStorage::Watches & watches, KeeperStorage::Watches & list_watches) const override { - auto & container = storage.container; - auto it = container.find(parentPath(zk_request->getPath())); - if (it == container.end()) - return true; + return processWatchesImpl(zk_request->getPath(), watches, list_watches, Coordination::Event::CREATED); + } - const auto & node_acls = storage.acl_map.convertNumber(it->value.acl_id); - if (node_acls.empty()) - return true; + bool checkAuth(KeeperStorage & storage, int64_t session_id, bool is_local) const override + { + auto path = zk_request->getPath(); + return storage.checkACL(parentPath(path), Coordination::ACL::Create, session_id, is_local); + } - const auto & session_auths = storage.session_and_auth[session_id]; - return checkACL(Coordination::ACL::Delete, node_acls, session_auths); + std::vector preprocess(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time) const override + { + Coordination::ZooKeeperCreateRequest & request = dynamic_cast(*zk_request); + + std::vector new_deltas; + + auto parent_path = parentPath(request.path); + auto parent_node = storage.uncommitted_state.getNode(parent_path); + if (parent_node == nullptr) + return {{zxid, Coordination::Error::ZNONODE}}; + + else if (parent_node->stat.ephemeralOwner != 0) + return {{zxid, Coordination::Error::ZNOCHILDRENFOREPHEMERALS}}; + + std::string path_created = request.path; + if (request.is_sequential) + { + auto seq_num = parent_node->seq_num; + + std::stringstream seq_num_str; // STYLE_CHECK_ALLOW_STD_STRING_STREAM + seq_num_str.exceptions(std::ios::failbit); + seq_num_str << std::setw(10) << std::setfill('0') << seq_num; + + path_created += seq_num_str.str(); + } + + if (storage.uncommitted_state.hasNode(path_created)) + return {{zxid, Coordination::Error::ZNODEEXISTS}}; + + if (getBaseName(path_created).size == 0) + return {{zxid, Coordination::Error::ZBADARGUMENTS}}; + + Coordination::ACLs 
node_acls; + if (!fixupACL(request.acls, storage.session_and_auth[session_id], node_acls)) + return {{zxid, Coordination::Error::ZINVALIDACL}}; + + Coordination::Stat stat; + stat.czxid = zxid; + stat.mzxid = zxid; + stat.pzxid = zxid; + stat.ctime = time; + stat.mtime = time; + stat.numChildren = 0; + stat.version = 0; + stat.aversion = 0; + stat.cversion = 0; + stat.dataLength = request.data.length(); + stat.ephemeralOwner = request.is_ephemeral ? session_id : 0; + + new_deltas.emplace_back( + std::move(path_created), + zxid, + KeeperStorage::CreateNodeDelta{stat, request.is_ephemeral, request.is_sequential, std::move(node_acls), request.data}); + + int32_t parent_cversion = request.parent_cversion; + + new_deltas.emplace_back( + std::string{parent_path}, + zxid, + KeeperStorage::UpdateNodeDelta{[parent_cversion, zxid](KeeperStorage::Node & node) + { + ++node.seq_num; + if (parent_cversion == -1) + ++node.stat.cversion; + else if (parent_cversion > node.stat.cversion) + node.stat.cversion = parent_cversion; + + if (zxid > node.stat.pzxid) + node.stat.pzxid = zxid; + ++node.stat.numChildren; + }}); + return new_deltas; + } + + Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t /*time*/) const override + { + Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); + Coordination::ZooKeeperCreateResponse & response = dynamic_cast(*response_ptr); + + if (const auto result = storage.commit(zxid, session_id); result != Coordination::Error::ZOK) + { + response.error = result; + return response_ptr; + } + + const auto & deltas = storage.uncommitted_state.deltas; + auto create_delta_it = std::find_if( + deltas.begin(), + deltas.end(), + [zxid](const auto & delta) + { return delta.zxid == zxid && std::holds_alternative(delta.operation); }); + + assert(create_delta_it != deltas.end()); + + response.path_created = create_delta_it->path; + response.error = Coordination::Error::ZOK; + return response_ptr; + } +}; + +struct KeeperStorageGetRequestProcessor final : public KeeperStorageRequestProcessor +{ + bool checkAuth(KeeperStorage & storage, int64_t session_id, bool is_local) const override + { + return storage.checkACL(zk_request->getPath(), Coordination::ACL::Read, session_id, is_local); } using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; - std::pair process(KeeperStorage & storage, int64_t zxid, int64_t /*session_id*/, int64_t /* time */) const override + + std::vector + preprocess(KeeperStorage & storage, int64_t zxid, int64_t /*session_id*/, int64_t /*time*/) const override { - auto & container = storage.container; - auto & ephemerals = storage.ephemerals; + Coordination::ZooKeeperGetRequest & request = dynamic_cast(*zk_request); + if (!storage.uncommitted_state.hasNode(request.path)) + return {{zxid, Coordination::Error::ZNONODE}}; + + return {}; + } + + template + Coordination::ZooKeeperResponsePtr processImpl(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t /* time */) const + { Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); - Coordination::ZooKeeperRemoveResponse & response = dynamic_cast(*response_ptr); - Coordination::ZooKeeperRemoveRequest & request = dynamic_cast(*zk_request); - Undo undo; + Coordination::ZooKeeperGetResponse & response = dynamic_cast(*response_ptr); + Coordination::ZooKeeperGetRequest & request = dynamic_cast(*zk_request); - auto it = container.find(request.path); - if (it == container.end()) + if constexpr (!local) { - if 
(request.restored_from_zookeeper_log) - updateParentPzxid(request.path, zxid, container); - response.error = Coordination::Error::ZNONODE; + if (const auto result = storage.commit(zxid, session_id); result != Coordination::Error::ZOK) + { + response.error = result; + return response_ptr; + } } - else if (request.version != -1 && request.version != it->value.stat.version) + + auto & container = storage.container; + auto node_it = container.find(request.path); + if (node_it == container.end()) { - response.error = Coordination::Error::ZBADVERSION; - } - else if (it->value.stat.numChildren) - { - response.error = Coordination::Error::ZNOTEMPTY; + if constexpr (local) + response.error = Coordination::Error::ZNONODE; + else + onStorageInconsistency(); } else { - if (request.restored_from_zookeeper_log) - updateParentPzxid(request.path, zxid, container); - - auto prev_node = it->value; - if (prev_node.stat.ephemeralOwner != 0) - { - auto ephemerals_it = ephemerals.find(prev_node.stat.ephemeralOwner); - ephemerals_it->second.erase(request.path); - if (ephemerals_it->second.empty()) - ephemerals.erase(ephemerals_it); - } - - storage.acl_map.removeUsage(prev_node.acl_id); - - container.updateValue(parentPath(request.path), [child_basename = getBaseName(it->key)] (KeeperStorage::Node & parent) - { - --parent.stat.numChildren; - ++parent.stat.cversion; - parent.removeChild(child_basename); - }); - + response.stat = node_it->value.stat; + response.data = node_it->value.getData(); response.error = Coordination::Error::ZOK; - /// Erase full path from container after child removed from parent - container.erase(request.path); - - undo = [prev_node, &storage, path = request.path] - { - if (prev_node.stat.ephemeralOwner != 0) - storage.ephemerals[prev_node.stat.ephemeralOwner].emplace(path); - - storage.acl_map.addUsage(prev_node.acl_id); - - /// Dangerous place: we are adding StringRef to child into children unordered_hash set. - /// That's why we are taking getBaseName from inserted key, not from the path from request object. 
- auto [map_key, _] = storage.container.insert(path, prev_node); - storage.container.updateValue(parentPath(path), [child_name = getBaseName(map_key->getKey())] (KeeperStorage::Node & parent) - { - ++parent.stat.numChildren; - --parent.stat.cversion; - parent.addChild(child_name); - }); - }; } - return { response_ptr, undo }; + return response_ptr; } - KeeperStorage::ResponsesForSessions processWatches(KeeperStorage::Watches & watches, KeeperStorage::Watches & list_watches) const override + + Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time) const override + { + return processImpl(storage, zxid, session_id, time); + } + + Coordination::ZooKeeperResponsePtr processLocal(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time) const override + { + return processImpl(storage, zxid, session_id, time); + } +}; + +struct KeeperStorageRemoveRequestProcessor final : public KeeperStorageRequestProcessor +{ + bool checkAuth(KeeperStorage & storage, int64_t session_id, bool is_local) const override + { + return storage.checkACL(parentPath(zk_request->getPath()), Coordination::ACL::Delete, session_id, is_local); + } + + using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; + std::vector + preprocess(KeeperStorage & storage, int64_t zxid, int64_t /*session_id*/, int64_t /*time*/) const override + { + Coordination::ZooKeeperRemoveRequest & request = dynamic_cast(*zk_request); + + std::vector new_deltas; + + const auto update_parent_pzxid = [&]() + { + auto parent_path = parentPath(request.path); + if (!storage.uncommitted_state.hasNode(parent_path)) + return; + + new_deltas.emplace_back( + std::string{parent_path}, + zxid, + KeeperStorage::UpdateNodeDelta{[zxid](KeeperStorage::Node & parent) + { + if (parent.stat.pzxid < zxid) + parent.stat.pzxid = zxid; + }}); + }; + + auto node = storage.uncommitted_state.getNode(request.path); + + if (!node) + { + if (request.restored_from_zookeeper_log) + update_parent_pzxid(); + return {{zxid, Coordination::Error::ZNONODE}}; + } + else if (request.version != -1 && request.version != node->stat.version) + return {{zxid, Coordination::Error::ZBADVERSION}}; + else if (node->stat.numChildren) + return {{zxid, Coordination::Error::ZNOTEMPTY}}; + + if (request.restored_from_zookeeper_log) + update_parent_pzxid(); + + new_deltas.emplace_back( + std::string{parentPath(request.path)}, + zxid, + KeeperStorage::UpdateNodeDelta{[](KeeperStorage::Node & parent) + { + --parent.stat.numChildren; + ++parent.stat.cversion; + }}); + + new_deltas.emplace_back(request.path, zxid, KeeperStorage::RemoveNodeDelta{request.version}); + + return new_deltas; + } + + Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t /* time */) const override + { + Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); + Coordination::ZooKeeperRemoveResponse & response = dynamic_cast(*response_ptr); + + response.error = storage.commit(zxid, session_id); + return response_ptr; + } + + KeeperStorage::ResponsesForSessions + processWatches(KeeperStorage::Watches & watches, KeeperStorage::Watches & list_watches) const override { return processWatchesImpl(zk_request->getPath(), watches, list_watches, Coordination::Event::DELETED); } @@ -556,101 +829,140 @@ struct KeeperStorageRemoveRequestProcessor final : public KeeperStorageRequestPr struct KeeperStorageExistsRequestProcessor final : public KeeperStorageRequestProcessor { using 
KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; - std::pair process(KeeperStorage & storage, int64_t /*zxid*/, int64_t /* session_id */, int64_t /* time */) const override - { - auto & container = storage.container; + std::vector + preprocess(KeeperStorage & storage, int64_t zxid, int64_t /*session_id*/, int64_t /*time*/) const override + { + Coordination::ZooKeeperExistsRequest & request = dynamic_cast(*zk_request); + + if (!storage.uncommitted_state.hasNode(request.path)) + return {{zxid, Coordination::Error::ZNONODE}}; + + return {}; + } + + template + Coordination::ZooKeeperResponsePtr processImpl(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t /* time */) const + { Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); Coordination::ZooKeeperExistsResponse & response = dynamic_cast(*response_ptr); Coordination::ZooKeeperExistsRequest & request = dynamic_cast(*zk_request); - auto it = container.find(request.path); - if (it != container.end()) + if constexpr (!local) { - response.stat = it->value.stat; - response.error = Coordination::Error::ZOK; + if (const auto result = storage.commit(zxid, session_id); result != Coordination::Error::ZOK) + { + response.error = result; + return response_ptr; + } + } + + auto & container = storage.container; + auto node_it = container.find(request.path); + if (node_it == container.end()) + { + if constexpr (local) + response.error = Coordination::Error::ZNONODE; + else + onStorageInconsistency(); } else { - response.error = Coordination::Error::ZNONODE; + response.stat = node_it->value.stat; + response.error = Coordination::Error::ZOK; } - return { response_ptr, {} }; + return response_ptr; + } + + Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time) const override + { + return processImpl(storage, zxid, session_id, time); + } + + Coordination::ZooKeeperResponsePtr processLocal(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time) const override + { + return processImpl(storage, zxid, session_id, time); } }; struct KeeperStorageSetRequestProcessor final : public KeeperStorageRequestProcessor { - bool checkAuth(KeeperStorage & storage, int64_t session_id) const override + bool checkAuth(KeeperStorage & storage, int64_t session_id, bool is_local) const override { - auto & container = storage.container; - auto it = container.find(zk_request->getPath()); - if (it == container.end()) - return true; - - const auto & node_acls = storage.acl_map.convertNumber(it->value.acl_id); - if (node_acls.empty()) - return true; - - const auto & session_auths = storage.session_and_auth[session_id]; - return checkACL(Coordination::ACL::Write, node_acls, session_auths); + return storage.checkACL(zk_request->getPath(), Coordination::ACL::Write, session_id, is_local); } using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; - std::pair process(KeeperStorage & storage, int64_t zxid, int64_t /* session_id */, int64_t time) const override + std::vector preprocess(KeeperStorage & storage, int64_t zxid, int64_t /*session_id*/, int64_t time) const override + { + Coordination::ZooKeeperSetRequest & request = dynamic_cast(*zk_request); + + std::vector new_deltas; + + if (!storage.uncommitted_state.hasNode(request.path)) + return {{zxid, Coordination::Error::ZNONODE}}; + + auto node = storage.uncommitted_state.getNode(request.path); + + if (request.version != -1 && request.version != node->stat.version) + return {{zxid, 
Coordination::Error::ZBADVERSION}}; + + new_deltas.emplace_back( + request.path, + zxid, + KeeperStorage::UpdateNodeDelta{ + [zxid, data = request.data, time](KeeperStorage::Node & value) + { + value.stat.version++; + value.stat.mzxid = zxid; + value.stat.mtime = time; + value.stat.dataLength = data.length(); + value.setData(data); + }, + request.version}); + + new_deltas.emplace_back( + parentPath(request.path).toString(), + zxid, + KeeperStorage::UpdateNodeDelta + { + [](KeeperStorage::Node & parent) + { + parent.stat.cversion++; + } + } + ); + + return new_deltas; + } + + Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t /*time*/) const override { auto & container = storage.container; Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); Coordination::ZooKeeperSetResponse & response = dynamic_cast(*response_ptr); Coordination::ZooKeeperSetRequest & request = dynamic_cast(*zk_request); - Undo undo; - auto it = container.find(request.path); - if (it == container.end()) + if (const auto result = storage.commit(zxid, session_id); result != Coordination::Error::ZOK) { - response.error = Coordination::Error::ZNONODE; - } - else if (request.version == -1 || request.version == it->value.stat.version) - { - - auto prev_node = it->value; - - auto itr = container.updateValue(request.path, [zxid, request, time] (KeeperStorage::Node & value) mutable - { - value.stat.version++; - value.stat.mzxid = zxid; - value.stat.mtime = time; - value.stat.dataLength = request.data.length(); - value.setData(std::move(request.data)); - }); - - container.updateValue(parentPath(request.path), [] (KeeperStorage::Node & parent) - { - parent.stat.cversion++; - }); - - response.stat = itr->value.stat; - response.error = Coordination::Error::ZOK; - - undo = [prev_node, &container, path = request.path] - { - container.updateValue(path, [&prev_node] (KeeperStorage::Node & value) { value = prev_node; }); - container.updateValue(parentPath(path), [] (KeeperStorage::Node & parent) - { - parent.stat.cversion--; - }); - }; - } - else - { - response.error = Coordination::Error::ZBADVERSION; + response.error = result; + return response_ptr; } - return { response_ptr, undo }; + auto node_it = container.find(request.path); + if (node_it == container.end()) + onStorageInconsistency(); + + response.stat = node_it->value.stat; + response.error = Coordination::Error::ZOK; + + return response_ptr; } - KeeperStorage::ResponsesForSessions processWatches(KeeperStorage::Watches & watches, KeeperStorage::Watches & list_watches) const override + KeeperStorage::ResponsesForSessions + processWatches(KeeperStorage::Watches & watches, KeeperStorage::Watches & list_watches) const override { return processWatchesImpl(zk_request->getPath(), watches, list_watches, Coordination::Event::CHANGED); } @@ -658,33 +970,48 @@ struct KeeperStorageSetRequestProcessor final : public KeeperStorageRequestProce struct KeeperStorageListRequestProcessor final : public KeeperStorageRequestProcessor { - bool checkAuth(KeeperStorage & storage, int64_t session_id) const override + bool checkAuth(KeeperStorage & storage, int64_t session_id, bool is_local) const override { - auto & container = storage.container; - auto it = container.find(zk_request->getPath()); - if (it == container.end()) - return true; - - const auto & node_acls = storage.acl_map.convertNumber(it->value.acl_id); - if (node_acls.empty()) - return true; - - const auto & session_auths = 
storage.session_and_auth[session_id]; - return checkACL(Coordination::ACL::Read, node_acls, session_auths); + return storage.checkACL(zk_request->getPath(), Coordination::ACL::Read, session_id, is_local); } using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; - std::pair process(KeeperStorage & storage, int64_t /*zxid*/, int64_t /*session_id*/, int64_t /* time */) const override + std::vector + preprocess(KeeperStorage & storage, int64_t zxid, int64_t /*session_id*/, int64_t /*time*/) const override + { + Coordination::ZooKeeperListRequest & request = dynamic_cast(*zk_request); + + if (!storage.uncommitted_state.hasNode(request.path)) + return {{zxid, Coordination::Error::ZNONODE}}; + + return {}; + } + + + template + Coordination::ZooKeeperResponsePtr processImpl(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t /* time */) const { - auto & container = storage.container; Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); Coordination::ZooKeeperListResponse & response = dynamic_cast(*response_ptr); Coordination::ZooKeeperListRequest & request = dynamic_cast(*zk_request); - auto it = container.find(request.path); - if (it == container.end()) + if constexpr (!local) { - response.error = Coordination::Error::ZNONODE; + if (const auto result = storage.commit(zxid, session_id); result != Coordination::Error::ZOK) + { + response.error = result; + return response_ptr; + } + } + + auto & container = storage.container; + auto node_it = container.find(request.path); + if (node_it == container.end()) + { + if constexpr (local) + response.error = Coordination::Error::ZNONODE; + else + onStorageInconsistency(); } else { @@ -692,174 +1019,247 @@ struct KeeperStorageListRequestProcessor final : public KeeperStorageRequestProc if (path_prefix.empty()) throw DB::Exception("Logical error: path cannot be empty", ErrorCodes::LOGICAL_ERROR); - const auto & children = it->value.getChildren(); + const auto & children = node_it->value.getChildren(); response.names.reserve(children.size()); for (const auto child : children) response.names.push_back(child.toString()); - response.stat = it->value.stat; + response.stat = node_it->value.stat; response.error = Coordination::Error::ZOK; } - return { response_ptr, {} }; + return response_ptr; + } + + Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time) const override + { + return processImpl(storage, zxid, session_id, time); + } + + Coordination::ZooKeeperResponsePtr processLocal(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time) const override + { + return processImpl(storage, zxid, session_id, time); } }; struct KeeperStorageCheckRequestProcessor final : public KeeperStorageRequestProcessor { - bool checkAuth(KeeperStorage & storage, int64_t session_id) const override + bool checkAuth(KeeperStorage & storage, int64_t session_id, bool is_local) const override { - auto & container = storage.container; - auto it = container.find(zk_request->getPath()); - if (it == container.end()) - return true; - - const auto & node_acls = storage.acl_map.convertNumber(it->value.acl_id); - if (node_acls.empty()) - return true; - - const auto & session_auths = storage.session_and_auth[session_id]; - return checkACL(Coordination::ACL::Read, node_acls, session_auths); + return storage.checkACL(zk_request->getPath(), Coordination::ACL::Read, session_id, is_local); } using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; - std::pair 
process(KeeperStorage & storage, int64_t /*zxid*/, int64_t /*session_id*/, int64_t /* time */) const override + std::vector + preprocess(KeeperStorage & storage, int64_t zxid, int64_t /*session_id*/, int64_t /*time*/) const override { - auto & container = storage.container; + Coordination::ZooKeeperCheckRequest & request = dynamic_cast(*zk_request); + if (!storage.uncommitted_state.hasNode(request.path)) + return {{zxid, Coordination::Error::ZNONODE}}; + + auto node = storage.uncommitted_state.getNode(request.path); + if (request.version != -1 && request.version != node->stat.version) + return {{zxid, Coordination::Error::ZBADVERSION}}; + + return {}; + } + + template + Coordination::ZooKeeperResponsePtr processImpl(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t /* time */) const + { Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); Coordination::ZooKeeperCheckResponse & response = dynamic_cast(*response_ptr); Coordination::ZooKeeperCheckRequest & request = dynamic_cast(*zk_request); - auto it = container.find(request.path); - if (it == container.end()) + + if constexpr (!local) { - response.error = Coordination::Error::ZNONODE; + if (const auto result = storage.commit(zxid, session_id); result != Coordination::Error::ZOK) + { + response.error = result; + return response_ptr; + } } - else if (request.version != -1 && request.version != it->value.stat.version) + + const auto on_error = [&]([[maybe_unused]] const auto error_code) { - response.error = Coordination::Error::ZBADVERSION; + if constexpr (local) + response.error = error_code; + else + onStorageInconsistency(); + }; + + auto & container = storage.container; + auto node_it = container.find(request.path); + if (node_it == container.end()) + { + on_error(Coordination::Error::ZNONODE); + } + else if (request.version != -1 && request.version != node_it->value.stat.version) + { + on_error(Coordination::Error::ZBADVERSION); } else { response.error = Coordination::Error::ZOK; } - return { response_ptr, {} }; + return response_ptr; + } + + Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time) const override + { + return processImpl(storage, zxid, session_id, time); + } + + Coordination::ZooKeeperResponsePtr processLocal(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time) const override + { + return processImpl(storage, zxid, session_id, time); } }; struct KeeperStorageSetACLRequestProcessor final : public KeeperStorageRequestProcessor { - bool checkAuth(KeeperStorage & storage, int64_t session_id) const override + bool checkAuth(KeeperStorage & storage, int64_t session_id, bool is_local) const override { - auto & container = storage.container; - auto it = container.find(zk_request->getPath()); - if (it == container.end()) - return true; - - const auto & node_acls = storage.acl_map.convertNumber(it->value.acl_id); - if (node_acls.empty()) - return true; - - const auto & session_auths = storage.session_and_auth[session_id]; - return checkACL(Coordination::ACL::Admin, node_acls, session_auths); + return storage.checkACL(zk_request->getPath(), Coordination::ACL::Admin, session_id, is_local); } using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; - std::pair process(KeeperStorage & storage, int64_t /*zxid*/, int64_t session_id, int64_t /* time */) const override + std::vector preprocess(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t /*time*/) const override { - auto & container = 
storage.container; + Coordination::ZooKeeperSetACLRequest & request = dynamic_cast(*zk_request); + auto & uncommitted_state = storage.uncommitted_state; + if (!uncommitted_state.hasNode(request.path)) + return {{zxid, Coordination::Error::ZNONODE}}; + + auto node = uncommitted_state.getNode(request.path); + + if (request.version != -1 && request.version != node->stat.aversion) + return {{zxid, Coordination::Error::ZBADVERSION}}; + + + auto & session_auth_ids = storage.session_and_auth[session_id]; + Coordination::ACLs node_acls; + + if (!fixupACL(request.acls, session_auth_ids, node_acls)) + return {{zxid, Coordination::Error::ZINVALIDACL}}; + + return + { + { + request.path, + zxid, + KeeperStorage::SetACLDelta{std::move(node_acls), request.version} + }, + { + request.path, + zxid, + KeeperStorage::UpdateNodeDelta + { + [](KeeperStorage::Node & n) { ++n.stat.aversion; } + } + } + }; + } + + Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t /* time */) const override + { Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); Coordination::ZooKeeperSetACLResponse & response = dynamic_cast(*response_ptr); Coordination::ZooKeeperSetACLRequest & request = dynamic_cast(*zk_request); - auto it = container.find(request.path); - if (it == container.end()) + + if (const auto result = storage.commit(zxid, session_id); result != Coordination::Error::ZOK) { - response.error = Coordination::Error::ZNONODE; - } - else if (request.version != -1 && request.version != it->value.stat.aversion) - { - response.error = Coordination::Error::ZBADVERSION; - } - else - { - auto & session_auth_ids = storage.session_and_auth[session_id]; - Coordination::ACLs node_acls; - - if (!fixupACL(request.acls, session_auth_ids, node_acls)) - { - response.error = Coordination::Error::ZINVALIDACL; - return {response_ptr, {}}; - } - - uint64_t acl_id = storage.acl_map.convertACLs(node_acls); - storage.acl_map.addUsage(acl_id); - - storage.container.updateValue(request.path, [acl_id] (KeeperStorage::Node & node) - { - node.acl_id = acl_id; - ++node.stat.aversion; - }); - - response.stat = it->value.stat; - response.error = Coordination::Error::ZOK; + response.error = result; + return response_ptr; } - /// It cannot be used insied multitransaction? - return { response_ptr, {} }; + auto node_it = storage.container.find(request.path); + if (node_it == storage.container.end()) + onStorageInconsistency(); + response.stat = node_it->value.stat; + response.error = Coordination::Error::ZOK; + + return response_ptr; } }; struct KeeperStorageGetACLRequestProcessor final : public KeeperStorageRequestProcessor { - bool checkAuth(KeeperStorage & storage, int64_t session_id) const override + bool checkAuth(KeeperStorage & storage, int64_t session_id, bool is_local) const override { - auto & container = storage.container; - auto it = container.find(zk_request->getPath()); - if (it == container.end()) - return true; - - const auto & node_acls = storage.acl_map.convertNumber(it->value.acl_id); - if (node_acls.empty()) - return true; - - const auto & session_auths = storage.session_and_auth[session_id]; - /// LOL, GetACL require more permissions, then SetACL... 
- return checkACL(Coordination::ACL::Admin | Coordination::ACL::Read, node_acls, session_auths); + return storage.checkACL(zk_request->getPath(), Coordination::ACL::Admin | Coordination::ACL::Read, session_id, is_local); } + using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; - std::pair process(KeeperStorage & storage, int64_t /*zxid*/, int64_t /*session_id*/, int64_t /* time */) const override + std::vector + preprocess(KeeperStorage & storage, int64_t zxid, int64_t /*session_id*/, int64_t /*time*/) const override + { + Coordination::ZooKeeperGetACLRequest & request = dynamic_cast(*zk_request); + + if (!storage.uncommitted_state.hasNode(request.path)) + return {{zxid, Coordination::Error::ZNONODE}}; + + return {}; + } + + template + Coordination::ZooKeeperResponsePtr processImpl(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t /* time */) const { Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); Coordination::ZooKeeperGetACLResponse & response = dynamic_cast(*response_ptr); Coordination::ZooKeeperGetACLRequest & request = dynamic_cast(*zk_request); - auto & container = storage.container; - auto it = container.find(request.path); - if (it == container.end()) + + if constexpr (!local) { - response.error = Coordination::Error::ZNONODE; + if (const auto result = storage.commit(zxid, session_id); result != Coordination::Error::ZOK) + { + response.error = result; + return response_ptr; + } + } + + auto & container = storage.container; + auto node_it = container.find(request.path); + if (node_it == container.end()) + { + if constexpr (local) + response.error = Coordination::Error::ZNONODE; + else + onStorageInconsistency(); } else { - response.stat = it->value.stat; - response.acl = storage.acl_map.convertNumber(it->value.acl_id); + response.stat = node_it->value.stat; + response.acl = storage.acl_map.convertNumber(node_it->value.acl_id); } - return {response_ptr, {}}; + return response_ptr; + } + + Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time) const override + { + return processImpl(storage, zxid, session_id, time); + } + + Coordination::ZooKeeperResponsePtr processLocal(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time) const override + { + return processImpl(storage, zxid, session_id, time); } }; struct KeeperStorageMultiRequestProcessor final : public KeeperStorageRequestProcessor { - bool checkAuth(KeeperStorage & storage, int64_t session_id) const override + bool checkAuth(KeeperStorage & storage, int64_t session_id, bool is_local) const override { for (const auto & concrete_request : concrete_requests) - if (!concrete_request->checkAuth(storage, session_id)) + if (!concrete_request->checkAuth(storage, session_id, is_local)) return false; return true; } @@ -889,65 +1289,124 @@ struct KeeperStorageMultiRequestProcessor final : public KeeperStorageRequestPro concrete_requests.push_back(std::make_shared(sub_zk_request)); break; default: - throw DB::Exception(ErrorCodes::BAD_ARGUMENTS, "Illegal command as part of multi ZooKeeper request {}", sub_zk_request->getOpNum()); + throw DB::Exception( + ErrorCodes::BAD_ARGUMENTS, "Illegal command as part of multi ZooKeeper request {}", sub_zk_request->getOpNum()); } } } - std::pair process(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time) const override + std::vector preprocess(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time) const override + { + // 
manually add deltas so that the result of previous request in the transaction is used in the next request + auto & saved_deltas = storage.uncommitted_state.deltas; + + std::vector response_errors; + response_errors.reserve(concrete_requests.size()); + for (size_t i = 0; i < concrete_requests.size(); ++i) + { + auto new_deltas = concrete_requests[i]->preprocess(storage, zxid, session_id, time); + + if (!new_deltas.empty()) + { + if (auto * error = std::get_if(&new_deltas.back().operation)) + { + std::erase_if(saved_deltas, [zxid](const auto & delta) { return delta.zxid == zxid; }); + + response_errors.push_back(error->error); + + for (size_t j = i + 1; j < concrete_requests.size(); ++j) + { + response_errors.push_back(Coordination::Error::ZRUNTIMEINCONSISTENCY); + } + + return {{zxid, KeeperStorage::FailedMultiDelta{std::move(response_errors)}}}; + } + } + new_deltas.emplace_back(zxid, KeeperStorage::SubDeltaEnd{}); + response_errors.push_back(Coordination::Error::ZOK); + + saved_deltas.insert(saved_deltas.end(), std::make_move_iterator(new_deltas.begin()), std::make_move_iterator(new_deltas.end())); + } + + return {}; + } + + Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time) const override { Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); Coordination::ZooKeeperMultiResponse & response = dynamic_cast(*response_ptr); - std::vector undo_actions; - try + auto & deltas = storage.uncommitted_state.deltas; + // the deltas will have at least SubDeltaEnd or FailedMultiDelta + assert(!deltas.empty()); + if (auto * failed_multi = std::get_if(&deltas.front().operation)) { - size_t i = 0; - for (const auto & concrete_request : concrete_requests) + for (size_t i = 0; i < concrete_requests.size(); ++i) { - auto [ cur_response, undo_action ] = concrete_request->process(storage, zxid, session_id, time); - - response.responses[i] = cur_response; - if (cur_response->error != Coordination::Error::ZOK) - { - for (size_t j = 0; j <= i; ++j) - { - auto response_error = response.responses[j]->error; - response.responses[j] = std::make_shared(); - response.responses[j]->error = response_error; - } - - for (size_t j = i + 1; j < response.responses.size(); ++j) - { - response.responses[j] = std::make_shared(); - response.responses[j]->error = Coordination::Error::ZRUNTIMEINCONSISTENCY; - } - - for (auto it = undo_actions.rbegin(); it != undo_actions.rend(); ++it) - if (*it) - (*it)(); - - return { response_ptr, {} }; - } - else - undo_actions.emplace_back(std::move(undo_action)); - - ++i; + response.responses[i] = std::make_shared(); + response.responses[i]->error = failed_multi->error_codes[i]; } - response.error = Coordination::Error::ZOK; - return { response_ptr, {} }; + return response_ptr; } - catch (...) 
+ + for (size_t i = 0; i < concrete_requests.size(); ++i) { - for (auto it = undo_actions.rbegin(); it != undo_actions.rend(); ++it) - if (*it) - (*it)(); - throw; + auto cur_response = concrete_requests[i]->process(storage, zxid, session_id, time); + + while (!deltas.empty()) + { + if (std::holds_alternative(deltas.front().operation)) + { + deltas.pop_front(); + break; + } + + deltas.pop_front(); + } + + response.responses[i] = cur_response; } + + response.error = Coordination::Error::ZOK; + return response_ptr; } - KeeperStorage::ResponsesForSessions processWatches(KeeperStorage::Watches & watches, KeeperStorage::Watches & list_watches) const override + Coordination::ZooKeeperResponsePtr processLocal(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time) const override + { + Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); + Coordination::ZooKeeperMultiResponse & response = dynamic_cast(*response_ptr); + + for (size_t i = 0; i < concrete_requests.size(); ++i) + { + auto cur_response = concrete_requests[i]->process(storage, zxid, session_id, time); + + response.responses[i] = cur_response; + if (cur_response->error != Coordination::Error::ZOK) + { + for (size_t j = 0; j <= i; ++j) + { + auto response_error = response.responses[j]->error; + response.responses[j] = std::make_shared(); + response.responses[j]->error = response_error; + } + + for (size_t j = i + 1; j < response.responses.size(); ++j) + { + response.responses[j] = std::make_shared(); + response.responses[j]->error = Coordination::Error::ZRUNTIMEINCONSISTENCY; + } + + return response_ptr; + } + } + + response.error = Coordination::Error::ZOK; + return response_ptr; + } + + KeeperStorage::ResponsesForSessions + processWatches(KeeperStorage::Watches & watches, KeeperStorage::Watches & list_watches) const override { KeeperStorage::ResponsesForSessions result; for (const auto & generic_request : concrete_requests) @@ -962,7 +1421,7 @@ struct KeeperStorageMultiRequestProcessor final : public KeeperStorageRequestPro struct KeeperStorageCloseRequestProcessor final : public KeeperStorageRequestProcessor { using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; - std::pair process(KeeperStorage &, int64_t, int64_t, int64_t /* time */) const override + Coordination::ZooKeeperResponsePtr process(KeeperStorage &, int64_t, int64_t, int64_t /* time */) const override { throw DB::Exception("Called process on close request", ErrorCodes::LOGICAL_ERROR); } @@ -971,36 +1430,40 @@ struct KeeperStorageCloseRequestProcessor final : public KeeperStorageRequestPro struct KeeperStorageAuthRequestProcessor final : public KeeperStorageRequestProcessor { using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; - std::pair process(KeeperStorage & storage, int64_t /*zxid*/, int64_t session_id, int64_t /* time */) const override + std::vector preprocess(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t /*time*/) const override { Coordination::ZooKeeperAuthRequest & auth_request = dynamic_cast(*zk_request); Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); - Coordination::ZooKeeperAuthResponse & auth_response = dynamic_cast(*response_ptr); - auto & sessions_and_auth = storage.session_and_auth; if (auth_request.scheme != "digest" || std::count(auth_request.data.begin(), auth_request.data.end(), ':') != 1) + return {{zxid, Coordination::Error::ZAUTHFAILED}}; + + std::vector new_deltas; + auto digest = generateDigest(auth_request.data); + if (digest 
== storage.superdigest) { - auth_response.error = Coordination::Error::ZAUTHFAILED; + KeeperStorage::AuthID auth{"super", ""}; + new_deltas.emplace_back(zxid, KeeperStorage::AddAuthDelta{session_id, std::move(auth)}); } else { - auto digest = generateDigest(auth_request.data); - if (digest == storage.superdigest) - { - KeeperStorage::AuthID auth{"super", ""}; - sessions_and_auth[session_id].emplace_back(auth); - } - else - { - KeeperStorage::AuthID auth{auth_request.scheme, digest}; - auto & session_ids = sessions_and_auth[session_id]; - if (std::find(session_ids.begin(), session_ids.end(), auth) == session_ids.end()) - sessions_and_auth[session_id].emplace_back(auth); - } - + KeeperStorage::AuthID new_auth{auth_request.scheme, digest}; + if (!storage.uncommitted_state.hasACL(session_id, false, [&](const auto & auth_id) { return new_auth == auth_id; })) + new_deltas.emplace_back(zxid, KeeperStorage::AddAuthDelta{session_id, std::move(new_auth)}); } - return { response_ptr, {} }; + return new_deltas; + } + + Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t /* time */) const override + { + Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); + Coordination::ZooKeeperAuthResponse & auth_response = dynamic_cast(*response_ptr); + + if (const auto result = storage.commit(zxid, session_id); result != Coordination::Error::ZOK) + auth_response.error = result; + + return response_ptr; } }; @@ -1026,7 +1489,6 @@ void KeeperStorage::finalize() class KeeperStorageRequestProcessorsFactory final : private boost::noncopyable { - public: using Creator = std::function; using OpNumToRequest = std::unordered_map; @@ -1039,11 +1501,11 @@ public: KeeperStorageRequestProcessorPtr get(const Coordination::ZooKeeperRequestPtr & zk_request) const { - auto it = op_num_to_request.find(zk_request->getOpNum()); - if (it == op_num_to_request.end()) + auto request_it = op_num_to_request.find(zk_request->getOpNum()); + if (request_it == op_num_to_request.end()) throw DB::Exception("Unknown operation type " + toString(zk_request->getOpNum()), ErrorCodes::LOGICAL_ERROR); - return it->second(zk_request); + return request_it->second(zk_request); } void registerRequest(Coordination::OpNum op_num, Creator creator) @@ -1057,10 +1519,11 @@ private: KeeperStorageRequestProcessorsFactory(); }; -template +template void registerKeeperRequestProcessor(KeeperStorageRequestProcessorsFactory & factory) { - factory.registerRequest(num, [] (const Coordination::ZooKeeperRequestPtr & zk_request) { return std::make_shared(zk_request); }); + factory.registerRequest( + num, [](const Coordination::ZooKeeperRequestPtr & zk_request) { return std::make_shared(zk_request); }); } @@ -1084,13 +1547,66 @@ KeeperStorageRequestProcessorsFactory::KeeperStorageRequestProcessorsFactory() } -KeeperStorage::ResponsesForSessions KeeperStorage::processRequest(const Coordination::ZooKeeperRequestPtr & zk_request, int64_t session_id, int64_t time, std::optional new_last_zxid, bool check_acl) +void KeeperStorage::preprocessRequest( + const Coordination::ZooKeeperRequestPtr & zk_request, int64_t session_id, int64_t time, int64_t new_last_zxid, bool check_acl) +{ + KeeperStorageRequestProcessorPtr request_processor = KeeperStorageRequestProcessorsFactory::instance().get(zk_request); + + if (zk_request->getOpNum() == Coordination::OpNum::Close) /// Close request is special + { + auto & deltas = uncommitted_state.deltas; + auto session_ephemerals = ephemerals.find(session_id); + if 
(session_ephemerals != ephemerals.end()) + { + for (const auto & ephemeral_path : session_ephemerals->second) + { + // For now just add deltas for removing the node + // On commit, ephemerals nodes will be deleted from storage + // and removed from the session + if (uncommitted_state.hasNode(ephemeral_path)) + { + deltas.emplace_back( + parentPath(ephemeral_path).toString(), + new_last_zxid, + UpdateNodeDelta{[ephemeral_path](Node & parent) + { + --parent.stat.numChildren; + ++parent.stat.cversion; + }}); + + deltas.emplace_back(ephemeral_path, new_last_zxid, RemoveNodeDelta()); + } + } + } + + return; + } + + if (check_acl && !request_processor->checkAuth(*this, session_id, false)) + { + uncommitted_state.deltas.emplace_back(new_last_zxid, Coordination::Error::ZNOAUTH); + return; + } + + auto new_deltas = request_processor->preprocess(*this, new_last_zxid, session_id, time); + uncommitted_state.deltas.insert( + uncommitted_state.deltas.end(), std::make_move_iterator(new_deltas.begin()), std::make_move_iterator(new_deltas.end())); +} + +KeeperStorage::ResponsesForSessions KeeperStorage::processRequest( + const Coordination::ZooKeeperRequestPtr & zk_request, + int64_t session_id, + int64_t time, + std::optional new_last_zxid, + bool check_acl, + bool is_local) { KeeperStorage::ResponsesForSessions results; if (new_last_zxid) { if (zxid >= *new_last_zxid) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Got new ZXID {} smaller or equal than current {}. It's a bug", *new_last_zxid, zxid); + throw Exception( + ErrorCodes::LOGICAL_ERROR, "Got new ZXID {} smaller or equal than current {}. It's a bug", *new_last_zxid, zxid); zxid = *new_last_zxid; } @@ -1099,26 +1615,22 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest(const Coordina if (zk_request->getOpNum() == Coordination::OpNum::Close) /// Close request is special { - auto it = ephemerals.find(session_id); - if (it != ephemerals.end()) + commit(zxid, session_id); + + for (const auto & delta : uncommitted_state.deltas) { - for (const auto & ephemeral_path : it->second) + if (delta.zxid > zxid) + break; + + if (std::holds_alternative(delta.operation)) { - container.updateValue(parentPath(ephemeral_path), [&ephemeral_path] (KeeperStorage::Node & parent) - { - --parent.stat.numChildren; - ++parent.stat.cversion; - auto base_name = getBaseName(ephemeral_path); - parent.removeChild(base_name); - }); - - container.erase(ephemeral_path); - - auto responses = processWatchesImpl(ephemeral_path, watches, list_watches, Coordination::Event::DELETED); + auto responses = processWatchesImpl(delta.path, watches, list_watches, Coordination::Event::DELETED); results.insert(results.end(), responses.begin(), responses.end()); } - ephemerals.erase(it); } + + std::erase_if(uncommitted_state.deltas, [this](const auto & delta) { return delta.zxid == zxid; }); + clearDeadWatches(session_id); auto auth_it = session_and_auth.find(session_id); if (auth_it != session_and_auth.end()) @@ -1135,7 +1647,7 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest(const Coordina else if (zk_request->getOpNum() == Coordination::OpNum::Heartbeat) /// Heartbeat request is also special { KeeperStorageRequestProcessorPtr storage_request = KeeperStorageRequestProcessorsFactory::instance().get(zk_request); - auto [response, _] = storage_request->process(*this, zxid, session_id, time); + auto response = storage_request->process(*this, zxid, session_id, time); response->xid = zk_request->xid; response->zxid = getZXID(); @@ -1146,15 +1658,24 @@ 
KeeperStorage::ResponsesForSessions KeeperStorage::processRequest(const Coordina KeeperStorageRequestProcessorPtr request_processor = KeeperStorageRequestProcessorsFactory::instance().get(zk_request); Coordination::ZooKeeperResponsePtr response; - if (check_acl && !request_processor->checkAuth(*this, session_id)) + if (is_local) { - response = zk_request->makeResponse(); - /// Original ZooKeeper always throws no auth, even when user provided some credentials - response->error = Coordination::Error::ZNOAUTH; + assert(zk_request->isReadRequest()); + if (check_acl && !request_processor->checkAuth(*this, session_id, true)) + { + response = zk_request->makeResponse(); + /// Original ZooKeeper always throws no auth, even when user provided some credentials + response->error = Coordination::Error::ZNOAUTH; + } + else + { + response = request_processor->processLocal(*this, zxid, session_id, time); + } } else { - std::tie(response, std::ignore) = request_processor->process(*this, zxid, session_id, time); + response = request_processor->process(*this, zxid, session_id, time); + std::erase_if(uncommitted_state.deltas, [this](const auto & delta) { return delta.zxid == zxid; }); } /// Watches for this requests are added to the watches lists @@ -1162,7 +1683,8 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest(const Coordina { if (response->error == Coordination::Error::ZOK) { - auto & watches_type = zk_request->getOpNum() == Coordination::OpNum::List || zk_request->getOpNum() == Coordination::OpNum::SimpleList + auto & watches_type + = zk_request->getOpNum() == Coordination::OpNum::List || zk_request->getOpNum() == Coordination::OpNum::SimpleList ? list_watches : watches; @@ -1192,6 +1714,16 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest(const Coordina return results; } +void KeeperStorage::rollbackRequest(int64_t rollback_zxid) +{ + // we can only rollback the last zxid (if there is any) + // if there is a delta with a larger zxid, we have invalid state + const auto last_zxid = uncommitted_state.deltas.back().zxid; + if (!uncommitted_state.deltas.empty() && last_zxid > rollback_zxid) + throw DB::Exception{DB::ErrorCodes::LOGICAL_ERROR, "Invalid state of deltas found while trying to rollback request. 
Last ZXID ({}) is larger than the requested ZXID ({})", last_zxid, rollback_zxid}; + + std::erase_if(uncommitted_state.deltas, [rollback_zxid](const auto & delta) { return delta.zxid == rollback_zxid; }); +} void KeeperStorage::clearDeadWatches(int64_t session_id) { diff --git a/src/Coordination/KeeperStorage.h b/src/Coordination/KeeperStorage.h index ccbddcf6e19..7d26ae24dd9 100644 --- a/src/Coordination/KeeperStorage.h +++ b/src/Coordination/KeeperStorage.h @@ -1,14 +1,14 @@ #pragma once -#include -#include -#include -#include -#include -#include -#include #include #include +#include +#include +#include +#include +#include +#include +#include #include @@ -29,7 +29,6 @@ struct KeeperStorageSnapshot; class KeeperStorage { public: - struct Node { uint64_t acl_id = 0; /// 0 -- no ACL by default @@ -41,26 +40,18 @@ public: Node() : size_bytes(sizeof(Node)) { } /// Object memory size - uint64_t sizeInBytes() const - { - return size_bytes; - } + uint64_t sizeInBytes() const { return size_bytes; } void setData(String new_data); - const auto & getData() const noexcept - { - return data; - } + const auto & getData() const noexcept { return data; } void addChild(StringRef child_path); void removeChild(StringRef child_path); - const auto & getChildren() const noexcept - { - return children; - } + const auto & getChildren() const noexcept { return children; } + private: String data; ChildrenSet children{}; @@ -85,10 +76,7 @@ public: std::string scheme; std::string id; - bool operator==(const AuthID & other) const - { - return scheme == other.scheme && id == other.id; - } + bool operator==(const AuthID & other) const { return scheme == other.scheme && id == other.id; } }; using RequestsForSessions = std::vector; @@ -112,6 +100,146 @@ public: /// container. Container container; + // Applying ZooKeeper request to storage consists of two steps: + // - preprocessing which, instead of applying the changes directly to storage, + // generates deltas with those changes, denoted with the request ZXID + // - processing which applies deltas with the correct ZXID to the storage + // + // Delta objects allow us two things: + // - fetch the latest, uncommitted state of an object by getting the committed + // state of that same object from the storage and applying the deltas + // in the same order as they are defined + // - quickly commit the changes to the storage + struct CreateNodeDelta + { + Coordination::Stat stat; + bool is_ephemeral; + bool is_sequental; + Coordination::ACLs acls; + String data; + }; + + struct RemoveNodeDelta + { + int32_t version{-1}; + }; + + struct UpdateNodeDelta + { + std::function update_fn; + int32_t version{-1}; + }; + + struct SetACLDelta + { + Coordination::ACLs acls; + int32_t version{-1}; + }; + + struct ErrorDelta + { + Coordination::Error error; + }; + + struct FailedMultiDelta + { + std::vector error_codes; + }; + + // Denotes end of a subrequest in multi request + struct SubDeltaEnd + { + }; + + struct AddAuthDelta + { + int64_t session_id; + AuthID auth_id; + }; + + using Operation + = std::variant; + + struct Delta + { + Delta(String path_, int64_t zxid_, Operation operation_) : path(std::move(path_)), zxid(zxid_), operation(std::move(operation_)) { } + + Delta(int64_t zxid_, Coordination::Error error) : Delta("", zxid_, ErrorDelta{error}) { } + + Delta(int64_t zxid_, Operation subdelta) : Delta("", zxid_, subdelta) { } + + String path; + int64_t zxid; + Operation operation; + }; + + struct UncommittedState + { + explicit UncommittedState(KeeperStorage & storage_) : 
storage(storage_) { } + + template + void applyDeltas(StringRef path, const Visitor & visitor) const + { + for (const auto & delta : deltas) + { + if (path.empty() || delta.path == path) + std::visit(visitor, delta.operation); + } + } + + bool hasACL(int64_t session_id, bool is_local, std::function predicate) + { + for (const auto & session_auth : storage.session_and_auth[session_id]) + { + if (predicate(session_auth)) + return true; + } + + if (is_local) + return false; + + + for (const auto & delta : deltas) + { + if (const auto * auth_delta = std::get_if(&delta.operation); + auth_delta && auth_delta->session_id == session_id && predicate(auth_delta->auth_id)) + return true; + } + + return false; + } + + std::shared_ptr getNode(StringRef path); + bool hasNode(StringRef path) const; + Coordination::ACLs getACLs(StringRef path) const; + + std::deque deltas; + KeeperStorage & storage; + }; + + UncommittedState uncommitted_state{*this}; + + Coordination::Error commit(int64_t zxid, int64_t session_id); + + // Create node in the storage + // Returns false if it failed to create the node, true otherwise + // We don't care about the exact failure because we should've caught it during preprocessing + bool createNode( + const std::string & path, + String data, + const Coordination::Stat & stat, + bool is_sequental, + bool is_ephemeral, + Coordination::ACLs node_acls, + int64_t session_id); + + // Remove node in the storage + // Returns false if it failed to remove the node, true otherwise + // We don't care about the exact failure because we should've caught it during preprocessing + bool removeNode(const std::string & path, int32_t version); + + bool checkACL(StringRef path, int32_t permissions, int64_t session_id, bool is_local); + /// Mapping session_id -> set of ephemeral nodes paths Ephemerals ephemerals; /// Mapping session_id -> set of watched nodes paths @@ -130,15 +258,12 @@ public: /// Currently active watches (node_path -> subscribed sessions) Watches watches; - Watches list_watches; /// Watches for 'list' request (watches on children). + Watches list_watches; /// Watches for 'list' request (watches on children). void clearDeadWatches(int64_t session_id); /// Get current zxid - int64_t getZXID() const - { - return zxid; - } + int64_t getZXID() const { return zxid; } const String superdigest; @@ -162,78 +287,53 @@ public: /// Process user request and return response. /// check_acl = false only when converting data from ZooKeeper. - ResponsesForSessions processRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id, int64_t time, std::optional new_last_zxid, bool check_acl = true); + ResponsesForSessions processRequest( + const Coordination::ZooKeeperRequestPtr & request, + int64_t session_id, + int64_t time, + std::optional new_last_zxid, + bool check_acl = true, + bool is_local = false); + void preprocessRequest( + const Coordination::ZooKeeperRequestPtr & request, int64_t session_id, int64_t time, int64_t new_last_zxid, bool check_acl = true); + void rollbackRequest(int64_t rollback_zxid); void finalize(); /// Set of methods for creating snapshots /// Turn on snapshot mode, so data inside Container is not deleted, but replaced with new version. - void enableSnapshotMode(size_t up_to_version) - { - container.enableSnapshotMode(up_to_version); - - } + void enableSnapshotMode(size_t up_to_version) { container.enableSnapshotMode(up_to_version); } /// Turn off snapshot mode. 
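The KeeperStorage hunks above introduce the preprocess/commit split: preprocessing appends ZXID-tagged deltas to an uncommitted state, commit replays every delta with that ZXID onto the committed container, and rollback simply erases them. The following is a minimal, self-contained sketch of that idea; MiniStorage, CreateDelta, SetDataDelta and RemoveDelta are hypothetical simplifications, not the real KeeperStorage API.

// Minimal sketch of ZXID-tagged deltas, loosely modelled on the
// KeeperStorage::UncommittedState idea above. Hypothetical types only.
#include <cstdint>
#include <deque>
#include <iostream>
#include <map>
#include <optional>
#include <string>
#include <type_traits>
#include <variant>

struct CreateDelta { std::string data; };
struct RemoveDelta {};
struct SetDataDelta { std::string data; };
using Operation = std::variant<CreateDelta, RemoveDelta, SetDataDelta>;

struct Delta
{
    std::string path;
    int64_t zxid;
    Operation operation;
};

struct MiniStorage
{
    std::map<std::string, std::string> committed;   // path -> data
    std::deque<Delta> deltas;                       // uncommitted changes, ordered by zxid

    // "preprocess": record the intended change without touching committed state.
    void preprocess(std::string path, int64_t zxid, Operation op)
    {
        deltas.push_back({std::move(path), zxid, std::move(op)});
    }

    // Latest uncommitted view of a node: start from the committed value
    // and replay every delta for that path in order.
    std::optional<std::string> uncommittedData(const std::string & path) const
    {
        std::optional<std::string> result;
        if (auto it = committed.find(path); it != committed.end())
            result = it->second;
        for (const auto & delta : deltas)
        {
            if (delta.path != path)
                continue;
            std::visit([&](const auto & op)
            {
                using T = std::decay_t<decltype(op)>;
                if constexpr (std::is_same_v<T, RemoveDelta>)
                    result.reset();
                else
                    result = op.data;   // CreateDelta or SetDataDelta
            }, delta.operation);
        }
        return result;
    }

    // "commit": apply all deltas with the given zxid to committed state and drop them.
    void commit(int64_t zxid)
    {
        while (!deltas.empty() && deltas.front().zxid == zxid)
        {
            const auto & delta = deltas.front();
            std::visit([&](const auto & op)
            {
                using T = std::decay_t<decltype(op)>;
                if constexpr (std::is_same_v<T, RemoveDelta>)
                    committed.erase(delta.path);
                else
                    committed[delta.path] = op.data;
            }, delta.operation);
            deltas.pop_front();
        }
    }

    // "rollback": forget every delta produced for the given zxid.
    void rollback(int64_t zxid)
    {
        std::erase_if(deltas, [zxid](const Delta & d) { return d.zxid == zxid; });
    }
};

int main()
{
    MiniStorage storage;
    storage.preprocess("/test", 1, CreateDelta{"initial_data"});
    storage.preprocess("/test", 2, SetDataDelta{"new_data"});

    std::cout << storage.uncommittedData("/test").value() << '\n';  // new_data (both deltas applied)
    storage.commit(1);                                              // only zxid 1 becomes committed
    std::cout << storage.committed.at("/test") << '\n';             // initial_data
    storage.rollback(2);                                            // zxid 2 never happened
    std::cout << storage.uncommittedData("/test").value() << '\n';  // initial_data
    return 0;
}

Keying everything by ZXID in this way is what allows rollbackRequest() in the real change to drop a failed request with a single erase_if over the delta queue.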
- void disableSnapshotMode() - { - container.disableSnapshotMode(); - } + void disableSnapshotMode() { container.disableSnapshotMode(); } - Container::const_iterator getSnapshotIteratorBegin() const - { - return container.begin(); - } + Container::const_iterator getSnapshotIteratorBegin() const { return container.begin(); } /// Clear outdated data from internal container. - void clearGarbageAfterSnapshot() - { - container.clearOutdatedNodes(); - } + void clearGarbageAfterSnapshot() { container.clearOutdatedNodes(); } /// Get all active sessions - const SessionAndTimeout & getActiveSessions() const - { - return session_and_timeout; - } + const SessionAndTimeout & getActiveSessions() const { return session_and_timeout; } /// Get all dead sessions - std::vector getDeadSessions() const - { - return session_expiry_queue.getExpiredSessions(); - } + std::vector getDeadSessions() const { return session_expiry_queue.getExpiredSessions(); } /// Introspection functions mostly used in 4-letter commands - uint64_t getNodesCount() const - { - return container.size(); - } + uint64_t getNodesCount() const { return container.size(); } - uint64_t getApproximateDataSize() const - { - return container.getApproximateDataSize(); - } + uint64_t getApproximateDataSize() const { return container.getApproximateDataSize(); } - uint64_t getArenaDataSize() const - { - return container.keyArenaSize(); - } + uint64_t getArenaDataSize() const { return container.keyArenaSize(); } uint64_t getTotalWatchesCount() const; - uint64_t getWatchedPathsCount() const - { - return watches.size() + list_watches.size(); - } + uint64_t getWatchedPathsCount() const { return watches.size() + list_watches.size(); } uint64_t getSessionsWithWatchesCount() const; - uint64_t getSessionWithEphemeralNodesCount() const - { - return ephemerals.size(); - } + uint64_t getSessionWithEphemeralNodesCount() const { return ephemerals.size(); } uint64_t getTotalEphemeralNodesCount() const; void dumpWatches(WriteBufferFromOwnString & buf) const; diff --git a/src/Coordination/WriteBufferFromNuraftBuffer.h b/src/Coordination/WriteBufferFromNuraftBuffer.h index d52049edcff..c9ca1e2a227 100644 --- a/src/Coordination/WriteBufferFromNuraftBuffer.h +++ b/src/Coordination/WriteBufferFromNuraftBuffer.h @@ -12,7 +12,6 @@ public: WriteBufferFromNuraftBuffer(); nuraft::ptr getBuffer(); - bool isFinished() const { return finalized; } ~WriteBufferFromNuraftBuffer() override; diff --git a/src/Coordination/ZooKeeperDataReader.cpp b/src/Coordination/ZooKeeperDataReader.cpp index e59c67329ff..4d1745edc6a 100644 --- a/src/Coordination/ZooKeeperDataReader.cpp +++ b/src/Coordination/ZooKeeperDataReader.cpp @@ -520,6 +520,7 @@ bool deserializeTxn(KeeperStorage & storage, ReadBuffer & in, Poco::Logger * /*l if (request->getOpNum() == Coordination::OpNum::Multi && hasErrorsInMultiRequest(request)) return true; + storage.preprocessRequest(request, session_id, time, zxid, /* check_acl = */ false); storage.processRequest(request, session_id, time, zxid, /* check_acl = */ false); } } diff --git a/src/Coordination/tests/gtest_coordination.cpp b/src/Coordination/tests/gtest_coordination.cpp index cf4d1eaf9f2..2742f48f49e 100644 --- a/src/Coordination/tests/gtest_coordination.cpp +++ b/src/Coordination/tests/gtest_coordination.cpp @@ -1,6 +1,8 @@ #include #include +#include "Common/ZooKeeper/IKeeper.h" +#include "Coordination/KeeperStorage.h" #include "config_core.h" #if USE_NURAFT @@ -1261,6 +1263,7 @@ void testLogAndStateMachine(Coordination::CoordinationSettingsPtr settings, uint 
changelog.append(entry); changelog.end_of_append_batch(0, 0); + state_machine->pre_commit(i, changelog.entry_at(i)->get_buf()); state_machine->commit(i, changelog.entry_at(i)->get_buf()); bool snapshot_created = false; if (i % settings->snapshot_distance == 0) @@ -1305,6 +1308,7 @@ void testLogAndStateMachine(Coordination::CoordinationSettingsPtr settings, uint for (size_t i = restore_machine->last_commit_index() + 1; i < restore_changelog.next_slot(); ++i) { + restore_machine->pre_commit(i, changelog.entry_at(i)->get_buf()); restore_machine->commit(i, changelog.entry_at(i)->get_buf()); } @@ -1407,6 +1411,7 @@ TEST_P(CoordinationTest, TestEphemeralNodeRemove) request_c->path = "/hello"; request_c->is_ephemeral = true; auto entry_c = getLogEntryFromZKRequest(0, 1, request_c); + state_machine->pre_commit(1, entry_c->get_buf()); state_machine->commit(1, entry_c->get_buf()); const auto & storage = state_machine->getStorage(); @@ -1415,6 +1420,7 @@ TEST_P(CoordinationTest, TestEphemeralNodeRemove) request_d->path = "/hello"; /// Delete from other session auto entry_d = getLogEntryFromZKRequest(0, 2, request_d); + state_machine->pre_commit(2, entry_d->get_buf()); state_machine->commit(2, entry_d->get_buf()); EXPECT_EQ(storage.ephemerals.size(), 0); @@ -1777,6 +1783,130 @@ TEST_P(CoordinationTest, TestLogGap) EXPECT_EQ(changelog1.next_slot(), 61); } +template +ResponseType getSingleResponse(const auto & responses) +{ + EXPECT_FALSE(responses.empty()); + return dynamic_cast(*responses[0].response); +} + +TEST_P(CoordinationTest, TestUncommittedStateBasicCrud) +{ + using namespace DB; + using namespace Coordination; + + DB::KeeperStorage storage{500, ""}; + + constexpr std::string_view path = "/test"; + + const auto get_committed_data = [&]() -> std::optional + { + auto request = std::make_shared(); + request->path = path; + auto responses = storage.processRequest(request, 0, 0, std::nullopt, true, true); + const auto & get_response = getSingleResponse(responses); + + if (get_response.error != Error::ZOK) + return std::nullopt; + + return get_response.data; + }; + + const auto preprocess_get = [&](int64_t zxid) + { + auto get_request = std::make_shared(); + get_request->path = path; + storage.preprocessRequest(get_request, 0, 0, zxid); + return get_request; + }; + + const auto create_request = std::make_shared(); + create_request->path = path; + create_request->data = "initial_data"; + storage.preprocessRequest(create_request, 0, 0, 1); + storage.preprocessRequest(create_request, 0, 0, 2); + + ASSERT_FALSE(get_committed_data()); + + const auto after_create_get = preprocess_get(3); + + ASSERT_FALSE(get_committed_data()); + + const auto set_request = std::make_shared(); + set_request->path = path; + set_request->data = "new_data"; + storage.preprocessRequest(set_request, 0, 0, 4); + + const auto after_set_get = preprocess_get(5); + + ASSERT_FALSE(get_committed_data()); + + const auto remove_request = std::make_shared(); + remove_request->path = path; + storage.preprocessRequest(remove_request, 0, 0, 6); + storage.preprocessRequest(remove_request, 0, 0, 7); + + const auto after_remove_get = preprocess_get(8); + + ASSERT_FALSE(get_committed_data()); + + { + const auto responses = storage.processRequest(create_request, 0, 0, 1); + const auto & create_response = getSingleResponse(responses); + ASSERT_EQ(create_response.error, Error::ZOK); + } + + { + const auto responses = storage.processRequest(create_request, 0, 0, 2); + const auto & create_response = getSingleResponse(responses); + 
ASSERT_EQ(create_response.error, Error::ZNODEEXISTS); + } + + { + const auto responses = storage.processRequest(after_create_get, 0, 0, 3); + const auto & get_response = getSingleResponse(responses); + ASSERT_EQ(get_response.error, Error::ZOK); + ASSERT_EQ(get_response.data, "initial_data"); + } + + ASSERT_EQ(get_committed_data(), "initial_data"); + + { + const auto responses = storage.processRequest(set_request, 0, 0, 4); + const auto & create_response = getSingleResponse(responses); + ASSERT_EQ(create_response.error, Error::ZOK); + } + + { + const auto responses = storage.processRequest(after_set_get, 0, 0, 5); + const auto & get_response = getSingleResponse(responses); + ASSERT_EQ(get_response.error, Error::ZOK); + ASSERT_EQ(get_response.data, "new_data"); + } + + ASSERT_EQ(get_committed_data(), "new_data"); + + { + const auto responses = storage.processRequest(remove_request, 0, 0, 6); + const auto & create_response = getSingleResponse(responses); + ASSERT_EQ(create_response.error, Error::ZOK); + } + + { + const auto responses = storage.processRequest(remove_request, 0, 0, 7); + const auto & create_response = getSingleResponse(responses); + ASSERT_EQ(create_response.error, Error::ZNONODE); + } + + { + const auto responses = storage.processRequest(after_remove_get, 0, 0, 8); + const auto & get_response = getSingleResponse(responses); + ASSERT_EQ(get_response.error, Error::ZNONODE); + } + + ASSERT_FALSE(get_committed_data()); +} + INSTANTIATE_TEST_SUITE_P(CoordinationTestSuite, CoordinationTest, diff --git a/src/Core/BackgroundSchedulePool.h b/src/Core/BackgroundSchedulePool.h index 35a471a367f..fbd7e3f749a 100644 --- a/src/Core/BackgroundSchedulePool.h +++ b/src/Core/BackgroundSchedulePool.h @@ -161,7 +161,7 @@ public: task_info->deactivate(); } - operator bool() const { return task_info != nullptr; } /// NOLINT + explicit operator bool() const { return task_info != nullptr; } BackgroundSchedulePoolTaskInfo * operator->() { return task_info.get(); } const BackgroundSchedulePoolTaskInfo * operator->() const { return task_info.get(); } diff --git a/src/Core/Block.cpp b/src/Core/Block.cpp index a21c96abfdb..34e6c08c718 100644 --- a/src/Core/Block.cpp +++ b/src/Core/Block.cpp @@ -35,7 +35,7 @@ static ReturnType onError(const std::string & message [[maybe_unused]], int code throw Exception(message, code); else return false; -}; +} template @@ -601,6 +601,15 @@ NamesAndTypesList Block::getNamesAndTypesList() const return res; } +NamesAndTypes Block::getNamesAndTypes() const +{ + NamesAndTypes res; + + for (const auto & elem : data) + res.emplace_back(elem.name, elem.type); + + return res; +} Names Block::getNames() const { @@ -756,6 +765,17 @@ void Block::updateHash(SipHash & hash) const col.column->updateHashWithValue(row_no, hash); } +Serializations Block::getSerializations() const +{ + Serializations res; + res.reserve(data.size()); + + for (const auto & column : data) + res.push_back(column.type->getDefaultSerialization()); + + return res; +} + void convertToFullIfSparse(Block & block) { for (auto & column : block) diff --git a/src/Core/Block.h b/src/Core/Block.h index 8089dffd1dc..5a5458cc8f7 100644 --- a/src/Core/Block.h +++ b/src/Core/Block.h @@ -89,11 +89,14 @@ public: const ColumnsWithTypeAndName & getColumnsWithTypeAndName() const; NamesAndTypesList getNamesAndTypesList() const; + NamesAndTypes getNamesAndTypes() const; Names getNames() const; DataTypes getDataTypes() const; Names getDataTypeNames() const; std::unordered_map getNamesToIndexesMap() const; + Serializations 
getSerializations() const; + /// Returns number of rows from first column in block, not equal to nullptr. If no columns, returns 0. size_t rows() const; @@ -108,7 +111,7 @@ public: /// Approximate number of allocated bytes in memory - for profiling and limits. size_t allocatedBytes() const; - operator bool() const { return !!columns(); } /// NOLINT + explicit operator bool() const { return !!columns(); } bool operator!() const { return !this->operator bool(); } /// NOLINT /** Get a list of column names separated by commas. */ diff --git a/src/Core/BlockInfo.cpp b/src/Core/BlockInfo.cpp index e9aee871be1..81064dec733 100644 --- a/src/Core/BlockInfo.cpp +++ b/src/Core/BlockInfo.cpp @@ -65,6 +65,13 @@ void BlockMissingValues::setBit(size_t column_idx, size_t row_idx) mask[row_idx] = true; } +void BlockMissingValues::setBits(size_t column_idx, size_t rows) +{ + RowsBitMask & mask = rows_mask_by_column_id[column_idx]; + mask.resize(rows); + std::fill(mask.begin(), mask.end(), true); +} + const BlockMissingValues::RowsBitMask & BlockMissingValues::getDefaultsBitmask(size_t column_idx) const { static RowsBitMask none; diff --git a/src/Core/BlockInfo.h b/src/Core/BlockInfo.h index 82d790bbc8e..d431303ca39 100644 --- a/src/Core/BlockInfo.h +++ b/src/Core/BlockInfo.h @@ -56,7 +56,10 @@ public: const RowsBitMask & getDefaultsBitmask(size_t column_idx) const; /// Check that we have to replace default value at least in one of columns bool hasDefaultBits(size_t column_idx) const; + /// Set bit for a specified row in a single column. void setBit(size_t column_idx, size_t row_idx); + /// Set bits for all rows in a single column. + void setBits(size_t column_idx, size_t rows); bool empty() const { return rows_mask_by_column_id.empty(); } size_t size() const { return rows_mask_by_column_id.size(); } void clear() { rows_mask_by_column_id.clear(); } diff --git a/src/Core/ColumnNumbers.h b/src/Core/ColumnNumbers.h index 29b4c49dc83..37507a6d683 100644 --- a/src/Core/ColumnNumbers.h +++ b/src/Core/ColumnNumbers.h @@ -1,7 +1,6 @@ #pragma once #include -#include namespace DB diff --git a/src/Core/Defines.h b/src/Core/Defines.h index 4ff48b8ff63..9665a20a397 100644 --- a/src/Core/Defines.h +++ b/src/Core/Defines.h @@ -33,8 +33,6 @@ #define DEFAULT_TEMPORARY_LIVE_VIEW_TIMEOUT_SEC 5 #define DEFAULT_PERIODIC_LIVE_VIEW_REFRESH_SEC 60 -#define DEFAULT_WINDOW_VIEW_CLEAN_INTERVAL_SEC 5 -#define DEFAULT_WINDOW_VIEW_HEARTBEAT_INTERVAL_SEC 15 #define SHOW_CHARS_ON_SYNTAX_ERROR ptrdiff_t(160) #define DBMS_CONNECTION_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES 3 /// each period reduces the error counter by 2 times diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 8de8bba41c9..32bae1a6204 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -352,6 +352,8 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(UInt64, partial_merge_join_left_table_buffer_bytes, 0, "If not 0 group left table blocks in bigger ones for left-side table in partial merge join. It uses up to 2x of specified memory per joining thread.", 0) \ M(UInt64, partial_merge_join_rows_in_right_blocks, 65536, "Split right-hand joining data in blocks of specified size. It's a portion of data indexed by min-max values and possibly unloaded on disk.", 0) \ M(UInt64, join_on_disk_max_files_to_merge, 64, "For MergeJoin on disk set how much files it's allowed to sort simultaneously. Then this value bigger then more memory used and then less disk I/O needed. 
Minimum is 2.", 0) \ + M(Bool, compatibility_ignore_collation_in_create_table, true, "Compatibility ignore collation in create table", 0) \ + \ M(String, temporary_files_codec, "LZ4", "Set compression codec for temporary files (sort and join on disk). I.e. LZ4, NONE.", 0) \ \ M(UInt64, max_rows_to_transfer, 0, "Maximum size (in rows) of the transmitted external table obtained when the GLOBAL IN/JOIN section is executed.", 0) \ @@ -434,8 +436,9 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(Seconds, live_view_heartbeat_interval, 15, "The heartbeat interval in seconds to indicate live query is alive.", 0) \ M(UInt64, max_live_view_insert_blocks_before_refresh, 64, "Limit maximum number of inserted blocks after which mergeable blocks are dropped and query is re-executed.", 0) \ M(Bool, allow_experimental_window_view, false, "Enable WINDOW VIEW. Not mature enough.", 0) \ - M(Seconds, window_view_clean_interval, DEFAULT_WINDOW_VIEW_CLEAN_INTERVAL_SEC, "The clean interval of window view in seconds to free outdated data.", 0) \ - M(Seconds, window_view_heartbeat_interval, DEFAULT_WINDOW_VIEW_HEARTBEAT_INTERVAL_SEC, "The heartbeat interval in seconds to indicate watch query is alive.", 0) \ + M(Seconds, window_view_clean_interval, 60, "The clean interval of window view in seconds to free outdated data.", 0) \ + M(Seconds, window_view_heartbeat_interval, 15, "The heartbeat interval in seconds to indicate watch query is alive.", 0) \ + M(Seconds, wait_for_window_view_fire_signal_timeout, 10, "Timeout for waiting for window view fire signal in event time processing", 0) \ M(UInt64, min_free_disk_space_for_temporary_data, 0, "The minimum disk space to keep while writing temporary data used in external sorting and aggregation.", 0) \ \ M(DefaultDatabaseEngine, default_database_engine, DefaultDatabaseEngine::Atomic, "Default database engine.", 0) \ @@ -585,6 +588,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) /** Experimental functions */ \ M(Bool, allow_experimental_funnel_functions, false, "Enable experimental functions for funnel analysis.", 0) \ M(Bool, allow_experimental_nlp_functions, false, "Enable experimental functions for natural language processing.", 0) \ + M(Bool, allow_experimental_hash_functions, false, "Enable experimental hash functions (hashid, etc)", 0) \ M(Bool, allow_experimental_object_type, false, "Allow Object and JSON data types", 0) \ M(String, insert_deduplication_token, "", "If not empty, used for duplicate detection instead of data digest", 0) \ M(Bool, count_distinct_optimization, false, "Rewrite count distinct to subquery of group by", 0) \ @@ -659,7 +663,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(Char, input_format_hive_text_map_keys_delimiter, '\x03', "Delimiter between a pair of map key/values in Hive Text File", 0) \ M(UInt64, input_format_msgpack_number_of_columns, 0, "The number of columns in inserted MsgPack data. 
Used for automatic schema inference from data.", 0) \ M(MsgPackUUIDRepresentation, output_format_msgpack_uuid_representation, FormatSettings::MsgPackUUIDRepresentation::EXT, "The way how to output UUID in MsgPack format.", 0) \ - M(UInt64, input_format_max_rows_to_read_for_schema_inference, 100, "The maximum rows of data to read for automatic schema inference", 0) \ + M(UInt64, input_format_max_rows_to_read_for_schema_inference, 25000, "The maximum rows of data to read for automatic schema inference", 0) \ M(Bool, input_format_csv_use_best_effort_in_schema_inference, true, "Use some tweaks and heuristics to infer schema in CSV format", 0) \ M(Bool, input_format_tsv_use_best_effort_in_schema_inference, true, "Use some tweaks and heuristics to infer schema in TSV format", 0) \ M(Bool, input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference, false, "Allow to skip columns with unsupported types while schema inference for format Parquet", 0) \ @@ -697,6 +701,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(Bool, output_format_pretty_color, true, "Use ANSI escape sequences to paint colors in Pretty formats", 0) \ M(String, output_format_pretty_grid_charset, "UTF-8", "Charset for printing grid borders. Available charsets: ASCII, UTF-8 (default one).", 0) \ M(UInt64, output_format_parquet_row_group_size, 1000000, "Row group size in rows.", 0) \ + M(Bool, output_format_parquet_string_as_string, false, "Use Parquet String type instead of Binary for String columns.", 0) \ M(String, output_format_avro_codec, "", "Compression codec used for output. Possible values: 'null', 'deflate', 'snappy'.", 0) \ M(UInt64, output_format_avro_sync_interval, 16 * 1024, "Sync interval in bytes.", 0) \ M(String, output_format_avro_string_column_pattern, "", "For Avro format: regexp of String columns to select as AVRO string.", 0) \ @@ -734,6 +739,9 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(UInt64, cross_to_inner_join_rewrite, 1, "Use inner join instead of comma/cross join if possible. 
Possible values: 0 - no rewrite, 1 - apply if possible, 2 - force rewrite all cross joins", 0) \ \ M(Bool, output_format_arrow_low_cardinality_as_dictionary, false, "Enable output LowCardinality type as Dictionary Arrow type", 0) \ + M(Bool, output_format_arrow_string_as_string, false, "Use Arrow String type instead of Binary for String columns", 0) \ + \ + M(Bool, output_format_orc_string_as_string, false, "Use ORC String type instead of Binary for String columns", 0) \ \ M(EnumComparingMode, format_capn_proto_enum_comparising_mode, FormatSettings::EnumComparingMode::BY_VALUES, "How to map ClickHouse Enum and CapnProto Enum", 0) \ \ diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index 6a475e0409c..f7cfbab289a 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -451,7 +451,7 @@ static std::string createDirectory(const std::string & file) return ""; fs::create_directories(path); return path; -}; +} static bool tryCreateDirectories(Poco::Logger * logger, const std::string & path) @@ -794,7 +794,7 @@ static void addSignalHandler(const std::vector & signals, signal_function h if (out_handled_signals) std::copy(signals.begin(), signals.end(), std::back_inserter(*out_handled_signals)); -}; +} static void blockSignals(const std::vector & signals) @@ -816,7 +816,7 @@ static void blockSignals(const std::vector & signals) if (pthread_sigmask(SIG_BLOCK, &sig_set, nullptr)) throw Poco::Exception("Cannot block signal."); -}; +} void BaseDaemon::initializeTerminationAndSignalProcessing() diff --git a/src/Daemon/SentryWriter.h b/src/Daemon/SentryWriter.h index 0888b2fe9a3..32aeff2787a 100644 --- a/src/Daemon/SentryWriter.h +++ b/src/Daemon/SentryWriter.h @@ -24,4 +24,4 @@ namespace SentryWriter int sig, const std::string & error_message, const StackTrace & stack_trace); -}; +} diff --git a/src/DataTypes/convertMySQLDataType.cpp b/src/DataTypes/convertMySQLDataType.cpp index 7e2f2e7c6b9..64633c6fd7b 100644 --- a/src/DataTypes/convertMySQLDataType.cpp +++ b/src/DataTypes/convertMySQLDataType.cpp @@ -83,7 +83,11 @@ DataTypePtr convertMySQLDataType(MultiEnum type_support, res = std::make_shared(); } else if (type_name == "binary") + { + //compatible with binary(0) DataType + if (length == 0) length = 1; res = std::make_shared(length); + } else if (type_name == "datetime" || type_name == "timestamp") { if (!type_support.isSet(MySQLDataTypesSupport::DATETIME64)) diff --git a/src/DataTypes/tests/gtest_data_type_get_common_type.cpp b/src/DataTypes/tests/gtest_data_type_get_common_type.cpp index 2a77237e982..a85606618a3 100644 --- a/src/DataTypes/tests/gtest_data_type_get_common_type.cpp +++ b/src/DataTypes/tests/gtest_data_type_get_common_type.cpp @@ -22,7 +22,7 @@ static auto typeFromString(const std::string & str) { auto & data_type_factory = DataTypeFactory::instance(); return data_type_factory.get(str); -}; +} static auto typesFromString(const std::string & str) { @@ -33,7 +33,7 @@ static auto typesFromString(const std::string & str) data_types.push_back(typeFromString(data_type)); return data_types; -}; +} struct TypesTestCase { diff --git a/src/Dictionaries/DictionaryStructure.h b/src/Dictionaries/DictionaryStructure.h index 9014b09b072..50cfba01894 100644 --- a/src/Dictionaries/DictionaryStructure.h +++ b/src/Dictionaries/DictionaryStructure.h @@ -89,7 +89,7 @@ constexpr void callOnDictionaryAttributeType(AttributeUnderlyingType type, F && if (type == other) func(DictionaryAttributeType{}); }); -}; +} struct DictionarySpecialAttribute final { diff --git 
a/src/Disks/DiskDecorator.h b/src/Disks/DiskDecorator.h index d707eb3e51d..325d8b6704b 100644 --- a/src/Disks/DiskDecorator.h +++ b/src/Disks/DiskDecorator.h @@ -96,6 +96,7 @@ class ReservationDelegate : public IReservation public: ReservationDelegate(ReservationPtr delegate_, DiskPtr wrapper_) : delegate(std::move(delegate_)), wrapper(wrapper_) { } UInt64 getSize() const override { return delegate->getSize(); } + UInt64 getUnreservedSpace() const override { return delegate->getUnreservedSpace(); } DiskPtr getDisk(size_t) const override { return wrapper; } Disks getDisks() const override { return {wrapper}; } void update(UInt64 new_size) override { delegate->update(new_size); } diff --git a/src/Disks/DiskEncrypted.cpp b/src/Disks/DiskEncrypted.cpp index 4ac59af95ab..8edb00e5a67 100644 --- a/src/Disks/DiskEncrypted.cpp +++ b/src/Disks/DiskEncrypted.cpp @@ -182,6 +182,7 @@ public: } UInt64 getSize() const override { return reservation->getSize(); } + UInt64 getUnreservedSpace() const override { return reservation->getUnreservedSpace(); } DiskPtr getDisk(size_t i) const override { diff --git a/src/Disks/DiskLocal.cpp b/src/Disks/DiskLocal.cpp index 8abf0b24782..e8e9867f4df 100644 --- a/src/Disks/DiskLocal.cpp +++ b/src/Disks/DiskLocal.cpp @@ -112,12 +112,15 @@ std::optional fileSizeSafe(const fs::path & path) class DiskLocalReservation : public IReservation { public: - DiskLocalReservation(const DiskLocalPtr & disk_, UInt64 size_) - : disk(disk_), size(size_), metric_increment(CurrentMetrics::DiskSpaceReservedForMerge, size_) - { - } + DiskLocalReservation(const DiskLocalPtr & disk_, UInt64 size_, UInt64 unreserved_space_) + : disk(disk_) + , size(size_) + , unreserved_space(unreserved_space_) + , metric_increment(CurrentMetrics::DiskSpaceReservedForMerge, size_) + {} UInt64 getSize() const override { return size; } + UInt64 getUnreservedSpace() const override { return unreserved_space; } DiskPtr getDisk(size_t i) const override { @@ -165,6 +168,7 @@ public: private: DiskLocalPtr disk; UInt64 size; + UInt64 unreserved_space; CurrentMetrics::Increment metric_increment; }; @@ -201,32 +205,38 @@ private: ReservationPtr DiskLocal::reserve(UInt64 bytes) { - if (!tryReserve(bytes)) + auto unreserved_space = tryReserve(bytes); + if (!unreserved_space.has_value()) return {}; - return std::make_unique(std::static_pointer_cast(shared_from_this()), bytes); + return std::make_unique( + std::static_pointer_cast(shared_from_this()), + bytes, unreserved_space.value()); } -bool DiskLocal::tryReserve(UInt64 bytes) +std::optional DiskLocal::tryReserve(UInt64 bytes) { std::lock_guard lock(DiskLocal::reservation_mutex); + + UInt64 available_space = getAvailableSpace(); + UInt64 unreserved_space = available_space - std::min(available_space, reserved_bytes); + if (bytes == 0) { LOG_DEBUG(log, "Reserving 0 bytes on disk {}", backQuote(name)); ++reservation_count; - return true; + return {unreserved_space}; } - auto available_space = getAvailableSpace(); - UInt64 unreserved_space = available_space - std::min(available_space, reserved_bytes); if (unreserved_space >= bytes) { LOG_DEBUG(log, "Reserving {} on disk {}, having unreserved {}.", ReadableSize(bytes), backQuote(name), ReadableSize(unreserved_space)); ++reservation_count; reserved_bytes += bytes; - return true; + return {unreserved_space - bytes}; } - return false; + + return {}; } static UInt64 getTotalSpaceByName(const String & name, const String & disk_path, UInt64 keep_free_space_bytes) @@ -437,7 +447,7 @@ void DiskLocal::copy(const String & from_path, 
const std::shared_ptr & to fs::copy(from, to, fs::copy_options::recursive | fs::copy_options::overwrite_existing); /// Use more optimal way. } else - copyThroughBuffers(from_path, to_disk, to_path); /// Base implementation. + copyThroughBuffers(from_path, to_disk, to_path, /* copy_root_dir */ true); /// Base implementation. } void DiskLocal::copyDirectoryContent(const String & from_dir, const std::shared_ptr & to_disk, const String & to_dir) @@ -445,7 +455,7 @@ void DiskLocal::copyDirectoryContent(const String & from_dir, const std::shared_ if (isSameDiskType(*this, *to_disk)) fs::copy(from_dir, to_dir, fs::copy_options::recursive | fs::copy_options::overwrite_existing); /// Use more optimal way. else - copyThroughBuffers(from_dir, to_disk, to_dir); /// Base implementation. + copyThroughBuffers(from_dir, to_disk, to_dir, /* copy_root_dir */ false); /// Base implementation. } SyncGuardPtr DiskLocal::getDirectorySyncGuard(const String & path) const diff --git a/src/Disks/DiskLocal.h b/src/Disks/DiskLocal.h index 61faccbe2a5..62b03e7b2ed 100644 --- a/src/Disks/DiskLocal.h +++ b/src/Disks/DiskLocal.h @@ -121,7 +121,7 @@ public: bool canWrite() const noexcept; private: - bool tryReserve(UInt64 bytes); + std::optional tryReserve(UInt64 bytes); /// Setup disk for healthy check. Returns true if it's read-write, false if read-only. /// Throw exception if it's not possible to setup necessary files and directories. diff --git a/src/Disks/DiskRestartProxy.cpp b/src/Disks/DiskRestartProxy.cpp index 8bb31cec55f..fab4e2148ca 100644 --- a/src/Disks/DiskRestartProxy.cpp +++ b/src/Disks/DiskRestartProxy.cpp @@ -54,7 +54,7 @@ public: RestartAwareWriteBuffer(const DiskRestartProxy & disk, std::unique_ptr impl_) : WriteBufferFromFileDecorator(std::move(impl_)), lock(disk.mutex) { } - virtual ~RestartAwareWriteBuffer() override + ~RestartAwareWriteBuffer() override { try { diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index c4578d51b6e..440cf31682d 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -392,6 +392,10 @@ public: /// Get reservation size. virtual UInt64 getSize() const = 0; + /// Space available for reservation + /// (with this reservation already take into account). + virtual UInt64 getUnreservedSpace() const = 0; + /// Get i-th disk where reservation take place. 
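Several hunks above and below change tryReserve() to return the space still left unreserved after a successful reservation and expose it through getUnreservedSpace(). A compact sketch of that accounting follows; DemoDisk is a hypothetical stand-in, not the IDisk interface, and the numbers in main() are made up.

// Sketch of the reservation bookkeeping: each successful reservation reports
// how much space was still unreserved *after* it was taken, so a least-used
// policy can later rank disks by that number. Hypothetical names only.
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <mutex>
#include <optional>
#include <string>

class DemoDisk
{
public:
    explicit DemoDisk(uint64_t available_bytes) : available(available_bytes) {}

    // Returns the space left unreserved after taking `bytes`,
    // or std::nullopt if the reservation does not fit.
    std::optional<uint64_t> tryReserve(uint64_t bytes)
    {
        std::lock_guard lock(mutex);
        uint64_t unreserved = available - std::min(available, reserved);

        if (bytes == 0)
            return unreserved;              // zero-byte reservations always succeed

        if (unreserved >= bytes)
        {
            reserved += bytes;
            return unreserved - bytes;      // what remains for future reservations
        }
        return std::nullopt;
    }

private:
    std::mutex mutex;
    uint64_t available = 0;   // stand-in for getAvailableSpace()
    uint64_t reserved = 0;    // sum of live reservations
};

int main()
{
    DemoDisk disk(100);
    auto r1 = disk.tryReserve(60);   // leaves 40 unreserved
    auto r2 = disk.tryReserve(60);   // does not fit any more
    std::cout << (r1 ? std::to_string(*r1) : "failed") << '\n';   // 40
    std::cout << (r2 ? std::to_string(*r2) : "failed") << '\n';   // failed
    return 0;
}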
virtual DiskPtr getDisk(size_t i = 0) const = 0; /// NOLINT diff --git a/src/Disks/IDiskRemote.cpp b/src/Disks/IDiskRemote.cpp index d72d7004cb7..00d36bcc624 100644 --- a/src/Disks/IDiskRemote.cpp +++ b/src/Disks/IDiskRemote.cpp @@ -23,7 +23,7 @@ namespace ErrorCodes extern const int INCORRECT_DISK_INDEX; extern const int UNKNOWN_FORMAT; extern const int FILE_ALREADY_EXISTS; - extern const int PATH_ACCESS_DENIED;; + extern const int PATH_ACCESS_DENIED; extern const int FILE_DOESNT_EXIST; extern const int BAD_FILE_TYPE; } @@ -637,34 +637,40 @@ void IDiskRemote::createHardLink(const String & src_path, const String & dst_pat ReservationPtr IDiskRemote::reserve(UInt64 bytes) { - if (!tryReserve(bytes)) + auto unreserved_space = tryReserve(bytes); + if (!unreserved_space.has_value()) return {}; - return std::make_unique(std::static_pointer_cast(shared_from_this()), bytes); + return std::make_unique( + std::static_pointer_cast(shared_from_this()), + bytes, unreserved_space.value()); } -bool IDiskRemote::tryReserve(UInt64 bytes) +std::optional IDiskRemote::tryReserve(UInt64 bytes) { std::lock_guard lock(reservation_mutex); + + auto available_space = getAvailableSpace(); + UInt64 unreserved_space = available_space - std::min(available_space, reserved_bytes); + if (bytes == 0) { LOG_TRACE(log, "Reserving 0 bytes on remote_fs disk {}", backQuote(name)); ++reservation_count; - return true; + return {unreserved_space}; } - auto available_space = getAvailableSpace(); - UInt64 unreserved_space = available_space - std::min(available_space, reserved_bytes); if (unreserved_space >= bytes) { LOG_TRACE(log, "Reserving {} on disk {}, having unreserved {}.", ReadableSize(bytes), backQuote(name), ReadableSize(unreserved_space)); ++reservation_count; reserved_bytes += bytes; - return true; + return {unreserved_space - bytes}; } - return false; + + return {}; } String IDiskRemote::getUniqueId(const String & path) const diff --git a/src/Disks/IDiskRemote.h b/src/Disks/IDiskRemote.h index 65bcdf3e719..96da7dc4f23 100644 --- a/src/Disks/IDiskRemote.h +++ b/src/Disks/IDiskRemote.h @@ -177,7 +177,7 @@ private: void removeMetadataRecursive(const String & path, std::unordered_map> & paths_to_remove); - bool tryReserve(UInt64 bytes); + std::optional tryReserve(UInt64 bytes); UInt64 reserved_bytes = 0; UInt64 reservation_count = 0; @@ -250,13 +250,18 @@ private: class DiskRemoteReservation final : public IReservation { public: - DiskRemoteReservation(const RemoteDiskPtr & disk_, UInt64 size_) - : disk(disk_), size(size_), metric_increment(CurrentMetrics::DiskSpaceReservedForMerge, size_) + DiskRemoteReservation(const RemoteDiskPtr & disk_, UInt64 size_, UInt64 unreserved_space_) + : disk(disk_) + , size(size_) + , unreserved_space(unreserved_space_) + , metric_increment(CurrentMetrics::DiskSpaceReservedForMerge, size_) { } UInt64 getSize() const override { return size; } + UInt64 getUnreservedSpace() const override { return unreserved_space; } + DiskPtr getDisk(size_t i) const override; Disks getDisks() const override { return {disk}; } @@ -268,6 +273,7 @@ public: private: RemoteDiskPtr disk; UInt64 size; + UInt64 unreserved_space; CurrentMetrics::Increment metric_increment; }; diff --git a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp index 541d03f5c20..1cb6354d38c 100644 --- a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp +++ b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp @@ -282,6 +282,20 @@ SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getReadBufferForFileSegment( 
} case FileSegment::State::PARTIALLY_DOWNLOADED: { + if (file_segment->getDownloadOffset() > file_offset_of_buffer_end) + { + /// segment{k} state: PARTIALLY_DOWNLOADED + /// cache: [______|___________ + /// ^ + /// download_offset (in progress) + /// requested_range: [__________] + /// ^ + /// file_offset_of_buffer_end + + read_type = ReadType::CACHED; + return getCacheReadBuffer(range.left); + } + auto downloader_id = file_segment->getOrSetDownloader(); if (downloader_id == file_segment->getCallerId()) { diff --git a/src/Disks/IVolume.cpp b/src/Disks/IVolume.cpp index 85568fdd05b..09f42cc5467 100644 --- a/src/Disks/IVolume.cpp +++ b/src/Disks/IVolume.cpp @@ -7,17 +7,31 @@ namespace DB { + namespace ErrorCodes { extern const int NO_ELEMENTS_IN_CONFIG; + extern const int EXCESSIVE_ELEMENT_IN_CONFIG; } + +VolumeLoadBalancing parseVolumeLoadBalancing(const String & config) +{ + if (config == "round_robin") + return VolumeLoadBalancing::ROUND_ROBIN; + if (config == "least_used") + return VolumeLoadBalancing::LEAST_USED; + throw Exception(ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG, "'{}' is not valid load_balancing value", config); +} + + IVolume::IVolume( String name_, const Poco::Util::AbstractConfiguration & config, const String & config_prefix, DiskSelectorPtr disk_selector) : name(std::move(name_)) + , load_balancing(parseVolumeLoadBalancing(config.getString(config_prefix + ".load_balancing", "round_robin"))) { Poco::Util::AbstractConfiguration::Keys keys; config.keys(config_prefix, keys); diff --git a/src/Disks/IVolume.h b/src/Disks/IVolume.h index 41260cac2e9..26d4c96f481 100644 --- a/src/Disks/IVolume.h +++ b/src/Disks/IVolume.h @@ -11,11 +11,18 @@ namespace DB enum class VolumeType { JBOD, - RAID1, SINGLE_DISK, UNKNOWN }; +enum class VolumeLoadBalancing +{ + ROUND_ROBIN, + LEAST_USED, +}; + +VolumeLoadBalancing parseVolumeLoadBalancing(const String & config); + class IVolume; using VolumePtr = std::shared_ptr; using Volumes = std::vector; @@ -34,11 +41,19 @@ using Volumes = std::vector; class IVolume : public Space { public: - IVolume(String name_, Disks disks_, size_t max_data_part_size_ = 0, bool perform_ttl_move_on_insert_ = true) + /// This constructor is only for: + /// - SingleDiskVolume + /// From createVolumeFromReservation(). + IVolume(String name_, + Disks disks_, + size_t max_data_part_size_ = 0, + bool perform_ttl_move_on_insert_ = true, + VolumeLoadBalancing load_balancing_ = VolumeLoadBalancing::ROUND_ROBIN) : disks(std::move(disks_)) , name(name_) , max_data_part_size(max_data_part_size_) , perform_ttl_move_on_insert(perform_ttl_move_on_insert_) + , load_balancing(load_balancing_) { } @@ -79,6 +94,10 @@ public: /// Should a new data part be synchronously moved to a volume according to ttl on insert /// or move this part in background task asynchronously after insert. 
bool perform_ttl_move_on_insert = true; + /// Load balancing, one of: + /// - ROUND_ROBIN + /// - LEAST_USED + const VolumeLoadBalancing load_balancing; }; } diff --git a/src/Disks/SingleDiskVolume.cpp b/src/Disks/SingleDiskVolume.cpp deleted file mode 100644 index 47140407026..00000000000 --- a/src/Disks/SingleDiskVolume.cpp +++ /dev/null @@ -1,6 +0,0 @@ -#include - -namespace DB -{ - -} diff --git a/src/Disks/StoragePolicy.cpp b/src/Disks/StoragePolicy.cpp index 11f53d10fb4..3dd60ac02d4 100644 --- a/src/Disks/StoragePolicy.cpp +++ b/src/Disks/StoragePolicy.cpp @@ -63,7 +63,12 @@ StoragePolicy::StoragePolicy( if (volumes.empty() && name == DEFAULT_STORAGE_POLICY_NAME) { - auto default_volume = std::make_shared(DEFAULT_VOLUME_NAME, std::vector{disks->get(DEFAULT_DISK_NAME)}, 0, false); + auto default_volume = std::make_shared(DEFAULT_VOLUME_NAME, + std::vector{disks->get(DEFAULT_DISK_NAME)}, + /* max_data_part_size_= */ 0, + /* are_merges_avoided_= */ false, + /* perform_ttl_move_on_insert_= */ true, + VolumeLoadBalancing::ROUND_ROBIN); volumes.emplace_back(std::move(default_volume)); } diff --git a/src/Disks/VolumeJBOD.cpp b/src/Disks/VolumeJBOD.cpp index e0f7dfc8231..401822fc901 100644 --- a/src/Disks/VolumeJBOD.cpp +++ b/src/Disks/VolumeJBOD.cpp @@ -19,15 +19,18 @@ VolumeJBOD::VolumeJBOD( const String & config_prefix, DiskSelectorPtr disk_selector) : IVolume(name_, config, config_prefix, disk_selector) + , disks_by_size(disks.begin(), disks.end()) { Poco::Logger * logger = &Poco::Logger::get("StorageConfiguration"); auto has_max_bytes = config.has(config_prefix + ".max_data_part_size_bytes"); auto has_max_ratio = config.has(config_prefix + ".max_data_part_size_ratio"); if (has_max_bytes && has_max_ratio) + { throw Exception( "Only one of 'max_data_part_size_bytes' and 'max_data_part_size_ratio' should be specified.", ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG); + } if (has_max_bytes) { @@ -47,12 +50,20 @@ VolumeJBOD::VolumeJBOD( } max_data_part_size = static_cast(sum_size * ratio / disks.size()); for (size_t i = 0; i < disks.size(); ++i) + { if (sizes[i] < max_data_part_size) - LOG_WARNING(logger, "Disk {} on volume {} have not enough space ({}) for containing part the size of max_data_part_size ({})", backQuote(disks[i]->getName()), backQuote(config_prefix), ReadableSize(sizes[i]), ReadableSize(max_data_part_size)); + { + LOG_WARNING(logger, "Disk {} on volume {} have not enough space ({}) for containing part the size of max_data_part_size ({})", + backQuote(disks[i]->getName()), backQuote(config_prefix), ReadableSize(sizes[i]), ReadableSize(max_data_part_size)); + } + } } static constexpr UInt64 MIN_PART_SIZE = 8u * 1024u * 1024u; if (max_data_part_size != 0 && max_data_part_size < MIN_PART_SIZE) - LOG_WARNING(logger, "Volume {} max_data_part_size is too low ({} < {})", backQuote(name), ReadableSize(max_data_part_size), ReadableSize(MIN_PART_SIZE)); + { + LOG_WARNING(logger, "Volume {} max_data_part_size is too low ({} < {})", + backQuote(name), ReadableSize(max_data_part_size), ReadableSize(MIN_PART_SIZE)); + } /// Default value is 'true' due to backward compatibility. 
perform_ttl_move_on_insert = config.getBool(config_prefix + ".perform_ttl_move_on_insert", true); @@ -72,31 +83,61 @@ VolumeJBOD::VolumeJBOD(const VolumeJBOD & volume_jbod, DiskPtr VolumeJBOD::getDisk(size_t /* index */) const { - size_t start_from = last_used.fetch_add(1u, std::memory_order_acq_rel); - size_t index = start_from % disks.size(); - return disks[index]; + switch (load_balancing) + { + case VolumeLoadBalancing::ROUND_ROBIN: + { + size_t start_from = last_used.fetch_add(1u, std::memory_order_acq_rel); + size_t index = start_from % disks.size(); + return disks[index]; + } + case VolumeLoadBalancing::LEAST_USED: + { + std::lock_guard lock(mutex); + return disks_by_size.top().disk; + } + } + __builtin_unreachable(); } ReservationPtr VolumeJBOD::reserve(UInt64 bytes) { /// This volume can not store data which size is greater than `max_data_part_size` /// to ensure that parts of size greater than that go to another volume(s). - if (max_data_part_size != 0 && bytes > max_data_part_size) return {}; - size_t start_from = last_used.fetch_add(1u, std::memory_order_acq_rel); - size_t disks_num = disks.size(); - for (size_t i = 0; i < disks_num; ++i) + switch (load_balancing) { - size_t index = (start_from + i) % disks_num; + case VolumeLoadBalancing::ROUND_ROBIN: + { + size_t start_from = last_used.fetch_add(1u, std::memory_order_acq_rel); + size_t disks_num = disks.size(); + for (size_t i = 0; i < disks_num; ++i) + { + size_t index = (start_from + i) % disks_num; - auto reservation = disks[index]->reserve(bytes); + auto reservation = disks[index]->reserve(bytes); + + if (reservation) + return reservation; + } + return {}; + } + case VolumeLoadBalancing::LEAST_USED: + { + std::lock_guard lock(mutex); + + DiskWithSize disk = disks_by_size.top(); + disks_by_size.pop(); + + ReservationPtr reservation = disk.reserve(bytes); + disks_by_size.push(disk); - if (reservation) return reservation; + } } - return {}; + __builtin_unreachable(); } bool VolumeJBOD::areMergesAvoided() const diff --git a/src/Disks/VolumeJBOD.h b/src/Disks/VolumeJBOD.h index 621125f1109..21d61e6dd8d 100644 --- a/src/Disks/VolumeJBOD.h +++ b/src/Disks/VolumeJBOD.h @@ -22,8 +22,8 @@ using VolumesJBOD = std::vector; class VolumeJBOD : public IVolume { public: - VolumeJBOD(String name_, Disks disks_, UInt64 max_data_part_size_, bool are_merges_avoided_) - : IVolume(name_, disks_, max_data_part_size_) + VolumeJBOD(String name_, Disks disks_, UInt64 max_data_part_size_, bool are_merges_avoided_, bool perform_ttl_move_on_insert_, VolumeLoadBalancing load_balancing_) + : IVolume(name_, disks_, max_data_part_size_, perform_ttl_move_on_insert_, load_balancing_) , are_merges_avoided(are_merges_avoided_) { } @@ -44,7 +44,8 @@ public: VolumeType getType() const override { return VolumeType::JBOD; } - /// Always returns next disk (round-robin), ignores argument. + /// Returns disk based on the load balancing algorithm (round-robin, or least-used), + /// ignores @index argument. /// /// - Used with policy for temporary data /// - Ignores all limitations @@ -63,8 +64,36 @@ public: bool are_merges_avoided = true; private: - /// Index of last used disk. 
+ struct DiskWithSize + { + DiskPtr disk; + uint64_t free_size = 0; + + DiskWithSize(DiskPtr disk_) + : disk(disk_) + , free_size(disk->getUnreservedSpace()) + {} + + bool operator<(const DiskWithSize & rhs) const + { + return free_size < rhs.free_size; + } + + ReservationPtr reserve(uint64_t bytes) + { + ReservationPtr reservation = disk->reserve(bytes); + /// Not just subtract bytes, but update the value, + /// since some reservations may be done directly via IDisk, or not by ClickHouse. + free_size = reservation->getUnreservedSpace(); + return reservation; + } + }; + + mutable std::mutex mutex; + /// Index of last used disk, for load_balancing=round_robin mutable std::atomic last_used = 0; + /// Priority queue of disks sorted by size, for load_balancing=least_used + mutable std::priority_queue disks_by_size; /// True if parts on this volume participate in merges according to START/STOP MERGES ON VOLUME. std::atomic> are_merges_avoided_user_override{std::nullopt}; diff --git a/src/Formats/EscapingRuleUtils.cpp b/src/Formats/EscapingRuleUtils.cpp index 1875caf1855..e4b655cdcf9 100644 --- a/src/Formats/EscapingRuleUtils.cpp +++ b/src/Formats/EscapingRuleUtils.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include @@ -83,7 +83,7 @@ void skipFieldByEscapingRule(ReadBuffer & buf, FormatSettings::EscapingRule esca readEscapedString(tmp, buf); break; case FormatSettings::EscapingRule::Quoted: - readQuotedFieldIntoString(tmp, buf); + readQuotedField(tmp, buf); break; case FormatSettings::EscapingRule::CSV: readCSVString(tmp, buf, format_settings.csv); @@ -219,13 +219,13 @@ String readByEscapingRule(ReadBuffer & buf, FormatSettings::EscapingRule escapin if constexpr (read_string) readQuotedString(result, buf); else - readQuotedFieldIntoString(result, buf); + readQuotedField(result, buf); break; case FormatSettings::EscapingRule::JSON: if constexpr (read_string) readJSONString(result, buf); else - readJSONFieldIntoString(result, buf); + readJSONField(result, buf); break; case FormatSettings::EscapingRule::Raw: readString(result, buf); @@ -452,7 +452,7 @@ DataTypePtr determineDataTypeByEscapingRule(const String & field, const FormatSe return buf.eof() ? type : nullptr; } case FormatSettings::EscapingRule::JSON: - return getDataTypeFromJSONField(field); + return JSONUtils::getDataTypeFromField(field); case FormatSettings::EscapingRule::CSV: { if (!format_settings.csv.input_format_use_best_effort_in_schema_inference) diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 96b52cd2423..4c1b23a75ab 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -99,6 +99,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.parquet.case_insensitive_column_matching = settings.input_format_parquet_case_insensitive_column_matching; format_settings.parquet.allow_missing_columns = settings.input_format_parquet_allow_missing_columns; format_settings.parquet.skip_columns_with_unsupported_types_in_schema_inference = settings.input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference; + format_settings.parquet.output_string_as_string = settings.output_format_parquet_string_as_string; format_settings.pretty.charset = settings.output_format_pretty_grid_charset.toString() == "ASCII" ? 
FormatSettings::Pretty::Charset::ASCII : FormatSettings::Pretty::Charset::UTF8; format_settings.pretty.color = settings.output_format_pretty_color; format_settings.pretty.max_column_pad_width = settings.output_format_pretty_max_column_pad_width; @@ -132,17 +133,19 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.arrow.import_nested = settings.input_format_arrow_import_nested; format_settings.arrow.allow_missing_columns = settings.input_format_arrow_allow_missing_columns; format_settings.arrow.skip_columns_with_unsupported_types_in_schema_inference = settings.input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference; + format_settings.arrow.skip_columns_with_unsupported_types_in_schema_inference = settings.input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference; + format_settings.arrow.case_insensitive_column_matching = settings.input_format_arrow_case_insensitive_column_matching; + format_settings.arrow.output_string_as_string = settings.output_format_arrow_string_as_string; format_settings.orc.import_nested = settings.input_format_orc_import_nested; format_settings.orc.allow_missing_columns = settings.input_format_orc_allow_missing_columns; format_settings.orc.row_batch_size = settings.input_format_orc_row_batch_size; format_settings.orc.skip_columns_with_unsupported_types_in_schema_inference = settings.input_format_orc_skip_columns_with_unsupported_types_in_schema_inference; - format_settings.arrow.skip_columns_with_unsupported_types_in_schema_inference = settings.input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference; - format_settings.arrow.case_insensitive_column_matching = settings.input_format_arrow_case_insensitive_column_matching; format_settings.orc.import_nested = settings.input_format_orc_import_nested; format_settings.orc.allow_missing_columns = settings.input_format_orc_allow_missing_columns; format_settings.orc.row_batch_size = settings.input_format_orc_row_batch_size; format_settings.orc.skip_columns_with_unsupported_types_in_schema_inference = settings.input_format_orc_skip_columns_with_unsupported_types_in_schema_inference; format_settings.orc.case_insensitive_column_matching = settings.input_format_orc_case_insensitive_column_matching; + format_settings.orc.output_string_as_string = settings.output_format_orc_string_as_string; format_settings.defaults_for_omitted_fields = settings.input_format_defaults_for_omitted_fields; format_settings.capn_proto.enum_comparing_mode = settings.format_capn_proto_enum_comparising_mode; format_settings.seekable_read = settings.input_format_allow_seeks; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 4f77fe099e1..e6f0a7d229e 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -81,6 +81,7 @@ struct FormatSettings bool allow_missing_columns = false; bool skip_columns_with_unsupported_types_in_schema_inference = false; bool case_insensitive_column_matching = false; + bool output_string_as_string = false; } arrow; struct @@ -148,6 +149,7 @@ struct FormatSettings bool skip_columns_with_unsupported_types_in_schema_inference = false; bool case_insensitive_column_matching = false; std::unordered_set skip_row_groups = {}; + bool output_string_as_string = false; } parquet; struct Pretty @@ -234,6 +236,7 @@ struct FormatSettings bool skip_columns_with_unsupported_types_in_schema_inference = false; bool case_insensitive_column_matching = false; std::unordered_set skip_stripes = {}; + bool 
output_string_as_string = false; } orc; /// For capnProto format we should determine how to diff --git a/src/Formats/JSONEachRowUtils.cpp b/src/Formats/JSONEachRowUtils.cpp deleted file mode 100644 index 534237c900c..00000000000 --- a/src/Formats/JSONEachRowUtils.cpp +++ /dev/null @@ -1,387 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -namespace DB -{ -namespace ErrorCodes -{ - extern const int INCORRECT_DATA; - extern const int LOGICAL_ERROR; -} - -template -static std::pair fileSegmentationEngineJSONEachRowImpl(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size, size_t min_rows) -{ - skipWhitespaceIfAny(in); - - char * pos = in.position(); - size_t balance = 0; - bool quotes = false; - size_t number_of_rows = 0; - - while (loadAtPosition(in, memory, pos) && (balance || memory.size() + static_cast(pos - in.position()) < min_chunk_size || number_of_rows < min_rows)) - { - const auto current_object_size = memory.size() + static_cast(pos - in.position()); - if (min_chunk_size != 0 && current_object_size > 10 * min_chunk_size) - throw ParsingException("Size of JSON object is extremely large. Expected not greater than " + - std::to_string(min_chunk_size) + " bytes, but current is " + std::to_string(current_object_size) + - " bytes per row. Increase the value setting 'min_chunk_bytes_for_parallel_parsing' or check your data manually, most likely JSON is malformed", ErrorCodes::INCORRECT_DATA); - - if (quotes) - { - pos = find_first_symbols<'\\', '"'>(pos, in.buffer().end()); - - if (pos > in.buffer().end()) - throw Exception("Position in buffer is out of bounds. There must be a bug.", ErrorCodes::LOGICAL_ERROR); - else if (pos == in.buffer().end()) - continue; - - if (*pos == '\\') - { - ++pos; - if (loadAtPosition(in, memory, pos)) - ++pos; - } - else if (*pos == '"') - { - ++pos; - quotes = false; - } - } - else - { - pos = find_first_symbols(pos, in.buffer().end()); - - if (pos > in.buffer().end()) - throw Exception("Position in buffer is out of bounds. There must be a bug.", ErrorCodes::LOGICAL_ERROR); - else if (pos == in.buffer().end()) - continue; - - else if (*pos == opening_bracket) - { - ++balance; - ++pos; - } - else if (*pos == closing_bracket) - { - --balance; - ++pos; - } - else if (*pos == '\\') - { - ++pos; - if (loadAtPosition(in, memory, pos)) - ++pos; - } - else if (*pos == '"') - { - quotes = true; - ++pos; - } - - if (balance == 0) - ++number_of_rows; - } - } - - saveUpToPosition(in, memory, pos); - return {loadAtPosition(in, memory, pos), number_of_rows}; -} - -template -static String readJSONEachRowLineIntoStringImpl(ReadBuffer & in) -{ - Memory memory; - fileSegmentationEngineJSONEachRowImpl(in, memory, 0, 1); - return String(memory.data(), memory.size()); -} - -template -DataTypePtr getDataTypeFromJSONFieldImpl(const Element & field) -{ - if (field.isNull()) - return nullptr; - - if (field.isBool()) - return DataTypeFactory::instance().get("Nullable(Bool)"); - - if (field.isInt64() || field.isUInt64() || field.isDouble()) - return makeNullable(std::make_shared()); - - if (field.isString()) - return makeNullable(std::make_shared()); - - if (field.isArray()) - { - auto array = field.getArray(); - - /// Return nullptr in case of empty array because we cannot determine nested type. 
- if (array.size() == 0) - return nullptr; - - DataTypes nested_data_types; - /// If this array contains fields with different types we will treat it as Tuple. - bool is_tuple = false; - for (const auto element : array) - { - auto type = getDataTypeFromJSONFieldImpl(element); - if (!type) - return nullptr; - - if (!nested_data_types.empty() && type->getName() != nested_data_types.back()->getName()) - is_tuple = true; - - nested_data_types.push_back(std::move(type)); - } - - if (is_tuple) - return std::make_shared(nested_data_types); - - return std::make_shared(nested_data_types.back()); - } - - if (field.isObject()) - { - auto object = field.getObject(); - DataTypePtr value_type; - bool is_object = false; - for (const auto key_value_pair : object) - { - auto type = getDataTypeFromJSONFieldImpl(key_value_pair.second); - if (!type) - continue; - - if (isObject(type)) - { - is_object = true; - break; - } - - if (!value_type) - { - value_type = type; - } - else if (!value_type->equals(*type)) - { - is_object = true; - break; - } - } - - if (is_object) - return std::make_shared("json", true); - - if (value_type) - return std::make_shared(std::make_shared(), value_type); - - return nullptr; - } - - throw Exception{ErrorCodes::INCORRECT_DATA, "Unexpected JSON type"}; -} - -auto getJSONParserAndElement() -{ -#if USE_SIMDJSON - return std::pair(); -#elif USE_RAPIDJSON - return std::pair(); -#else - return std::pair(); -#endif -} - -DataTypePtr getDataTypeFromJSONField(const String & field) -{ - auto [parser, element] = getJSONParserAndElement(); - bool parsed = parser.parse(field, element); - if (!parsed) - throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot parse JSON object"); - - return getDataTypeFromJSONFieldImpl(element); -} - -template -static DataTypes determineColumnDataTypesFromJSONEachRowDataImpl(ReadBuffer & in, bool /*json_strings*/, Extractor & extractor) -{ - String line = readJSONEachRowLineIntoStringImpl(in); - auto [parser, element] = getJSONParserAndElement(); - bool parsed = parser.parse(line, element); - if (!parsed) - throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot parse JSON object"); - - auto fields = extractor.extract(element); - - DataTypes data_types; - data_types.reserve(fields.size()); - for (const auto & field : fields) - data_types.push_back(getDataTypeFromJSONFieldImpl(field)); - - /// TODO: For JSONStringsEachRow/JSONCompactStringsEach all types will be strings. - /// Should we try to parse data inside strings somehow in this case? 
- - return data_types; -} - -std::pair fileSegmentationEngineJSONEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size) -{ - return fileSegmentationEngineJSONEachRowImpl<'{', '}'>(in, memory, min_chunk_size, 1); -} - -std::pair fileSegmentationEngineJSONCompactEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size, size_t min_rows) -{ - return fileSegmentationEngineJSONEachRowImpl<'[', ']'>(in, memory, min_chunk_size, min_rows); -} - -struct JSONEachRowFieldsExtractor -{ - template - std::vector extract(const Element & element) - { - /// {..., "" : , ...} - - if (!element.isObject()) - throw Exception(ErrorCodes::INCORRECT_DATA, "Root JSON value is not an object"); - - auto object = element.getObject(); - std::vector fields; - fields.reserve(object.size()); - column_names.reserve(object.size()); - for (const auto & key_value_pair : object) - { - column_names.emplace_back(key_value_pair.first); - fields.push_back(key_value_pair.second); - } - - return fields; - } - - std::vector column_names; -}; - -NamesAndTypesList readRowAndGetNamesAndDataTypesForJSONEachRow(ReadBuffer & in, bool json_strings) -{ - JSONEachRowFieldsExtractor extractor; - auto data_types = determineColumnDataTypesFromJSONEachRowDataImpl(in, json_strings, extractor); - NamesAndTypesList result; - for (size_t i = 0; i != extractor.column_names.size(); ++i) - result.emplace_back(extractor.column_names[i], data_types[i]); - return result; -} - -struct JSONCompactEachRowFieldsExtractor -{ - template - std::vector extract(const Element & element) - { - /// [..., , ...] - if (!element.isArray()) - throw Exception(ErrorCodes::INCORRECT_DATA, "Root JSON value is not an array"); - - auto array = element.getArray(); - std::vector fields; - fields.reserve(array.size()); - for (size_t i = 0; i != array.size(); ++i) - fields.push_back(array[i]); - return fields; - } -}; - -DataTypes readRowAndGetDataTypesForJSONCompactEachRow(ReadBuffer & in, bool json_strings) -{ - JSONCompactEachRowFieldsExtractor extractor; - return determineColumnDataTypesFromJSONEachRowDataImpl(in, json_strings, extractor); -} - - -bool nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl(ReadBuffer & buf) -{ - /// For JSONEachRow we can safely skip whitespace characters - skipWhitespaceIfAny(buf); - return buf.eof() || *buf.position() == '['; -} - -bool readFieldImpl(ReadBuffer & in, IColumn & column, const DataTypePtr & type, const SerializationPtr & serialization, const String & column_name, const FormatSettings & format_settings, bool yield_strings) -{ - try - { - bool as_nullable = format_settings.null_as_default && !type->isNullable() && !type->isLowCardinalityNullable(); - - if (yield_strings) - { - String str; - readJSONString(str, in); - - ReadBufferFromString buf(str); - - if (as_nullable) - return SerializationNullable::deserializeWholeTextImpl(column, buf, format_settings, serialization); - - serialization->deserializeWholeText(column, buf, format_settings); - return true; - } - - if (as_nullable) - return SerializationNullable::deserializeTextJSONImpl(column, in, format_settings, serialization); - - serialization->deserializeTextJSON(column, in, format_settings); - return true; - } - catch (Exception & e) - { - e.addMessage("(while reading the value of key " + column_name + ")"); - throw; - } -} - -DataTypePtr getCommonTypeForJSONFormats(const DataTypePtr & first, const DataTypePtr & second, bool allow_bools_as_numbers) -{ - if (allow_bools_as_numbers) - { - auto not_nullable_first = removeNullable(first); - auto 
not_nullable_second = removeNullable(second); - /// Check if we have Bool and Number and if so make the result type Number - bool bool_type_presents = isBool(not_nullable_first) || isBool(not_nullable_second); - bool number_type_presents = isNumber(not_nullable_first) || isNumber(not_nullable_second); - if (bool_type_presents && number_type_presents) - { - if (isBool(not_nullable_first)) - return second; - return first; - } - } - - /// If we have Map and Object, make result type Object - bool object_type_presents = isObject(first) || isObject(second); - bool map_type_presents = isMap(first) || isMap(second); - if (object_type_presents && map_type_presents) - { - if (isObject(first)) - return first; - return second; - } - - /// If we have different Maps, make result type Object - if (isMap(first) && isMap(second) && !first->equals(*second)) - return std::make_shared("json", true); - - return nullptr; -} - -} diff --git a/src/Formats/JSONEachRowUtils.h b/src/Formats/JSONEachRowUtils.h deleted file mode 100644 index 46c343f356a..00000000000 --- a/src/Formats/JSONEachRowUtils.h +++ /dev/null @@ -1,37 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include -#include - -namespace DB -{ - -std::pair fileSegmentationEngineJSONEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size); -std::pair fileSegmentationEngineJSONCompactEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size, size_t min_rows); - - -/// Parse JSON from string and convert it's type to ClickHouse type. Make the result type always Nullable. -/// JSON array with different nested types is treated as Tuple. -/// If cannot convert (for example when field contains null), return nullptr. -DataTypePtr getDataTypeFromJSONField(const String & field); - -/// Read row in JSONEachRow format and try to determine type for each field. -/// Return list of names and types. -/// If cannot determine the type of some field, return nullptr for it. -NamesAndTypesList readRowAndGetNamesAndDataTypesForJSONEachRow(ReadBuffer & in, bool json_strings); - -/// Read row in JSONCompactEachRow format and try to determine type for each field. -/// If cannot determine the type of some field, return nullptr for it. 
-DataTypes readRowAndGetDataTypesForJSONCompactEachRow(ReadBuffer & in, bool json_strings); - -bool nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl(ReadBuffer & buf); - -bool readFieldImpl(ReadBuffer & in, IColumn & column, const DataTypePtr & type, const SerializationPtr & serialization, const String & column_name, const FormatSettings & format_settings, bool yield_strings); - -DataTypePtr getCommonTypeForJSONFormats(const DataTypePtr & first, const DataTypePtr & second, bool allow_bools_as_numbers); - -} diff --git a/src/Formats/JSONUtils.cpp b/src/Formats/JSONUtils.cpp new file mode 100644 index 00000000000..1ac58760516 --- /dev/null +++ b/src/Formats/JSONUtils.cpp @@ -0,0 +1,603 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int INCORRECT_DATA; + extern const int LOGICAL_ERROR; +} + +namespace JSONUtils +{ + + template + static std::pair + fileSegmentationEngineJSONEachRowImpl(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size, size_t min_rows) + { + skipWhitespaceIfAny(in); + + char * pos = in.position(); + size_t balance = 0; + bool quotes = false; + size_t number_of_rows = 0; + + while (loadAtPosition(in, memory, pos) + && (balance || memory.size() + static_cast(pos - in.position()) < min_chunk_size || number_of_rows < min_rows)) + { + const auto current_object_size = memory.size() + static_cast(pos - in.position()); + if (min_chunk_size != 0 && current_object_size > 10 * min_chunk_size) + throw ParsingException( + "Size of JSON object is extremely large. Expected not greater than " + std::to_string(min_chunk_size) + + " bytes, but current is " + std::to_string(current_object_size) + + " bytes per row. Increase the value setting 'min_chunk_bytes_for_parallel_parsing' or check your data manually, most likely JSON is malformed", + ErrorCodes::INCORRECT_DATA); + + if (quotes) + { + pos = find_first_symbols<'\\', '"'>(pos, in.buffer().end()); + + if (pos > in.buffer().end()) + throw Exception("Position in buffer is out of bounds. There must be a bug.", ErrorCodes::LOGICAL_ERROR); + else if (pos == in.buffer().end()) + continue; + + if (*pos == '\\') + { + ++pos; + if (loadAtPosition(in, memory, pos)) + ++pos; + } + else if (*pos == '"') + { + ++pos; + quotes = false; + } + } + else + { + pos = find_first_symbols(pos, in.buffer().end()); + + if (pos > in.buffer().end()) + throw Exception("Position in buffer is out of bounds. 
There must be a bug.", ErrorCodes::LOGICAL_ERROR); + else if (pos == in.buffer().end()) + continue; + + else if (*pos == opening_bracket) + { + ++balance; + ++pos; + } + else if (*pos == closing_bracket) + { + --balance; + ++pos; + } + else if (*pos == '\\') + { + ++pos; + if (loadAtPosition(in, memory, pos)) + ++pos; + } + else if (*pos == '"') + { + quotes = true; + ++pos; + } + + if (balance == 0) + ++number_of_rows; + } + } + + saveUpToPosition(in, memory, pos); + return {loadAtPosition(in, memory, pos), number_of_rows}; + } + + template + static String readJSONEachRowLineIntoStringImpl(ReadBuffer & in) + { + Memory memory; + fileSegmentationEngineJSONEachRowImpl(in, memory, 0, 1); + return String(memory.data(), memory.size()); + } + + template + DataTypePtr getDataTypeFromFieldImpl(const Element & field) + { + if (field.isNull()) + return nullptr; + + if (field.isBool()) + return DataTypeFactory::instance().get("Nullable(Bool)"); + + if (field.isInt64() || field.isUInt64() || field.isDouble()) + return makeNullable(std::make_shared()); + + if (field.isString()) + return makeNullable(std::make_shared()); + + if (field.isArray()) + { + auto array = field.getArray(); + + /// Return nullptr in case of empty array because we cannot determine nested type. + if (array.size() == 0) + return nullptr; + + DataTypes nested_data_types; + /// If this array contains fields with different types we will treat it as Tuple. + bool is_tuple = false; + for (const auto element : array) + { + auto type = getDataTypeFromFieldImpl(element); + if (!type) + return nullptr; + + if (!nested_data_types.empty() && type->getName() != nested_data_types.back()->getName()) + is_tuple = true; + + nested_data_types.push_back(std::move(type)); + } + + if (is_tuple) + return std::make_shared(nested_data_types); + + return std::make_shared(nested_data_types.back()); + } + + if (field.isObject()) + { + auto object = field.getObject(); + DataTypePtr value_type; + bool is_object = false; + for (const auto key_value_pair : object) + { + auto type = getDataTypeFromFieldImpl(key_value_pair.second); + if (!type) + continue; + + if (isObject(type)) + { + is_object = true; + break; + } + + if (!value_type) + { + value_type = type; + } + else if (!value_type->equals(*type)) + { + is_object = true; + break; + } + } + + if (is_object) + return std::make_shared("json", true); + + if (value_type) + return std::make_shared(std::make_shared(), value_type); + + return nullptr; + } + + throw Exception{ErrorCodes::INCORRECT_DATA, "Unexpected JSON type"}; + } + + auto getJSONParserAndElement() + { +#if USE_SIMDJSON + return std::pair(); +#elif USE_RAPIDJSON + return std::pair(); +#else + return std::pair(); +#endif + } + + DataTypePtr getDataTypeFromField(const String & field) + { + auto [parser, element] = getJSONParserAndElement(); + bool parsed = parser.parse(field, element); + if (!parsed) + throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot parse JSON object here: {}", field); + + return getDataTypeFromFieldImpl(element); + } + + template + static DataTypes determineColumnDataTypesFromJSONEachRowDataImpl(ReadBuffer & in, bool /*json_strings*/, Extractor & extractor) + { + String line = readJSONEachRowLineIntoStringImpl(in); + auto [parser, element] = getJSONParserAndElement(); + bool parsed = parser.parse(line, element); + if (!parsed) + throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot parse JSON object here: {}", line); + + auto fields = extractor.extract(element); + + DataTypes data_types; + data_types.reserve(fields.size()); + for 
(const auto & field : fields) + data_types.push_back(getDataTypeFromFieldImpl(field)); + + /// TODO: For JSONStringsEachRow/JSONCompactStringsEach all types will be strings. + /// Should we try to parse data inside strings somehow in this case? + + return data_types; + } + + std::pair fileSegmentationEngineJSONEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size) + { + return fileSegmentationEngineJSONEachRowImpl<'{', '}'>(in, memory, min_chunk_size, 1); + } + + std::pair + fileSegmentationEngineJSONCompactEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size, size_t min_rows) + { + return fileSegmentationEngineJSONEachRowImpl<'[', ']'>(in, memory, min_chunk_size, min_rows); + } + + struct JSONEachRowFieldsExtractor + { + template + std::vector extract(const Element & element) + { + /// {..., "" : , ...} + + if (!element.isObject()) + throw Exception(ErrorCodes::INCORRECT_DATA, "Root JSON value is not an object"); + + auto object = element.getObject(); + std::vector fields; + fields.reserve(object.size()); + column_names.reserve(object.size()); + for (const auto & key_value_pair : object) + { + column_names.emplace_back(key_value_pair.first); + fields.push_back(key_value_pair.second); + } + + return fields; + } + + std::vector column_names; + }; + + NamesAndTypesList readRowAndGetNamesAndDataTypesForJSONEachRow(ReadBuffer & in, bool json_strings) + { + JSONEachRowFieldsExtractor extractor; + auto data_types + = determineColumnDataTypesFromJSONEachRowDataImpl(in, json_strings, extractor); + NamesAndTypesList result; + for (size_t i = 0; i != extractor.column_names.size(); ++i) + result.emplace_back(extractor.column_names[i], data_types[i]); + return result; + } + + struct JSONCompactEachRowFieldsExtractor + { + template + std::vector extract(const Element & element) + { + /// [..., , ...] 
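+ /// e.g. ["Hello", 123, [1,2,3]] — one value per column in declaration order, whereas
+ /// JSONEachRowFieldsExtractor above handles the object form {"s": "Hello", "x": 123}
+ /// with explicit column names.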
+ if (!element.isArray()) + throw Exception(ErrorCodes::INCORRECT_DATA, "Root JSON value is not an array"); + + auto array = element.getArray(); + std::vector fields; + fields.reserve(array.size()); + for (size_t i = 0; i != array.size(); ++i) + fields.push_back(array[i]); + return fields; + } + }; + + DataTypes readRowAndGetDataTypesForJSONCompactEachRow(ReadBuffer & in, bool json_strings) + { + JSONCompactEachRowFieldsExtractor extractor; + return determineColumnDataTypesFromJSONEachRowDataImpl(in, json_strings, extractor); + } + + + bool nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl(ReadBuffer & buf) + { + /// For JSONEachRow we can safely skip whitespace characters + skipWhitespaceIfAny(buf); + return buf.eof() || *buf.position() == '['; + } + + bool readField( + ReadBuffer & in, + IColumn & column, + const DataTypePtr & type, + const SerializationPtr & serialization, + const String & column_name, + const FormatSettings & format_settings, + bool yield_strings) + { + try + { + bool as_nullable = format_settings.null_as_default && !type->isNullable() && !type->isLowCardinalityNullable(); + + if (yield_strings) + { + String str; + readJSONString(str, in); + + ReadBufferFromString buf(str); + + if (as_nullable) + return SerializationNullable::deserializeWholeTextImpl(column, buf, format_settings, serialization); + + serialization->deserializeWholeText(column, buf, format_settings); + return true; + } + + if (as_nullable) + return SerializationNullable::deserializeTextJSONImpl(column, in, format_settings, serialization); + + serialization->deserializeTextJSON(column, in, format_settings); + return true; + } + catch (Exception & e) + { + e.addMessage("(while reading the value of key " + column_name + ")"); + throw; + } + } + + DataTypePtr getCommonTypeForJSONFormats(const DataTypePtr & first, const DataTypePtr & second, bool allow_bools_as_numbers) + { + if (allow_bools_as_numbers) + { + auto not_nullable_first = removeNullable(first); + auto not_nullable_second = removeNullable(second); + /// Check if we have Bool and Number and if so make the result type Number + bool bool_type_presents = isBool(not_nullable_first) || isBool(not_nullable_second); + bool number_type_presents = isNumber(not_nullable_first) || isNumber(not_nullable_second); + if (bool_type_presents && number_type_presents) + { + if (isBool(not_nullable_first)) + return second; + return first; + } + } + + /// If we have Map and Object, make result type Object + bool object_type_presents = isObject(first) || isObject(second); + bool map_type_presents = isMap(first) || isMap(second); + if (object_type_presents && map_type_presents) + { + if (isObject(first)) + return first; + return second; + } + + /// If we have different Maps, make result type Object + if (isMap(first) && isMap(second) && !first->equals(*second)) + return std::make_shared("json", true); + + return nullptr; + } + + void writeFieldDelimiter(WriteBuffer & out, size_t new_lines) + { + writeChar(',', out); + writeChar('\n', new_lines, out); + } + + void writeFieldCompactDelimiter(WriteBuffer & out) { writeCString(", ", out); } + + template + void writeTitle(const char * title, WriteBuffer & out, size_t indent) + { + writeChar('\t', indent, out); + writeChar('"', out); + writeCString(title, out); + if constexpr (with_space) + writeCString("\": ", out); + else + writeCString("\":\n", out); + } + + void writeObjectStart(WriteBuffer & out, size_t indent, const char * title) + { + if (title) + writeTitle(title, out, indent); + writeChar('\t', indent, out); + 
writeCString("{\n", out); + } + + void writeObjectEnd(WriteBuffer & out, size_t indent) + { + writeChar('\n', out); + writeChar('\t', indent, out); + writeChar('}', out); + } + + void writeArrayStart(WriteBuffer & out, size_t indent, const char * title) + { + if (title) + writeTitle(title, out, indent); + writeChar('\t', indent, out); + writeCString("[\n", out); + } + + void writeCompactArrayStart(WriteBuffer & out, size_t indent, const char * title) + { + if (title) + writeTitle(title, out, indent); + else + writeChar('\t', indent, out); + writeCString("[", out); + } + + void writeArrayEnd(WriteBuffer & out, size_t indent) + { + writeChar('\n', out); + writeChar('\t', indent, out); + writeChar(']', out); + } + + void writeCompactArrayEnd(WriteBuffer & out) { writeChar(']', out); } + + void writeFieldFromColumn( + const IColumn & column, + const ISerialization & serialization, + size_t row_num, + bool yield_strings, + const FormatSettings & settings, + WriteBuffer & out, + const std::optional & name, + size_t indent) + { + if (name.has_value()) + writeTitle(name->data(), out, indent); + + if (yield_strings) + { + WriteBufferFromOwnString buf; + + serialization.serializeText(column, row_num, buf, settings); + writeJSONString(buf.str(), out, settings); + } + else + serialization.serializeTextJSON(column, row_num, out, settings); + } + + void writeColumns( + const Columns & columns, + const NamesAndTypes & fields, + const Serializations & serializations, + size_t row_num, + bool yield_strings, + const FormatSettings & settings, + WriteBuffer & out, + size_t indent) + { + for (size_t i = 0; i < columns.size(); ++i) + { + if (i != 0) + writeFieldDelimiter(out); + writeFieldFromColumn(*columns[i], *serializations[i], row_num, yield_strings, settings, out, fields[i].name, indent); + } + } + + void writeCompactColumns( + const Columns & columns, + const Serializations & serializations, + size_t row_num, + bool yield_strings, + const FormatSettings & settings, + WriteBuffer & out) + { + for (size_t i = 0; i < columns.size(); ++i) + { + if (i != 0) + writeFieldCompactDelimiter(out); + writeFieldFromColumn(*columns[i], *serializations[i], row_num, yield_strings, settings, out); + } + } + + void writeMetadata(const NamesAndTypes & fields, const FormatSettings & settings, WriteBuffer & out) + { + writeArrayStart(out, 1, "meta"); + + for (size_t i = 0; i < fields.size(); ++i) + { + writeObjectStart(out, 2); + + writeTitle("name", out, 3); + writeDoubleQuoted(fields[i].name, out); + writeFieldDelimiter(out); + writeTitle("type", out, 3); + writeJSONString(fields[i].type->getName(), out, settings); + writeObjectEnd(out, 2); + + if (i + 1 < fields.size()) + writeFieldDelimiter(out); + } + + writeArrayEnd(out, 1); + } + + void writeAdditionalInfo( + size_t rows, + size_t rows_before_limit, + bool applied_limit, + const Stopwatch & watch, + const Progress & progress, + bool write_statistics, + WriteBuffer & out) + { + writeFieldDelimiter(out, 2); + writeTitle("rows", out, 1); + writeIntText(rows, out); + + if (applied_limit) + { + writeFieldDelimiter(out, 2); + writeTitle("rows_before_limit_at_least", out, 1); + writeIntText(rows_before_limit, out); + } + + if (write_statistics) + { + writeFieldDelimiter(out, 2); + writeObjectStart(out, 1, "statistics"); + + writeTitle("elapsed", out, 2); + writeText(watch.elapsedSeconds(), out); + writeFieldDelimiter(out); + + writeTitle("rows_read", out, 2); + writeText(progress.read_rows.load(), out); + writeFieldDelimiter(out); + + writeTitle("bytes_read", out, 2); + 
writeText(progress.read_bytes.load(), out); + + writeObjectEnd(out, 1); + } + } + + void makeNamesAndTypesWithValidUTF8(NamesAndTypes & fields, const FormatSettings & settings, bool & need_validate_utf8) + { + for (auto & field : fields) + { + if (!field.type->textCanContainOnlyValidUTF8()) + need_validate_utf8 = true; + + WriteBufferFromOwnString buf; + { + WriteBufferValidUTF8 validating_buf(buf); + writeJSONString(field.name, validating_buf, settings); + } + field.name = buf.str().substr(1, buf.str().size() - 2); + } + } + +} + +} diff --git a/src/Formats/JSONUtils.h b/src/Formats/JSONUtils.h new file mode 100644 index 00000000000..f2aba3cbcb5 --- /dev/null +++ b/src/Formats/JSONUtils.h @@ -0,0 +1,109 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace JSONUtils +{ + std::pair fileSegmentationEngineJSONEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size); + std::pair + fileSegmentationEngineJSONCompactEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size, size_t min_rows); + + /// Parse JSON from string and convert it's type to ClickHouse type. Make the result type always Nullable. + /// JSON array with different nested types is treated as Tuple. + /// If cannot convert (for example when field contains null), return nullptr. + DataTypePtr getDataTypeFromField(const String & field); + + /// Read row in JSONEachRow format and try to determine type for each field. + /// Return list of names and types. + /// If cannot determine the type of some field, return nullptr for it. + NamesAndTypesList readRowAndGetNamesAndDataTypesForJSONEachRow(ReadBuffer & in, bool json_strings); + + /// Read row in JSONCompactEachRow format and try to determine type for each field. + /// If cannot determine the type of some field, return nullptr for it. + DataTypes readRowAndGetDataTypesForJSONCompactEachRow(ReadBuffer & in, bool json_strings); + + bool nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl(ReadBuffer & buf); + + bool readField( + ReadBuffer & in, + IColumn & column, + const DataTypePtr & type, + const SerializationPtr & serialization, + const String & column_name, + const FormatSettings & format_settings, + bool yield_strings); + + DataTypePtr getCommonTypeForJSONFormats(const DataTypePtr & first, const DataTypePtr & second, bool allow_bools_as_numbers); + + void makeNamesAndTypesWithValidUTF8(NamesAndTypes & fields, const FormatSettings & settings, bool & need_validate_utf8); + + /// Functions helpers for writing JSON data to WriteBuffer. 
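+ ///
+ /// Sketch of how an output format can compose them (illustrative order, close to what the
+ /// JSON row output formats do, not a verbatim excerpt):
+ ///     writeObjectStart(out);                                  /// "{"
+ ///     writeMetadata(fields, settings, out);                   /// "meta": [{"name": ..., "type": ...}, ...]
+ ///     writeFieldDelimiter(out, 2);
+ ///     writeArrayStart(out, 1, "data");                        /// "data": [
+ ///     writeColumns(columns, fields, serializations, row, false, settings, out, 3);
+ ///         /// one writeColumns() call per row, rows separated by writeFieldDelimiter(out)
+ ///     writeArrayEnd(out, 1);                                  /// "]"
+ ///     writeAdditionalInfo(rows, rows_before_limit, applied_limit, watch, progress, true, out);
+ ///     writeObjectEnd(out);                                    /// "}"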
+ + void writeFieldDelimiter(WriteBuffer & out, size_t new_lines = 1); + + void writeFieldCompactDelimiter(WriteBuffer & out); + + void writeObjectStart(WriteBuffer & out, size_t indent = 0, const char * title = nullptr); + + void writeObjectEnd(WriteBuffer & out, size_t indent = 0); + + void writeArrayStart(WriteBuffer & out, size_t indent = 0, const char * title = nullptr); + + void writeCompactArrayStart(WriteBuffer & out, size_t indent = 0, const char * title = nullptr); + + void writeArrayEnd(WriteBuffer & out, size_t indent = 0); + + void writeCompactArrayEnd(WriteBuffer & out); + + void writeFieldFromColumn( + const IColumn & column, + const ISerialization & serialization, + size_t row_num, + bool yield_strings, + const FormatSettings & settings, + WriteBuffer & out, + const std::optional & name = std::nullopt, + size_t indent = 0); + + void writeColumns( + const Columns & columns, + const NamesAndTypes & fields, + const Serializations & serializations, + size_t row_num, + bool yield_strings, + const FormatSettings & settings, + WriteBuffer & out, + size_t indent = 0); + + void writeCompactColumns( + const Columns & columns, + const Serializations & serializations, + size_t row_num, + bool yield_strings, + const FormatSettings & settings, + WriteBuffer & out); + + void writeMetadata(const NamesAndTypes & fields, const FormatSettings & settings, WriteBuffer & out); + + void writeAdditionalInfo( + size_t rows, + size_t rows_before_limit, + bool applied_limit, + const Stopwatch & watch, + const Progress & progress, + bool write_statistics, + WriteBuffer & out); +} + +} diff --git a/src/Formats/ReadSchemaUtils.cpp b/src/Formats/ReadSchemaUtils.cpp index 26944f85014..035546031d8 100644 --- a/src/Formats/ReadSchemaUtils.cpp +++ b/src/Formats/ReadSchemaUtils.cpp @@ -73,11 +73,15 @@ ColumnsDescription readSchemaFromFormat( { std::string exception_messages; SchemaReaderPtr schema_reader; + size_t max_rows_to_read = format_settings ? format_settings->max_rows_to_read_for_schema_inference : context->getSettingsRef().input_format_max_rows_to_read_for_schema_inference; + size_t iterations = 0; while ((buf = read_buffer_iterator())) { + ++iterations; + if (buf->eof()) { - auto exception_message = fmt::format("Cannot extract table structure from {} format file, file is emptyg", format_name); + auto exception_message = fmt::format("Cannot extract table structure from {} format file, file is empty", format_name); if (!retry) throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, exception_message); @@ -89,12 +93,26 @@ ColumnsDescription readSchemaFromFormat( try { schema_reader = FormatFactory::instance().getSchemaReader(format_name, *buf, context, format_settings); + schema_reader->setMaxRowsToRead(max_rows_to_read); names_and_types = schema_reader->readSchema(); break; } catch (...) 
{ auto exception_message = getCurrentExceptionMessage(false); + size_t rows_read = schema_reader->getNumRowsRead(); + assert(rows_read <= max_rows_to_read); + max_rows_to_read -= schema_reader->getNumRowsRead(); + if (rows_read != 0 && max_rows_to_read == 0) + { + exception_message += "\nTo increase the maximum number of rows to read for structure determination, use setting input_format_max_rows_to_read_for_schema_inference"; + if (iterations > 1) + { + exception_messages += "\n" + exception_message; + break; + } + retry = false; + } if (!retry || !isRetryableSchemaInferenceError(getCurrentExceptionCode())) throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "Cannot extract table structure from {} format file. Error: {}", format_name, exception_message); diff --git a/src/Formats/registerFormats.cpp b/src/Formats/registerFormats.cpp index 6797b967baa..8493c84173d 100644 --- a/src/Formats/registerFormats.cpp +++ b/src/Formats/registerFormats.cpp @@ -38,6 +38,10 @@ void registerInputFormatJSONEachRow(FormatFactory & factory); void registerOutputFormatJSONEachRow(FormatFactory & factory); void registerInputFormatJSONCompactEachRow(FormatFactory & factory); void registerOutputFormatJSONCompactEachRow(FormatFactory & factory); +void registerInputFormatJSONColumns(FormatFactory & factory); +void registerOutputFormatJSONColumns(FormatFactory & factory); +void registerInputFormatJSONCompactColumns(FormatFactory & factory); +void registerOutputFormatJSONCompactColumns(FormatFactory & factory); void registerInputFormatProtobuf(FormatFactory & factory); void registerOutputFormatProtobuf(FormatFactory & factory); void registerInputFormatProtobufList(FormatFactory & factory); @@ -70,6 +74,7 @@ void registerOutputFormatVertical(FormatFactory & factory); void registerOutputFormatJSON(FormatFactory & factory); void registerOutputFormatJSONCompact(FormatFactory & factory); void registerOutputFormatJSONEachRowWithProgress(FormatFactory & factory); +void registerOutputFormatJSONColumnsWithMetadata(FormatFactory & factory); void registerOutputFormatXML(FormatFactory & factory); void registerOutputFormatODBCDriver2(FormatFactory & factory); void registerOutputFormatNull(FormatFactory & factory); @@ -102,14 +107,16 @@ void registerTSVSchemaReader(FormatFactory & factory); void registerCSVSchemaReader(FormatFactory & factory); void registerJSONCompactEachRowSchemaReader(FormatFactory & factory); void registerJSONEachRowSchemaReader(FormatFactory & factory); +void registerJSONAsStringSchemaReader(FormatFactory & factory); +void registerJSONAsObjectSchemaReader(FormatFactory & factory); +void registerJSONColumnsSchemaReader(FormatFactory & factory); +void registerJSONCompactColumnsSchemaReader(FormatFactory & factory); void registerNativeSchemaReader(FormatFactory & factory); void registerRowBinaryWithNamesAndTypesSchemaReader(FormatFactory & factory); void registerAvroSchemaReader(FormatFactory & factory); void registerProtobufSchemaReader(FormatFactory & factory); void registerProtobufListSchemaReader(FormatFactory & factory); void registerLineAsStringSchemaReader(FormatFactory & factory); -void registerJSONAsStringSchemaReader(FormatFactory & factory); -void registerJSONAsObjectSchemaReader(FormatFactory & factory); void registerRawBLOBSchemaReader(FormatFactory & factory); void registerMsgPackSchemaReader(FormatFactory & factory); void registerCapnProtoSchemaReader(FormatFactory & factory); @@ -120,6 +127,7 @@ void registerValuesSchemaReader(FormatFactory & factory); void 
registerTemplateSchemaReader(FormatFactory & factory); void registerMySQLSchemaReader(FormatFactory & factory); + void registerFileExtensions(FormatFactory & factory); void registerFormats() @@ -128,8 +136,8 @@ void registerFormats() registerFileSegmentationEngineTabSeparated(factory); registerFileSegmentationEngineCSV(factory); - registerFileSegmentationEngineJSONEachRow(factory); registerFileSegmentationEngineRegexp(factory); + registerFileSegmentationEngineJSONEachRow(factory); registerFileSegmentationEngineJSONAsString(factory); registerFileSegmentationEngineJSONAsObject(factory); registerFileSegmentationEngineJSONCompactEachRow(factory); @@ -155,6 +163,10 @@ void registerFormats() registerOutputFormatJSONEachRow(factory); registerInputFormatJSONCompactEachRow(factory); registerOutputFormatJSONCompactEachRow(factory); + registerInputFormatJSONColumns(factory); + registerOutputFormatJSONColumns(factory); + registerInputFormatJSONCompactColumns(factory); + registerOutputFormatJSONCompactColumns(factory); registerInputFormatProtobuf(factory); registerOutputFormatProtobufList(factory); registerInputFormatProtobufList(factory); @@ -184,6 +196,7 @@ void registerFormats() registerOutputFormatJSON(factory); registerOutputFormatJSONCompact(factory); registerOutputFormatJSONEachRowWithProgress(factory); + registerOutputFormatJSONColumnsWithMetadata(factory); registerOutputFormatXML(factory); registerOutputFormatODBCDriver2(factory); registerOutputFormatNull(factory); @@ -195,8 +208,8 @@ void registerFormats() registerInputFormatRegexp(factory); registerInputFormatJSONAsString(factory); - registerInputFormatLineAsString(factory); registerInputFormatJSONAsObject(factory); + registerInputFormatLineAsString(factory); #if USE_HIVE registerInputFormatHiveText(factory); #endif @@ -215,14 +228,16 @@ void registerFormats() registerCSVSchemaReader(factory); registerJSONCompactEachRowSchemaReader(factory); registerJSONEachRowSchemaReader(factory); + registerJSONAsStringSchemaReader(factory); + registerJSONAsObjectSchemaReader(factory); + registerJSONColumnsSchemaReader(factory); + registerJSONCompactColumnsSchemaReader(factory); registerNativeSchemaReader(factory); registerRowBinaryWithNamesAndTypesSchemaReader(factory); registerAvroSchemaReader(factory); registerProtobufSchemaReader(factory); registerProtobufListSchemaReader(factory); registerLineAsStringSchemaReader(factory); - registerJSONAsStringSchemaReader(factory); - registerJSONAsObjectSchemaReader(factory); registerRawBLOBSchemaReader(factory); registerMsgPackSchemaReader(factory); registerCapnProtoSchemaReader(factory); diff --git a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt index a84e5a3f526..60386908f01 100644 --- a/src/Functions/CMakeLists.txt +++ b/src/Functions/CMakeLists.txt @@ -23,6 +23,7 @@ target_link_libraries(clickhouse_functions dbms ch_contrib::metrohash ch_contrib::murmurhash + ch_contrib::hashidsxx PRIVATE ch_contrib::zlib diff --git a/src/Functions/FunctionHashID.cpp b/src/Functions/FunctionHashID.cpp new file mode 100644 index 00000000000..bd875a9d4ff --- /dev/null +++ b/src/Functions/FunctionHashID.cpp @@ -0,0 +1,12 @@ +#include "FunctionHashID.h" +#include + +namespace DB +{ + +void registerFunctionHashID(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} diff --git a/src/Functions/FunctionHashID.h b/src/Functions/FunctionHashID.h new file mode 100644 index 00000000000..fbfb368bec7 --- /dev/null +++ b/src/Functions/FunctionHashID.h @@ -0,0 +1,169 @@ +#pragma once + +#include + +#include + 
+#include +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int ILLEGAL_COLUMN; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int SUPPORT_IS_DISABLED; + extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION; + extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION; +} + +// hashid(string, salt) +class FunctionHashID : public IFunction +{ +public: + static constexpr auto name = "hashid"; + + static FunctionPtr create(ContextPtr context) + { + if (!context->getSettingsRef().allow_experimental_hash_functions) + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, + "Hashing function '{}' is experimental. Set `allow_experimental_hash_functions` setting to enable it", name); + + return std::make_shared(); + } + + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 0; } + + bool isVariadic() const override { return true; } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + if (arguments.size() < 1) + throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, "Function {} expects at least one argument", getName()); + + const auto & id_col = arguments[0]; + if (!isUnsignedInteger(id_col.type)) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "First argument of function {} must be unsigned integer, got {}", + getName(), + arguments[0].type->getName()); + + if (arguments.size() > 1) + { + const auto & hash_col = arguments[1]; + if (!isString(hash_col.type) || !isColumnConst(*hash_col.column.get())) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Second argument of function {} must be String, got {}", + getName(), + arguments[1].type->getName()); + } + + if (arguments.size() > 2) + { + const auto & min_length_col = arguments[2]; + if (!isUInt8(min_length_col.type) || !isColumnConst(*min_length_col.column.get())) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Third argument of function {} must be UInt8, got {}", + getName(), + arguments[2].type->getName()); + } + + if (arguments.size() > 3) + { + const auto & alphabet_col = arguments[3]; + if (!isString(alphabet_col.type) || !isColumnConst(*alphabet_col.column.get())) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Fourth argument of function {} must be String, got {}", + getName(), + arguments[3].type->getName()); + } + + if (arguments.size() > 4) + { + throw Exception( + ErrorCodes::TOO_MANY_ARGUMENTS_FOR_FUNCTION, + "Function {} expect no more than four arguments (integer, salt, min_length, optional_alphabet), got {}", + getName(), + arguments.size()); + } + + return std::make_shared(); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + const auto & numcolumn = arguments[0].column; + + if (checkAndGetColumn(numcolumn.get()) || checkAndGetColumn(numcolumn.get()) + || checkAndGetColumn(numcolumn.get()) || checkAndGetColumn(numcolumn.get()) + || checkAndGetColumnConst(numcolumn.get()) || checkAndGetColumnConst(numcolumn.get()) + || checkAndGetColumnConst(numcolumn.get()) || checkAndGetColumnConst(numcolumn.get())) + { + std::string salt; + UInt8 minLength = 0; + std::string alphabet; + + if (arguments.size() >= 4) + { + const auto & alphabetcolumn = 
arguments[3].column; + if (auto alpha_col = checkAndGetColumnConst(alphabetcolumn.get())) + { + alphabet = alpha_col->getValue(); + if (alphabet.find('\0') != std::string::npos) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Custom alphabet must not contain null character"); + } + } + else + alphabet.assign(DEFAULT_ALPHABET); + + if (arguments.size() >= 3) + { + const auto & minlengthcolumn = arguments[2].column; + if (auto min_length_col = checkAndGetColumnConst(minlengthcolumn.get())) + minLength = min_length_col->getValue(); + } + + if (arguments.size() >= 2) + { + const auto & saltcolumn = arguments[1].column; + if (auto salt_col = checkAndGetColumnConst(saltcolumn.get())) + salt = salt_col->getValue(); + } + + hashidsxx::Hashids hash(salt, minLength, alphabet); + + auto col_res = ColumnString::create(); + + for (size_t i = 0; i < input_rows_count; ++i) + { + col_res->insert(hash.encode({numcolumn->getUInt(i)})); + } + + return col_res; + } + else + throw Exception( + "Illegal column " + arguments[0].column->getName() + " of first argument of function hashid", ErrorCodes::ILLEGAL_COLUMN); + } +}; + +} diff --git a/src/Functions/FunctionUnaryArithmetic.h b/src/Functions/FunctionUnaryArithmetic.h index 3207b80af71..4dc769b8177 100644 --- a/src/Functions/FunctionUnaryArithmetic.h +++ b/src/Functions/FunctionUnaryArithmetic.h @@ -13,6 +13,7 @@ #include #include +#include #if USE_EMBEDDED_COMPILER # pragma GCC diagnostic push @@ -41,11 +42,31 @@ struct UnaryOperationImpl using ArrayA = typename ColVecA::Container; using ArrayC = typename ColVecC::Container; - static void NO_INLINE vector(const ArrayA & a, ArrayC & c) + MULTITARGET_FUNCTION_WRAPPER_AVX2_SSE42(vectorImpl, + MULTITARGET_FH( + static void NO_INLINE), /*vectorImpl*/ MULTITARGET_FB((const ArrayA & a, ArrayC & c) /// NOLINT { size_t size = a.size(); for (size_t i = 0; i < size; ++i) c[i] = Op::apply(a[i]); + })) + + static void NO_INLINE vector(const ArrayA & a, ArrayC & c) + { +#if USE_MULTITARGET_CODE + if (isArchSupported(TargetArch::AVX2)) + { + vectorImplAVX2(a, c); + return; + } + else if (isArchSupported(TargetArch::SSE42)) + { + vectorImplSSE42(a, c); + return; + } +#endif + + vectorImpl(a, c); } static void constant(A a, ResultType & c) @@ -58,11 +79,31 @@ struct UnaryOperationImpl template struct FixedStringUnaryOperationImpl { - static void NO_INLINE vector(const ColumnFixedString::Chars & a, ColumnFixedString::Chars & c) + MULTITARGET_FUNCTION_WRAPPER_AVX2_SSE42(vectorImpl, + MULTITARGET_FH( + static void NO_INLINE), /*vectorImpl*/ MULTITARGET_FB((const ColumnFixedString::Chars & a, ColumnFixedString::Chars & c) /// NOLINT { size_t size = a.size(); for (size_t i = 0; i < size; ++i) c[i] = Op::apply(a[i]); + })) + + static void NO_INLINE vector(const ColumnFixedString::Chars & a, ColumnFixedString::Chars & c) + { +#if USE_MULTITARGET_CODE + if (isArchSupported(TargetArch::AVX2)) + { + vectorImplAVX2(a, c); + return; + } + else if (isArchSupported(TargetArch::SSE42)) + { + vectorImplSSE42(a, c); + return; + } +#endif + + vectorImpl(a, c); } }; diff --git a/src/Functions/FunctionsCodingIP.cpp b/src/Functions/FunctionsCodingIP.cpp index c141d1bf102..1fdbbf3e9bb 100644 --- a/src/Functions/FunctionsCodingIP.cpp +++ b/src/Functions/FunctionsCodingIP.cpp @@ -265,9 +265,11 @@ public: bool useDefaultImplementationForConstants() const override { return true; } + bool useDefaultImplementationForNulls() const override { return false; } + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { - if 
(!isStringOrFixedString(arguments[0])) + if (!isStringOrFixedString(removeNullable(arguments[0]))) { throw Exception( ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", arguments[0]->getName(), getName()); @@ -280,22 +282,37 @@ public: return makeNullable(result_type); } - return result_type; + return arguments[0]->isNullable() ? makeNullable(result_type) : result_type; } ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { - const ColumnPtr & column = arguments[0].column; + ColumnPtr column = arguments[0].column; + ColumnPtr null_map_column; + const NullMap * null_map = nullptr; + if (column->isNullable()) + { + const auto * column_nullable = assert_cast(column.get()); + column = column_nullable->getNestedColumnPtr(); + null_map_column = column_nullable->getNullMapColumnPtr(); + null_map = &column_nullable->getNullMapData(); + } if constexpr (exception_mode == IPStringToNumExceptionMode::Throw) { if (cast_ipv4_ipv6_default_on_conversion_error) { - return convertToIPv6(column); + auto result = convertToIPv6(column, null_map); + if (null_map && !result->isNullable()) + return ColumnNullable::create(result, null_map_column); + return result; } } - return convertToIPv6(column); + auto result = convertToIPv6(column, null_map); + if (null_map && !result->isNullable()) + return ColumnNullable::create(IColumn::mutate(result), IColumn::mutate(null_map_column)); + return result; } private: @@ -390,9 +407,11 @@ public: bool useDefaultImplementationForConstants() const override { return true; } + bool useDefaultImplementationForNulls() const override { return false; } + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { - if (!isString(arguments[0])) + if (!isString(removeNullable(arguments[0]))) { throw Exception( ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", arguments[0]->getName(), getName()); @@ -405,22 +424,37 @@ public: return makeNullable(result_type); } - return result_type; + return arguments[0]->isNullable() ? 
makeNullable(result_type) : result_type; } ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { - const ColumnPtr & column = arguments[0].column; + ColumnPtr column = arguments[0].column; + ColumnPtr null_map_column; + const NullMap * null_map = nullptr; + if (column->isNullable()) + { + const auto * column_nullable = assert_cast(column.get()); + column = column_nullable->getNestedColumnPtr(); + null_map_column = column_nullable->getNullMapColumnPtr(); + null_map = &column_nullable->getNullMapData(); + } if constexpr (exception_mode == IPStringToNumExceptionMode::Throw) { if (cast_ipv4_ipv6_default_on_conversion_error) { - return convertToIPv4(column); + auto result = convertToIPv4(column, null_map); + if (null_map && !result->isNullable()) + return ColumnNullable::create(result, null_map_column); + return result; } } - return convertToIPv4(column); + auto result = convertToIPv4(column, null_map); + if (null_map && !result->isNullable()) + return ColumnNullable::create(IColumn::mutate(result), IColumn::mutate(null_map_column)); + return result; } private: @@ -506,7 +540,7 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { - if (!isString(arguments[0])) + if (!isString(removeNullable(arguments[0]))) { throw Exception( ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", arguments[0]->getName(), getName()); @@ -519,7 +553,7 @@ public: return makeNullable(result_type); } - return result_type; + return arguments[0]->isNullable() ? makeNullable(result_type) : result_type; } }; @@ -543,7 +577,7 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { - if (!isStringOrFixedString(arguments[0])) + if (!isStringOrFixedString(removeNullable(arguments[0]))) { throw Exception( ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", arguments[0]->getName(), getName()); @@ -556,7 +590,7 @@ public: return makeNullable(result_type); } - return result_type; + return arguments[0]->isNullable() ? 
makeNullable(result_type) : result_type; } }; diff --git a/src/Functions/FunctionsCodingIP.h b/src/Functions/FunctionsCodingIP.h index 246e62d965c..f25b25b12f0 100644 --- a/src/Functions/FunctionsCodingIP.h +++ b/src/Functions/FunctionsCodingIP.h @@ -31,7 +31,7 @@ static inline bool tryParseIPv4(const char * pos, UInt32 & result_value) namespace detail { template - ColumnPtr convertToIPv6(const StringColumnType & string_column) + ColumnPtr convertToIPv6(const StringColumnType & string_column, const PaddedPODArray * null_map = nullptr) { size_t column_size = string_column.size(); @@ -85,6 +85,15 @@ namespace detail src_next_offset += fixed_string_size; } + if (null_map && (*null_map)[i]) + { + std::fill_n(&vec_res[i], IPV6_BINARY_LENGTH, 0); + src_offset = src_next_offset; + if constexpr (exception_mode == IPStringToNumExceptionMode::Null) + (*vec_null_map_to)[i] = true; + continue; + } + bool parse_result = false; UInt32 dummy_result = 0; @@ -126,7 +135,7 @@ namespace detail } template -ColumnPtr convertToIPv6(ColumnPtr column) +ColumnPtr convertToIPv6(ColumnPtr column, const PaddedPODArray * null_map = nullptr) { size_t column_size = column->size(); @@ -137,11 +146,11 @@ ColumnPtr convertToIPv6(ColumnPtr column) if (const auto * column_input_string = checkAndGetColumn(column.get())) { - return detail::convertToIPv6(*column_input_string); + return detail::convertToIPv6(*column_input_string, null_map); } else if (const auto * column_input_fixed_string = checkAndGetColumn(column.get())) { - return detail::convertToIPv6(*column_input_fixed_string); + return detail::convertToIPv6(*column_input_fixed_string, null_map); } else { @@ -150,7 +159,7 @@ ColumnPtr convertToIPv6(ColumnPtr column) } template -ColumnPtr convertToIPv4(ColumnPtr column) +ColumnPtr convertToIPv4(ColumnPtr column, const PaddedPODArray * null_map = nullptr) { const ColumnString * column_string = checkAndGetColumn(column.get()); @@ -181,6 +190,15 @@ ColumnPtr convertToIPv4(ColumnPtr column) for (size_t i = 0; i < vec_res.size(); ++i) { + if (null_map && (*null_map)[i]) + { + vec_res[i] = 0; + prev_offset = offsets_src[i]; + if constexpr (exception_mode == IPStringToNumExceptionMode::Null) + (*vec_null_map_to)[i] = true; + continue; + } + bool parse_result = tryParseIPv4(reinterpret_cast(&vec_src[prev_offset]), vec_res[i]); if (!parse_result) diff --git a/src/Functions/FunctionsComparison.h b/src/Functions/FunctionsComparison.h index 48170d6f564..16575e551a7 100644 --- a/src/Functions/FunctionsComparison.h +++ b/src/Functions/FunctionsComparison.h @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -84,8 +85,8 @@ struct NumComparisonImpl using ContainerA = PaddedPODArray; using ContainerB = PaddedPODArray; - /// If you don't specify NO_INLINE, the compiler will inline this function, but we don't need this as this function contains tight loop inside. - static void NO_INLINE vectorVector(const ContainerA & a, const ContainerB & b, PaddedPODArray & c) + MULTITARGET_FUNCTION_WRAPPER_AVX2_SSE42(vectorVectorImpl, + MULTITARGET_FH(static void), /*vectorVectorImpl*/ MULTITARGET_FB((const ContainerA & a, const ContainerB & b, PaddedPODArray & c) /// NOLINT { /** GCC 4.8.2 vectorizes a loop only if it is written in this form. 
* In this case, if you loop through the array index (the code will look simpler), @@ -105,9 +106,29 @@ struct NumComparisonImpl ++b_pos; ++c_pos; } + })) + + static void NO_INLINE vectorVector(const ContainerA & a, const ContainerB & b, PaddedPODArray & c) + { +#if USE_MULTITARGET_CODE + if (isArchSupported(TargetArch::AVX2)) + { + vectorVectorImplAVX2(a, b, c); + return; + } + else if (isArchSupported(TargetArch::SSE42)) + { + vectorVectorImplSSE42(a, b, c); + return; + } +#endif + + vectorVectorImpl(a, b, c); } - static void NO_INLINE vectorConstant(const ContainerA & a, B b, PaddedPODArray & c) + + MULTITARGET_FUNCTION_WRAPPER_AVX2_SSE42(vectorConstantImpl, + MULTITARGET_FH(static void), /*vectorConstantImpl*/ MULTITARGET_FB((const ContainerA & a, B b, PaddedPODArray & c) /// NOLINT { size_t size = a.size(); const A * __restrict a_pos = a.data(); @@ -120,6 +141,24 @@ struct NumComparisonImpl ++a_pos; ++c_pos; } + })) + + static void NO_INLINE vectorConstant(const ContainerA & a, B b, PaddedPODArray & c) + { +#if USE_MULTITARGET_CODE + if (isArchSupported(TargetArch::AVX2)) + { + vectorConstantImplAVX2(a, b, c); + return; + } + else if (isArchSupported(TargetArch::SSE42)) + { + vectorConstantImplSSE42(a, b, c); + return; + } +#endif + + vectorConstantImpl(a, b, c); } static void constantVector(A a, const ContainerB & b, PaddedPODArray & c) diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index 1363e91eb0d..ba3c9d68241 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -1349,7 +1349,7 @@ struct ConvertImpl struct ConvertImplGenericFromString { - static ColumnPtr execute(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) + static ColumnPtr execute(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * column_nullable, size_t input_rows_count) { static_assert(std::is_same_v || std::is_same_v, "Can be used only to parse from ColumnString or ColumnFixedString"); @@ -1365,8 +1365,15 @@ struct ConvertImplGenericFromString FormatSettings format_settings; auto serialization = data_type_to.getDefaultSerialization(); + const auto * null_map = column_nullable ? &column_nullable->getNullMapData() : nullptr; for (size_t i = 0; i < input_rows_count; ++i) { + if (null_map && (*null_map)[i]) + { + column_to.insertDefault(); + continue; + } + const auto & val = col_from_string->getDataAt(i); ReadBufferFromMemory read_buffer(val.data, val.size); @@ -3694,16 +3701,17 @@ private: if (to_type->getCustomName()->getName() == "IPv4") { ret = [cast_ipv4_ipv6_default_on_conversion_error_value]( - ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t) + ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * column_nullable, size_t) -> ColumnPtr { if (!WhichDataType(result_type).isUInt32()) throw Exception(ErrorCodes::TYPE_MISMATCH, "Wrong result type {}. Expected UInt32", result_type->getName()); + const auto * null_map = column_nullable ? 
&column_nullable->getNullMapData() : nullptr; if (cast_ipv4_ipv6_default_on_conversion_error_value) - return convertToIPv4(arguments[0].column); + return convertToIPv4(arguments[0].column, null_map); else - return convertToIPv4(arguments[0].column); + return convertToIPv4(arguments[0].column, null_map); }; return true; @@ -3712,17 +3720,18 @@ private: if (to_type->getCustomName()->getName() == "IPv6") { ret = [cast_ipv4_ipv6_default_on_conversion_error_value]( - ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t) + ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * column_nullable, size_t) -> ColumnPtr { if (!WhichDataType(result_type).isFixedString()) throw Exception( ErrorCodes::TYPE_MISMATCH, "Wrong result type {}. Expected FixedString", result_type->getName()); + const auto * null_map = column_nullable ? &column_nullable->getNullMapData() : nullptr; if (cast_ipv4_ipv6_default_on_conversion_error_value) - return convertToIPv6(arguments[0].column); + return convertToIPv6(arguments[0].column, null_map); else - return convertToIPv6(arguments[0].column); + return convertToIPv6(arguments[0].column, null_map); }; return true; diff --git a/src/Functions/SubtractSubSeconds.cpp b/src/Functions/SubtractSubSeconds.cpp index 5eeb24c8748..ac3a66bfc2d 100644 --- a/src/Functions/SubtractSubSeconds.cpp +++ b/src/Functions/SubtractSubSeconds.cpp @@ -9,19 +9,19 @@ using FunctionSubtractNanoseconds = FunctionDateOrDateTimeAddInterval(); -}; +} using FunctionSubtractMicroseconds = FunctionDateOrDateTimeAddInterval; void registerFunctionSubtractMicroseconds(FunctionFactory & factory) { factory.registerFunction(); -}; +} using FunctionSubtractMilliseconds = FunctionDateOrDateTimeAddInterval; void registerFunctionSubtractMilliseconds(FunctionFactory & factory) { factory.registerFunction(); -}; +} } diff --git a/src/Functions/addSubSeconds.cpp b/src/Functions/addSubSeconds.cpp index f58f8b20b99..cb5ffce61e3 100644 --- a/src/Functions/addSubSeconds.cpp +++ b/src/Functions/addSubSeconds.cpp @@ -9,19 +9,19 @@ using FunctionAddNanoseconds = FunctionDateOrDateTimeAddInterval(); -}; +} using FunctionAddMicroseconds = FunctionDateOrDateTimeAddInterval; void registerFunctionAddMicroseconds(FunctionFactory & factory) { factory.registerFunction(); -}; +} using FunctionAddMilliseconds = FunctionDateOrDateTimeAddInterval; void registerFunctionAddMilliseconds(FunctionFactory & factory) { factory.registerFunction(); -}; +} } diff --git a/src/Functions/generateUUIDv4.cpp b/src/Functions/generateUUIDv4.cpp index 659c4c2c7c6..e4ecf5358f9 100644 --- a/src/Functions/generateUUIDv4.cpp +++ b/src/Functions/generateUUIDv4.cpp @@ -5,6 +5,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + #define DECLARE_SEVERAL_IMPLEMENTATIONS(...) 
\ DECLARE_DEFAULT_CODE (__VA_ARGS__) \ DECLARE_AVX2_SPECIFIC_CODE(__VA_ARGS__) @@ -23,10 +28,18 @@ public: size_t getNumberOfArguments() const override { return 0; } + bool isDeterministicInScopeOfQuery() const override { return false; } + bool useDefaultImplementationForNulls() const override { return false; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + bool isVariadic() const override { return true; } - DataTypePtr getReturnTypeImpl(const DataTypes &) const override + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { + if (arguments.size() > 1) + throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " + + toString(arguments.size()) + ", should be 0 or 1.", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + return std::make_shared(); } diff --git a/src/Functions/h3Distance.cpp b/src/Functions/h3Distance.cpp new file mode 100644 index 00000000000..b6206a1922f --- /dev/null +++ b/src/Functions/h3Distance.cpp @@ -0,0 +1,117 @@ +#include "config_functions.h" + +#if USE_H3 + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + + +namespace DB +{ +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int ILLEGAL_COLUMN; +} + +namespace +{ + +class FunctionH3Distance : public IFunction +{ +public: + static constexpr auto name = "h3Distance"; + + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + std::string getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 2; } + bool useDefaultImplementationForConstants() const override { return true; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + const auto * arg = arguments[0].get(); + if (!WhichDataType(arg).isUInt64()) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. Must be UInt64", + arg->getName(), 1, getName()); + + arg = arguments[1].get(); + if (!WhichDataType(arg).isUInt64()) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. Must be UInt64", + arg->getName(), 2, getName()); + + return std::make_shared(); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + auto non_const_arguments = arguments; + for (auto & argument : non_const_arguments) + argument.column = argument.column->convertToFullColumnIfConst(); + + const auto * col_start_index = checkAndGetColumn(non_const_arguments[0].column.get()); + if (!col_start_index) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal type {} of argument {} of function {}. Must be UInt64.", + arguments[0].type->getName(), + 1, + getName()); + + const auto & data_start_index = col_start_index->getData(); + + const auto * col_end_index = checkAndGetColumn(non_const_arguments[1].column.get()); + if (!col_end_index) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal type {} of argument {} of function {}. 
Must be UInt64.", + arguments[1].type->getName(), + 2, + getName()); + + const auto & data_end_index = col_end_index->getData(); + + + auto dst = ColumnVector::create(); + auto & dst_data = dst->getData(); + dst_data.resize(input_rows_count); + + for (size_t row = 0; row < input_rows_count; ++row) + { + const UInt64 start = data_start_index[row]; + const UInt64 end = data_end_index[row]; + + auto size = gridPathCellsSize(start, end); + dst_data[row] = size; + } + + return dst; + } +}; + +} + +void registerFunctionH3Distance(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} + +#endif diff --git a/src/Functions/h3HexRing.cpp b/src/Functions/h3HexRing.cpp new file mode 100644 index 00000000000..cc3acf9d7a5 --- /dev/null +++ b/src/Functions/h3HexRing.cpp @@ -0,0 +1,153 @@ +#include "config_functions.h" + +#if USE_H3 + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int PARAMETER_OUT_OF_BOUND; + extern const int ILLEGAL_COLUMN; + extern const int INCORRECT_DATA; +} + +namespace +{ + +class FunctionH3HexRing : public IFunction +{ +public: + static constexpr auto name = "h3HexRing"; + + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + std::string getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 2; } + bool useDefaultImplementationForConstants() const override { return true; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + const auto * arg = arguments[0].get(); + if (!WhichDataType(arg).isUInt64()) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. Must be UInt64", + arg->getName(), 1, getName()); + + arg = arguments[1].get(); + if (!WhichDataType(arg).isUInt16()) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. Must be UInt16", + arg->getName(), + 2, + getName()); + + return std::make_shared(std::make_shared()); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + auto non_const_arguments = arguments; + for (auto & argument : non_const_arguments) + argument.column = argument.column->convertToFullColumnIfConst(); + + const auto * col_hindex = checkAndGetColumn(non_const_arguments[0].column.get()); + if (!col_hindex) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal type {} of argument {} of function {}. Must be UInt64.", + arguments[0].type->getName(), + 1, + getName()); + + const auto & data_hindex = col_hindex->getData(); + + /// ColumnUInt16 is sufficient as the max value of 2nd arg is checked (arg > 0 < 10000) in implementation below + const auto * col_k = checkAndGetColumn(non_const_arguments[1].column.get()); + if (!col_k) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal type {} of argument {} of function {}. 
Must be UInt16.", + arguments[1].type->getName(), + 2, + getName()); + + const auto & data_k = col_k->getData(); + + auto dst = ColumnArray::create(ColumnUInt64::create()); + auto & dst_data = typeid_cast(dst->getData()); + auto & dst_offsets = dst->getOffsets(); + dst_offsets.resize(input_rows_count); + + /// First calculate array sizes for all rows and save them in Offsets + UInt64 current_offset = 0; + for (size_t row = 0; row < input_rows_count; ++row) + { + const int k = data_k[row]; + + /// The result size is 6*k. We should not allow to generate too large arrays nevertheless. + constexpr auto max_k = 10000; + if (k > max_k) + throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND, "Too large 'k' argument for {} function, maximum {}", getName(), max_k); + /// Check is already made while fetching the argument for k (to determine if it's an unsigned integer). Nevertheless, it's checked again here. + if (k < 0) + throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND, "Argument 'k' for {} function must be non negative", getName()); + + const auto vec_size = (k == 0 ? 1 : 6 * k); /// Required size according to comments in gridRingUnsafe() source code + + current_offset += vec_size; + dst_offsets[row] = current_offset; + } + + /// Allocate based on total size of arrays for all rows + dst_data.getData().resize(current_offset); + + /// Fill the array for each row with known size + auto* ptr = dst_data.getData().data(); + current_offset = 0; + for (size_t row = 0; row < input_rows_count; ++row) + { + const H3Index origin_hindex = data_hindex[row]; + const int k = data_k[row]; + + H3Error err = gridRingUnsafe(origin_hindex, k, ptr + current_offset); + + if (err) + throw Exception(ErrorCodes::INCORRECT_DATA, "Incorrect arguments h3Index: {}, k: {}, error: {}", origin_hindex, k, err); + + const auto size = dst_offsets[row] - current_offset; + current_offset += size; + } + + return dst; + } +}; + +} + +void registerFunctionH3HexRing(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} + +#endif diff --git a/src/Functions/h3Line.cpp b/src/Functions/h3Line.cpp new file mode 100644 index 00000000000..2e92d65b0b9 --- /dev/null +++ b/src/Functions/h3Line.cpp @@ -0,0 +1,143 @@ +#include "config_functions.h" + +#if USE_H3 + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + + +namespace DB +{ +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int ILLEGAL_COLUMN; + extern const int INCORRECT_DATA; +} + +namespace +{ + +class FunctionH3Line : public IFunction +{ +public: + static constexpr auto name = "h3Line"; + + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + std::string getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 2; } + bool useDefaultImplementationForConstants() const override { return true; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + const auto * arg = arguments[0].get(); + if (!WhichDataType(arg).isUInt64()) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. Must be UInt64", + arg->getName(), 1, getName()); + + arg = arguments[1].get(); + if (!WhichDataType(arg).isUInt64()) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. 
Must be UInt64", + arg->getName(), 2, getName()); + + return std::make_shared(std::make_shared()); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + auto non_const_arguments = arguments; + for (auto & argument : non_const_arguments) + argument.column = argument.column->convertToFullColumnIfConst(); + + const auto * col_start_index = checkAndGetColumn(non_const_arguments[0].column.get()); + if (!col_start_index) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal type {} of argument {} of function {}. Must be UInt64.", + arguments[0].type->getName(), + 1, + getName()); + + const auto & data_start_index = col_start_index->getData(); + + const auto * col_end_index = checkAndGetColumn(non_const_arguments[1].column.get()); + if (!col_end_index) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal type {} of argument {} of function {}. Must be UInt64.", + arguments[1].type->getName(), + 2, + getName()); + + const auto & data_end_index = col_end_index->getData(); + + + auto dst = ColumnArray::create(ColumnUInt64::create()); + auto & dst_data = typeid_cast(dst->getData()); + auto & dst_offsets = dst->getOffsets(); + dst_offsets.resize(input_rows_count); + + /// First calculate array sizes for all rows and save them in Offsets + UInt64 current_offset = 0; + for (size_t row = 0; row < input_rows_count; ++row) + { + const UInt64 start = data_start_index[row]; + const UInt64 end = data_end_index[row]; + + auto size = gridPathCellsSize(start, end); + if (size < 0) + throw Exception( + ErrorCodes::INCORRECT_DATA, + "Line cannot be computed between start H3 index {} and end H3 index {}", + start, end); + + current_offset += size; + dst_offsets[row] = current_offset; + } + + /// Allocate based on total size of arrays for all rows + dst_data.getData().resize(current_offset); + + /// Fill the array for each row with known size + auto* ptr = dst_data.getData().data(); + current_offset = 0; + for (size_t row = 0; row < input_rows_count; ++row) + { + const UInt64 start = data_start_index[row]; + const UInt64 end = data_end_index[row]; + const auto size = dst_offsets[row] - current_offset; + gridPathCells(start, end, ptr + current_offset); + current_offset += size; + } + + return dst; + } +}; + +} + +void registerFunctionH3Line(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} + +#endif diff --git a/src/Functions/isIPAddressContainedIn.cpp b/src/Functions/isIPAddressContainedIn.cpp index f7c79299317..5ef247f7346 100644 --- a/src/Functions/isIPAddressContainedIn.cpp +++ b/src/Functions/isIPAddressContainedIn.cpp @@ -149,7 +149,7 @@ namespace DB } } - virtual DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { if (arguments.size() != 2) throw Exception( @@ -166,7 +166,7 @@ namespace DB return std::make_shared(); } - virtual size_t getNumberOfArguments() const override { return 2; } + size_t getNumberOfArguments() const override { return 2; } bool useDefaultImplementationForNulls() const override { return false; } private: diff --git a/src/Functions/partitionId.cpp b/src/Functions/partitionId.cpp index eed3dd31bf9..c5dced68e88 100644 --- a/src/Functions/partitionId.cpp +++ b/src/Functions/partitionId.cpp @@ -45,7 +45,7 @@ public: return std::make_shared(); } - virtual ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + 
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { Block sample_block(arguments); size_t size = arguments.size(); diff --git a/src/Functions/registerFunctions.cpp b/src/Functions/registerFunctions.cpp index 52bfe9b8437..0c67bf81d1e 100644 --- a/src/Functions/registerFunctions.cpp +++ b/src/Functions/registerFunctions.cpp @@ -24,6 +24,7 @@ void registerFunctionsEmbeddedDictionaries(FunctionFactory &); void registerFunctionsExternalDictionaries(FunctionFactory &); void registerFunctionsExternalModels(FunctionFactory &); void registerFunctionsFormatting(FunctionFactory &); +void registerFunctionHashID(FunctionFactory &); void registerFunctionsHashing(FunctionFactory &); void registerFunctionsHigherOrder(FunctionFactory &); void registerFunctionsLogical(FunctionFactory &); @@ -137,6 +138,7 @@ void registerFunctions() #endif registerFunctionTid(factory); registerFunctionLogTrace(factory); + registerFunctionHashID(factory); } } diff --git a/src/Functions/registerFunctionsGeo.cpp b/src/Functions/registerFunctionsGeo.cpp index 9cbe1ed96cf..c6043de3b97 100644 --- a/src/Functions/registerFunctionsGeo.cpp +++ b/src/Functions/registerFunctionsGeo.cpp @@ -57,6 +57,9 @@ void registerFunctionH3PointDistKm(FunctionFactory &); void registerFunctionH3PointDistRads(FunctionFactory &); void registerFunctionH3GetRes0Indexes(FunctionFactory &); void registerFunctionH3GetPentagonIndexes(FunctionFactory &); +void registerFunctionH3Line(FunctionFactory &); +void registerFunctionH3Distance(FunctionFactory &); +void registerFunctionH3HexRing(FunctionFactory &); #endif @@ -128,6 +131,9 @@ void registerFunctionsGeo(FunctionFactory & factory) registerFunctionH3PointDistRads(factory); registerFunctionH3GetRes0Indexes(factory); registerFunctionH3GetPentagonIndexes(factory); + registerFunctionH3Line(factory); + registerFunctionH3Distance(factory); + registerFunctionH3HexRing(factory); #endif #if USE_S2_GEOMETRY diff --git a/src/Functions/toBool.cpp b/src/Functions/toBool.cpp index 70f92edb8ae..7f167744f01 100644 --- a/src/Functions/toBool.cpp +++ b/src/Functions/toBool.cpp @@ -27,20 +27,22 @@ namespace size_t getNumberOfArguments() const override { return 1; } bool useDefaultImplementationForConstants() const override { return true; } + bool useDefaultImplementationForNulls() const override { return false; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } - DataTypePtr getReturnTypeImpl(const DataTypes &) const override + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { - return DataTypeFactory::instance().get("Bool"); + auto bool_type = DataTypeFactory::instance().get("Bool"); + return arguments[0]->isNullable() ? makeNullable(bool_type) : bool_type; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t) const override { ColumnsWithTypeAndName cast_args { arguments[0], { - DataTypeString().createColumnConst(arguments[0].column->size(), "Bool"), + DataTypeString().createColumnConst(arguments[0].column->size(), arguments[0].type->isNullable() ? 
"Nullable(Bool)" : "Bool"), std::make_shared(), "" } @@ -48,7 +50,7 @@ namespace FunctionOverloadResolverPtr func_builder_cast = CastInternalOverloadResolver::createImpl(); auto func_cast = func_builder_cast->build(cast_args); - return func_cast->execute(cast_args, DataTypeFactory::instance().get("Bool"), arguments[0].column->size()); + return func_cast->execute(cast_args, result_type, arguments[0].column->size()); } }; diff --git a/src/IO/ReadBufferFromEncryptedFile.cpp b/src/IO/ReadBufferFromEncryptedFile.cpp index 7aec6dcde02..16517422e26 100644 --- a/src/IO/ReadBufferFromEncryptedFile.cpp +++ b/src/IO/ReadBufferFromEncryptedFile.cpp @@ -96,6 +96,7 @@ bool ReadBufferFromEncryptedFile::nextImpl() working_buffer.resize(bytes_read); encryptor.decrypt(encrypted_buffer.data(), bytes_read, working_buffer.begin()); + offset += bytes_read; pos = working_buffer.begin(); return true; } diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index 69500a5efe4..846b4762cb3 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -1363,7 +1363,7 @@ static void readQuotedFieldInBrackets(String & s, ReadBuffer & buf) } } -void readQuotedFieldIntoString(String & s, ReadBuffer & buf) +void readQuotedField(String & s, ReadBuffer & buf) { s.clear(); @@ -1427,8 +1427,9 @@ void readQuotedFieldIntoString(String & s, ReadBuffer & buf) } } -void readJSONFieldIntoString(String & s, ReadBuffer & buf) +void readJSONField(String & s, ReadBuffer & buf) { + s.clear(); auto parse_func = [](ReadBuffer & in) { skipJSONField(in, "json_field"); }; readParsedValueIntoString(s, buf, parse_func); } diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index c5ffa52c9b3..496b8000441 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -1425,8 +1425,8 @@ struct PcgDeserializer } }; -void readQuotedFieldIntoString(String & s, ReadBuffer & buf); +void readQuotedField(String & s, ReadBuffer & buf); -void readJSONFieldIntoString(String & s, ReadBuffer & buf); +void readJSONField(String & s, ReadBuffer & buf); } diff --git a/src/IO/S3Common.cpp b/src/IO/S3Common.cpp index c277332ef03..d7b3a9bde02 100644 --- a/src/IO/S3Common.cpp +++ b/src/IO/S3Common.cpp @@ -127,7 +127,7 @@ public: AWSEC2MetadataClient& operator =(const AWSEC2MetadataClient && rhs) = delete; AWSEC2MetadataClient(const AWSEC2MetadataClient && rhs) = delete; - virtual ~AWSEC2MetadataClient() override = default; + ~AWSEC2MetadataClient() override = default; using Aws::Internal::AWSHttpResourceClient::GetResource; @@ -264,10 +264,10 @@ public: { } - virtual ~AWSEC2InstanceProfileConfigLoader() override = default; + ~AWSEC2InstanceProfileConfigLoader() override = default; protected: - virtual bool LoadInternal() override + bool LoadInternal() override { auto credentials_str = use_secure_pull ? 
client->getDefaultCredentialsSecurely() : client->getDefaultCredentials(); diff --git a/src/IO/tests/gtest_file_encryption.cpp b/src/IO/tests/gtest_file_encryption.cpp index 3a114f94ee0..e9affee4add 100644 --- a/src/IO/tests/gtest_file_encryption.cpp +++ b/src/IO/tests/gtest_file_encryption.cpp @@ -4,6 +4,12 @@ #include #include #include +#include +#include +#include +#include +#include +#include using namespace DB; @@ -210,4 +216,31 @@ INSTANTIATE_TEST_SUITE_P(All, }) ); +TEST(FileEncryptionPositionUpdateTest, Decryption) +{ + String tmp_path = std::filesystem::current_path() / "test_offset_update"; + if (std::filesystem::exists(tmp_path)) + std::filesystem::remove(tmp_path); + + String key = "1234567812345678"; + FileEncryption::Header header; + header.algorithm = Algorithm::AES_128_CTR; + header.key_id = 1; + header.key_hash = calculateKeyHash(key); + header.init_vector = InitVector::random(); + + auto lwb = std::make_unique(tmp_path); + WriteBufferFromEncryptedFile wb(10, std::move(lwb), key, header); + auto data = getRandomASCIIString(20); + wb.write(data.data(), data.size()); + wb.finalize(); + + auto lrb = std::make_unique(tmp_path); + ReadBufferFromEncryptedFile rb(10, std::move(lrb), key, header); + rb.ignore(5); + rb.ignore(5); + rb.ignore(5); + ASSERT_EQ(rb.getPosition(), 15); +} + #endif diff --git a/src/Interpreters/Access/InterpreterShowGrantsQuery.cpp b/src/Interpreters/Access/InterpreterShowGrantsQuery.cpp index c82088847d3..b5256b64db5 100644 --- a/src/Interpreters/Access/InterpreterShowGrantsQuery.cpp +++ b/src/Interpreters/Access/InterpreterShowGrantsQuery.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -147,13 +148,22 @@ std::vector InterpreterShowGrantsQuery::getEntities() const CachedAccessChecking show_roles(access, AccessType::SHOW_ROLES); bool throw_if_access_denied = !show_query.for_roles->all; + auto current_user = access->getUser(); + auto roles_info = access->getRolesInfo(); + std::vector entities; for (const auto & id : ids) { auto entity = access_control.tryRead(id); if (!entity) continue; - if ((id == access->getUserID() /* Any user can see his own grants */) + + bool is_current_user = (id == access->getUserID()); + bool is_enabled_or_granted_role = entity->isTypeOf() + && ((current_user && current_user->granted_roles.isGranted(id)) || roles_info->enabled_roles.contains(id)); + + if ((is_current_user /* Any user can see his own grants */) + || (is_enabled_or_granted_role /* and grants from the granted roles */) || (entity->isTypeOf() && show_users.checkAccess(throw_if_access_denied)) || (entity->isTypeOf() && show_roles.checkAccess(throw_if_access_denied))) entities.push_back(entity); diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 34f396b978c..4f951d69349 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1378,7 +1378,7 @@ void Context::killCurrentQuery() { process_list_elem->cancelQuery(true); } -}; +} String Context::getDefaultFormat() const { diff --git a/src/Interpreters/ExpressionActions.cpp b/src/Interpreters/ExpressionActions.cpp index 2ba3453110e..2da53a2e258 100644 --- a/src/Interpreters/ExpressionActions.cpp +++ b/src/Interpreters/ExpressionActions.cpp @@ -160,8 +160,11 @@ static void setLazyExecutionInfo( const ActionsDAGReverseInfo::NodeInfo & node_info = reverse_info.nodes_info[reverse_info.reverse_index.at(node)]; /// If node is used in result or it doesn't have parents, we can't enable lazy execution. 
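// Illustrative sketch, not part of this patch: the hunk below adds an early return so that once a
// node is known to be ineligible for lazy (short-circuit) execution, its parents are not visited,
// and only FUNCTION/ALIAS nodes remain candidates. A minimal stand-in model with hypothetical
// Node/NodeType/LazyExecutionInfo types (not the ActionsDAG API):
#include <cassert>
#include <vector>

enum class NodeType { Input, Function, Alias };

struct Node
{
    NodeType type = NodeType::Function;
    bool used_in_result = false;
    std::vector<const Node *> parents;
};

struct LazyExecutionInfo
{
    bool can_be_lazy_executed = true;
};

static void setLazyInfo(const Node & node, LazyExecutionInfo & info)
{
    /// Same shape as the patched check: nodes used in the result, root nodes and nodes that are
    /// neither functions nor aliases disable lazy execution, and we stop walking the parents.
    if (node.used_in_result || node.parents.empty()
        || (node.type != NodeType::Function && node.type != NodeType::Alias))
    {
        info.can_be_lazy_executed = false;
        return;
    }

    for (const auto * parent : node.parents)
        setLazyInfo(*parent, info);
}

int main()
{
    Node root;                      // has no parents, so nothing feeding it can be lazily executed
    Node child;
    child.parents.push_back(&root);

    LazyExecutionInfo info;
    setLazyInfo(child, info);
    assert(!info.can_be_lazy_executed);   // propagation stopped as soon as the root was reached
}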
- if (node_info.used_in_result || node_info.parents.empty()) + if (node_info.used_in_result || node_info.parents.empty() || (node->type != ActionsDAG::ActionType::FUNCTION && node->type != ActionsDAG::ActionType::ALIAS)) + { lazy_execution_info.can_be_lazy_executed = false; + return; + } /// To fill lazy execution info for current node we need to create it for all it's parents. for (const auto & parent : node_info.parents) @@ -172,7 +175,7 @@ static void setLazyExecutionInfo( { /// Use set, because one node can be more than one argument. /// Example: expr1 AND expr2 AND expr1. - std::set indexes; + std::unordered_set indexes; for (size_t i = 0; i != parent->children.size(); ++i) { if (node == parent->children[i]) @@ -294,6 +297,10 @@ static std::unordered_set processShortCircuitFunctions short_circuit_nodes[&node] = short_circuit_settings; } + /// If there are no short-circuit functions, no need to do anything. + if (short_circuit_nodes.empty()) + return {}; + auto reverse_info = getActionsDAGReverseInfo(nodes, actions_dag.getIndex()); /// For each node we fill LazyExecutionInfo. diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index e7325363c08..0b1154f6fd1 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -1426,13 +1426,14 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendOrderBy(ExpressionActionsChai getRootActions(select_query->orderBy(), only_types, step.actions()); bool with_fill = false; - NameSet order_by_keys; for (auto & child : select_query->orderBy()->children) { auto * ast = child->as(); ASTPtr order_expression = ast->children.at(0); - step.addRequiredOutput(order_expression->getColumnName()); + const String & column_name = order_expression->getColumnName(); + step.addRequiredOutput(column_name); + order_by_keys.emplace(column_name); if (ast->with_fill) with_fill = true; @@ -1485,8 +1486,7 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendOrderBy(ExpressionActionsChai if (with_fill) { for (const auto & column : step.getResultColumns()) - if (!order_by_keys.contains(column.name)) - non_constant_inputs.insert(column.name); + non_constant_inputs.insert(column.name); } auto actions = chain.getLastActions(); @@ -1501,18 +1501,22 @@ bool SelectQueryExpressionAnalyzer::appendLimitBy(ExpressionActionsChain & chain if (!select_query->limitBy()) return false; - /// Use columns for ORDER BY. - /// They could be required to do ORDER BY on the initiator in case of distributed queries. - ExpressionActionsChain::Step & step = chain.lastStep(chain.getLastStep().getRequiredColumns()); + ExpressionActionsChain::Step & step = chain.lastStep(aggregated_columns); getRootActions(select_query->limitBy(), only_types, step.actions()); NameSet existing_column_names; - for (const auto & column : chain.getLastStep().getRequiredColumns()) + for (const auto & column : aggregated_columns) { step.addRequiredOutput(column.name); existing_column_names.insert(column.name); } + /// Columns from ORDER BY could be required to do ORDER BY on the initiator in case of distributed queries. 
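// Illustrative sketch, not part of this patch: appendOrderBy() now remembers the ORDER BY column
// names (order_by_keys), and the loop added below re-declares them as required outputs of the
// LIMIT BY step so distributed queries can still sort on the initiator. A self-contained stand-in
// with a hypothetical Step type:
#include <iostream>
#include <set>
#include <string>
#include <vector>

struct Step
{
    std::set<std::string> required_outputs;      // a set, so re-adding a column is harmless
    void addRequiredOutput(const std::string & name) { required_outputs.insert(name); }
};

int main()
{
    std::set<std::string> order_by_keys = {"date", "id"};          // saved while appending ORDER BY
    std::vector<std::string> aggregated_columns = {"id", "value"};

    Step limit_by_step;
    for (const auto & column : aggregated_columns)   // columns the LIMIT BY step already needs
        limit_by_step.addRequiredOutput(column);
    for (const auto & key : order_by_keys)           // plus the ORDER BY keys, kept for the initiator
        limit_by_step.addRequiredOutput(key);

    for (const auto & name : limit_by_step.required_outputs)
        std::cout << name << '\n';                   // date, id, value
}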
+ for (const auto & column_name : order_by_keys) + { + step.addRequiredOutput(column_name); + existing_column_names.insert(column_name); + } auto & children = select_query->limitBy()->children; for (auto & child : children) diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h index b3704095c92..85efb3829d0 100644 --- a/src/Interpreters/ExpressionAnalyzer.h +++ b/src/Interpreters/ExpressionAnalyzer.h @@ -61,6 +61,8 @@ struct ExpressionAnalyzerData NamesAndTypesList aggregated_columns; /// Columns after window functions. NamesAndTypesList columns_after_window; + /// Keys of ORDER BY + NameSet order_by_keys; bool has_aggregation = false; NamesAndTypesList aggregation_keys; diff --git a/src/Interpreters/FilesystemCacheLog.cpp b/src/Interpreters/FilesystemCacheLog.cpp index 609305321b1..f03472c45e0 100644 --- a/src/Interpreters/FilesystemCacheLog.cpp +++ b/src/Interpreters/FilesystemCacheLog.cpp @@ -75,4 +75,4 @@ void FilesystemCacheLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insert(read_buffer_id); } -}; +} diff --git a/src/Interpreters/FilesystemCacheLog.h b/src/Interpreters/FilesystemCacheLog.h index 77bae7d788a..a3624867aec 100644 --- a/src/Interpreters/FilesystemCacheLog.h +++ b/src/Interpreters/FilesystemCacheLog.h @@ -58,4 +58,4 @@ class FilesystemCacheLog : public SystemLog using SystemLog::SystemLog; }; -}; +} diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index 138d10efb35..b58059312bd 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -1176,7 +1176,7 @@ void addFoundRowAll( ++current_offset; } } -}; +} template void addNotFoundRow(AddedColumns & added [[maybe_unused]], IColumn::Offset & current_offset [[maybe_unused]]) @@ -1356,8 +1356,8 @@ IColumn::Filter joinRightColumnsSwitchMultipleDisjuncts( JoinStuff::JoinUsedFlags & used_flags [[maybe_unused]]) { return mapv.size() > 1 - ? joinRightColumns(std::forward>(key_getter_vector), mapv, added_columns, used_flags) - : joinRightColumns(std::forward>(key_getter_vector), mapv, added_columns, used_flags); + ? joinRightColumns(std::forward>(key_getter_vector), mapv, added_columns, used_flags) + : joinRightColumns(std::forward>(key_getter_vector), mapv, added_columns, used_flags); } template diff --git a/src/Interpreters/InternalTextLogsQueue.cpp b/src/Interpreters/InternalTextLogsQueue.cpp index 2172a6f4261..6176e3cc865 100644 --- a/src/Interpreters/InternalTextLogsQueue.cpp +++ b/src/Interpreters/InternalTextLogsQueue.cpp @@ -38,6 +38,7 @@ MutableColumns InternalTextLogsQueue::getSampleColumns() void InternalTextLogsQueue::pushBlock(Block && log_block) { + OvercommitTrackerBlockerInThread blocker; static Block sample_block = getSampleBlock(); if (blocksHaveEqualStructure(sample_block, log_block)) diff --git a/src/Interpreters/InternalTextLogsQueue.h b/src/Interpreters/InternalTextLogsQueue.h index 28841598d30..a7193a55178 100644 --- a/src/Interpreters/InternalTextLogsQueue.h +++ b/src/Interpreters/InternalTextLogsQueue.h @@ -1,5 +1,6 @@ #pragma once #include +#include #include @@ -17,6 +18,62 @@ public: static Block getSampleBlock(); static MutableColumns getSampleColumns(); + template + bool push(Args &&... args) + { + OvercommitTrackerBlockerInThread blocker; + return ConcurrentBoundedQueue::push(std::forward(args)...); + } + + template + bool emplace(Args &&... 
args) + { + OvercommitTrackerBlockerInThread blocker; + return ConcurrentBoundedQueue::emplace(std::forward(args)...); + } + + template + bool pop(Args &&... args) + { + OvercommitTrackerBlockerInThread blocker; + return ConcurrentBoundedQueue::pop(std::forward(args)...); + } + + template + bool tryPush(Args &&... args) + { + OvercommitTrackerBlockerInThread blocker; + return ConcurrentBoundedQueue::tryPush(std::forward(args)...); + } + + template + bool tryEmplace(Args &&... args) + { + OvercommitTrackerBlockerInThread blocker; + return ConcurrentBoundedQueue::tryEmplace(std::forward(args)...); + } + + template + bool tryPop(Args &&... args) + { + OvercommitTrackerBlockerInThread blocker; + return ConcurrentBoundedQueue::tryPop(std::forward(args)...); + } + + template + void clear(Args &&... args) + { + OvercommitTrackerBlockerInThread blocker; + return ConcurrentBoundedQueue::clear(std::forward(args)...); + } + + template + void clearAndFinish(Args &&... args) + { + OvercommitTrackerBlockerInThread blocker; + return ConcurrentBoundedQueue::clearAndFinish(std::forward(args)...); + } + /// Is used to pass block from remote server to the client void pushBlock(Block && log_block); diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index dec660d84a8..df53333b635 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -31,6 +31,7 @@ #include #include +#include #include #include @@ -461,8 +462,12 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription( { const auto & col_decl = ast->as(); - DataTypePtr column_type = nullptr; + if (col_decl.collation && !context_->getSettingsRef().compatibility_ignore_collation_in_create_table) + { + throw Exception("Cannot support collation, please set compatibility_ignore_collation_in_create_table=true", ErrorCodes::NOT_IMPLEMENTED); + } + DataTypePtr column_type = nullptr; if (col_decl.type) { column_type = DataTypeFactory::instance().get(col_decl.type); @@ -1446,12 +1451,18 @@ BlockIO InterpreterCreateQuery::fillTableIfNeeded(const ASTCreateQuery & create) { /// If the query is a CREATE SELECT, insert the data into the table. 
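// Illustrative sketch, not part of this patch: the condition changed in the hunk below makes
// CREATE ... SELECT treat window views like materialized views, inserting data immediately only
// when POPULATE is given. A compact check of that predicate, using a hypothetical flags struct
// rather than ASTCreateQuery:
#include <cassert>

struct CreateFlags
{
    bool select = true, attach = false;
    bool is_ordinary_view = false, is_live_view = false;
    bool is_materialized_view = false, is_window_view = false;
    bool is_populate = false;
};

static bool shouldFillTable(const CreateFlags & c)
{
    /// Mirrors the new form of the condition in fillTableIfNeeded().
    return c.select && !c.attach
        && !c.is_ordinary_view && !c.is_live_view
        && (!(c.is_materialized_view || c.is_window_view) || c.is_populate);
}

int main()
{
    CreateFlags plain_select;                                   // CREATE TABLE ... AS SELECT ...
    CreateFlags mv;           mv.is_materialized_view = true;
    CreateFlags mv_populate = mv;  mv_populate.is_populate = true;
    CreateFlags wv;           wv.is_window_view = true;
    CreateFlags wv_populate = wv;  wv_populate.is_populate = true;

    assert(shouldFillTable(plain_select));    // plain CREATE ... SELECT inserts immediately
    assert(!shouldFillTable(mv));             // materialized view without POPULATE does not
    assert(shouldFillTable(mv_populate));     // ... unless POPULATE is given
    assert(!shouldFillTable(wv));             // the same now holds for window views
    assert(shouldFillTable(wv_populate));
}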
if (create.select && !create.attach - && !create.is_ordinary_view && !create.is_live_view && !create.is_window_view - && (!create.is_materialized_view || create.is_populate)) + && !create.is_ordinary_view && !create.is_live_view + && (!(create.is_materialized_view || create.is_window_view) || create.is_populate)) { auto insert = std::make_shared(); insert->table_id = {create.getDatabase(), create.getTable(), create.uuid}; - insert->select = create.select->clone(); + if (create.is_window_view) + { + auto table = DatabaseCatalog::instance().getTable(insert->table_id, getContext()); + insert->select = typeid_cast(table.get())->getSourceTableSelectQuery(); + } + else + insert->select = create.select->clone(); return InterpreterInsertQuery(insert, getContext(), getContext()->getSettingsRef().insert_allow_materialized_columns).execute(); diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 9db61bcfc9d..ee902f5984a 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -102,7 +103,9 @@ Block InterpreterInsertQuery::getSampleBlock( /// If the query does not include information about columns if (!query.columns) { - if (no_destination) + if (auto * window_view = dynamic_cast(table.get())) + return window_view->getInputHeader(); + else if (no_destination) return metadata_snapshot->getSampleBlockWithVirtuals(table->getVirtuals()); else return metadata_snapshot->getSampleBlockNonMaterialized(); @@ -203,7 +206,7 @@ static bool isTrivialSelect(const ASTPtr & select) } /// This query is ASTSelectWithUnionQuery subquery return false; -}; +} Chain InterpreterInsertQuery::buildChain( const StoragePtr & table, diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 6bfadc66352..d143295181e 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -1093,7 +1093,10 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

desc->type == ProjectionDescription::Type::Aggregate) { @@ -1168,6 +1171,12 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

{}", QueryProcessingStage::toString(from_stage), QueryProcessingStage::toString(options.to_stage)); } + if (query_info.projection && query_info.projection->input_order_info && query_info.input_order_info) + throw Exception("InputOrderInfo is set for projection and for query", ErrorCodes::LOGICAL_ERROR); + InputOrderInfoPtr input_order_info_for_order; + if (!expressions.need_aggregate) + input_order_info_for_order = query_info.projection ? query_info.projection->input_order_info : query_info.input_order_info; + if (options.to_stage > QueryProcessingStage::FetchColumns) { auto preliminary_sort = [&]() @@ -1183,10 +1192,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

input_order_info : nullptr)); + executeOrder(query_plan, input_order_info_for_order); if (expressions.has_order_by && query.limitLength()) executeDistinct(query_plan, false, expressions.selected_columns, true); @@ -1311,16 +1317,9 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

input_order_info.reset(); - } - // Now we must execute: // 1) expressions before window functions, // 2) window functions, @@ -1455,10 +1454,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

input_order_info : nullptr)); + executeOrder(query_plan, input_order_info_for_order); } /** Optimization - if there are several sources and there is LIMIT, then first apply the preliminary LIMIT, @@ -2358,8 +2354,11 @@ void InterpreterSelectQuery::executeTotalsAndHaving( { const Settings & settings = context->getSettingsRef(); + const auto & header_before = query_plan.getCurrentDataStream().header; + auto totals_having_step = std::make_unique( query_plan.getCurrentDataStream(), + getAggregatesMask(header_before, query_analyzer->aggregates()), overflow_row, expression, has_having ? getSelectQuery().having()->getColumnName() : "", @@ -2744,12 +2743,6 @@ void InterpreterSelectQuery::executeExtremes(QueryPlan & query_plan) void InterpreterSelectQuery::executeSubqueriesInSetsAndJoins(QueryPlan & query_plan, SubqueriesForSets & subqueries_for_sets) { - // const auto & input_order_info = query_info.input_order_info - // ? query_info.input_order_info - // : (query_info.projection ? query_info.projection->input_order_info : nullptr); - // if (input_order_info) - // executeMergeSorted(query_plan, input_order_info->order_key_prefix_descr, 0, "before creating sets for subqueries and joins"); - const Settings & settings = context->getSettingsRef(); SizeLimits limits(settings.max_rows_to_transfer, settings.max_bytes_to_transfer, settings.transfer_overflow_mode); diff --git a/src/Interpreters/OpenTelemetrySpanLog.cpp b/src/Interpreters/OpenTelemetrySpanLog.cpp index 36ffd617cd6..80c170339b7 100644 --- a/src/Interpreters/OpenTelemetrySpanLog.cpp +++ b/src/Interpreters/OpenTelemetrySpanLog.cpp @@ -46,7 +46,7 @@ NamesAndAliases OpenTelemetrySpanLogElement::getNamesAndAliases() return { {"attribute.names", std::make_shared(std::make_shared()), "mapKeys(attribute)"}, - {"attribute.values", std::make_shared(std::make_shared()), "mapKeys(attribute)"} + {"attribute.values", std::make_shared(std::make_shared()), "mapValues(attribute)"} }; } diff --git a/src/Interpreters/SessionLog.cpp b/src/Interpreters/SessionLog.cpp index 7772c8cdb3b..3523b2f1be0 100644 --- a/src/Interpreters/SessionLog.cpp +++ b/src/Interpreters/SessionLog.cpp @@ -61,7 +61,7 @@ void fillColumnArray(const Strings & data, IColumn & column) } auto & offsets = array.getOffsets(); offsets.push_back(offsets.back() + size); -}; +} } diff --git a/src/Interpreters/UserDefinedExecutableFunction.cpp b/src/Interpreters/UserDefinedExecutableFunction.cpp index e5a852b0e75..477a1b10f3c 100644 --- a/src/Interpreters/UserDefinedExecutableFunction.cpp +++ b/src/Interpreters/UserDefinedExecutableFunction.cpp @@ -21,4 +21,4 @@ UserDefinedExecutableFunction::UserDefinedExecutableFunction( { } -}; +} diff --git a/src/Interpreters/ZooKeeperLog.cpp b/src/Interpreters/ZooKeeperLog.cpp index 2828c3e2fa8..6394b1d5429 100644 --- a/src/Interpreters/ZooKeeperLog.cpp +++ b/src/Interpreters/ZooKeeperLog.cpp @@ -209,4 +209,4 @@ void ZooKeeperLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insert(children_array); } -}; +} diff --git a/src/Interpreters/executeDDLQueryOnCluster.cpp b/src/Interpreters/executeDDLQueryOnCluster.cpp index b280d98b5d3..b68a8554342 100644 --- a/src/Interpreters/executeDDLQueryOnCluster.cpp +++ b/src/Interpreters/executeDDLQueryOnCluster.cpp @@ -82,6 +82,10 @@ BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr_, ContextPtr context, } query->cluster = context->getMacros()->expand(query->cluster); + + /// TODO: support per-cluster grant + context->checkAccess(AccessType::CLUSTER); + ClusterPtr cluster = params.cluster ? 
params.cluster : context->getCluster(query->cluster); DDLWorker & ddl_worker = context->getDDLWorker(); diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index d1596c08318..3c03bea3dd1 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -158,11 +158,11 @@ static String prepareQueryForLogging(const String & query, ContextPtr context) /// Log query into text log (not into system table). -static void logQuery(const String & query, ContextPtr context, bool internal) +static void logQuery(const String & query, ContextPtr context, bool internal, QueryProcessingStage::Enum stage) { if (internal) { - LOG_DEBUG(&Poco::Logger::get("executeQuery"), "(internal) {}", joinLines(query)); + LOG_DEBUG(&Poco::Logger::get("executeQuery"), "(internal) {} (stage: {})", joinLines(query), QueryProcessingStage::toString(stage)); } else { @@ -185,13 +185,14 @@ static void logQuery(const String & query, ContextPtr context, bool internal) if (auto txn = context->getCurrentTransaction()) transaction_info = fmt::format(" (TID: {}, TIDH: {})", txn->tid, txn->tid.getHash()); - LOG_DEBUG(&Poco::Logger::get("executeQuery"), "(from {}{}{}){}{} {}", + LOG_DEBUG(&Poco::Logger::get("executeQuery"), "(from {}{}{}){}{} {} (stage: {})", client_info.current_address.toString(), (current_user != "default" ? ", user: " + current_user : ""), (!initial_query_id.empty() && current_query_id != initial_query_id ? ", initial_query_id: " + initial_query_id : std::string()), transaction_info, comment, - joinLines(query)); + joinLines(query), + QueryProcessingStage::toString(stage)); if (client_info.client_trace_context.trace_id != UUID()) { @@ -498,7 +499,7 @@ static std::tuple executeQueryImpl( String query = String(begin, begin + std::min(end - begin, static_cast(max_query_size))); auto query_for_logging = prepareQueryForLogging(query, context); - logQuery(query_for_logging, context, internal); + logQuery(query_for_logging, context, internal, stage); if (!internal) { @@ -548,7 +549,7 @@ static std::tuple executeQueryImpl( /// since it substitute parameters and without them query does not contain /// parameters), to keep query as-is in query_log and server log. query_for_logging = prepareQueryForLogging(query, context); - logQuery(query_for_logging, context, internal); + logQuery(query_for_logging, context, internal, stage); /// Propagate WITH statement to children ASTSelect. 
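// Illustrative sketch, not part of this patch: logQuery() now also receives the target
// QueryProcessingStage, so both internal and client-issued query log lines report the stage.
// A minimal standard-library stand-in (Stage and toString are simplified local stand-ins):
#include <iostream>
#include <string>

enum class Stage { FetchColumns, WithMergeableState, Complete };

static const char * toString(Stage stage)
{
    switch (stage)
    {
        case Stage::FetchColumns:       return "FetchColumns";
        case Stage::WithMergeableState: return "WithMergeableState";
        case Stage::Complete:           return "Complete";
    }
    return "Unknown";
}

static void logQuery(const std::string & query, bool internal, Stage stage)
{
    if (internal)
        std::cout << "(internal) " << query << " (stage: " << toString(stage) << ")\n";
    else
        std::cout << query << " (stage: " << toString(stage) << ")\n";
}

int main()
{
    logQuery("SELECT 1", /*internal=*/ true, Stage::Complete);
    logQuery("SELECT count() FROM t", /*internal=*/ false, Stage::WithMergeableState);
}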
if (settings.enable_global_with_statement) diff --git a/src/Loggers/Loggers.cpp b/src/Loggers/Loggers.cpp index 512e44f79c7..70205998bb5 100644 --- a/src/Loggers/Loggers.cpp +++ b/src/Loggers/Loggers.cpp @@ -32,7 +32,7 @@ static std::string createDirectory(const std::string & file) return ""; fs::create_directories(path); return path; -}; +} #ifdef WITH_TEXT_LOG void Loggers::setTextLog(std::shared_ptr log, int max_priority) diff --git a/src/Parsers/ASTCollation.cpp b/src/Parsers/ASTCollation.cpp new file mode 100644 index 00000000000..19716414581 --- /dev/null +++ b/src/Parsers/ASTCollation.cpp @@ -0,0 +1,21 @@ +#include + +namespace DB +{ + ASTPtr ASTCollation::clone() const + { + auto res = std::make_shared(*this); + res->collation = collation->clone(); + return res; + } + + void ASTCollation::formatImpl(const FormatSettings &s, FormatState &state, FormatStateStacked frame) const + { + if (collation) + { + collation->formatImpl(s, state, frame); + } + + } + +} diff --git a/src/Parsers/ASTCollation.h b/src/Parsers/ASTCollation.h new file mode 100644 index 00000000000..a735956a90e --- /dev/null +++ b/src/Parsers/ASTCollation.h @@ -0,0 +1,20 @@ +#pragma once + +#include + +namespace DB +{ + +class ASTCollation : public IAST +{ +public: + ASTPtr collation = nullptr; + + String getID(char) const override { return "Collation"; } + + ASTPtr clone() const override; + + void formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const override; +}; + +} diff --git a/src/Parsers/ASTColumnDeclaration.cpp b/src/Parsers/ASTColumnDeclaration.cpp index 888cd639fb0..dc5651d9f14 100644 --- a/src/Parsers/ASTColumnDeclaration.cpp +++ b/src/Parsers/ASTColumnDeclaration.cpp @@ -43,6 +43,11 @@ ASTPtr ASTColumnDeclaration::clone() const res->ttl = ttl->clone(); res->children.push_back(res->ttl); } + if (collation) + { + res->collation = collation->clone(); + res->children.push_back(res->collation); + } return res; } @@ -97,6 +102,12 @@ void ASTColumnDeclaration::formatImpl(const FormatSettings & settings, FormatSta settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << "TTL" << (settings.hilite ? hilite_none : "") << ' '; ttl->formatImpl(settings, state, frame); } + + if (collation) + { + settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << "COLLATE" << (settings.hilite ? hilite_none : "") << ' '; + collation->formatImpl(settings, state, frame); + } } } diff --git a/src/Parsers/ASTColumnDeclaration.h b/src/Parsers/ASTColumnDeclaration.h index ea17a8b4dfa..5ecfb859abc 100644 --- a/src/Parsers/ASTColumnDeclaration.h +++ b/src/Parsers/ASTColumnDeclaration.h @@ -19,6 +19,7 @@ public: ASTPtr comment; ASTPtr codec; ASTPtr ttl; + ASTPtr collation; String getID(char delim) const override { return "ColumnDeclaration" + (delim + name); } diff --git a/src/Parsers/ASTCreateQuery.cpp b/src/Parsers/ASTCreateQuery.cpp index 23881cd3fbb..9bae29d47ff 100644 --- a/src/Parsers/ASTCreateQuery.cpp +++ b/src/Parsers/ASTCreateQuery.cpp @@ -394,15 +394,18 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat frame.expression_list_always_start_on_new_line = false; //-V519 + if (inner_storage) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << " INNER" << (settings.hilite ? hilite_none : ""); + inner_storage->formatImpl(settings, state, frame); + } + if (storage) storage->formatImpl(settings, state, frame); if (dictionary) dictionary->formatImpl(settings, state, frame); - if (is_populate) - settings.ostr << (settings.hilite ? 
hilite_keyword : "") << " POPULATE" << (settings.hilite ? hilite_none : ""); - if (is_watermark_strictly_ascending) { settings.ostr << (settings.hilite ? hilite_keyword : "") << " WATERMARK STRICTLY_ASCENDING" << (settings.hilite ? hilite_none : ""); @@ -423,6 +426,9 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat lateness_function->formatImpl(settings, state, frame); } + if (is_populate) + settings.ostr << (settings.hilite ? hilite_keyword : "") << " POPULATE" << (settings.hilite ? hilite_none : ""); + if (select) { settings.ostr << (settings.hilite ? hilite_keyword : "") << " AS" diff --git a/src/Parsers/ASTCreateQuery.h b/src/Parsers/ASTCreateQuery.h index 28a4b2a2932..596baa3eb3c 100644 --- a/src/Parsers/ASTCreateQuery.h +++ b/src/Parsers/ASTCreateQuery.h @@ -76,6 +76,7 @@ public: StorageID to_table_id = StorageID::createEmpty(); /// For CREATE MATERIALIZED VIEW mv TO table. UUID to_inner_uuid = UUIDHelpers::Nil; /// For materialized view with inner table + ASTStorage * inner_storage = nullptr; /// For window view with inner table ASTStorage * storage = nullptr; ASTPtr watermark_function; ASTPtr lateness_function; diff --git a/src/Parsers/Access/ParserCreateUserQuery.cpp b/src/Parsers/Access/ParserCreateUserQuery.cpp index b7d49be879e..eda2505f88d 100644 --- a/src/Parsers/Access/ParserCreateUserQuery.cpp +++ b/src/Parsers/Access/ParserCreateUserQuery.cpp @@ -46,7 +46,7 @@ namespace } - bool parseAuthenticationData(IParserBase::Pos & pos, Expected & expected, bool id_mode, AuthenticationData & auth_data) + bool parseAuthenticationData(IParserBase::Pos & pos, Expected & expected, AuthenticationData & auth_data) { return IParserBase::wrapParseImpl(pos, [&] { @@ -120,7 +120,7 @@ namespace return false; value = ast->as().value.safeGet(); - if (id_mode && expect_hash) + if (expect_hash && type == AuthenticationType::SHA256_PASSWORD) { if (ParserKeyword{"SALT"}.ignore(pos, expected) && ParserStringLiteral{}.parse(pos, ast, expected)) { @@ -447,7 +447,7 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (!auth_data) { AuthenticationData new_auth_data; - if (parseAuthenticationData(pos, expected, attach_mode, new_auth_data)) + if (parseAuthenticationData(pos, expected, new_auth_data)) { auth_data = std::move(new_auth_data); continue; diff --git a/src/Parsers/CMakeLists.txt b/src/Parsers/CMakeLists.txt index b2c31366929..73f300fd5f6 100644 --- a/src/Parsers/CMakeLists.txt +++ b/src/Parsers/CMakeLists.txt @@ -4,7 +4,7 @@ add_headers_and_sources(clickhouse_parsers .) add_headers_and_sources(clickhouse_parsers ./Access) add_headers_and_sources(clickhouse_parsers ./MySQL) add_library(clickhouse_parsers ${clickhouse_parsers_headers} ${clickhouse_parsers_sources}) -target_link_libraries(clickhouse_parsers PUBLIC clickhouse_common_io clickhouse_common_access) +target_link_libraries(clickhouse_parsers PUBLIC clickhouse_common_io clickhouse_common_access string_utils) if (USE_DEBUG_HELPERS) # CMake generator expression will do insane quoting when it encounters special character like quotes, spaces, etc. 
diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 9150fee3bde..633b8c2e35a 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -7,8 +7,10 @@ #include #include #include +#include #include +#include #include #include #include @@ -1447,6 +1449,31 @@ bool ParserCodec::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return true; } +bool ParserCollation::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ASTPtr collation; + + if (!ParserIdentifier(true).parse(pos, collation, expected)) + return false; + + // check the collation name is valid + const String name = getIdentifierName(collation); + + bool valid_collation = name == "binary" || + endsWith(name, "_bin") || + endsWith(name, "_ci") || + endsWith(name, "_cs") || + endsWith(name, "_ks"); + + if (!valid_collation) + return false; + + auto collation_node = std::make_shared(); + collation_node->collation = collation; + node = collation_node; + return true; +} + template static bool isOneOf(TokenType token) diff --git a/src/Parsers/ExpressionElementParsers.h b/src/Parsers/ExpressionElementParsers.h index 10b4f5fd7d1..f4dfe80f43e 100644 --- a/src/Parsers/ExpressionElementParsers.h +++ b/src/Parsers/ExpressionElementParsers.h @@ -217,6 +217,18 @@ protected: bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; +/** Parse collation + * COLLATE utf8_unicode_ci NOT NULL + */ +class ParserCollation : public IParserBase +{ +protected: + const char * getName() const override { return "collation"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +private: + static const char * valid_collations[]; +}; + /// Fast path of cast operator "::". /// It tries to read literal as text. /// If it fails, later operator will be transformed to function CAST. 
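ParserCollation::parseImpl above accepts "binary" plus MySQL-style collation names ending in _bin, _ci, _cs, or _ks. A self-contained sketch of that acceptance rule, assuming only the standard library (endsWith and isValidCollationName are local helpers written for illustration, not the ClickHouse functions):

    #include <iostream>
    #include <string>

    static bool endsWith(const std::string & s, const std::string & suffix)
    {
        return s.size() >= suffix.size()
            && s.compare(s.size() - suffix.size(), suffix.size(), suffix) == 0;
    }

    // Mirrors the check in the hunk above: "binary" or a *_bin / *_ci / *_cs / *_ks suffix.
    static bool isValidCollationName(const std::string & name)
    {
        return name == "binary"
            || endsWith(name, "_bin")
            || endsWith(name, "_ci")
            || endsWith(name, "_cs")
            || endsWith(name, "_ks");
    }

    int main()
    {
        std::cout << isValidCollationName("utf8_unicode_ci") << '\n'; // 1: accepted
        std::cout << isValidCollationName("latin1") << '\n';          // 0: rejected, parse fails
    }

When the name does not match, the parser returns false, so an unrecognized identifier after COLLATE is reported as a syntax error instead of being silently wrapped into an ASTCollation node.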
diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index 0c36aeb3141..e57cfece806 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -830,11 +830,13 @@ bool ParserCreateWindowViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & ParserKeyword s_as("AS"); ParserKeyword s_view("VIEW"); ParserKeyword s_window("WINDOW"); + ParserKeyword s_populate("POPULATE"); ParserToken s_dot(TokenType::Dot); ParserToken s_eq(TokenType::Equals); ParserToken s_lparen(TokenType::OpeningRoundBracket); ParserToken s_rparen(TokenType::ClosingRoundBracket); ParserStorage storage_p; + ParserStorage storage_inner; ParserTablePropertiesDeclarationList table_properties_p; ParserIntervalOperatorExpression watermark_p; ParserIntervalOperatorExpression lateness_p; @@ -844,6 +846,7 @@ bool ParserCreateWindowViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & ASTPtr to_table; ASTPtr columns_list; ASTPtr storage; + ASTPtr inner_storage; ASTPtr watermark; ASTPtr lateness; ASTPtr as_database; @@ -857,6 +860,7 @@ bool ParserCreateWindowViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & bool is_watermark_bounded = false; bool allowed_lateness = false; bool if_not_exists = false; + bool is_populate = false; if (!s_create.ignore(pos, expected)) { @@ -901,8 +905,17 @@ bool ParserCreateWindowViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & return false; } - /// Inner table ENGINE for WINDOW VIEW - storage_p.parse(pos, storage, expected); + if (ParserKeyword{"INNER"}.ignore(pos, expected)) + { + /// Inner table ENGINE for WINDOW VIEW + storage_inner.parse(pos, inner_storage, expected); + } + + if (!to_table) + { + /// Target table ENGINE for WINDOW VIEW + storage_p.parse(pos, storage, expected); + } // WATERMARK if (ParserKeyword{"WATERMARK"}.ignore(pos, expected)) @@ -929,6 +942,9 @@ bool ParserCreateWindowViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & return false; } + if (s_populate.ignore(pos, expected)) + is_populate = true; + /// AS SELECT ... 
if (!s_as.ignore(pos, expected)) return false; @@ -955,12 +971,14 @@ bool ParserCreateWindowViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & query->set(query->columns_list, columns_list); query->set(query->storage, storage); + query->set(query->inner_storage, inner_storage); query->is_watermark_strictly_ascending = is_watermark_strictly_ascending; query->is_watermark_ascending = is_watermark_ascending; query->is_watermark_bounded = is_watermark_bounded; query->watermark_function = watermark; query->allowed_lateness = allowed_lateness; query->lateness_function = lateness; + query->is_populate = is_populate; tryGetIdentifierNameInto(as_database, query->as_database); tryGetIdentifierNameInto(as_table, query->as_table); diff --git a/src/Parsers/ParserCreateQuery.h b/src/Parsers/ParserCreateQuery.h index 787e1622617..29cd08554b5 100644 --- a/src/Parsers/ParserCreateQuery.h +++ b/src/Parsers/ParserCreateQuery.h @@ -132,10 +132,12 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E ParserKeyword s_ttl{"TTL"}; ParserKeyword s_remove{"REMOVE"}; ParserKeyword s_type{"TYPE"}; + ParserKeyword s_collate{"COLLATE"}; ParserTernaryOperatorExpression expr_parser; ParserStringLiteral string_literal_parser; ParserLiteral literal_parser; ParserCodec codec_parser; + ParserCollation collation_parser; ParserExpression expression_parser; /// mandatory column name @@ -171,6 +173,7 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E ASTPtr comment_expression; ASTPtr codec_expression; ASTPtr ttl_expression; + ASTPtr collation_expression; if (!s_default.checkWithoutMoving(pos, expected) && !s_materialized.checkWithoutMoving(pos, expected) @@ -185,6 +188,11 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E return false; if (!type_parser.parse(pos, type, expected)) return false; + if (s_collate.ignore(pos, expected)) + { + if (!collation_parser.parse(pos, collation_expression, expected)) + return false; + } } Pos pos_before_specifier = pos; @@ -287,6 +295,11 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E column_declaration->ttl = ttl_expression; column_declaration->children.push_back(std::move(ttl_expression)); } + if (collation_expression) + { + column_declaration->collation = collation_expression; + column_declaration->children.push_back(std::move(collation_expression)); + } return true; } @@ -406,7 +419,7 @@ protected: bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; -/// CREATE|ATTACH WINDOW VIEW [IF NOT EXISTS] [db.]name [TO [db.]name] [ENGINE [db.]name] [WATERMARK function] AS SELECT ... +/// CREATE|ATTACH WINDOW VIEW [IF NOT EXISTS] [db.]name [TO [db.]name] [INNER ENGINE engine] [ENGINE engine] [WATERMARK strategy] [ALLOWED_LATENESS interval_function] [POPULATE] AS SELECT ... 
class ParserCreateWindowViewQuery : public IParserBase { protected: diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index f8de8ed90e6..5b6d49e2741 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -261,10 +261,18 @@ INSTANTIATE_TEST_SUITE_P(ParserCreateUserQuery, ParserTest, "CREATE USER user1 IDENTIFIED WITH sha256_password BY 'qwe123'", "CREATE USER user1 IDENTIFIED WITH sha256_hash BY '[A-Za-z0-9]{64}' SALT '[A-Za-z0-9]{64}'" }, + { + "CREATE USER user1 IDENTIFIED WITH sha256_hash BY '7A37B85C8918EAC19A9089C0FA5A2AB4DCE3F90528DCDEEC108B23DDF3607B99' SALT 'salt'", + "CREATE USER user1 IDENTIFIED WITH sha256_hash BY '7A37B85C8918EAC19A9089C0FA5A2AB4DCE3F90528DCDEEC108B23DDF3607B99' SALT 'salt'" + }, { "ALTER USER user1 IDENTIFIED WITH sha256_password BY 'qwe123'", "ALTER USER user1 IDENTIFIED WITH sha256_hash BY '[A-Za-z0-9]{64}' SALT '[A-Za-z0-9]{64}'" }, + { + "ALTER USER user1 IDENTIFIED WITH sha256_hash BY '7A37B85C8918EAC19A9089C0FA5A2AB4DCE3F90528DCDEEC108B23DDF3607B99' SALT 'salt'", + "ALTER USER user1 IDENTIFIED WITH sha256_hash BY '7A37B85C8918EAC19A9089C0FA5A2AB4DCE3F90528DCDEEC108B23DDF3607B99' SALT 'salt'" + }, { "CREATE USER user1 IDENTIFIED WITH sha256_password BY 'qwe123' SALT 'EFFD7F6B03B3EA68B8F86C1E91614DD50E42EB31EF7160524916444D58B5E264'", "throws Syntax error" diff --git a/src/Processors/Chunk.cpp b/src/Processors/Chunk.cpp index 8c7c09abf01..61bd118636d 100644 --- a/src/Processors/Chunk.cpp +++ b/src/Processors/Chunk.cpp @@ -169,6 +169,17 @@ std::string Chunk::dumpStructure() const return out.str(); } +void Chunk::append(const Chunk & chunk) +{ + MutableColumns mutation = mutateColumns(); + for (size_t position = 0; position < mutation.size(); ++position) + { + auto column = chunk.getColumns()[position]; + mutation[position]->insertRangeFrom(*column, 0, column->size()); + } + size_t rows = mutation[0]->size(); + setColumns(std::move(mutation), rows); +} void ChunkMissingValues::setBit(size_t column_idx, size_t row_idx) { diff --git a/src/Processors/Chunk.h b/src/Processors/Chunk.h index 1c9240ba114..ec514846f24 100644 --- a/src/Processors/Chunk.h +++ b/src/Processors/Chunk.h @@ -90,7 +90,7 @@ public: bool hasRows() const { return num_rows > 0; } bool hasColumns() const { return !columns.empty(); } bool empty() const { return !hasRows() && !hasColumns(); } - operator bool() const { return !empty(); } /// NOLINT + explicit operator bool() const { return !empty(); } void addColumn(ColumnPtr column); void addColumn(size_t position, ColumnPtr column); @@ -101,6 +101,8 @@ public: std::string dumpStructure() const; + void append(const Chunk & chunk); + private: Columns columns; UInt64 num_rows = 0; diff --git a/src/Processors/Formats/IRowInputFormat.cpp b/src/Processors/Formats/IRowInputFormat.cpp index 481f77c1ef8..dbe28147d8f 100644 --- a/src/Processors/Formats/IRowInputFormat.cpp +++ b/src/Processors/Formats/IRowInputFormat.cpp @@ -48,13 +48,8 @@ bool isParseError(int code) } IRowInputFormat::IRowInputFormat(Block header, ReadBuffer & in_, Params params_) - : IInputFormat(std::move(header), in_), params(params_) + : IInputFormat(std::move(header), in_), serializations(getPort().getHeader().getSerializations()), params(params_) { - const auto & port_header = getPort().getHeader(); - size_t num_columns = port_header.columns(); - serializations.resize(num_columns); - for (size_t i = 0; i < num_columns; ++i) - serializations[i] = port_header.getByPosition(i).type->getDefaultSerialization(); } diff 
--git a/src/Processors/Formats/IRowOutputFormat.cpp b/src/Processors/Formats/IRowOutputFormat.cpp index b48c4a2b3e6..f2f6b49ed3f 100644 --- a/src/Processors/Formats/IRowOutputFormat.cpp +++ b/src/Processors/Formats/IRowOutputFormat.cpp @@ -12,13 +12,11 @@ namespace ErrorCodes IRowOutputFormat::IRowOutputFormat(const Block & header, WriteBuffer & out_, const Params & params_) : IOutputFormat(header, out_) + , num_columns(header.columns()) , types(header.getDataTypes()) + , serializations(header.getSerializations()) , params(params_) { - num_columns = types.size(); - serializations.reserve(num_columns); - for (const auto & type : types) - serializations.push_back(type->getDefaultSerialization()); } void IRowOutputFormat::consume(DB::Chunk chunk) diff --git a/src/Processors/Formats/ISchemaReader.cpp b/src/Processors/Formats/ISchemaReader.cpp index f23f33c482d..5a6ebf00660 100644 --- a/src/Processors/Formats/ISchemaReader.cpp +++ b/src/Processors/Formats/ISchemaReader.cpp @@ -12,9 +12,10 @@ namespace ErrorCodes extern const int TYPE_MISMATCH; extern const int INCORRECT_DATA; extern const int EMPTY_DATA_PASSED; + extern const int BAD_ARGUMENTS; } -static void chooseResultType( +void chooseResultColumnType( DataTypePtr & type, const DataTypePtr & new_type, CommonDataTypeChecker common_type_checker, @@ -48,16 +49,14 @@ static void chooseResultType( } } -static void checkTypeAndAppend(NamesAndTypesList & result, DataTypePtr & type, const String & name, const DataTypePtr & default_type, size_t max_rows_to_read) +void checkResultColumnTypeAndAppend(NamesAndTypesList & result, DataTypePtr & type, const String & name, const DataTypePtr & default_type, size_t rows_read) { if (!type) { if (!default_type) throw Exception( ErrorCodes::ONLY_NULLS_WHILE_READING_SCHEMA, - "Cannot determine table structure by first {} rows of data, because some columns contain only Nulls. To increase the maximum " - "number of rows to read for structure determination, use setting input_format_max_rows_to_read_for_schema_inference", - max_rows_to_read); + "Cannot determine table structure by first {} rows of data, because some columns contain only Nulls", rows_read); type = default_type; } @@ -65,7 +64,7 @@ static void checkTypeAndAppend(NamesAndTypesList & result, DataTypePtr & type, c } IRowSchemaReader::IRowSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings) - : ISchemaReader(in_), max_rows_to_read(format_settings.max_rows_to_read_for_schema_inference) + : ISchemaReader(in_) { if (!format_settings.column_names_for_schema_inference.empty()) { @@ -94,8 +93,14 @@ IRowSchemaReader::IRowSchemaReader(ReadBuffer & in_, const FormatSettings & form NamesAndTypesList IRowSchemaReader::readSchema() { + if (max_rows_to_read == 0) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Cannot read rows to determine the schema, the maximum number of rows to read is set to 0. 
" + "Most likely setting input_format_max_rows_to_read_for_schema_inference is set to 0"); + DataTypes data_types = readRowAndGetDataTypes(); - for (size_t row = 1; row < max_rows_to_read; ++row) + for (rows_read = 1; rows_read < max_rows_to_read; ++rows_read) { DataTypes new_data_types = readRowAndGetDataTypes(); if (new_data_types.empty()) @@ -111,7 +116,7 @@ NamesAndTypesList IRowSchemaReader::readSchema() if (!new_data_types[i]) continue; - chooseResultType(data_types[i], new_data_types[i], common_type_checker, getDefaultType(i), std::to_string(i + 1), row); + chooseResultColumnType(data_types[i], new_data_types[i], common_type_checker, getDefaultType(i), std::to_string(i + 1), rows_read); } } @@ -136,7 +141,7 @@ NamesAndTypesList IRowSchemaReader::readSchema() for (size_t i = 0; i != data_types.size(); ++i) { /// Check that we could determine the type of this column. - checkTypeAndAppend(result, data_types[i], column_names[i], getDefaultType(i), max_rows_to_read); + checkResultColumnTypeAndAppend(result, data_types[i], column_names[i], getDefaultType(i), rows_read); } return result; @@ -151,13 +156,19 @@ DataTypePtr IRowSchemaReader::getDefaultType(size_t column) const return nullptr; } -IRowWithNamesSchemaReader::IRowWithNamesSchemaReader(ReadBuffer & in_, size_t max_rows_to_read_, DataTypePtr default_type_) - : ISchemaReader(in_), max_rows_to_read(max_rows_to_read_), default_type(default_type_) +IRowWithNamesSchemaReader::IRowWithNamesSchemaReader(ReadBuffer & in_, DataTypePtr default_type_) + : ISchemaReader(in_), default_type(default_type_) { } NamesAndTypesList IRowWithNamesSchemaReader::readSchema() { + if (max_rows_to_read == 0) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Cannot read rows to determine the schema, the maximum number of rows to read is set to 0. " + "Most likely setting input_format_max_rows_to_read_for_schema_inference is set to 0"); + bool eof = false; auto names_and_types = readRowAndGetNamesAndDataTypes(eof); std::unordered_map names_to_types; @@ -170,7 +181,7 @@ NamesAndTypesList IRowWithNamesSchemaReader::readSchema() names_order.push_back(name); } - for (size_t row = 1; row < max_rows_to_read; ++row) + for (rows_read = 1; rows_read < max_rows_to_read; ++rows_read) { auto new_names_and_types = readRowAndGetNamesAndDataTypes(eof); if (eof) @@ -189,7 +200,7 @@ NamesAndTypesList IRowWithNamesSchemaReader::readSchema() } auto & type = it->second; - chooseResultType(type, new_type, common_type_checker, default_type, name, row); + chooseResultColumnType(type, new_type, common_type_checker, default_type, name, rows_read); } } @@ -202,7 +213,7 @@ NamesAndTypesList IRowWithNamesSchemaReader::readSchema() { auto & type = names_to_types[name]; /// Check that we could determine the type of this column. 
- checkTypeAndAppend(result, type, name, default_type, max_rows_to_read); + checkResultColumnTypeAndAppend(result, type, name, default_type, rows_read); } return result; diff --git a/src/Processors/Formats/ISchemaReader.h b/src/Processors/Formats/ISchemaReader.h index a8eff762856..00987540d04 100644 --- a/src/Processors/Formats/ISchemaReader.h +++ b/src/Processors/Formats/ISchemaReader.h @@ -26,6 +26,9 @@ public: virtual bool needContext() const { return false; } virtual void setContext(ContextPtr &) {} + virtual void setMaxRowsToRead(size_t) {} + virtual size_t getNumRowsRead() const { return 0; } + virtual ~ISchemaReader() = default; protected: @@ -61,10 +64,14 @@ protected: void setColumnNames(const std::vector & names) { column_names = names; } + void setMaxRowsToRead(size_t max_rows) override { max_rows_to_read = max_rows; } + size_t getNumRowsRead() const override { return rows_read; } + private: DataTypePtr getDefaultType(size_t column) const; size_t max_rows_to_read; + size_t rows_read = 0; DataTypePtr default_type; DataTypes default_types; CommonDataTypeChecker common_type_checker; @@ -79,7 +86,7 @@ private: class IRowWithNamesSchemaReader : public ISchemaReader { public: - IRowWithNamesSchemaReader(ReadBuffer & in_, size_t max_rows_to_read_, DataTypePtr default_type_ = nullptr); + IRowWithNamesSchemaReader(ReadBuffer & in_, DataTypePtr default_type_ = nullptr); NamesAndTypesList readSchema() override; bool hasStrictOrderOfColumns() const override { return false; } @@ -92,8 +99,12 @@ protected: /// Set eof = true if can't read more data. virtual NamesAndTypesList readRowAndGetNamesAndDataTypes(bool & eof) = 0; + void setMaxRowsToRead(size_t max_rows) override { max_rows_to_read = max_rows; } + size_t getNumRowsRead() const override { return rows_read; } + private: size_t max_rows_to_read; + size_t rows_read = 0; DataTypePtr default_type; CommonDataTypeChecker common_type_checker; }; @@ -109,4 +120,15 @@ public: virtual ~IExternalSchemaReader() = default; }; +void chooseResultColumnType( + DataTypePtr & type, + const DataTypePtr & new_type, + CommonDataTypeChecker common_type_checker, + const DataTypePtr & default_type, + const String & column_name, + size_t row); + +void checkResultColumnTypeAndAppend( + NamesAndTypesList & result, DataTypePtr & type, const String & name, const DataTypePtr & default_type, size_t rows_read); + } diff --git a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp index 07331d82bb8..da3e3efe807 100644 --- a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp @@ -76,9 +76,8 @@ Chunk ArrowBlockInputFormat::generate() /// If defaults_for_omitted_fields is true, calculate the default values from default expression for omitted fields. /// Otherwise fill the missing columns with zero values of its type. 
if (format_settings.defaults_for_omitted_fields) - for (size_t row_idx = 0; row_idx < res.getNumRows(); ++row_idx) - for (const auto & column_idx : missing_columns) - block_missing_values.setBit(column_idx, row_idx); + for (const auto & column_idx : missing_columns) + block_missing_values.setBits(column_idx, res.getNumRows()); return res; } diff --git a/src/Processors/Formats/Impl/ArrowBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ArrowBlockOutputFormat.cpp index 60408f13ff0..83eaefa8cf7 100644 --- a/src/Processors/Formats/Impl/ArrowBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ArrowBlockOutputFormat.cpp @@ -34,7 +34,7 @@ void ArrowBlockOutputFormat::consume(Chunk chunk) { const Block & header = getPort(PortKind::Main).getHeader(); ch_column_to_arrow_column - = std::make_unique(header, "Arrow", format_settings.arrow.low_cardinality_as_dictionary); + = std::make_unique(header, "Arrow", format_settings.arrow.low_cardinality_as_dictionary, format_settings.arrow.output_string_as_string); } ch_column_to_arrow_column->chChunkToArrowTable(arrow_table, chunk, columns_num); diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index c792d828e44..7c5dd2a03ea 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -36,6 +36,7 @@ #include #include + /// UINT16 and UINT32 are processed separately, see comments in readColumnFromArrowColumn. #define FOR_ARROW_NUMERIC_TYPES(M) \ M(arrow::Type::UINT8, DB::UInt8) \ diff --git a/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp b/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp index 70373480920..7b86dcd4a64 100644 --- a/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp @@ -80,7 +80,7 @@ class OutputStreamWriteBufferAdapter : public avro::OutputStream public: explicit OutputStreamWriteBufferAdapter(WriteBuffer & out_) : out(out_) {} - virtual bool next(uint8_t ** data, size_t * len) override + bool next(uint8_t ** data, size_t * len) override { out.nextIfAtEnd(); *data = reinterpret_cast(out.position()); @@ -90,10 +90,10 @@ public: return true; } - virtual void backup(size_t len) override { out.position() -= len; } + void backup(size_t len) override { out.position() -= len; } - virtual uint64_t byteCount() const override { return out.count(); } - virtual void flush() override { } + uint64_t byteCount() const override { return out.count(); } + void flush() override { } private: WriteBuffer & out; diff --git a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp index 3f6a36e8e8c..bd5a6368291 100644 --- a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp +++ b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp @@ -168,6 +168,7 @@ namespace DB String format_name, size_t start, size_t end, + bool output_string_as_string, std::unordered_map> & dictionary_values); template @@ -180,6 +181,7 @@ namespace DB String format_name, size_t start, size_t end, + bool output_string_as_string, std::unordered_map> & dictionary_values) { const auto * column_array = assert_cast(column.get()); @@ -196,7 +198,7 @@ namespace DB /// Start new array. 
components_status = builder.Append(); checkStatus(components_status, nested_column->getName(), format_name); - fillArrowArray(column_name, nested_column, nested_type, null_bytemap, value_builder, format_name, offsets[array_idx - 1], offsets[array_idx], dictionary_values); + fillArrowArray(column_name, nested_column, nested_type, null_bytemap, value_builder, format_name, offsets[array_idx - 1], offsets[array_idx], output_string_as_string, dictionary_values); } } @@ -209,6 +211,7 @@ namespace DB String format_name, size_t start, size_t end, + bool output_string_as_string, std::unordered_map> & dictionary_values) { const auto * column_tuple = assert_cast(column.get()); @@ -219,7 +222,7 @@ namespace DB for (size_t i = 0; i != column_tuple->tupleSize(); ++i) { ColumnPtr nested_column = column_tuple->getColumnPtr(i); - fillArrowArray(column_name + "." + std::to_string(i), nested_column, nested_types[i], null_bytemap, builder.field_builder(i), format_name, start, end, dictionary_values); + fillArrowArray(column_name + "." + std::to_string(i), nested_column, nested_types[i], null_bytemap, builder.field_builder(i), format_name, start, end, output_string_as_string, dictionary_values); } for (size_t i = start; i != end; ++i) @@ -267,6 +270,7 @@ namespace DB String format_name, size_t start, size_t end, + bool output_string_as_string, std::unordered_map> & dictionary_values) { const auto * column_lc = assert_cast(column.get()); @@ -284,7 +288,7 @@ namespace DB auto dict_column = column_lc->getDictionary().getNestedColumn(); const auto & dict_type = assert_cast(column_type.get())->getDictionaryType(); - fillArrowArray(column_name, dict_column, dict_type, nullptr, values_builder.get(), format_name, 0, dict_column->size(), dictionary_values); + fillArrowArray(column_name, dict_column, dict_type, nullptr, values_builder.get(), format_name, 0, dict_column->size(), output_string_as_string, dictionary_values); status = values_builder->Finish(&dict_values); checkStatus(status, column->getName(), format_name); } @@ -321,6 +325,7 @@ namespace DB String format_name, size_t start, size_t end, + bool output_string_as_string, std::unordered_map> & dictionary_values) { auto value_type = assert_cast(array_builder->type().get())->value_type(); @@ -328,7 +333,7 @@ namespace DB #define DISPATCH(ARROW_TYPE_ID, ARROW_TYPE) \ if (arrow::Type::ARROW_TYPE_ID == value_type->id()) \ { \ - fillArrowArrayWithLowCardinalityColumnDataImpl(column_name, column, column_type, null_bytemap, array_builder, format_name, start, end, dictionary_values); \ + fillArrowArrayWithLowCardinalityColumnDataImpl(column_name, column, column_type, null_bytemap, array_builder, format_name, start, end, output_string_as_string, dictionary_values); \ return; \ } @@ -338,7 +343,7 @@ namespace DB throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot fill arrow array with {} data.", column_type->getName()); } - template + template static void fillArrowArrayWithStringColumnData( ColumnPtr write_column, const PaddedPODArray * null_bytemap, @@ -348,7 +353,7 @@ namespace DB size_t end) { const auto & internal_column = assert_cast(*write_column); - arrow::BinaryBuilder & builder = assert_cast(*array_builder); + ArrowBuilder & builder = assert_cast(*array_builder); arrow::Status status; for (size_t string_i = start; string_i < end; ++string_i) @@ -442,6 +447,7 @@ namespace DB String format_name, size_t start, size_t end, + bool output_string_as_string, std::unordered_map> & dictionary_values) { const String column_type_name = column_type->getFamilyName(); @@ -453,15 
+459,21 @@ namespace DB DataTypePtr nested_type = assert_cast(column_type.get())->getNestedType(); const ColumnPtr & null_column = column_nullable->getNullMapColumnPtr(); const PaddedPODArray & bytemap = assert_cast &>(*null_column).getData(); - fillArrowArray(column_name, nested_column, nested_type, &bytemap, array_builder, format_name, start, end, dictionary_values); + fillArrowArray(column_name, nested_column, nested_type, &bytemap, array_builder, format_name, start, end, output_string_as_string, dictionary_values); } else if (isString(column_type)) { - fillArrowArrayWithStringColumnData(column, null_bytemap, format_name, array_builder, start, end); + if (output_string_as_string) + fillArrowArrayWithStringColumnData(column, null_bytemap, format_name, array_builder, start, end); + else + fillArrowArrayWithStringColumnData(column, null_bytemap, format_name, array_builder, start, end); } else if (isFixedString(column_type)) { - fillArrowArrayWithStringColumnData(column, null_bytemap, format_name, array_builder, start, end); + if (output_string_as_string) + fillArrowArrayWithStringColumnData(column, null_bytemap, format_name, array_builder, start, end); + else + fillArrowArrayWithStringColumnData(column, null_bytemap, format_name, array_builder, start, end); } else if (isDate(column_type)) { @@ -477,21 +489,21 @@ namespace DB } else if (isArray(column_type)) { - fillArrowArrayWithArrayColumnData(column_name, column, column_type, null_bytemap, array_builder, format_name, start, end, dictionary_values); + fillArrowArrayWithArrayColumnData(column_name, column, column_type, null_bytemap, array_builder, format_name, start, end, output_string_as_string, dictionary_values); } else if (isTuple(column_type)) { - fillArrowArrayWithTupleColumnData(column_name, column, column_type, null_bytemap, array_builder, format_name, start, end, dictionary_values); + fillArrowArrayWithTupleColumnData(column_name, column, column_type, null_bytemap, array_builder, format_name, start, end, output_string_as_string, dictionary_values); } else if (column_type->getTypeId() == TypeIndex::LowCardinality) { - fillArrowArrayWithLowCardinalityColumnData(column_name, column, column_type, null_bytemap, array_builder, format_name, start, end, dictionary_values); + fillArrowArrayWithLowCardinalityColumnData(column_name, column, column_type, null_bytemap, array_builder, format_name, start, end, output_string_as_string, dictionary_values); } else if (isMap(column_type)) { ColumnPtr column_array = assert_cast(column.get())->getNestedColumnPtr(); DataTypePtr array_type = assert_cast(column_type.get())->getNestedType(); - fillArrowArrayWithArrayColumnData(column_name, column_array, array_type, null_bytemap, array_builder, format_name, start, end, dictionary_values); + fillArrowArrayWithArrayColumnData(column_name, column_array, array_type, null_bytemap, array_builder, format_name, start, end, output_string_as_string, dictionary_values); } else if (isDecimal(column_type)) { @@ -603,13 +615,13 @@ namespace DB } static std::shared_ptr getArrowType( - DataTypePtr column_type, ColumnPtr column, const std::string & column_name, const std::string & format_name, bool * out_is_column_nullable) + DataTypePtr column_type, ColumnPtr column, const std::string & column_name, const std::string & format_name, bool output_string_as_string, bool * out_is_column_nullable) { if (column_type->isNullable()) { DataTypePtr nested_type = assert_cast(column_type.get())->getNestedType(); ColumnPtr nested_column = 
assert_cast(column.get())->getNestedColumnPtr(); - auto arrow_type = getArrowType(nested_type, nested_column, column_name, format_name, out_is_column_nullable); + auto arrow_type = getArrowType(nested_type, nested_column, column_name, format_name, output_string_as_string, out_is_column_nullable); *out_is_column_nullable = true; return arrow_type; } @@ -643,7 +655,7 @@ namespace DB { auto nested_type = assert_cast(column_type.get())->getNestedType(); auto nested_column = assert_cast(column.get())->getDataPtr(); - auto nested_arrow_type = getArrowType(nested_type, nested_column, column_name, format_name, out_is_column_nullable); + auto nested_arrow_type = getArrowType(nested_type, nested_column, column_name, format_name, output_string_as_string, out_is_column_nullable); return arrow::list(nested_arrow_type); } @@ -655,7 +667,7 @@ namespace DB for (size_t i = 0; i != nested_types.size(); ++i) { String name = column_name + "." + std::to_string(i); - auto nested_arrow_type = getArrowType(nested_types[i], tuple_column->getColumnPtr(i), name, format_name, out_is_column_nullable); + auto nested_arrow_type = getArrowType(nested_types[i], tuple_column->getColumnPtr(i), name, format_name, output_string_as_string, out_is_column_nullable); nested_fields.push_back(std::make_shared(name, nested_arrow_type, *out_is_column_nullable)); } return arrow::struct_(nested_fields); @@ -669,7 +681,7 @@ namespace DB const auto & indexes_column = lc_column->getIndexesPtr(); return arrow::dictionary( getArrowTypeForLowCardinalityIndexes(indexes_column), - getArrowType(nested_type, nested_column, column_name, format_name, out_is_column_nullable)); + getArrowType(nested_type, nested_column, column_name, format_name, output_string_as_string, out_is_column_nullable)); } if (isMap(column_type)) @@ -680,10 +692,19 @@ namespace DB const auto & columns = assert_cast(column.get())->getNestedData().getColumns(); return arrow::map( - getArrowType(key_type, columns[0], column_name, format_name, out_is_column_nullable), - getArrowType(val_type, columns[1], column_name, format_name, out_is_column_nullable)); + getArrowType(key_type, columns[0], column_name, format_name, output_string_as_string, out_is_column_nullable), + getArrowType(val_type, columns[1], column_name, format_name, output_string_as_string, out_is_column_nullable)); } + if (isDateTime64(column_type)) + { + const auto * datetime64_type = assert_cast(column_type.get()); + return arrow::timestamp(getArrowTimeUnit(datetime64_type), datetime64_type->getTimeZone().getTimeZone()); + } + + if (isStringOrFixedString(column_type) && output_string_as_string) + return arrow::utf8(); + const std::string type_name = column_type->getFamilyName(); if (const auto * arrow_type_it = std::find_if( internal_type_to_arrow_type.begin(), @@ -694,19 +715,13 @@ namespace DB return arrow_type_it->second; } - if (isDateTime64(column_type)) - { - const auto * datetime64_type = assert_cast(column_type.get()); - return arrow::timestamp(getArrowTimeUnit(datetime64_type), datetime64_type->getTimeZone().getTimeZone()); - } - throw Exception(ErrorCodes::UNKNOWN_TYPE, "The type '{}' of a column '{}' is not supported for conversion into {} data format.", column_type->getName(), column_name, format_name); } - CHColumnToArrowColumn::CHColumnToArrowColumn(const Block & header, const std::string & format_name_, bool low_cardinality_as_dictionary_) - : format_name(format_name_), low_cardinality_as_dictionary(low_cardinality_as_dictionary_) + CHColumnToArrowColumn::CHColumnToArrowColumn(const Block & header, 
const std::string & format_name_, bool low_cardinality_as_dictionary_, bool output_string_as_string_) + : format_name(format_name_), low_cardinality_as_dictionary(low_cardinality_as_dictionary_), output_string_as_string(output_string_as_string_) { arrow_fields.reserve(header.columns()); header_columns.reserve(header.columns()); @@ -741,7 +756,7 @@ namespace DB if (!is_arrow_fields_initialized) { bool is_column_nullable = false; - auto arrow_type = getArrowType(header_column.type, column, header_column.name, format_name, &is_column_nullable); + auto arrow_type = getArrowType(header_column.type, column, header_column.name, format_name, output_string_as_string, &is_column_nullable); arrow_fields.emplace_back(std::make_shared(header_column.name, arrow_type, is_column_nullable)); } @@ -751,7 +766,7 @@ namespace DB checkStatus(status, column->getName(), format_name); fillArrowArray( - header_column.name, column, header_column.type, nullptr, array_builder.get(), format_name, 0, column->size(), dictionary_values); + header_column.name, column, header_column.type, nullptr, array_builder.get(), format_name, 0, column->size(), output_string_as_string, dictionary_values); std::shared_ptr arrow_array; status = array_builder->Finish(&arrow_array); diff --git a/src/Processors/Formats/Impl/CHColumnToArrowColumn.h b/src/Processors/Formats/Impl/CHColumnToArrowColumn.h index 50de8045d5f..2896fb3642f 100644 --- a/src/Processors/Formats/Impl/CHColumnToArrowColumn.h +++ b/src/Processors/Formats/Impl/CHColumnToArrowColumn.h @@ -14,7 +14,7 @@ namespace DB class CHColumnToArrowColumn { public: - CHColumnToArrowColumn(const Block & header, const std::string & format_name_, bool low_cardinality_as_dictionary_); + CHColumnToArrowColumn(const Block & header, const std::string & format_name_, bool low_cardinality_as_dictionary_, bool output_string_as_string_); void chChunkToArrowTable(std::shared_ptr & res, const Chunk & chunk, size_t columns_num); @@ -32,6 +32,10 @@ private: /// because LowCardinality column from header always has indexes type UInt8, so, we should get /// proper indexes type from first chunk of data. bool is_arrow_fields_initialized = false; + + /// Output columns with String data type as Arrow::String type. + /// By default Arrow::Binary is used. 
+ bool output_string_as_string = false; }; } diff --git a/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp index e31006ff0f6..d369eedceea 100644 --- a/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include @@ -221,12 +221,12 @@ void registerInputFormatJSONAsString(FormatFactory & factory) void registerFileSegmentationEngineJSONAsString(FormatFactory & factory) { - factory.registerFileSegmentationEngine("JSONAsString", &fileSegmentationEngineJSONEachRow); + factory.registerFileSegmentationEngine("JSONAsString", &JSONUtils::fileSegmentationEngineJSONEachRow); } void registerNonTrivialPrefixAndSuffixCheckerJSONAsString(FormatFactory & factory) { - factory.registerNonTrivialPrefixAndSuffixChecker("JSONAsString", nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl); + factory.registerNonTrivialPrefixAndSuffixChecker("JSONAsString", JSONUtils::nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl); } void registerJSONAsStringSchemaReader(FormatFactory & factory) @@ -251,12 +251,12 @@ void registerInputFormatJSONAsObject(FormatFactory & factory) void registerNonTrivialPrefixAndSuffixCheckerJSONAsObject(FormatFactory & factory) { - factory.registerNonTrivialPrefixAndSuffixChecker("JSONAsObject", nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl); + factory.registerNonTrivialPrefixAndSuffixChecker("JSONAsObject", JSONUtils::nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl); } void registerFileSegmentationEngineJSONAsObject(FormatFactory & factory) { - factory.registerFileSegmentationEngine("JSONAsObject", &fileSegmentationEngineJSONEachRow); + factory.registerFileSegmentationEngine("JSONAsObject", &JSONUtils::fileSegmentationEngineJSONEachRow); } void registerJSONAsObjectSchemaReader(FormatFactory & factory) diff --git a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormat.cpp b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormat.cpp new file mode 100644 index 00000000000..935462a6fe4 --- /dev/null +++ b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormat.cpp @@ -0,0 +1,70 @@ +#include +#include +#include + +namespace DB +{ + +JSONColumnsReader::JSONColumnsReader(ReadBuffer & in_) : JSONColumnsReaderBase(in_) +{ +} + +void JSONColumnsReader::readChunkStart() +{ + skipWhitespaceIfAny(*in); + assertChar('{', *in); + skipWhitespaceIfAny(*in); +} + +std::optional JSONColumnsReader::readColumnStart() +{ + skipWhitespaceIfAny(*in); + String name; + readJSONString(name, *in); + skipWhitespaceIfAny(*in); + assertChar(':', *in); + skipWhitespaceIfAny(*in); + assertChar('[', *in); + skipWhitespaceIfAny(*in); + return name; +} + +bool JSONColumnsReader::checkChunkEnd() +{ + skipWhitespaceIfAny(*in); + if (!in->eof() && *in->position() == '}') + { + ++in->position(); + skipWhitespaceIfAny(*in); + return true; + } + return false; +} + + +void registerInputFormatJSONColumns(FormatFactory & factory) +{ + factory.registerInputFormat( + "JSONColumns", + [](ReadBuffer & buf, + const Block &sample, + const RowInputFormatParams &, + const FormatSettings & settings) + { + return std::make_shared(buf, sample, settings, std::make_unique(buf)); + } + ); +} + +void registerJSONColumnsSchemaReader(FormatFactory & factory) +{ + factory.registerSchemaReader( + "JSONColumns", + [](ReadBuffer & buf, const FormatSettings & settings) + { + return std::make_shared(buf, settings, std::make_unique(buf)); + } + ); +} + +} diff 
--git a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormat.h b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormat.h new file mode 100644 index 00000000000..f8b8a80731e --- /dev/null +++ b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormat.h @@ -0,0 +1,25 @@ +#pragma once + +#include + +namespace DB +{ + +/* Format JSONColumns reads each block of data in the next format: + * { + * "name1": [value1, value2, value3, ...], + * "name2": [value1, value2m value3, ...], + * ... + * } + */ +class JSONColumnsReader : public JSONColumnsReaderBase +{ +public: + JSONColumnsReader(ReadBuffer & in_); + + void readChunkStart() override; + std::optional readColumnStart() override; + bool checkChunkEnd() override; +}; + +} diff --git a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.cpp b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.cpp new file mode 100644 index 00000000000..cdde87f2cf6 --- /dev/null +++ b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.cpp @@ -0,0 +1,272 @@ +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int INCORRECT_DATA; + extern const int EMPTY_DATA_PASSED; +} + + +JSONColumnsReaderBase::JSONColumnsReaderBase(ReadBuffer & in_) : in(&in_) +{ +} + +bool JSONColumnsReaderBase::checkColumnEnd() +{ + skipWhitespaceIfAny(*in); + if (!in->eof() && *in->position() == ']') + { + ++in->position(); + skipWhitespaceIfAny(*in); + return true; + } + return false; +} + +bool JSONColumnsReaderBase::checkColumnEndOrSkipFieldDelimiter() +{ + if (checkColumnEnd()) + return true; + skipWhitespaceIfAny(*in); + assertChar(',', *in); + skipWhitespaceIfAny(*in); + return false; +} + +bool JSONColumnsReaderBase::checkChunkEndOrSkipColumnDelimiter() +{ + if (checkChunkEnd()) + return true; + skipWhitespaceIfAny(*in); + assertChar(',', *in); + skipWhitespaceIfAny(*in); + return false; +} + +void JSONColumnsReaderBase::skipColumn() +{ + /// We assume that we already read '[', so we should skip until matched ']'. + size_t balance = 1; + bool inside_quotes = false; + char * pos; + while (!in->eof() && balance) + { + if (inside_quotes) + pos = find_first_symbols<'"'>(in->position(), in->buffer().end()); + else + pos = find_first_symbols<'[', ']', '"'>(in->position(), in->buffer().end()); + + in->position() = pos; + if (in->position() == in->buffer().end()) + continue; + + if (*in->position() == '"') + inside_quotes = !inside_quotes; + else if (*in->position() == ']') + --balance; + else if (*in->position() == '[') + ++balance; + ++in->position(); + } +} + +JSONColumnsBlockInputFormatBase::JSONColumnsBlockInputFormatBase( + ReadBuffer & in_, const Block & header_, const FormatSettings & format_settings_, std::unique_ptr reader_) + : IInputFormat(header_, in_) + , format_settings(format_settings_) + , fields(header_.getNamesAndTypes()) + , name_to_index(header_.getNamesToIndexesMap()) + , serializations(header_.getSerializations()) + , reader(std::move(reader_)) +{ +} + +size_t JSONColumnsBlockInputFormatBase::readColumn( + IColumn & column, const DataTypePtr & type, const SerializationPtr & serialization, const String & column_name) +{ + /// Check for empty column. 
+ if (reader->checkColumnEnd()) + return 0; + + do + { + JSONUtils::readField(*in, column, type, serialization, column_name, format_settings, false); + } + while (!reader->checkColumnEndOrSkipFieldDelimiter()); + + return column.size(); +} + +void JSONColumnsBlockInputFormatBase::setReadBuffer(ReadBuffer & in_) +{ + reader->setReadBuffer(in_); + IInputFormat::setReadBuffer(in_); +} + +Chunk JSONColumnsBlockInputFormatBase::generate() +{ + MutableColumns columns = getPort().getHeader().cloneEmptyColumns(); + block_missing_values.clear(); + + skipWhitespaceIfAny(*in); + if (in->eof()) + return {}; + + reader->readChunkStart(); + /// Check for empty block. + if (reader->checkChunkEnd()) + return Chunk(std::move(columns), 0); + + std::vector seen_columns(columns.size(), 0); + Int64 rows = -1; + size_t iteration = 0; + do + { + auto column_name = reader->readColumnStart(); + size_t column_index = iteration; + if (column_name.has_value()) + { + /// Check if this name appears in header. If no, skip this column or throw + /// an exception according to setting input_format_skip_unknown_fields + if (!name_to_index.contains(*column_name)) + { + if (!format_settings.skip_unknown_fields) + throw Exception(ErrorCodes::INCORRECT_DATA, "Unknown column found in input data: {}", *column_name); + + reader->skipColumn(); + continue; + } + column_index = name_to_index[*column_name]; + } + + if (column_index >= columns.size()) + throw Exception(ErrorCodes::INCORRECT_DATA, "Input data has too many columns, expected {} columns", columns.size()); + + seen_columns[column_index] = 1; + size_t columns_size = readColumn(*columns[column_index], fields[column_index].type, serializations[column_index], fields[column_index].name); + if (rows != -1 && size_t(rows) != columns_size) + throw Exception(ErrorCodes::INCORRECT_DATA, "Number of rows differs in different columns: {} != {}", rows, columns_size); + rows = columns_size; + ++iteration; + } + while (!reader->checkChunkEndOrSkipColumnDelimiter()); + + if (rows <= 0) + return Chunk(std::move(columns), 0); + + /// Insert defaults in columns that were not presented in block and fill + /// block_missing_values accordingly if setting input_format_defaults_for_omitted_fields is enabled + for (size_t i = 0; i != seen_columns.size(); ++i) + { + if (!seen_columns[i]) + { + columns[i]->insertManyDefaults(rows); + if (format_settings.defaults_for_omitted_fields) + block_missing_values.setBits(i, rows); + } + } + + return Chunk(std::move(columns), rows); +} + +JSONColumnsSchemaReaderBase::JSONColumnsSchemaReaderBase( + ReadBuffer & in_, const FormatSettings & format_settings_, std::unique_ptr reader_) + : ISchemaReader(in_), format_settings(format_settings_), reader(std::move(reader_)) +{ +} + +void JSONColumnsSchemaReaderBase::chooseResulType(DataTypePtr & type, const DataTypePtr & new_type, const String & column_name, size_t row) const +{ + auto common_type_checker = [&](const DataTypePtr & first, const DataTypePtr & second) + { + return JSONUtils::getCommonTypeForJSONFormats(first, second, format_settings.json.read_bools_as_numbers); + }; + chooseResultColumnType(type, new_type, common_type_checker, nullptr, column_name, row); +} + +NamesAndTypesList JSONColumnsSchemaReaderBase::readSchema() +{ + size_t total_rows_read = 0; + std::unordered_map names_to_types; + std::vector names_order; + /// Read data block by block and determine the type for each column + /// until max_rows_to_read_for_schema_inference is reached. 
+ while (total_rows_read < format_settings.max_rows_to_read_for_schema_inference) + { + if (in.eof()) + break; + + reader->readChunkStart(); + /// Check for empty block. + if (reader->checkChunkEnd()) + continue; + + size_t iteration = 0; + size_t rows_in_block = 0; + do + { + auto column_name_opt = reader->readColumnStart(); + /// If format doesn't have named for columns, use default names 'c1', 'c2', ... + String column_name = column_name_opt.has_value() ? *column_name_opt : "c" + std::to_string(iteration + 1); + /// Keep order of column names as it is in input data. + if (!names_to_types.contains(column_name)) + names_order.push_back(column_name); + + rows_in_block = 0; + auto column_type = readColumnAndGetDataType(column_name, rows_in_block, format_settings.max_rows_to_read_for_schema_inference - total_rows_read); + chooseResulType(names_to_types[column_name], column_type, column_name, total_rows_read + 1); + ++iteration; + } + while (!reader->checkChunkEndOrSkipColumnDelimiter()); + + total_rows_read += rows_in_block; + } + + if (names_to_types.empty()) + throw Exception(ErrorCodes::EMPTY_DATA_PASSED, "Cannot read rows from the data"); + + NamesAndTypesList result; + for (auto & name : names_order) + { + auto & type = names_to_types[name]; + /// Check that we could determine the type of this column. + checkResultColumnTypeAndAppend(result, type, name, nullptr, format_settings.max_rows_to_read_for_schema_inference); + } + + return result; +} + +DataTypePtr JSONColumnsSchemaReaderBase::readColumnAndGetDataType(const String & column_name, size_t & rows_read, size_t max_rows_to_read) +{ + /// Check for empty column. + if (reader->checkColumnEnd()) + return nullptr; + + String field; + DataTypePtr column_type; + do + { + /// If we reached max_rows_to_read, skip the rest part of this column. + if (rows_read == max_rows_to_read) + { + reader->skipColumn(); + break; + } + + readJSONField(field, in); + DataTypePtr field_type = JSONUtils::getDataTypeFromField(field); + chooseResulType(column_type, field_type, column_name, rows_read); + ++rows_read; + } + while (!reader->checkColumnEndOrSkipFieldDelimiter()); + + return column_type; +} + +} diff --git a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.h b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.h new file mode 100644 index 00000000000..ac746a2e2d1 --- /dev/null +++ b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.h @@ -0,0 +1,92 @@ +#pragma once + +#include +#include +#include + + +namespace DB +{ + +class ReadBuffer; + + +/// Base class for reading data in Columnar JSON formats. +class JSONColumnsReaderBase +{ +public: + JSONColumnsReaderBase(ReadBuffer & in_); + + virtual ~JSONColumnsReaderBase() = default; + + void setReadBuffer(ReadBuffer & in_) { in = &in_; } + + virtual void readChunkStart() = 0; + virtual std::optional readColumnStart() = 0; + + virtual bool checkChunkEnd() = 0; + bool checkChunkEndOrSkipColumnDelimiter(); + + bool checkColumnEnd(); + bool checkColumnEndOrSkipFieldDelimiter(); + + void skipColumn(); + +protected: + ReadBuffer * in; +}; + + +/// Base class for Columnar JSON input formats. It works with data using +/// JSONColumnsReaderBase interface. +/// To implement new Columnar JSON format you need to implement new JSONColumnsReaderBase +/// interface and provide it to JSONColumnsBlockInputFormatBase. 
+class JSONColumnsBlockInputFormatBase : public IInputFormat +{ +public: + JSONColumnsBlockInputFormatBase(ReadBuffer & in_, const Block & header_, const FormatSettings & format_settings_, std::unique_ptr reader_); + + String getName() const override { return "JSONColumnsBlockInputFormatBase"; } + + void setReadBuffer(ReadBuffer & in_) override; + + const BlockMissingValues & getMissingValues() const override { return block_missing_values; } + +protected: + Chunk generate() override; + + size_t readColumn(IColumn & column, const DataTypePtr & type, const SerializationPtr & serialization, const String & column_name); + + const FormatSettings format_settings; + const NamesAndTypes fields; + /// Maps column names and their positions in header. + std::unordered_map name_to_index; + Serializations serializations; + std::unique_ptr reader; + BlockMissingValues block_missing_values; +}; + + +/// Base class for schema inference from Columnar JSON input formats. It works with data using +/// JSONColumnsReaderBase interface. +/// To implement schema reader for the new Columnar JSON format you need to implement new JSONColumnsReaderBase +/// interface and provide it to JSONColumnsSchemaReaderBase. +class JSONColumnsSchemaReaderBase : public ISchemaReader +{ +public: + JSONColumnsSchemaReaderBase(ReadBuffer & in_, const FormatSettings & format_settings_, std::unique_ptr reader_); + +private: + NamesAndTypesList readSchema() override; + + /// Read whole column in the block (up to max_rows_to_read rows) and extract the data type. + DataTypePtr readColumnAndGetDataType(const String & column_name, size_t & rows_read, size_t max_rows_to_read); + + /// Choose result type for column from two inferred types from different rows. + void chooseResulType(DataTypePtr & type, const DataTypePtr & new_type, const String & column_name, size_t row) const; + + const FormatSettings format_settings; + std::unique_ptr reader; +}; + +} diff --git a/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormat.cpp b/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormat.cpp new file mode 100644 index 00000000000..dd8688c655e --- /dev/null +++ b/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormat.cpp @@ -0,0 +1,49 @@ +#include +#include +#include +#include +#include + +namespace DB +{ + +JSONColumnsBlockOutputFormat::JSONColumnsBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_, size_t indent_) + : JSONColumnsBlockOutputFormatBase(out_, header_, format_settings_), fields(header_.getNamesAndTypes()), indent(indent_) +{ + for (auto & field : fields) + { + WriteBufferFromOwnString buf; + writeJSONString(field.name, buf, format_settings); + field.name = buf.str().substr(1, buf.str().size() - 2); + } +} + +void JSONColumnsBlockOutputFormat::writeChunkStart() +{ + JSONUtils::writeObjectStart(*ostr, indent); +} + +void JSONColumnsBlockOutputFormat::writeColumnStart(size_t column_index) +{ + JSONUtils::writeCompactArrayStart(*ostr, indent + 1, fields[column_index].name.data()); +} + +void JSONColumnsBlockOutputFormat::writeChunkEnd() +{ + JSONUtils::writeObjectEnd(*ostr, indent); + writeChar('\n', *ostr); +} + +void registerOutputFormatJSONColumns(FormatFactory & factory) +{ + factory.registerOutputFormat("JSONColumns", []( + WriteBuffer & buf, + const Block & sample, + const RowOutputFormatParams &, + const FormatSettings & format_settings) + { + return std::make_shared(buf, sample, format_settings); + }); +} + +} diff --git 
a/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormat.h b/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormat.h new file mode 100644 index 00000000000..e52f5f61aec --- /dev/null +++ b/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormat.h @@ -0,0 +1,32 @@ +#pragma once + +#include + +namespace DB +{ + +/* Format JSONColumns outputs all data as a single block in the next format: + * { + * "name1": [value1, value2, value3, ...], + * "name2": [value1, value2m value3, ...], + * ... + * } + */ +class JSONColumnsBlockOutputFormat : public JSONColumnsBlockOutputFormatBase +{ +public: + JSONColumnsBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_, size_t indent_ = 0); + + String getName() const override { return "JSONColumnsBlockOutputFormat"; } + +protected: + void writeChunkStart() override; + void writeChunkEnd() override; + + void writeColumnStart(size_t column_index) override; + + NamesAndTypes fields; + size_t indent; +}; + +} diff --git a/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormatBase.cpp b/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormatBase.cpp new file mode 100644 index 00000000000..8e83282408b --- /dev/null +++ b/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormatBase.cpp @@ -0,0 +1,66 @@ +#include +#include +#include + + +namespace DB +{ + +JSONColumnsBlockOutputFormatBase::JSONColumnsBlockOutputFormatBase( + WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_) + : IOutputFormat(header_, out_) + , format_settings(format_settings_) + , serializations(header_.getSerializations()) + , ostr(&out) +{ +} + +void JSONColumnsBlockOutputFormatBase::consume(Chunk chunk) +{ + if (!mono_chunk) + { + mono_chunk = std::move(chunk); + return; + } + + mono_chunk.append(chunk); +} + +void JSONColumnsBlockOutputFormatBase::writeSuffix() +{ + + writeChunk(mono_chunk); + mono_chunk.clear(); +} + +void JSONColumnsBlockOutputFormatBase::writeChunk(Chunk & chunk) +{ + writeChunkStart(); + const auto & columns = chunk.getColumns(); + for (size_t i = 0; i != columns.size(); ++i) + { + writeColumnStart(i); + writeColumn(*columns[i], *serializations[i]); + writeColumnEnd(i == columns.size() - 1); + } + writeChunkEnd(); +} + +void JSONColumnsBlockOutputFormatBase::writeColumnEnd(bool is_last) +{ + JSONUtils::writeCompactArrayEnd(*ostr); + if (!is_last) + JSONUtils::writeFieldDelimiter(*ostr); +} + +void JSONColumnsBlockOutputFormatBase::writeColumn(const IColumn & column, const ISerialization & serialization) +{ + for (size_t i = 0; i != column.size(); ++i) + { + if (i != 0) + JSONUtils::writeFieldCompactDelimiter(*ostr); + serialization.serializeTextJSON(column, i, *ostr, format_settings); + } +} + +} diff --git a/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormatBase.h b/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormatBase.h new file mode 100644 index 00000000000..133979523f9 --- /dev/null +++ b/src/Processors/Formats/Impl/JSONColumnsBlockOutputFormatBase.h @@ -0,0 +1,42 @@ +#pragma once + +#include +#include +#include + + +namespace DB +{ + +class WriteBuffer; + +/// Base class for Columnar JSON output formats. +/// It buffers all data and outputs it as a single block in writeSuffix() method. 
+class JSONColumnsBlockOutputFormatBase : public IOutputFormat +{ +public: + JSONColumnsBlockOutputFormatBase(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_); + + String getName() const override { return "JSONColumnsBlockOutputFormatBase"; } + +protected: + void consume(Chunk chunk) override; + void writeSuffix() override; + + void writeChunk(Chunk & chunk); + void writeColumn(const IColumn & column, const ISerialization & serialization); + + virtual void writeChunkStart() = 0; + virtual void writeChunkEnd() = 0; + virtual void writeColumnStart(size_t /*column_index*/) = 0; + void writeColumnEnd(bool is_last); + + const FormatSettings format_settings; + const Serializations serializations; + + WriteBuffer * ostr; + + Chunk mono_chunk; +}; + +} diff --git a/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.cpp b/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.cpp new file mode 100644 index 00000000000..394385e548d --- /dev/null +++ b/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.cpp @@ -0,0 +1,120 @@ +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +JSONColumnsWithMetadataBlockOutputFormat::JSONColumnsWithMetadataBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_) + : JSONColumnsBlockOutputFormat(out_, header_, format_settings_, 1) +{ + bool need_validate_utf8 = false; + JSONUtils::makeNamesAndTypesWithValidUTF8(fields, format_settings, need_validate_utf8); + + if (need_validate_utf8) + { + validating_ostr = std::make_unique(out); + ostr = validating_ostr.get(); + } +} + +void JSONColumnsWithMetadataBlockOutputFormat::writePrefix() +{ + JSONUtils::writeObjectStart(*ostr); + JSONUtils::writeMetadata(fields, format_settings, *ostr); +} + +void JSONColumnsWithMetadataBlockOutputFormat::writeSuffix() +{ + rows = mono_chunk.getNumRows(); + JSONColumnsBlockOutputFormatBase::writeSuffix(); +} + +void JSONColumnsWithMetadataBlockOutputFormat::writeChunkStart() +{ + JSONUtils::writeFieldDelimiter(*ostr, 2); + JSONUtils::writeObjectStart(*ostr, 1, "data"); +} + +void JSONColumnsWithMetadataBlockOutputFormat::writeChunkEnd() +{ + JSONUtils::writeObjectEnd(*ostr, indent); +} + +void JSONColumnsWithMetadataBlockOutputFormat::consumeExtremes(Chunk chunk) +{ + auto num_rows = chunk.getNumRows(); + if (num_rows != 2) + throw Exception("Got " + toString(num_rows) + " in extremes chunk, expected 2", ErrorCodes::LOGICAL_ERROR); + + const auto & columns = chunk.getColumns(); + JSONUtils::writeFieldDelimiter(*ostr, 2); + JSONUtils::writeObjectStart(*ostr, 1, "extremes"); + writeExtremesElement("min", columns, 0); + JSONUtils::writeFieldDelimiter(*ostr); + writeExtremesElement("max", columns, 1); + JSONUtils::writeObjectEnd(*ostr, 1); +} + +void JSONColumnsWithMetadataBlockOutputFormat::writeExtremesElement(const char * title, const Columns & columns, size_t row_num) +{ + JSONUtils::writeObjectStart(*ostr, 2, title); + JSONUtils::writeColumns(columns, fields, serializations, row_num, false, format_settings, *ostr, 3); + JSONUtils::writeObjectEnd(*ostr, 2); +} + +void JSONColumnsWithMetadataBlockOutputFormat::consumeTotals(Chunk chunk) +{ + auto num_rows = chunk.getNumRows(); + if (num_rows != 1) + throw Exception("Got " + toString(num_rows) + " in totals chunk, expected 1", ErrorCodes::LOGICAL_ERROR); + + const auto & columns = chunk.getColumns(); + 
JSONUtils::writeFieldDelimiter(*ostr, 2); + JSONUtils::writeObjectStart(*ostr, 1, "totals"); + JSONUtils::writeColumns(columns, fields, serializations, 0, false, format_settings, *ostr, 2); + JSONUtils::writeObjectEnd(*ostr, 1); +} + +void JSONColumnsWithMetadataBlockOutputFormat::finalizeImpl() +{ + auto outside_statistics = getOutsideStatistics(); + if (outside_statistics) + statistics = std::move(*outside_statistics); + + JSONUtils::writeAdditionalInfo( + rows, + statistics.rows_before_limit, + statistics.applied_limit, + statistics.watch, + statistics.progress, + format_settings.write_statistics, + *ostr); + + JSONUtils::writeObjectEnd(*ostr); + writeChar('\n', *ostr); + ostr->next(); +} + +void registerOutputFormatJSONColumnsWithMetadata(FormatFactory & factory) +{ + factory.registerOutputFormat("JSONColumnsWithMetadata", []( + WriteBuffer & buf, + const Block & sample, + const RowOutputFormatParams &, + const FormatSettings & format_settings) + { + return std::make_shared(buf, sample, format_settings); + }); + + factory.markFormatHasNoAppendSupport("JSONColumnsWithMetadata"); +} + +} diff --git a/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.h b/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.h new file mode 100644 index 00000000000..f56a79bdf56 --- /dev/null +++ b/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.h @@ -0,0 +1,67 @@ +#pragma once +#include + +namespace DB +{ + +/* Format JSONColumnsWithMetadata outputs all data as a single block in the next format: + * { + * "meta": + * [ + * { + * "name": "name1", + * "type": "type1" + * }, + * { + * "name": "name2", + * "type": "type2" + * }, + * ... + * ], + * + * "data": + * { + * "name1": [value1, value2, value3, ...], + * "name2": [value1, value2m value3, ...], + * ... + * }, + * + * "rows": ..., + * + * "statistics": + * { + * "elapsed": ..., + * "rows_read": ..., + * "bytes_read": ... + * } + * } + */ +class JSONColumnsWithMetadataBlockOutputFormat : public JSONColumnsBlockOutputFormat +{ +public: + JSONColumnsWithMetadataBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_); + + String getName() const override { return "JSONCompactColumnsBlockOutputFormat"; } + + void setRowsBeforeLimit(size_t rows_before_limit_) override { statistics.rows_before_limit = rows_before_limit_; statistics.applied_limit = true; } + void onProgress(const Progress & progress_) override { statistics.progress.incrementPiecewiseAtomically(progress_); } + +protected: + void consumeTotals(Chunk chunk) override; + void consumeExtremes(Chunk chunk) override; + + void writePrefix() override; + void writeSuffix() override; + void finalizeImpl() override; + + void writeChunkStart() override; + void writeChunkEnd() override; + + void writeExtremesElement(const char * title, const Columns & columns, size_t row_num); + + Statistics statistics; + std::unique_ptr validating_ostr; /// Validates UTF-8 sequences, replaces bad sequences with replacement character. 
+ size_t rows; +}; + +} diff --git a/src/Processors/Formats/Impl/JSONCompactColumnsBlockInputFormat.cpp b/src/Processors/Formats/Impl/JSONCompactColumnsBlockInputFormat.cpp new file mode 100644 index 00000000000..5b26ee2677b --- /dev/null +++ b/src/Processors/Formats/Impl/JSONCompactColumnsBlockInputFormat.cpp @@ -0,0 +1,65 @@ +#include +#include +#include + +namespace DB +{ + +JSONCompactColumnsReader::JSONCompactColumnsReader(ReadBuffer & in_) : JSONColumnsReaderBase(in_) +{ +} + +void JSONCompactColumnsReader::readChunkStart() +{ + skipWhitespaceIfAny(*in); + assertChar('[', *in); + skipWhitespaceIfAny(*in); +} + +std::optional JSONCompactColumnsReader::readColumnStart() +{ + skipWhitespaceIfAny(*in); + assertChar('[', *in); + skipWhitespaceIfAny(*in); + return std::nullopt; +} + +bool JSONCompactColumnsReader::checkChunkEnd() +{ + skipWhitespaceIfAny(*in); + if (!in->eof() && *in->position() == ']') + { + ++in->position(); + skipWhitespaceIfAny(*in); + return true; + } + return false; +} + + +void registerInputFormatJSONCompactColumns(FormatFactory & factory) +{ + factory.registerInputFormat( + "JSONCompactColumns", + [](ReadBuffer & buf, + const Block &sample, + const RowInputFormatParams &, + const FormatSettings & settings) + { + return std::make_shared(buf, sample, settings, std::make_unique(buf)); + } + ); +} + +void registerJSONCompactColumnsSchemaReader(FormatFactory & factory) +{ + factory.registerSchemaReader( + "JSONCompactColumns", + [](ReadBuffer & buf, const FormatSettings & settings) + { + return std::make_shared(buf, settings, std::make_unique(buf)); + } + ); +} + +} diff --git a/src/Processors/Formats/Impl/JSONCompactColumnsBlockInputFormat.h b/src/Processors/Formats/Impl/JSONCompactColumnsBlockInputFormat.h new file mode 100644 index 00000000000..7f23e127ab4 --- /dev/null +++ b/src/Processors/Formats/Impl/JSONCompactColumnsBlockInputFormat.h @@ -0,0 +1,25 @@ +#pragma once + +#include + +namespace DB +{ + +/* Format JSONCompactColumns reads each block of data in the next format: + * [ + * [value1, value2, value3, ...], + * [value1, value2m value3, ...], + * ... 
+ * ] + */ +class JSONCompactColumnsReader : public JSONColumnsReaderBase +{ +public: + JSONCompactColumnsReader(ReadBuffer & in_); + + void readChunkStart() override; + std::optional readColumnStart() override; + bool checkChunkEnd() override; +}; + +} diff --git a/src/Processors/Formats/Impl/JSONCompactColumnsBlockOutputFormat.cpp b/src/Processors/Formats/Impl/JSONCompactColumnsBlockOutputFormat.cpp new file mode 100644 index 00000000000..757345cbbe0 --- /dev/null +++ b/src/Processors/Formats/Impl/JSONCompactColumnsBlockOutputFormat.cpp @@ -0,0 +1,42 @@ +#include +#include +#include +#include + +namespace DB +{ + +JSONCompactColumnsBlockOutputFormat::JSONCompactColumnsBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_) + : JSONColumnsBlockOutputFormatBase(out_, header_, format_settings_), column_names(header_.getNames()) +{ +} + +void JSONCompactColumnsBlockOutputFormat::writeChunkStart() +{ + JSONUtils::writeArrayStart(*ostr); +} + +void JSONCompactColumnsBlockOutputFormat::writeColumnStart(size_t) +{ + JSONUtils::writeCompactArrayStart(*ostr, 1); +} + +void JSONCompactColumnsBlockOutputFormat::writeChunkEnd() +{ + JSONUtils::writeArrayEnd(*ostr); + writeChar('\n', *ostr); +} + +void registerOutputFormatJSONCompactColumns(FormatFactory & factory) +{ + factory.registerOutputFormat("JSONCompactColumns", []( + WriteBuffer & buf, + const Block & sample, + const RowOutputFormatParams &, + const FormatSettings & format_settings) + { + return std::make_shared(buf, sample, format_settings); + }); +} + +} diff --git a/src/Processors/Formats/Impl/JSONCompactColumnsBlockOutputFormat.h b/src/Processors/Formats/Impl/JSONCompactColumnsBlockOutputFormat.h new file mode 100644 index 00000000000..49612ed67f6 --- /dev/null +++ b/src/Processors/Formats/Impl/JSONCompactColumnsBlockOutputFormat.h @@ -0,0 +1,30 @@ +#pragma once +#include + +namespace DB +{ + +/* Format JSONCompactColumns outputs all data as a single block in the next format: + * [ + * [value1, value2, value3, ...], + * [value1, value2m value3, ...], + * ... 
+ * ] + */ +class JSONCompactColumnsBlockOutputFormat : public JSONColumnsBlockOutputFormatBase +{ +public: + JSONCompactColumnsBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_); + + String getName() const override { return "JSONCompactColumnsBlockOutputFormat"; } + +protected: + void writeChunkStart() override; + void writeChunkEnd() override; + + void writeColumnStart(size_t column_index) override; + + const Names column_names; +}; + +} diff --git a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp index 867b56c541b..ef59fc8f05a 100644 --- a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include #include @@ -109,7 +109,7 @@ std::vector JSONCompactEachRowFormatReader::readHeaderRow() bool JSONCompactEachRowFormatReader::readField(IColumn & column, const DataTypePtr & type, const SerializationPtr & serialization, bool /*is_last_file_column*/, const String & column_name) { skipWhitespaceIfAny(*in); - return readFieldImpl(*in, column, type, serialization, column_name, format_settings, yield_strings); + return JSONUtils::readField(*in, column, type, serialization, column_name, format_settings, yield_strings); } bool JSONCompactEachRowFormatReader::parseRowStartWithDiagnosticInfo(WriteBuffer & out) @@ -189,7 +189,7 @@ JSONCompactEachRowRowSchemaReader::JSONCompactEachRowRowSchemaReader( bool allow_bools_as_numbers = format_settings_.json.read_bools_as_numbers; setCommonTypeChecker([allow_bools_as_numbers](const DataTypePtr & first, const DataTypePtr & second) { - return getCommonTypeForJSONFormats(first, second, allow_bools_as_numbers); + return JSONUtils::getCommonTypeForJSONFormats(first, second, allow_bools_as_numbers); }); } @@ -209,7 +209,7 @@ DataTypes JSONCompactEachRowRowSchemaReader::readRowAndGetDataTypes() if (in.eof()) return {}; - return readRowAndGetDataTypesForJSONCompactEachRow(in, reader.yieldStrings()); + return JSONUtils::readRowAndGetDataTypesForJSONCompactEachRow(in, reader.yieldStrings()); } void registerInputFormatJSONCompactEachRow(FormatFactory & factory) @@ -258,7 +258,7 @@ void registerFileSegmentationEngineJSONCompactEachRow(FormatFactory & factory) size_t min_rows = 1 + int(with_names) + int(with_types); factory.registerFileSegmentationEngine(format_name, [min_rows](ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size) { - return fileSegmentationEngineJSONCompactEachRow(in, memory, min_chunk_size, min_rows); + return JSONUtils::fileSegmentationEngineJSONCompactEachRow(in, memory, min_chunk_size, min_rows); }); }; diff --git a/src/Processors/Formats/Impl/JSONCompactRowOutputFormat.cpp b/src/Processors/Formats/Impl/JSONCompactRowOutputFormat.cpp index b31c04b4554..47b79b71ae2 100644 --- a/src/Processors/Formats/Impl/JSONCompactRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONCompactRowOutputFormat.cpp @@ -1,5 +1,6 @@ #include #include +#include #include @@ -20,72 +21,50 @@ JSONCompactRowOutputFormat::JSONCompactRowOutputFormat( void JSONCompactRowOutputFormat::writeField(const IColumn & column, const ISerialization & serialization, size_t row_num) { - if (yield_strings) - { - WriteBufferFromOwnString buf; - - serialization.serializeText(column, row_num, buf, settings); - writeJSONString(buf.str(), *ostr, settings); - } - else - 
serialization.serializeTextJSON(column, row_num, *ostr, settings); - + JSONUtils::writeFieldFromColumn(column, serialization, row_num, yield_strings, settings, *ostr); ++field_number; } void JSONCompactRowOutputFormat::writeFieldDelimiter() { - writeCString(", ", *ostr); + JSONUtils::writeFieldCompactDelimiter(*ostr); } -void JSONCompactRowOutputFormat::writeTotalsFieldDelimiter() -{ - writeCString(",", *ostr); -} - - void JSONCompactRowOutputFormat::writeRowStartDelimiter() { - writeCString("\t\t[", *ostr); + JSONUtils::writeCompactArrayStart(*ostr, 2); } void JSONCompactRowOutputFormat::writeRowEndDelimiter() { - writeChar(']', *ostr); + JSONUtils::writeCompactArrayEnd(*ostr); field_number = 0; ++row_count; } void JSONCompactRowOutputFormat::writeBeforeTotals() { - writeCString(",\n", *ostr); - writeChar('\n', *ostr); - writeCString("\t\"totals\": [", *ostr); + JSONUtils::writeFieldDelimiter(*ostr, 2); + JSONUtils::writeCompactArrayStart(*ostr, 1, "totals"); +} + +void JSONCompactRowOutputFormat::writeTotals(const Columns & columns, size_t row_num) +{ + JSONUtils::writeCompactColumns(columns, serializations, row_num, yield_strings, settings, *ostr); } void JSONCompactRowOutputFormat::writeAfterTotals() { - writeChar(']', *ostr); + JSONUtils::writeCompactArrayEnd(*ostr); } void JSONCompactRowOutputFormat::writeExtremesElement(const char * title, const Columns & columns, size_t row_num) { - writeCString("\t\t\"", *ostr); - writeCString(title, *ostr); - writeCString("\": [", *ostr); - - size_t extremes_columns = columns.size(); - for (size_t i = 0; i < extremes_columns; ++i) - { - if (i != 0) - writeTotalsFieldDelimiter(); - - writeField(*columns[i], *serializations[i], row_num); - } - - writeChar(']', *ostr); + JSONUtils::writeCompactArrayStart(*ostr, 2, title); + JSONUtils::writeCompactColumns(columns, serializations, row_num, yield_strings, settings, *ostr); + JSONUtils::writeCompactArrayEnd(*ostr); } void registerOutputFormatJSONCompact(FormatFactory & factory) diff --git a/src/Processors/Formats/Impl/JSONCompactRowOutputFormat.h b/src/Processors/Formats/Impl/JSONCompactRowOutputFormat.h index a0e9a2a6026..d17a6acf019 100644 --- a/src/Processors/Formats/Impl/JSONCompactRowOutputFormat.h +++ b/src/Processors/Formats/Impl/JSONCompactRowOutputFormat.h @@ -36,12 +36,7 @@ private: void writeExtremesElement(const char * title, const Columns & columns, size_t row_num) override; - void writeTotalsField(const IColumn & column, const ISerialization & serialization, size_t row_num) override - { - return writeField(column, serialization, row_num); - } - - void writeTotalsFieldDelimiter() override; + void writeTotals(const Columns & columns, size_t row_num) override; }; } diff --git a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp index 4fb7a40ebfc..629ed66a1a4 100644 --- a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include #include @@ -140,7 +140,7 @@ void JSONEachRowRowInputFormat::readField(size_t index, MutableColumns & columns seen_columns[index] = true; const auto & type = getPort().getHeader().getByPosition(index).type; const auto & serialization = serializations[index]; - read_columns[index] = readFieldImpl(*in, *columns[index], type, serialization, columnName(index), format_settings, yield_strings); + read_columns[index] = JSONUtils::readField(*in, *columns[index], type, 
serialization, columnName(index), format_settings, yield_strings); } inline bool JSONEachRowRowInputFormat::advanceToNextKey(size_t key_index) @@ -307,13 +307,13 @@ void JSONEachRowRowInputFormat::readSuffix() } JSONEachRowSchemaReader::JSONEachRowSchemaReader(ReadBuffer & in_, bool json_strings_, const FormatSettings & format_settings) - : IRowWithNamesSchemaReader(in_, format_settings.max_rows_to_read_for_schema_inference) + : IRowWithNamesSchemaReader(in_) , json_strings(json_strings_) { bool allow_bools_as_numbers = format_settings.json.read_bools_as_numbers; setCommonTypeChecker([allow_bools_as_numbers](const DataTypePtr & first, const DataTypePtr & second) { - return getCommonTypeForJSONFormats(first, second, allow_bools_as_numbers); + return JSONUtils::getCommonTypeForJSONFormats(first, second, allow_bools_as_numbers); }); } @@ -350,7 +350,7 @@ NamesAndTypesList JSONEachRowSchemaReader::readRowAndGetNamesAndDataTypes(bool & return {}; } - return readRowAndGetNamesAndDataTypesForJSONEachRow(in, json_strings); + return JSONUtils::readRowAndGetNamesAndDataTypesForJSONEachRow(in, json_strings); } void registerInputFormatJSONEachRow(FormatFactory & factory) @@ -397,18 +397,18 @@ void registerInputFormatJSONEachRow(FormatFactory & factory) void registerFileSegmentationEngineJSONEachRow(FormatFactory & factory) { - factory.registerFileSegmentationEngine("JSONEachRow", &fileSegmentationEngineJSONEachRow); - factory.registerFileSegmentationEngine("JSONStringsEachRow", &fileSegmentationEngineJSONEachRow); - factory.registerFileSegmentationEngine("JSONLines", &fileSegmentationEngineJSONEachRow); - factory.registerFileSegmentationEngine("NDJSON", &fileSegmentationEngineJSONEachRow); + factory.registerFileSegmentationEngine("JSONEachRow", &JSONUtils::fileSegmentationEngineJSONEachRow); + factory.registerFileSegmentationEngine("JSONStringsEachRow", &JSONUtils::fileSegmentationEngineJSONEachRow); + factory.registerFileSegmentationEngine("JSONLines", &JSONUtils::fileSegmentationEngineJSONEachRow); + factory.registerFileSegmentationEngine("NDJSON", &JSONUtils::fileSegmentationEngineJSONEachRow); } void registerNonTrivialPrefixAndSuffixCheckerJSONEachRow(FormatFactory & factory) { - factory.registerNonTrivialPrefixAndSuffixChecker("JSONEachRow", nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl); - factory.registerNonTrivialPrefixAndSuffixChecker("JSONStringsEachRow", nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl); - factory.registerNonTrivialPrefixAndSuffixChecker("JSONLines", nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl); - factory.registerNonTrivialPrefixAndSuffixChecker("NDJSON", nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl); + factory.registerNonTrivialPrefixAndSuffixChecker("JSONEachRow", JSONUtils::nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl); + factory.registerNonTrivialPrefixAndSuffixChecker("JSONStringsEachRow", JSONUtils::nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl); + factory.registerNonTrivialPrefixAndSuffixChecker("JSONLines", JSONUtils::nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl); + factory.registerNonTrivialPrefixAndSuffixChecker("NDJSON", JSONUtils::nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl); } void registerJSONEachRowSchemaReader(FormatFactory & factory) diff --git a/src/Processors/Formats/Impl/JSONRowOutputFormat.cpp b/src/Processors/Formats/Impl/JSONRowOutputFormat.cpp index 61ac25ca441..fc2d3cb8133 100644 --- a/src/Processors/Formats/Impl/JSONRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONRowOutputFormat.cpp @@ -2,6 +2,7 @@ #include #include 
#include +#include namespace DB @@ -15,23 +16,9 @@ JSONRowOutputFormat::JSONRowOutputFormat( bool yield_strings_) : IRowOutputFormat(header, out_, params_), settings(settings_), yield_strings(yield_strings_) { - const auto & sample = getPort(PortKind::Main).getHeader(); - NamesAndTypesList columns(sample.getNamesAndTypesList()); - fields.assign(columns.begin(), columns.end()); - bool need_validate_utf8 = false; - for (size_t i = 0; i < sample.columns(); ++i) - { - if (!sample.getByPosition(i).type->textCanContainOnlyValidUTF8()) - need_validate_utf8 = true; - - WriteBufferFromOwnString buf; - { - WriteBufferValidUTF8 validating_buf(buf); - writeJSONString(fields[i].name, validating_buf, settings); - } - fields[i].name = buf.str(); - } + fields = header.getNamesAndTypes(); + JSONUtils::makeNamesAndTypesWithValidUTF8(fields, settings, need_validate_utf8); if (need_validate_utf8) { @@ -45,88 +32,34 @@ JSONRowOutputFormat::JSONRowOutputFormat( void JSONRowOutputFormat::writePrefix() { - writeCString("{\n", *ostr); - writeCString("\t\"meta\":\n", *ostr); - writeCString("\t[\n", *ostr); - - for (size_t i = 0; i < fields.size(); ++i) - { - writeCString("\t\t{\n", *ostr); - - writeCString("\t\t\t\"name\": ", *ostr); - writeString(fields[i].name, *ostr); - writeCString(",\n", *ostr); - writeCString("\t\t\t\"type\": ", *ostr); - writeJSONString(fields[i].type->getName(), *ostr, settings); - writeChar('\n', *ostr); - - writeCString("\t\t}", *ostr); - if (i + 1 < fields.size()) - writeChar(',', *ostr); - writeChar('\n', *ostr); - } - - writeCString("\t],\n", *ostr); - writeChar('\n', *ostr); - writeCString("\t\"data\":\n", *ostr); - writeCString("\t[\n", *ostr); + JSONUtils::writeObjectStart(*ostr); + JSONUtils::writeMetadata(fields, settings, *ostr); + JSONUtils::writeFieldDelimiter(*ostr, 2); + JSONUtils::writeArrayStart(*ostr, 1, "data"); } void JSONRowOutputFormat::writeField(const IColumn & column, const ISerialization & serialization, size_t row_num) { - writeCString("\t\t\t", *ostr); - writeString(fields[field_number].name, *ostr); - writeCString(": ", *ostr); - - if (yield_strings) - { - WriteBufferFromOwnString buf; - - serialization.serializeText(column, row_num, buf, settings); - writeJSONString(buf.str(), *ostr, settings); - } - else - serialization.serializeTextJSON(column, row_num, *ostr, settings); - - ++field_number; -} - -void JSONRowOutputFormat::writeTotalsField(const IColumn & column, const ISerialization & serialization, size_t row_num) -{ - writeCString("\t\t", *ostr); - writeString(fields[field_number].name, *ostr); - writeCString(": ", *ostr); - - if (yield_strings) - { - WriteBufferFromOwnString buf; - - serialization.serializeText(column, row_num, buf, settings); - writeJSONString(buf.str(), *ostr, settings); - } - else - serialization.serializeTextJSON(column, row_num, *ostr, settings); - + JSONUtils::writeFieldFromColumn(column, serialization, row_num, yield_strings, settings, *ostr, fields[field_number].name, 3); ++field_number; } void JSONRowOutputFormat::writeFieldDelimiter() { - writeCString(",\n", *ostr); + JSONUtils::writeFieldDelimiter(*ostr); } void JSONRowOutputFormat::writeRowStartDelimiter() { - writeCString("\t\t{\n", *ostr); + JSONUtils::writeObjectStart(*ostr, 2); } void JSONRowOutputFormat::writeRowEndDelimiter() { - writeChar('\n', *ostr); - writeCString("\t\t}", *ostr); + JSONUtils::writeObjectEnd(*ostr, 2); field_number = 0; ++row_count; } @@ -134,71 +67,42 @@ void JSONRowOutputFormat::writeRowEndDelimiter() void 
JSONRowOutputFormat::writeRowBetweenDelimiter() { - writeCString(",\n", *ostr); + JSONUtils::writeFieldDelimiter(*ostr); } void JSONRowOutputFormat::writeSuffix() { - writeChar('\n', *ostr); - writeCString("\t]", *ostr); + JSONUtils::writeArrayEnd(*ostr, 1); } void JSONRowOutputFormat::writeBeforeTotals() { - writeCString(",\n", *ostr); - writeChar('\n', *ostr); - writeCString("\t\"totals\":\n", *ostr); - writeCString("\t{\n", *ostr); + JSONUtils::writeFieldDelimiter(*ostr, 2); + JSONUtils::writeObjectStart(*ostr, 1, "totals"); } void JSONRowOutputFormat::writeTotals(const Columns & columns, size_t row_num) { - size_t columns_size = columns.size(); - - for (size_t i = 0; i < columns_size; ++i) - { - if (i != 0) - writeTotalsFieldDelimiter(); - - writeTotalsField(*columns[i], *serializations[i], row_num); - } + JSONUtils::writeColumns(columns, fields, serializations, row_num, yield_strings, settings, *ostr, 2); } void JSONRowOutputFormat::writeAfterTotals() { - writeChar('\n', *ostr); - writeCString("\t}", *ostr); - field_number = 0; + JSONUtils::writeObjectEnd(*ostr, 1); } void JSONRowOutputFormat::writeBeforeExtremes() { - writeCString(",\n", *ostr); - writeChar('\n', *ostr); - writeCString("\t\"extremes\":\n", *ostr); - writeCString("\t{\n", *ostr); + JSONUtils::writeFieldDelimiter(*ostr, 2); + JSONUtils::writeObjectStart(*ostr, 1, "extremes"); } void JSONRowOutputFormat::writeExtremesElement(const char * title, const Columns & columns, size_t row_num) { - writeCString("\t\t\"", *ostr); - writeCString(title, *ostr); - writeCString("\":\n", *ostr); - writeCString("\t\t{\n", *ostr); - - size_t extremes_columns = columns.size(); - for (size_t i = 0; i < extremes_columns; ++i) - { - if (i != 0) - writeFieldDelimiter(); - - writeField(*columns[i], *serializations[i], row_num); - } - - writeChar('\n', *ostr); - writeCString("\t\t}", *ostr); - field_number = 0; + JSONUtils::writeObjectStart(*ostr, 2, title); + JSONUtils::writeColumns(columns, fields, serializations, row_num, yield_strings, settings, *ostr, 3); + JSONUtils::writeObjectEnd(*ostr, 2); } void JSONRowOutputFormat::writeMinExtreme(const Columns & columns, size_t row_num) @@ -213,58 +117,29 @@ void JSONRowOutputFormat::writeMaxExtreme(const Columns & columns, size_t row_nu void JSONRowOutputFormat::writeAfterExtremes() { - writeChar('\n', *ostr); - writeCString("\t}", *ostr); + JSONUtils::writeObjectEnd(*ostr, 1); } void JSONRowOutputFormat::finalizeImpl() { - writeCString(",\n\n", *ostr); - writeCString("\t\"rows\": ", *ostr); - writeIntText(row_count, *ostr); - auto outside_statistics = getOutsideStatistics(); if (outside_statistics) statistics = std::move(*outside_statistics); - writeRowsBeforeLimitAtLeast(); - - if (settings.write_statistics) - writeStatistics(); + JSONUtils::writeAdditionalInfo( + row_count, + statistics.rows_before_limit, + statistics.applied_limit, + statistics.watch, + statistics.progress, + settings.write_statistics, + *ostr); + JSONUtils::writeObjectEnd(*ostr); writeChar('\n', *ostr); - writeCString("}\n", *ostr); ostr->next(); } -void JSONRowOutputFormat::writeRowsBeforeLimitAtLeast() -{ - if (statistics.applied_limit) - { - writeCString(",\n\n", *ostr); - writeCString("\t\"rows_before_limit_at_least\": ", *ostr); - writeIntText(statistics.rows_before_limit, *ostr); - } -} - -void JSONRowOutputFormat::writeStatistics() -{ - writeCString(",\n\n", *ostr); - writeCString("\t\"statistics\":\n", *ostr); - writeCString("\t{\n", *ostr); - - writeCString("\t\t\"elapsed\": ", *ostr); - 
writeText(statistics.watch.elapsedSeconds(), *ostr); - writeCString(",\n", *ostr); - writeCString("\t\t\"rows_read\": ", *ostr); - writeText(statistics.progress.read_rows.load(), *ostr); - writeCString(",\n", *ostr); - writeCString("\t\t\"bytes_read\": ", *ostr); - writeText(statistics.progress.read_bytes.load(), *ostr); - writeChar('\n', *ostr); - - writeCString("\t}", *ostr); -} void JSONRowOutputFormat::onProgress(const Progress & value) { diff --git a/src/Processors/Formats/Impl/JSONRowOutputFormat.h b/src/Processors/Formats/Impl/JSONRowOutputFormat.h index 8561f5b4870..3459cc1b7a6 100644 --- a/src/Processors/Formats/Impl/JSONRowOutputFormat.h +++ b/src/Processors/Formats/Impl/JSONRowOutputFormat.h @@ -63,12 +63,7 @@ protected: void finalizeImpl() override; - virtual void writeTotalsField(const IColumn & column, const ISerialization & serialization, size_t row_num); virtual void writeExtremesElement(const char * title, const Columns & columns, size_t row_num); - virtual void writeTotalsFieldDelimiter() { writeFieldDelimiter(); } - - void writeRowsBeforeLimitAtLeast(); - void writeStatistics(); void onRowsReadBeforeUpdate() override { row_count = getRowsReadBefore(); } diff --git a/src/Processors/Formats/Impl/LineAsStringRowInputFormat.cpp b/src/Processors/Formats/Impl/LineAsStringRowInputFormat.cpp index dc346b4f5f5..30084804d92 100644 --- a/src/Processors/Formats/Impl/LineAsStringRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/LineAsStringRowInputFormat.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include diff --git a/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.cpp b/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.cpp index 5f3f015a5b1..7768339b064 100644 --- a/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.cpp @@ -398,7 +398,7 @@ bool MySQLDumpRowInputFormat::readField(IColumn & column, size_t column_idx) void MySQLDumpRowInputFormat::skipField() { String tmp; - readQuotedFieldIntoString(tmp, *in); + readQuotedField(tmp, *in); } MySQLDumpSchemaReader::MySQLDumpSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_) @@ -434,7 +434,7 @@ DataTypes MySQLDumpSchemaReader::readRowAndGetDataTypes() if (!data_types.empty()) skipFieldDelimiter(in); - readQuotedFieldIntoString(value, in); + readQuotedField(value, in); auto type = determineDataTypeByEscapingRule(value, format_settings, FormatSettings::EscapingRule::Quoted); data_types.push_back(std::move(type)); } diff --git a/src/Processors/Formats/Impl/ODBCDriver2BlockOutputFormat.cpp b/src/Processors/Formats/Impl/ODBCDriver2BlockOutputFormat.cpp index a82285c1c19..7cf133e5739 100644 --- a/src/Processors/Formats/Impl/ODBCDriver2BlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ODBCDriver2BlockOutputFormat.cpp @@ -13,7 +13,7 @@ namespace DB { ODBCDriver2BlockOutputFormat::ODBCDriver2BlockOutputFormat( WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_) - : IOutputFormat(header_, out_), format_settings(format_settings_) + : IOutputFormat(header_, out_), format_settings(format_settings_), serializations(header_.getSerializations()) { } @@ -23,7 +23,7 @@ static void writeODBCString(WriteBuffer & out, const std::string & str) out.write(str.data(), str.size()); } -void ODBCDriver2BlockOutputFormat::writeRow(const Serializations & serializations, const Columns & columns, size_t row_idx, std::string & buffer) +void ODBCDriver2BlockOutputFormat::writeRow(const Columns & columns, size_t 
row_idx, std::string & buffer) { size_t num_columns = columns.size(); for (size_t column_idx = 0; column_idx < num_columns; ++column_idx) @@ -46,20 +46,14 @@ void ODBCDriver2BlockOutputFormat::writeRow(const Serializations & serialization } } -void ODBCDriver2BlockOutputFormat::write(Chunk chunk, PortKind port_kind) +void ODBCDriver2BlockOutputFormat::write(Chunk chunk, PortKind) { String text_value; - const auto & header = getPort(port_kind).getHeader(); const auto & columns = chunk.getColumns(); - size_t num_columns = columns.size(); - Serializations serializations(num_columns); - for (size_t i = 0; i < num_columns; ++i) - serializations[i] = header.getByPosition(i).type->getDefaultSerialization(); - const size_t rows = chunk.getNumRows(); for (size_t i = 0; i < rows; ++i) - writeRow(serializations, columns, i, text_value); + writeRow(columns, i, text_value); } void ODBCDriver2BlockOutputFormat::consume(Chunk chunk) diff --git a/src/Processors/Formats/Impl/ODBCDriver2BlockOutputFormat.h b/src/Processors/Formats/Impl/ODBCDriver2BlockOutputFormat.h index de6ea22dfd7..9a0a43aa5bb 100644 --- a/src/Processors/Formats/Impl/ODBCDriver2BlockOutputFormat.h +++ b/src/Processors/Formats/Impl/ODBCDriver2BlockOutputFormat.h @@ -35,8 +35,9 @@ private: void writePrefix() override; const FormatSettings format_settings; + Serializations serializations; - void writeRow(const Serializations & serializations, const Columns & columns, size_t row_idx, std::string & buffer); + void writeRow(const Columns & columns, size_t row_idx, std::string & buffer); void write(Chunk chunk, PortKind port_kind); }; diff --git a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp index 1531c0d2794..87351b6c5d9 100644 --- a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp @@ -65,10 +65,8 @@ Chunk ORCBlockInputFormat::generate() /// If defaults_for_omitted_fields is true, calculate the default values from default expression for omitted fields. /// Otherwise fill the missing columns with zero values of its type. if (format_settings.defaults_for_omitted_fields) - for (size_t row_idx = 0; row_idx < res.getNumRows(); ++row_idx) - for (const auto & column_idx : missing_columns) - block_missing_values.setBit(column_idx, row_idx); - + for (const auto & column_idx : missing_columns) + block_missing_values.setBits(column_idx, res.getNumRows()); return res; } diff --git a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp index 106b71a9df5..aaa3e8fe976 100644 --- a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp @@ -100,6 +100,8 @@ ORC_UNIQUE_PTR ORCBlockOutputFormat::getORCType(const DataTypePtr & t case TypeIndex::FixedString: [[fallthrough]]; case TypeIndex::String: { + if (format_settings.orc.output_string_as_string) + return orc::createPrimitiveType(orc::TypeKind::STRING); return orc::createPrimitiveType(orc::TypeKind::BINARY); } case TypeIndex::Nullable: diff --git a/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp b/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp index 27192829dde..d3e167d35c6 100644 --- a/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp @@ -88,10 +88,10 @@ void ParallelParsingInputFormat::parserThreadFunction(ThreadGroupStatusPtr threa // We don't know how many blocks will be. 
So we have to read them all // until an empty block occurred. Chunk chunk; - while (!parsing_finished && (chunk = parser.getChunk()) != Chunk()) + while (!parsing_finished && (chunk = parser.getChunk())) { /// Variable chunk is moved, but it is not really used in the next iteration. - /// NOLINTNEXTLINE(bugprone-use-after-move) + /// NOLINTNEXTLINE(bugprone-use-after-move, hicpp-invalid-access-moved) unit.chunk_ext.chunk.emplace_back(std::move(chunk)); unit.chunk_ext.block_missing_values.emplace_back(parser.getMissingValues()); } diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index 86987c665e0..062f161b7f9 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -66,9 +66,8 @@ Chunk ParquetBlockInputFormat::generate() /// If defaults_for_omitted_fields is true, calculate the default values from default expression for omitted fields. /// Otherwise fill the missing columns with zero values of its type. if (format_settings.defaults_for_omitted_fields) - for (size_t row_idx = 0; row_idx < res.getNumRows(); ++row_idx) - for (const auto & column_idx : missing_columns) - block_missing_values.setBit(column_idx, row_idx); + for (const auto & column_idx : missing_columns) + block_missing_values.setBits(column_idx, res.getNumRows()); return res; } diff --git a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp index 68e2ae1c6eb..c8e94311af5 100644 --- a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp @@ -29,7 +29,7 @@ void ParquetBlockOutputFormat::consume(Chunk chunk) if (!ch_column_to_arrow_column) { const Block & header = getPort(PortKind::Main).getHeader(); - ch_column_to_arrow_column = std::make_unique(header, "Parquet", false); + ch_column_to_arrow_column = std::make_unique(header, "Parquet", false, format_settings.parquet.output_string_as_string); } ch_column_to_arrow_column->chChunkToArrowTable(arrow_table, chunk, columns_num); diff --git a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp index ad65a5f707d..8fbf0a14916 100644 --- a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp @@ -22,7 +22,7 @@ namespace ErrorCodes PrettyBlockOutputFormat::PrettyBlockOutputFormat( WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_) - : IOutputFormat(header_, out_), format_settings(format_settings_) + : IOutputFormat(header_, out_), format_settings(format_settings_), serializations(header_.getSerializations()) { struct winsize w; if (0 == ioctl(STDOUT_FILENO, TIOCGWINSZ, &w)) @@ -143,7 +143,7 @@ GridSymbols ascii_grid_symbols { } -void PrettyBlockOutputFormat::write(const Chunk & chunk, PortKind port_kind) +void PrettyBlockOutputFormat::write(Chunk chunk, PortKind port_kind) { UInt64 max_rows = format_settings.pretty.max_rows; @@ -158,10 +158,6 @@ void PrettyBlockOutputFormat::write(const Chunk & chunk, PortKind port_kind) const auto & columns = chunk.getColumns(); const auto & header = getPort(port_kind).getHeader(); - Serializations serializations(num_columns); - for (size_t i = 0; i < num_columns; ++i) - serializations[i] = header.getByPosition(i).type->getSerialization(*columns[i]->getSerializationInfo()); - WidthsPerColumn widths; Widths max_widths; 
Widths name_widths; @@ -371,21 +367,21 @@ void PrettyBlockOutputFormat::writeValueWithPadding( void PrettyBlockOutputFormat::consume(Chunk chunk) { - write(chunk, PortKind::Main); + write(std::move(chunk), PortKind::Main); } void PrettyBlockOutputFormat::consumeTotals(Chunk chunk) { total_rows = 0; writeCString("\nTotals:\n", out); - write(chunk, PortKind::Totals); + write(std::move(chunk), PortKind::Totals); } void PrettyBlockOutputFormat::consumeExtremes(Chunk chunk) { total_rows = 0; writeCString("\nExtremes:\n", out); - write(chunk, PortKind::Extremes); + write(std::move(chunk), PortKind::Extremes); } diff --git a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.h b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.h index 091010f9131..cfdd2213515 100644 --- a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.h +++ b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.h @@ -33,11 +33,12 @@ protected: size_t row_number_width = 7; // "10000. " const FormatSettings format_settings; + Serializations serializations; using Widths = PODArray; using WidthsPerColumn = std::vector; - virtual void write(const Chunk & chunk, PortKind port_kind); + virtual void write(Chunk chunk, PortKind port_kind); void writeSuffix() override; void onRowsReadBeforeUpdate() override { total_rows = getRowsReadBefore(); } diff --git a/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp b/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp index be8751cde13..9ba358a76e1 100644 --- a/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp @@ -149,7 +149,6 @@ void PrettyCompactBlockOutputFormat::writeBottom(const Widths & max_widths) void PrettyCompactBlockOutputFormat::writeRow( size_t row_num, const Block & header, - const Serializations & serializations, const Columns & columns, const WidthsPerColumn & widths, const Widths & max_widths) @@ -187,7 +186,7 @@ void PrettyCompactBlockOutputFormat::writeRow( writeCString("\n", out); } -void PrettyCompactBlockOutputFormat::write(const Chunk & chunk, PortKind port_kind) +void PrettyCompactBlockOutputFormat::write(Chunk chunk, PortKind port_kind) { UInt64 max_rows = format_settings.pretty.max_rows; @@ -202,18 +201,11 @@ void PrettyCompactBlockOutputFormat::write(const Chunk & chunk, PortKind port_ki { if (!mono_chunk) { - mono_chunk = chunk.clone(); + mono_chunk = std::move(chunk); return; } - MutableColumns mutation = mono_chunk.mutateColumns(); - for (size_t position = 0; position < mutation.size(); ++position) - { - auto column = chunk.getColumns()[position]; - mutation[position]->insertRangeFrom(*column, 0, column->size()); - } - size_t rows = mutation[0]->size(); - mono_chunk.setColumns(std::move(mutation), rows); + mono_chunk.append(chunk); return; } else @@ -241,13 +233,8 @@ void PrettyCompactBlockOutputFormat::writeChunk(const Chunk & chunk, PortKind po writeHeader(header, max_widths, name_widths); - size_t num_columns = header.columns(); - Serializations serializations(num_columns); - for (size_t i = 0; i < num_columns; ++i) - serializations[i] = header.getByPosition(i).type->getDefaultSerialization(); - for (size_t i = 0; i < num_rows && total_rows + i < max_rows; ++i) - writeRow(i, header, serializations, columns, widths, max_widths); + writeRow(i, header, columns, widths, max_widths); writeBottom(max_widths); diff --git a/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.h b/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.h index 
a52ffe3d70a..5c39328051c 100644 --- a/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.h +++ b/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.h @@ -17,13 +17,12 @@ public: String getName() const override { return "PrettyCompactBlockOutputFormat"; } private: - void write(const Chunk & chunk, PortKind port_kind) override; + void write(Chunk chunk, PortKind port_kind) override; void writeHeader(const Block & block, const Widths & max_widths, const Widths & name_widths); void writeBottom(const Widths & max_widths); void writeRow( size_t row_num, const Block & header, - const Serializations & serializations, const Columns & columns, const WidthsPerColumn & widths, const Widths & max_widths); diff --git a/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.cpp b/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.cpp index 85b27a6fb57..730907ba45c 100644 --- a/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.cpp @@ -9,7 +9,7 @@ namespace DB { -void PrettySpaceBlockOutputFormat::write(const Chunk & chunk, PortKind port_kind) +void PrettySpaceBlockOutputFormat::write(Chunk chunk, PortKind port_kind) { UInt64 max_rows = format_settings.pretty.max_rows; @@ -24,10 +24,6 @@ void PrettySpaceBlockOutputFormat::write(const Chunk & chunk, PortKind port_kind const auto & header = getPort(port_kind).getHeader(); const auto & columns = chunk.getColumns(); - Serializations serializations(num_columns); - for (size_t i = 0; i < num_columns; ++i) - serializations[i] = header.getByPosition(i).type->getSerialization(*columns[i]->getSerializationInfo()); - WidthsPerColumn widths; Widths max_widths; Widths name_widths; diff --git a/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.h b/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.h index b3090497783..6a8cb4e799c 100644 --- a/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.h +++ b/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.h @@ -17,7 +17,7 @@ public: String getName() const override { return "PrettySpaceBlockOutputFormat"; } private: - void write(const Chunk & chunk, PortKind port_kind) override; + void write(Chunk chunk, PortKind port_kind) override; void writeSuffix() override; }; diff --git a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp index 26c7d1aced5..5c48062ace8 100644 --- a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp @@ -214,10 +214,7 @@ void TSKVRowInputFormat::resetParser() } TSKVSchemaReader::TSKVSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_) - : IRowWithNamesSchemaReader( - in_, - format_settings_.max_rows_to_read_for_schema_inference, - getDefaultDataTypeForEscapingRule(FormatSettings::EscapingRule::Escaped)) + : IRowWithNamesSchemaReader(in_, getDefaultDataTypeForEscapingRule(FormatSettings::EscapingRule::Escaped)) , format_settings(format_settings_) { } diff --git a/src/Processors/Formats/Impl/TSKVRowOutputFormat.cpp b/src/Processors/Formats/Impl/TSKVRowOutputFormat.cpp index 14dec8420a8..0e29d74b419 100644 --- a/src/Processors/Formats/Impl/TSKVRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/TSKVRowOutputFormat.cpp @@ -8,12 +8,8 @@ namespace DB { TSKVRowOutputFormat::TSKVRowOutputFormat(WriteBuffer & out_, const Block & header, const RowOutputFormatParams & params_, const FormatSettings & format_settings_) - : TabSeparatedRowOutputFormat(out_, header, 
false, false, false, params_, format_settings_) + : TabSeparatedRowOutputFormat(out_, header, false, false, false, params_, format_settings_), fields(header.getNamesAndTypes()) { - const auto & sample = getPort(PortKind::Main).getHeader(); - NamesAndTypesList columns(sample.getNamesAndTypesList()); - fields.assign(columns.begin(), columns.end()); - for (auto & field : fields) { WriteBufferFromOwnString wb; diff --git a/src/Processors/Formats/Impl/TemplateBlockOutputFormat.cpp b/src/Processors/Formats/Impl/TemplateBlockOutputFormat.cpp index 5c5b99f61da..0e7bdb259ac 100644 --- a/src/Processors/Formats/Impl/TemplateBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/TemplateBlockOutputFormat.cpp @@ -3,7 +3,6 @@ #include #include #include -#include namespace DB @@ -17,15 +16,9 @@ namespace ErrorCodes TemplateBlockOutputFormat::TemplateBlockOutputFormat(const Block & header_, WriteBuffer & out_, const FormatSettings & settings_, ParsedTemplateFormatString format_, ParsedTemplateFormatString row_format_, std::string row_between_delimiter_) - : IOutputFormat(header_, out_), settings(settings_), format(std::move(format_)) + : IOutputFormat(header_, out_), settings(settings_), serializations(header_.getSerializations()), format(std::move(format_)) , row_format(std::move(row_format_)), row_between_delimiter(std::move(row_between_delimiter_)) { - const auto & sample = getPort(PortKind::Main).getHeader(); - size_t columns = sample.columns(); - serializations.resize(columns); - for (size_t i = 0; i < columns; ++i) - serializations[i] = sample.safeGetByPosition(i).type->getDefaultSerialization(); - /// Validate format string for whole output size_t data_idx = format.format_idx_to_column_idx.size() + 1; for (size_t i = 0; i < format.format_idx_to_column_idx.size(); ++i) diff --git a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp index 727b7fb0a1f..41f77f8bbf2 100644 --- a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp @@ -49,11 +49,8 @@ ValuesBlockInputFormat::ValuesBlockInputFormat( params(params_), format_settings(format_settings_), num_columns(header_.columns()), parser_type_for_column(num_columns, ParserType::Streaming), attempts_to_deduce_template(num_columns), attempts_to_deduce_template_cached(num_columns), - rows_parsed_using_template(num_columns), templates(num_columns), types(header_.getDataTypes()) + rows_parsed_using_template(num_columns), templates(num_columns), types(header_.getDataTypes()), serializations(header_.getSerializations()) { - serializations.resize(types.size()); - for (size_t i = 0; i < types.size(); ++i) - serializations[i] = types[i]->getDefaultSerialization(); } Chunk ValuesBlockInputFormat::generate() @@ -599,7 +596,7 @@ DataTypes ValuesSchemaReader::readRowAndGetDataTypes() skipWhitespaceIfAny(buf); } - readQuotedFieldIntoString(value, buf); + readQuotedField(value, buf); auto type = determineDataTypeByEscapingRule(value, format_settings, FormatSettings::EscapingRule::Quoted); data_types.push_back(std::move(type)); } diff --git a/src/Processors/Formats/Impl/XMLRowOutputFormat.cpp b/src/Processors/Formats/Impl/XMLRowOutputFormat.cpp index cc2b37189f9..d5fb29874d1 100644 --- a/src/Processors/Formats/Impl/XMLRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/XMLRowOutputFormat.cpp @@ -8,11 +8,9 @@ namespace DB { XMLRowOutputFormat::XMLRowOutputFormat(WriteBuffer & out_, const Block & header_, const RowOutputFormatParams & params_, const 
FormatSettings & format_settings_) - : IRowOutputFormat(header_, out_, params_), format_settings(format_settings_) + : IRowOutputFormat(header_, out_, params_), fields(header_.getNamesAndTypes()), format_settings(format_settings_) { const auto & sample = getPort(PortKind::Main).getHeader(); - NamesAndTypesList columns(sample.getNamesAndTypesList()); - fields.assign(columns.begin(), columns.end()); field_tag_names.resize(sample.columns()); bool need_validate_utf8 = false; @@ -200,7 +198,6 @@ void XMLRowOutputFormat::onProgress(const Progress & value) void XMLRowOutputFormat::finalizeImpl() { - writeCString("\t", *ostr); writeIntText(row_count, *ostr); writeCString("\n", *ostr); diff --git a/src/Processors/QueryPlan/TotalsHavingStep.cpp b/src/Processors/QueryPlan/TotalsHavingStep.cpp index be2cd2348a4..5e7e7011e0e 100644 --- a/src/Processors/QueryPlan/TotalsHavingStep.cpp +++ b/src/Processors/QueryPlan/TotalsHavingStep.cpp @@ -1,7 +1,7 @@ #include #include -#include #include +#include #include #include #include @@ -27,6 +27,7 @@ static ITransformingStep::Traits getTraits(bool has_filter) TotalsHavingStep::TotalsHavingStep( const DataStream & input_stream_, + const ColumnsMask & aggregates_mask_, bool overflow_row_, const ActionsDAGPtr & actions_dag_, const std::string & filter_column_, @@ -41,8 +42,10 @@ TotalsHavingStep::TotalsHavingStep( actions_dag_.get(), filter_column_, remove_filter_, - final_), + final_, + aggregates_mask_), getTraits(!filter_column_.empty())) + , aggregates_mask(aggregates_mask_) , overflow_row(overflow_row_) , actions_dag(actions_dag_) , filter_column_name(filter_column_) @@ -59,6 +62,7 @@ void TotalsHavingStep::transformPipeline(QueryPipelineBuilder & pipeline, const auto totals_having = std::make_shared( pipeline.getHeader(), + aggregates_mask, overflow_row, expression_actions, filter_column_name, diff --git a/src/Processors/QueryPlan/TotalsHavingStep.h b/src/Processors/QueryPlan/TotalsHavingStep.h index b51b332761d..4ad741a1b44 100644 --- a/src/Processors/QueryPlan/TotalsHavingStep.h +++ b/src/Processors/QueryPlan/TotalsHavingStep.h @@ -1,5 +1,6 @@ #pragma once #include +#include namespace DB { @@ -15,6 +16,7 @@ class TotalsHavingStep : public ITransformingStep public: TotalsHavingStep( const DataStream & input_stream_, + const ColumnsMask & aggregates_mask_, bool overflow_row_, const ActionsDAGPtr & actions_dag_, const std::string & filter_column_, @@ -33,6 +35,7 @@ public: const ActionsDAGPtr & getActions() const { return actions_dag; } private: + const ColumnsMask aggregates_mask; bool overflow_row; ActionsDAGPtr actions_dag; String filter_column_name; diff --git a/src/Processors/Transforms/AggregatingInOrderTransform.cpp b/src/Processors/Transforms/AggregatingInOrderTransform.cpp index 9f01b45f864..7491dda8164 100644 --- a/src/Processors/Transforms/AggregatingInOrderTransform.cpp +++ b/src/Processors/Transforms/AggregatingInOrderTransform.cpp @@ -38,7 +38,7 @@ AggregatingInOrderTransform::AggregatingInOrderTransform( , variants(*many_data->variants[current_variant]) { /// We won't finalize states in order to merge same states (generated due to multi-thread execution) in AggregatingSortedTransform - res_header = params->getCustomHeader(false); + res_header = params->getCustomHeader(/* final_= */ false); for (size_t i = 0; i < group_by_info->order_key_prefix_descr.size(); ++i) { @@ -310,5 +310,23 @@ void AggregatingInOrderTransform::generate() need_generate = false; } +FinalizeAggregatedTransform::FinalizeAggregatedTransform(Block header, 
AggregatingTransformParamsPtr params_) + : ISimpleTransform({std::move(header)}, {params_->getHeader()}, true) + , params(params_) + , aggregates_mask(getAggregatesMask(params->getHeader(), params->params.aggregates)) +{ +} + +void FinalizeAggregatedTransform::transform(Chunk & chunk) +{ + if (params->final) + finalizeChunk(chunk, aggregates_mask); + else if (!chunk.getChunkInfo()) + { + auto info = std::make_shared(); + chunk.setChunkInfo(std::move(info)); + } +} + } diff --git a/src/Processors/Transforms/AggregatingInOrderTransform.h b/src/Processors/Transforms/AggregatingInOrderTransform.h index 57310f16245..9632b107463 100644 --- a/src/Processors/Transforms/AggregatingInOrderTransform.h +++ b/src/Processors/Transforms/AggregatingInOrderTransform.h @@ -4,7 +4,7 @@ #include #include #include -#include +#include namespace DB { @@ -90,25 +90,14 @@ private: class FinalizeAggregatedTransform : public ISimpleTransform { public: - FinalizeAggregatedTransform(Block header, AggregatingTransformParamsPtr params_) - : ISimpleTransform({std::move(header)}, {params_->getHeader()}, true) - , params(params_) {} - - void transform(Chunk & chunk) override - { - if (params->final) - finalizeChunk(chunk); - else if (!chunk.getChunkInfo()) - { - auto info = std::make_shared(); - chunk.setChunkInfo(std::move(info)); - } - } + FinalizeAggregatedTransform(Block header, AggregatingTransformParamsPtr params_); + void transform(Chunk & chunk) override; String getName() const override { return "FinalizeAggregatedTransform"; } private: AggregatingTransformParamsPtr params; + ColumnsMask aggregates_mask; }; diff --git a/src/Processors/Transforms/CubeTransform.cpp b/src/Processors/Transforms/CubeTransform.cpp index 456eccc732f..6b672407cd1 100644 --- a/src/Processors/Transforms/CubeTransform.cpp +++ b/src/Processors/Transforms/CubeTransform.cpp @@ -12,6 +12,7 @@ CubeTransform::CubeTransform(Block header, AggregatingTransformParamsPtr params_ : IAccumulatingTransform(std::move(header), params_->getHeader()) , params(std::move(params_)) , keys(params->params.keys) + , aggregates_mask(getAggregatesMask(params->getHeader(), params->params.aggregates)) { if (keys.size() >= 8 * sizeof(mask)) throw Exception("Too many keys are used for CubeTransform.", ErrorCodes::LOGICAL_ERROR); @@ -73,7 +74,7 @@ Chunk CubeTransform::generate() cube_chunk = merge(std::move(chunks), false); } - finalizeChunk(gen_chunk); + finalizeChunk(gen_chunk, aggregates_mask); return gen_chunk; } diff --git a/src/Processors/Transforms/CubeTransform.h b/src/Processors/Transforms/CubeTransform.h index 6d0e2338174..a1a1c9271c3 100644 --- a/src/Processors/Transforms/CubeTransform.h +++ b/src/Processors/Transforms/CubeTransform.h @@ -1,6 +1,7 @@ #pragma once #include #include +#include namespace DB @@ -20,7 +21,8 @@ protected: private: AggregatingTransformParamsPtr params; - ColumnNumbers keys; + const ColumnNumbers keys; + const ColumnsMask aggregates_mask; Chunks consumed_chunks; Chunk cube_chunk; diff --git a/src/Processors/Transforms/RollupTransform.cpp b/src/Processors/Transforms/RollupTransform.cpp index fb51b5f6b45..3150ff99e76 100644 --- a/src/Processors/Transforms/RollupTransform.cpp +++ b/src/Processors/Transforms/RollupTransform.cpp @@ -8,6 +8,7 @@ RollupTransform::RollupTransform(Block header, AggregatingTransformParamsPtr par : IAccumulatingTransform(std::move(header), params_->getHeader()) , params(std::move(params_)) , keys(params->params.keys) + , aggregates_mask(getAggregatesMask(params->getHeader(), params->params.aggregates)) { } @@ -56,7 
+57,7 @@ Chunk RollupTransform::generate() rollup_chunk = merge(std::move(chunks), false); } - finalizeChunk(gen_chunk); + finalizeChunk(gen_chunk, aggregates_mask); return gen_chunk; } diff --git a/src/Processors/Transforms/RollupTransform.h b/src/Processors/Transforms/RollupTransform.h index fd435740a63..74e83b8535d 100644 --- a/src/Processors/Transforms/RollupTransform.h +++ b/src/Processors/Transforms/RollupTransform.h @@ -1,6 +1,7 @@ #pragma once #include #include +#include namespace DB { @@ -19,7 +20,9 @@ protected: private: AggregatingTransformParamsPtr params; - ColumnNumbers keys; + const ColumnNumbers keys; + const ColumnsMask aggregates_mask; + Chunks consumed_chunks; Chunk rollup_chunk; size_t last_removed_key = 0; diff --git a/src/Processors/Transforms/TotalsHavingTransform.cpp b/src/Processors/Transforms/TotalsHavingTransform.cpp index 45e972afa3f..29be0ceed23 100644 --- a/src/Processors/Transforms/TotalsHavingTransform.cpp +++ b/src/Processors/Transforms/TotalsHavingTransform.cpp @@ -17,33 +17,21 @@ namespace ErrorCodes extern const int ILLEGAL_COLUMN; } -void finalizeChunk(Chunk & chunk) -{ - auto num_rows = chunk.getNumRows(); - auto columns = chunk.detachColumns(); - - for (auto & column : columns) - if (typeid_cast(column.get())) - column = ColumnAggregateFunction::convertToValues(IColumn::mutate(std::move(column))); - - chunk.setColumns(std::move(columns), num_rows); -} - -void finalizeBlock(Block & block) +static void finalizeBlock(Block & block, const ColumnsMask & aggregates_mask) { for (size_t i = 0; i < block.columns(); ++i) { - ColumnWithTypeAndName & current = block.getByPosition(i); - const DataTypeAggregateFunction * unfinalized_type = typeid_cast(current.type.get()); + if (!aggregates_mask[i]) + continue; - if (unfinalized_type) + ColumnWithTypeAndName & current = block.getByPosition(i); + const DataTypeAggregateFunction & unfinalized_type = typeid_cast(*current.type); + + current.type = unfinalized_type.getReturnType(); + if (current.column) { - current.type = unfinalized_type->getReturnType(); - if (current.column) - { - auto mut_column = IColumn::mutate(std::move(current.column)); - current.column = ColumnAggregateFunction::convertToValues(std::move(mut_column)); - } + auto mut_column = IColumn::mutate(std::move(current.column)); + current.column = ColumnAggregateFunction::convertToValues(std::move(mut_column)); } } } @@ -53,10 +41,11 @@ Block TotalsHavingTransform::transformHeader( const ActionsDAG * expression, const std::string & filter_column_name, bool remove_filter, - bool final) + bool final, + const ColumnsMask & aggregates_mask) { if (final) - finalizeBlock(block); + finalizeBlock(block, aggregates_mask); if (expression) { @@ -70,6 +59,7 @@ Block TotalsHavingTransform::transformHeader( TotalsHavingTransform::TotalsHavingTransform( const Block & header, + const ColumnsMask & aggregates_mask_, bool overflow_row_, const ExpressionActionsPtr & expression_, const std::string & filter_column_, @@ -77,7 +67,8 @@ TotalsHavingTransform::TotalsHavingTransform( TotalsMode totals_mode_, double auto_include_threshold_, bool final_) - : ISimpleTransform(header, transformHeader(header, expression_ ? &expression_->getActionsDAG() : nullptr, filter_column_, remove_filter_, final_), true) + : ISimpleTransform(header, transformHeader(header, expression_ ? 
&expression_->getActionsDAG() : nullptr, filter_column_, remove_filter_, final_, aggregates_mask_), true) + , aggregates_mask(aggregates_mask_) , overflow_row(overflow_row_) , expression(expression_) , filter_column_name(filter_column_) @@ -87,7 +78,7 @@ TotalsHavingTransform::TotalsHavingTransform( , final(final_) { finalized_header = getInputPort().getHeader(); - finalizeBlock(finalized_header); + finalizeBlock(finalized_header, aggregates_mask); /// Port for Totals. if (expression) @@ -179,7 +170,7 @@ void TotalsHavingTransform::transform(Chunk & chunk) auto finalized = chunk.clone(); if (final) - finalizeChunk(finalized); + finalizeChunk(finalized, aggregates_mask); total_keys += finalized.getNumRows(); @@ -300,7 +291,7 @@ void TotalsHavingTransform::prepareTotals() } totals = Chunk(std::move(current_totals), 1); - finalizeChunk(totals); + finalizeChunk(totals, aggregates_mask); if (expression) { diff --git a/src/Processors/Transforms/TotalsHavingTransform.h b/src/Processors/Transforms/TotalsHavingTransform.h index 6b4afb2fa8b..2567781771e 100644 --- a/src/Processors/Transforms/TotalsHavingTransform.h +++ b/src/Processors/Transforms/TotalsHavingTransform.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include namespace DB @@ -25,6 +26,7 @@ class TotalsHavingTransform : public ISimpleTransform public: TotalsHavingTransform( const Block & header, + const ColumnsMask & aggregates_mask_, bool overflow_row_, const ExpressionActionsPtr & expression_, const std::string & filter_column_, @@ -40,7 +42,7 @@ public: Status prepare() override; void work() override; - static Block transformHeader(Block block, const ActionsDAG * expression, const std::string & filter_column_name, bool remove_filter, bool final); + static Block transformHeader(Block block, const ActionsDAG * expression, const std::string & filter_column_name, bool remove_filter, bool final, const ColumnsMask & aggregates_mask); protected: void transform(Chunk & chunk) override; @@ -54,6 +56,7 @@ private: void prepareTotals(); /// Params + const ColumnsMask aggregates_mask; bool overflow_row; ExpressionActionsPtr expression; String filter_column_name; @@ -77,6 +80,4 @@ private: MutableColumns current_totals; }; -void finalizeChunk(Chunk & chunk); - } diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index 45993be70d9..82e7cd48085 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -425,7 +425,7 @@ auto WindowTransform::moveRowNumberNoCheck(const RowNumber & _x, int64_t offset) { RowNumber x = _x; - if (offset > 0) + if (offset > 0 && x != blocksEnd()) { for (;;) { diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 451d213ca9c..a3cc620e418 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -314,7 +314,7 @@ Chain buildPushingToViewsChain( runtime_stats->type = QueryViewsLogElement::ViewType::WINDOW; query = window_view->getMergeableQuery(); // Used only to log in system.query_views_log out = buildPushingToViewsChain( - dependent_table, dependent_metadata_snapshot, insert_context, ASTPtr(), true, view_thread_status, view_counter_ms, storage_header); + dependent_table, dependent_metadata_snapshot, insert_context, ASTPtr(), true, view_thread_status, view_counter_ms); } else out = buildPushingToViewsChain( @@ -392,7 +392,7 @@ Chain buildPushingToViewsChain( } 
else if (auto * window_view = dynamic_cast<StorageWindowView *>(storage.get())) { - auto sink = std::make_shared(live_view_header, *window_view, storage, context); + auto sink = std::make_shared(window_view->getInputHeader(), *window_view, storage, context); sink->setRuntimeData(thread_status, elapsed_counter_ms); result_chain.addSource(std::move(sink)); } diff --git a/src/Processors/Transforms/finalizeChunk.cpp b/src/Processors/Transforms/finalizeChunk.cpp new file mode 100644 index 00000000000..4e643db8470 --- /dev/null +++ b/src/Processors/Transforms/finalizeChunk.cpp @@ -0,0 +1,32 @@ +#include +#include + +namespace DB +{ + +ColumnsMask getAggregatesMask(const Block & header, const AggregateDescriptions & aggregates) +{ + ColumnsMask mask(header.columns()); + for (const auto & aggregate : aggregates) + mask[header.getPositionByName(aggregate.column_name)] = true; + return mask; +} + +void finalizeChunk(Chunk & chunk, const ColumnsMask & aggregates_mask) +{ + auto num_rows = chunk.getNumRows(); + auto columns = chunk.detachColumns(); + + for (size_t i = 0; i < columns.size(); ++i) + { + if (!aggregates_mask[i]) + continue; + + auto & column = columns[i]; + column = ColumnAggregateFunction::convertToValues(IColumn::mutate(std::move(column))); + } + + chunk.setColumns(std::move(columns), num_rows); +} + +} diff --git a/src/Processors/Transforms/finalizeChunk.h b/src/Processors/Transforms/finalizeChunk.h new file mode 100644 index 00000000000..d4a3921187d --- /dev/null +++ b/src/Processors/Transforms/finalizeChunk.h @@ -0,0 +1,20 @@ +#pragma once + +#include +#include +#include +#include + +namespace DB +{ + +using ColumnsMask = std::vector<bool>; + +ColumnsMask getAggregatesMask(const Block & header, const AggregateDescriptions & aggregates); + +/// Convert ColumnAggregateFunction to real values. 
+/// +/// @param aggregates_mask columns to convert (see getAggregatesMask()) +void finalizeChunk(Chunk & chunk, const ColumnsMask & aggregates_mask); + +} diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 687b32d0ef0..f056842926d 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -1480,7 +1480,7 @@ bool TCPHandler::receiveUnexpectedData(bool throw_exception) maybe_compressed_in = in; auto skip_block_in = std::make_shared(*maybe_compressed_in, client_tcp_protocol_version); - bool read_ok = skip_block_in->read(); + bool read_ok = !!skip_block_in->read(); if (!read_ok) state.read_all_data = true; diff --git a/src/Storages/Cache/ExternalDataSourceCache.cpp b/src/Storages/Cache/ExternalDataSourceCache.cpp index 2440b518568..84ae35451ea 100644 --- a/src/Storages/Cache/ExternalDataSourceCache.cpp +++ b/src/Storages/Cache/ExternalDataSourceCache.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -14,6 +15,10 @@ #include #include #include +#include +#include +#include +#include namespace ProfileEvents { @@ -162,28 +167,56 @@ ExternalDataSourceCache & ExternalDataSourceCache::instance() void ExternalDataSourceCache::recoverTask() { std::vector invalid_paths; - for (auto const & group_dir : fs::directory_iterator{root_dir}) + for (size_t i = 0, sz = root_dirs.size(); i < sz; ++i) { - for (auto const & cache_dir : fs::directory_iterator{group_dir.path()}) + const auto & root_dir = root_dirs[i]; + for (auto const & group_dir : fs::directory_iterator{root_dir}) { - String path = cache_dir.path(); - auto cache_controller = RemoteCacheController::recover(path); - if (!cache_controller) + for (auto const & cache_dir : fs::directory_iterator{group_dir.path()}) { - invalid_paths.emplace_back(path); - continue; - } - auto cache_load_func = [&] { return cache_controller; }; - if (!lru_caches->getOrSet(path, cache_load_func)) - { - invalid_paths.emplace_back(path); + String subpath = cache_dir.path().stem(); + String path = cache_dir.path(); + size_t root_dir_idx = ConsistentHashing(sipHash64(subpath.c_str(), subpath.size()), sz); + if (root_dir_idx != i) + { + // When the root_dirs has been changed, to simplify just delete the old cached files. + LOG_TRACE( + log, + "Drop file({}) since root_dir is not match. prev dir is {}, and it should be {}", + path, + root_dirs[i], + root_dirs[root_dir_idx]); + invalid_paths.emplace_back(path); + continue; + } + auto cache_controller = RemoteCacheController::recover(path); + if (!cache_controller) + { + invalid_paths.emplace_back(path); + continue; + } + auto cache_load_func = [&] { return cache_controller; }; + if (!lru_caches->getOrSet(path, cache_load_func)) + { + invalid_paths.emplace_back(path); + } } } } for (auto & path : invalid_paths) fs::remove_all(path); initialized = true; - LOG_INFO(log, "Recovered from directory:{}", root_dir); + + auto root_dirs_to_string = [&]() + { + String res; + for (const auto & root_dir : root_dirs) + { + res += root_dir + ","; + } + return res; + }; + LOG_INFO(log, "Recovered from directory:{}", root_dirs_to_string()); } void ExternalDataSourceCache::initOnce(ContextPtr context, const String & root_dir_, size_t limit_size_, size_t bytes_read_before_flush_) @@ -195,14 +228,18 @@ void ExternalDataSourceCache::initOnce(ContextPtr context, const String & root_d } LOG_INFO( log, "Initializing local cache for remote data sources. 
Local cache root path: {}, cache size limit: {}", root_dir_, limit_size_); - root_dir = root_dir_; + splitInto<','>(root_dirs, root_dir_); + std::sort(root_dirs.begin(), root_dirs.end()); local_cache_bytes_read_before_flush = bytes_read_before_flush_; lru_caches = std::make_unique(limit_size_); /// Create if root_dir not exists. - if (!fs::exists(fs::path(root_dir))) + for (const auto & root_dir : root_dirs) { - fs::create_directories(fs::path(root_dir)); + if (!fs::exists(fs::path(root_dir))) + { + fs::create_directories(fs::path(root_dir)); + } } recover_task_holder = context->getSchedulePool().createTask("recover local cache metadata for remote files", [this] { recoverTask(); }); @@ -215,7 +252,8 @@ String ExternalDataSourceCache::calculateLocalPath(IRemoteFileMetadataPtr metada String full_path = metadata->getName() + ":" + metadata->remote_path + ":" + metadata->getVersion(); UInt128 hashcode = sipHash128(full_path.c_str(), full_path.size()); String hashcode_str = getHexUIntLowercase(hashcode); - return fs::path(root_dir) / hashcode_str.substr(0, 3) / hashcode_str; + size_t root_dir_idx = ConsistentHashing(sipHash64(hashcode_str.c_str(), hashcode_str.size()), root_dirs.size()); + return fs::path(root_dirs[root_dir_idx]) / hashcode_str.substr(0, 3) / hashcode_str; } std::pair, std::unique_ptr> ExternalDataSourceCache::createReader( diff --git a/src/Storages/Cache/ExternalDataSourceCache.h b/src/Storages/Cache/ExternalDataSourceCache.h index ec0aeea4985..18d3d5ca699 100644 --- a/src/Storages/Cache/ExternalDataSourceCache.h +++ b/src/Storages/Cache/ExternalDataSourceCache.h @@ -83,7 +83,7 @@ protected: private: // Root directory of local cache for remote filesystem. - String root_dir; + Strings root_dirs; size_t local_cache_bytes_read_before_flush = 0; std::atomic initialized = false; diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp index 3aa5b28fed5..7a43ae7af4b 100644 --- a/src/Storages/ColumnsDescription.cpp +++ b/src/Storages/ColumnsDescription.cpp @@ -196,7 +196,7 @@ static auto getNameRange(const ColumnsDescription::ColumnsContainer & columns, c return std::make_pair(begin, end); } -void ColumnsDescription::add(ColumnDescription column, const String & after_column, bool first) +void ColumnsDescription::add(ColumnDescription column, const String & after_column, bool first, bool add_subcolumns) { if (has(column.name)) throw Exception("Cannot add column " + column.name + ": column with this name already exists", @@ -222,7 +222,8 @@ void ColumnsDescription::add(ColumnDescription column, const String & after_colu insert_it = range.second; } - addSubcolumns(column.name, column.type); + if (add_subcolumns) + addSubcolumns(column.name, column.type); columns.get<0>().insert(insert_it, std::move(column)); } @@ -572,6 +573,27 @@ std::optional ColumnsDescription::tryGetColumnOrSubcolumn(GetCo return tryGetColumn(GetColumnsOptions(kind).withSubcolumns(), column_name); } +std::optional ColumnsDescription::tryGetColumnDescription(const GetColumnsOptions & options, const String & column_name) const +{ + auto it = columns.get<1>().find(column_name); + if (it != columns.get<1>().end() && (defaultKindToGetKind(it->default_desc.kind) & options.kind)) + return *it; + + if (options.with_subcolumns) + { + auto jt = subcolumns.get<0>().find(column_name); + if (jt != subcolumns.get<0>().end()) + return ColumnDescription{jt->name, jt->type}; + } + + return {}; +} + +std::optional ColumnsDescription::tryGetColumnOrSubcolumnDescription(GetColumnsOptions::Kind kind, const 
String & column_name) const +{ + return tryGetColumnDescription(GetColumnsOptions(kind).withSubcolumns(), column_name); +} + NameAndTypePair ColumnsDescription::getColumnOrSubcolumn(GetColumnsOptions::Kind kind, const String & column_name) const { auto column = tryGetColumnOrSubcolumn(kind, column_name); diff --git a/src/Storages/ColumnsDescription.h b/src/Storages/ColumnsDescription.h index 75db8b92545..c81ccb5d217 100644 --- a/src/Storages/ColumnsDescription.h +++ b/src/Storages/ColumnsDescription.h @@ -100,7 +100,7 @@ public: explicit ColumnsDescription(NamesAndTypesList ordinary, NamesAndAliases aliases); /// `after_column` can be a Nested column name; - void add(ColumnDescription column, const String & after_column = String(), bool first = false); + void add(ColumnDescription column, const String & after_column = String(), bool first = false, bool add_subcolumns = true); /// `column_name` can be a Nested column name; void remove(const String & column_name); @@ -180,6 +180,9 @@ public: std::optional tryGetColumnOrSubcolumn(GetColumnsOptions::Kind kind, const String & column_name) const; std::optional tryGetColumn(const GetColumnsOptions & options, const String & column_name) const; + std::optional tryGetColumnOrSubcolumnDescription(GetColumnsOptions::Kind kind, const String & column_name) const; + std::optional tryGetColumnDescription(const GetColumnsOptions & options, const String & column_name) const; + ColumnDefaults getDefaults() const; /// TODO: remove bool hasDefault(const String & column_name) const; bool hasDefaults() const; diff --git a/src/Storages/ExecutableSettings.cpp b/src/Storages/ExecutableSettings.cpp index 136357eb6f8..dc462350a06 100644 --- a/src/Storages/ExecutableSettings.cpp +++ b/src/Storages/ExecutableSettings.cpp @@ -14,7 +14,7 @@ namespace ErrorCodes extern const int UNKNOWN_SETTING; } -IMPLEMENT_SETTINGS_TRAITS(ExecutableSettingsTraits, LIST_OF_EXECUTABLE_SETTINGS); +IMPLEMENT_SETTINGS_TRAITS(ExecutableSettingsTraits, LIST_OF_EXECUTABLE_SETTINGS) void ExecutableSettings::loadFromQuery(ASTStorage & storage_def) { diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index 0176487bbfe..d114bb67016 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -539,8 +539,7 @@ Pipe StorageHDFS::read( if (fetch_columns.empty()) fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical())); - columns_description = ColumnsDescription{ - storage_snapshot->getSampleBlockForColumns(fetch_columns).getNamesAndTypesList()}; + columns_description = storage_snapshot->getDescriptionForColumns(fetch_columns); block_for_format = storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); } else diff --git a/src/Storages/Hive/StorageHive.cpp b/src/Storages/Hive/StorageHive.cpp index 09c2f578419..c66e1acc6e5 100644 --- a/src/Storages/Hive/StorageHive.cpp +++ b/src/Storages/Hive/StorageHive.cpp @@ -45,6 +45,7 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; extern const int CANNOT_OPEN_FILE; extern const int LOGICAL_ERROR; + extern const int TOO_MANY_PARTITIONS; } @@ -151,13 +152,26 @@ public: { if (!reader) { + if (current_file_remained_rows) + { + return generateChunkByPartitionKeys(); + } + current_idx = source_info->next_uri_to_read.fetch_add(1); if (current_idx >= source_info->hive_files.size()) return {}; - const auto & current_file = source_info->hive_files[current_idx]; + current_file = source_info->hive_files[current_idx]; 
current_path = current_file->getPath(); + if (!to_read_block.columns() && current_file->getRows()) + { + /// This is the case where all columns to read are partition keys. We can construct const columns + /// directly without reading from Hive files. + current_file_remained_rows = *(current_file->getRows()); + return generateChunkByPartitionKeys(); + } + String uri_with_path = hdfs_namenode_url + current_path; auto compression = chooseCompressionMethod(current_path, compression_method); std::unique_ptr raw_read_buf; @@ -260,6 +274,36 @@ public: } } + Chunk generateChunkByPartitionKeys() + { + size_t max_rows = getContext()->getSettings().max_block_size; + size_t rows = 0; + if (max_rows > current_file_remained_rows) + { + rows = current_file_remained_rows; + current_file_remained_rows = 0; + } + else + { + rows = max_rows; + current_file_remained_rows -= max_rows; + } + + Columns cols; + auto types = source_info->partition_name_types.getTypes(); + auto names = source_info->partition_name_types.getNames(); + auto fields = current_file->getPartitionValues(); + for (size_t i = 0, sz = types.size(); i < sz; ++i) + { + if (!sample_block.has(names[i])) + continue; + auto col = types[i]->createColumnConst(rows, fields[i]); + auto col_idx = sample_block.getPositionByName(names[i]); + cols.insert(cols.begin() + col_idx, col); + } + return Chunk(std::move(cols), rows); + } + private: std::unique_ptr read_buf; std::unique_ptr pipeline; @@ -275,8 +319,10 @@ private: const Names & text_input_field_names; FormatSettings format_settings; + HiveFilePtr current_file; String current_path; size_t current_idx = 0; + size_t current_file_remained_rows = 0; Poco::Logger * log = &Poco::Logger::get("StorageHive"); }; @@ -627,30 +673,6 @@ bool StorageHive::isColumnOriented() const return format_name == "Parquet" || format_name == "ORC"; } -void StorageHive::getActualColumnsToRead(Block & sample_block, const Block & header_block, const NameSet & partition_columns) const -{ - if (!isColumnOriented()) - sample_block = header_block; - UInt32 erased_columns = 0; - for (const auto & column : partition_columns) - { - if (sample_block.has(column)) - erased_columns++; - } - if (erased_columns == sample_block.columns()) - { - for (size_t i = 0; i < header_block.columns(); ++i) - { - const auto & col = header_block.getByPosition(i); - if (!partition_columns.count(col.name)) - { - sample_block.insert(col); - break; - } - } - } -} - Pipe StorageHive::read( const Names & column_names, const StorageSnapshotPtr & storage_snapshot, @@ -690,8 +712,6 @@ Pipe StorageHive::read( sources_info->need_file_column = true; } - getActualColumnsToRead(sample_block, header_block, NameSet{partition_names.begin(), partition_names.end()}); - if (num_streams > sources_info->hive_files.size()) num_streams = sources_info->hive_files.size(); @@ -726,6 +746,8 @@ HiveFiles StorageHive::collectHiveFiles( /// Hive files to collect HiveFiles hive_files; + Int64 hit_partitions_num = 0; + Int64 hive_max_query_partitions = context_->getSettings().max_partitions_to_read; /// Mutex to protect hive_files, which may be appended to by multiple threads std::mutex hive_files_mutex; ThreadPool pool{max_threads}; @@ -741,6 +763,11 @@ HiveFiles StorageHive::collectHiveFiles( if (!hive_files_in_partition.empty()) { std::lock_guard lock(hive_files_mutex); + hit_partitions_num += 1; + if (hive_max_query_partitions > 0 && hit_partitions_num > hive_max_query_partitions) + { + throw Exception(ErrorCodes::TOO_MANY_PARTITIONS, "Too many partitions to query for table {}.{}. 
Maximum number of partitions to read is limited to {}", hive_database, hive_table, hive_max_query_partitions); + } hive_files.insert(std::end(hive_files), std::begin(hive_files_in_partition), std::end(hive_files_in_partition)); } }); diff --git a/src/Storages/Hive/StorageHive.h b/src/Storages/Hive/StorageHive.h index d61bb184574..1b37a0afd15 100644 --- a/src/Storages/Hive/StorageHive.h +++ b/src/Storages/Hive/StorageHive.h @@ -117,8 +117,6 @@ private: const ContextPtr & context_, PruneLevel prune_level = PruneLevel::Max) const; - void getActualColumnsToRead(Block & sample_block, const Block & header_block, const NameSet & partition_columns) const; - void lazyInitialize(); std::optional diff --git a/src/Storages/IndicesDescription.cpp b/src/Storages/IndicesDescription.cpp index a0a1bcbce2d..68cf6dfbb28 100644 --- a/src/Storages/IndicesDescription.cpp +++ b/src/Storages/IndicesDescription.cpp @@ -19,7 +19,7 @@ namespace ErrorCodes { extern const int INCORRECT_QUERY; extern const int LOGICAL_ERROR; -}; +} IndexDescription::IndexDescription(const IndexDescription & other) : definition_ast(other.definition_ast ? other.definition_ast->clone() : nullptr) diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index 394ea73d648..2409f8dcb6e 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -418,7 +418,6 @@ ProducerBufferPtr StorageKafka::createWriteBuffer(const Block & header) { cppkafka::Configuration conf; conf.set("metadata.broker.list", brokers); - conf.set("group.id", group); conf.set("client.id", client_id); conf.set("client.software.name", VERSION_NAME); conf.set("client.software.version", VERSION_DESCRIBE); diff --git a/src/Storages/MeiliSearch/MeiliSearchColumnDescriptionFetcher.cpp b/src/Storages/MeiliSearch/MeiliSearchColumnDescriptionFetcher.cpp index e8da210edc8..0777f43aaae 100644 --- a/src/Storages/MeiliSearch/MeiliSearchColumnDescriptionFetcher.cpp +++ b/src/Storages/MeiliSearch/MeiliSearchColumnDescriptionFetcher.cpp @@ -84,4 +84,4 @@ ColumnsDescription MeiliSearchColumnDescriptionFetcher::fetchColumnsDescription( return ColumnsDescription(list); } -}; +} diff --git a/src/Storages/MeiliSearch/MeiliSearchColumnDescriptionFetcher.h b/src/Storages/MeiliSearch/MeiliSearchColumnDescriptionFetcher.h index 29d5f865d8c..19b40251d9d 100644 --- a/src/Storages/MeiliSearch/MeiliSearchColumnDescriptionFetcher.h +++ b/src/Storages/MeiliSearch/MeiliSearchColumnDescriptionFetcher.h @@ -21,4 +21,4 @@ private: MeiliSearchConnection connection; }; -}; +} diff --git a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp index 5ecb7b537e2..b244bd489f1 100644 --- a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp @@ -660,7 +660,7 @@ MergeTreeIndexConditionPtr MergeTreeIndexFullText::createIndexCondition( const SelectQueryInfo & query, ContextPtr context) const { return std::make_shared(query, context, index.sample_block, params, token_extractor.get()); -}; +} bool MergeTreeIndexFullText::mayBenefitFromIndexForIn(const ASTPtr & node) const { diff --git a/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp b/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp index 16d11fb7e33..b257a1db090 100644 --- a/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp @@ -196,7 +196,7 @@ MergeTreeIndexConditionPtr MergeTreeIndexMinMax::createIndexCondition( const SelectQueryInfo & query, ContextPtr 
context) const { return std::make_shared(index, query, context); -}; +} bool MergeTreeIndexMinMax::mayBenefitFromIndexForIn(const ASTPtr & node) const { diff --git a/src/Storages/MergeTree/MergeTreeIndexReader.cpp b/src/Storages/MergeTree/MergeTreeIndexReader.cpp index 6ce58dc2c58..732c7a82209 100644 --- a/src/Storages/MergeTree/MergeTreeIndexReader.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexReader.cpp @@ -68,7 +68,7 @@ void MergeTreeIndexReader::seek(size_t mark) MergeTreeIndexGranulePtr MergeTreeIndexReader::read() { auto granule = index->createIndexGranule(); - granule->deserializeBinary(*stream->data_buffer, version); + granule->deserializeBinary(*stream->getDataBuffer(), version); return granule; } diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.cpp b/src/Storages/MergeTree/MergeTreeIndexSet.cpp index 30fb7e55a10..3c31deda823 100644 --- a/src/Storages/MergeTree/MergeTreeIndexSet.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexSet.cpp @@ -484,7 +484,7 @@ MergeTreeIndexConditionPtr MergeTreeIndexSet::createIndexCondition( const SelectQueryInfo & query, ContextPtr context) const { return std::make_shared(index.name, index.sample_block, max_rows, query, context); -}; +} bool MergeTreeIndexSet::mayBenefitFromIndexForIn(const ASTPtr &) const { diff --git a/src/Storages/MergeTree/MergeTreeIndices.h b/src/Storages/MergeTree/MergeTreeIndices.h index 984a2bb7762..d76216e1598 100644 --- a/src/Storages/MergeTree/MergeTreeIndices.h +++ b/src/Storages/MergeTree/MergeTreeIndices.h @@ -29,7 +29,7 @@ struct MergeTreeIndexFormat MergeTreeIndexVersion version; const char* extension; - operator bool() const { return version != 0; } /// NOLINT + explicit operator bool() const { return version != 0; } }; /// Stores some info about a single block of data. diff --git a/src/Storages/MergeTree/MergeTreeReaderStream.cpp b/src/Storages/MergeTree/MergeTreeReaderStream.cpp index 39ee18d6499..c1f23eab872 100644 --- a/src/Storages/MergeTree/MergeTreeReaderStream.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderStream.cpp @@ -17,25 +17,35 @@ namespace ErrorCodes MergeTreeReaderStream::MergeTreeReaderStream( DiskPtr disk_, const String & path_prefix_, const String & data_file_extension_, size_t marks_count_, - const MarkRanges & all_mark_ranges, - const MergeTreeReaderSettings & settings, + const MarkRanges & all_mark_ranges_, + const MergeTreeReaderSettings & settings_, MarkCache * mark_cache_, - UncompressedCache * uncompressed_cache, size_t file_size_, + UncompressedCache * uncompressed_cache_, size_t file_size_, const MergeTreeIndexGranularityInfo * index_granularity_info_, - const ReadBufferFromFileBase::ProfileCallback & profile_callback, clockid_t clock_type, + const ReadBufferFromFileBase::ProfileCallback & profile_callback_, clockid_t clock_type_, bool is_low_cardinality_dictionary_) - : disk(std::move(disk_)) + : settings(settings_) + , profile_callback(profile_callback_) + , clock_type(clock_type_) + , all_mark_ranges(all_mark_ranges_) + , file_size(file_size_) + , uncompressed_cache(uncompressed_cache_) + , disk(std::move(disk_)) , path_prefix(path_prefix_) , data_file_extension(data_file_extension_) , is_low_cardinality_dictionary(is_low_cardinality_dictionary_) , marks_count(marks_count_) - , file_size(file_size_) , mark_cache(mark_cache_) , save_marks_in_cache(settings.save_marks_in_cache) , index_granularity_info(index_granularity_info_) , marks_loader(disk, mark_cache, index_granularity_info->getMarksFilePath(path_prefix), - marks_count, *index_granularity_info, save_marks_in_cache) + 
marks_count, *index_granularity_info, save_marks_in_cache) {} + +void MergeTreeReaderStream::init() { + if (initialized) + return; + initialized = true; /// Compute the size of the buffer. size_t max_mark_range_bytes = 0; size_t sum_mark_range_bytes = 0; @@ -192,6 +202,7 @@ size_t MergeTreeReaderStream::getRightOffset(size_t right_mark_non_included) void MergeTreeReaderStream::seekToMark(size_t index) { + init(); MarkInCompressedFile mark = marks_loader.getMark(index); try @@ -214,6 +225,7 @@ void MergeTreeReaderStream::seekToMark(size_t index) void MergeTreeReaderStream::seekToStart() { + init(); try { compressed_data_buffer->seek(0, 0); @@ -236,6 +248,7 @@ void MergeTreeReaderStream::adjustRightMark(size_t right_mark) * read from stream, but we must update last_right_offset only if it is bigger than * the last one to avoid redundantly cancelling prefetches. */ + init(); auto right_offset = getRightOffset(right_mark); if (!right_offset) { @@ -255,4 +268,16 @@ void MergeTreeReaderStream::adjustRightMark(size_t right_mark) } } +ReadBuffer * MergeTreeReaderStream::getDataBuffer() +{ + init(); + return data_buffer; +} + +CompressedReadBufferBase * MergeTreeReaderStream::getCompressedDataBuffer() +{ + init(); + return compressed_data_buffer; +} + } diff --git a/src/Storages/MergeTree/MergeTreeReaderStream.h b/src/Storages/MergeTree/MergeTreeReaderStream.h index f0a21ffbfa8..74922b9c236 100644 --- a/src/Storages/MergeTree/MergeTreeReaderStream.h +++ b/src/Storages/MergeTree/MergeTreeReaderStream.h @@ -1,4 +1,5 @@ #pragma once +#include #include #include #include @@ -37,12 +38,20 @@ public: */ void adjustRightMark(size_t right_mark); - ReadBuffer * data_buffer; - CompressedReadBufferBase * compressed_data_buffer; + ReadBuffer * getDataBuffer(); + CompressedReadBufferBase * getCompressedDataBuffer(); private: + void init(); size_t getRightOffset(size_t right_mark_non_included); + const MergeTreeReaderSettings settings; + const ReadBufferFromFileBase::ProfileCallback profile_callback; + clockid_t clock_type; + const MarkRanges all_mark_ranges; + size_t file_size; + UncompressedCache * uncompressed_cache; + DiskPtr disk; std::string path_prefix; std::string data_file_extension; @@ -50,10 +59,13 @@ private: bool is_low_cardinality_dictionary = false; size_t marks_count; - size_t file_size; + + ReadBuffer * data_buffer; + CompressedReadBufferBase * compressed_data_buffer; MarkCache * mark_cache; bool save_marks_in_cache; + bool initialized = false; std::optional last_right_offset; diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.cpp b/src/Storages/MergeTree/MergeTreeReaderWide.cpp index 3641b0a19b0..0f5cf8de669 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderWide.cpp @@ -221,7 +221,7 @@ static ReadBuffer * getStream( else if (seek_to_mark) stream.seekToMark(from_mark); - return stream.data_buffer; + return stream.getDataBuffer(); } void MergeTreeReaderWide::deserializePrefix( diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp index de99193e4d3..b3ff05a960a 100644 --- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp +++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp @@ -123,14 +123,14 @@ static const ASTFunction * getAsTuple(const ASTPtr & node) if (const auto * func = node->as(); func && func->name == "tuple") return func; return {}; -}; +} static bool getAsTupleLiteral(const ASTPtr & node, Tuple & tuple) { if (const auto * value_tuple = node->as()) return 
value_tuple && value_tuple->value.tryGet(tuple); return false; -}; +} bool MergeTreeWhereOptimizer::tryAnalyzeTuple(Conditions & res, const ASTFunction * func, bool is_final) const { diff --git a/src/Storages/MergeTree/RPNBuilder.h b/src/Storages/MergeTree/RPNBuilder.h index 183808c9290..27b616dc301 100644 --- a/src/Storages/MergeTree/RPNBuilder.h +++ b/src/Storages/MergeTree/RPNBuilder.h @@ -120,4 +120,4 @@ private: }; -}; +} diff --git a/src/Storages/ProjectionsDescription.cpp b/src/Storages/ProjectionsDescription.cpp index a1bc0bd58da..5e9966a2794 100644 --- a/src/Storages/ProjectionsDescription.cpp +++ b/src/Storages/ProjectionsDescription.cpp @@ -29,7 +29,7 @@ namespace ErrorCodes extern const int ILLEGAL_PROJECTION; extern const int NOT_IMPLEMENTED; extern const int LOGICAL_ERROR; -}; +} bool ProjectionDescription::isPrimaryKeyColumnPossiblyWrappedInFunctions(const ASTPtr & node) const { diff --git a/src/Storages/StorageExecutable.cpp b/src/Storages/StorageExecutable.cpp index 4a4317c9aab..9638e5186f9 100644 --- a/src/Storages/StorageExecutable.cpp +++ b/src/Storages/StorageExecutable.cpp @@ -234,5 +234,5 @@ void registerStorageExecutable(StorageFactory & factory) }, storage_features); } -}; +} diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 5b191b37f5e..47e32337dfe 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -691,8 +691,7 @@ Pipe StorageFile::read( const auto get_columns_for_format = [&]() -> ColumnsDescription { if (isColumnOriented()) - return ColumnsDescription{ - storage_snapshot->getSampleBlockForColumns(column_names).getNamesAndTypesList()}; + return storage_snapshot->getDescriptionForColumns(column_names); else return storage_snapshot->metadata->getColumns(); }; diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 171ad0bd877..99cc8a284b8 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -127,9 +127,6 @@ StorageMaterializedView::StorageMaterializedView( target_table_id = DatabaseCatalog::instance().getTable({manual_create_query->getDatabase(), manual_create_query->getTable()}, getContext())->getStorageID(); } - - if (!select.select_table_id.empty()) - DatabaseCatalog::instance().addDependency(select.select_table_id, getStorageID()); } QueryProcessingStage::Enum StorageMaterializedView::getQueryProcessingStage( @@ -400,6 +397,14 @@ void StorageMaterializedView::renameInMemory(const StorageID & new_table_id) DatabaseCatalog::instance().updateDependency(select_query.select_table_id, old_table_id, select_query.select_table_id, getStorageID()); } +void StorageMaterializedView::startup() +{ + auto metadata_snapshot = getInMemoryMetadataPtr(); + const auto & select_query = metadata_snapshot->getSelectQuery(); + if (!select_query.select_table_id.empty()) + DatabaseCatalog::instance().addDependency(select_query.select_table_id, getStorageID()); +} + void StorageMaterializedView::shutdown() { auto metadata_snapshot = getInMemoryMetadataPtr(); diff --git a/src/Storages/StorageMaterializedView.h b/src/Storages/StorageMaterializedView.h index 16817c930b2..001bf39f10f 100644 --- a/src/Storages/StorageMaterializedView.h +++ b/src/Storages/StorageMaterializedView.h @@ -69,6 +69,7 @@ public: void renameInMemory(const StorageID & new_table_id) override; + void startup() override; void shutdown() override; QueryProcessingStage::Enum diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 
6107c1a5117..d402dce5ede 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -719,8 +719,7 @@ Pipe StorageS3::read( if (fetch_columns.empty()) fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical())); - columns_description = ColumnsDescription{ - storage_snapshot->getSampleBlockForColumns(fetch_columns).getNamesAndTypesList()}; + columns_description = storage_snapshot->getDescriptionForColumns(fetch_columns); block_for_format = storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); } else diff --git a/src/Storages/StorageSnapshot.cpp b/src/Storages/StorageSnapshot.cpp index a4b64c798f3..07c4c794210 100644 --- a/src/Storages/StorageSnapshot.cpp +++ b/src/Storages/StorageSnapshot.cpp @@ -92,32 +92,40 @@ NameAndTypePair StorageSnapshot::getColumn(const GetColumnsOptions & options, co Block StorageSnapshot::getSampleBlockForColumns(const Names & column_names) const { Block res; + auto columns_description = getDescriptionForColumns(column_names); + for (const auto & column : columns_description) + res.insert({column.type->createColumn(), column.type, column.name}); + return res; +} + +ColumnsDescription StorageSnapshot::getDescriptionForColumns(const Names & column_names) const +{ + ColumnsDescription res; const auto & columns = getMetadataForQuery()->getColumns(); for (const auto & name : column_names) { - auto column = columns.tryGetColumnOrSubcolumn(GetColumnsOptions::All, name); - auto object_column = object_columns.tryGetColumnOrSubcolumn(GetColumnsOptions::All, name); - + auto column = columns.tryGetColumnOrSubcolumnDescription(GetColumnsOptions::All, name); + auto object_column = object_columns.tryGetColumnOrSubcolumnDescription(GetColumnsOptions::All, name); if (column && !object_column) { - res.insert({column->type->createColumn(), column->type, column->name}); + res.add(*column, "", false, false); } else if (object_column) { - res.insert({object_column->type->createColumn(), object_column->type, object_column->name}); + res.add(*object_column, "", false, false); } else if (auto it = virtual_columns.find(name); it != virtual_columns.end()) { /// Virtual columns must be appended after ordinary, because user can /// override them. const auto & type = it->second; - res.insert({type->createColumn(), type, name}); + res.add({name, type}); } else { throw Exception(ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK, - "Column {} not found in table {}", backQuote(name), storage.getStorageID().getNameForLogs()); + "Column {} not found in table {}", backQuote(name), storage.getStorageID().getNameForLogs()); } } diff --git a/src/Storages/StorageSnapshot.h b/src/Storages/StorageSnapshot.h index 909f4fd5cab..5b76a4b37e5 100644 --- a/src/Storages/StorageSnapshot.h +++ b/src/Storages/StorageSnapshot.h @@ -68,6 +68,8 @@ struct StorageSnapshot /// Block with ordinary + materialized + aliases + virtuals + subcolumns. Block getSampleBlockForColumns(const Names & column_names) const; + ColumnsDescription getDescriptionForColumns(const Names & column_names) const; + /// Verify that all the requested names are in the table and are set correctly: /// list of names is not empty and the names do not repeat. 
void check(const Names & column_names) const; diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 03bd1d5e7d9..0db4fa75aba 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -602,8 +602,7 @@ Pipe IStorageURLBase::read( Block block_for_format; if (isColumnOriented()) { - columns_description = ColumnsDescription{ - storage_snapshot->getSampleBlockForColumns(column_names).getNamesAndTypesList()}; + columns_description = storage_snapshot->getDescriptionForColumns(column_names); block_for_format = storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); } else @@ -690,8 +689,7 @@ Pipe StorageURLWithFailover::read( Block block_for_format; if (isColumnOriented()) { - columns_description = ColumnsDescription{ - storage_snapshot->getSampleBlockForColumns(column_names).getNamesAndTypesList()}; + columns_description = storage_snapshot->getDescriptionForColumns(column_names); block_for_format = storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); } else diff --git a/src/Storages/System/StorageSystemCertificates.cpp b/src/Storages/System/StorageSystemCertificates.cpp new file mode 100644 index 00000000000..6e37046e2b4 --- /dev/null +++ b/src/Storages/System/StorageSystemCertificates.cpp @@ -0,0 +1,215 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include "Poco/File.h" +#if USE_SSL + #include + #include "Poco/Net/SSLManager.h" + #include "Poco/Crypto/X509Certificate.h" +#endif + +namespace DB +{ + +NamesAndTypesList StorageSystemCertificates::getNamesAndTypes() +{ + return + { + {"version", std::make_shared>()}, + {"serial_number", std::make_shared(std::make_shared())}, + {"signature_algo", std::make_shared(std::make_shared())}, + {"issuer", std::make_shared(std::make_shared())}, + {"not_before", std::make_shared(std::make_shared())}, + {"not_after", std::make_shared(std::make_shared())}, + {"subject", std::make_shared(std::make_shared())}, + {"pkey_algo", std::make_shared(std::make_shared())}, + {"path", std::make_shared()}, + {"default", std::make_shared>()} + }; +} + +#if USE_SSL + +static std::unordered_set parse_dir(const std::string & dir) +{ + std::unordered_set ret; + boost::split(ret, dir, boost::is_any_of(":"), boost::token_compress_on); + return ret; +} + +static void populateTable(const X509 * cert, MutableColumns & res_columns, const std::string & path, bool def) +{ + BIO * b = BIO_new(BIO_s_mem()); + SCOPE_EXIT( + { + BIO_free(b); + }); + size_t col = 0; + + res_columns[col++]->insert(X509_get_version(cert) + 1); + + { + char buf[1024] = {0}; + const ASN1_INTEGER * sn = cert->cert_info->serialNumber; + BIGNUM * bnsn = ASN1_INTEGER_to_BN(sn, nullptr); + SCOPE_EXIT( + { + BN_free(bnsn); + }); + if (BN_print(b, bnsn) > 0 && BIO_read(b, buf, sizeof(buf)) > 0) + res_columns[col]->insert(buf); + else + res_columns[col]->insertDefault(); + } + ++col; + + { + const ASN1_BIT_STRING *sig = nullptr; + const X509_ALGOR *al = nullptr; + char buf[1024] = {0}; + X509_get0_signature(&sig, &al, cert); + if (al) + { + OBJ_obj2txt(buf, sizeof(buf), al->algorithm, 0); + res_columns[col]->insert(buf); + } + else + res_columns[col]->insertDefault(); + } + ++col; + + char * issuer = X509_NAME_oneline(cert->cert_info->issuer, nullptr, 0); + if (issuer) + { + SCOPE_EXIT( + { + OPENSSL_free(issuer); + }); + res_columns[col]->insert(issuer); + } + else + res_columns[col]->insertDefault(); + ++col; + + { + char buf[1024] = {0}; + if (ASN1_TIME_print(b, 
X509_get_notBefore(cert)) && BIO_read(b, buf, sizeof(buf)) > 0) + res_columns[col]->insert(buf); + else + res_columns[col]->insertDefault(); + } + ++col; + + { + char buf[1024] = {0}; + if (ASN1_TIME_print(b, X509_get_notAfter(cert)) && BIO_read(b, buf, sizeof(buf)) > 0) + res_columns[col]->insert(buf); + else + res_columns[col]->insertDefault(); + } + ++col; + + char * subject = X509_NAME_oneline(cert->cert_info->subject, nullptr, 0); + if (subject) + { + SCOPE_EXIT( + { + OPENSSL_free(subject); + }); + res_columns[col]->insert(subject); + } + else + res_columns[col]->insertDefault(); + ++col; + + if (X509_PUBKEY * pkey = X509_get_X509_PUBKEY(cert)) + { + char buf[1024] = {0}; + ASN1_OBJECT *ppkalg = nullptr; + const unsigned char *pk = nullptr; + int ppklen = 0; + X509_ALGOR *pa = nullptr; + if (X509_PUBKEY_get0_param(&ppkalg, &pk, &ppklen, &pa, pkey) && + i2a_ASN1_OBJECT(b, ppkalg) > 0 && BIO_read(b, buf, sizeof(buf)) > 0) + res_columns[col]->insert(buf); + else + res_columns[col]->insertDefault(); + } + else + res_columns[col]->insertDefault(); + ++col; + + res_columns[col++]->insert(path); + res_columns[col++]->insert(def); +} + +static void enumCertificates(const std::string & dir, bool def, MutableColumns & res_columns) +{ + static const RE2 cert_name("^[a-fA-F0-9]{8}\\.\\d$"); + assert(cert_name.ok()); + + const std::filesystem::path p(dir); + + for (auto const& dir_entry : std::filesystem::directory_iterator(p)) + { + if (!dir_entry.is_regular_file() || !RE2::FullMatch(dir_entry.path().filename().string(), cert_name)) + continue; + + Poco::Crypto::X509Certificate cert(dir_entry.path()); + populateTable(cert.certificate(), res_columns, dir_entry.path(), def); + } +} + +#endif + +void StorageSystemCertificates::fillData([[maybe_unused]] MutableColumns & res_columns, ContextPtr/* context*/, const SelectQueryInfo &) const +{ +#if USE_SSL + const auto & ca_paths = Poco::Net::SSLManager::instance().defaultServerContext()->getCAPaths(); + + if (!ca_paths.caLocation.empty()) + { + Poco::File afile(ca_paths.caLocation); + if (afile.exists()) + { + if (afile.isDirectory()) + { + auto dir_set = parse_dir(ca_paths.caLocation); + for (const auto & entry : dir_set) + enumCertificates(entry, false, res_columns); + } + else + { + auto certs = Poco::Crypto::X509Certificate::readPEM(afile.path()); + for (const auto & cert : certs) + populateTable(cert.certificate(), res_columns, afile.path(), false); + } + } + } + + if (!ca_paths.caDefaultDir.empty()) + { + auto dir_set = parse_dir(ca_paths.caDefaultDir); + for (const auto & entry : dir_set) + enumCertificates(entry, true, res_columns); + } + + if (!ca_paths.caDefaultFile.empty()) + { + Poco::File afile(ca_paths.caDefaultFile); + if (afile.exists()) + { + auto certs = Poco::Crypto::X509Certificate::readPEM(ca_paths.caDefaultFile); + for (const auto & cert : certs) + populateTable(cert.certificate(), res_columns, ca_paths.caDefaultFile, true); + } + } +#endif +} + +} diff --git a/src/Storages/System/StorageSystemCertificates.h b/src/Storages/System/StorageSystemCertificates.h new file mode 100644 index 00000000000..4df0010d0b4 --- /dev/null +++ b/src/Storages/System/StorageSystemCertificates.h @@ -0,0 +1,29 @@ +#pragma once + +#include + + +namespace DB +{ + +class Context; +class Cluster; + +/** Implements system table 'certificates' + * that allows to obtain information about available certificates + * and their sources. 
+ */ +class StorageSystemCertificates final : public IStorageSystemOneBlock +{ +public: + std::string getName() const override { return "SystemCertificates"; } + + static NamesAndTypesList getNamesAndTypes(); + +protected: + using IStorageSystemOneBlock::IStorageSystemOneBlock; + + void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; +}; + +} diff --git a/src/Storages/System/StorageSystemContributors.generated.cpp b/src/Storages/System/StorageSystemContributors.generated.cpp index 51923397ede..42a0f24cc65 100644 --- a/src/Storages/System/StorageSystemContributors.generated.cpp +++ b/src/Storages/System/StorageSystemContributors.generated.cpp @@ -19,6 +19,7 @@ const char * auto_contributors[] { "Albert Kidrachev", "Alberto", "Aleksandr Karo", + "Aleksandr Razumov", "Aleksandr Shalimov", "Aleksandra (Ася)", "Aleksandrov Vladimir", @@ -179,6 +180,7 @@ const char * auto_contributors[] { "Boris Granveaud", "Boris Kuschel", "Bowen Masco", + "Brandon", "Braulio Valdivielso", "Brendan Cox", "Brett Hoerner", @@ -205,6 +207,7 @@ const char * auto_contributors[] { "CurtizJ", "DF5HSE", "DIAOZHAFENG", + "Dale McDiarmid", "Dan Roscigno", "Daniel Bershatsky", "Daniel Dao", @@ -392,6 +395,7 @@ const char * auto_contributors[] { "João Figueiredo", "Julian Gilyadov", "Julian Zhou", + "Julio Jimenez", "Justin Hilliard", "Kang Liu", "Karl Pietrzak", @@ -425,6 +429,7 @@ const char * auto_contributors[] { "LAL2211", "LB", "LIJINGBO", + "Ladislav Snizek", "Larry Luo", "Lars Eidnes", "Latysheva Alexandra", @@ -451,6 +456,7 @@ const char * auto_contributors[] { "Maksim Kita", "Malte", "Marat IDRISOV", + "Marcelo Rodriguez", "Marek Vavrusa", "Marek Vavruša", "Marek Vavruša", @@ -510,6 +516,7 @@ const char * auto_contributors[] { "Mike Kot", "Mikhail", "Mikhail Andreev", + "Mikhail Artemenko", "Mikhail Cheshkov", "Mikhail Fandyushin", "Mikhail Filimonov", @@ -615,6 +622,7 @@ const char * auto_contributors[] { "Philippe Ombredanne", "Potya", "Pradeep Chhetri", + "Prashant Shahi", "Pxl", "Pysaoke", "Quid37", @@ -652,6 +660,7 @@ const char * auto_contributors[] { "Russ Frank", "Ruzal Ibragimov", "Ryad ZENINE", + "Ryadh DAHIMENE", "S.M.A. Djawadi", "Saad Ur Rahman", "Sabyanin Maxim", @@ -661,6 +670,7 @@ const char * auto_contributors[] { "Samuel Chou", "Saulius Valatka", "Sean Haynes", + "Sean Lafferty", "Serg Kulakov", "Serge Rider", "Sergei Bocharov", @@ -677,6 +687,7 @@ const char * auto_contributors[] { "Sergey Mirvoda", "Sergey Ryzhkov", "Sergey Shtykov", + "Sergey Tulentsev", "Sergey V. 
Galtsev", "Sergey Zaikin", "Sergi Almacellas Abellana", @@ -727,6 +738,7 @@ const char * auto_contributors[] { "The-Alchemist", "Thom O'Connor", "Thomas Berdy", + "Tian Xinhui", "Tiaonmmn", "Tigran Khudaverdyan", "Timur Magomedov", @@ -804,11 +816,13 @@ const char * auto_contributors[] { "Weiqing Xu", "William Shallum", "Winter Zhang", + "XenoAmess", "Xianda Ke", "Xiang Zhou", "Xin Wang", "Xudong Zhang", "Y Lu", + "Yakko Majuri", "Yakov Olkhovskiy", "Yangkuan Liu", "Yatian Xu", @@ -821,6 +835,7 @@ const char * auto_contributors[] { "Yiğit Konur", "Yohann Jardin", "Yong Wang", + "Yong-Hao Zou", "Youenn Lebras", "Yuntao Wu", "Yuri Dyachenko", @@ -884,6 +899,7 @@ const char * auto_contributors[] { "benbiti", "bgranvea", "bharatnc", + "bkuschel", "blazerer", "bluebirddm", "bo zeng", @@ -936,6 +952,7 @@ const char * auto_contributors[] { "dmi-feo", "dmitrii", "dmitriiut", + "dmitriy", "dmitry kuzmin", "dongyifeng", "eaxdev", @@ -986,9 +1003,13 @@ const char * auto_contributors[] { "grantovsky", "gulige", "guoleiyi", + "guomaolin", + "guov100", + "guykohen", "gyuton", "hanqf-git", "hao.he", + "hardstep33", "hchen9", "hcz", "heleihelei", @@ -997,6 +1018,7 @@ const char * auto_contributors[] { "hermano", "hexiaoting", "hhell", + "homeward", "hotid", "huangzhaowei", "hustnn", @@ -1025,6 +1047,7 @@ const char * auto_contributors[] { "jennyma", "jetgm", "jewisliu", + "jiahui-97", "jianmei zhang", "jkuklis", "jus1096", @@ -1045,6 +1068,7 @@ const char * auto_contributors[] { "l", "l1tsolaiki", "lalex", + "lanfz", "larryluogit", "laurieliyang", "lehasm", @@ -1054,6 +1078,7 @@ const char * auto_contributors[] { "levushkin aleksej", "levysh", "lgbo", + "lgbo-usstc", "lgbo-ustc", "lhuang0928", "lhuang09287750", @@ -1066,6 +1091,7 @@ const char * auto_contributors[] { "listar", "litao91", "liu-bov", + "liumaojing", "liuneng1994", "liuyangkuan", "liuyimin", @@ -1120,8 +1146,10 @@ const char * auto_contributors[] { "nagorny", "nauta", "nautaa", + "ndchikin", "neng.liu", "never lee", + "ni1l", "nicelulu", "nickzhwang", "nikitamikhaylov", @@ -1134,6 +1162,7 @@ const char * auto_contributors[] { "ogorbacheva", "olegkv", "olevino", + "olevino999", "olgarev", "orantius", "p0ny", @@ -1205,6 +1234,7 @@ const char * auto_contributors[] { "tangjiangling", "tao jiang", "tavplubix", + "tchepavel", "tcoyvwac", "tekeri", "templarzq", @@ -1237,10 +1267,12 @@ const char * auto_contributors[] { "vzakaznikov", "wangchao", "weeds085490", + "wuxiaobai24", "wzl", "xPoSx", "xiedeyantu", "xinhuitian", + "yakkomajuri", "yakov-olkhovskiy", "yandd", "yang", @@ -1276,6 +1308,7 @@ const char * auto_contributors[] { "zhukai", "zkun", "zlx19950903", + "zombee0", "zvonand", "zvrr", "zvvr", @@ -1296,6 +1329,7 @@ const char * auto_contributors[] { "何李夫", "凌涛", "吴健", + "小蝌蚪", "小路", "张中南", "张健", diff --git a/src/Storages/System/StorageSystemPartsBase.h b/src/Storages/System/StorageSystemPartsBase.h index 477261ad7ad..36c0fd551df 100644 --- a/src/Storages/System/StorageSystemPartsBase.h +++ b/src/Storages/System/StorageSystemPartsBase.h @@ -22,7 +22,7 @@ struct StoragesInfo bool need_inactive_parts = false; MergeTreeData * data = nullptr; - operator bool() const { return storage != nullptr; } /// NOLINT + explicit operator bool() const { return storage != nullptr; } MergeTreeData::DataPartsVector getParts(MergeTreeData::DataPartStateVector & state, bool has_state_column, bool require_projection_parts = false) const; }; diff --git a/src/Storages/System/attachSystemTables.cpp b/src/Storages/System/attachSystemTables.cpp index 
f8940de889a..a86a04c4444 100644 --- a/src/Storages/System/attachSystemTables.cpp +++ b/src/Storages/System/attachSystemTables.cpp @@ -72,6 +72,7 @@ #include #include #include +#include #ifdef OS_LINUX #include @@ -168,6 +169,7 @@ void attachSystemTablesServer(ContextPtr context, IDatabase & system_database, b attach(context, system_database, "asynchronous_inserts"); attach(context, system_database, "filesystem_cache"); attach(context, system_database, "remote_data_paths"); + attach(context, system_database, "certificates"); if (has_zookeeper) attach(context, system_database, "zookeeper"); diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index fe78b1e7f7b..a44b8954e3c 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -13,15 +13,18 @@ #include #include #include +#include #include #include #include +#include #include #include #include #include #include #include +#include #include #include #include @@ -44,8 +47,10 @@ #include #include #include +#include #include #include +#include #include #include @@ -70,6 +75,7 @@ namespace ErrorCodes extern const int QUERY_IS_NOT_SUPPORTED_IN_WINDOW_VIEW; extern const int SUPPORT_IS_DISABLED; extern const int TABLE_WAS_NOT_DROPPED; + extern const int NOT_IMPLEMENTED; } namespace @@ -266,40 +272,13 @@ namespace } }; - IntervalKind strToIntervalKind(const String& interval_str) - { - if (interval_str == "Nanosecond") - return IntervalKind::Nanosecond; - else if (interval_str == "Microsecond") - return IntervalKind::Microsecond; - else if (interval_str == "Millisecond") - return IntervalKind::Millisecond; - else if (interval_str == "Second") - return IntervalKind::Second; - else if (interval_str == "Minute") - return IntervalKind::Minute; - else if (interval_str == "Hour") - return IntervalKind::Hour; - else if (interval_str == "Day") - return IntervalKind::Day; - else if (interval_str == "Week") - return IntervalKind::Week; - else if (interval_str == "Month") - return IntervalKind::Month; - else if (interval_str == "Quarter") - return IntervalKind::Quarter; - else if (interval_str == "Year") - return IntervalKind::Year; - __builtin_unreachable(); - } - void extractWindowArgument(const ASTPtr & ast, IntervalKind::Kind & kind, Int64 & num_units, String err_msg) { const auto * arg = ast->as(); - if (!arg || !startsWith(arg->name, "toInterval")) + if (!arg || !startsWith(arg->name, "toInterval") + || !IntervalKind::tryParseString(Poco::toLower(arg->name.substr(10)), kind)) throw Exception(err_msg, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - kind = strToIntervalKind(arg->name.substr(10)); const auto * interval_unit = arg->children.front()->children.front()->as(); if (!interval_unit || (interval_unit->value.getType() != Field::Types::String @@ -349,6 +328,37 @@ namespace String getName() const override { return "AddingAggregatedChunkInfoTransform"; } }; + + String generateInnerTableName(const StorageID & storage_id) + { + if (storage_id.hasUUID()) + return ".inner." + toString(storage_id.uuid); + return ".inner." + storage_id.getTableName(); + } + + String generateTargetTableName(const StorageID & storage_id) + { + if (storage_id.hasUUID()) + return ".inner.target." + toString(storage_id.uuid); + return ".inner.target." 
+ storage_id.table_name; + } + + ASTPtr generateInnerFetchQuery(StorageID inner_table_id) + { + auto fetch_query = std::make_shared(); + auto select = std::make_shared(); + select->children.push_back(std::make_shared()); + fetch_query->setExpression(ASTSelectQuery::Expression::SELECT, select); + fetch_query->setExpression(ASTSelectQuery::Expression::TABLES, std::make_shared()); + auto tables_elem = std::make_shared(); + auto table_expr = std::make_shared(); + fetch_query->tables()->children.push_back(tables_elem); + tables_elem->table_expression = table_expr; + tables_elem->children.push_back(table_expr); + table_expr->database_and_table_name = std::make_shared(inner_table_id); + table_expr->children.push_back(table_expr->database_and_table_name); + return fetch_query; + } } static void extractDependentTable(ContextPtr context, ASTPtr & query, String & select_database_name, String & select_table_name) @@ -407,10 +417,10 @@ UInt32 StorageWindowView::getCleanupBound() ASTPtr StorageWindowView::getCleanupQuery() { - ASTPtr function_equal; - function_equal = makeASTFunction( + ASTPtr function_less; + function_less= makeASTFunction( "less", - std::make_shared(window_id_name), + std::make_shared(inner_window_id_column_name), std::make_shared(getCleanupBound())); auto alter_query = std::make_shared(); @@ -422,7 +432,7 @@ ASTPtr StorageWindowView::getCleanupQuery() auto alter_command = std::make_shared(); alter_command->type = ASTAlterCommand::DELETE; - alter_command->predicate = function_equal; + alter_command->predicate = function_less; alter_command->children.push_back(alter_command->predicate); alter_query->command_list->children.push_back(alter_command); return alter_query; @@ -442,28 +452,124 @@ bool StorageWindowView::optimize( const Names & deduplicate_by_columns, ContextPtr local_context) { - auto storage_ptr = getInnerStorage(); + auto storage_ptr = getInnerTable(); auto metadata_snapshot = storage_ptr->getInMemoryMetadataPtr(); - return getInnerStorage()->optimize(query, metadata_snapshot, partition, final, deduplicate, deduplicate_by_columns, local_context); + return getInnerTable()->optimize(query, metadata_snapshot, partition, final, deduplicate, deduplicate_by_columns, local_context); +} + +void StorageWindowView::alter( + const AlterCommands & params, + ContextPtr local_context, + AlterLockHolder &) +{ + auto table_id = getStorageID(); + StorageInMemoryMetadata new_metadata = getInMemoryMetadata(); + StorageInMemoryMetadata old_metadata = getInMemoryMetadata(); + params.apply(new_metadata, local_context); + + const auto & new_select = new_metadata.select; + const auto & new_select_query = new_metadata.select.inner_query; + + modifying_query = true; + SCOPE_EXIT({ + modifying_query = false; + }); + + shutdown(); + + auto inner_query = initInnerQuery(new_select_query->as(), local_context); + + dropInnerTableIfAny(true, local_context); + + /// create inner table + std::exchange(has_inner_table, true); + auto create_context = Context::createCopy(local_context); + auto inner_create_query = getInnerTableCreateQuery(inner_query, inner_table_id); + InterpreterCreateQuery create_interpreter(inner_create_query, create_context); + create_interpreter.setInternal(true); + create_interpreter.execute(); + + DatabaseCatalog::instance().addDependency(select_table_id, table_id); + + shutdown_called = false; + + clean_cache_task = getContext()->getSchedulePool().createTask(getStorageID().getFullTableName(), [this] { threadFuncCleanup(); }); + fire_task = getContext()->getSchedulePool().createTask( + 
getStorageID().getFullTableName(), [this] { is_proctime ? threadFuncFireProc() : threadFuncFireEvent(); }); + clean_cache_task->deactivate(); + fire_task->deactivate(); + + new_metadata.setSelectQuery(new_select); + + DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(local_context, table_id, new_metadata); + setInMemoryMetadata(new_metadata); + + startup(); +} + +void StorageWindowView::checkAlterIsPossible(const AlterCommands & commands, ContextPtr /*local_context*/) const +{ + for (const auto & command : commands) + { + if (!command.isCommentAlter() && command.type != AlterCommand::MODIFY_QUERY) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Alter of type '{}' is not supported by storage {}", command.type, getName()); + } } std::pair StorageWindowView::getNewBlocks(UInt32 watermark) { UInt32 w_start = addTime(watermark, window_kind, -window_num_units, *time_zone); + auto inner_table = getInnerTable(); InterpreterSelectQuery fetch( - getFetchColumnQuery(w_start, watermark), + inner_fetch_query, getContext(), - getInnerStorage(), - nullptr, + inner_table, + inner_table->getInMemoryMetadataPtr(), SelectQueryOptions(QueryProcessingStage::FetchColumns)); auto builder = fetch.buildQueryPipeline(); + ASTPtr filter_function; + if (is_tumble) + { + /// SELECT * FROM inner_table WHERE window_id_name == w_end + /// (because we fire at the end of windows) + filter_function = makeASTFunction("equals", std::make_shared(inner_window_id_column_name), std::make_shared(watermark)); + } + else + { + auto func_array = makeASTFunction("array"); + auto w_end = watermark; + while (w_start < w_end) + { + /// slice_num_units = std::gcd(hop_num_units, window_num_units); + /// We use std::gcd(hop_num_units, window_num_units) as the new window size + /// to split the overlapped windows into non-overlapped. + /// For a hopping window with window_size=3 slice=1, the windows might be + /// [1,3],[2,4],[3,5], which will cause recomputation. + /// In this case, the slice_num_units will be `gcd(1,3)=1' and the non-overlapped + /// windows will split into [1], [2], [3]... We compute each split window into + /// mergeable state and merge them when the window is triggering. 
+ func_array ->arguments->children.push_back(std::make_shared(w_end)); + w_end = addTime(w_end, window_kind, -slice_num_units, *time_zone); + } + filter_function = makeASTFunction("has", func_array, std::make_shared(inner_window_id_column_name)); + } + + auto syntax_result = TreeRewriter(getContext()).analyze(filter_function, builder.getHeader().getNamesAndTypesList()); + auto filter_expression = ExpressionAnalyzer(filter_function, syntax_result, getContext()).getActionsDAG(false); + + builder.addSimpleTransform([&](const Block & header) + { + return std::make_shared( + header, std::make_shared(filter_expression), filter_function->getColumnName(), true); + }); + /// Adding window column DataTypes window_column_type{std::make_shared(), std::make_shared()}; ColumnWithTypeAndName column; - column.name = window_column_name; + column.name = inner_window_column_name; column.type = std::make_shared(std::move(window_column_type)); column.column = column.type->createColumnConst(0, Tuple{w_start, watermark}); auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column)); @@ -476,7 +582,7 @@ std::pair StorageWindowView::getNewBlocks(UInt32 watermark) /// Removing window id column auto new_header = builder.getHeader(); - new_header.erase(window_id_name); + new_header.erase(inner_window_id_column_name); auto convert_actions_dag = ActionsDAG::makeConvertingActions( builder.getHeader().getColumnsWithTypeAndName(), new_header.getColumnsWithTypeAndName(), @@ -499,8 +605,8 @@ std::pair StorageWindowView::getNewBlocks(UInt32 watermark) auto creator = [&](const StorageID & blocks_id_global) { - auto parent_table_metadata = getParentStorage()->getInMemoryMetadataPtr(); - auto required_columns = parent_table_metadata->getColumns(); + auto source_table_metadata = getSourceTable()->getInMemoryMetadataPtr(); + auto required_columns = source_table_metadata->getColumns(); required_columns.add(ColumnDescription("____timestamp", std::make_shared())); return StorageBlocks::createStorage(blocks_id_global, required_columns, std::move(pipes), QueryProcessingStage::WithMergeableState); }; @@ -555,10 +661,18 @@ inline void StorageWindowView::fire(UInt32 watermark) BlocksPtr blocks; Block header; + try { std::lock_guard lock(mutex); std::tie(blocks, header) = getNewBlocks(watermark); } + catch (...) 
+ { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + + if (!blocks || blocks->empty()) + return; for (const auto & block : *blocks) { @@ -569,9 +683,10 @@ inline void StorageWindowView::fire(UInt32 watermark) } fire_condition.notify_all(); } + if (!target_table_id.empty()) { - StoragePtr target_table = getTargetStorage(); + StoragePtr target_table = getTargetTable(); auto insert = std::make_shared(); insert->table_id = target_table->getStorageID(); InterpreterInsertQuery interpreter(insert, getContext()); @@ -596,13 +711,60 @@ inline void StorageWindowView::fire(UInt32 watermark) } } -std::shared_ptr StorageWindowView::getInnerTableCreateQuery( - const ASTPtr & inner_query, ASTStorage * storage, const String & database_name, const String & table_name) +ASTPtr StorageWindowView::getSourceTableSelectQuery() +{ + auto query = select_query->clone(); + auto & modified_select = query->as(); + + if (hasJoin(modified_select)) + { + auto analyzer_res = TreeRewriterResult({}); + removeJoin(modified_select, analyzer_res, getContext()); + } + else + { + modified_select.setExpression(ASTSelectQuery::Expression::HAVING, {}); + modified_select.setExpression(ASTSelectQuery::Expression::GROUP_BY, {}); + } + + auto select_list = std::make_shared(); + for (const auto & column_name : getInputHeader().getNames()) + select_list->children.emplace_back(std::make_shared(column_name)); + modified_select.setExpression(ASTSelectQuery::Expression::SELECT, select_list); + + if (!is_time_column_func_now) + { + auto query = select_query->clone(); + DropTableIdentifierMatcher::Data drop_table_identifier_data; + DropTableIdentifierMatcher::Visitor drop_table_identifier_visitor(drop_table_identifier_data); + drop_table_identifier_visitor.visit(query); + + FetchQueryInfoMatcher::Data query_info_data; + FetchQueryInfoMatcher::Visitor(query_info_data).visit(query); + + auto order_by = std::make_shared(); + auto order_by_elem = std::make_shared(); + order_by_elem->children.push_back(std::make_shared(query_info_data.timestamp_column_name)); + order_by_elem->direction = 1; + order_by->children.push_back(order_by_elem); + modified_select.setExpression(ASTSelectQuery::Expression::ORDER_BY, std::move(order_by)); + } + else + modified_select.setExpression(ASTSelectQuery::Expression::ORDER_BY, {}); + + const auto select_with_union_query = std::make_shared(); + select_with_union_query->list_of_selects = std::make_shared(); + select_with_union_query->list_of_selects->children.push_back(query); + + return select_with_union_query; +} + +ASTPtr StorageWindowView::getInnerTableCreateQuery(const ASTPtr & inner_query, const StorageID & inner_table_id) { /// We will create a query to create an internal table. 
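getInnerTableCreateQuery() now receives the inner table's StorageID directly; the constructor derives it with generateInnerTableName. Judging from the removed generate_inner_table_name lambda later in this diff, the convention is a hidden ".inner.<uuid or name>" table next to the window view. A rough Python rendering of that convention (assumption: the new helper keeps the same behavior as the removed lambda):

from typing import Optional

def generate_inner_table_name(table_name: str, uuid: Optional[str] = None) -> str:
    # The hidden storage table lives next to the window view as ".inner.<uuid or name>".
    return ".inner." + (uuid if uuid is not None else table_name)

assert generate_inner_table_name("wv") == ".inner.wv"
assert generate_inner_table_name("wv", uuid="00000000-0000-0000-0000-000000000001") == \
    ".inner.00000000-0000-0000-0000-000000000001"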
auto inner_create_query = std::make_shared(); - inner_create_query->setDatabase(database_name); - inner_create_query->setTable(table_name); + inner_create_query->setDatabase(inner_table_id.getDatabaseName()); + inner_create_query->setTable(inner_table_id.getTableName()); Aliases aliases; QueryAliasesVisitor(aliases).visit(inner_query); @@ -618,14 +780,13 @@ std::shared_ptr StorageWindowView::getInnerTableCreateQuery( auto columns_list = std::make_shared(); - String window_id_column_name; if (is_time_column_func_now) { auto column_window = std::make_shared(); column_window->name = window_id_name; column_window->type = std::make_shared("UInt32"); columns_list->children.push_back(column_window); - window_id_column_name = window_id_name; + inner_window_id_column_name = window_id_name; } for (const auto & column : t_sample_block.getColumnsWithTypeAndName()) @@ -637,17 +798,19 @@ std::shared_ptr StorageWindowView::getInnerTableCreateQuery( column_dec->name = column.name; column_dec->type = ast; columns_list->children.push_back(column_dec); - if (!is_time_column_func_now && window_id_column_name.empty() && startsWith(column.name, "windowID")) + if (!is_time_column_func_now && inner_window_id_column_name.empty() && startsWith(column.name, "windowID")) { - window_id_column_name = column.name; + inner_window_id_column_name = column.name; } } - if (window_id_column_name.empty()) + if (inner_window_id_column_name.empty()) throw Exception( "The first argument of time window function should not be a constant value.", ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_WINDOW_VIEW); + inner_window_column_name = std::regex_replace(inner_window_id_column_name, std::regex("windowID"), is_tumble ? "tumble" : "hop"); + ToIdentifierMatcher::Data query_data; query_data.window_id_name = window_id_name; query_data.window_id_alias = window_id_alias; @@ -680,33 +843,32 @@ std::shared_ptr StorageWindowView::getInnerTableCreateQuery( }; auto new_storage = std::make_shared(); - /// storage != nullptr in case create window view with ENGINE syntax - if (storage) + /// inner_storage_engine != nullptr in case create window view with ENGINE syntax + if (inner_table_engine) { - new_storage->set(new_storage->engine, storage->engine->clone()); + auto storage = inner_table_engine->as(); - if (storage->ttl_table) + if (storage.ttl_table) throw Exception( ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_WINDOW_VIEW, "TTL is not supported for inner table in Window View"); - if (!endsWith(storage->engine->name, "MergeTree")) - throw Exception( - ErrorCodes::INCORRECT_QUERY, - "The ENGINE of WindowView must be MergeTree family of table engines " - "including the engines with replication support"); + new_storage->set(new_storage->engine, storage.engine->clone()); - if (storage->partition_by) - new_storage->set(new_storage->partition_by, visit(storage->partition_by)); - if (storage->primary_key) - new_storage->set(new_storage->primary_key, visit(storage->primary_key)); - if (storage->order_by) - new_storage->set(new_storage->order_by, visit(storage->order_by)); - if (storage->sample_by) - new_storage->set(new_storage->sample_by, visit(storage->sample_by)); + if (endsWith(storage.engine->name, "MergeTree")) + { + if (storage.partition_by) + new_storage->set(new_storage->partition_by, visit(storage.partition_by)); + if (storage.primary_key) + new_storage->set(new_storage->primary_key, visit(storage.primary_key)); + if (storage.order_by) + new_storage->set(new_storage->order_by, visit(storage.order_by)); + if (storage.sample_by) + 
new_storage->set(new_storage->sample_by, visit(storage.sample_by)); - if (storage->settings) - new_storage->set(new_storage->settings, storage->settings->clone()); + if (storage.settings) + new_storage->set(new_storage->settings, storage.settings->clone()); + } } else { @@ -816,11 +978,6 @@ void StorageWindowView::updateMaxTimestamp(UInt32 timestamp) void StorageWindowView::updateMaxWatermark(UInt32 watermark) { std::lock_guard lock(fire_signal_mutex); - if (max_watermark == 0) - { - max_watermark = getWindowUpperBound(watermark - 1); - return; - } bool updated; if (is_watermark_strictly_ascending) @@ -854,7 +1011,11 @@ inline void StorageWindowView::cleanup() std::lock_guard mutex_lock(mutex); auto alter_query = getCleanupQuery(); - InterpreterAlterQuery interpreter_alter(alter_query, getContext()); + auto cleanup_context = Context::createCopy(getContext()); + cleanup_context->makeQueryContext(); + cleanup_context->setCurrentQueryId(""); + cleanup_context->getClientInfo().is_replicated_database_internal = true; + InterpreterAlterQuery interpreter_alter(alter_query, cleanup_context); interpreter_alter.execute(); watch_streams.remove_if([](std::weak_ptr & ptr) { return ptr.expired(); }); @@ -864,7 +1025,8 @@ void StorageWindowView::threadFuncCleanup() { try { - cleanup(); + if (!shutdown_called) + cleanup(); } catch (...) { @@ -877,6 +1039,9 @@ void StorageWindowView::threadFuncCleanup() void StorageWindowView::threadFuncFireProc() { + if (shutdown_called) + return; + std::unique_lock lock(fire_signal_mutex); UInt32 timestamp_now = std::time(nullptr); @@ -910,7 +1075,7 @@ void StorageWindowView::threadFuncFireEvent() std::unique_lock lock(fire_signal_mutex); while (!shutdown_called) { - bool signaled = std::cv_status::no_timeout == fire_signal_condition.wait_for(lock, std::chrono::seconds(5)); + bool signaled = std::cv_status::no_timeout == fire_signal_condition.wait_for(lock, std::chrono::seconds(fire_signal_timeout_s)); if (!signaled) continue; @@ -953,7 +1118,7 @@ void StorageWindowView::read( if (target_table_id.empty()) return; - auto storage = getTargetStorage(); + auto storage = getTargetTable(); auto lock = storage->lockForShare(local_context->getCurrentQueryId(), local_context->getSettingsRef().lock_acquire_timeout); auto target_metadata_snapshot = storage->getInMemoryMetadataPtr(); auto target_storage_snapshot = storage->getStorageSnapshot(target_metadata_snapshot, local_context); @@ -1037,7 +1202,12 @@ StorageWindowView::StorageWindowView( : IStorage(table_id_) , WithContext(context_->getGlobalContext()) , log(&Poco::Logger::get(fmt::format("StorageWindowView({}.{})", table_id_.database_name, table_id_.table_name))) + , fire_signal_timeout_s(context_->getSettingsRef().wait_for_window_view_fire_signal_timeout.totalSeconds()) + , clean_interval_ms(context_->getSettingsRef().window_view_clean_interval.totalMilliseconds()) { + if (!query.select) + throw Exception(ErrorCodes::INCORRECT_QUERY, "SELECT query is not specified for {}", getName()); + StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns_); setInMemoryMetadata(storage_metadata); @@ -1045,16 +1215,84 @@ StorageWindowView::StorageWindowView( if (!query.select) throw Exception(ErrorCodes::INCORRECT_QUERY, "SELECT query is not specified for {}", getName()); + /// If the target table is not set, use inner target table + inner_target_table = query.to_table_id.empty(); + if (inner_target_table && !query.storage) + throw Exception( + "You must specify where to save results of a WindowView query: either 
ENGINE or an existing table in a TO clause", + ErrorCodes::INCORRECT_QUERY); + if (query.select->list_of_selects->children.size() != 1) throw Exception( ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_WINDOW_VIEW, "UNION is not supported for {}", getName()); - select_query = query.select->list_of_selects->children.at(0)->clone(); + /// Extract information about watermark, lateness. + eventTimeParser(query); + + target_table_id = query.to_table_id; + + auto inner_query = initInnerQuery(query.select->list_of_selects->children.at(0)->as(), context_); + + if (query.inner_storage) + inner_table_engine = query.inner_storage->clone(); + inner_table_id = StorageID(getStorageID().database_name, generateInnerTableName(getStorageID())); + inner_fetch_query = generateInnerFetchQuery(inner_table_id); + + if (is_proctime) + next_fire_signal = getWindowUpperBound(std::time(nullptr)); + + std::exchange(has_inner_table, true); + if (!attach_) + { + auto inner_create_query = getInnerTableCreateQuery(inner_query, inner_table_id); + auto create_context = Context::createCopy(context_); + InterpreterCreateQuery create_interpreter(inner_create_query, create_context); + create_interpreter.setInternal(true); + create_interpreter.execute(); + if (inner_target_table) + { + /// create inner target table + auto create_context = Context::createCopy(context_); + auto target_create_query = std::make_shared(); + target_create_query->setDatabase(table_id_.database_name); + target_create_query->setTable(generateTargetTableName(table_id_)); + + auto new_columns_list = std::make_shared(); + new_columns_list->set(new_columns_list->columns, query.columns_list->columns->ptr()); + + target_create_query->set(target_create_query->columns_list, new_columns_list); + target_create_query->set(target_create_query->storage, query.storage->ptr()); + + InterpreterCreateQuery create_interpreter(target_create_query, create_context); + create_interpreter.setInternal(true); + create_interpreter.execute(); + + target_table_id = StorageID(target_create_query->getDatabase(), target_create_query->getTable()); + } + else + target_table_id = query.to_table_id; + } + + inner_fetch_query = generateInnerFetchQuery(inner_table_id); + + clean_cache_task = getContext()->getSchedulePool().createTask(getStorageID().getFullTableName(), [this] { threadFuncCleanup(); }); + fire_task = getContext()->getSchedulePool().createTask( + getStorageID().getFullTableName(), [this] { is_proctime ? threadFuncFireProc() : threadFuncFireEvent(); }); + clean_cache_task->deactivate(); + fire_task->deactivate(); +} + +ASTPtr StorageWindowView::initInnerQuery(ASTSelectQuery query, ContextPtr context_) +{ + select_query = query.clone(); + input_header.clear(); + output_header.clear(); + String select_database_name = getContext()->getCurrentDatabase(); String select_table_name; - auto select_query_tmp = select_query->clone(); - extractDependentTable(getContext(), select_query_tmp, select_database_name, select_table_name); + auto select_query_tmp = query.clone(); + extractDependentTable(context_, select_query_tmp, select_database_name, select_table_name); /// If the table is not specified - use the table `system.one` if (select_table_name.empty()) @@ -1063,77 +1301,29 @@ StorageWindowView::StorageWindowView( select_table_name = "one"; } select_table_id = StorageID(select_database_name, select_table_name); - DatabaseCatalog::instance().addDependency(select_table_id, table_id_); /// Extract all info from query; substitute Function_tumble and Function_hop with Function_windowID. 
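The comment above describes the central rewrite of the parser: in the stored (mergeable) query every tumble(...)/hop(...) call becomes windowID(...), and the inner key column is renamed back to the user-facing tumble/hop name (via the std::regex_replace seen earlier) when the final result is produced. The real code does this on the AST with visitors; the Python regexes below are only meant to show the shape of the rename, with a made-up column name.

import re

def to_window_id(expr: str) -> str:
    # user-facing window function -> inner key column (the windowID substitution)
    return re.sub(r"\b(tumble|hop)\b", "windowID", expr)

def to_user_facing(window_id_expr: str, is_tumble: bool) -> str:
    # inner key column -> user-facing name, mirroring the std::regex_replace above
    return re.sub(r"\bwindowID\b", "tumble" if is_tumble else "hop", window_id_expr)

col = "tumble(ts, toIntervalSecond('5'))"
assert to_window_id(col) == "windowID(ts, toIntervalSecond('5'))"
assert to_user_facing(to_window_id(col), is_tumble=True) == col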
- auto inner_query = innerQueryParser(select_query->as()); + auto inner_query = innerQueryParser(query); - // Parse mergeable query + /// Parse mergeable query mergeable_query = inner_query->clone(); ReplaceFunctionNowData func_now_data; ReplaceFunctionNowVisitor(func_now_data).visit(mergeable_query); is_time_column_func_now = func_now_data.is_time_column_func_now; + if (!is_proctime && is_time_column_func_now) + throw Exception("now() is not supported for Event time processing.", ErrorCodes::INCORRECT_QUERY); if (is_time_column_func_now) window_id_name = func_now_data.window_id_name; - // Parse final query (same as mergeable query but has tumble/hop instead of windowID) + /// Parse final query (same as mergeable query but has tumble/hop instead of windowID) final_query = mergeable_query->clone(); ReplaceWindowIdMatcher::Data final_query_data; - if (is_tumble) - final_query_data.window_name = "tumble"; - else - final_query_data.window_name = "hop"; + final_query_data.window_name = is_tumble ? "tumble" : "hop"; ReplaceWindowIdMatcher::Visitor(final_query_data).visit(final_query); - is_watermark_strictly_ascending = query.is_watermark_strictly_ascending; - is_watermark_ascending = query.is_watermark_ascending; - is_watermark_bounded = query.is_watermark_bounded; - target_table_id = query.to_table_id; - - /// Extract information about watermark, lateness. - eventTimeParser(query); - - if (is_tumble) - window_column_name = std::regex_replace(window_id_name, std::regex("windowID"), "tumble"); - else - window_column_name = std::regex_replace(window_id_name, std::regex("windowID"), "hop"); - - auto generate_inner_table_name = [](const StorageID & storage_id) - { - if (storage_id.hasUUID()) - return ".inner." + toString(storage_id.uuid); - return ".inner." + storage_id.table_name; - }; - - if (attach_) - { - inner_table_id = StorageID(table_id_.database_name, generate_inner_table_name(table_id_)); - } - else - { - auto inner_create_query - = getInnerTableCreateQuery(inner_query, query.storage, table_id_.database_name, generate_inner_table_name(table_id_)); - - auto create_context = Context::createCopy(context_); - InterpreterCreateQuery create_interpreter(inner_create_query, create_context); - create_interpreter.setInternal(true); - create_interpreter.execute(); - inner_table_id = StorageID(inner_create_query->getDatabase(), inner_create_query->getTable()); - } - - clean_interval_ms = getContext()->getSettingsRef().window_view_clean_interval.totalMilliseconds(); - next_fire_signal = getWindowUpperBound(std::time(nullptr)); - - clean_cache_task = getContext()->getSchedulePool().createTask(getStorageID().getFullTableName(), [this] { threadFuncCleanup(); }); - if (is_proctime) - fire_task = getContext()->getSchedulePool().createTask(getStorageID().getFullTableName(), [this] { threadFuncFireProc(); }); - else - fire_task = getContext()->getSchedulePool().createTask(getStorageID().getFullTableName(), [this] { threadFuncFireEvent(); }); - clean_cache_task->deactivate(); - fire_task->deactivate(); + return inner_query; } - ASTPtr StorageWindowView::innerQueryParser(const ASTSelectQuery & query) { if (!query.groupBy()) @@ -1195,13 +1385,16 @@ ASTPtr StorageWindowView::innerQueryParser(const ASTSelectQuery & query) void StorageWindowView::eventTimeParser(const ASTCreateQuery & query) { + watermark_num_units = 0; + lateness_num_units = 0; + is_watermark_strictly_ascending = query.is_watermark_strictly_ascending; + is_watermark_ascending = query.is_watermark_ascending; + is_watermark_bounded = 
query.is_watermark_bounded; + if (query.is_watermark_strictly_ascending || query.is_watermark_ascending || query.is_watermark_bounded) { is_proctime = false; - if (is_time_column_func_now) - throw Exception("now() is not supported for Event time processing.", ErrorCodes::INCORRECT_QUERY); - if (query.is_watermark_ascending) { is_watermark_bounded = true; @@ -1215,6 +1408,8 @@ void StorageWindowView::eventTimeParser(const ASTCreateQuery & query) "Illegal type WATERMARK function should be Interval"); } } + else + is_proctime = true; if (query.allowed_lateness) { @@ -1223,11 +1418,25 @@ void StorageWindowView::eventTimeParser(const ASTCreateQuery & query) query.lateness_function, lateness_kind, lateness_num_units, "Illegal type ALLOWED_LATENESS function should be Interval"); } + else + allowed_lateness = false; } void StorageWindowView::writeIntoWindowView( StorageWindowView & window_view, const Block & block, ContextPtr local_context) { + while (window_view.modifying_query) + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + + if (!window_view.is_proctime && window_view.max_watermark == 0 && block.rows() > 0) + { + std::lock_guard lock(window_view.fire_signal_mutex); + const auto & window_column = block.getByName(window_view.timestamp_column_name); + const ColumnUInt32::Container & window_end_data = static_cast(*window_column.column).getData(); + UInt32 first_record_timestamp = window_end_data[0]; + window_view.max_watermark = window_view.getWindowUpperBound(first_record_timestamp); + } + Pipe pipe(std::make_shared(block.cloneEmpty(), Chunk(block.getColumns(), block.rows()))); UInt32 lateness_bound = 0; @@ -1262,14 +1471,10 @@ void StorageWindowView::writeIntoWindowView( if (lateness_bound > 0) /// Add filter, which leaves rows with timestamp >= lateness_bound { - ASTPtr args = std::make_shared(); - args->children.push_back(std::make_shared(window_view.timestamp_column_name)); - args->children.push_back(std::make_shared(lateness_bound)); - - auto filter_function = std::make_shared(); - filter_function->name = "greaterOrEquals"; - filter_function->arguments = args; - filter_function->children.push_back(filter_function->arguments); + auto filter_function = makeASTFunction( + "greaterOrEquals", + std::make_shared(window_view.timestamp_column_name), + std::make_shared(lateness_bound)); ASTPtr query = filter_function; NamesAndTypesList columns; @@ -1321,8 +1526,8 @@ void StorageWindowView::writeIntoWindowView( auto creator = [&](const StorageID & blocks_id_global) { - auto parent_metadata = window_view.getParentStorage()->getInMemoryMetadataPtr(); - auto required_columns = parent_metadata->getColumns(); + auto source_metadata = window_view.getSourceTable()->getInMemoryMetadataPtr(); + auto required_columns = source_metadata->getColumns(); required_columns.add(ColumnDescription("____timestamp", std::make_shared())); return StorageBlocks::createStorage(blocks_id_global, required_columns, std::move(pipes), QueryProcessingStage::FetchColumns); }; @@ -1378,11 +1583,12 @@ void StorageWindowView::writeIntoWindowView( }); } - auto inner_storage = window_view.getInnerStorage(); - auto lock = inner_storage->lockForShare( + auto inner_table = window_view.getInnerTable(); + auto lock = inner_table->lockForShare( local_context->getCurrentQueryId(), local_context->getSettingsRef().lock_acquire_timeout); - auto metadata_snapshot = inner_storage->getInMemoryMetadataPtr(); - auto output = inner_storage->write(window_view.getMergeableQuery(), metadata_snapshot, local_context); + auto metadata_snapshot 
= inner_table->getInMemoryMetadataPtr(); + auto output = inner_table->write(window_view.getMergeableQuery(), metadata_snapshot, local_context); + output->addTableLock(lock); if (!blocksHaveEqualStructure(builder.getHeader(), output->getHeader())) { @@ -1408,6 +1614,33 @@ void StorageWindowView::writeIntoWindowView( void StorageWindowView::startup() { + if (is_time_column_func_now) + inner_window_id_column_name = window_id_name; + else + { + Aliases aliases; + QueryAliasesVisitor(aliases).visit(mergeable_query); + auto inner_query_normalized = mergeable_query->clone(); + QueryNormalizer::Data normalizer_data(aliases, {}, false, getContext()->getSettingsRef(), false); + QueryNormalizer(normalizer_data).visit(inner_query_normalized); + auto inner_select_query = std::static_pointer_cast(inner_query_normalized); + auto t_sample_block + = InterpreterSelectQuery(inner_select_query, getContext(), SelectQueryOptions(QueryProcessingStage::WithMergeableState)) + .getSampleBlock(); + for (const auto & column : t_sample_block.getColumnsWithTypeAndName()) + { + if (startsWith(column.name, "windowID")) + { + inner_window_id_column_name = column.name; + break; + } + } + } + + inner_window_column_name = std::regex_replace(inner_window_id_column_name, std::regex("windowID"), is_tumble ? "tumble" : "hop"); + + DatabaseCatalog::instance().addDependency(select_table_id, getStorageID()); + // Start the working thread clean_cache_task->activateAndSchedule(); fire_task->activateAndSchedule(); @@ -1454,6 +1687,9 @@ void StorageWindowView::dropInnerTableIfAny(bool no_delay, ContextPtr local_cont { InterpreterDropQuery::executeDropQuery( ASTDropQuery::Kind::Drop, getContext(), local_context, inner_table_id, no_delay); + + if (inner_target_table) + InterpreterDropQuery::executeDropQuery(ASTDropQuery::Kind::Drop, getContext(), local_context, target_table_id, no_delay); } catch (...) 
{ @@ -1461,79 +1697,39 @@ void StorageWindowView::dropInnerTableIfAny(bool no_delay, ContextPtr local_cont } } -Block & StorageWindowView::getHeader() const +const Block & StorageWindowView::getInputHeader() const { std::lock_guard lock(sample_block_lock); - if (!sample_block) + if (!input_header) { - sample_block = InterpreterSelectQuery(select_query->clone(), getContext(), SelectQueryOptions(QueryProcessingStage::Complete)) + input_header = InterpreterSelectQuery(select_query->clone(), getContext(), SelectQueryOptions(QueryProcessingStage::FetchColumns)) .getSampleBlock(); - /// convert all columns to full columns - /// in case some of them are constant - for (size_t i = 0; i < sample_block.columns(); ++i) - { - sample_block.safeGetByPosition(i).column = sample_block.safeGetByPosition(i).column->convertToFullColumnIfConst(); - } } - return sample_block; + return input_header; } -StoragePtr StorageWindowView::getParentStorage() const +const Block & StorageWindowView::getOutputHeader() const +{ + std::lock_guard lock(sample_block_lock); + if (!output_header) + { + output_header = InterpreterSelectQuery(select_query->clone(), getContext(), SelectQueryOptions(QueryProcessingStage::Complete)) + .getSampleBlock(); + } + return output_header; +} + +StoragePtr StorageWindowView::getSourceTable() const { return DatabaseCatalog::instance().getTable(select_table_id, getContext()); } -StoragePtr StorageWindowView::getInnerStorage() const +StoragePtr StorageWindowView::getInnerTable() const { return DatabaseCatalog::instance().getTable(inner_table_id, getContext()); } -ASTPtr StorageWindowView::getFetchColumnQuery(UInt32 w_start, UInt32 w_end) const -{ - auto res_query = std::make_shared(); - auto select = std::make_shared(); - select->children.push_back(std::make_shared()); - res_query->setExpression(ASTSelectQuery::Expression::SELECT, select); - res_query->setExpression(ASTSelectQuery::Expression::TABLES, std::make_shared()); - auto tables_elem = std::make_shared(); - auto table_expr = std::make_shared(); - res_query->tables()->children.push_back(tables_elem); - tables_elem->table_expression = table_expr; - tables_elem->children.push_back(table_expr); - table_expr->database_and_table_name = std::make_shared(inner_table_id); - table_expr->children.push_back(table_expr->database_and_table_name); - - if (is_tumble) - { - /// SELECT * FROM inner_table PREWHERE window_id_name == w_end - /// (because we fire at the end of windows) - auto func_equals = makeASTFunction("equals", std::make_shared(window_id_name), std::make_shared(w_end)); - res_query->setExpression(ASTSelectQuery::Expression::PREWHERE, func_equals); - } - else - { - auto func_array = makeASTFunction("array"); - while (w_start < w_end) - { - /// slice_num_units = std::gcd(hop_num_units, window_num_units); - /// We use std::gcd(hop_num_units, window_num_units) as the new window size - /// to split the overlapped windows into non-overlapped. - /// For a hopping window with window_size=3 slice=1, the windows might be - /// [1,3],[2,4],[3,5], which will cause recomputation. - /// In this case, the slice_num_units will be `gcd(1,3)=1' and the non-overlapped - /// windows will split into [1], [2], [3]... We compute each split window into - /// mergeable state and merge them when the window is triggering. 
- func_array ->arguments->children.push_back(std::make_shared(w_end)); - w_end = addTime(w_end, window_kind, -slice_num_units, *time_zone); - } - auto func_has = makeASTFunction("has", func_array, std::make_shared(window_id_name)); - res_query->setExpression(ASTSelectQuery::Expression::PREWHERE, func_has); - } - - return res_query; -} - -StoragePtr StorageWindowView::getTargetStorage() const +StoragePtr StorageWindowView::getTargetTable() const { return DatabaseCatalog::instance().getTable(target_table_id, getContext()); } diff --git a/src/Storages/WindowView/StorageWindowView.h b/src/Storages/WindowView/StorageWindowView.h index 101d29d1ae7..d9343aa03ac 100644 --- a/src/Storages/WindowView/StorageWindowView.h +++ b/src/Storages/WindowView/StorageWindowView.h @@ -18,8 +18,9 @@ using ASTPtr = std::shared_ptr; * StorageWindowView. * * CREATE WINDOW VIEW [IF NOT EXISTS] [db.]name [TO [db.]name] - * [ENGINE [db.]name] + * [INNER ENGINE engine] [ENGINE engine] * [WATERMARK strategy] [ALLOWED_LATENESS interval_function] + * [POPULATE] * AS SELECT ... * GROUP BY [tumble/hop(...)] * @@ -108,7 +109,7 @@ public: const StorageID & table_id_, ContextPtr context_, const ASTCreateQuery & query, - const ColumnsDescription & columns, + const ColumnsDescription & columns_, bool attach_); String getName() const override { return "WindowView"; } @@ -134,6 +135,10 @@ public: const Names & deduplicate_by_columns, ContextPtr context) override; + void alter(const AlterCommands & params, ContextPtr context, AlterLockHolder & table_lock_holder) override; + + void checkAlterIsPossible(const AlterCommands & commands, ContextPtr context) const override; + void startup() override; void shutdown() override; @@ -166,10 +171,18 @@ public: std::pair getNewBlocks(UInt32 watermark); + BlockIO populate(); + static void writeIntoWindowView(StorageWindowView & window_view, const Block & block, ContextPtr context); ASTPtr getMergeableQuery() const { return mergeable_query->clone(); } + ASTPtr getSourceTableSelectQuery(); + + const Block & getInputHeader() const; + + const Block & getOutputHeader() const; + private: Poco::Logger * log; @@ -179,22 +192,28 @@ private: ASTPtr mergeable_query; /// Used to fetch the mergeable state and generate the final result. e.g. SELECT * FROM * GROUP BY tumble(____timestamp, *) ASTPtr final_query; + /// Used to fetch the data from inner storage. 
+ ASTPtr inner_fetch_query; - bool is_proctime{true}; + bool is_proctime; bool is_time_column_func_now; bool is_tumble; // false if is hop std::atomic shutdown_called{false}; + std::atomic modifying_query{false}; bool has_inner_table{true}; - mutable Block sample_block; + bool inner_target_table{false}; + mutable Block input_header; + mutable Block output_header; + UInt64 fire_signal_timeout_s; UInt64 clean_interval_ms; const DateLUTImpl * time_zone = nullptr; UInt32 max_timestamp = 0; UInt32 max_watermark = 0; // next watermark to fire UInt32 max_fired_watermark = 0; - bool is_watermark_strictly_ascending{false}; - bool is_watermark_ascending{false}; - bool is_watermark_bounded{false}; - bool allowed_lateness{false}; + bool is_watermark_strictly_ascending; + bool is_watermark_ascending; + bool is_watermark_bounded; + bool allowed_lateness; UInt32 next_fire_signal; std::deque fire_signal; std::list> watch_streams; @@ -214,18 +233,21 @@ private: Int64 window_num_units; Int64 hop_num_units; Int64 slice_num_units; - Int64 watermark_num_units = 0; - Int64 lateness_num_units = 0; + Int64 watermark_num_units; + Int64 lateness_num_units; Int64 slide_num_units; String window_id_name; String window_id_alias; - String window_column_name; + String inner_window_column_name; + String inner_window_id_column_name; String timestamp_column_name; StorageID select_table_id = StorageID::createEmpty(); StorageID target_table_id = StorageID::createEmpty(); StorageID inner_table_id = StorageID::createEmpty(); + ASTPtr inner_table_engine; + BackgroundSchedulePool::TaskHolder clean_cache_task; BackgroundSchedulePool::TaskHolder fire_task; @@ -234,9 +256,8 @@ private: ASTPtr innerQueryParser(const ASTSelectQuery & query); void eventTimeParser(const ASTCreateQuery & query); + ASTPtr initInnerQuery(ASTSelectQuery query, ContextPtr context); - std::shared_ptr getInnerTableCreateQuery( - const ASTPtr & inner_query, ASTStorage * storage, const String & database_name, const String & table_name); UInt32 getCleanupBound(); ASTPtr getCleanupQuery(); @@ -253,14 +274,10 @@ private: void updateMaxTimestamp(UInt32 timestamp); ASTPtr getFinalQuery() const { return final_query->clone(); } - ASTPtr getFetchColumnQuery(UInt32 w_start, UInt32 w_end) const; + ASTPtr getInnerTableCreateQuery(const ASTPtr & inner_query, const StorageID & inner_table_id); - StoragePtr getParentStorage() const; - - StoragePtr getInnerStorage() const; - - StoragePtr getTargetStorage() const; - - Block & getHeader() const; + StoragePtr getSourceTable() const; + StoragePtr getInnerTable() const; + StoragePtr getTargetTable() const; }; } diff --git a/src/Storages/WindowView/WindowViewSource.h b/src/Storages/WindowView/WindowViewSource.h index dc47e3fb878..b1648427fae 100644 --- a/src/Storages/WindowView/WindowViewSource.h +++ b/src/Storages/WindowView/WindowViewSource.h @@ -20,7 +20,7 @@ public: : SourceWithProgress( is_events_ ? 
Block( {ColumnWithTypeAndName(ColumnUInt32::create(), std::make_shared(window_view_timezone_), "watermark")}) - : storage_->getHeader()) + : storage_->getOutputHeader()) , storage(storage_) , is_events(is_events_) , window_view_timezone(window_view_timezone_) @@ -32,7 +32,7 @@ public: header.insert( ColumnWithTypeAndName(ColumnUInt32::create(), std::make_shared(window_view_timezone_), "watermark")); else - header = storage->getHeader(); + header = storage->getOutputHeader(); } String getName() const override { return "WindowViewSource"; } diff --git a/tests/ci/ast_fuzzer_check.py b/tests/ci/ast_fuzzer_check.py index 94f5eff51d7..9ccae89b403 100644 --- a/tests/ci/ast_fuzzer_check.py +++ b/tests/ci/ast_fuzzer_check.py @@ -169,6 +169,8 @@ if __name__ == "__main__": check_name, ) + ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) + logging.info("Result: '%s', '%s', '%s'", status, description, report_url) print(f"::notice ::Report url: {report_url}") post_commit_status(gh, pr_info.sha, check_name, description, status, report_url) diff --git a/tests/ci/build_check.py b/tests/ci/build_check.py index b73bf057393..f8397bf3e76 100644 --- a/tests/ci/build_check.py +++ b/tests/ci/build_check.py @@ -26,15 +26,6 @@ from tee_popen import TeePopen IMAGE_NAME = "clickhouse/binary-builder" -def get_build_config(build_check_name: str, build_name: str) -> BuildConfig: - if build_check_name == "ClickHouse build check (actions)": - build_config_name = "build_config" - else: - raise Exception(f"Unknown build check name {build_check_name}") - - return CI_CONFIG[build_config_name][build_name] - - def _can_export_binaries(build_config: BuildConfig) -> bool: if build_config["package_type"] != "deb": return False @@ -157,19 +148,21 @@ def create_json_artifact( json.dump(result, build_links) -def get_release_or_pr( - pr_info: PRInfo, build_config: BuildConfig, version: ClickHouseVersion -) -> str: +def get_release_or_pr(pr_info: PRInfo, version: ClickHouseVersion) -> Tuple[str, str]: + # FIXME performance + # performance builds are havily relies on a fixed path for artifacts, that's why + # we need to preserve 0 for anything but PR number + # It should be fixed in performance-comparison image eventually + performance_pr = "0" if "release" in pr_info.labels or "release-lts" in pr_info.labels: # for release pull requests we use branch names prefixes, not pr numbers - return pr_info.head_ref - elif pr_info.number == 0 and build_config["package_type"] != "performance": - # for pushes to master - major version, but not for performance builds - # they havily relies on a fixed path for build package and nobody going - # to deploy them somewhere, so it's ok. 
- return f"{version.major}.{version.minor}" + return pr_info.head_ref, performance_pr + elif pr_info.number == 0: + # for pushes to master - major version + return f"{version.major}.{version.minor}", performance_pr # PR number for anything else - return str(pr_info.number) + pr_number = str(pr_info.number) + return pr_number, pr_number def upload_master_static_binaries( @@ -196,10 +189,9 @@ def upload_master_static_binaries( def main(): logging.basicConfig(level=logging.INFO) - build_check_name = sys.argv[1] - build_name = sys.argv[2] + build_name = sys.argv[1] - build_config = get_build_config(build_check_name, build_name) + build_config = CI_CONFIG["build_config"][build_name] if not os.path.exists(TEMP_PATH): os.makedirs(TEMP_PATH) @@ -211,9 +203,13 @@ def main(): s3_helper = S3Helper("https://s3.amazonaws.com") version = get_version_from_repo(git=Git(True)) - release_or_pr = get_release_or_pr(pr_info, build_config, version) + release_or_pr, performance_pr = get_release_or_pr(pr_info, version) s3_path_prefix = "/".join((release_or_pr, pr_info.sha, build_name)) + # FIXME performance + s3_performance_path = "/".join( + (performance_pr, pr_info.sha, build_name, "performance.tgz") + ) # If this is rerun, then we try to find already created artifacts and just # put them as github actions artifcat (result) @@ -286,15 +282,6 @@ def main(): logging.info("cache was not fetched, will create empty dir") os.makedirs(ccache_path) - if build_config["package_type"] == "performance" and pr_info.number != 0: - # because perf tests store some information about git commits - cmd = ( - f"cd {REPO_COPY} && git fetch --depth=60 --no-recurse-submodules " - "--no-tags origin master:master" - ) - logging.info("Fetch master branch with a command: %s", cmd) - subprocess.check_call(cmd, shell=True) - packager_cmd = get_packager_cmd( build_config, os.path.join(REPO_COPY, "docker/packager"), @@ -333,11 +320,27 @@ def main(): else: logging.info("Build log doesn't exist") - build_urls = s3_helper.upload_build_folder_to_s3( - build_output_path, - s3_path_prefix, - keep_dirs_in_s3_path=False, - upload_symlinks=False, + # FIXME performance + performance_urls = [] + performance_path = os.path.join(build_output_path, "performance.tgz") + if os.path.exists(performance_path): + performance_urls.append( + s3_helper.upload_build_file_to_s3(performance_path, s3_performance_path) + ) + logging.info( + "Uploaded performance.tgz to %s, now delete to avoid duplication", + performance_urls[0], + ) + os.remove(performance_path) + + build_urls = ( + s3_helper.upload_build_folder_to_s3( + build_output_path, + s3_path_prefix, + keep_dirs_in_s3_path=False, + upload_symlinks=False, + ) + + performance_urls ) logging.info("Got build URLs %s", build_urls) diff --git a/tests/ci/cancel_and_rerun_workflow_lambda/app.py b/tests/ci/cancel_and_rerun_workflow_lambda/app.py index 72fbe530bae..813ee9d1ab7 100644 --- a/tests/ci/cancel_and_rerun_workflow_lambda/app.py +++ b/tests/ci/cancel_and_rerun_workflow_lambda/app.py @@ -329,7 +329,7 @@ def main(event): exec_workflow_url([most_recent_workflow.cancel_url], token) print("Cancelled") - for _ in range(30): + for _ in range(45): latest_workflow_desc = get_workflow_description(most_recent_workflow.run_id) print("Checking latest workflow", latest_workflow_desc) if latest_workflow_desc.status in ("completed", "cancelled"): diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 1a070c781d4..33430c11a53 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -18,16 +18,6 @@ CI_CONFIG = { 
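The FIXME above boils down to two different S3 prefixes: regular build artifacts move to the release- or PR-specific prefix, while performance.tgz stays under the fixed "0/..." prefix the performance-comparison image expects. A hedged illustration of the resulting paths (version, PR number and sha are made up for the example):

def build_s3_prefixes(release_or_pr, performance_pr, sha, build_name):
    # mirrors the s3_path_prefix / s3_performance_path assembly in build_check.py above
    s3_path_prefix = "/".join((release_or_pr, sha, build_name))
    s3_performance_path = "/".join((performance_pr, sha, build_name, "performance.tgz"))
    return s3_path_prefix, s3_performance_path

# push to master (pr number == 0): regular artifacts under "<major>.<minor>/...",
# performance.tgz under the fixed "0/..." prefix
assert build_s3_prefixes("22.4", "0", "deadbeef", "package_release") == (
    "22.4/deadbeef/package_release",
    "0/deadbeef/package_release/performance.tgz",
)

# ordinary pull request: both prefixes use the PR number
assert build_s3_prefixes("37887", "37887", "deadbeef", "package_release") == (
    "37887/deadbeef/package_release",
    "37887/deadbeef/package_release/performance.tgz",
)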
"tidy": "disable", "with_coverage": False, }, - "performance": { - "compiler": "clang-13", - "build_type": "", - "sanitizer": "", - "package_type": "performance", - "bundled": "bundled", - "splitted": "unsplitted", - "tidy": "disable", - "with_coverage": False, - }, "coverity": { "compiler": "clang-13", "build_type": "", @@ -202,7 +192,6 @@ CI_CONFIG = { "builds_report_config": { "ClickHouse build check (actions)": [ "package_release", - "performance", "coverity", "package_aarch64", "package_asan", @@ -372,7 +361,7 @@ CI_CONFIG = { "required_build": "binary_release", }, "Performance Comparison (actions)": { - "required_build": "performance", + "required_build": "package_release", }, }, } # type: dict diff --git a/tests/ci/clickhouse_helper.py b/tests/ci/clickhouse_helper.py index d52b6262a78..c595dc559df 100644 --- a/tests/ci/clickhouse_helper.py +++ b/tests/ci/clickhouse_helper.py @@ -7,6 +7,10 @@ import requests # type: ignore from get_robot_token import get_parameter_from_ssm +class InsertException(Exception): + pass + + class ClickHouseHelper: def __init__(self, url=None): if url is None: @@ -22,15 +26,13 @@ class ClickHouseHelper: def _insert_json_str_info_impl(url, auth, db, table, json_str): params = { "database": db, - "query": "INSERT INTO {table} FORMAT JSONEachRow".format(table=table), + "query": f"INSERT INTO {table} FORMAT JSONEachRow", "date_time_input_format": "best_effort", "send_logs_level": "warning", } for i in range(5): - response = requests.post( - url, params=params, data=json_str, headers=auth, verify=False - ) + response = requests.post(url, params=params, data=json_str, headers=auth) logging.info("Response content '%s'", response.content) @@ -58,23 +60,37 @@ class ClickHouseHelper: response.request.body, ) - raise Exception(error) + raise InsertException(error) else: - raise Exception(error) + raise InsertException(error) def _insert_json_str_info(self, db, table, json_str): self._insert_json_str_info_impl(self.url, self.auth, db, table, json_str) - def insert_event_into(self, db, table, event): + def insert_event_into(self, db, table, event, safe=True): event_str = json.dumps(event) - self._insert_json_str_info(db, table, event_str) + try: + self._insert_json_str_info(db, table, event_str) + except InsertException as e: + logging.error( + "Exception happened during inserting data into clickhouse: %s", e + ) + if not safe: + raise - def insert_events_into(self, db, table, events): + def insert_events_into(self, db, table, events, safe=True): jsons = [] for event in events: jsons.append(json.dumps(event)) - self._insert_json_str_info(db, table, ",".join(jsons)) + try: + self._insert_json_str_info(db, table, ",".join(jsons)) + except InsertException as e: + logging.error( + "Exception happened during inserting data into clickhouse: %s", e + ) + if not safe: + raise def _select_and_get_json_each_row(self, db, query): params = { @@ -85,9 +101,7 @@ class ClickHouseHelper: for i in range(5): response = None try: - response = requests.get( - self.url, params=params, headers=self.auth, verify=False - ) + response = requests.get(self.url, params=params, headers=self.auth) response.raise_for_status() return response.text except Exception as ex: @@ -96,7 +110,7 @@ class ClickHouseHelper: logging.warning("Reponse text %s", response.text) time.sleep(0.1 * i) - raise Exception("Cannot insert data into clickhouse") + raise Exception("Cannot fetch data from clickhouse") def select_json_each_row(self, db, query): text = self._select_and_get_json_each_row(db, query) @@ -167,17 
+181,14 @@ def prepare_tests_results_for_clickhouse( def mark_flaky_tests(clickhouse_helper, check_name, test_results): try: - query = """ - SELECT DISTINCT test_name - FROM checks - WHERE - check_start_time BETWEEN now() - INTERVAL 3 DAY AND now() - AND check_name = '{check_name}' - AND (test_status = 'FAIL' OR test_status = 'FLAKY') - AND pull_request_number = 0 - """.format( - check_name=check_name - ) + query = f"""SELECT DISTINCT test_name +FROM checks +WHERE + check_start_time BETWEEN now() - INTERVAL 3 DAY AND now() + AND check_name = '{check_name}' + AND (test_status = 'FAIL' OR test_status = 'FLAKY') + AND pull_request_number = 0 +""" tests_data = clickhouse_helper.select_json_each_row("default", query) master_failed_tests = {row["test_name"] for row in tests_data} @@ -187,4 +198,4 @@ def mark_flaky_tests(clickhouse_helper, check_name, test_results): if test_result[1] == "FAIL" and test_result[0] in master_failed_tests: test_result[1] = "FLAKY" except Exception as ex: - logging.info("Exception happened during flaky tests fetch %s", ex) + logging.error("Exception happened during flaky tests fetch %s", ex) diff --git a/tests/ci/docker_server.py b/tests/ci/docker_server.py index 988771b1577..97b901617f9 100644 --- a/tests/ci/docker_server.py +++ b/tests/ci/docker_server.py @@ -5,6 +5,7 @@ import argparse import json import logging import subprocess +import sys from os import path as p, makedirs from typing import List, Tuple @@ -292,7 +293,7 @@ def main(): pr_info = None if CI: pr_info = PRInfo() - release_or_pr = get_release_or_pr(pr_info, {"package_type": ""}, args.version) + release_or_pr, _ = get_release_or_pr(pr_info, args.version) args.bucket_prefix = ( f"https://s3.amazonaws.com/{S3_BUILDS_BUCKET}/" f"{release_or_pr}/{pr_info.sha}" @@ -350,6 +351,8 @@ def main(): ) ch_helper = ClickHouseHelper() ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) + if status != "success": + sys.exit(1) if __name__ == "__main__": diff --git a/tests/ci/docs_release.py b/tests/ci/docs_release.py index b6d47326f9b..d404e79c312 100644 --- a/tests/ci/docs_release.py +++ b/tests/ci/docs_release.py @@ -42,8 +42,10 @@ if __name__ == "__main__": token = CLOUDFLARE_TOKEN cmd = ( - "docker run --cap-add=SYS_PTRACE --volume=$SSH_AUTH_SOCK:/ssh-agent -e SSH_AUTH_SOCK=/ssh-agent " - f"-e CLOUDFLARE_TOKEN={token} --volume={repo_path}:/repo_path --volume={test_output}:/output_path {docker_image}" + "docker run --cap-add=SYS_PTRACE --volume=$SSH_AUTH_SOCK:/ssh-agent " + f"-e SSH_AUTH_SOCK=/ssh-agent -e CLOUDFLARE_TOKEN={token} " + f"-e EXTRA_BUILD_ARGS='--verbose' --volume={repo_path}:/repo_path" + f" --volume={test_output}:/output_path {docker_image}" ) run_log_path = os.path.join(test_output, "runlog.log") diff --git a/tests/ci/env_helper.py b/tests/ci/env_helper.py index 6462baad729..c97c6298acc 100644 --- a/tests/ci/env_helper.py +++ b/tests/ci/env_helper.py @@ -9,7 +9,7 @@ TEMP_PATH = os.getenv("TEMP_PATH", module_dir) CACHES_PATH = os.getenv("CACHES_PATH", TEMP_PATH) CLOUDFLARE_TOKEN = os.getenv("CLOUDFLARE_TOKEN") -GITHUB_EVENT_PATH = os.getenv("GITHUB_EVENT_PATH") +GITHUB_EVENT_PATH = os.getenv("GITHUB_EVENT_PATH", "") GITHUB_REPOSITORY = os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse") GITHUB_RUN_ID = os.getenv("GITHUB_RUN_ID", "0") GITHUB_SERVER_URL = os.getenv("GITHUB_SERVER_URL", "https://github.com") diff --git a/tests/ci/performance_comparison_check.py b/tests/ci/performance_comparison_check.py index aa848b37109..b491c739653 100644 --- 
a/tests/ci/performance_comparison_check.py +++ b/tests/ci/performance_comparison_check.py @@ -8,17 +8,19 @@ import json import subprocess import traceback import re +from typing import Dict from github import Github -from env_helper import GITHUB_RUN_URL -from pr_info import PRInfo -from s3_helper import S3Helper -from get_robot_token import get_best_robot_token -from docker_pull_helper import get_image_with_version from commit_status_helper import get_commit, post_commit_status -from tee_popen import TeePopen +from ci_config import CI_CONFIG +from docker_pull_helper import get_image_with_version +from env_helper import GITHUB_EVENT_PATH, GITHUB_RUN_URL +from get_robot_token import get_best_robot_token +from pr_info import PRInfo from rerun_helper import RerunHelper +from s3_helper import S3Helper +from tee_popen import TeePopen IMAGE_NAME = "clickhouse/performance-comparison" @@ -33,7 +35,8 @@ def get_run_command( image, ): return ( - f"docker run --privileged --volume={workspace}:/workspace --volume={result_path}:/output " + f"docker run --privileged --volume={workspace}:/workspace " + f"--volume={result_path}:/output " f"--volume={repo_tests_path}:/usr/share/clickhouse-test " f"--cap-add syslog --cap-add sys_admin --cap-add sys_rawio " f"-e PR_TO_TEST={pr_to_test} -e SHA_TO_TEST={sha_to_test} {additional_env} " @@ -69,11 +72,12 @@ if __name__ == "__main__": reports_path = os.getenv("REPORTS_PATH", "./reports") check_name = sys.argv[1] + required_build = CI_CONFIG["tests_config"][check_name]["required_build"] if not os.path.exists(temp_path): os.makedirs(temp_path) - with open(os.getenv("GITHUB_EVENT_PATH"), "r", encoding="utf-8") as event_file: + with open(GITHUB_EVENT_PATH, "r", encoding="utf-8") as event_file: event = json.load(event_file) gh = Github(get_best_robot_token()) @@ -83,6 +87,7 @@ if __name__ == "__main__": docker_env = "" docker_env += " -e S3_URL=https://s3.amazonaws.com/clickhouse-builds" + docker_env += f" -e BUILD_NAME={required_build}" if pr_info.number == 0: pr_link = commit.html_url @@ -95,9 +100,12 @@ if __name__ == "__main__": ) if "RUN_BY_HASH_TOTAL" in os.environ: - run_by_hash_total = int(os.getenv("RUN_BY_HASH_TOTAL")) - run_by_hash_num = int(os.getenv("RUN_BY_HASH_NUM")) - docker_env += f" -e CHPC_TEST_RUN_BY_HASH_TOTAL={run_by_hash_total} -e CHPC_TEST_RUN_BY_HASH_NUM={run_by_hash_num}" + run_by_hash_total = int(os.getenv("RUN_BY_HASH_TOTAL", "1")) + run_by_hash_num = int(os.getenv("RUN_BY_HASH_NUM", "1")) + docker_env += ( + f" -e CHPC_TEST_RUN_BY_HASH_TOTAL={run_by_hash_total}" + f" -e CHPC_TEST_RUN_BY_HASH_NUM={run_by_hash_num}" + ) check_name_with_group = ( check_name + f" [{run_by_hash_num + 1}/{run_by_hash_total}]" ) @@ -157,13 +165,12 @@ if __name__ == "__main__": ) s3_prefix = f"{pr_info.number}/{pr_info.sha}/{check_name_prefix}/" s3_helper = S3Helper("https://s3.amazonaws.com") - for file in paths: + uploaded = {} # type: Dict[str, str] + for name, path in paths.items(): try: - paths[file] = s3_helper.upload_test_report_to_s3( - paths[file], s3_prefix + file - ) + uploaded[name] = s3_helper.upload_test_report_to_s3(path, s3_prefix + name) except Exception: - paths[file] = "" + uploaded[name] = "" traceback.print_exc() # Upload all images and flamegraphs to S3 @@ -178,9 +185,12 @@ if __name__ == "__main__": status = "" message = "" try: - report_text = open(os.path.join(result_path, "report.html"), "r").read() - status_match = re.search("", report_text) - message_match = re.search("", report_text) + with open( + os.path.join(result_path, 
"report.html"), "r", encoding="utf-8" + ) as report_fd: + report_text = report_fd.read() + status_match = re.search("", report_text) + message_match = re.search("", report_text) if status_match: status = status_match.group(1).strip() if message_match: @@ -205,17 +215,17 @@ if __name__ == "__main__": report_url = GITHUB_RUN_URL - if paths["runlog.log"]: - report_url = paths["runlog.log"] + if uploaded["runlog.log"]: + report_url = uploaded["runlog.log"] - if paths["compare.log"]: - report_url = paths["compare.log"] + if uploaded["compare.log"]: + report_url = uploaded["compare.log"] - if paths["output.7z"]: - report_url = paths["output.7z"] + if uploaded["output.7z"]: + report_url = uploaded["output.7z"] - if paths["report.html"]: - report_url = paths["report.html"] + if uploaded["report.html"]: + report_url = uploaded["report.html"] post_commit_status( gh, pr_info.sha, check_name_with_group, message, status, report_url diff --git a/tests/ci/release.py b/tests/ci/release.py index 8dcf80e331e..b07deffa1fb 100755 --- a/tests/ci/release.py +++ b/tests/ci/release.py @@ -89,6 +89,11 @@ class Release: self._git.update() self.version = get_version_from_repo(git=self._git) + def get_stable_release_type(self) -> str: + if self.version.minor % 5 == 3: # our 3 and 8 are LTS + return VersionType.LTS + return VersionType.STABLE + def check_prerequisites(self): """ Check tooling installed in the system @@ -96,7 +101,7 @@ class Release: self.run("gh auth status") self.run("git status") - def do(self, check_dirty: bool, check_branch: bool, with_prestable: bool): + def do(self, check_dirty: bool, check_branch: bool, with_release_branch: bool): self.check_prerequisites() if check_dirty: @@ -115,14 +120,22 @@ class Release: with self._checkout(self.release_commit, True): if self.release_type in self.BIG: # Checkout to the commit, it will provide the correct current version - if with_prestable: - with self.prestable(): + if with_release_branch: + with self.create_release_branch(): logging.info("Prestable part of the releasing is done") else: - logging.info("Skipping prestable stage") + logging.info("Skipping creating release branch stage") - with self.testing(): - logging.info("Testing part of the releasing is done") + rollback = self._rollback_stack.copy() + try: + with self.testing(): + logging.info("Testing part of the releasing is done") + except (Exception, KeyboardInterrupt): + logging.fatal("Testing part failed, rollback previous steps") + rollback.reverse() + for cmd in rollback: + self.run(cmd) + raise elif self.release_type in self.SMALL: with self.stable(): @@ -152,7 +165,10 @@ class Release: ) # Prefetch the branch to have it updated - self.run(f"git fetch {self.repo.url} {branch}:{branch}") + if self._git.branch == branch: + self.run("git pull") + else: + self.run(f"git fetch {self.repo.url} {branch}:{branch}") output = self.run(f"git branch --contains={self.release_commit} {branch}") if branch not in output: raise Exception( @@ -170,16 +186,16 @@ class Release: ) @contextmanager - def prestable(self): + def create_release_branch(self): self.check_no_tags_after() # Create release branch self.read_version() with self._create_branch(self.release_branch, self.release_commit): with self._checkout(self.release_branch, True): self.read_version() - self.version.with_description(VersionType.PRESTABLE) - with self._create_gh_release(True): - with self._bump_prestable_version(): + self.version.with_description(self.get_stable_release_type()) + with self._create_gh_release(False): + with 
self._bump_release_branch(): # At this point everything will rollback automatically yield @@ -187,9 +203,7 @@ class Release: def stable(self): self.check_no_tags_after() self.read_version() - version_type = VersionType.STABLE - if self.version.minor % 5 == 3: # our 3 and 8 are LTS - version_type = VersionType.LTS + version_type = self.get_stable_release_type() self.version.with_description(version_type) with self._create_gh_release(False): self.version = self.version.update(self.release_type) @@ -198,7 +212,7 @@ class Release: update_contributors(raise_error=True) # Checkouting the commit of the branch and not the branch itself, # then we are able to skip rollback - with self._checkout(f"{self.release_branch}@{{0}}", False): + with self._checkout(f"{self.release_branch}^0", False): current_commit = self.run("git rev-parse HEAD") self.run( f"git commit -m " @@ -254,11 +268,15 @@ class Release: self._release_commit = commit(release_commit) @contextmanager - def _bump_prestable_version(self): + def _bump_release_branch(self): # Update only git, origal version stays the same self._git.update() new_version = self.version.patch_update() - new_version.with_description("prestable") + version_type = self.get_stable_release_type() + pr_labels = "--label release" + if version_type == VersionType.LTS: + pr_labels += " --label release-lts" + new_version.with_description(version_type) update_cmake_version(new_version) update_contributors(raise_error=True) self.run( @@ -272,22 +290,23 @@ class Release: with self._create_gh_label( f"v{self.release_branch}-affected", "c2bfff" ): + # The following command is rolled back by self._push self.run( f"gh pr create --repo {self.repo} --title " f"'Release pull request for branch {self.release_branch}' " - f"--head {self.release_branch} --label release " + f"--head {self.release_branch} {pr_labels} " "--body 'This PullRequest is a part of ClickHouse release " "cycle. It is used by CI system only. 
Do not perform any " "changes with it.'" ) - # Here the prestable part is done + # Here the release branch part is done yield @contextmanager def _bump_testing_version(self, helper_branch: str): self.read_version() self.version = self.version.update(self.release_type) - self.version.with_description("testing") + self.version.with_description(VersionType.TESTING) update_cmake_version(self.version) update_contributors(raise_error=True) self.run( @@ -300,7 +319,7 @@ class Release: f"gh pr create --repo {self.repo} --title 'Update version after " f"release' --head {helper_branch} --body-file '{body_file}'" ) - # Here the prestable part is done + # Here the testing part is done yield @contextmanager @@ -314,7 +333,7 @@ class Release: rollback_cmd = f"git checkout {orig_ref}" try: yield - except BaseException: + except (Exception, KeyboardInterrupt): logging.warning("Rolling back checked out %s for %s", ref, orig_ref) self.run(f"git reset --hard; git checkout {orig_ref}") raise @@ -329,7 +348,7 @@ class Release: self._rollback_stack.append(rollback_cmd) try: yield - except BaseException: + except (Exception, KeyboardInterrupt): logging.warning("Rolling back created branch %s", name) self.run(rollback_cmd) raise @@ -344,7 +363,7 @@ class Release: self._rollback_stack.append(rollback_cmd) try: yield - except BaseException: + except (Exception, KeyboardInterrupt): logging.warning("Rolling back label %s", label) self.run(rollback_cmd) raise @@ -358,14 +377,14 @@ class Release: if as_prerelease: prerelease = "--prerelease" self.run( - f"gh release create {prerelease} --draft --repo {self.repo} " + f"gh release create {prerelease} --repo {self.repo} " f"--title 'Release {tag}' '{tag}'" ) rollback_cmd = f"gh release delete --yes --repo {self.repo} '{tag}'" self._rollback_stack.append(rollback_cmd) try: yield - except BaseException: + except (Exception, KeyboardInterrupt): logging.warning("Rolling back release publishing") self.run(rollback_cmd) raise @@ -379,7 +398,7 @@ class Release: try: with self._push(f"'{tag}'"): yield - except BaseException: + except (Exception, KeyboardInterrupt): logging.warning("Rolling back tag %s", tag) self.run(rollback_cmd) raise @@ -396,7 +415,7 @@ class Release: try: yield - except BaseException: + except (Exception, KeyboardInterrupt): if with_rollback_on_fail: logging.warning("Rolling back pushed ref %s", ref) self.run(rollback_cmd) @@ -437,14 +456,13 @@ def parse_args() -> argparse.Namespace: dest="release_type", help="a release type, new branch is created only for 'major' and 'minor'", ) - parser.add_argument("--with-prestable", default=True, help=argparse.SUPPRESS) + parser.add_argument("--with-release-branch", default=True, help=argparse.SUPPRESS) parser.add_argument( - "--no-prestable", - dest="with_prestable", + "--no-release-branch", + dest="with_release_branch", action="store_false", default=argparse.SUPPRESS, - help=f"if set, for release types in {Release.BIG} skip creating prestable " - "release and release branch", + help=f"if set, for release types in {Release.BIG} skip creating release branch", ) parser.add_argument("--check-dirty", default=True, help=argparse.SUPPRESS) parser.add_argument( @@ -475,7 +493,7 @@ def main(): repo = Repo(args.repo, args.remote_protocol) release = Release(repo, args.commit, args.release_type) - release.do(args.check_dirty, args.check_branch, args.with_prestable) + release.do(args.check_dirty, args.check_branch, args.with_release_branch) if __name__ == "__main__": diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index 
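Each context manager above records its inverse command in self._rollback_stack before yielding, and the new do() logic copies that stack and replays it in reverse when the testing stage fails. A stripped-down, hypothetical Python sketch of the pattern (commands are made up; the real class runs shell commands via self.run):

from contextlib import contextmanager

class MiniRelease:
    """Stripped-down sketch of the rollback-stack pattern used in release.py."""

    def __init__(self):
        self._rollback_stack = []   # inverse commands, recorded in execution order
        self.executed = []

    def run(self, cmd):
        self.executed.append(cmd)

    @contextmanager
    def step(self, do_cmd, rollback_cmd):
        self.run(do_cmd)
        self._rollback_stack.append(rollback_cmd)
        yield

    def do(self):
        with self.step("git tag v1.2.3", "git tag -d v1.2.3"), \
             self.step("gh release create v1.2.3", "gh release delete v1.2.3"):
            rollback = self._rollback_stack.copy()
            try:
                raise RuntimeError("testing part failed")
            except (Exception, KeyboardInterrupt):
                rollback.reverse()          # undo the newest step first
                for cmd in rollback:
                    self.run(cmd)

r = MiniRelease()
r.do()
assert r.executed == [
    "git tag v1.2.3",
    "gh release create v1.2.3",
    "gh release delete v1.2.3",
    "git tag -d v1.2.3",
]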
ebdb7e9594a..bd70134760a 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -25,11 +25,14 @@ CAN_BE_TESTED_LABEL = "can be tested" DO_NOT_TEST_LABEL = "do not test" SUBMODULE_CHANGED_LABEL = "submodule changed" +# They are used in .github/PULL_REQUEST_TEMPLATE.md, keep comments there +# updated accordingly LABELS = { "pr-backward-incompatible": ["Backward Incompatible Change"], "pr-bugfix": [ "Bug Fix", "Bug Fix (user-visible misbehaviour in official stable or prestable release)", + "Bug Fix (user-visible misbehavior in official stable or prestable release)", ], "pr-build": [ "Build/Testing/Packaging Improvement", diff --git a/tests/config/config.d/enable_access_control_improvements.xml b/tests/config/config.d/enable_access_control_improvements.xml index 44fc8f7b996..052858a9519 100644 --- a/tests/config/config.d/enable_access_control_improvements.xml +++ b/tests/config/config.d/enable_access_control_improvements.xml @@ -1,5 +1,6 @@ true + true diff --git a/tests/integration/ci-runner.py b/tests/integration/ci-runner.py index 20787786aa4..9ced81d73f0 100755 --- a/tests/integration/ci-runner.py +++ b/tests/integration/ci-runner.py @@ -34,6 +34,23 @@ def stringhash(s): return zlib.crc32(s.encode("utf-8")) +# Search test by the common prefix. +# This is accept tests w/o parameters in skip list. +# +# Examples: +# - has_test(['foobar'], 'foobar[param]') == True +# - has_test(['foobar[param]'], 'foobar') == True +def has_test(tests, test_to_match): + for test in tests: + if len(test_to_match) < len(test): + if test[0 : len(test_to_match)] == test_to_match: + return True + else: + if test_to_match[0 : len(test)] == test: + return True + return False + + def get_changed_tests_to_run(pr_info, repo_path): result = set() changed_files = pr_info["changed_files"] @@ -145,7 +162,7 @@ def get_test_times(output): def clear_ip_tables_and_restart_daemons(): logging.info( "Dump iptables after run %s", - subprocess.check_output("sudo iptables -L", shell=True), + subprocess.check_output("sudo iptables -nvL", shell=True), ) try: logging.info("Killing all alive docker containers") @@ -792,7 +809,7 @@ class ClickhouseIntegrationTestsRunner: self._install_clickhouse(build_path) logging.info( "Dump iptables before run %s", - subprocess.check_output("sudo iptables -L", shell=True), + subprocess.check_output("sudo iptables -nvL", shell=True), ) all_tests = self._get_all_tests(repo_path) @@ -809,13 +826,19 @@ class ClickhouseIntegrationTestsRunner: "Found %s tests first 3 %s", len(all_tests), " ".join(all_tests[:3]) ) filtered_sequential_tests = list( - filter(lambda test: test in all_tests, parallel_skip_tests) + filter(lambda test: has_test(all_tests, test), parallel_skip_tests) ) filtered_parallel_tests = list( - filter(lambda test: test not in parallel_skip_tests, all_tests) + filter( + lambda test: not has_test(parallel_skip_tests, test), + all_tests, + ) ) not_found_tests = list( - filter(lambda test: test not in all_tests, parallel_skip_tests) + filter( + lambda test: not has_test(all_tests, test), + parallel_skip_tests, + ) ) logging.info( "Found %s tests first 3 %s, parallel %s, other %s", diff --git a/tests/integration/helpers/network.py b/tests/integration/helpers/network.py index 63fb2065f9d..7fdd3b79bd6 100644 --- a/tests/integration/helpers/network.py +++ b/tests/integration/helpers/network.py @@ -236,6 +236,7 @@ class _NetworkManager: for i in range(5): if self._container is not None: try: + logging.debug("[network] Removing %s", self._container.id) self._container.remove(force=True) 
break except docker.errors.NotFound: @@ -276,7 +277,7 @@ class _NetworkManager: detach=True, network_mode="host", ) - container_id = self._container.id + logging.debug("[network] Created new container %s", self._container.id) self._container_expire_time = time.time() + self.container_expire_timeout return self._container @@ -295,6 +296,10 @@ class _NetworkManager: output = self._docker_client.api.exec_start(handle).decode("utf8") exit_code = self._docker_client.api.exec_inspect(handle)["ExitCode"] + logging.debug( + "[network] %s: %s (%s): %s", container.id, cmd, exit_code, output.strip() + ) + if exit_code != 0: print(output) raise subprocess.CalledProcessError(exit_code, cmd) diff --git a/tests/integration/parallel_skip.json b/tests/integration/parallel_skip.json index b220e56dbd9..8587e8d4120 100644 --- a/tests/integration/parallel_skip.json +++ b/tests/integration/parallel_skip.json @@ -4,5 +4,47 @@ "test_dns_cache/test.py::test_ip_change_update_dns_cache", "test_dns_cache/test.py::test_user_access_ip_change[node0]", "test_dns_cache/test.py::test_user_access_ip_change[node1]", - "test_dns_cache/test.py::test_host_is_drop_from_cache_after_consecutive_failures" + "test_dns_cache/test.py::test_host_is_drop_from_cache_after_consecutive_failures", + + "test_atomic_drop_table/test.py::test_atomic_delete_with_stopped_zookeeper", + "test_attach_without_fetching/test.py::test_attach_without_fetching", + "test_cleanup_dir_after_bad_zk_conn/test.py::test_cleanup_dir_after_bad_zk_conn", + "test_cleanup_dir_after_bad_zk_conn/test.py::test_attach_without_zk", + "test_consistent_parts_after_clone_replica/test.py::test_inconsistent_parts_if_drop_while_replica_not_active", + "test_cross_replication/test.py::test", + "test_ddl_worker_non_leader/test.py::test_non_leader_replica", + "test_delayed_replica_failover/test.py::test", + "test_dictionary_allow_read_expired_keys/test_default_reading.py::test_default_reading", + "test_dictionary_allow_read_expired_keys/test_dict_get.py::test_simple_dict_get", + "test_dictionary_allow_read_expired_keys/test_dict_get_or_default.py::test_simple_dict_get_or_default", + "test_disabled_mysql_server/test.py::test_disabled_mysql_server", + "test_distributed_respect_user_timeouts/test.py::test_reconnect", + "test_https_replication/test.py::test_replication_after_partition", + "test_insert_into_distributed/test.py::test_reconnect", + "test_insert_into_distributed/test.py::test_inserts_batching", + "test_insert_into_distributed_through_materialized_view/test.py::test_reconnect", + "test_insert_into_distributed_through_materialized_view/test.py::test_inserts_batching", + "test_keeper_multinode_blocade_leader/test.py::test_blocade_leader", + "test_keeper_multinode_blocade_leader/test.py::test_blocade_leader_twice", + "test_keeper_multinode_simple/test.py::test_session_expiration", + "test_keeper_two_nodes_cluster/test.py::test_read_write_two_nodes_with_blocade", + "test_limited_replicated_fetches/test.py::test_limited_fetches", + "test_materialized_mysql_database/test.py::test_network_partition_5_7", + "test_materialized_mysql_database/test.py::test_network_partition_8_0", + "test_mysql_database_engine/test.py::test_restart_server", + "test_parts_delete_zookeeper/test.py::test_merge_doesnt_work_without_zookeeper", + "test_quorum_inserts_parallel/test.py::test_parallel_quorum_actually_quorum", + "test_random_inserts/test.py::test_random_inserts", + "test_redirect_url_storage/test.py::test_url_reconnect", + "test_replace_partition/test.py::test_drop_failover", + 
"test_replace_partition/test.py::test_replace_after_replace_failover", + "test_replicated_database/test.py::test_recover_staled_replica", + "test_replicated_database/test.py::test_startup_without_zk", + "test_replicated_database/test.py::test_sync_replica", + "test_replicated_fetches_timeouts/test.py::test_no_stall", + "test_storage_kafka/test.py::test_kafka_no_holes_when_write_suffix_failed", + "test_storage_s3/test.py::test_url_reconnect_in_the_middle", + "test_system_metrics/test.py::test_readonly_metrics", + "test_system_replicated_fetches/test.py::test_system_replicated_fetches", + "test_zookeeper_config_load_balancing/test.py::test_round_robin" ] diff --git a/tests/integration/test_broken_part_during_merge/test.py b/tests/integration/test_broken_part_during_merge/test.py index d7492be686b..2171e33a02a 100644 --- a/tests/integration/test_broken_part_during_merge/test.py +++ b/tests/integration/test_broken_part_during_merge/test.py @@ -2,7 +2,6 @@ import pytest from helpers.cluster import ClickHouseCluster from multiprocessing.dummy import Pool -from helpers.network import PartitionManager from helpers.corrupt_part_data_on_disk import corrupt_part_data_on_disk import time diff --git a/tests/integration/test_config_corresponding_root/configs/config.xml b/tests/integration/test_config_corresponding_root/configs/config.xml index e1a1c1c75df..914d4e6cb0e 100644 --- a/tests/integration/test_config_corresponding_root/configs/config.xml +++ b/tests/integration/test_config_corresponding_root/configs/config.xml @@ -101,13 +101,6 @@ --> 8589934592 - - 5368709120 - - /var/lib/clickhouse/ diff --git a/tests/integration/test_config_xml_full/configs/config.xml b/tests/integration/test_config_xml_full/configs/config.xml index f7a0afecac5..55e84568af1 100644 --- a/tests/integration/test_config_xml_full/configs/config.xml +++ b/tests/integration/test_config_xml_full/configs/config.xml @@ -304,12 +304,6 @@ --> 8589934592 - - 5368709120 - 8589934592 - - 5368709120 - - /var/lib/clickhouse/ diff --git a/tests/integration/test_jbod_load_balancing/__init__.py b/tests/integration/test_jbod_load_balancing/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_jbod_load_balancing/configs/config.d/storage_configuration.xml b/tests/integration/test_jbod_load_balancing/configs/config.d/storage_configuration.xml new file mode 100644 index 00000000000..529eb1bc0b5 --- /dev/null +++ b/tests/integration/test_jbod_load_balancing/configs/config.d/storage_configuration.xml @@ -0,0 +1,39 @@ + + + + + /jbod1/ + + + /jbod2/ + + + /jbod3/ + + + + + + + jbod1 + jbod2 + jbod3 + + + + + + + + + jbod1 + jbod2 + jbod3 + + least_used + + + + + + diff --git a/tests/integration/test_jbod_load_balancing/test.py b/tests/integration/test_jbod_load_balancing/test.py new file mode 100644 index 00000000000..9c62d1bbdfc --- /dev/null +++ b/tests/integration/test_jbod_load_balancing/test.py @@ -0,0 +1,136 @@ +# pylint: disable=unused-argument +# pylint: disable=redefined-outer-name + +import pytest +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance( + "node", + main_configs=[ + "configs/config.d/storage_configuration.xml", + ], + tmpfs=[ + "/jbod1:size=100M", + "/jbod2:size=200M", + "/jbod3:size=300M", + ], +) + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def test_jbod_load_balancing_round_robin(start_cluster): + try: + node.query( + """ + 
CREATE TABLE data_round_robin (p UInt8) + ENGINE = MergeTree + ORDER BY tuple() + SETTINGS storage_policy = 'jbod_round_robin'; + + SYSTEM STOP MERGES data_round_robin; + + INSERT INTO data_round_robin SELECT * FROM numbers(10); + INSERT INTO data_round_robin SELECT * FROM numbers(10); + INSERT INTO data_round_robin SELECT * FROM numbers(10); + INSERT INTO data_round_robin SELECT * FROM numbers(10); + """ + ) + + parts = node.query( + """ + SELECT count(), disk_name + FROM system.parts + WHERE table = 'data_round_robin' + GROUP BY disk_name + ORDER BY disk_name + """ + ) + parts = [l.split("\t") for l in parts.strip().split("\n")] + assert parts == [ + ["2", "jbod1"], + ["1", "jbod2"], + ["1", "jbod3"], + ] + finally: + node.query("DROP TABLE IF EXISTS data_round_robin SYNC") + + +def test_jbod_load_balancing_least_used(start_cluster): + try: + node.query( + """ + CREATE TABLE data_least_used (p UInt8) + ENGINE = MergeTree + ORDER BY tuple() + SETTINGS storage_policy = 'jbod_least_used'; + + SYSTEM STOP MERGES data_least_used; + + INSERT INTO data_least_used SELECT * FROM numbers(10); + INSERT INTO data_least_used SELECT * FROM numbers(10); + INSERT INTO data_least_used SELECT * FROM numbers(10); + INSERT INTO data_least_used SELECT * FROM numbers(10); + """ + ) + + parts = node.query( + """ + SELECT count(), disk_name + FROM system.parts + WHERE table = 'data_least_used' + GROUP BY disk_name + ORDER BY disk_name + """ + ) + parts = [l.split("\t") for l in parts.strip().split("\n")] + assert parts == [ + ["4", "jbod3"], + ] + finally: + node.query("DROP TABLE IF EXISTS data_least_used SYNC") + + +def test_jbod_load_balancing_least_used_next_disk(start_cluster): + try: + node.query( + """ + CREATE TABLE data_least_used_next_disk + ( + s String CODEC(NONE) + ) + ENGINE = MergeTree + ORDER BY tuple() + SETTINGS storage_policy = 'jbod_least_used'; + + SYSTEM STOP MERGES data_least_used_next_disk; + + -- 100MiB each part, 3 parts in total + INSERT INTO data_least_used_next_disk SELECT repeat('a', 100) FROM numbers(3e6) SETTINGS max_block_size='1Mi'; + """ + ) + + parts = node.query( + """ + SELECT count(), disk_name + FROM system.parts + WHERE table = 'data_least_used_next_disk' + GROUP BY disk_name + ORDER BY disk_name + """ + ) + parts = [l.split("\t") for l in parts.strip().split("\n")] + assert parts == [ + ["1", "jbod2"], + ["2", "jbod3"], + ] + finally: + node.query("DROP TABLE IF EXISTS data_least_used_next_disk SYNC") diff --git a/tests/integration/test_join_set_family_s3/configs/config.xml b/tests/integration/test_join_set_family_s3/configs/config.xml index ca4bdf15b43..87270096ccd 100644 --- a/tests/integration/test_join_set_family_s3/configs/config.xml +++ b/tests/integration/test_join_set_family_s3/configs/config.xml @@ -22,7 +22,6 @@ 500 - 5368709120 ./clickhouse/ users.xml diff --git a/tests/integration/test_keeper_four_word_command/test.py b/tests/integration/test_keeper_four_word_command/test.py index caaf1d0c87a..b60a8389cd0 100644 --- a/tests/integration/test_keeper_four_word_command/test.py +++ b/tests/integration/test_keeper_four_word_command/test.py @@ -6,7 +6,6 @@ import string import os import time from multiprocessing.dummy import Pool -from helpers.network import PartitionManager from helpers.test_tools import assert_eq_with_retry from io import StringIO import csv diff --git a/tests/integration/test_keeper_nodes_add/test.py b/tests/integration/test_keeper_nodes_add/test.py index 498f2bdfab6..c3449534e87 100644 --- a/tests/integration/test_keeper_nodes_add/test.py +++ 
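The JBOD tests above pin down an exact part-to-disk distribution: under the round_robin policy four inserts produce two parts on jbod1 and one each on jbod2 and jbod3, while least_used keeps picking jbod3 (the largest tmpfs, 300M) until the written data is large enough to shift the choice to the next disk. A rough sketch of the round-robin expectation only, treating disk choice as a plain modulo over the configured order (a simplification; the server also checks reservations and free space):

from collections import Counter

disks = ["jbod1", "jbod2", "jbod3"]

# Four INSERTs under round_robin: part i goes to disk i % len(disks).
placement = [disks[i % len(disks)] for i in range(4)]
counts = Counter(placement)

# Matches the expected system.parts output in the test:
assert [[str(counts[d]), d] for d in disks] == [
    ["2", "jbod1"],
    ["1", "jbod2"],
    ["1", "jbod3"],
]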
b/tests/integration/test_keeper_nodes_add/test.py @@ -7,7 +7,6 @@ import string import os import time from multiprocessing.dummy import Pool -from helpers.network import PartitionManager from helpers.test_tools import assert_eq_with_retry from kazoo.client import KazooClient, KazooState diff --git a/tests/integration/test_keeper_nodes_move/test.py b/tests/integration/test_keeper_nodes_move/test.py index 9a571cd8ed6..31082846fb8 100644 --- a/tests/integration/test_keeper_nodes_move/test.py +++ b/tests/integration/test_keeper_nodes_move/test.py @@ -10,7 +10,6 @@ import string import os import time from multiprocessing.dummy import Pool -from helpers.network import PartitionManager from helpers.test_tools import assert_eq_with_retry from kazoo.client import KazooClient, KazooState diff --git a/tests/integration/test_keeper_three_nodes_start/test.py b/tests/integration/test_keeper_three_nodes_start/test.py index f23ef5440c1..e451f969b37 100644 --- a/tests/integration/test_keeper_three_nodes_start/test.py +++ b/tests/integration/test_keeper_three_nodes_start/test.py @@ -8,7 +8,6 @@ import string import os import time from multiprocessing.dummy import Pool -from helpers.network import PartitionManager from helpers.test_tools import assert_eq_with_retry from kazoo.client import KazooClient, KazooState diff --git a/tests/integration/test_keeper_three_nodes_two_alive/test.py b/tests/integration/test_keeper_three_nodes_two_alive/test.py index 11ff1d8cc08..f1de469c5a1 100644 --- a/tests/integration/test_keeper_three_nodes_two_alive/test.py +++ b/tests/integration/test_keeper_three_nodes_two_alive/test.py @@ -6,7 +6,6 @@ import string import os import time from multiprocessing.dummy import Pool -from helpers.network import PartitionManager from helpers.test_tools import assert_eq_with_retry from kazoo.client import KazooClient, KazooState diff --git a/tests/integration/test_keeper_znode_time/test.py b/tests/integration/test_keeper_znode_time/test.py index dc0476a0c09..bff3d52014e 100644 --- a/tests/integration/test_keeper_znode_time/test.py +++ b/tests/integration/test_keeper_znode_time/test.py @@ -5,7 +5,6 @@ import string import os import time from multiprocessing.dummy import Pool -from helpers.network import PartitionManager from helpers.test_tools import assert_eq_with_retry cluster = ClickHouseCluster(__file__) diff --git a/tests/integration/test_log_family_s3/configs/config.xml b/tests/integration/test_log_family_s3/configs/config.xml index ca4bdf15b43..87270096ccd 100644 --- a/tests/integration/test_log_family_s3/configs/config.xml +++ b/tests/integration/test_log_family_s3/configs/config.xml @@ -22,7 +22,6 @@ 500 - 5368709120 ./clickhouse/ users.xml diff --git a/tests/integration/test_logs_level/configs/config_information.xml b/tests/integration/test_logs_level/configs/config_information.xml index 9275a468990..17342b36c57 100644 --- a/tests/integration/test_logs_level/configs/config_information.xml +++ b/tests/integration/test_logs_level/configs/config_information.xml @@ -19,7 +19,6 @@ 500 - 5368709120 users.xml /etc/clickhouse-server/config.d/*.xml diff --git a/tests/integration/test_lost_part_during_startup/test.py b/tests/integration/test_lost_part_during_startup/test.py index b110a17704b..de21d64c8aa 100644 --- a/tests/integration/test_lost_part_during_startup/test.py +++ b/tests/integration/test_lost_part_during_startup/test.py @@ -3,7 +3,6 @@ import time import pytest from helpers.cluster import ClickHouseCluster -from helpers.network import PartitionManager cluster = 
ClickHouseCluster(__file__) node1 = cluster.add_instance("node1", with_zookeeper=True, stay_alive=True) diff --git a/tests/integration/test_match_process_uid_against_data_owner/configs/config.xml b/tests/integration/test_match_process_uid_against_data_owner/configs/config.xml index 72801915721..58087e93882 100644 --- a/tests/integration/test_match_process_uid_against_data_owner/configs/config.xml +++ b/tests/integration/test_match_process_uid_against_data_owner/configs/config.xml @@ -12,6 +12,5 @@ /var/lib/clickhouse/ - 5368709120 users.xml diff --git a/tests/integration/test_merge_tree_azure_blob_storage/configs/config.xml b/tests/integration/test_merge_tree_azure_blob_storage/configs/config.xml index 59829ffdb56..8e7eada42ed 100644 --- a/tests/integration/test_merge_tree_azure_blob_storage/configs/config.xml +++ b/tests/integration/test_merge_tree_azure_blob_storage/configs/config.xml @@ -14,7 +14,6 @@ 500 - 5368709120 ./clickhouse/ users.xml diff --git a/tests/integration/test_merge_tree_s3/configs/config.xml b/tests/integration/test_merge_tree_s3/configs/config.xml index 59829ffdb56..8e7eada42ed 100644 --- a/tests/integration/test_merge_tree_s3/configs/config.xml +++ b/tests/integration/test_merge_tree_s3/configs/config.xml @@ -14,7 +14,6 @@ 500 - 5368709120 ./clickhouse/ users.xml diff --git a/tests/integration/test_merge_tree_s3_failover/configs/config.xml b/tests/integration/test_merge_tree_s3_failover/configs/config.xml index 59829ffdb56..8e7eada42ed 100644 --- a/tests/integration/test_merge_tree_s3_failover/configs/config.xml +++ b/tests/integration/test_merge_tree_s3_failover/configs/config.xml @@ -14,7 +14,6 @@ 500 - 5368709120 ./clickhouse/ users.xml diff --git a/tests/integration/test_merge_tree_s3_restore/configs/config.xml b/tests/integration/test_merge_tree_s3_restore/configs/config.xml index 59829ffdb56..8e7eada42ed 100644 --- a/tests/integration/test_merge_tree_s3_restore/configs/config.xml +++ b/tests/integration/test_merge_tree_s3_restore/configs/config.xml @@ -14,7 +14,6 @@ 500 - 5368709120 ./clickhouse/ users.xml diff --git a/tests/integration/test_merge_tree_s3_with_cache/configs/config.xml b/tests/integration/test_merge_tree_s3_with_cache/configs/config.xml index ba0f02d1fa7..ccba76b71a6 100644 --- a/tests/integration/test_merge_tree_s3_with_cache/configs/config.xml +++ b/tests/integration/test_merge_tree_s3_with_cache/configs/config.xml @@ -21,7 +21,6 @@ 500 - 0 ./clickhouse/ users.xml diff --git a/tests/integration/test_mysql_database_engine/test.py b/tests/integration/test_mysql_database_engine/test.py index 96a4e2d692c..3e0a1a549d1 100644 --- a/tests/integration/test_mysql_database_engine/test.py +++ b/tests/integration/test_mysql_database_engine/test.py @@ -343,9 +343,8 @@ def test_data_types_support_level_for_mysql_database_engine(started_cluster): mysql_node.query("DROP DATABASE test") -# test tool cannot support null by now. 
TSV format returns \N for null, so cannot compare using == directly -# float_values = ['NULL'] -# float_values = [0] mysql returns 0 while clickhouse returns 0.0, so cannot compare using == directly +float_values = [0, "NULL"] +clickhouse_float_values = [0, "\\N"] int32_values = [0, 1, -1, 2147483647, -2147483648] uint32_values = [ 0, @@ -357,8 +356,8 @@ int16_values = [0, 1, -1, 32767, -32768] uint16_values = [0, 1, 65535] int8_values = [0, 1, -1, 127, -128] uint8_values = [0, 1, 255] -# string_values = ["'ClickHouse'", 'NULL'] -string_values = ["'ClickHouse'"] +string_values = ["'ClickHouse'", "NULL"] +clickhouse_string_values = ["ClickHouse", "\\N"] date_values = ["'1970-01-01'"] date2Date32_values = ["'1925-01-01'", "'2283-11-11'"] date2String_values = ["'1000-01-01'", "'9999-12-31'"] @@ -381,17 +380,38 @@ timestamp_values = ["'2015-05-18 07:40:01.123'", "'2019-09-16 19:20:11.123'"] timestamp_values_no_subsecond = ["'2015-05-18 07:40:01'", "'2019-09-16 19:20:11'"] +def arryToString(expected_clickhouse_values): + return "\n".join(str(value) for value in expected_clickhouse_values) + + +# if expected_clickhouse_values is "", compare MySQL and ClickHouse query results directly @pytest.mark.parametrize( - "case_name, mysql_type, expected_ch_type, mysql_values, setting_mysql_datatypes_support_level", + "case_name, mysql_type, expected_ch_type, mysql_values, expected_clickhouse_values , setting_mysql_datatypes_support_level", [ - # test common type mapping - # ("common_types", "FLOAT", "Nullable(Float32)", float_values, ""), - # ("common_types", "FLOAT UNSIGNED", "Nullable(Float32)", float_values, ""), + pytest.param( + "common_types", + "FLOAT", + "Nullable(Float32)", + float_values, + clickhouse_float_values, + "", + id="float_1", + ), + pytest.param( + "common_types", + "FLOAT UNSIGNED", + "Nullable(Float32)", + float_values, + clickhouse_float_values, + "", + id="float_2", + ), pytest.param( "common_types", "INT", "Nullable(Int32)", int32_values, + int32_values, "", id="common_types_1", ), @@ -400,6 +420,7 @@ timestamp_values_no_subsecond = ["'2015-05-18 07:40:01'", "'2019-09-16 19:20:11' "INT NOT NULL", "Int32", int32_values, + int32_values, "", id="common_types_2", ), @@ -408,6 +429,7 @@ timestamp_values_no_subsecond = ["'2015-05-18 07:40:01'", "'2019-09-16 19:20:11' "INT UNSIGNED NOT NULL", "UInt32", uint32_values, + uint32_values, "", id="common_types_3", ), @@ -416,6 +438,7 @@ timestamp_values_no_subsecond = ["'2015-05-18 07:40:01'", "'2019-09-16 19:20:11' "INT UNSIGNED", "Nullable(UInt32)", uint32_values, + uint32_values, "", id="common_types_4", ), @@ -424,6 +447,7 @@ timestamp_values_no_subsecond = ["'2015-05-18 07:40:01'", "'2019-09-16 19:20:11' "INT UNSIGNED DEFAULT NULL", "Nullable(UInt32)", uint32_values, + uint32_values, "", id="common_types_5", ), @@ -432,6 +456,7 @@ timestamp_values_no_subsecond = ["'2015-05-18 07:40:01'", "'2019-09-16 19:20:11' "INT UNSIGNED DEFAULT '1'", "Nullable(UInt32)", uint32_values, + uint32_values, "", id="common_types_6", ), @@ -440,6 +465,7 @@ timestamp_values_no_subsecond = ["'2015-05-18 07:40:01'", "'2019-09-16 19:20:11' "INT(10)", "Nullable(Int32)", int32_values, + int32_values, "", id="common_types_7", ), @@ -448,6 +474,7 @@ timestamp_values_no_subsecond = ["'2015-05-18 07:40:01'", "'2019-09-16 19:20:11' "INT(10) NOT NULL", "Int32", int32_values, + int32_values, "", id="common_types_8", ), @@ -456,6 +483,7 @@ timestamp_values_no_subsecond = ["'2015-05-18 07:40:01'", "'2019-09-16 19:20:11' "INT(10) UNSIGNED NOT NULL", "UInt32", uint32_values, 
+ uint32_values, "", id="common_types_8", ), @@ -464,6 +492,7 @@ timestamp_values_no_subsecond = ["'2015-05-18 07:40:01'", "'2019-09-16 19:20:11' "INT(10) UNSIGNED", "Nullable(UInt32)", uint32_values, + uint32_values, "", id="common_types_9", ), @@ -472,6 +501,7 @@ timestamp_values_no_subsecond = ["'2015-05-18 07:40:01'", "'2019-09-16 19:20:11' "INT(10) UNSIGNED DEFAULT NULL", "Nullable(UInt32)", uint32_values, + uint32_values, "", id="common_types_10", ), @@ -480,6 +510,7 @@ timestamp_values_no_subsecond = ["'2015-05-18 07:40:01'", "'2019-09-16 19:20:11' "INT(10) UNSIGNED DEFAULT '1'", "Nullable(UInt32)", uint32_values, + uint32_values, "", id="common_types_11", ), @@ -488,6 +519,7 @@ timestamp_values_no_subsecond = ["'2015-05-18 07:40:01'", "'2019-09-16 19:20:11' "INTEGER", "Nullable(Int32)", int32_values, + int32_values, "", id="common_types_12", ), @@ -496,6 +528,7 @@ timestamp_values_no_subsecond = ["'2015-05-18 07:40:01'", "'2019-09-16 19:20:11' "INTEGER UNSIGNED", "Nullable(UInt32)", uint32_values, + uint32_values, "", id="common_types_13", ), @@ -504,6 +537,7 @@ timestamp_values_no_subsecond = ["'2015-05-18 07:40:01'", "'2019-09-16 19:20:11' "MEDIUMINT", "Nullable(Int32)", mint_values, + mint_values, "", id="common_types_14", ), @@ -512,6 +546,7 @@ timestamp_values_no_subsecond = ["'2015-05-18 07:40:01'", "'2019-09-16 19:20:11' "MEDIUMINT UNSIGNED", "Nullable(UInt32)", umint_values, + umint_values, "", id="common_types_15", ), @@ -520,6 +555,7 @@ timestamp_values_no_subsecond = ["'2015-05-18 07:40:01'", "'2019-09-16 19:20:11' "SMALLINT", "Nullable(Int16)", int16_values, + int16_values, "", id="common_types_16", ), @@ -528,6 +564,7 @@ timestamp_values_no_subsecond = ["'2015-05-18 07:40:01'", "'2019-09-16 19:20:11' "SMALLINT UNSIGNED", "Nullable(UInt16)", uint16_values, + uint16_values, "", id="common_types_17", ), @@ -536,6 +573,7 @@ timestamp_values_no_subsecond = ["'2015-05-18 07:40:01'", "'2019-09-16 19:20:11' "TINYINT", "Nullable(Int8)", int8_values, + int8_values, "", id="common_types_18", ), @@ -544,6 +582,7 @@ timestamp_values_no_subsecond = ["'2015-05-18 07:40:01'", "'2019-09-16 19:20:11' "TINYINT UNSIGNED", "Nullable(UInt8)", uint8_values, + uint8_values, "", id="common_types_19", ), @@ -552,6 +591,7 @@ timestamp_values_no_subsecond = ["'2015-05-18 07:40:01'", "'2019-09-16 19:20:11' "VARCHAR(10)", "Nullable(String)", string_values, + clickhouse_string_values, "", id="common_types_20", ), @@ -561,6 +601,7 @@ timestamp_values_no_subsecond = ["'2015-05-18 07:40:01'", "'2019-09-16 19:20:11' "Nullable(Date)", date_values, "", + "", id="common_types_21", ), pytest.param( @@ -568,6 +609,7 @@ timestamp_values_no_subsecond = ["'2015-05-18 07:40:01'", "'2019-09-16 19:20:11' "DATE", "Nullable(Date32)", date2Date32_values, + "", "date2Date32", id="common_types_22", ), @@ -576,14 +618,34 @@ timestamp_values_no_subsecond = ["'2015-05-18 07:40:01'", "'2019-09-16 19:20:11' "DATE", "Nullable(String)", date2String_values, + "", "date2String", id="common_types_23", ), + pytest.param( + "common_types", + "binary(1)", + "Nullable(FixedString(1))", + [1], + [1], + "", + id="common_types_24", + ), + pytest.param( + "common_types", + "binary(0)", + "Nullable(FixedString(1))", + ["NULL"], + ["\\N"], + "", + id="common_types_25", + ), pytest.param( "decimal_default", "decimal NOT NULL", "Decimal(10, 0)", decimal_values, + "", "decimal,datetime64", id="decimal_1", ), @@ -592,6 +654,7 @@ timestamp_values_no_subsecond = ["'2015-05-18 07:40:01'", "'2019-09-16 19:20:11' "decimal", "Nullable(Decimal(10, 
0))", decimal_values, + "", "decimal,datetime64", id="decimal_2", ), @@ -600,6 +663,7 @@ timestamp_values_no_subsecond = ["'2015-05-18 07:40:01'", "'2019-09-16 19:20:11' "decimal(18, 6) NOT NULL", "Decimal(18, 6)", decimal_values, + "", "decimal,datetime64", id="decimal_3", ), @@ -608,6 +672,7 @@ timestamp_values_no_subsecond = ["'2015-05-18 07:40:01'", "'2019-09-16 19:20:11' "decimal(38, 6) NOT NULL", "Decimal(38, 6)", decimal_values, + "", "decimal,datetime64", id="decimal_4", ), @@ -619,6 +684,7 @@ timestamp_values_no_subsecond = ["'2015-05-18 07:40:01'", "'2019-09-16 19:20:11' "timestamp", "DateTime", timestamp_values, + "", "decimal,datetime64", id="timestamp_default", ), @@ -627,6 +693,7 @@ timestamp_values_no_subsecond = ["'2015-05-18 07:40:01'", "'2019-09-16 19:20:11' "timestamp(6)", "DateTime64(6)", timestamp_values, + "", "decimal,datetime64", id="timestamp_6", ), @@ -635,6 +702,7 @@ timestamp_values_no_subsecond = ["'2015-05-18 07:40:01'", "'2019-09-16 19:20:11' "DATETIME NOT NULL", "DateTime64(0)", timestamp_values, + "", "decimal,datetime64", id="datetime_default", ), @@ -643,6 +711,7 @@ timestamp_values_no_subsecond = ["'2015-05-18 07:40:01'", "'2019-09-16 19:20:11' "DATETIME(6) NOT NULL", "DateTime64(6)", timestamp_values, + "", "decimal,datetime64", id="datetime_6_1", ), @@ -652,6 +721,7 @@ timestamp_values_no_subsecond = ["'2015-05-18 07:40:01'", "'2019-09-16 19:20:11' "decimal(40, 6) NOT NULL", "String", decimal_values, + "", "decimal,datetime64", id="decimal_40_6", ), @@ -660,6 +730,7 @@ timestamp_values_no_subsecond = ["'2015-05-18 07:40:01'", "'2019-09-16 19:20:11' "decimal(18, 6) NOT NULL", "String", decimal_values, + "", "datetime64", id="decimal_18_6_1", ), @@ -669,6 +740,7 @@ timestamp_values_no_subsecond = ["'2015-05-18 07:40:01'", "'2019-09-16 19:20:11' "String", decimal_values, "", + "", id="decimal_18_6_2", ), pytest.param( @@ -676,6 +748,7 @@ timestamp_values_no_subsecond = ["'2015-05-18 07:40:01'", "'2019-09-16 19:20:11' "DATETIME(6) NOT NULL", "DateTime", timestamp_values_no_subsecond, + "", "decimal", id="datetime_6_2", ), @@ -685,6 +758,7 @@ timestamp_values_no_subsecond = ["'2015-05-18 07:40:01'", "'2019-09-16 19:20:11' "DateTime", timestamp_values_no_subsecond, "", + "", id="datetime_6_3", ), ], @@ -695,6 +769,7 @@ def test_mysql_types( mysql_type, expected_ch_type, mysql_values, + expected_clickhouse_values, setting_mysql_datatypes_support_level, ): """Verify that values written to MySQL can be read on ClickHouse side via DB engine MySQL, @@ -772,12 +847,18 @@ def test_mysql_types( == expected_ch_type ) + expected_format_clickhouse_values = arryToString(expected_clickhouse_values) + if expected_format_clickhouse_values == "": + expected_format_clickhouse_values = execute_query( + mysql_node, "SELECT value FROM ${mysql_db}.${table_name}" + ) + # Validate values - assert execute_query( + assert expected_format_clickhouse_values == execute_query( clickhouse_node, "SELECT value FROM ${ch_mysql_table}", settings=clickhouse_query_settings, - ) == execute_query(mysql_node, "SELECT value FROM ${mysql_db}.${table_name}") + ) # MySQL DATABASE ENGINE execute_query( @@ -800,11 +881,11 @@ def test_mysql_types( ) # Validate values - assert execute_query( + assert expected_format_clickhouse_values == execute_query( clickhouse_node, "SELECT value FROM ${ch_mysql_db}.${table_name}", settings=clickhouse_query_settings, - ) == execute_query(mysql_node, "SELECT value FROM ${mysql_db}.${table_name}") + ) # MySQL TABLE FUNCTION # Validate type @@ -818,9 +899,7 @@ def 
test_mysql_types( ) # Validate values - assert execute_query( - mysql_node, "SELECT value FROM ${mysql_db}.${table_name}" - ) == execute_query( + assert expected_format_clickhouse_values == execute_query( clickhouse_node, "SELECT value FROM mysql('mysql57:3306', '${mysql_db}', '${table_name}', 'root', 'clickhouse')", settings=clickhouse_query_settings, diff --git a/tests/integration/test_mysql_protocol/configs/config.xml b/tests/integration/test_mysql_protocol/configs/config.xml index 07f22392939..a2ba348eabd 100644 --- a/tests/integration/test_mysql_protocol/configs/config.xml +++ b/tests/integration/test_mysql_protocol/configs/config.xml @@ -29,7 +29,6 @@ 127.0.0.1 500 - 5368709120 ./clickhouse/ users.xml diff --git a/tests/integration/test_odbc_interaction/configs/config.xml b/tests/integration/test_odbc_interaction/configs/config.xml index e9d426b2f71..861c81248d7 100644 --- a/tests/integration/test_odbc_interaction/configs/config.xml +++ b/tests/integration/test_odbc_interaction/configs/config.xml @@ -28,7 +28,6 @@ 500 - 5368709120 ./clickhouse/ users.xml diff --git a/tests/integration/test_polymorphic_parts/test.py b/tests/integration/test_polymorphic_parts/test.py index ba40b46c586..1f40f934e86 100644 --- a/tests/integration/test_polymorphic_parts/test.py +++ b/tests/integration/test_polymorphic_parts/test.py @@ -5,7 +5,6 @@ import struct import pytest from helpers.cluster import ClickHouseCluster -from helpers.network import PartitionManager from helpers.test_tools import TSV from helpers.test_tools import assert_eq_with_retry, exec_query_with_retry diff --git a/tests/integration/test_postgresql_protocol/configs/config.xml b/tests/integration/test_postgresql_protocol/configs/config.xml index 1dade9247b3..b3340627751 100644 --- a/tests/integration/test_postgresql_protocol/configs/config.xml +++ b/tests/integration/test_postgresql_protocol/configs/config.xml @@ -29,7 +29,6 @@ 127.0.0.1 500 - 5368709120 ./clickhouse/ users.xml diff --git a/tests/integration/test_profile_events_s3/configs/config.xml b/tests/integration/test_profile_events_s3/configs/config.xml index 665e85dfe2f..226e3e7efbe 100644 --- a/tests/integration/test_profile_events_s3/configs/config.xml +++ b/tests/integration/test_profile_events_s3/configs/config.xml @@ -29,7 +29,6 @@ 500 - 5368709120 ./clickhouse/ users.xml diff --git a/tests/integration/test_reload_auxiliary_zookeepers/configs/config.xml b/tests/integration/test_reload_auxiliary_zookeepers/configs/config.xml index 98f07de912d..d1c2e5958c8 100644 --- a/tests/integration/test_reload_auxiliary_zookeepers/configs/config.xml +++ b/tests/integration/test_reload_auxiliary_zookeepers/configs/config.xml @@ -22,7 +22,6 @@ 500 - 5368709120 ./clickhouse/ users.xml diff --git a/tests/integration/test_reload_clusters_config/test.py b/tests/integration/test_reload_clusters_config/test.py index 6979fd5565b..a52871890e9 100644 --- a/tests/integration/test_reload_clusters_config/test.py +++ b/tests/integration/test_reload_clusters_config/test.py @@ -7,7 +7,6 @@ import pytest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from helpers.cluster import ClickHouseCluster -from helpers.network import PartitionManager from helpers.test_tools import TSV cluster = ClickHouseCluster(__file__) diff --git a/tests/integration/test_reload_max_table_size_to_drop/configs/config.xml b/tests/integration/test_reload_max_table_size_to_drop/configs/config.xml index 98f07de912d..d1c2e5958c8 100644 --- 
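The reworked test_mysql_types compares ClickHouse output against a pre-rendered expected string instead of re-querying MySQL, because the two systems format some values differently (FLOAT 0 vs 0.0, NULL vs \N in TSV). The expectation is built by joining the expected values with newlines, as the arryToString helper in the diff does; a small sketch with illustrative values:

def arryToString(expected_clickhouse_values):
    # Same join as the test helper: one value per line, like a single-column TSV result.
    return "\n".join(str(value) for value in expected_clickhouse_values)

# A FLOAT column inserted as [0, NULL] in MySQL is expected back from ClickHouse as:
assert arryToString([0, "\\N"]) == "0\n\\N"
# A VARCHAR column inserted as ['ClickHouse', NULL]:
assert arryToString(["ClickHouse", "\\N"]) == "ClickHouse\n\\N"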
a/tests/integration/test_reload_max_table_size_to_drop/configs/config.xml +++ b/tests/integration/test_reload_max_table_size_to_drop/configs/config.xml @@ -22,7 +22,6 @@ 500 - 5368709120 ./clickhouse/ users.xml diff --git a/tests/integration/test_reload_zookeeper/configs/config.xml b/tests/integration/test_reload_zookeeper/configs/config.xml index 98f07de912d..d1c2e5958c8 100644 --- a/tests/integration/test_reload_zookeeper/configs/config.xml +++ b/tests/integration/test_reload_zookeeper/configs/config.xml @@ -22,7 +22,6 @@ 500 - 5368709120 ./clickhouse/ users.xml diff --git a/tests/integration/test_role/test.py b/tests/integration/test_role/test.py index 44ce0e13f2f..8e94b1620b5 100644 --- a/tests/integration/test_role/test.py +++ b/tests/integration/test_role/test.py @@ -1,4 +1,5 @@ import pytest +from helpers.client import QueryRuntimeException from helpers.cluster import ClickHouseCluster from helpers.test_tools import TSV @@ -260,6 +261,11 @@ def test_introspection(): assert instance.query("SHOW GRANTS", user="A") == TSV( ["GRANT SELECT ON test.table TO A", "GRANT R1 TO A"] ) + + assert instance.query("SHOW GRANTS FOR R1", user="A") == TSV([]) + with pytest.raises(QueryRuntimeException, match="Not enough privileges"): + assert instance.query("SHOW GRANTS FOR R2", user="A") + assert instance.query("SHOW GRANTS", user="B") == TSV( [ "GRANT CREATE ON *.* TO B WITH GRANT OPTION", diff --git a/tests/integration/test_s3_with_https/configs/config.xml b/tests/integration/test_s3_with_https/configs/config.xml index 59829ffdb56..8e7eada42ed 100644 --- a/tests/integration/test_s3_with_https/configs/config.xml +++ b/tests/integration/test_s3_with_https/configs/config.xml @@ -14,7 +14,6 @@ 500 - 5368709120 ./clickhouse/ users.xml diff --git a/tests/integration/test_s3_with_proxy/configs/config.xml b/tests/integration/test_s3_with_proxy/configs/config.xml index 59829ffdb56..8e7eada42ed 100644 --- a/tests/integration/test_s3_with_proxy/configs/config.xml +++ b/tests/integration/test_s3_with_proxy/configs/config.xml @@ -14,7 +14,6 @@ 500 - 5368709120 ./clickhouse/ users.xml diff --git a/tests/integration/test_storage_kerberized_kafka/test.py b/tests/integration/test_storage_kerberized_kafka/test.py index f4aea059c05..6347ba89c16 100644 --- a/tests/integration/test_storage_kerberized_kafka/test.py +++ b/tests/integration/test_storage_kerberized_kafka/test.py @@ -8,7 +8,6 @@ import logging from helpers.cluster import ClickHouseCluster from helpers.test_tools import TSV from helpers.client import QueryRuntimeException -from helpers.network import PartitionManager import json import subprocess diff --git a/tests/integration/test_system_clusters_actual_information/test.py b/tests/integration/test_system_clusters_actual_information/test.py index 865c80db1c9..0658d0c7576 100644 --- a/tests/integration/test_system_clusters_actual_information/test.py +++ b/tests/integration/test_system_clusters_actual_information/test.py @@ -7,7 +7,6 @@ import pytest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from helpers.cluster import ClickHouseCluster -from helpers.network import PartitionManager cluster = ClickHouseCluster(__file__) node = cluster.add_instance( diff --git a/tests/performance/bitCount.xml b/tests/performance/bitCount.xml deleted file mode 100644 index 77f7674d0da..00000000000 --- a/tests/performance/bitCount.xml +++ /dev/null @@ -1,19 +0,0 @@ - - - - - - expr - - number - toUInt32(number) - toUInt16(number) - toUInt8(number) - toInt32(number) - toFloat64(number) - - - - 
- SELECT bitCount({expr}) FROM numbers(100000000) FORMAT Null - diff --git a/tests/performance/unary_arithmetic_functions.xml b/tests/performance/unary_arithmetic_functions.xml new file mode 100644 index 00000000000..62e11457ac4 --- /dev/null +++ b/tests/performance/unary_arithmetic_functions.xml @@ -0,0 +1,42 @@ + + + + + + func + + bitCount + bitNot + abs + intExp2 + intExp10 + negate + roundAge + roundDuration + roundToExp2 + sign + + + + + expr + + + number + toUInt32(number) + toUInt16(number) + toUInt8(number) + + toInt64(number) + toInt32(number) + toInt16(number) + toInt8(number) + + toFloat64(number) + toFloat32(number) + + + + + SELECT {func}({expr}) FROM numbers(100000000) FORMAT Null + diff --git a/tests/queries/0_stateless/00285_not_all_data_in_totals.reference b/tests/queries/0_stateless/00285_not_all_data_in_totals.reference index 961d8a34c09..065c39f5909 100644 --- a/tests/queries/0_stateless/00285_not_all_data_in_totals.reference +++ b/tests/queries/0_stateless/00285_not_all_data_in_totals.reference @@ -25,7 +25,7 @@ [0, "2"] ], - "totals": [0,"2000"], + "totals": [0, "2000"], "rows": 10, @@ -58,7 +58,7 @@ [0, "2"] ], - "totals": [0,"2000"], + "totals": [0, "2000"], "rows": 10, diff --git a/tests/queries/0_stateless/00313_const_totals_extremes.reference b/tests/queries/0_stateless/00313_const_totals_extremes.reference index f9084065989..fcb39b8080c 100644 --- a/tests/queries/0_stateless/00313_const_totals_extremes.reference +++ b/tests/queries/0_stateless/00313_const_totals_extremes.reference @@ -65,12 +65,12 @@ [1.23, "1"] ], - "totals": [1.23,"1"], + "totals": [1.23, "1"], "extremes": { - "min": [1.23,"1"], - "max": [1.23,"1"] + "min": [1.23, "1"], + "max": [1.23, "1"] }, "rows": 1 @@ -142,12 +142,12 @@ [1.1, "1"] ], - "totals": [1.1,"1"], + "totals": [1.1, "1"], "extremes": { - "min": [1.1,"1"], - "max": [1.1,"1"] + "min": [1.1, "1"], + "max": [1.1, "1"] }, "rows": 1 diff --git a/tests/queries/0_stateless/00378_json_quote_64bit_integers.reference b/tests/queries/0_stateless/00378_json_quote_64bit_integers.reference index 49c937e09df..5174c13a9e0 100644 --- a/tests/queries/0_stateless/00378_json_quote_64bit_integers.reference +++ b/tests/queries/0_stateless/00378_json_quote_64bit_integers.reference @@ -119,12 +119,12 @@ ["0", "0", "9223372036854775807", "-9223372036854775808", "18446744073709551615", ["0"], ["0","0"]] ], - "totals": ["0","0","9223372036854775807","-9223372036854775808","18446744073709551615",["0"],["0","0"]], + "totals": ["0", "0", "9223372036854775807", "-9223372036854775808", "18446744073709551615", ["0"], ["0","0"]], "extremes": { - "min": ["0","0","9223372036854775807","-9223372036854775808","18446744073709551615",["0"],["0","0"]], - "max": ["0","0","9223372036854775807","-9223372036854775808","18446744073709551615",["0"],["0","0"]] + "min": ["0", "0", "9223372036854775807", "-9223372036854775808", "18446744073709551615", ["0"], ["0","0"]], + "max": ["0", "0", "9223372036854775807", "-9223372036854775808", "18446744073709551615", ["0"], ["0","0"]] }, "rows": 1 @@ -251,12 +251,12 @@ [0, 0, 9223372036854775807, -9223372036854775808, 18446744073709551615, [0], [0,0]] ], - "totals": [0,0,9223372036854775807,-9223372036854775808,18446744073709551615,[0],[0,0]], + "totals": [0, 0, 9223372036854775807, -9223372036854775808, 18446744073709551615, [0], [0,0]], "extremes": { - "min": [0,0,9223372036854775807,-9223372036854775808,18446744073709551615,[0],[0,0]], - "max": [0,0,9223372036854775807,-9223372036854775808,18446744073709551615,[0],[0,0]] + "min": [0, 
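The updated .reference files here change only whitespace: the totals and extremes rows of the JSON output are now rendered with the same ", " separator as ordinary data rows. The before/after forms correspond to the two standard JSON separator styles, illustrated with Python's json module (an analogy, not the server's own formatter):

import json

row = [0, "2000"]
# New expected rendering: space after the comma, matching data rows.
assert json.dumps(row) == '[0, "2000"]'
# Old rendering: compact separators, no space.
assert json.dumps(row, separators=(",", ":")) == '[0,"2000"]'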
0, 9223372036854775807, -9223372036854775808, 18446744073709551615, [0], [0,0]], + "max": [0, 0, 9223372036854775807, -9223372036854775808, 18446744073709551615, [0], [0,0]] }, "rows": 1 diff --git a/tests/queries/0_stateless/01047_window_view_parser_inner_table.sql b/tests/queries/0_stateless/01047_window_view_parser_inner_table.sql index 3e4f95d098e..8b978e6094c 100644 --- a/tests/queries/0_stateless/01047_window_view_parser_inner_table.sql +++ b/tests/queries/0_stateless/01047_window_view_parser_inner_table.sql @@ -14,46 +14,46 @@ SELECT '---TUMBLE---'; SELECT '||---WINDOW COLUMN NAME---'; DROP TABLE IF EXISTS test_01047.wv; DROP TABLE IF EXISTS test_01047.`.inner.wv`; -CREATE WINDOW VIEW test_01047.wv ENGINE AggregatingMergeTree ORDER BY tumble(timestamp, INTERVAL '1' SECOND) AS SELECT count(a), tumbleEnd(wid) AS count FROM test_01047.mt GROUP BY tumble(timestamp, INTERVAL '1' SECOND) as wid; +CREATE WINDOW VIEW test_01047.wv INNER ENGINE AggregatingMergeTree ORDER BY tumble(timestamp, INTERVAL '1' SECOND) ENGINE Memory AS SELECT count(a), tumbleEnd(wid) AS count FROM test_01047.mt GROUP BY tumble(timestamp, INTERVAL '1' SECOND) as wid; SHOW CREATE TABLE test_01047.`.inner.wv`; SELECT '||---WINDOW COLUMN ALIAS---'; DROP TABLE IF EXISTS test_01047.wv; DROP TABLE IF EXISTS test_01047.`.inner.wv`; -CREATE WINDOW VIEW test_01047.wv ENGINE AggregatingMergeTree ORDER BY wid AS SELECT count(a) AS count, tumble(timestamp, INTERVAL '1' SECOND) AS wid FROM test_01047.mt GROUP BY wid; +CREATE WINDOW VIEW test_01047.wv INNER ENGINE AggregatingMergeTree ORDER BY wid ENGINE Memory AS SELECT count(a) AS count, tumble(timestamp, INTERVAL '1' SECOND) AS wid FROM test_01047.mt GROUP BY wid; SHOW CREATE TABLE test_01047.`.inner.wv`; SELECT '||---DATA COLUMN ALIAS---'; DROP TABLE IF EXISTS test_01047.wv; DROP TABLE IF EXISTS test_01047.`.inner.wv`; -CREATE WINDOW VIEW test_01047.wv ENGINE AggregatingMergeTree ORDER BY id AS SELECT count(a) AS count, b as id FROM test_01047.mt GROUP BY id, tumble(timestamp, INTERVAL '1' SECOND); +CREATE WINDOW VIEW test_01047.wv INNER ENGINE AggregatingMergeTree ORDER BY id ENGINE Memory AS SELECT count(a) AS count, b as id FROM test_01047.mt GROUP BY id, tumble(timestamp, INTERVAL '1' SECOND); SHOW CREATE TABLE test_01047.`.inner.wv`; SELECT '||---IDENTIFIER---'; DROP TABLE IF EXISTS test_01047.wv; DROP TABLE IF EXISTS test_01047.`.inner.wv`; -CREATE WINDOW VIEW test_01047.wv ENGINE AggregatingMergeTree ORDER BY (tumble(timestamp, INTERVAL '1' SECOND), b) PRIMARY KEY tumble(timestamp, INTERVAL '1' SECOND) AS SELECT count(a) AS count FROM test_01047.mt GROUP BY b, tumble(timestamp, INTERVAL '1' SECOND) AS wid; +CREATE WINDOW VIEW test_01047.wv INNER ENGINE AggregatingMergeTree ORDER BY (tumble(timestamp, INTERVAL '1' SECOND), b) PRIMARY KEY tumble(timestamp, INTERVAL '1' SECOND) ENGINE Memory AS SELECT count(a) AS count FROM test_01047.mt GROUP BY b, tumble(timestamp, INTERVAL '1' SECOND) AS wid; SHOW CREATE TABLE test_01047.`.inner.wv`; SELECT '||---FUNCTION---'; DROP TABLE IF EXISTS test_01047.wv; DROP TABLE IF EXISTS test_01047.`.inner.wv`; -CREATE WINDOW VIEW test_01047.wv ENGINE AggregatingMergeTree ORDER BY (tumble(timestamp, INTERVAL '1' SECOND), plus(a, b)) PRIMARY KEY tumble(timestamp, INTERVAL '1' SECOND) AS SELECT count(a) AS count FROM test_01047.mt GROUP BY plus(a, b) as _type, tumble(timestamp, INTERVAL '1' SECOND) AS wid; +CREATE WINDOW VIEW test_01047.wv INNER ENGINE AggregatingMergeTree ORDER BY (tumble(timestamp, INTERVAL '1' SECOND), plus(a, b)) PRIMARY 
KEY tumble(timestamp, INTERVAL '1' SECOND) ENGINE Memory AS SELECT count(a) AS count FROM test_01047.mt GROUP BY plus(a, b) as _type, tumble(timestamp, INTERVAL '1' SECOND) AS wid; SHOW CREATE TABLE test_01047.`.inner.wv`; SELECT '||---PARTITION---'; DROP TABLE IF EXISTS test_01047.wv; DROP TABLE IF EXISTS test_01047.`.inner.wv`; -CREATE WINDOW VIEW test_01047.wv ENGINE AggregatingMergeTree ORDER BY wid PARTITION BY wid AS SELECT count(a) AS count, tumble(now(), INTERVAL '1' SECOND) AS wid FROM test_01047.mt GROUP BY wid; +CREATE WINDOW VIEW test_01047.wv INNER ENGINE AggregatingMergeTree ORDER BY wid PARTITION BY wid ENGINE Memory AS SELECT count(a) AS count, tumble(now(), INTERVAL '1' SECOND) AS wid FROM test_01047.mt GROUP BY wid; SHOW CREATE TABLE test_01047.`.inner.wv`; SELECT '||---JOIN---'; DROP TABLE IF EXISTS test_01047.wv; -CREATE WINDOW VIEW test_01047.wv ENGINE AggregatingMergeTree ORDER BY tumble(test_01047.mt.timestamp, INTERVAL '1' SECOND) AS SELECT count(test_01047.mt.a), count(test_01047.mt_2.b), wid FROM test_01047.mt JOIN test_01047.mt_2 ON test_01047.mt.timestamp = test_01047.mt_2.timestamp GROUP BY tumble(test_01047.mt.timestamp, INTERVAL '1' SECOND) AS wid; +CREATE WINDOW VIEW test_01047.wv INNER ENGINE AggregatingMergeTree ORDER BY tumble(test_01047.mt.timestamp, INTERVAL '1' SECOND) ENGINE Memory AS SELECT count(test_01047.mt.a), count(test_01047.mt_2.b), wid FROM test_01047.mt JOIN test_01047.mt_2 ON test_01047.mt.timestamp = test_01047.mt_2.timestamp GROUP BY tumble(test_01047.mt.timestamp, INTERVAL '1' SECOND) AS wid; SHOW CREATE TABLE test_01047.`.inner.wv`; DROP TABLE IF EXISTS test_01047.wv; -CREATE WINDOW VIEW test_01047.wv ENGINE AggregatingMergeTree ORDER BY wid AS SELECT count(test_01047.mt.a), count(test_01047.mt_2.b), wid FROM test_01047.mt JOIN test_01047.mt_2 ON test_01047.mt.timestamp = test_01047.mt_2.timestamp GROUP BY tumble(test_01047.mt.timestamp, INTERVAL '1' SECOND) AS wid; +CREATE WINDOW VIEW test_01047.wv INNER ENGINE AggregatingMergeTree ORDER BY wid ENGINE Memory AS SELECT count(test_01047.mt.a), count(test_01047.mt_2.b), wid FROM test_01047.mt JOIN test_01047.mt_2 ON test_01047.mt.timestamp = test_01047.mt_2.timestamp GROUP BY tumble(test_01047.mt.timestamp, INTERVAL '1' SECOND) AS wid; SHOW CREATE TABLE test_01047.`.inner.wv`; @@ -61,46 +61,46 @@ SELECT '---HOP---'; SELECT '||---WINDOW COLUMN NAME---'; DROP TABLE IF EXISTS test_01047.wv; DROP TABLE IF EXISTS test_01047.`.inner.wv`; -CREATE WINDOW VIEW test_01047.wv ENGINE AggregatingMergeTree ORDER BY hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS SELECT count(a) AS count, hopEnd(wid) FROM test_01047.mt GROUP BY hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) as wid; +CREATE WINDOW VIEW test_01047.wv INNER ENGINE AggregatingMergeTree ORDER BY hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) ENGINE Memory AS SELECT count(a) AS count, hopEnd(wid) FROM test_01047.mt GROUP BY hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) as wid; SHOW CREATE TABLE test_01047.`.inner.wv`; SELECT '||---WINDOW COLUMN ALIAS---'; DROP TABLE IF EXISTS test_01047.wv; DROP TABLE IF EXISTS test_01047.`.inner.wv`; -CREATE WINDOW VIEW test_01047.wv ENGINE AggregatingMergeTree ORDER BY wid AS SELECT count(a) AS count, hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid FROM test_01047.mt GROUP BY wid; +CREATE WINDOW VIEW test_01047.wv INNER ENGINE AggregatingMergeTree ORDER BY wid ENGINE Memory AS SELECT count(a) AS count, hop(timestamp, INTERVAL '1' SECOND, INTERVAL 
'3' SECOND) AS wid FROM test_01047.mt GROUP BY wid; SHOW CREATE TABLE test_01047.`.inner.wv`; SELECT '||---DATA COLUMN ALIAS---'; DROP TABLE IF EXISTS test_01047.wv; DROP TABLE IF EXISTS test_01047.`.inner.wv`; -CREATE WINDOW VIEW test_01047.wv ENGINE AggregatingMergeTree ORDER BY id AS SELECT count(a) AS count, b as id FROM test_01047.mt GROUP BY id, hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND); +CREATE WINDOW VIEW test_01047.wv INNER ENGINE AggregatingMergeTree ORDER BY id ENGINE Memory AS SELECT count(a) AS count, b as id FROM test_01047.mt GROUP BY id, hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND); SHOW CREATE TABLE test_01047.`.inner.wv`; SELECT '||---IDENTIFIER---'; DROP TABLE IF EXISTS test_01047.wv; DROP TABLE IF EXISTS test_01047.`.inner.wv`; -CREATE WINDOW VIEW test_01047.wv ENGINE AggregatingMergeTree ORDER BY (hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND), b) PRIMARY KEY hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS SELECT count(a) AS count FROM test_01047.mt GROUP BY b, hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; +CREATE WINDOW VIEW test_01047.wv INNER ENGINE AggregatingMergeTree ORDER BY (hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND), b) PRIMARY KEY hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) ENGINE Memory AS SELECT count(a) AS count FROM test_01047.mt GROUP BY b, hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; SHOW CREATE TABLE test_01047.`.inner.wv`; SELECT '||---FUNCTION---'; DROP TABLE IF EXISTS test_01047.wv; DROP TABLE IF EXISTS test_01047.`.inner.wv`; -CREATE WINDOW VIEW test_01047.wv ENGINE AggregatingMergeTree ORDER BY (hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND), plus(a, b)) PRIMARY KEY hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS SELECT count(a) AS count FROM test_01047.mt GROUP BY plus(a, b) as _type, hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; +CREATE WINDOW VIEW test_01047.wv INNER ENGINE AggregatingMergeTree ORDER BY (hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND), plus(a, b)) PRIMARY KEY hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) ENGINE Memory AS SELECT count(a) AS count FROM test_01047.mt GROUP BY plus(a, b) as _type, hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; SHOW CREATE TABLE test_01047.`.inner.wv`; SELECT '||---PARTITION---'; DROP TABLE IF EXISTS test_01047.wv; DROP TABLE IF EXISTS test_01047.`.inner.wv`; -CREATE WINDOW VIEW test_01047.wv ENGINE AggregatingMergeTree ORDER BY wid PARTITION BY wid AS SELECT count(a) AS count, hopEnd(wid) FROM test_01047.mt GROUP BY hop(now(), INTERVAL '1' SECOND, INTERVAL '3' SECOND) as wid; +CREATE WINDOW VIEW test_01047.wv INNER ENGINE AggregatingMergeTree ORDER BY wid PARTITION BY wid ENGINE Memory AS SELECT count(a) AS count, hopEnd(wid) FROM test_01047.mt GROUP BY hop(now(), INTERVAL '1' SECOND, INTERVAL '3' SECOND) as wid; SHOW CREATE TABLE test_01047.`.inner.wv`; SELECT '||---JOIN---'; DROP TABLE IF EXISTS test_01047.wv; -CREATE WINDOW VIEW test_01047.wv ENGINE AggregatingMergeTree ORDER BY hop(test_01047.mt.timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS SELECT count(test_01047.mt.a), count(test_01047.mt_2.b), wid FROM test_01047.mt JOIN test_01047.mt_2 ON test_01047.mt.timestamp = test_01047.mt_2.timestamp GROUP BY hop(test_01047.mt.timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; +CREATE WINDOW VIEW test_01047.wv INNER ENGINE AggregatingMergeTree ORDER BY hop(test_01047.mt.timestamp, INTERVAL 
'1' SECOND, INTERVAL '3' SECOND) ENGINE Memory AS SELECT count(test_01047.mt.a), count(test_01047.mt_2.b), wid FROM test_01047.mt JOIN test_01047.mt_2 ON test_01047.mt.timestamp = test_01047.mt_2.timestamp GROUP BY hop(test_01047.mt.timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; SHOW CREATE TABLE test_01047.`.inner.wv`; DROP TABLE IF EXISTS test_01047.wv; -CREATE WINDOW VIEW test_01047.wv ENGINE AggregatingMergeTree ORDER BY wid AS SELECT count(test_01047.mt.a), count(test_01047.mt_2.b), wid FROM test_01047.mt JOIN test_01047.mt_2 ON test_01047.mt.timestamp = test_01047.mt_2.timestamp GROUP BY hop(test_01047.mt.timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; +CREATE WINDOW VIEW test_01047.wv INNER ENGINE AggregatingMergeTree ORDER BY wid ENGINE Memory AS SELECT count(test_01047.mt.a), count(test_01047.mt_2.b), wid FROM test_01047.mt JOIN test_01047.mt_2 ON test_01047.mt.timestamp = test_01047.mt_2.timestamp GROUP BY hop(test_01047.mt.timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; SHOW CREATE TABLE test_01047.`.inner.wv`; DROP TABLE test_01047.wv; diff --git a/tests/queries/0_stateless/01048_window_view_parser.reference b/tests/queries/0_stateless/01048_window_view_parser.reference index 947b68c3a89..b708ecb656b 100644 --- a/tests/queries/0_stateless/01048_window_view_parser.reference +++ b/tests/queries/0_stateless/01048_window_view_parser.reference @@ -15,6 +15,8 @@ CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecon CREATE TABLE test_01048.`.inner.wv`\n(\n `b` Int32,\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY (b, `windowID(timestamp, toIntervalSecond(\'1\'))`)\nSETTINGS index_granularity = 8192 ||---JOIN--- CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32),\n `count(mt_2.b)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'))`\nSETTINGS index_granularity = 8192 +||---POPULATE JOIN--- +CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32),\n `count(mt_2.b)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'))`\nSETTINGS index_granularity = 8192 ---HOP--- ||---WINDOW COLUMN NAME--- CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(1), toIntervalSecond(3))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(1), toIntervalSecond(3))`\nSETTINGS index_granularity = 8192 @@ -32,3 +34,5 @@ CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecon CREATE TABLE test_01048.`.inner.wv`\n(\n `b` Int32,\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY (b, `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`)\nSETTINGS index_granularity = 8192 ||---JOIN--- CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32),\n `count(mt_2.b)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, 
toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nSETTINGS index_granularity = 8192 +||---POPULATE JOIN--- +CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32),\n `count(mt_2.b)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nSETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/01048_window_view_parser.sql b/tests/queries/0_stateless/01048_window_view_parser.sql index e17352205e3..95190ddafa1 100644 --- a/tests/queries/0_stateless/01048_window_view_parser.sql +++ b/tests/queries/0_stateless/01048_window_view_parser.sql @@ -13,90 +13,100 @@ CREATE TABLE test_01048.mt_2(a Int32, b Int32, timestamp DateTime) ENGINE=MergeT SELECT '---TUMBLE---'; SELECT '||---WINDOW COLUMN NAME---'; DROP TABLE IF EXISTS test_01048.wv; -CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count, tumbleEnd(wid) as wend FROM test_01048.mt GROUP BY tumble(timestamp, INTERVAL 1 SECOND) as wid; +CREATE WINDOW VIEW test_01048.wv ENGINE Memory AS SELECT count(a) AS count, tumbleEnd(wid) as wend FROM test_01048.mt GROUP BY tumble(timestamp, INTERVAL 1 SECOND) as wid; SHOW CREATE TABLE test_01048.`.inner.wv`; SELECT '||---WINDOW COLUMN ALIAS---'; DROP TABLE IF EXISTS test_01048.wv; -CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count, tumble(timestamp, INTERVAL '1' SECOND) AS wid FROM test_01048.mt GROUP BY wid; +CREATE WINDOW VIEW test_01048.wv ENGINE Memory AS SELECT count(a) AS count, tumble(timestamp, INTERVAL '1' SECOND) AS wid FROM test_01048.mt GROUP BY wid; SHOW CREATE TABLE test_01048.`.inner.wv`; SELECT '||---IDENTIFIER---'; DROP TABLE IF EXISTS test_01048.wv; -CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count FROM test_01048.mt GROUP BY b, tumble(timestamp, INTERVAL '1' SECOND) AS wid; +CREATE WINDOW VIEW test_01048.wv ENGINE Memory AS SELECT count(a) AS count FROM test_01048.mt GROUP BY b, tumble(timestamp, INTERVAL '1' SECOND) AS wid; SHOW CREATE TABLE test_01048.`.inner.wv`; DROP TABLE IF EXISTS test_01048.wv; -CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count FROM test_01048.mt GROUP BY tumble(timestamp, INTERVAL '1' SECOND) AS wid, b; +CREATE WINDOW VIEW test_01048.wv ENGINE Memory AS SELECT count(a) AS count FROM test_01048.mt GROUP BY tumble(timestamp, INTERVAL '1' SECOND) AS wid, b; SHOW CREATE TABLE test_01048.`.inner.wv`; SELECT '||---FUNCTION---'; DROP TABLE IF EXISTS test_01048.wv; -CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count FROM test_01048.mt GROUP BY plus(a, b) as _type, tumble(timestamp, INTERVAL '1' SECOND) AS wid; +CREATE WINDOW VIEW test_01048.wv ENGINE Memory AS SELECT count(a) AS count FROM test_01048.mt GROUP BY plus(a, b) as _type, tumble(timestamp, INTERVAL '1' SECOND) AS wid; SHOW CREATE TABLE test_01048.`.inner.wv`; DROP TABLE IF EXISTS test_01048.wv; -CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count FROM test_01048.mt GROUP BY tumble(timestamp, INTERVAL '1' SECOND) AS wid, plus(a, b); +CREATE WINDOW VIEW test_01048.wv ENGINE Memory AS SELECT count(a) AS count FROM test_01048.mt GROUP BY tumble(timestamp, INTERVAL '1' SECOND) AS wid, plus(a, b); SHOW CREATE TABLE test_01048.`.inner.wv`; SELECT '||---TimeZone---'; DROP TABLE IF EXISTS test_01048.wv; -CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count, tumble(timestamp, INTERVAL '1' SECOND, 'Asia/Shanghai') AS wid FROM test_01048.mt 
GROUP BY wid; +CREATE WINDOW VIEW test_01048.wv ENGINE Memory AS SELECT count(a) AS count, tumble(timestamp, INTERVAL '1' SECOND, 'Asia/Shanghai') AS wid FROM test_01048.mt GROUP BY wid; SHOW CREATE TABLE test_01048.`.inner.wv`; SELECT '||---DATA COLUMN ALIAS---'; DROP TABLE IF EXISTS test_01048.wv; -CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count, b as id FROM test_01048.mt GROUP BY id, tumble(timestamp, INTERVAL '1' SECOND); +CREATE WINDOW VIEW test_01048.wv ENGINE Memory AS SELECT count(a) AS count, b as id FROM test_01048.mt GROUP BY id, tumble(timestamp, INTERVAL '1' SECOND); SHOW CREATE TABLE test_01048.`.inner.wv`; SELECT '||---JOIN---'; DROP TABLE IF EXISTS test_01048.wv; -CREATE WINDOW VIEW test_01048.wv AS SELECT count(test_01048.mt.a), count(test_01048.mt_2.b), wid FROM test_01048.mt JOIN test_01048.mt_2 ON test_01048.mt.timestamp = test_01048.mt_2.timestamp GROUP BY tumble(test_01048.mt.timestamp, INTERVAL '1' SECOND) AS wid; +CREATE WINDOW VIEW test_01048.wv ENGINE Memory AS SELECT count(test_01048.mt.a), count(test_01048.mt_2.b), wid FROM test_01048.mt JOIN test_01048.mt_2 ON test_01048.mt.timestamp = test_01048.mt_2.timestamp GROUP BY tumble(test_01048.mt.timestamp, INTERVAL '1' SECOND) AS wid; +SHOW CREATE TABLE test_01048.`.inner.wv`; + +SELECT '||---POPULATE JOIN---'; +DROP TABLE IF EXISTS test_01048.wv; +CREATE WINDOW VIEW test_01048.wv ENGINE Memory POPULATE AS SELECT count(test_01048.mt.a), count(test_01048.mt_2.b), wid FROM test_01048.mt JOIN test_01048.mt_2 ON test_01048.mt.timestamp = test_01048.mt_2.timestamp GROUP BY tumble(test_01048.mt.timestamp, INTERVAL '1' SECOND) AS wid; SHOW CREATE TABLE test_01048.`.inner.wv`; SELECT '---HOP---'; SELECT '||---WINDOW COLUMN NAME---'; DROP TABLE IF EXISTS test_01048.wv; -CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count, hopEnd(wid) as wend FROM test_01048.mt GROUP BY hop(timestamp, INTERVAL 1 SECOND, INTERVAL 3 SECOND) as wid; +CREATE WINDOW VIEW test_01048.wv ENGINE Memory AS SELECT count(a) AS count, hopEnd(wid) as wend FROM test_01048.mt GROUP BY hop(timestamp, INTERVAL 1 SECOND, INTERVAL 3 SECOND) as wid; SHOW CREATE TABLE test_01048.`.inner.wv`; SELECT '||---WINDOW COLUMN ALIAS---'; DROP TABLE IF EXISTS test_01048.wv; -CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count, hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid FROM test_01048.mt GROUP BY wid; +CREATE WINDOW VIEW test_01048.wv ENGINE Memory AS SELECT count(a) AS count, hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid FROM test_01048.mt GROUP BY wid; SHOW CREATE TABLE test_01048.`.inner.wv`; SELECT '||---IDENTIFIER---'; DROP TABLE IF EXISTS test_01048.wv; -CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count FROM test_01048.mt GROUP BY b, hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; +CREATE WINDOW VIEW test_01048.wv ENGINE Memory AS SELECT count(a) AS count FROM test_01048.mt GROUP BY b, hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; SHOW CREATE TABLE test_01048.`.inner.wv`; DROP TABLE IF EXISTS test_01048.wv; -CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count FROM test_01048.mt GROUP BY hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid, b; +CREATE WINDOW VIEW test_01048.wv ENGINE Memory AS SELECT count(a) AS count FROM test_01048.mt GROUP BY hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid, b; SHOW CREATE TABLE test_01048.`.inner.wv`; SELECT '||---FUNCTION---'; DROP TABLE IF EXISTS test_01048.wv; -CREATE WINDOW 
VIEW test_01048.wv AS SELECT count(a) AS count FROM test_01048.mt GROUP BY plus(a, b) as _type, hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; +CREATE WINDOW VIEW test_01048.wv ENGINE Memory AS SELECT count(a) AS count FROM test_01048.mt GROUP BY plus(a, b) as _type, hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; SHOW CREATE TABLE test_01048.`.inner.wv`; DROP TABLE IF EXISTS test_01048.wv; -CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count FROM test_01048.mt GROUP BY hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid, plus(a, b); +CREATE WINDOW VIEW test_01048.wv ENGINE Memory AS SELECT count(a) AS count FROM test_01048.mt GROUP BY hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid, plus(a, b); SHOW CREATE TABLE test_01048.`.inner.wv`; SELECT '||---TimeZone---'; DROP TABLE IF EXISTS test_01048.wv; -CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count, hopEnd(wid) as wend FROM test_01048.mt GROUP BY hop(timestamp, INTERVAL 1 SECOND, INTERVAL 3 SECOND, 'Asia/Shanghai') as wid; +CREATE WINDOW VIEW test_01048.wv ENGINE Memory AS SELECT count(a) AS count, hopEnd(wid) as wend FROM test_01048.mt GROUP BY hop(timestamp, INTERVAL 1 SECOND, INTERVAL 3 SECOND, 'Asia/Shanghai') as wid; SHOW CREATE TABLE test_01048.`.inner.wv`; SELECT '||---DATA COLUMN ALIAS---'; DROP TABLE IF EXISTS test_01048.wv; -CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count, b as id FROM test_01048.mt GROUP BY id, hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND); +CREATE WINDOW VIEW test_01048.wv ENGINE Memory AS SELECT count(a) AS count, b as id FROM test_01048.mt GROUP BY id, hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND); SHOW CREATE TABLE test_01048.`.inner.wv`; SELECT '||---JOIN---'; DROP TABLE IF EXISTS test_01048.wv; -CREATE WINDOW VIEW test_01048.wv AS SELECT count(test_01048.mt.a), count(test_01048.mt_2.b), wid FROM test_01048.mt JOIN test_01048.mt_2 ON test_01048.mt.timestamp = test_01048.mt_2.timestamp GROUP BY hop(test_01048.mt.timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; +CREATE WINDOW VIEW test_01048.wv ENGINE Memory AS SELECT count(test_01048.mt.a), count(test_01048.mt_2.b), wid FROM test_01048.mt JOIN test_01048.mt_2 ON test_01048.mt.timestamp = test_01048.mt_2.timestamp GROUP BY hop(test_01048.mt.timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; +SHOW CREATE TABLE test_01048.`.inner.wv`; + +SELECT '||---POPULATE JOIN---'; +DROP TABLE IF EXISTS test_01048.wv; +CREATE WINDOW VIEW test_01048.wv ENGINE Memory POPULATE AS SELECT count(test_01048.mt.a), count(test_01048.mt_2.b), wid FROM test_01048.mt JOIN test_01048.mt_2 ON test_01048.mt.timestamp = test_01048.mt_2.timestamp GROUP BY hop(test_01048.mt.timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; SHOW CREATE TABLE test_01048.`.inner.wv`; DROP TABLE test_01048.wv; diff --git a/tests/queries/0_stateless/01050_window_view_parser_tumble.sql b/tests/queries/0_stateless/01050_window_view_parser_tumble.sql index 54f9ed00cbe..d9604bb2b52 100644 --- a/tests/queries/0_stateless/01050_window_view_parser_tumble.sql +++ b/tests/queries/0_stateless/01050_window_view_parser_tumble.sql @@ -6,28 +6,28 @@ CREATE TABLE mt(a Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple(); SELECT '---WATERMARK---'; DROP TABLE IF EXISTS wv NO DELAY; -CREATE WINDOW VIEW wv WATERMARK=INTERVAL '1' SECOND AS SELECT count(a), tumbleStart(wid) AS w_start, tumbleEnd(wid) AS w_end FROM mt GROUP BY tumble(timestamp, INTERVAL '3' SECOND) AS wid; 
+CREATE WINDOW VIEW wv ENGINE Memory WATERMARK=INTERVAL '1' SECOND AS SELECT count(a), tumbleStart(wid) AS w_start, tumbleEnd(wid) AS w_end FROM mt GROUP BY tumble(timestamp, INTERVAL '3' SECOND) AS wid; SELECT '---With w_end---'; DROP TABLE IF EXISTS wv NO DELAY; -CREATE WINDOW VIEW wv AS SELECT count(a), tumbleStart(tumble(timestamp, INTERVAL '3' SECOND)) AS w_start, tumbleEnd(wid) AS w_end FROM mt GROUP BY tumble(timestamp, INTERVAL '3' SECOND) AS wid; +CREATE WINDOW VIEW wv ENGINE Memory AS SELECT count(a), tumbleStart(tumble(timestamp, INTERVAL '3' SECOND)) AS w_start, tumbleEnd(wid) AS w_end FROM mt GROUP BY tumble(timestamp, INTERVAL '3' SECOND) AS wid; SELECT '---WithOut w_end---'; DROP TABLE IF EXISTS wv NO DELAY; -CREATE WINDOW VIEW wv AS SELECT count(a), tumbleStart(wid) AS w_start FROM mt GROUP BY tumble(timestamp, INTERVAL '3' SECOND) AS wid; +CREATE WINDOW VIEW wv ENGINE Memory AS SELECT count(a), tumbleStart(wid) AS w_start FROM mt GROUP BY tumble(timestamp, INTERVAL '3' SECOND) AS wid; SELECT '---WITH---'; DROP TABLE IF EXISTS wv NO DELAY; -CREATE WINDOW VIEW wv AS WITH toDateTime('2018-01-01 00:00:00') AS date_time SELECT count(a), tumbleStart(wid) AS w_start, tumbleEnd(wid) AS w_end, date_time FROM mt GROUP BY tumble(timestamp, INTERVAL '3' SECOND) AS wid; +CREATE WINDOW VIEW wv ENGINE Memory AS WITH toDateTime('2018-01-01 00:00:00') AS date_time SELECT count(a), tumbleStart(wid) AS w_start, tumbleEnd(wid) AS w_end, date_time FROM mt GROUP BY tumble(timestamp, INTERVAL '3' SECOND) AS wid; SELECT '---WHERE---'; DROP TABLE IF EXISTS wv NO DELAY; -CREATE WINDOW VIEW wv AS SELECT count(a), tumbleStart(wid) AS w_start FROM mt WHERE a != 1 GROUP BY tumble(timestamp, INTERVAL '3' SECOND) AS wid; +CREATE WINDOW VIEW wv ENGINE Memory AS SELECT count(a), tumbleStart(wid) AS w_start FROM mt WHERE a != 1 GROUP BY tumble(timestamp, INTERVAL '3' SECOND) AS wid; SELECT '---ORDER_BY---'; DROP TABLE IF EXISTS wv NO DELAY; -CREATE WINDOW VIEW wv AS SELECT count(a), tumbleStart(wid) AS w_start FROM mt WHERE a != 1 GROUP BY tumble(timestamp, INTERVAL '3' SECOND) AS wid ORDER BY w_start; +CREATE WINDOW VIEW wv ENGINE Memory AS SELECT count(a), tumbleStart(wid) AS w_start FROM mt WHERE a != 1 GROUP BY tumble(timestamp, INTERVAL '3' SECOND) AS wid ORDER BY w_start; SELECT '---With now---'; DROP TABLE IF EXISTS wv NO DELAY; -CREATE WINDOW VIEW wv AS SELECT count(a), tumbleStart(wid) AS w_start, tumbleEnd(tumble(now(), INTERVAL '3' SECOND)) AS w_end FROM mt GROUP BY tumble(now(), INTERVAL '3' SECOND) AS wid; +CREATE WINDOW VIEW wv ENGINE Memory AS SELECT count(a), tumbleStart(wid) AS w_start, tumbleEnd(tumble(now(), INTERVAL '3' SECOND)) AS w_end FROM mt GROUP BY tumble(now(), INTERVAL '3' SECOND) AS wid; diff --git a/tests/queries/0_stateless/01051_window_view_parser_hop.sql b/tests/queries/0_stateless/01051_window_view_parser_hop.sql index 0f705d5c911..472dc66f1a2 100644 --- a/tests/queries/0_stateless/01051_window_view_parser_hop.sql +++ b/tests/queries/0_stateless/01051_window_view_parser_hop.sql @@ -6,28 +6,28 @@ CREATE TABLE mt(a Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple(); SELECT '---WATERMARK---'; DROP TABLE IF EXISTS wv NO DELAY; -CREATE WINDOW VIEW wv WATERMARK=INTERVAL '1' SECOND AS SELECT count(a), hopStart(wid) AS w_start, hopEnd(wid) AS w_end FROM mt GROUP BY hop(timestamp, INTERVAL '3' SECOND, INTERVAL '5' SECOND) AS wid; +CREATE WINDOW VIEW wv ENGINE Memory WATERMARK=INTERVAL '1' SECOND AS SELECT count(a), hopStart(wid) AS w_start, hopEnd(wid) AS w_end FROM mt 
GROUP BY hop(timestamp, INTERVAL '3' SECOND, INTERVAL '5' SECOND) AS wid; SELECT '---With w_end---'; DROP TABLE IF EXISTS wv NO DELAY; -CREATE WINDOW VIEW wv AS SELECT count(a), hopStart(wid) AS w_start, hopEnd(wid) AS w_end FROM mt GROUP BY hop(timestamp, INTERVAL '3' SECOND, INTERVAL '5' SECOND) AS wid; +CREATE WINDOW VIEW wv ENGINE Memory AS SELECT count(a), hopStart(wid) AS w_start, hopEnd(wid) AS w_end FROM mt GROUP BY hop(timestamp, INTERVAL '3' SECOND, INTERVAL '5' SECOND) AS wid; SELECT '---WithOut w_end---'; DROP TABLE IF EXISTS wv NO DELAY; -CREATE WINDOW VIEW wv AS SELECT count(a), hopStart(wid) AS w_start FROM mt GROUP BY hop(timestamp, INTERVAL '3' SECOND, INTERVAL '5' SECOND) AS wid; +CREATE WINDOW VIEW wv ENGINE Memory AS SELECT count(a), hopStart(wid) AS w_start FROM mt GROUP BY hop(timestamp, INTERVAL '3' SECOND, INTERVAL '5' SECOND) AS wid; SELECT '---WITH---'; DROP TABLE IF EXISTS wv NO DELAY; -CREATE WINDOW VIEW wv AS WITH toDateTime('2018-01-01 00:00:00') AS date_time SELECT count(a), hopStart(wid) AS w_start, hopEnd(wid) AS w_end, date_time FROM mt GROUP BY hop(timestamp, INTERVAL '3' SECOND, INTERVAL '5' SECOND) AS wid; +CREATE WINDOW VIEW wv ENGINE Memory AS WITH toDateTime('2018-01-01 00:00:00') AS date_time SELECT count(a), hopStart(wid) AS w_start, hopEnd(wid) AS w_end, date_time FROM mt GROUP BY hop(timestamp, INTERVAL '3' SECOND, INTERVAL '5' SECOND) AS wid; SELECT '---WHERE---'; DROP TABLE IF EXISTS wv NO DELAY; -CREATE WINDOW VIEW wv AS SELECT count(a), hopStart(wid) AS w_start FROM mt WHERE a != 1 GROUP BY hop(timestamp, INTERVAL '3' SECOND, INTERVAL '5' SECOND) AS wid; +CREATE WINDOW VIEW wv ENGINE Memory AS SELECT count(a), hopStart(wid) AS w_start FROM mt WHERE a != 1 GROUP BY hop(timestamp, INTERVAL '3' SECOND, INTERVAL '5' SECOND) AS wid; SELECT '---ORDER_BY---'; DROP TABLE IF EXISTS wv NO DELAY; -CREATE WINDOW VIEW wv AS SELECT count(a), hopStart(wid) AS w_start FROM mt WHERE a != 1 GROUP BY hop(timestamp, INTERVAL '3' SECOND, INTERVAL '5' SECOND) AS wid ORDER BY w_start; +CREATE WINDOW VIEW wv ENGINE Memory AS SELECT count(a), hopStart(wid) AS w_start FROM mt WHERE a != 1 GROUP BY hop(timestamp, INTERVAL '3' SECOND, INTERVAL '5' SECOND) AS wid ORDER BY w_start; SELECT '---With now---'; DROP TABLE IF EXISTS wv NO DELAY; -CREATE WINDOW VIEW wv AS SELECT count(a), hopStart(wid) AS w_start, hopEnd(hop(now(), INTERVAL '1' SECOND, INTERVAL '3' SECOND)) as w_end FROM mt GROUP BY hop(now(), INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; +CREATE WINDOW VIEW wv ENGINE Memory AS SELECT count(a), hopStart(wid) AS w_start, hopEnd(hop(now(), INTERVAL '1' SECOND, INTERVAL '3' SECOND)) as w_end FROM mt GROUP BY hop(now(), INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; diff --git a/tests/queries/0_stateless/01056_window_view_proc_hop_watch.py b/tests/queries/0_stateless/01056_window_view_proc_hop_watch.py index be139c153aa..8580ad43ccd 100755 --- a/tests/queries/0_stateless/01056_window_view_proc_hop_watch.py +++ b/tests/queries/0_stateless/01056_window_view_proc_hop_watch.py @@ -1,4 +1,6 @@ #!/usr/bin/env python3 +# Tags: no-parallel + import os import sys import signal @@ -25,28 +27,27 @@ with client(name="client1>", log=log) as client1, client( client2.send("SET allow_experimental_window_view = 1") client2.expect(prompt) - client1.send("CREATE DATABASE 01056_window_view_proc_hop_watch") + client1.send("CREATE DATABASE IF NOT EXISTS 01056_window_view_proc_hop_watch") client1.expect(prompt) client1.send("DROP TABLE IF EXISTS 
01056_window_view_proc_hop_watch.mt") client1.expect(prompt) client1.send("DROP TABLE IF EXISTS 01056_window_view_proc_hop_watch.wv") client1.expect(prompt) - client1.send("DROP TABLE IF EXISTS `.inner.wv`") - client1.expect(prompt) client1.send( "CREATE TABLE 01056_window_view_proc_hop_watch.mt(a Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple()" ) client1.expect(prompt) client1.send( - "CREATE WINDOW VIEW 01056_window_view_proc_hop_watch.wv AS SELECT count(a) AS count FROM 01056_window_view_proc_hop_watch.mt GROUP BY hop(timestamp, INTERVAL '1' SECOND, INTERVAL '1' SECOND, 'US/Samoa') AS wid;" + "CREATE WINDOW VIEW 01056_window_view_proc_hop_watch.wv ENGINE Memory AS SELECT count(a) AS count FROM 01056_window_view_proc_hop_watch.mt GROUP BY hop(timestamp, INTERVAL '1' SECOND, INTERVAL '1' SECOND, 'US/Samoa') AS wid;" ) client1.expect(prompt) client1.send("WATCH 01056_window_view_proc_hop_watch.wv") client1.expect("Query id" + end_of_block) + client1.expect("Progress: 0.00 rows.*\)") client2.send( - "INSERT INTO 01056_window_view_proc_hop_watch.mt VALUES (1, now('US/Samoa') + 1)" + "INSERT INTO 01056_window_view_proc_hop_watch.mt VALUES (1, now('US/Samoa') + 3)" ) client1.expect("1" + end_of_block) client1.expect("Progress: 1.00 rows.*\)") diff --git a/tests/queries/0_stateless/01059_window_view_event_hop_watch_strict_asc.py b/tests/queries/0_stateless/01059_window_view_event_hop_watch_strict_asc.py index f5024cb11ab..44c2f211f2b 100755 --- a/tests/queries/0_stateless/01059_window_view_event_hop_watch_strict_asc.py +++ b/tests/queries/0_stateless/01059_window_view_event_hop_watch_strict_asc.py @@ -1,4 +1,6 @@ #!/usr/bin/env python3 +# Tags: no-parallel + import os import sys import signal @@ -23,7 +25,7 @@ with client(name="client1>", log=log) as client1, client( client1.send("SET window_view_heartbeat_interval = 1") client1.expect(prompt) - client1.send("CREATE DATABASE db_01059_event_hop_watch_strict_asc") + client1.send("CREATE DATABASE IF NOT EXISTS db_01059_event_hop_watch_strict_asc") client1.expect(prompt) client1.send("DROP TABLE IF EXISTS db_01059_event_hop_watch_strict_asc.mt") client1.expect(prompt) @@ -31,16 +33,17 @@ with client(name="client1>", log=log) as client1, client( client1.expect(prompt) client1.send( - "CREATE TABLE db_01059_event_hop_watch_strict_asc.mt(a Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple()" + "CREATE TABLE db_01059_event_hop_watch_strict_asc.mt(a Int32, timestamp DateTime('US/Samoa')) ENGINE=MergeTree ORDER BY tuple()" ) client1.expect(prompt) client1.send( - "CREATE WINDOW VIEW db_01059_event_hop_watch_strict_asc.wv WATERMARK=STRICTLY_ASCENDING AS SELECT count(a) AS count, hopEnd(wid) as w_end FROM db_01059_event_hop_watch_strict_asc.mt GROUP BY hop(timestamp, INTERVAL '2' SECOND, INTERVAL '3' SECOND, 'US/Samoa') AS wid;" + "CREATE WINDOW VIEW db_01059_event_hop_watch_strict_asc.wv ENGINE Memory WATERMARK=STRICTLY_ASCENDING AS SELECT count(a) AS count, hopEnd(wid) as w_end FROM db_01059_event_hop_watch_strict_asc.mt GROUP BY hop(timestamp, INTERVAL '2' SECOND, INTERVAL '3' SECOND, 'US/Samoa') AS wid;" ) client1.expect(prompt) client1.send("WATCH db_01059_event_hop_watch_strict_asc.wv") client1.expect("Query id" + end_of_block) + client1.expect("Progress: 0.00 rows.*\)") client2.send( "INSERT INTO db_01059_event_hop_watch_strict_asc.mt VALUES (1, toDateTime('1990/01/01 12:00:00', 'US/Samoa'));" ) diff --git a/tests/queries/0_stateless/01062_window_view_event_hop_watch_asc.py 
b/tests/queries/0_stateless/01062_window_view_event_hop_watch_asc.py index 7f23e983ba2..ddf0c423fa9 100755 --- a/tests/queries/0_stateless/01062_window_view_event_hop_watch_asc.py +++ b/tests/queries/0_stateless/01062_window_view_event_hop_watch_asc.py @@ -1,4 +1,6 @@ #!/usr/bin/env python3 +# Tags: no-parallel + import os import sys import signal @@ -25,7 +27,7 @@ with client(name="client1>", log=log) as client1, client( client2.send("SET allow_experimental_window_view = 1") client2.expect(prompt) - client1.send("CREATE DATABASE 01062_window_view_event_hop_watch_asc") + client1.send("CREATE DATABASE IF NOT EXISTS 01062_window_view_event_hop_watch_asc") client1.expect(prompt) client1.send("DROP TABLE IF EXISTS 01062_window_view_event_hop_watch_asc.mt") client1.expect(prompt) @@ -39,27 +41,28 @@ with client(name="client1>", log=log) as client1, client( ) client1.expect(prompt) client1.send( - "CREATE WINDOW VIEW 01062_window_view_event_hop_watch_asc.wv WATERMARK=ASCENDING AS SELECT count(a) AS count, hopEnd(wid) AS w_end FROM 01062_window_view_event_hop_watch_asc.mt GROUP BY hop(timestamp, INTERVAL '2' SECOND, INTERVAL '3' SECOND, 'US/Samoa') AS wid" + "CREATE WINDOW VIEW 01062_window_view_event_hop_watch_asc.wv ENGINE Memory WATERMARK=ASCENDING AS SELECT count(a) AS count, hopEnd(wid) AS w_end FROM 01062_window_view_event_hop_watch_asc.mt GROUP BY hop(timestamp, INTERVAL '2' SECOND, INTERVAL '3' SECOND, 'US/Samoa') AS wid" ) client1.expect(prompt) client1.send("WATCH 01062_window_view_event_hop_watch_asc.wv") client1.expect("Query id" + end_of_block) + client1.expect("Progress: 0.00 rows.*\)") client2.send( - "INSERT INTO 01062_window_view_event_hop_watch_asc.mt VALUES (1, '1990/01/01 12:00:00');" + "INSERT INTO 01062_window_view_event_hop_watch_asc.mt VALUES (1, toDateTime('1990/01/01 12:00:00', 'US/Samoa'));" ) client2.expect(prompt) client2.send( - "INSERT INTO 01062_window_view_event_hop_watch_asc.mt VALUES (1, '1990/01/01 12:00:05');" + "INSERT INTO 01062_window_view_event_hop_watch_asc.mt VALUES (1, toDateTime('1990/01/01 12:00:05', 'US/Samoa'));" ) client2.expect(prompt) client1.expect("1*" + end_of_block) client2.send( - "INSERT INTO 01062_window_view_event_hop_watch_asc.mt VALUES (1, '1990/01/01 12:00:06');" + "INSERT INTO 01062_window_view_event_hop_watch_asc.mt VALUES (1, toDateTime('1990/01/01 12:00:06', 'US/Samoa'));" ) client2.expect(prompt) client2.send( - "INSERT INTO 01062_window_view_event_hop_watch_asc.mt VALUES (1, '1990/01/01 12:00:10');" + "INSERT INTO 01062_window_view_event_hop_watch_asc.mt VALUES (1, toDateTime('1990/01/01 12:00:10', 'US/Samoa'));" ) client2.expect(prompt) client1.expect("1" + end_of_block) diff --git a/tests/queries/0_stateless/01065_window_view_event_hop_watch_bounded.py b/tests/queries/0_stateless/01065_window_view_event_hop_watch_bounded.py index 92d2b56ed34..2e63e762f52 100755 --- a/tests/queries/0_stateless/01065_window_view_event_hop_watch_bounded.py +++ b/tests/queries/0_stateless/01065_window_view_event_hop_watch_bounded.py @@ -1,7 +1,8 @@ #!/usr/bin/env python3 +# Tags: no-parallel + import os import sys -import signal CURDIR = os.path.dirname(os.path.realpath(__file__)) sys.path.insert(0, os.path.join(CURDIR, "helpers")) @@ -25,33 +26,45 @@ with client(name="client1>", log=log) as client1, client( client2.send("SET allow_experimental_window_view = 1") client2.expect(prompt) - client1.send("DROP TABLE IF EXISTS test.mt") + client1.send( + "CREATE DATABASE IF NOT EXISTS 01065_window_view_event_hop_watch_bounded" + ) client1.expect(prompt) 
- client1.send("DROP TABLE IF EXISTS test.wv") + client1.send("DROP TABLE IF EXISTS 01065_window_view_event_hop_watch_bounded.mt") client1.expect(prompt) - client1.send("DROP TABLE IF EXISTS `.inner.wv`") + client1.send("DROP TABLE IF EXISTS 01065_window_view_event_hop_watch_bounded.wv") client1.expect(prompt) client1.send( - "CREATE TABLE test.mt(a Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple()" + "CREATE TABLE 01065_window_view_event_hop_watch_bounded.mt(a Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple()" ) client1.expect(prompt) client1.send( - "CREATE WINDOW VIEW test.wv WATERMARK=INTERVAL '2' SECOND AS SELECT count(a) AS count, hopEnd(wid) AS w_end FROM test.mt GROUP BY hop(timestamp, INTERVAL '2' SECOND, INTERVAL '3' SECOND, 'US/Samoa') AS wid" + "CREATE WINDOW VIEW 01065_window_view_event_hop_watch_bounded.wv ENGINE Memory WATERMARK=INTERVAL '2' SECOND AS SELECT count(a) AS count, hopEnd(wid) AS w_end FROM 01065_window_view_event_hop_watch_bounded.mt GROUP BY hop(timestamp, INTERVAL '2' SECOND, INTERVAL '3' SECOND, 'US/Samoa') AS wid" ) - client1.expect(prompt) + client1.expect("Ok.") - client1.send("WATCH test.wv") - client2.send("INSERT INTO test.mt VALUES (1, '1990/01/01 12:00:00');") - client2.expect(prompt) - client2.send("INSERT INTO test.mt VALUES (1, '1990/01/01 12:00:05');") - client2.expect(prompt) - client1.expect("1*" + end_of_block) - client2.send("INSERT INTO test.mt VALUES (1, '1990/01/01 12:00:06');") - client2.send("INSERT INTO test.mt VALUES (1, '1990/01/01 12:00:10');") - client2.expect(prompt) - client1.expect("1*" + end_of_block) - client1.expect("2*" + end_of_block) + client1.send("WATCH 01065_window_view_event_hop_watch_bounded.wv") + client1.expect("Query id" + end_of_block) + client1.expect("Progress: 0.00 rows.*\)") + client2.send( + "INSERT INTO 01065_window_view_event_hop_watch_bounded.mt VALUES (1, '1990/01/01 12:00:00');" + ) + client2.expect("Ok.") + client2.send( + "INSERT INTO 01065_window_view_event_hop_watch_bounded.mt VALUES (1, '1990/01/01 12:00:05');" + ) + client2.expect("Ok.") + client1.expect("1" + end_of_block) + client2.send( + "INSERT INTO 01065_window_view_event_hop_watch_bounded.mt VALUES (1, '1990/01/01 12:00:06');" + ) + client2.expect("Ok.") + client2.send( + "INSERT INTO 01065_window_view_event_hop_watch_bounded.mt VALUES (1, '1990/01/01 12:00:10');" + ) + client2.expect("Ok.") + client1.expect("2" + end_of_block) # send Ctrl-C client1.send("\x03", eol="") @@ -59,7 +72,9 @@ with client(name="client1>", log=log) as client1, client( if match.groups()[1]: client1.send(client1.command) client1.expect(prompt) - client1.send("DROP TABLE test.wv") + client1.send("DROP TABLE 01065_window_view_event_hop_watch_bounded.wv") client1.expect(prompt) - client1.send("DROP TABLE test.mt") + client1.send("DROP TABLE 01065_window_view_event_hop_watch_bounded.mt") + client1.expect(prompt) + client1.send("DROP DATABASE IF EXISTS 01065_window_view_event_hop_watch_bounded") client1.expect(prompt) diff --git a/tests/queries/0_stateless/01069_window_view_proc_tumble_watch.py b/tests/queries/0_stateless/01069_window_view_proc_tumble_watch.py index 4c675fcabfb..7b7d05c92db 100755 --- a/tests/queries/0_stateless/01069_window_view_proc_tumble_watch.py +++ b/tests/queries/0_stateless/01069_window_view_proc_tumble_watch.py @@ -39,12 +39,13 @@ with client(name="client1>", log=log) as client1, client( ) client1.expect(prompt) client1.send( - "CREATE WINDOW VIEW 01069_window_view_proc_tumble_watch.wv AS SELECT count(a) AS count FROM 
01069_window_view_proc_tumble_watch.mt GROUP BY tumble(timestamp, INTERVAL '1' SECOND, 'US/Samoa') AS wid;" + "CREATE WINDOW VIEW 01069_window_view_proc_tumble_watch.wv ENGINE Memory AS SELECT count(a) AS count FROM 01069_window_view_proc_tumble_watch.mt GROUP BY tumble(timestamp, INTERVAL '1' SECOND, 'US/Samoa') AS wid;" ) client1.expect(prompt) client1.send("WATCH 01069_window_view_proc_tumble_watch.wv") client1.expect("Query id" + end_of_block) + client1.expect("Progress: 0.00 rows.*\)") client2.send( "INSERT INTO 01069_window_view_proc_tumble_watch.mt VALUES (1, now('US/Samoa') + 3)" ) diff --git a/tests/queries/0_stateless/01070_window_view_watch_events.py b/tests/queries/0_stateless/01070_window_view_watch_events.py index 2bf732d68e5..f8782e5e7ce 100755 --- a/tests/queries/0_stateless/01070_window_view_watch_events.py +++ b/tests/queries/0_stateless/01070_window_view_watch_events.py @@ -39,18 +39,19 @@ with client(name="client1>", log=log) as client1, client( ) client1.expect(prompt) client1.send( - "CREATE WINDOW VIEW 01070_window_view_watch_events.wv WATERMARK=ASCENDING AS SELECT count(a) AS count, tumbleEnd(wid) AS w_end FROM 01070_window_view_watch_events.mt GROUP BY tumble(timestamp, INTERVAL '5' SECOND, 'US/Samoa') AS wid" + "CREATE WINDOW VIEW 01070_window_view_watch_events.wv ENGINE Memory WATERMARK=ASCENDING AS SELECT count(a) AS count, tumbleEnd(wid) AS w_end FROM 01070_window_view_watch_events.mt GROUP BY tumble(timestamp, INTERVAL '5' SECOND, 'US/Samoa') AS wid" ) client1.expect(prompt) client1.send("WATCH 01070_window_view_watch_events.wv EVENTS") client1.expect("Query id" + end_of_block) + client1.expect("Progress: 0.00 rows.*\)") client2.send( - "INSERT INTO 01070_window_view_watch_events.mt VALUES (1, '1990/01/01 12:00:00');" + "INSERT INTO 01070_window_view_watch_events.mt VALUES (1, toDateTime('1990/01/01 12:00:00', 'US/Samoa'));" ) client2.expect("Ok.") client2.send( - "INSERT INTO 01070_window_view_watch_events.mt VALUES (1, '1990/01/01 12:00:06');" + "INSERT INTO 01070_window_view_watch_events.mt VALUES (1, toDateTime('1990/01/01 12:00:06', 'US/Samoa'));" ) client2.expect("Ok.") client1.expect("1990-01-01 12:00:05" + end_of_block) diff --git a/tests/queries/0_stateless/01073_window_view_event_tumble_to_asc_populate.reference b/tests/queries/0_stateless/01073_window_view_event_tumble_to_asc_populate.reference new file mode 100644 index 00000000000..ba7551b2578 --- /dev/null +++ b/tests/queries/0_stateless/01073_window_view_event_tumble_to_asc_populate.reference @@ -0,0 +1,17 @@ +1 1 1990-01-01 12:00:05 +1 2 1990-01-01 12:00:05 +1 3 1990-01-01 12:00:05 +1 4 1990-01-01 12:00:10 +1 5 1990-01-01 12:00:10 +1 6 1990-01-01 12:00:15 +1 7 1990-01-01 12:00:15 +------ +1 1 1990-01-01 12:00:05 +1 2 1990-01-01 12:00:05 +1 3 1990-01-01 12:00:05 +1 4 1990-01-01 12:00:10 +1 5 1990-01-01 12:00:10 +1 6 1990-01-01 12:00:15 +1 7 1990-01-01 12:00:15 +1 8 1990-01-01 12:00:35 +2 8 1990-01-01 12:00:40 diff --git a/tests/queries/0_stateless/01073_window_view_event_tumble_to_asc_populate.sh b/tests/queries/0_stateless/01073_window_view_event_tumble_to_asc_populate.sh new file mode 100755 index 00000000000..0845be093d5 --- /dev/null +++ b/tests/queries/0_stateless/01073_window_view_event_tumble_to_asc_populate.sh @@ -0,0 +1,48 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT --multiquery </dev/null +echo "without_on_cluster_$CLICKHOUSE_TEST_UNIQUE_NAME" +$CLICKHOUSE_CLIENT --user "without_on_cluster_$CLICKHOUSE_TEST_UNIQUE_NAME" -q "CREATE DATABASE IF NOT EXISTS db_without_on_cluster_$CLICKHOUSE_TEST_UNIQUE_NAME ON CLUSTER test_shard_localhost" |& { + grep -m1 -F -o "Not enough privileges. To execute this query it's necessary to have grant CLUSTER ON *.*. (ACCESS_DENIED)" +} diff --git a/tests/queries/0_stateless/02263_lazy_mark_load.reference b/tests/queries/0_stateless/02263_lazy_mark_load.reference new file mode 100644 index 00000000000..0cfbf08886f --- /dev/null +++ b/tests/queries/0_stateless/02263_lazy_mark_load.reference @@ -0,0 +1 @@ +2 diff --git a/tests/queries/0_stateless/02263_lazy_mark_load.sh b/tests/queries/0_stateless/02263_lazy_mark_load.sh new file mode 100755 index 00000000000..bdcada6dc34 --- /dev/null +++ b/tests/queries/0_stateless/02263_lazy_mark_load.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash +set -eo pipefail + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +QUERY_ID=$(${CLICKHOUSE_CLIENT} -q "select lower(hex(reverse(reinterpretAsString(generateUUIDv4()))))") + +${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS lazy_mark_test;" +${CLICKHOUSE_CLIENT} <&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') + +DATA_FILE=$USER_FILES_PATH/data_02293 + +$CLICKHOUSE_CLIENT -q "drop table if exists test_02293" +$CLICKHOUSE_CLIENT -q "create table test_02293 (a UInt32, b String, c Array(Tuple(Array(UInt32), String))) engine=Memory" +$CLICKHOUSE_CLIENT -q "insert into test_02293 select number, 'String', [(range(number % 3), 'String'), (range(number % 4), 'gnirtS')] from numbers(5) settings max_block_size=2" + +echo "JSONColumns" +$CLICKHOUSE_CLIENT -q "select * from test_02293 order by a format JSONColumns" +$CLICKHOUSE_CLIENT -q "select * from test_02293 order by a format JSONColumns" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "desc file(data_02293, JSONColumns)" +$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONColumns)" + +echo "JSONCompactColumns" +$CLICKHOUSE_CLIENT -q "select * from test_02293 order by a format JSONCompactColumns" +$CLICKHOUSE_CLIENT -q "select * from test_02293 order by a format JSONCompactColumns" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "desc file(data_02293, JSONCompactColumns)" +$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONCompactColumns)" + +echo "JSONColumnsWithMetadata" +$CLICKHOUSE_CLIENT -q "select sum(a) as sum, avg(a) as avg from test_02293 group by a % 4 with totals order by tuple(sum, avg) format JSONColumnsWithMetadata" --extremes=1 | grep -v "elapsed" + + +echo ' +{ + "b": [1, 2, 3], + "a": [3, 2, 1] +} +{ + "c": [1, 2, 3] +} +{ +} +{ + "a": [], + "d": [] +} +{ + "d": ["String"] +} +' > $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file(data_02293, JSONColumns)" +$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONColumns)" +$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONColumns, 'a UInt32, t String')" 2>&1 | grep -F -q 'INCORRECT_DATA' && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONColumns, 'a UInt32, t String') settings input_format_skip_unknown_fields=1" + +echo ' +[ + [1, 2, 3], + [1, 2, 3] +] +[ + [1, 2, 3] +] +[ +] +[ + [], + [] +] +[ + [1], + [2], + ["String"] +] +' > $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file(data_02293, JSONCompactColumns)" +$CLICKHOUSE_CLIENT -q "select * from file(data_02293, 
JSONCompactColumns)" +$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONCompactColumns, 'a UInt32, t UInt32')" 2>&1 | grep -F -q 'INCORRECT_DATA' && echo 'OK' || echo 'FAIL' + +echo ' +{ + "a": [null, null, null], + "b": [3, 2, 1] +} +{ + "a": [1, 2, 3] +} +' > $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file(data_02293, JSONColumns) settings input_format_max_rows_to_read_for_schema_inference=3" 2>&1 | grep -F -q 'CANNOT_EXTRACT_TABLE_STRUCTURE' && echo 'OK' || echo 'FAIL' + + diff --git a/tests/queries/0_stateless/02293_h3_distance.reference b/tests/queries/0_stateless/02293_h3_distance.reference new file mode 100644 index 00000000000..c02779b2c93 --- /dev/null +++ b/tests/queries/0_stateless/02293_h3_distance.reference @@ -0,0 +1,28 @@ +7 +7 +7 +7 +7 +7 +7 +7 +7 +7 +8 +9 +9 +9 +9 +9 +9 +9 +9 +9 +13 +13 +13 +13 +13 +13 +13 +13 diff --git a/tests/queries/0_stateless/02293_h3_distance.sql b/tests/queries/0_stateless/02293_h3_distance.sql new file mode 100644 index 00000000000..7bb35d222c0 --- /dev/null +++ b/tests/queries/0_stateless/02293_h3_distance.sql @@ -0,0 +1,43 @@ +-- Tags: no-fasttest + +DROP TABLE IF EXISTS h3_indexes; + +CREATE TABLE h3_indexes (id int, start String, end String) ENGINE = Memory; + +-- test values taken from h3 library test suite + +INSERT INTO h3_indexes VALUES (1, '830631fffffffff','830780fffffffff'); +INSERT INTO h3_indexes VALUES (2, '830631fffffffff','830783fffffffff'); +INSERT INTO h3_indexes VALUES (3, '830631fffffffff','83079dfffffffff'); +INSERT INTO h3_indexes VALUES (4, '830631fffffffff','830799fffffffff'); +INSERT INTO h3_indexes VALUES (5, '830631fffffffff','8306f5fffffffff'); +INSERT INTO h3_indexes VALUES (6, '830631fffffffff','8306e6fffffffff'); +INSERT INTO h3_indexes VALUES (7, '830631fffffffff','8306e4fffffffff'); +INSERT INTO h3_indexes VALUES (8, '830631fffffffff','830701fffffffff'); +INSERT INTO h3_indexes VALUES (9, '830631fffffffff','830700fffffffff'); +INSERT INTO h3_indexes VALUES (10, '830631fffffffff','830706fffffffff'); +INSERT INTO h3_indexes VALUES (11, '830631fffffffff','830733fffffffff'); +INSERT INTO h3_indexes VALUES (12, '8301a6fffffffff','830014fffffffff'); +INSERT INTO h3_indexes VALUES (13, '8301a6fffffffff','830033fffffffff'); +INSERT INTO h3_indexes VALUES (14, '8301a6fffffffff','830031fffffffff'); +INSERT INTO h3_indexes VALUES (15, '8301a6fffffffff','830022fffffffff'); +INSERT INTO h3_indexes VALUES (16, '8301a6fffffffff','830020fffffffff'); +INSERT INTO h3_indexes VALUES (17, '8301a6fffffffff','830024fffffffff'); +INSERT INTO h3_indexes VALUES (18, '8301a6fffffffff','830120fffffffff'); +INSERT INTO h3_indexes VALUES (19, '8301a6fffffffff','830124fffffffff'); +INSERT INTO h3_indexes VALUES (20, '8301a6fffffffff','8308cdfffffffff'); +INSERT INTO h3_indexes VALUES (21, '8301a5fffffffff','831059fffffffff'); +INSERT INTO h3_indexes VALUES (22, '8301a5fffffffff','830b2dfffffffff'); +INSERT INTO h3_indexes VALUES (23, '8301a5fffffffff','830b29fffffffff'); +INSERT INTO h3_indexes VALUES (24, '8301a5fffffffff','830b76fffffffff'); +INSERT INTO h3_indexes VALUES (25, '8301a5fffffffff','830b43fffffffff'); +INSERT INTO h3_indexes VALUES (26, '8301a5fffffffff','830b4efffffffff'); +INSERT INTO h3_indexes VALUES (27, '8301a5fffffffff','830b48fffffffff'); +INSERT INTO h3_indexes VALUES (28, '8301a5fffffffff','830b49fffffffff'); + + +SELECT h3Distance(stringToH3(start), stringToH3(end)) FROM h3_indexes ORDER BY id; + + +DROP TABLE h3_indexes; + diff --git a/tests/queries/0_stateless/02293_h3_hex_ring.reference 
b/tests/queries/0_stateless/02293_h3_hex_ring.reference new file mode 100644 index 00000000000..1f1175b0c7f --- /dev/null +++ b/tests/queries/0_stateless/02293_h3_hex_ring.reference @@ -0,0 +1,17 @@ +[581276613233082367] +[580995138256371711,581144671837749247,581166662070304767,581171060116815871,581267817140060159,581272215186571263,581276613233082367,581531699930726399,581536097977237503,581558088209793023,581747204209770495,581764796395814911] +[581250224954015743,581259021047037951,581267817140060159,581500913605148671,581518505791193087,581764796395814911] +[589624655266971647,589625205022785535,589626854290227199,589642797208829951,589644996232085503,589708767906496511,589709317662310399,589709867418124287,589714815220449279,589715914732077055,589725810336727039,589727459604168703,589728009359982591,589735156185563135,589736255697190911,589742303011143679,589744502034399231,589745051790213119] +[594053281945223167,594053419384176639,594054106578943999,594054244017897471,594054450176327679,594055343529525247,594055618407432191,594064277061500927,594064345780977663,594071698764988415,594071767484465151,594072111081848831,594072317240279039,594072523398709247,594072592118185983,594079532785336319,594079807663243263,594081044613824511,594081113333301247,594081319491731455,594081594369638399,594081731808591871,594081937967022079,594082762600742911] +[598346591383846911,598346814722146303,598346840491950079,598346849081884671,598346960751034367,598346977930903551,598348884896382975,598348910666186751,598348919256121343,598349168364224511,598349176954159103,598356710326796287,598356718916730879,598356761866403839,598356779046273023,598356796226142207,598356821995945983,598371905921089535,598371923100958719,598372687605137407,598372704785006591,598372807864221695,598372842223960063,598373580958334975,598373589548269567,598373830066438143,598373864426176511,598374783549177855,598374792139112447,598374800729047039] +[599542260886929407,599542265181896703,599542294172925951,599542297394151423,599542298467893247,599542329606406143,599542330680147967,599542331753889791,599542337122598911,599542338196340735,599542343565049855,599542350007500799,599542351081242623,599542599115603967,599542601263087615,599542612000505855,599542614147989503,599542617369214975,599542630254116863,599542632401600511,599542642065276927,599542643139018751,599547512558190591,599547514705674239,599547525443092479,599547527590576127,599547530811801599,599549536561528831,599549540856496127,599549546225205247,599549550520172543,599549569847525375,599549573068750847,599549576289976319,599549838282981375,599549839356723199] +[604296028669083647,604296029205954559,604296036587929599,604296036722147327,604296037124800511,604296037259018239,604296037930106879,604296273348001791,604296273750654975,604296273884872703,604296277777186815,604296333477543935,604296333880197119,604296334014414847,604296337235640319,604296337369858047,604296337772511231,604296337906728959,604296338577817599,604296347704623103,604296348241494015,604296351999590399,604296352133808127,604296356294557695,604296356831428607,604296358173605887,604296358442041343,604296358844694527,604296360857960447,604296361126395903,604296363676532735,604296363944968191,604296364213403647,604296369179459583,604296369313677311,604296383943409663,604296384211845119,604296384614498303,604296386627764223,604296386896199679,604296389446336511,604296389714771967] 
+[608784291018571775,608784291035348991,608784291119235071,608784291136012287,608784291219898367,608784292931174399,608784292964728831,608784293132500991,608784293166055423,608784293216387071,608784293417713663,608784293451268095,608785191854407679,608785191904739327,608785191921516543,608785192424833023,608785192491941887,608785193750233087,608785193783787519,608785194203217919,608785194236772351,608785194404544511,608785194438098943,608785194488430591,608785196182929407,608785196199706623,608785198632402943,608785198649180159,608785198716289023,608785198783397887,608785209319489535,608785209386598399,608785209436930047,608785209453707263,608785209537593343,608785210410008575,608785210426785791,608785210510671871,608785210594557951,608785210611335167,608785210711998463,608785210728775679,608785213195026431,608785213245358079,608785213262135295,608785213614456831,608785213765451775,608785213832560639] +[615732152056676351,615732152062967807,615732152065064959,615732152115396607,615732152117493759,615732152125882367,615732152134270975,615732152593547263,615732152599838719,615732152601935871,615732152648073215,615732152656461823,615732152677433343,615732152681627647,615732152687919103,615732189809606655,615732191669780479,615732191673974783,615732191678169087,615732191701237759,615732191705432063,615732191711723519,615732191923535871,615732191925633023,615732191938215935,615732191942410239,615732191978061823,615732191980158975,615732192020004863,615732192022102015,615732192032587775,615732192129056767,615732192133251071,615732192206651391,615732192215039999,615732192263274495,615732192265371647,615732192271663103,615732192273760255,615732192284246015,615732192359743487,615732192368132095,615732192422658047,615732192428949503,615732192431046655,615732196117839871,615732196119937023,615732196356915199,615732196361109503,615732196432412671,615732196434509823,615732196436606975,615732196451287039,615732196455481343] +[617056792082120703,617056792083169279,617056793998917631,617056793999179775,617056794000490495,617056794000752639,617056794002063359,617056794003636223,617056794003898367,617056794015170559,617056794015694847,617056794016481279,617056794020413439,617056794020675583,617056794478903295,617056794479951871,617056794487029759,617056794487816191,617056794488078335,617056794497515519,617056794498301951,617056794498564095,617056794506428415,617056794507476991,617056794527924223,617056794528186367,617056794528448511,617056794531069951,617056794531594239,617056794532380671,617056794537885695,617056794538147839,617056794544701439,617056794544963583,617056794546012159,617056794547060735,617056794554662911,617056794554925055,617056794556235775,617056794556497919,617056794557808639,617056794561478655,617056794562002943,617056794564624383,617056794565410815,617056794565935103,617056794569080831,617056794569605119,617056794666860543,617056794699890687,617056794700414975,617056794703036415,617056794703560703,617056794704347135,617056794707492863,617056794708017151,617056794755727359,617056794756775935,617056794822836223,617056794823884799] 
+[624586471612907519,624586471613005823,624586471613038591,624586471613759487,624586471613890559,624586471613988863,624586471614119935,624586471619330047,624586471619395583,624586471620280319,624586471620411391,624586471620804607,624586471620870143,624586471621951487,624586471621984255,624586471622148095,624586471622279167,624586471622934527,624586471622967295,624586471647903743,624586471647936511,624586471655374847,624586471655407615,624586471655571455,624586471655702527,624586471655735295,624586471657078783,624586471657144319,624586471658258431,624586471658323967,624586471658422271,624586471658553343,624586471658618879,624586471660486655,624586471660519423,624586475478614015,624586475478646783,624586475478745087,624586475478777855,624586475478941695,624586475479597055,624586475479629823,624586477870874623,624586477871005695,624586477871366143,624586477871497215,624586477871890431,624586477871955967,624586477872250879,624586477872316415,624586477877166079,624586477877297151,624586477877657599,624586477877788671,624586477877952511,624586477877985279,624586477878149119,624586477878247423,624586477878280191,624586477882376191,624586477882441727,624586477882736639,624586477882802175,624586477882867711,624586477882933247,624586477883031551] +[627882919482134527,627882919482138623,627882919482159103,627882919482200063,627882919482208255,627882919482220543,627882919482675199,627882919482683391,627882919482724351,627882919482732543,627882919482744831,627882919482793983,627882919482802175,627882919484207103,627882919484223487,627882919484297215,627882919484309503,627882919484313599,627882919484399615,627882919484415999,627882919484993535,627882919484997631,627882919485018111,627882919485022207,627882919485042687,627882919485067263,627882919485071359,627882919485255679,627882919485329407,627882919485333503,627882919485345791,627882919485358079,627882919485362175,627882919521722367,627882919521738751,627882919521849343,627882919521861631,627882919521865727,627882919522021375,627882919522037759,627882919522054143,627882919522058239,627882919522652159,627882919522656255,627882919522770943,627882919522787327,627882919522902015,627882919522910207,627882919523069951,627882919523086335,627882919538020351,627882919538028543,627882919538139135,627882919538147327,627882919538155519,627882919540690943,627882919540711423,627882919540715519,627882919541714943,627882919541719039,627882919541952511,627882919541960703,627882919542001663,627882919542009855,627882919542022143,627882919542071295,627882919542079487,627882919542263807,627882919542267903,627882919542288383,627882919542312959,627882919542317055] 
+[634600058495592959,634600058495593983,634600058495648255,634600058495649279,634600058495650815,634600058495658495,634600058495659519,634600058503304191,634600058503306239,634600058503341567,634600058503343615,634600058503356927,634600058503358463,634600058503358975,634600058503402495,634600058503404543,634600058503410687,634600058503411711,634600058503431679,634600058503432191,634600058503444991,634600058503445503,634600058503455231,634600058503455743,634600058503458303,634600058503472639,634600058503474687,634600058503487999,634600058503489535,634600058503491071,634600058504447487,634600058504447999,634600058504460799,634600058504461311,634600058504471551,634600058504472575,634600058507233791,634600058507234303,634600058507235839,634600058507236351,634600058507238911,634600058507249151,634600058507249663,634600058507280895,634600058507281919,634600058507283455,634600058507408383,634600058507409407,634600058507430399,634600058507430911,634600058507432447,634600058507432959,634600058507435519,634600058507444735,634600058507446271,634600058507446783,634600058508281343,634600058508282367,634600058508283391,634600058508289023,634600058508291071,634600058508300799,634600058508301823,634600058508303359,634600058508379647,634600058508381695,634600058508387327,634600058508389375,634600058508395007,634600058508396543,634600058508397055,634600058508446207,634600058508447231,634600058508464639,634600058508465663,634600058508467199,634600058508469247,634600058508470271] +[635544851676508863,635544851676508927,635544851676509247,635544851676509311,635544851676509375,635544851676510015,635544851676510079,635544851676961407,635544851676961663,635544851676977791,635544851676978047,635544851677184319,635544851677184447,635544851677185087,635544851677185215,635544851677185407,635544851677186175,635544851677186303,635544851677192511,635544851677192639,635544851677193279,635544851677193599,635544851677193663,635544851677194367,635544851677194495,635544851677203071,635544851677203327,635544851677205567,635544851677205695,635544851677205823,635544851677207743,635544851677207999,635544851677266623,635544851677266687,635544851677267007,635544851677267071,635544851677267263,635544851677267775,635544851677267839,635544851677269183,635544851677269439,635544851677270719,635544851677270783,635544851677271103,635544851677271167,635544851677271487,635544851677271871,635544851677271935,635544851677316159,635544851677316351,635544851677316543,635544851677325951,635544851677326207,635544851677328447,635544851677328639,635544851677328703,635544851677330623,635544851677330879,635544851677340351,635544851677340415,635544851677340735,635544851677340799,635544851677341119,635544851677341503,635544851677341567,635544851677389887,635544851677390079,635544851677390143,635544851677392063,635544851677392319,635544851677397311,635544851677397439,635544851677398079,635544851677398207,635544851677398399,635544851677399167,635544851677399295,635544851677405503,635544851677405631,635544851677406271,635544851677406463,635544851677406591,635544851677407359,635544851677407487] 
+[639763125756235855,639763125756235887,639763125756236439,639763125756236471,639763125756236679,639763125756236703,639763125756236711,639763125756237391,639763125756237423,639763125756238487,639763125756238519,639763125756238599,639763125756238615,639763125756238639,639763125756238727,639763125756238751,639763125756238759,639763125756266535,639763125756266551,639763125756266823,639763125756266839,639763125756266863,639763125756266895,639763125756266911,639763125756267559,639763125756267575,639763125756267847,639763125756267863,639763125756267887,639763125756267911,639763125756267919,639763125756267959,639763125756286007,639763125756286215,639763125756286239,639763125756286247,639763125756286351,639763125756286367,639763125756288023,639763125756288055,639763125756288143,639763125756288175,639763125756288263,639763125756288287,639763125756288295,639763125756291095,639763125756291103,639763125756291303,639763125756291311,639763125756291463,639763125756291471,639763125756291495,639763125756291607,639763125756291615,639763125756291815,639763125756291823,639763125756291975,639763125756291983,639763125756292023,639763125756403919,639763125756403935,639763125756404263,639763125756404271,639763125756404295,639763125756404303,639763125756404343,639763125756404503,639763125756404511,639763125756404647,639763125756404663,639763125756404943,639763125756404959,639763125756405511,639763125756405527,639763125756405551,639763125756405671,639763125756405687,639763125756408871,639763125756408879,639763125756408903,639763125756408911,639763125756408951,639763125756409111,639763125756409119,639763125756409367,639763125756409399,639763125756409487,639763125756409519,639763125756409623,639763125756409631] +[644178757620498449,644178757620498453,644178757620498458,644178757620498462,644178757620498480,644178757620498483,644178757620498484,644178757620498628,644178757620498629,644178757620498666,644178757620498667,644178757620498672,644178757620498673,644178757620498676,644178757620498705,644178757620498709,644178757620498714,644178757620498718,644178757620498736,644178757620498739,644178757620498740,644178757620498948,644178757620498977,644178757620498979,644178757620498992,644178757620498993,644178757620498998,644178757620499268,644178757620499270,644178757620499280,644178757620499282,644178757620499285,644178757620499297,644178757620499299,644178757620499332,644178757620499333,644178757620499370,644178757620499371,644178757620499376,644178757620499377,644178757620499382,644178757620500492,644178757620500494,644178757620500504,644178757620500506,644178757620500509,644178757620500521,644178757620500523,644178757620500620,644178757620500622,644178757620500632,644178757620500634,644178757620500637,644178757620500649,644178757620500651,644178757620500810,644178757620500811,644178757620500824,644178757620500826,644178757620500829,644178757620501064,644178757620501067,644178757620501068,644178757620501089,644178757620501093,644178757620501098,644178757620501102,644178757620501320,644178757620501323,644178757620501324,644178757620501345,644178757620501349,644178757620501354,644178757620501358,644178757620505988,644178757620505990,644178757620506000,644178757620506002,644178757620506005,644178757620506017,644178757620506019,644178757620506021,644178757620520074,644178757620520075,644178757620520080,644178757620520081,644178757620520086,644178757620520092,644178757620520093,644178757620520138,644178757620520139,644178757620520144,644178757620520145,644178757620520150,644178757620520156,644178757620520157] diff --git 
a/tests/queries/0_stateless/02293_h3_hex_ring.sql b/tests/queries/0_stateless/02293_h3_hex_ring.sql new file mode 100644 index 00000000000..5651f5ce557 --- /dev/null +++ b/tests/queries/0_stateless/02293_h3_hex_ring.sql @@ -0,0 +1,35 @@ +-- Tags: no-fasttest + +SELECT h3HexRing(581276613233082367, toUInt16(0)); +SELECT h3HexRing(579205132326352334, toUInt16(1)) as hexRing; -- { serverError 117 } +SELECT h3HexRing(581276613233082367, -1); -- { serverError 43 } +SELECT h3HexRing(581276613233082367, toUInt16(-1)); -- { serverError 12 } + +DROP TABLE IF EXISTS h3_indexes; + +-- Test h3 indices and k selected from original test fixture: https://github.com/uber/h3/blob/master/src/apps/testapps + +CREATE TABLE h3_indexes (h3_index UInt64, k UInt16) ENGINE = Memory; + + +INSERT INTO h3_indexes VALUES (581276613233082367,1); +INSERT INTO h3_indexes VALUES (581263419093549055,2); +INSERT INTO h3_indexes VALUES (589753847883235327,3); +INSERT INTO h3_indexes VALUES (594082350283882495,4); +INSERT INTO h3_indexes VALUES (598372386957426687,5); +INSERT INTO h3_indexes VALUES (599542359671177215,6); +INSERT INTO h3_indexes VALUES (604296355086598143,7); +INSERT INTO h3_indexes VALUES (608785214872748031,8); +INSERT INTO h3_indexes VALUES (615732192485572607,9); +INSERT INTO h3_indexes VALUES (617056794467368959,10); +INSERT INTO h3_indexes VALUES (624586477873168383,11); +INSERT INTO h3_indexes VALUES (627882919484481535,12); +INSERT INTO h3_indexes VALUES (634600058503392255,13); +INSERT INTO h3_indexes VALUES (635544851677385791,14); +INSERT INTO h3_indexes VALUES (639763125756281263,15); +INSERT INTO h3_indexes VALUES (644178757620501158,16); + + +SELECT arraySort(h3HexRing(h3_index, k)) FROM h3_indexes ORDER BY h3_index; + +DROP TABLE h3_indexes; diff --git a/tests/queries/0_stateless/02293_h3_line.reference b/tests/queries/0_stateless/02293_h3_line.reference new file mode 100644 index 00000000000..1211776b7f1 --- /dev/null +++ b/tests/queries/0_stateless/02293_h3_line.reference @@ -0,0 +1,28 @@ +[590080540275638271,590080471556161535,590080883873021951,590106516237844479,590104385934065663,590103630019821567,590103561300344831] +[590080540275638271,590080471556161535,590080608995115007,590104454653542399,590104385934065663,590104523373019135,590103767458775039] +[590080540275638271,590080471556161535,590080608995115007,590104454653542399,590104111056158719,590104523373019135,590105554165170175] +[590080540275638271,590080677714591743,590080608995115007,590104179775635455,590104317214588927,590104248495112191,590105279287263231] +[590080540275638271,590077585338138623,590077310460231679,590079097166626815,590078822288719871,590079028447150079,590094009293078527] +[590080540275638271,590077585338138623,590077310460231679,590079097166626815,590079165886103551,590078891008196607,590092978500927487] +[590080540275638271,590077585338138623,590077173021278207,590077379179708415,590079165886103551,590077860216045567,590092841061974015] +[590080540275638271,590080815153545215,590079784361394175,590096483194241023,590096758072147967,590095727279996927,590094833926799359] +[590080540275638271,590080815153545215,590096620633194495,590096414474764287,590096758072147967,590094971365752831,590094765207322623] +[590080540275638271,590080815153545215,590096620633194495,590096414474764287,590096689352671231,590094902646276095,590095177524183039] +[590080540275638271,590080815153545215,590096620633194495,590096414474764287,590096826791624703,590095933438427135,590096208316334079,590098269900636159] 
+[590000619524194303,590000344646287359,590000413365764095,589998351781462015,590000894402101247,589998832817799167,589998901537275903,589998626659368959,589972994294546431] +[590000619524194303,590000207207333887,590000413365764095,590001169280008191,590000894402101247,590000963121577983,589975330756755455,589975055878848511,589975124598325247] +[590000619524194303,590000207207333887,590000413365764095,590001169280008191,590000756963147775,590000825682624511,589975330756755455,589974918439895039,589974987159371775] +[590000619524194303,590000207207333887,590000275926810623,590001031841054719,590000756963147775,590000825682624511,589975193317801983,589975262037278719,589973956367220735] +[590000619524194303,590000207207333887,590000275926810623,590001031841054719,590001100560531455,589990998797451263,589991067516927999,589974231245127679,589973818928267263] +[590000619524194303,590000207207333887,590000275926810623,590001031841054719,589990517761114111,589991273675358207,589990861358497791,589990930077974527,589974093806174207] +[590000619524194303,590000482085240831,590277902612824063,590278177490731007,589992648064892927,589992716784369663,589992579345416191,589991548553265151,589991411114311679] +[590000619524194303,590000482085240831,590277902612824063,590278177490731007,589992648064892927,589992510625939455,589992854223323135,589991823431172095,589991685992218623] +[590000619524194303,590000482085240831,590277902612824063,590278177490731007,590278108771254271,589992922942799871,589992785503846399,590126170008190975,590126444886097919] +[590000550804717567,590000207207333887,590000619524194303,590001650316345343,590001581596868607,590001719035822079,590259760670965759,590260654024163327,590260379146256383,590257561647710207,590258455000907775,590259485793058815,590259210915151871] +[590000550804717567,590000207207333887,590000619524194303,590001650316345343,590001306718961663,590001719035822079,590260516585209855,590260241707302911,590260379146256383,590258317561954303,590258042684047359,590258180123000831,590168226327953407] +[590000550804717567,590000207207333887,590000619524194303,590001650316345343,590001306718961663,590001444157915135,590260516585209855,590260241707302911,590260447865733119,590258386281431039,590258042684047359,590258248842477567,590167951450046463] +[590000550804717567,590000207207333887,590000344646287359,590001650316345343,590001306718961663,590001444157915135,590260585304686591,590260310426779647,590172005899173887,590258386281431039,590258111403524095,590173105410801663,590173242849755135] +[590000550804717567,590000207207333887,590000344646287359,589998283061985279,589998420500938751,589999451293089791,589999107695706111,589999313854136319,590172555654987775,590172693093941247,590169875595395071,590169600717488127,590169738156441599] +[590000550804717567,590000207207333887,590000413365764095,589998283061985279,589998420500938751,589998145623031807,589999176415182847,589999313854136319,590172624374464511,590172280777080831,590172418216034303,590170356631732223,590170494070685695] +[590000550804717567,590000207207333887,590000413365764095,589998283061985279,589998008184078335,589998145623031807,589999176415182847,590000069768380415,590172624374464511,590172349496557567,590172486935511039,590170425351208959,590170081753825279] 
+[590000550804717567,590000207207333887,590000413365764095,589998283061985279,589998008184078335,589998145623031807,589999932329426943,590000069768380415,589999794890473471,590172349496557567,589984126849777663,590170425351208959,590170150473302015] diff --git a/tests/queries/0_stateless/02293_h3_line.sql b/tests/queries/0_stateless/02293_h3_line.sql new file mode 100644 index 00000000000..01b1a84ef22 --- /dev/null +++ b/tests/queries/0_stateless/02293_h3_line.sql @@ -0,0 +1,44 @@ +-- Tags: no-fasttest + +DROP TABLE IF EXISTS h3_indexes; + +CREATE TABLE h3_indexes (id int, start String, end String) ENGINE = Memory; + +-- test values taken from h3 library test suite + +INSERT INTO h3_indexes VALUES (1, '830631fffffffff','830780fffffffff'); +INSERT INTO h3_indexes VALUES (2, '830631fffffffff','830783fffffffff'); +INSERT INTO h3_indexes VALUES (3, '830631fffffffff','83079dfffffffff'); +INSERT INTO h3_indexes VALUES (4, '830631fffffffff','830799fffffffff'); +INSERT INTO h3_indexes VALUES (5, '830631fffffffff','8306f5fffffffff'); +INSERT INTO h3_indexes VALUES (6, '830631fffffffff','8306e6fffffffff'); +INSERT INTO h3_indexes VALUES (7, '830631fffffffff','8306e4fffffffff'); +INSERT INTO h3_indexes VALUES (8, '830631fffffffff','830701fffffffff'); +INSERT INTO h3_indexes VALUES (9, '830631fffffffff','830700fffffffff'); +INSERT INTO h3_indexes VALUES (10, '830631fffffffff','830706fffffffff'); +INSERT INTO h3_indexes VALUES (11, '830631fffffffff','830733fffffffff'); +INSERT INTO h3_indexes VALUES (12, '8301a6fffffffff','830014fffffffff'); +INSERT INTO h3_indexes VALUES (13, '8301a6fffffffff','830033fffffffff'); +INSERT INTO h3_indexes VALUES (14, '8301a6fffffffff','830031fffffffff'); +INSERT INTO h3_indexes VALUES (15, '8301a6fffffffff','830022fffffffff'); +INSERT INTO h3_indexes VALUES (16, '8301a6fffffffff','830020fffffffff'); +INSERT INTO h3_indexes VALUES (17, '8301a6fffffffff','830024fffffffff'); +INSERT INTO h3_indexes VALUES (18, '8301a6fffffffff','830120fffffffff'); +INSERT INTO h3_indexes VALUES (19, '8301a6fffffffff','830124fffffffff'); +INSERT INTO h3_indexes VALUES (20, '8301a6fffffffff','8308cdfffffffff'); +INSERT INTO h3_indexes VALUES (21, '8301a5fffffffff','831059fffffffff'); +INSERT INTO h3_indexes VALUES (22, '8301a5fffffffff','830b2dfffffffff'); +INSERT INTO h3_indexes VALUES (23, '8301a5fffffffff','830b29fffffffff'); +INSERT INTO h3_indexes VALUES (24, '8301a5fffffffff','830b76fffffffff'); +INSERT INTO h3_indexes VALUES (25, '8301a5fffffffff','830b43fffffffff'); +INSERT INTO h3_indexes VALUES (26, '8301a5fffffffff','830b4efffffffff'); +INSERT INTO h3_indexes VALUES (27, '8301a5fffffffff','830b48fffffffff'); +INSERT INTO h3_indexes VALUES (28, '8301a5fffffffff','830b49fffffffff'); + + +SELECT h3Line(stringToH3(start), stringToH3(end)) FROM h3_indexes ORDER BY id; + +SELECT h3Line(0xffffffffffffff, 0xffffffffffffff); -- { serverError 117 } + +DROP TABLE h3_indexes; + diff --git a/tests/queries/0_stateless/02293_hashid.reference b/tests/queries/0_stateless/02293_hashid.reference new file mode 100644 index 00000000000..9ae4cce3944 --- /dev/null +++ b/tests/queries/0_stateless/02293_hashid.reference @@ -0,0 +1,11 @@ +0 gY +1 jR +2 k5 +3 l5 +4 mO +0 pbgkmdljlpjoapne +1 akemglnjepjpodba +2 obmgndljgajpkeao +3 dldokmpjpgjgeanb +4 nkdlpgajngjnobme +YQrvD5XGvbx diff --git a/tests/queries/0_stateless/02293_hashid.sql b/tests/queries/0_stateless/02293_hashid.sql new file mode 100644 index 00000000000..145bd76ccbf --- /dev/null +++ b/tests/queries/0_stateless/02293_hashid.sql @@ -0,0 +1,5 @@ 
+SET allow_experimental_hash_functions = 1; + +select number, hashid(number) from system.numbers limit 5; +select number, hashid(number, 's3cr3t', 16, 'abcdefghijklmnop') from system.numbers limit 5; +select hashid(1234567890123456, 's3cr3t'); diff --git a/tests/queries/0_stateless/02294_system_certificates.reference b/tests/queries/0_stateless/02294_system_certificates.reference new file mode 100644 index 00000000000..4655f996c40 --- /dev/null +++ b/tests/queries/0_stateless/02294_system_certificates.reference @@ -0,0 +1,10 @@ +version Int32 +serial_number Nullable(String) +signature_algo Nullable(String) +issuer Nullable(String) +not_before Nullable(String) +not_after Nullable(String) +subject Nullable(String) +pkey_algo Nullable(String) +path String +default UInt8 diff --git a/tests/queries/0_stateless/02294_system_certificates.sql b/tests/queries/0_stateless/02294_system_certificates.sql new file mode 100644 index 00000000000..1fb70f52401 --- /dev/null +++ b/tests/queries/0_stateless/02294_system_certificates.sql @@ -0,0 +1,2 @@ +# Check table structure +DESCRIBE system.certificates; diff --git a/tests/queries/0_stateless/02295_GROUP_BY_AggregateFunction.reference b/tests/queries/0_stateless/02295_GROUP_BY_AggregateFunction.reference new file mode 100644 index 00000000000..68355daf334 --- /dev/null +++ b/tests/queries/0_stateless/02295_GROUP_BY_AggregateFunction.reference @@ -0,0 +1,30 @@ +-- { echoOn } +SELECT grp_aggreg FROM data_02295 GROUP BY a, grp_aggreg SETTINGS optimize_aggregation_in_order = 0 FORMAT JSONEachRow; +{"grp_aggreg":"\u0002\u0002\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0003\u0000\u0000\u0000\u0000\u0000\u0000\u0000"} +{"grp_aggreg":"\u0002\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0001\u0000\u0000\u0000\u0000\u0000\u0000\u0000"} +SELECT grp_aggreg FROM data_02295 GROUP BY a, grp_aggreg SETTINGS optimize_aggregation_in_order = 1 FORMAT JSONEachRow; +{"grp_aggreg":"\u0002\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0001\u0000\u0000\u0000\u0000\u0000\u0000\u0000"} +{"grp_aggreg":"\u0002\u0002\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0003\u0000\u0000\u0000\u0000\u0000\u0000\u0000"} +SELECT grp_aggreg FROM data_02295 GROUP BY a, grp_aggreg WITH TOTALS SETTINGS optimize_aggregation_in_order = 0 FORMAT JSONEachRow; +{"grp_aggreg":"\u0002\u0002\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0003\u0000\u0000\u0000\u0000\u0000\u0000\u0000"} +{"grp_aggreg":"\u0002\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0001\u0000\u0000\u0000\u0000\u0000\u0000\u0000"} +SELECT grp_aggreg FROM data_02295 GROUP BY a, grp_aggreg WITH TOTALS SETTINGS optimize_aggregation_in_order = 1 FORMAT JSONEachRow; +{"grp_aggreg":"\u0002\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0001\u0000\u0000\u0000\u0000\u0000\u0000\u0000"} +{"grp_aggreg":"\u0002\u0002\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0003\u0000\u0000\u0000\u0000\u0000\u0000\u0000"} +-- regression for incorrect positions passed to finalizeChunk() +SELECT a, min(b), max(b) FROM data_02295 GROUP BY a ORDER BY a, count() SETTINGS optimize_aggregation_in_order = 1; +0 0 0 +1 0 0 +SELECT a, min(b), max(b) FROM data_02295 GROUP BY a ORDER BY a, count() SETTINGS optimize_aggregation_in_order = 1, max_threads = 1; +0 0 0 +1 0 0 +SELECT a, min(b), max(b) FROM data_02295 GROUP BY a WITH TOTALS ORDER BY a, count() SETTINGS optimize_aggregation_in_order = 1; +0 0 0 +1 0 0 + +0 0 0 +SELECT a, min(b), max(b) FROM data_02295 GROUP BY a WITH TOTALS ORDER BY a, count() SETTINGS optimize_aggregation_in_order = 1, max_threads = 1; +0 0 
0 +1 0 0 + +0 0 0 diff --git a/tests/queries/0_stateless/02295_GROUP_BY_AggregateFunction.sql b/tests/queries/0_stateless/02295_GROUP_BY_AggregateFunction.sql new file mode 100644 index 00000000000..d4e9369c1c8 --- /dev/null +++ b/tests/queries/0_stateless/02295_GROUP_BY_AggregateFunction.sql @@ -0,0 +1,24 @@ +drop table if exists data_02295; + +create table data_02295 ( + -- the order of "a" and "b" is important here + -- (since finalizeChunk() accepts positions and they may be wrong) + b Int64, + a Int64, + grp_aggreg AggregateFunction(groupArrayArray, Array(UInt64)) +) engine = MergeTree() order by a; +insert into data_02295 select 0 b, intDiv(number, 2) a, groupArrayArrayState([toUInt64(number)]) from numbers(4) group by a, b; + +-- { echoOn } +SELECT grp_aggreg FROM data_02295 GROUP BY a, grp_aggreg SETTINGS optimize_aggregation_in_order = 0 FORMAT JSONEachRow; +SELECT grp_aggreg FROM data_02295 GROUP BY a, grp_aggreg SETTINGS optimize_aggregation_in_order = 1 FORMAT JSONEachRow; +SELECT grp_aggreg FROM data_02295 GROUP BY a, grp_aggreg WITH TOTALS SETTINGS optimize_aggregation_in_order = 0 FORMAT JSONEachRow; +SELECT grp_aggreg FROM data_02295 GROUP BY a, grp_aggreg WITH TOTALS SETTINGS optimize_aggregation_in_order = 1 FORMAT JSONEachRow; +-- regression for incorrect positions passed to finalizeChunk() +SELECT a, min(b), max(b) FROM data_02295 GROUP BY a ORDER BY a, count() SETTINGS optimize_aggregation_in_order = 1; +SELECT a, min(b), max(b) FROM data_02295 GROUP BY a ORDER BY a, count() SETTINGS optimize_aggregation_in_order = 1, max_threads = 1; +SELECT a, min(b), max(b) FROM data_02295 GROUP BY a WITH TOTALS ORDER BY a, count() SETTINGS optimize_aggregation_in_order = 1; +SELECT a, min(b), max(b) FROM data_02295 GROUP BY a WITH TOTALS ORDER BY a, count() SETTINGS optimize_aggregation_in_order = 1, max_threads = 1; +-- { echoOff } + +drop table data_02295; diff --git a/tests/queries/0_stateless/02302_defaults_in_columnar_formats.reference b/tests/queries/0_stateless/02302_defaults_in_columnar_formats.reference new file mode 100644 index 00000000000..9de3c47b3b2 --- /dev/null +++ b/tests/queries/0_stateless/02302_defaults_in_columnar_formats.reference @@ -0,0 +1,3 @@ +1 42 43 +1 42 43 +1 42 43 diff --git a/tests/queries/0_stateless/02302_defaults_in_columnar_formats.sql b/tests/queries/0_stateless/02302_defaults_in_columnar_formats.sql new file mode 100644 index 00000000000..5946f2d37e5 --- /dev/null +++ b/tests/queries/0_stateless/02302_defaults_in_columnar_formats.sql @@ -0,0 +1,8 @@ +-- Tags: no-fasttest, no-parallel + +insert into function file(data_02302.parquet) select 1 as x settings engine_file_truncate_on_insert=1; +select * from file(data_02302.parquet, auto, 'x UInt8, y default 42, z default x + y') settings input_format_parquet_allow_missing_columns=1; +insert into function file(data_02302.orc) select 1 as x settings engine_file_truncate_on_insert=1; +select * from file(data_02302.orc, auto, 'x UInt8, y default 42, z default x + y') settings input_format_orc_allow_missing_columns=1; +insert into function file(data_02302.arrow) select 1 as x settings engine_file_truncate_on_insert=1; +select * from file(data_02302.arrow, auto, 'x UInt8, y default 42, z default x + y') settings input_format_arrow_allow_missing_columns=1; diff --git a/tests/queries/0_stateless/02302_projections_GROUP_BY_ORDERY_BY_optimize_aggregation_in_order.reference b/tests/queries/0_stateless/02302_projections_GROUP_BY_ORDERY_BY_optimize_aggregation_in_order.reference new file mode 100644 index 
00000000000..0a83fa24d49 --- /dev/null +++ b/tests/queries/0_stateless/02302_projections_GROUP_BY_ORDERY_BY_optimize_aggregation_in_order.reference @@ -0,0 +1,13 @@ +-- { echoOn } +select x + y, sum(x - y) as s from test_agg_proj_02302 group by x + y order by s desc limit 5 settings allow_experimental_projection_optimization=1, optimize_aggregation_in_order=0, optimize_read_in_order=0; +15 480 +14 450 +13 420 +12 390 +11 360 +select x + y, sum(x - y) as s from test_agg_proj_02302 group by x + y order by s desc limit 5 settings allow_experimental_projection_optimization=1, optimize_aggregation_in_order=1, optimize_read_in_order=1; +15 480 +14 450 +13 420 +12 390 +11 360 diff --git a/tests/queries/0_stateless/02302_projections_GROUP_BY_ORDERY_BY_optimize_aggregation_in_order.sql b/tests/queries/0_stateless/02302_projections_GROUP_BY_ORDERY_BY_optimize_aggregation_in_order.sql new file mode 100644 index 00000000000..be050cc3080 --- /dev/null +++ b/tests/queries/0_stateless/02302_projections_GROUP_BY_ORDERY_BY_optimize_aggregation_in_order.sql @@ -0,0 +1,13 @@ +-- Tags: no-s3-storage + +drop table if exists test_agg_proj_02302; + +create table test_agg_proj_02302 (x Int32, y Int32, PROJECTION x_plus_y (select sum(x - y), argMax(x, y) group by x + y)) ENGINE = MergeTree order by tuple() settings index_granularity = 1; +insert into test_agg_proj_02302 select intDiv(number, 2), -intDiv(number,3) - 1 from numbers(100); + +-- { echoOn } +select x + y, sum(x - y) as s from test_agg_proj_02302 group by x + y order by s desc limit 5 settings allow_experimental_projection_optimization=1, optimize_aggregation_in_order=0, optimize_read_in_order=0; +select x + y, sum(x - y) as s from test_agg_proj_02302 group by x + y order by s desc limit 5 settings allow_experimental_projection_optimization=1, optimize_aggregation_in_order=1, optimize_read_in_order=1; + +-- { echoOff } +drop table test_agg_proj_02302; diff --git a/tests/queries/0_stateless/02303_cast_nullable_to_custom_types.reference b/tests/queries/0_stateless/02303_cast_nullable_to_custom_types.reference new file mode 100644 index 00000000000..159ee805f26 --- /dev/null +++ b/tests/queries/0_stateless/02303_cast_nullable_to_custom_types.reference @@ -0,0 +1,32 @@ +\N +\N +\N +\N +\N +\N +\N +\N +\N +true +\N +0.0.0.0 +\N +:: +\N +true +\N +0.0.0.0 +\N +0.0.0.0 +\N +\N +\N +0 +\N +:: +\N +:: +\N +\N +\N +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0 diff --git a/tests/queries/0_stateless/02303_cast_nullable_to_custom_types.sql b/tests/queries/0_stateless/02303_cast_nullable_to_custom_types.sql new file mode 100644 index 00000000000..1a0d9a4c830 --- /dev/null +++ b/tests/queries/0_stateless/02303_cast_nullable_to_custom_types.sql @@ -0,0 +1,23 @@ +select CAST(CAST(NULL, 'Nullable(String)'), 'Nullable(Bool)'); +select CAST(CAST(NULL, 'Nullable(String)'), 'Nullable(IPv4)'); +select CAST(CAST(NULL, 'Nullable(String)'), 'Nullable(IPv6)'); + +select toBool(CAST(NULL, 'Nullable(String)')); +select toIPv4(CAST(NULL, 'Nullable(String)')); +select IPv4StringToNum(CAST(NULL, 'Nullable(String)')); +select toIPv6(CAST(NULL, 'Nullable(String)')); +select IPv6StringToNum(CAST(NULL, 'Nullable(String)')); + +select CAST(number % 2 ? 'true' : NULL, 'Nullable(Bool)') from numbers(2); +select CAST(number % 2 ? '0.0.0.0' : NULL, 'Nullable(IPv4)') from numbers(2); +select CAST(number % 2 ? '0000:0000:0000:0000:0000:0000:0000:0000' : NULL, 'Nullable(IPv6)') from numbers(2); + +select toBool(number % 2 ? 'true' : NULL) from numbers(2); +select toIPv4(number % 2 ? 
'0.0.0.0' : NULL) from numbers(2); +select toIPv4OrDefault(number % 2 ? '' : NULL) from numbers(2); +select toIPv4OrNull(number % 2 ? '' : NULL) from numbers(2); +select IPv4StringToNum(number % 2 ? '0.0.0.0' : NULL) from numbers(2); +select toIPv6(number % 2 ? '0000:0000:0000:0000:0000:0000:0000:0000' : NULL) from numbers(2); +select toIPv6OrDefault(number % 2 ? '' : NULL) from numbers(2); +select toIPv6OrNull(number % 2 ? '' : NULL) from numbers(2); +select IPv6StringToNum(number % 2 ? '0000:0000:0000:0000:0000:0000:0000:0000' : NULL) from numbers(2); diff --git a/tests/queries/0_stateless/02303_query_kind.reference b/tests/queries/0_stateless/02303_query_kind.reference new file mode 100644 index 00000000000..51addfdb857 --- /dev/null +++ b/tests/queries/0_stateless/02303_query_kind.reference @@ -0,0 +1,44 @@ +clickhouse-client --query_kind secondary_query -q explain plan header=1 select toString(dummy) as dummy from system.one group by dummy +Expression ((Projection + Before ORDER BY)) +Header: dummy String + Aggregating + Header: toString(dummy) String + Expression (Before GROUP BY) + Header: toString(dummy) String + SettingQuotaAndLimits (Set limits and quota after reading from storage) + Header: dummy UInt8 + ReadFromStorage (SystemOne) + Header: dummy UInt8 +clickhouse-local --query_kind secondary_query -q explain plan header=1 select toString(dummy) as dummy from system.one group by dummy +Expression ((Projection + Before ORDER BY)) +Header: dummy String + Aggregating + Header: toString(dummy) String + Expression (Before GROUP BY) + Header: toString(dummy) String + SettingQuotaAndLimits (Set limits and quota after reading from storage) + Header: dummy UInt8 + ReadFromStorage (SystemOne) + Header: dummy UInt8 +clickhouse-client --query_kind initial_query -q explain plan header=1 select toString(dummy) as dummy from system.one group by dummy +Expression ((Projection + Before ORDER BY)) +Header: dummy String + Aggregating + Header: dummy UInt8 + Expression (Before GROUP BY) + Header: dummy UInt8 + SettingQuotaAndLimits (Set limits and quota after reading from storage) + Header: dummy UInt8 + ReadFromStorage (SystemOne) + Header: dummy UInt8 +clickhouse-local --query_kind initial_query -q explain plan header=1 select toString(dummy) as dummy from system.one group by dummy +Expression ((Projection + Before ORDER BY)) +Header: dummy String + Aggregating + Header: dummy UInt8 + Expression (Before GROUP BY) + Header: dummy UInt8 + SettingQuotaAndLimits (Set limits and quota after reading from storage) + Header: dummy UInt8 + ReadFromStorage (SystemOne) + Header: dummy UInt8 diff --git a/tests/queries/0_stateless/02303_query_kind.sh b/tests/queries/0_stateless/02303_query_kind.sh new file mode 100755 index 00000000000..5ad5f9ec6f4 --- /dev/null +++ b/tests/queries/0_stateless/02303_query_kind.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +function run_query() +{ + echo "clickhouse-client $*" + $CLICKHOUSE_CLIENT "$@" + + echo "clickhouse-local $*" + $CLICKHOUSE_LOCAL "$@" +} +run_query --query_kind secondary_query -q "explain plan header=1 select toString(dummy) as dummy from system.one group by dummy" +run_query --query_kind initial_query -q "explain plan header=1 select toString(dummy) as dummy from system.one group by dummy" diff --git a/tests/queries/0_stateless/02304_grouping_sets_with_rollup_cube.reference b/tests/queries/0_stateless/02304_grouping_sets_with_rollup_cube.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02304_grouping_sets_with_rollup_cube.sql b/tests/queries/0_stateless/02304_grouping_sets_with_rollup_cube.sql new file mode 100644 index 00000000000..8a53ccb5121 --- /dev/null +++ b/tests/queries/0_stateless/02304_grouping_sets_with_rollup_cube.sql @@ -0,0 +1,25 @@ +-- Tags: no-backward-compatibility-check:22.5.1 + +SELECT + number +FROM + numbers(10) +GROUP BY + GROUPING SETS + ( + number, + number % 2 + ) + WITH ROLLUP; -- { serverError NOT_IMPLEMENTED } + +SELECT + number +FROM + numbers(10) +GROUP BY + GROUPING SETS + ( + number, + number % 2 + ) + WITH CUBE; -- { serverError NOT_IMPLEMENTED } diff --git a/tests/queries/0_stateless/02304_orc_arrow_parquet_string_as_string.reference b/tests/queries/0_stateless/02304_orc_arrow_parquet_string_as_string.reference new file mode 100644 index 00000000000..f0ab418f0ce --- /dev/null +++ b/tests/queries/0_stateless/02304_orc_arrow_parquet_string_as_string.reference @@ -0,0 +1,3 @@ +s Nullable(String) +s Nullable(String) +s Nullable(String) diff --git a/tests/queries/0_stateless/02304_orc_arrow_parquet_string_as_string.sql b/tests/queries/0_stateless/02304_orc_arrow_parquet_string_as_string.sql new file mode 100644 index 00000000000..2d971bba9db --- /dev/null +++ b/tests/queries/0_stateless/02304_orc_arrow_parquet_string_as_string.sql @@ -0,0 +1,8 @@ +-- Tags: no-fasttest, no-parallel + +insert into function file(data_02304.parquet) select 'hello' as s from numbers(3) settings engine_file_truncate_on_insert=1, output_format_parquet_string_as_string=1; +desc file(data_02304.parquet); +insert into function file(data_02304.orc) select 'hello' as s from numbers(3) settings engine_file_truncate_on_insert=1, output_format_orc_string_as_string=1; +desc file(data_02304.orc); +insert into function file(data_02304.arrow) select 'hello' as s from numbers(3) settings engine_file_truncate_on_insert=1, output_format_arrow_string_as_string=1; +desc file(data_02304.arrow); diff --git a/tests/queries/0_stateless/02305_schema_inference_with_globs.reference b/tests/queries/0_stateless/02305_schema_inference_with_globs.reference new file mode 100644 index 00000000000..3a92ee54da4 --- /dev/null +++ b/tests/queries/0_stateless/02305_schema_inference_with_globs.reference @@ -0,0 +1,6 @@ +2 +4 +6 +8 +x Nullable(String) +x Nullable(String) diff --git a/tests/queries/0_stateless/02305_schema_inference_with_globs.sh b/tests/queries/0_stateless/02305_schema_inference_with_globs.sh new file mode 100755 index 00000000000..19506c84645 --- /dev/null +++ b/tests/queries/0_stateless/02305_schema_inference_with_globs.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "insert into function file(data1.jsonl) select NULL as x from numbers(10) settings engine_file_truncate_on_insert=1" +$CLICKHOUSE_CLIENT -q "insert into function file(data2.jsonl) select NULL as x from numbers(10) settings engine_file_truncate_on_insert=1" +$CLICKHOUSE_CLIENT -q "insert into function file(data3.jsonl) select NULL as x from numbers(10) settings engine_file_truncate_on_insert=1" +$CLICKHOUSE_CLIENT -q "insert into function file(data4.jsonl) select number % 2 ? number : NULL as x from numbers(10) settings engine_file_truncate_on_insert=1" + +$CLICKHOUSE_CLIENT -q "desc file('data*.jsonl') settings input_format_max_rows_to_read_for_schema_inference=8" 2>&1 | grep -c 'ONLY_NULLS_WHILE_READING_SCHEMA'; +$CLICKHOUSE_CLIENT -q "desc file('data*.jsonl') settings input_format_max_rows_to_read_for_schema_inference=16" 2>&1 | grep -c 'ONLY_NULLS_WHILE_READING_SCHEMA'; +$CLICKHOUSE_CLIENT -q "desc file('data*.jsonl') settings input_format_max_rows_to_read_for_schema_inference=24" 2>&1 | grep -c 'ONLY_NULLS_WHILE_READING_SCHEMA'; +$CLICKHOUSE_CLIENT -q "desc file('data*.jsonl') settings input_format_max_rows_to_read_for_schema_inference=31" 2>&1 | grep -c 'ONLY_NULLS_WHILE_READING_SCHEMA'; +$CLICKHOUSE_CLIENT -q "desc file('data*.jsonl') settings input_format_max_rows_to_read_for_schema_inference=32" +$CLICKHOUSE_CLIENT -q "desc file('data*.jsonl') settings input_format_max_rows_to_read_for_schema_inference=100" diff --git a/tests/queries/0_stateless/02306_window_move_row_number_fix.reference b/tests/queries/0_stateless/02306_window_move_row_number_fix.reference new file mode 100644 index 00000000000..dec7d2fabd2 --- /dev/null +++ b/tests/queries/0_stateless/02306_window_move_row_number_fix.reference @@ -0,0 +1 @@ +\N diff --git a/tests/queries/0_stateless/02306_window_move_row_number_fix.sql b/tests/queries/0_stateless/02306_window_move_row_number_fix.sql new file mode 100644 index 00000000000..5bc0c41b3ee --- /dev/null +++ b/tests/queries/0_stateless/02306_window_move_row_number_fix.sql @@ -0,0 +1,2 @@ +-- Tags: no-backward-compatibility-check +SELECT nth_value(NULL, 1048577) OVER (Rows BETWEEN 1023 FOLLOWING AND UNBOUNDED FOLLOWING) diff --git a/tests/queries/0_stateless/02310_generate_multi_columns_with_uuid.reference b/tests/queries/0_stateless/02310_generate_multi_columns_with_uuid.reference new file mode 100644 index 00000000000..bb5ee5c21eb --- /dev/null +++ b/tests/queries/0_stateless/02310_generate_multi_columns_with_uuid.reference @@ -0,0 +1,3 @@ +0 +0 +1 diff --git a/tests/queries/0_stateless/02310_generate_multi_columns_with_uuid.sql b/tests/queries/0_stateless/02310_generate_multi_columns_with_uuid.sql new file mode 100644 index 00000000000..3ab19446b3e --- /dev/null +++ b/tests/queries/0_stateless/02310_generate_multi_columns_with_uuid.sql @@ -0,0 +1,5 @@ +SELECT generateUUIDv4(1) = generateUUIDv4(2); + +SELECT generateUUIDv4() = generateUUIDv4(1); + +SELECT generateUUIDv4(1) = generateUUIDv4(1); diff --git a/tests/queries/1_stateful/00023_totals_limit.reference b/tests/queries/1_stateful/00023_totals_limit.reference index fc4a02662d7..c76452411d7 100644 --- a/tests/queries/1_stateful/00023_totals_limit.reference +++ b/tests/queries/1_stateful/00023_totals_limit.reference @@ -16,7 +16,7 @@ [1604017, "189"] ], - "totals": [0,"4652"], + "totals": [0, "4652"], "rows": 1, diff --git a/tests/queries/1_stateful/00157_cache_dictionary.sql b/tests/queries/1_stateful/00157_cache_dictionary.sql index a8f9d12ab3a..15bd4cbe6d4 
100644 --- a/tests/queries/1_stateful/00157_cache_dictionary.sql +++ b/tests/queries/1_stateful/00157_cache_dictionary.sql @@ -1,23 +1,28 @@ -- Tags: no-tsan, no-parallel +DROP TABLE IF EXISTS test.hits_1m; +CREATE TABLE test.hits_1m as test.hits; +INSERT INTO test.hits_1m SELECT * FROM test.hits LIMIT 1000000; + CREATE DATABASE IF NOT EXISTS db_dict; DROP DICTIONARY IF EXISTS db_dict.cache_hits; CREATE DICTIONARY db_dict.cache_hits (WatchID UInt64, UserID UInt64, SearchPhrase String) PRIMARY KEY WatchID -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'hits' PASSWORD '' DB 'test')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'hits_1m' PASSWORD '' DB 'test')) LIFETIME(MIN 1 MAX 10) LAYOUT(CACHE(SIZE_IN_CELLS 1 QUERY_WAIT_TIMEOUT_MILLISECONDS 60000)); SELECT count() FROM (SELECT WatchID, arrayDistinct(groupArray(dictGetUInt64( 'db_dict.cache_hits', 'UserID', toUInt64(WatchID)))) as arr -FROM test.hits PREWHERE WatchID % 5 == 0 GROUP BY WatchID order by length(arr) desc) WHERE arr = [0]; +FROM test.hits_1m PREWHERE WatchID % 5 == 0 GROUP BY WatchID order by length(arr) desc) WHERE arr = [0]; SELECT count() FROM (SELECT WatchID, arrayDistinct(groupArray(dictGetUInt64( 'db_dict.cache_hits', 'UserID', toUInt64(WatchID)))) as arr -FROM test.hits PREWHERE WatchID % 7 == 0 GROUP BY WatchID order by length(arr) desc) WHERE arr = [0]; +FROM test.hits_1m PREWHERE WatchID % 7 == 0 GROUP BY WatchID order by length(arr) desc) WHERE arr = [0]; SELECT count() FROM (SELECT WatchID, arrayDistinct(groupArray(dictGetUInt64( 'db_dict.cache_hits', 'UserID', toUInt64(WatchID)))) as arr -FROM test.hits PREWHERE WatchID % 13 == 0 GROUP BY WatchID order by length(arr) desc) WHERE arr = [0]; +FROM test.hits_1m PREWHERE WatchID % 13 == 0 GROUP BY WatchID order by length(arr) desc) WHERE arr = [0]; DROP DICTIONARY IF EXISTS db_dict.cache_hits; DROP DATABASE IF EXISTS db_dict; +DROP TABLE IF EXISTS test.hits_1m; diff --git a/utils/changelog-simple/.gitignore b/utils/changelog-simple/.gitignore new file mode 100644 index 00000000000..78caa68e38e --- /dev/null +++ b/utils/changelog-simple/.gitignore @@ -0,0 +1,2 @@ +*.txt +*.json diff --git a/utils/changelog-simple/README.md b/utils/changelog-simple/README.md new file mode 100644 index 00000000000..cd8f8da9b61 --- /dev/null +++ b/utils/changelog-simple/README.md @@ -0,0 +1,21 @@ +## How To Generate Changelog + +Generate github token: +* https://github.com/settings/tokens - keep all checkboxes unchecked, no scopes need to be enabled. + +Dependencies: +``` +sudo apt-get install git curl jq python3 python3-fuzzywuzzy +``` + +Update information about tags: +``` +git fetch --tags +``` + +Usage example: + +``` +export GITHUB_USER=... GITHUB_TOKEN=ghp_... +./changelog.sh v21.5.6.6-stable v21.6.2.7-prestable +``` diff --git a/utils/changelog-simple/changelog.sh b/utils/changelog-simple/changelog.sh new file mode 100755 index 00000000000..52817acfae4 --- /dev/null +++ b/utils/changelog-simple/changelog.sh @@ -0,0 +1,96 @@ +#!/bin/bash +set -e + +script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" + +from="$1" +to="$2" +log_command=(git log "$from..$to" --first-parent) + +"${log_command[@]}" > "changelog-log.txt" + +# Check for diamond merges. +if "${log_command[@]}" --oneline --grep "Merge branch '" | grep '' +then + # DO NOT ADD automated handling of diamond merges to this script. + # It is an unsustainable way to work with git, and it MUST be visible. + echo Warning: suspected diamond merges above. 
+ echo Some commits will be missed, review these manually. +fi + +# Search for PR numbers in commit messages. First variant is normal merge, and second +# variant is squashed. Next are some backport message variants. +find_prs=(sed -n "s/^.*merg[eding]*.*#\([[:digit:]]\+\).*$/\1/Ip; + s/^.*(#\([[:digit:]]\+\))$/\1/p; + s/^.*back[- ]*port[ed of]*.*#\([[:digit:]]\+\).*$/\1/Ip; + s/^.*cherry[- ]*pick[ed of]*.*#\([[:digit:]]\+\).*$/\1/Ip") + +# awk is to filter out small task numbers from different task tracker, which are +# referenced by documentation commits like '* DOCSUP-824: query log (#115)'. +"${find_prs[@]}" "changelog-log.txt" | sort -rn | uniq | awk '$0 > 1000 { print $0 }' > "changelog-prs.txt" + +echo "$(wc -l < "changelog-prs.txt") PRs added between $from and $to." +if [ $(wc -l < "changelog-prs.txt") -eq 0 ] ; then exit 0 ; fi + +function github_download() +{ + local url=${1} + local file=${2} + if ! [ -f "$file" ] + then + echo "curl -u \"$GITHUB_USER:***\" -sSf \"$url\" > \"$file\"" + + if ! curl -u "$GITHUB_USER:$GITHUB_TOKEN" \ + -sSf "$url" \ + > "$file" + then + >&2 echo "Failed to download '$url' to '$file'. Contents: '$(cat "$file")'." + rm "$file" + return 1 + fi + sleep 0.1 + fi +} + +rm changelog-prs-filtered.txt &> /dev/null ||: +for pr in $(cat "changelog-prs.txt") +do + # Download PR info from github. + file="pr$pr.json" + github_download "https://api.github.com/repos/ClickHouse/ClickHouse/pulls/$pr" "$file" || continue + + if ! [ "$pr" == "$(jq -r .number "$file")" ] + then + >&2 echo "Got wrong data for PR #$pr (please check and remove '$file')." + continue + fi + + # Filter out PRs by bots. + user_login=$(jq -r .user.login "$file") + + filter_bot=$(echo "$user_login" | grep -q "\[bot\]$" && echo "Skip." || echo "Ok." ||:) + filter_robot=$(echo "$user_login" | grep -q "robot-clickhouse" && echo "Skip." || echo "Ok." ||:) + + if [ "Skip." == "$filter_robot" ] || [ "Skip." == "$filter_bot" ] + then + continue + fi + + # Download author info from github. + user_id=$(jq -r .user.id "$file") + user_file="user$user_id.json" + github_download "$(jq -r .user.url "$file")" "$user_file" || continue + + if ! [ "$user_id" == "$(jq -r .id "$user_file")" ] + then + >&2 echo "Got wrong data for user #$user_id (please check and remove '$user_file')." + continue + fi + + echo "$pr" >> changelog-prs-filtered.txt +done + +echo "### ClickHouse release $to FIXME as compared to $from +" > changelog.md +"$script_dir/format-changelog.py" changelog-prs-filtered.txt >> changelog.md +cat changelog.md diff --git a/utils/changelog-simple/format-changelog.py b/utils/changelog-simple/format-changelog.py new file mode 100755 index 00000000000..d5e1518270e --- /dev/null +++ b/utils/changelog-simple/format-changelog.py @@ -0,0 +1,164 @@ +#!/usr/bin/python3 + +import argparse +import collections +import fuzzywuzzy.fuzz +import itertools +import json +import os +import re +import sys + +parser = argparse.ArgumentParser(description="Format changelog for given PRs.") +parser.add_argument( + "file", + metavar="FILE", + type=argparse.FileType("r", encoding="utf-8"), + nargs="?", + default=sys.stdin, + help="File with PR numbers, one per line.", +) +args = parser.parse_args() + +# This function mirrors the PR description checks in ClickhousePullRequestTrigger. +# Returns False if the PR should not be mentioned changelog. 
+def parse_one_pull_request(item): + description = item["body"] + # Don't skip empty lines because they delimit parts of description + lines = [ + line + for line in [ + x.strip() for x in (description.split("\n") if description else []) + ] + ] + lines = [re.sub(r"\s+", " ", l) for l in lines] + + category = "" + entry = "" + + if lines: + i = 0 + while i < len(lines): + if re.match(r"(?i).*change\s*log\s*category", lines[i]): + i += 1 + if i >= len(lines): + break + # Can have one empty line between header and the category itself. Filter it out. + if not lines[i]: + i += 1 + if i >= len(lines): + break + category = re.sub(r"^[-*\s]*", "", lines[i]) + i += 1 + + elif re.match(r"(?i).*change\s*log\s*entry", lines[i]): + i += 1 + # Can have one empty line between header and the entry itself. Filter it out. + if i < len(lines) and not lines[i]: + i += 1 + # All following lines until empty one are the changelog entry. + entry_lines = [] + while i < len(lines) and lines[i]: + entry_lines.append(lines[i]) + i += 1 + entry = " ".join(entry_lines) + else: + i += 1 + + if not category: + # Shouldn't happen, because description check in CI should catch such PRs. + # Fall through, so that it shows up in output and the user can fix it. + category = "NO CL CATEGORY" + + # Filter out the PR categories that are not for changelog. + if re.match( + r"(?i)doc|((non|in|not|un)[-\s]*significant)|(not[ ]*for[ ]*changelog)", + category, + ): + return False + + if not entry: + # Shouldn't happen, because description check in CI should catch such PRs. + category = "NO CL ENTRY" + entry = "NO CL ENTRY: '" + item["title"] + "'" + + entry = entry.strip() + if entry[-1] != ".": + entry += "." + + item["entry"] = entry + item["category"] = category + + return True + + +# This array gives the preferred category order, and is also used to +# normalize category names. +categories_preferred_order = [ + "Backward Incompatible Change", + "New Feature", + "Performance Improvement", + "Improvement", + "Bug Fix", + "Build/Testing/Packaging Improvement", + "Other", +] + +category_to_pr = collections.defaultdict(lambda: []) +users = {} +for line in args.file: + pr = json.loads(open(f"pr{line.strip()}.json").read()) + assert pr["number"] + if not parse_one_pull_request(pr): + continue + + assert pr["category"] + + # Normalize category name + for c in categories_preferred_order: + if fuzzywuzzy.fuzz.ratio(pr["category"].lower(), c.lower()) >= 90: + pr["category"] = c + break + + category_to_pr[pr["category"]].append(pr) + user_id = pr["user"]["id"] + users[user_id] = json.loads(open(f"user{user_id}.json").read()) + + +def print_category(category): + print(("#### " + category)) + print() + for pr in category_to_pr[category]: + user = users[pr["user"]["id"]] + user_name = user["name"] if user["name"] else user["login"] + + # Substitute issue links. + # 1) issue number w/o markdown link + pr["entry"] = re.sub( + r"([^[])#([0-9]{4,})", + r"\1[#\2](https://github.com/ClickHouse/ClickHouse/issues/\2)", + pr["entry"], + ) + # 2) issue URL w/o markdown link + pr["entry"] = re.sub( + r"([^(])https://github.com/ClickHouse/ClickHouse/issues/([0-9]{4,})", + r"\1[#\2](https://github.com/ClickHouse/ClickHouse/issues/\2)", + pr["entry"], + ) + + print( + f'* {pr["entry"]} [#{pr["number"]}]({pr["html_url"]}) ([{user_name}]({user["html_url"]})).' 
+ ) + + print() + + +# Print categories in preferred order +for category in categories_preferred_order: + if category in category_to_pr: + print_category(category) + category_to_pr.pop(category) + +# Print the rest of the categories +for category in category_to_pr: + print_category(category) diff --git a/utils/changelog/changelog.py b/utils/changelog/changelog.py index a846c240055..c15a1600506 100755 --- a/utils/changelog/changelog.py +++ b/utils/changelog/changelog.py @@ -19,6 +19,8 @@ from git_helper import is_shallow, git_runner as runner # This array gives the preferred category order, and is also used to # normalize category names. +# Categories are used in .github/PULL_REQUEST_TEMPLATE.md, keep comments there +# updated accordingly categories_preferred_order = ( "Backward Incompatible Change", "New Feature", @@ -31,6 +33,7 @@ categories_preferred_order = ( FROM_REF = "" TO_REF = "" +SHA_IN_CHANGELOG = [] # type: List[str] class Description: @@ -97,19 +100,7 @@ class Worker(Thread): logging.info("PR %s does not belong to the repo", api_pr.number) continue - try: - runner.run( - f"git merge-base --is-ancestor '{merge_commit}' '{TO_REF}'", - stderr=DEVNULL, - ) - runner.run( - f"git merge-base --is-ancestor '{FROM_REF}' '{merge_commit}'", - stderr=DEVNULL, - ) - in_changelog = True - except CalledProcessError: - # Commit is not between from and to refs - continue + in_changelog = merge_commit in SHA_IN_CHANGELOG if in_changelog: desc = generate_description(api_pr, self.repo) if desc is not None: @@ -296,7 +287,7 @@ def write_changelog(fd: TextIO, descriptions: Dict[str, List[Description]]): fd.write("\n") - for category in descriptions: + for category in sorted(descriptions): if category not in seen_categories: fd.write(f"#### {category}\n\n") for desc in descriptions[category]: @@ -314,25 +305,23 @@ def check_refs(from_ref: Optional[str], to_ref: str): # Check from_ref if from_ref is None: - FROM_REF = runner.run(f"git describe --abbrev=0 --tags '{TO_REF}~'") - # Check if the previsous tag is different for merge commits - # I __assume__ we won't have octopus merges, at least for the tagged commits - try: - alternative_tag = runner.run( - f"git describe --abbrev=0 --tags '{TO_REF}^2'", stderr=DEVNULL - ) - if FROM_REF != alternative_tag: - raise Exception( - f"Unable to get unified parent tag for {TO_REF}, " - f"define it manually, get {FROM_REF} and {alternative_tag}" - ) - except CalledProcessError: - pass + # Get all tags pointing to TO_REF + tags = runner.run(f"git tag --points-at '{TO_REF}^{{}}'") + logging.info("All tags pointing to %s:\n%s", TO_REF, tags) + exclude = " ".join([f"--exclude='{tag}'" for tag in tags.split("\n")]) + FROM_REF = runner.run(f"git describe --abbrev=0 --tags {exclude} '{TO_REF}'") else: runner.run(f"git rev-parse {FROM_REF}") FROM_REF = from_ref +def set_sha_in_changelog(): + global SHA_IN_CHANGELOG + SHA_IN_CHANGELOG = runner.run( + f"git log --format=format:%H {FROM_REF}..{TO_REF}" + ).split("\n") + + def main(): log_levels = [logging.CRITICAL, logging.WARN, logging.INFO, logging.DEBUG] args = parse_args() @@ -348,6 +337,7 @@ def main(): runner.run("git fetch --tags", stderr=DEVNULL) check_refs(args.from_ref, args.to_ref) + set_sha_in_changelog() logging.info("Using %s..%s as changelog interval", FROM_REF, TO_REF) diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 0f8d13a7d93..1d8bae44904 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,3 +1,4 @@ 
+v22.5.1.2079-stable 2022-05-19 v22.4.5.9-stable 2022-05-06 v22.4.4.7-stable 2022-04-29 v22.4.3.3-stable 2022-04-26 diff --git a/website/benchmark/dbms/index.html b/website/benchmark/dbms/index.html index a856bbb0502..c4a700ed2df 100644 --- a/website/benchmark/dbms/index.html +++ b/website/benchmark/dbms/index.html @@ -35,7 +35,7 @@

Full results

-
+
diff --git a/website/benchmark/hardware/index.html b/website/benchmark/hardware/index.html index 7b68e42f451..9c9b14b56da 100644 --- a/website/benchmark/hardware/index.html +++ b/website/benchmark/hardware/index.html @@ -35,7 +35,7 @@

Full results

-
+
diff --git a/website/benchmark/versions/index.html b/website/benchmark/versions/index.html index da0702a04de..cce85934a9b 100644 --- a/website/benchmark/versions/index.html +++ b/website/benchmark/versions/index.html @@ -35,7 +35,7 @@

Full results

-
+
diff --git a/website/css/blog.css b/website/css/blog.css index 089856b8e00..e69de29bb2d 100644 --- a/website/css/blog.css +++ b/website/css/blog.css @@ -1,28 +0,0 @@ -body.blog .dropdown-item { - color: #111 !important; -} - -body.blog .dropdown-item:hover, -body.blog .dropdown-item:focus { - background-color: #efefef; -} - -.blog .social-icon { - background: #eee; -} - -@media (prefers-color-scheme: dark) { - body.blog .dropdown-item { - color: #fff !important; - } - - .blog .dropdown-item:hover, - .blog .dropdown-item:focus, - .blog .tag{ - background-color: #666 !important; - } - - .blog .social-icon { - background: #444451; - } -} diff --git a/website/css/docs.css b/website/css/docs.css index 735559f384c..e7d41bc28bf 100644 --- a/website/css/docs.css +++ b/website/css/docs.css @@ -83,68 +83,6 @@ summary { line-height: 1.25; } -#docsearch-input:focus, #docsearch-input:active { - border: 0; - color: #efefef!important; -} - -@media (max-width: 768px) { - #search-form { - width: 50%; - } - .algolia-autocomplete, #docsearch-input, #search-form .input-group { - width: 100%; - } -} - -.algolia-autocomplete .algolia-docsearch-suggestion--subcategory-column { - display: none !important; -} - -.algolia-autocomplete .algolia-docsearch-suggestion--content { - float: none !important; - width: 100% !important; - background-color: #444451; -} - -.algolia-autocomplete .algolia-docsearch-suggestion--content:before { - content: none !important; -} - -.algolia-autocomplete .ds-dropdown-menu { - max-height: 512px; - overflow-x: hidden; - overflow-y: auto; -} - -.algolia-autocomplete .ds-dropdown-menu, -.algolia-autocomplete .ds-dropdown-menu [class^=ds-dataset-], -.algolia-autocomplete .algolia-docsearch-suggestion, -.algolia-autocomplete .ds-dropdown-menu:before { - background: #1c1c1c !important; - border-color: #333; -} - -.algolia-autocomplete .algolia-docsearch-suggestion--content { - background-color: #333; -} - -.algolia-autocomplete .algolia-docsearch-suggestion--content:hover, -.algolia-autocomplete .ds-dropdown-menu .ds-suggestion.ds-cursor .algolia-docsearch-suggestion:not(.suggestion-layout-simple) .algolia-docsearch-suggestion--content { - background-color: #444451 !important; -} - -.algolia-autocomplete .algolia-docsearch-suggestion--category-header, -.algolia-autocomplete .algolia-docsearch-suggestion--subcategory-column, -.algolia-autocomplete .algolia-docsearch-suggestion--title, -.algolia-autocomplete .algolia-docsearch-suggestion--text { - color: #efefef; -} - -.algolia-autocomplete .algolia-docsearch-suggestion--highlight { - color: #f14600; -} - #toc .nav-link { color: #333; } diff --git a/website/css/docsearch.css b/website/css/docsearch.css index 1e7ffb30512..e69de29bb2d 100644 --- a/website/css/docsearch.css +++ b/website/css/docsearch.css @@ -1,555 +0,0 @@ -.searchbox { - display: inline-block; - position: relative; - width: 200px; - height: 32px !important; - white-space: nowrap; - box-sizing: border-box; - visibility: visible !important; -} - -.searchbox .algolia-autocomplete { - display: block; - width: 100%; - height: 100%; -} - -.searchbox__wrapper { - width: 100%; - height: 100%; - z-index: 999; - position: relative; -} - -.searchbox__input { - display: inline-block; - box-sizing: border-box; - border: 0; - border-radius: 16px; - box-shadow: inset 0 0 0 1px #cccccc; - background: #ffffff !important; - padding: 0; - padding-right: 26px; - padding-left: 32px; - width: 100%; - height: 100%; - vertical-align: middle; - white-space: normal; - font-size: 12px; - 
-webkit-appearance: none; - -moz-appearance: none; - appearance: none; -} - -.searchbox__input::-webkit-search-decoration, .searchbox__input::-webkit-search-cancel-button, .searchbox__input::-webkit-search-results-button, .searchbox__input::-webkit-search-results-decoration { - display: none; -} - -.searchbox__input:hover { - box-shadow: inset 0 0 0 1px #b3b3b3; -} - -.searchbox__input:focus, .searchbox__input:active { - outline: 0; - box-shadow: inset 0 0 0 1px #aaaaaa; - background: #ffffff; -} - -.searchbox__input::-webkit-input-placeholder { - color: #aaaaaa; -} - -.searchbox__input:-ms-input-placeholder { - color: #aaaaaa; -} - -.searchbox__input::-ms-input-placeholder { - color: #aaaaaa; -} - -.searchbox__input::placeholder { - color: #aaaaaa; -} - -.searchbox__submit { - position: absolute; - top: 0; - margin: 0; - border: 0; - border-radius: 16px 0 0 16px; - background-color: rgba(69, 142, 225, 0); - padding: 0; - width: 32px; - height: 100%; - vertical-align: middle; - text-align: center; - font-size: inherit; - -webkit-user-select: none; - -moz-user-select: none; - -ms-user-select: none; - user-select: none; - right: inherit; - left: 0; -} - -.searchbox__submit::before { - display: inline-block; - margin-right: -4px; - height: 100%; - vertical-align: middle; - content: ''; -} - -.searchbox__submit:hover, .searchbox__submit:active { - cursor: pointer; -} - -.searchbox__submit:focus { - outline: 0; -} - -.searchbox__submit svg { - width: 14px; - height: 14px; - vertical-align: middle; - fill: #6d7e96; -} - -.searchbox__reset { - display: block; - position: absolute; - top: 8px; - right: 8px; - margin: 0; - border: 0; - background: none; - cursor: pointer; - padding: 0; - font-size: inherit; - -webkit-user-select: none; - -moz-user-select: none; - -ms-user-select: none; - user-select: none; - fill: rgba(0, 0, 0, 0.5); -} - -.searchbox__reset.hide { - display: none; -} - -.searchbox__reset:focus { - outline: 0; -} - -.searchbox__reset svg { - display: block; - margin: 4px; - width: 8px; - height: 8px; -} - -.searchbox__input:valid ~ .searchbox__reset { - display: block; - -webkit-animation-name: sbx-reset-in; - animation-name: sbx-reset-in; - -webkit-animation-duration: 0.15s; - animation-duration: 0.15s; -} - -@-webkit-keyframes sbx-reset-in { - 0% { - -webkit-transform: translate3d(-20%, 0, 0); - transform: translate3d(-20%, 0, 0); - opacity: 0; - } - 100% { - -webkit-transform: none; - transform: none; - opacity: 1; - } -} - -@keyframes sbx-reset-in { - 0% { - -webkit-transform: translate3d(-20%, 0, 0); - transform: translate3d(-20%, 0, 0); - opacity: 0; - } - 100% { - -webkit-transform: none; - transform: none; - opacity: 1; - } -} - -.algolia-autocomplete.algolia-autocomplete-right .ds-dropdown-menu { - right: 0 !important; - left: inherit !important; -} - -.algolia-autocomplete.algolia-autocomplete-right .ds-dropdown-menu:before { - right: 48px; -} - -.algolia-autocomplete.algolia-autocomplete-left .ds-dropdown-menu { - left: 0 !important; - right: inherit !important; -} - -.algolia-autocomplete.algolia-autocomplete-left .ds-dropdown-menu:before { - left: 48px; -} - -.algolia-autocomplete .ds-dropdown-menu { - position: relative; - top: -6px; - border-radius: 4px; - margin: 6px 0 0; - padding: 0; - text-align: left; - height: auto; - position: relative; - background: transparent; - border: none; - z-index: 999; - max-width: 600px; - min-width: 500px; - box-shadow: 0 1px 0 0 rgba(0, 0, 0, 0.2), 0 2px 3px 0 rgba(0, 0, 0, 0.1); -} - -.algolia-autocomplete .ds-dropdown-menu:before 
{ - display: block; - position: absolute; - content: ''; - width: 14px; - height: 14px; - background: #fff; - z-index: 1000; - top: -7px; - border-top: 1px solid #d9d9d9; - border-right: 1px solid #d9d9d9; - -webkit-transform: rotate(-45deg); - transform: rotate(-45deg); - border-radius: 2px; -} - -.algolia-autocomplete .ds-dropdown-menu .ds-suggestions { - position: relative; - z-index: 1000; - margin-top: 8px; -} - -.algolia-autocomplete .ds-dropdown-menu .ds-suggestions a:hover { - text-decoration: none; -} - -.algolia-autocomplete .ds-dropdown-menu .ds-suggestion { - cursor: pointer; -} - -.algolia-autocomplete .ds-dropdown-menu .ds-suggestion.ds-cursor .algolia-docsearch-suggestion.suggestion-layout-simple { - background-color: rgba(69, 142, 225, 0.05); -} - -.algolia-autocomplete .ds-dropdown-menu .ds-suggestion.ds-cursor .algolia-docsearch-suggestion:not(.suggestion-layout-simple) .algolia-docsearch-suggestion--content { - background-color: rgba(69, 142, 225, 0.05); -} - -.algolia-autocomplete .ds-dropdown-menu [class^='ds-dataset-'] { - position: relative; - border: solid 1px #d9d9d9; - background: #fff; - border-radius: 4px; - overflow: auto; - padding: 0 8px 8px; -} - -.algolia-autocomplete .ds-dropdown-menu * { - box-sizing: border-box; -} - -.algolia-autocomplete .algolia-docsearch-suggestion { - display: block; - position: relative; - padding: 0 8px; - background: #fff; - color: #02060c; - overflow: hidden; -} - -.algolia-autocomplete .algolia-docsearch-suggestion--highlight { - color: #174d8c; - background: rgba(143, 187, 237, 0.1); - padding: 0.1em 0.05em; -} - -.algolia-autocomplete .algolia-docsearch-suggestion--category-header .algolia-docsearch-suggestion--category-header-lvl0 -.algolia-docsearch-suggestion--highlight, -.algolia-autocomplete .algolia-docsearch-suggestion--category-header .algolia-docsearch-suggestion--category-header-lvl1 -.algolia-docsearch-suggestion--highlight { - padding: 0 0 1px; - background: inherit; - box-shadow: inset 0 -2px 0 0 rgba(69, 142, 225, 0.8); - color: inherit; -} - -.algolia-autocomplete .algolia-docsearch-suggestion--text .algolia-docsearch-suggestion--highlight { - padding: 0 0 1px; - background: inherit; - box-shadow: inset 0 -2px 0 0 rgba(69, 142, 225, 0.8); - color: inherit; -} - -.algolia-autocomplete .algolia-docsearch-suggestion--content { - display: block; - float: right; - width: 70%; - position: relative; - padding: 5.33333px 0 5.33333px 10.66667px; - cursor: pointer; -} - -.algolia-autocomplete .algolia-docsearch-suggestion--content:before { - content: ''; - position: absolute; - display: block; - top: 0; - height: 100%; - width: 1px; - background: #ddd; - left: -1px; -} - -.algolia-autocomplete .algolia-docsearch-suggestion--category-header { - position: relative; - border-bottom: 1px solid #ddd; - display: none; - margin-top: 8px; - padding: 4px 0; - font-size: 1em; - color: #33363d; -} - -.algolia-autocomplete .algolia-docsearch-suggestion--wrapper { - width: 100%; - float: left; - padding: 8px 0 0 0; -} - -.algolia-autocomplete .algolia-docsearch-suggestion--subcategory-column { - float: left; - width: 30%; - padding-left: 0; - text-align: right; - position: relative; - padding: 5.33333px 10.66667px; - color: #a4a7ae; - font-size: 0.9em; - word-wrap: break-word; -} - -.algolia-autocomplete .algolia-docsearch-suggestion--subcategory-column:before { - content: ''; - position: absolute; - display: block; - top: 0; - height: 100%; - width: 1px; - background: #ddd; - right: 0; -} - -.algolia-autocomplete 
.algolia-docsearch-suggestion--subcategory-inline { - display: none; -} - -.algolia-autocomplete .algolia-docsearch-suggestion--title { - margin-bottom: 4px; - color: #02060c; - font-size: 0.9em; - font-weight: bold; -} - -.algolia-autocomplete .algolia-docsearch-suggestion--text { - display: block; - line-height: 1.2em; - font-size: 0.85em; - color: #63676d; -} - -.algolia-autocomplete .algolia-docsearch-suggestion--no-results { - width: 100%; - padding: 8px 0; - text-align: center; - font-size: 1.2em; -} - -.algolia-autocomplete .algolia-docsearch-suggestion--no-results::before { - display: none; -} - -.algolia-autocomplete .algolia-docsearch-suggestion code { - padding: 1px 5px; - font-size: 90%; - border: none; - color: #222222; - background-color: #ebebeb; - border-radius: 3px; - font-family: Menlo, Monaco, Consolas, 'Courier New', monospace; -} - -.algolia-autocomplete .algolia-docsearch-suggestion code .algolia-docsearch-suggestion--highlight { - background: none; -} - -.algolia-autocomplete .algolia-docsearch-suggestion.algolia-docsearch-suggestion__main .algolia-docsearch-suggestion--category-header { - display: block; -} - -.algolia-autocomplete .algolia-docsearch-suggestion.algolia-docsearch-suggestion__secondary { - display: block; -} - -@media all and (min-width: 768px) { - .algolia-autocomplete .algolia-docsearch-suggestion .algolia-docsearch-suggestion--subcategory-column { - display: block; - } -} - -@media all and (max-width: 768px) { - .algolia-autocomplete .algolia-docsearch-suggestion .algolia-docsearch-suggestion--subcategory-column { - display: inline-block; - width: auto; - text-align: left; - float: left; - padding: 0; - color: #02060c; - font-size: 0.9em; - font-weight: bold; - text-align: left; - opacity: 0.5; - } - .algolia-autocomplete .algolia-docsearch-suggestion .algolia-docsearch-suggestion--subcategory-column:before { - display: none; - } - .algolia-autocomplete .algolia-docsearch-suggestion .algolia-docsearch-suggestion--subcategory-column:after { - content: '|'; - } - .algolia-autocomplete .algolia-docsearch-suggestion .algolia-docsearch-suggestion--content { - display: inline-block; - width: auto; - text-align: left; - float: left; - padding: 0; - } - .algolia-autocomplete .algolia-docsearch-suggestion .algolia-docsearch-suggestion--content:before { - display: none; - } -} - -.algolia-autocomplete .suggestion-layout-simple.algolia-docsearch-suggestion { - border-bottom: solid 1px #eee; - padding: 8px; - margin: 0; -} - -.algolia-autocomplete .suggestion-layout-simple .algolia-docsearch-suggestion--content { - width: 100%; - padding: 0; -} - -.algolia-autocomplete .suggestion-layout-simple .algolia-docsearch-suggestion--content::before { - display: none; -} - -.algolia-autocomplete .suggestion-layout-simple .algolia-docsearch-suggestion--category-header { - margin: 0; - padding: 0; - display: block; - width: 100%; - border: none; -} - -.algolia-autocomplete .suggestion-layout-simple .algolia-docsearch-suggestion--category-header-lvl0 { - opacity: 0.6; - font-size: 0.85em; -} - -.algolia-autocomplete .suggestion-layout-simple .algolia-docsearch-suggestion--category-header-lvl1 { - opacity: 0.6; - font-size: 0.85em; -} - -.algolia-autocomplete .suggestion-layout-simple .algolia-docsearch-suggestion--category-header-lvl1::before { - background-image: url('data:image/svg+xml;utf8,'); - content: ''; - width: 10px; - height: 10px; - display: inline-block; -} - -.algolia-autocomplete .suggestion-layout-simple .algolia-docsearch-suggestion--wrapper { - width: 
100%; - float: left; - margin: 0; - padding: 0; -} - -.algolia-autocomplete .suggestion-layout-simple .algolia-docsearch-suggestion--duplicate-content, .algolia-autocomplete .suggestion-layout-simple .algolia-docsearch-suggestion--subcategory-inline { - display: none !important; -} - -.algolia-autocomplete .suggestion-layout-simple .algolia-docsearch-suggestion--title { - margin: 0; - color: #458ee1; - font-size: 0.9em; - font-weight: normal; -} - -.algolia-autocomplete .suggestion-layout-simple .algolia-docsearch-suggestion--title::before { - content: '#'; - font-weight: bold; - color: #458ee1; - display: inline-block; -} - -.algolia-autocomplete .suggestion-layout-simple .algolia-docsearch-suggestion--text { - margin: 4px 0 0; - display: block; - line-height: 1.4em; - padding: 5.33333px 8px; - background: #f8f8f8; - font-size: 0.85em; - opacity: 0.8; -} - -.algolia-autocomplete .suggestion-layout-simple .algolia-docsearch-suggestion--text .algolia-docsearch-suggestion--highlight { - color: #3f4145; - font-weight: bold; - box-shadow: none; -} - -.algolia-autocomplete .algolia-docsearch-footer { - width: 134px; - height: 20px; - z-index: 2000; - margin-top: 10.66667px; - float: right; - font-size: 0; - line-height: 0; -} - -.algolia-autocomplete .algolia-docsearch-footer--logo { - background-image: url("data:image/svg+xml,%3Csvg width='168' height='24' xmlns='http://www.w3.org/2000/svg'%3E%3Cg fill='none' fill-rule='evenodd'%3E%3Cpath d='M78.988.938h16.594a2.968 2.968 0 0 1 2.966 2.966V20.5a2.967 2.967 0 0 1-2.966 2.964H78.988a2.967 2.967 0 0 1-2.966-2.964V3.897A2.961 2.961 0 0 1 78.988.938zm41.937 17.866c-4.386.02-4.386-3.54-4.386-4.106l-.007-13.336 2.675-.424v13.254c0 .322 0 2.358 1.718 2.364v2.248zm-10.846-2.18c.821 0 1.43-.047 1.855-.129v-2.719a6.334 6.334 0 0 0-1.574-.199c-.295 0-.596.021-.897.069a2.699 2.699 0 0 0-.814.24c-.24.116-.439.28-.582.491-.15.212-.219.335-.219.656 0 .628.219.991.616 1.23s.938.362 1.615.362zm-.233-9.7c.883 0 1.629.109 2.231.328.602.218 1.088.525 1.444.915.363.396.609.922.76 1.483.157.56.232 1.175.232 1.85v6.874c-.41.089-1.034.19-1.868.314-.834.123-1.772.185-2.813.185-.69 0-1.327-.069-1.895-.198a4.001 4.001 0 0 1-1.471-.636 3.085 3.085 0 0 1-.951-1.134c-.226-.465-.343-1.12-.343-1.803 0-.656.13-1.073.384-1.525.26-.45.608-.819 1.047-1.106.445-.287.95-.492 1.532-.615a8.8 8.8 0 0 1 1.82-.185 8.404 8.404 0 0 1 1.972.24v-.438c0-.307-.035-.6-.11-.874a1.88 1.88 0 0 0-.384-.73 1.784 1.784 0 0 0-.724-.493 3.164 3.164 0 0 0-1.143-.205c-.616 0-1.177.075-1.69.164a7.735 7.735 0 0 0-1.26.307l-.321-2.192c.335-.117.834-.233 1.478-.349a10.98 10.98 0 0 1 2.073-.178zm52.842 9.626c.822 0 1.43-.048 1.854-.13V13.7a6.347 6.347 0 0 0-1.574-.199c-.294 0-.595.021-.896.069a2.7 2.7 0 0 0-.814.24 1.46 1.46 0 0 0-.582.491c-.15.212-.218.335-.218.656 0 .628.218.991.615 1.23.404.245.938.362 1.615.362zm-.226-9.694c.883 0 1.629.108 2.231.327.602.219 1.088.526 1.444.915.355.39.609.923.759 1.483.158.56.233 1.175.233 1.852v6.873c-.41.088-1.034.19-1.868.314-.834.123-1.772.184-2.813.184-.69 0-1.327-.068-1.895-.198a4.001 4.001 0 0 1-1.471-.635 3.085 3.085 0 0 1-.951-1.134c-.226-.465-.343-1.12-.343-1.804 0-.656.13-1.073.384-1.524.26-.45.608-.82 1.047-1.107.445-.286.95-.491 1.532-.614a8.803 8.803 0 0 1 2.751-.13c.329.034.671.096 1.04.185v-.437a3.3 3.3 0 0 0-.109-.875 1.873 1.873 0 0 0-.384-.731 1.784 1.784 0 0 0-.724-.492 3.165 3.165 0 0 0-1.143-.205c-.616 0-1.177.075-1.69.164-.514.089-.938.191-1.26.307l-.321-2.193c.335-.116.834-.232 1.478-.348a11.633 11.633 0 0 1 2.073-.177zm-8.034-1.271a1.626 1.626 0 
0 1-1.628-1.62c0-.895.725-1.62 1.628-1.62.904 0 1.63.725 1.63 1.62 0 .895-.733 1.62-1.63 1.62zm1.348 13.22h-2.689V7.27l2.69-.423v11.956zm-4.714 0c-4.386.02-4.386-3.54-4.386-4.107l-.008-13.336 2.676-.424v13.254c0 .322 0 2.358 1.718 2.364v2.248zm-8.698-5.903c0-1.156-.253-2.119-.746-2.788-.493-.677-1.183-1.01-2.067-1.01-.882 0-1.574.333-2.065 1.01-.493.676-.733 1.632-.733 2.788 0 1.168.246 1.953.74 2.63.492.683 1.183 1.018 2.066 1.018.882 0 1.574-.342 2.067-1.019.492-.683.738-1.46.738-2.63zm2.737-.007c0 .902-.13 1.584-.397 2.33a5.52 5.52 0 0 1-1.128 1.906 4.986 4.986 0 0 1-1.752 1.223c-.685.286-1.739.45-2.265.45-.528-.006-1.574-.157-2.252-.45a5.096 5.096 0 0 1-1.744-1.223c-.487-.527-.863-1.162-1.137-1.906a6.345 6.345 0 0 1-.41-2.33c0-.902.123-1.77.397-2.508a5.554 5.554 0 0 1 1.15-1.892 5.133 5.133 0 0 1 1.75-1.216c.679-.287 1.425-.423 2.232-.423.808 0 1.553.142 2.237.423.685.286 1.274.69 1.753 1.216a5.644 5.644 0 0 1 1.135 1.892c.287.738.431 1.606.431 2.508zm-20.138 0c0 1.12.246 2.363.738 2.882.493.52 1.13.78 1.91.78.424 0 .828-.062 1.204-.178.377-.116.677-.253.917-.417V9.33a10.476 10.476 0 0 0-1.766-.226c-.971-.028-1.71.37-2.23 1.004-.513.636-.773 1.75-.773 2.788zm7.438 5.274c0 1.824-.466 3.156-1.404 4.004-.936.846-2.367 1.27-4.296 1.27-.705 0-2.17-.137-3.34-.396l.431-2.118c.98.205 2.272.26 2.95.26 1.074 0 1.84-.219 2.299-.656.459-.437.684-1.086.684-1.948v-.437a8.07 8.07 0 0 1-1.047.397c-.43.13-.93.198-1.492.198-.739 0-1.41-.116-2.018-.349a4.206 4.206 0 0 1-1.567-1.025c-.431-.45-.774-1.017-1.013-1.694-.24-.677-.363-1.885-.363-2.773 0-.834.13-1.88.384-2.577.26-.696.629-1.298 1.129-1.796.493-.498 1.095-.881 1.8-1.162a6.605 6.605 0 0 1 2.428-.457c.87 0 1.67.109 2.45.24.78.129 1.444.265 1.985.415V18.17z' fill='%235468FF'/%3E%3Cpath d='M6.972 6.677v1.627c-.712-.446-1.52-.67-2.425-.67-.585 0-1.045.13-1.38.391a1.24 1.24 0 0 0-.502 1.03c0 .425.164.765.494 1.02.33.256.835.532 1.516.83.447.192.795.356 1.045.495.25.138.537.332.862.582.324.25.563.548.718.894.154.345.23.741.23 1.188 0 .947-.334 1.691-1.004 2.234-.67.542-1.537.814-2.601.814-1.18 0-2.16-.229-2.936-.686v-1.708c.84.628 1.814.942 2.92.942.585 0 1.048-.136 1.388-.407.34-.271.51-.646.51-1.125 0-.287-.1-.55-.302-.79-.203-.24-.42-.42-.655-.542-.234-.123-.585-.29-1.053-.503-.276-.127-.47-.218-.582-.271a13.67 13.67 0 0 1-.55-.287 4.275 4.275 0 0 1-.567-.351 6.92 6.92 0 0 1-.455-.4c-.18-.17-.31-.34-.39-.51-.08-.17-.155-.37-.224-.598a2.553 2.553 0 0 1-.104-.742c0-.915.333-1.638.998-2.17.664-.532 1.523-.798 2.576-.798.968 0 1.793.17 2.473.51zm7.468 5.696v-.287c-.022-.607-.187-1.088-.495-1.444-.309-.357-.75-.535-1.324-.535-.532 0-.99.194-1.373.583-.382.388-.622.949-.717 1.683h3.909zm1.005 2.792v1.404c-.596.34-1.383.51-2.362.51-1.255 0-2.255-.377-3-1.132-.744-.755-1.116-1.744-1.116-2.968 0-1.297.34-2.316 1.021-3.055.68-.74 1.548-1.11 2.6-1.11 1.033 0 1.852.323 2.458.966.606.644.91 1.572.91 2.784 0 .33-.033.676-.096 1.038h-5.314c.107.702.405 1.239.894 1.611.49.372 1.106.558 1.85.558.862 0 1.58-.202 2.155-.606zm6.605-1.77h-1.212c-.596 0-1.045.116-1.349.35-.303.234-.454.532-.454.894 0 .372.117.664.35.877.235.213.575.32 1.022.32.51 0 .912-.142 1.204-.424.293-.281.44-.651.44-1.108v-.91zm-4.068-2.554V9.325c.627-.361 1.457-.542 2.489-.542 2.116 0 3.175 1.026 3.175 3.08V17h-1.548v-.957c-.415.68-1.143 1.02-2.186 1.02-.766 0-1.38-.22-1.843-.661-.462-.442-.694-1.003-.694-1.684 0-.776.293-1.38.878-1.81.585-.431 1.404-.647 2.457-.647h1.34V11.8c0-.554-.133-.971-.399-1.253-.266-.282-.707-.423-1.324-.423a4.07 4.07 0 0 0-2.345.718zm9.333-1.93v1.42c.394-1 1.101-1.5 
2.123-1.5.148 0 .313.016.494.048v1.531a1.885 1.885 0 0 0-.75-.143c-.542 0-.989.24-1.34.718-.351.479-.527 1.048-.527 1.707V17h-1.563V8.91h1.563zm5.01 4.084c.022.82.272 1.492.75 2.019.479.526 1.15.79 2.01.79.639 0 1.235-.176 1.788-.527v1.404c-.521.319-1.186.479-1.995.479-1.265 0-2.276-.4-3.031-1.197-.755-.798-1.133-1.792-1.133-2.984 0-1.16.38-2.151 1.14-2.975.761-.825 1.79-1.237 3.088-1.237.702 0 1.346.149 1.93.447v1.436a3.242 3.242 0 0 0-1.77-.495c-.84 0-1.513.266-2.019.798-.505.532-.758 1.213-.758 2.042zM40.24 5.72v4.579c.458-1 1.293-1.5 2.505-1.5.787 0 1.42.245 1.899.734.479.49.718 1.17.718 2.042V17h-1.564v-5.106c0-.553-.14-.98-.422-1.284-.282-.303-.652-.455-1.11-.455-.531 0-1.002.202-1.411.606-.41.405-.615 1.022-.615 1.851V17h-1.563V5.72h1.563zm14.966 10.02c.596 0 1.096-.253 1.5-.758.404-.506.606-1.157.606-1.955 0-.915-.202-1.62-.606-2.114-.404-.495-.92-.742-1.548-.742-.553 0-1.05.224-1.491.67-.442.447-.662 1.133-.662 2.058 0 .958.212 1.67.638 2.138.425.469.946.703 1.563.703zM53.004 5.72v4.42c.574-.894 1.388-1.341 2.44-1.341 1.022 0 1.857.383 2.506 1.149.649.766.973 1.781.973 3.047 0 1.138-.309 2.109-.925 2.912-.617.803-1.463 1.205-2.537 1.205-1.075 0-1.894-.447-2.457-1.34V17h-1.58V5.72h1.58zm9.908 11.104l-3.223-7.913h1.739l1.005 2.632 1.26 3.415c.096-.32.48-1.458 1.15-3.415l.909-2.632h1.66l-2.92 7.866c-.777 2.074-1.963 3.11-3.559 3.11a2.92 2.92 0 0 1-.734-.079v-1.34c.17.042.351.064.543.064 1.032 0 1.755-.57 2.17-1.708z' fill='%235D6494'/%3E%3Cpath d='M89.632 5.967v-.772a.978.978 0 0 0-.978-.977h-2.28a.978.978 0 0 0-.978.977v.793c0 .088.082.15.171.13a7.127 7.127 0 0 1 1.984-.28c.65 0 1.295.088 1.917.259.082.02.164-.04.164-.13m-6.248 1.01l-.39-.389a.977.977 0 0 0-1.382 0l-.465.465a.973.973 0 0 0 0 1.38l.383.383c.062.061.15.047.205-.014.226-.307.472-.601.746-.874.281-.28.568-.526.883-.751.068-.042.075-.137.02-.2m4.16 2.453v3.341c0 .096.104.165.192.117l2.97-1.537c.068-.034.089-.117.055-.184a3.695 3.695 0 0 0-3.08-1.866c-.068 0-.136.054-.136.13m0 8.048a4.489 4.489 0 0 1-4.49-4.482 4.488 4.488 0 0 1 4.49-4.482 4.488 4.488 0 0 1 4.489 4.482 4.484 4.484 0 0 1-4.49 4.482m0-10.85a6.363 6.363 0 1 0 0 12.729c3.518 0 6.372-2.85 6.372-6.368a6.358 6.358 0 0 0-6.371-6.36' fill='%23FFF'/%3E%3C/g%3E%3C/svg%3E%0A"); - background-repeat: no-repeat; - background-position: center; - background-size: 100%; - overflow: hidden; - text-indent: -9000px; - padding: 0 !important; - width: 100%; - height: 100%; - display: block; -} - -/*# 
sourceMappingURL=data:application/json;base64,{"version":3,"sources":["docsearch.css"],"names":[],"mappings":"AAAA;EACE,sBAAsB;EACtB,mBAAmB;EACnB,aAAa;EACb,wBAAwB;EACxB,oBAAoB;EACpB,uBAAuB;EACvB,+BAA+B;CAChC;;AAED;EACE,eAAe;EACf,YAAY;EACZ,aAAa;CACd;;AAED;EACE,YAAY;EACZ,aAAa;EACb,aAAa;EACb,mBAAmB;CACpB;;AAED;EACE,sBAAsB;EACtB,uBAAuB;EACvB,uDAAuD;EACvD,UAAU;EACV,oBAAoB;EACpB,oCAAoC;EACpC,+BAA+B;EAC/B,WAAW;EACX,oBAAoB;EACpB,mBAAmB;EACnB,YAAY;EACZ,aAAa;EACb,uBAAuB;EACvB,oBAAoB;EACpB,gBAAgB;EAChB,yBAAiB;KAAjB,sBAAiB;UAAjB,iBAAiB;CAClB;;AAED;EACE,cAAc;CACf;;AAED;EACE,oCAAoC;CACrC;;AAED;EACE,WAAW;EACX,oCAAoC;EACpC,oBAAoB;CACrB;;AAED;EACE,eAAe;CAChB;;AAFD;EACE,eAAe;CAChB;;AAFD;EACE,eAAe;CAChB;;AAFD;EACE,eAAe;CAChB;;AAED;EACE,mBAAmB;EACnB,OAAO;EACP,UAAU;EACV,UAAU;EACV,6BAA6B;EAC7B,wCAAwC;EACxC,WAAW;EACX,YAAY;EACZ,aAAa;EACb,uBAAuB;EACvB,mBAAmB;EACnB,mBAAmB;EACnB,0BAAkB;KAAlB,uBAAkB;MAAlB,sBAAkB;UAAlB,kBAAkB;EAClB,eAAe;EACf,QAAQ;CACT;;AAED;EACE,sBAAsB;EACtB,mBAAmB;EACnB,aAAa;EACb,uBAAuB;EACvB,YAAY;CACb;;AAED;EACE,gBAAgB;CACjB;;AAED;EACE,WAAW;CACZ;;AAED;EACE,YAAY;EACZ,aAAa;EACb,uBAAuB;EACvB,cAAc;CACf;;AAED;EACE,eAAe;EACf,mBAAmB;EACnB,SAAS;EACT,WAAW;EACX,UAAU;EACV,UAAU;EACV,iBAAiB;EACjB,gBAAgB;EAChB,WAAW;EACX,mBAAmB;EACnB,0BAAkB;KAAlB,uBAAkB;MAAlB,sBAAkB;UAAlB,kBAAkB;EAClB,yBAAyB;CAC1B;;AAED;EACE,cAAc;CACf;;AAED;EACE,WAAW;CACZ;;AAED;EACE,eAAe;EACf,YAAY;EACZ,WAAW;EACX,YAAY;CACb;;AAED;EACE,eAAe;EACf,qCAA6B;UAA7B,6BAA6B;EAC7B,kCAA0B;UAA1B,0BAA0B;CAC3B;;AAED;EACE;IACE,2CAAmC;YAAnC,mCAAmC;IACnC,WAAW;GACZ;EACD;IACE,wBAAgB;YAAhB,gBAAgB;IAChB,WAAW;GACZ;CACF;;AATD;EACE;IACE,2CAAmC;YAAnC,mCAAmC;IACnC,WAAW;GACZ;EACD;IACE,wBAAgB;YAAhB,gBAAgB;IAChB,WAAW;GACZ;CACF;;AAED;EACE,oBAAoB;EACpB,yBAAyB;CAC1B;;AAED;EACE,YAAY;CACb;;AAED;EACE,mBAAmB;EACnB,0BAA0B;CAC3B;;AAED;EACE,WAAW;CACZ;;AAED;EACE,mBAAmB;EACnB,UAAU;EACV,mBAAmB;EACnB,gBAAgB;EAChB,WAAW;EACX,iBAAiB;EACjB,aAAa;EACb,mBAAmB;EACnB,wBAAwB;EACxB,aAAa;EACb,aAAa;EACb,iBAAiB;EACjB,iBAAiB;EACjB,yEAAyE;CAC1E;;AAED;EACE,eAAe;EACf,mBAAmB;EACnB,YAAY;EACZ,YAAY;EACZ,aAAa;EACb,iBAAiB;EACjB,cAAc;EACd,UAAU;EACV,8BAA8B;EAC9B,gCAAgC;EAChC,kCAA0B;UAA1B,0BAA0B;EAC1B,mBAAmB;CACpB;;AAED;EACE,mBAAmB;EACnB,cAAc;EACd,gBAAgB;CACjB;;AAED;EACE,sBAAsB;CACvB;;AAED;EACE,gBAAgB;CACjB;;AAED;EACE,2CAA2C;CAC5C;;AAED;EACE,2CAA2C;CAC5C;;AAED;EACE,mBAAmB;EACnB,0BAA0B;EAC1B,iBAAiB;EACjB,mBAAmB;EACnB,eAAe;EACf,mBAAmB;CACpB;;AAED;EACE,uBAAuB;CACxB;;AAED;EACE,eAAe;EACf,mBAAmB;EACnB,eAAe;EACf,iBAAiB;EACjB,eAAe;EACf,iBAAiB;CAClB;;AAED;EACE,eAAe;EACf,qCAAqC;EACrC,sBAAsB;CACvB;;AAED;;;;EAIE,iBAAiB;EACjB,oBAAoB;EACpB,qDAAqD;EACrD,eAAe;CAChB;;AAED;EACE,iBAAiB;EACjB,oBAAoB;EACpB,qDAAqD;EACrD,eAAe;CAChB;;AAED;EACE,eAAe;EACf,aAAa;EACb,WAAW;EACX,mBAAmB;EACnB,0CAA0C;EAC1C,gBAAgB;CACjB;;AAED;EACE,YAAY;EACZ,mBAAmB;EACnB,eAAe;EACf,OAAO;EACP,aAAa;EACb,WAAW;EACX,iBAAiB;EACjB,WAAW;CACZ;;AAED;EACE,mBAAmB;EACnB,8BAA8B;EAC9B,cAAc;EACd,gBAAgB;EAChB,eAAe;EACf,eAAe;EACf,eAAe;CAChB;;AAED;EACE,YAAY;EACZ,YAAY;EACZ,mBAAmB;CACpB;;AAED;EACE,YAAY;EACZ,WAAW;EACX,gBAAgB;EAChB,kBAAkB;EAClB,mBAAmB;EACnB,8BAA8B;EAC9B,eAAe;EACf,iBAAiB;EACjB,sBAAsB;CACvB;;AAED;EACE,YAAY;EACZ,mBAAmB;EACnB,eAAe;EACf,OAAO;EACP,aAAa;EACb,WAAW;EACX,iBAAiB;EACjB,SAAS;CACV;;AAED;EACE,cAAc;CACf;;AAED;EACE,mBAAmB;EACnB,eAAe;EACf,iBAAiB;EACjB,kBAAkB;CACnB;;AAED;EACE,eAAe;EACf,mBAAmB;EACnB,kBAAkB;EAClB,eAAe;CAChB;;AAED;EACE,YAAY;EACZ,eAAe;EACf,mBAAmB;EACnB,iBAAiB;CAClB;;AAED;EACE,cAAc;CACf;;AAED;EACE,iBAAiB;EACjB,eAAe;EACf,aAAa;EACb,eAAe;EACf,0BAA0B;EAC1B,mBAAmB;EACnB,+DAA+D;CAChE;;AAED;EACE,iBAAiB;CAClB;;AAED;EACE,eAAe;CAChB;;AAED;EACE,eAAe;CAChB;;AAED;EACE;IACE,eAAe;G
AChB;CACF;;AAED;EACE;IACE,sBAAsB;IACtB,YAAY;IACZ,iBAAiB;IACjB,YAAY;IACZ,WAAW;IACX,eAAe;IACf,iBAAiB;IACjB,kBAAkB;IAClB,iBAAiB;IACjB,aAAa;GACd;EACD;IACE,cAAc;GACf;EACD;IACE,aAAa;GACd;EACD;IACE,sBAAsB;IACtB,YAAY;IACZ,iBAAiB;IACjB,YAAY;IACZ,WAAW;GACZ;EACD;IACE,cAAc;GACf;CACF;;AAED;EACE,8BAA8B;EAC9B,aAAa;EACb,UAAU;CACX;;AAED;EACE,YAAY;EACZ,WAAW;CACZ;;AAED;EACE,cAAc;CACf;;AAED;EACE,UAAU;EACV,WAAW;EACX,eAAe;EACf,YAAY;EACZ,aAAa;CACd;;AAED;EACE,aAAa;EACb,kBAAkB;CACnB;;AAED;EACE,aAAa;EACb,kBAAkB;CACnB;;AAED;EACE,4UAA4U;EAC5U,YAAY;EACZ,YAAY;EACZ,aAAa;EACb,sBAAsB;CACvB;;AAED;EACE,YAAY;EACZ,YAAY;EACZ,UAAU;EACV,WAAW;CACZ;;AAED;EACE,yBAAyB;CAC1B;;AAED;EACE,UAAU;EACV,eAAe;EACf,iBAAiB;EACjB,oBAAoB;CACrB;;AAED;EACE,aAAa;EACb,kBAAkB;EAClB,eAAe;EACf,sBAAsB;CACvB;;AAED;EACE,gBAAgB;EAChB,eAAe;EACf,mBAAmB;EACnB,uBAAuB;EACvB,oBAAoB;EACpB,kBAAkB;EAClB,aAAa;CACd;;AAED;EACE,eAAe;EACf,kBAAkB;EAClB,iBAAiB;CAClB;;AAED;EACE,aAAa;EACb,aAAa;EACb,cAAc;EACd,uBAAuB;EACvB,aAAa;EACb,aAAa;EACb,eAAe;CAChB;;AAED;EACE,w2PAAw2P;EACx2P,6BAA6B;EAC7B,4BAA4B;EAC5B,sBAAsB;EACtB,iBAAiB;EACjB,qBAAqB;EACrB,sBAAsB;EACtB,YAAY;EACZ,aAAa;EACb,eAAe;CAChB","file":"docsearch.css","sourcesContent":[".searchbox {\n  display: inline-block;\n  position: relative;\n  width: 200px;\n  height: 32px !important;\n  white-space: nowrap;\n  box-sizing: border-box;\n  visibility: visible !important;\n}\n\n.searchbox .algolia-autocomplete {\n  display: block;\n  width: 100%;\n  height: 100%;\n}\n\n.searchbox__wrapper {\n  width: 100%;\n  height: 100%;\n  z-index: 999;\n  position: relative;\n}\n\n.searchbox__input {\n  display: inline-block;\n  box-sizing: border-box;\n  transition: box-shadow 0.4s ease, background 0.4s ease;\n  border: 0;\n  border-radius: 16px;\n  box-shadow: inset 0 0 0 1px #cccccc;\n  background: #ffffff !important;\n  padding: 0;\n  padding-right: 26px;\n  padding-left: 32px;\n  width: 100%;\n  height: 100%;\n  vertical-align: middle;\n  white-space: normal;\n  font-size: 12px;\n  appearance: none;\n}\n\n.searchbox__input::-webkit-search-decoration, .searchbox__input::-webkit-search-cancel-button, .searchbox__input::-webkit-search-results-button, .searchbox__input::-webkit-search-results-decoration {\n  display: none;\n}\n\n.searchbox__input:hover {\n  box-shadow: inset 0 0 0 1px #b3b3b3;\n}\n\n.searchbox__input:focus, .searchbox__input:active {\n  outline: 0;\n  box-shadow: inset 0 0 0 1px #aaaaaa;\n  background: #ffffff;\n}\n\n.searchbox__input::placeholder {\n  color: #aaaaaa;\n}\n\n.searchbox__submit {\n  position: absolute;\n  top: 0;\n  margin: 0;\n  border: 0;\n  border-radius: 16px 0 0 16px;\n  background-color: rgba(69, 142, 225, 0);\n  padding: 0;\n  width: 32px;\n  height: 100%;\n  vertical-align: middle;\n  text-align: center;\n  font-size: inherit;\n  user-select: none;\n  right: inherit;\n  left: 0;\n}\n\n.searchbox__submit::before {\n  display: inline-block;\n  margin-right: -4px;\n  height: 100%;\n  vertical-align: middle;\n  content: '';\n}\n\n.searchbox__submit:hover, .searchbox__submit:active {\n  cursor: pointer;\n}\n\n.searchbox__submit:focus {\n  outline: 0;\n}\n\n.searchbox__submit svg {\n  width: 14px;\n  height: 14px;\n  vertical-align: middle;\n  fill: #6d7e96;\n}\n\n.searchbox__reset {\n  display: block;\n  position: absolute;\n  top: 8px;\n  right: 8px;\n  margin: 0;\n  border: 0;\n  background: none;\n  cursor: pointer;\n  padding: 0;\n  font-size: inherit;\n  user-select: none;\n  fill: rgba(0, 0, 0, 0.5);\n}\n\n.searchbox__reset.hide {\n  display: none;\n}\n\n.searchbox__reset:focus {\n  outline: 
0;\n}\n\n.searchbox__reset svg {\n  display: block;\n  margin: 4px;\n  width: 8px;\n  height: 8px;\n}\n\n.searchbox__input:valid ~ .searchbox__reset {\n  display: block;\n  animation-name: sbx-reset-in;\n  animation-duration: 0.15s;\n}\n\n@keyframes sbx-reset-in {\n  0% {\n    transform: translate3d(-20%, 0, 0);\n    opacity: 0;\n  }\n  100% {\n    transform: none;\n    opacity: 1;\n  }\n}\n\n.algolia-autocomplete.algolia-autocomplete-right .ds-dropdown-menu {\n  right: 0 !important;\n  left: inherit !important;\n}\n\n.algolia-autocomplete.algolia-autocomplete-right .ds-dropdown-menu:before {\n  right: 48px;\n}\n\n.algolia-autocomplete.algolia-autocomplete-left .ds-dropdown-menu {\n  left: 0 !important;\n  right: inherit !important;\n}\n\n.algolia-autocomplete.algolia-autocomplete-left .ds-dropdown-menu:before {\n  left: 48px;\n}\n\n.algolia-autocomplete .ds-dropdown-menu {\n  position: relative;\n  top: -6px;\n  border-radius: 4px;\n  margin: 6px 0 0;\n  padding: 0;\n  text-align: left;\n  height: auto;\n  position: relative;\n  background: transparent;\n  border: none;\n  z-index: 999;\n  max-width: 600px;\n  min-width: 500px;\n  box-shadow: 0 1px 0 0 rgba(0, 0, 0, 0.2), 0 2px 3px 0 rgba(0, 0, 0, 0.1);\n}\n\n.algolia-autocomplete .ds-dropdown-menu:before {\n  display: block;\n  position: absolute;\n  content: '';\n  width: 14px;\n  height: 14px;\n  background: #fff;\n  z-index: 1000;\n  top: -7px;\n  border-top: 1px solid #d9d9d9;\n  border-right: 1px solid #d9d9d9;\n  transform: rotate(-45deg);\n  border-radius: 2px;\n}\n\n.algolia-autocomplete .ds-dropdown-menu .ds-suggestions {\n  position: relative;\n  z-index: 1000;\n  margin-top: 8px;\n}\n\n.algolia-autocomplete .ds-dropdown-menu .ds-suggestions a:hover {\n  text-decoration: none;\n}\n\n.algolia-autocomplete .ds-dropdown-menu .ds-suggestion {\n  cursor: pointer;\n}\n\n.algolia-autocomplete .ds-dropdown-menu .ds-suggestion.ds-cursor .algolia-docsearch-suggestion.suggestion-layout-simple {\n  background-color: rgba(69, 142, 225, 0.05);\n}\n\n.algolia-autocomplete .ds-dropdown-menu .ds-suggestion.ds-cursor .algolia-docsearch-suggestion:not(.suggestion-layout-simple) .algolia-docsearch-suggestion--content {\n  background-color: rgba(69, 142, 225, 0.05);\n}\n\n.algolia-autocomplete .ds-dropdown-menu [class^='ds-dataset-'] {\n  position: relative;\n  border: solid 1px #d9d9d9;\n  background: #fff;\n  border-radius: 4px;\n  overflow: auto;\n  padding: 0 8px 8px;\n}\n\n.algolia-autocomplete .ds-dropdown-menu * {\n  box-sizing: border-box;\n}\n\n.algolia-autocomplete .algolia-docsearch-suggestion {\n  display: block;\n  position: relative;\n  padding: 0 8px;\n  background: #fff;\n  color: #02060c;\n  overflow: hidden;\n}\n\n.algolia-autocomplete .algolia-docsearch-suggestion--highlight {\n  color: #174d8c;\n  background: rgba(143, 187, 237, 0.1);\n  padding: 0.1em 0.05em;\n}\n\n.algolia-autocomplete .algolia-docsearch-suggestion--category-header .algolia-docsearch-suggestion--category-header-lvl0\n.algolia-docsearch-suggestion--highlight,\n.algolia-autocomplete .algolia-docsearch-suggestion--category-header .algolia-docsearch-suggestion--category-header-lvl1\n.algolia-docsearch-suggestion--highlight {\n  padding: 0 0 1px;\n  background: inherit;\n  box-shadow: inset 0 -2px 0 0 rgba(69, 142, 225, 0.8);\n  color: inherit;\n}\n\n.algolia-autocomplete .algolia-docsearch-suggestion--text .algolia-docsearch-suggestion--highlight {\n  padding: 0 0 1px;\n  background: inherit;\n  box-shadow: inset 0 -2px 0 0 rgba(69, 142, 225, 0.8);\n  color: 
inherit;\n}\n\n.algolia-autocomplete .algolia-docsearch-suggestion--content {\n  display: block;\n  float: right;\n  width: 70%;\n  position: relative;\n  padding: 5.33333px 0 5.33333px 10.66667px;\n  cursor: pointer;\n}\n\n.algolia-autocomplete .algolia-docsearch-suggestion--content:before {\n  content: '';\n  position: absolute;\n  display: block;\n  top: 0;\n  height: 100%;\n  width: 1px;\n  background: #ddd;\n  left: -1px;\n}\n\n.algolia-autocomplete .algolia-docsearch-suggestion--category-header {\n  position: relative;\n  border-bottom: 1px solid #ddd;\n  display: none;\n  margin-top: 8px;\n  padding: 4px 0;\n  font-size: 1em;\n  color: #33363d;\n}\n\n.algolia-autocomplete .algolia-docsearch-suggestion--wrapper {\n  width: 100%;\n  float: left;\n  padding: 8px 0 0 0;\n}\n\n.algolia-autocomplete .algolia-docsearch-suggestion--subcategory-column {\n  float: left;\n  width: 30%;\n  padding-left: 0;\n  text-align: right;\n  position: relative;\n  padding: 5.33333px 10.66667px;\n  color: #a4a7ae;\n  font-size: 0.9em;\n  word-wrap: break-word;\n}\n\n.algolia-autocomplete .algolia-docsearch-suggestion--subcategory-column:before {\n  content: '';\n  position: absolute;\n  display: block;\n  top: 0;\n  height: 100%;\n  width: 1px;\n  background: #ddd;\n  right: 0;\n}\n\n.algolia-autocomplete .algolia-docsearch-suggestion--subcategory-inline {\n  display: none;\n}\n\n.algolia-autocomplete .algolia-docsearch-suggestion--title {\n  margin-bottom: 4px;\n  color: #02060c;\n  font-size: 0.9em;\n  font-weight: bold;\n}\n\n.algolia-autocomplete .algolia-docsearch-suggestion--text {\n  display: block;\n  line-height: 1.2em;\n  font-size: 0.85em;\n  color: #63676d;\n}\n\n.algolia-autocomplete .algolia-docsearch-suggestion--no-results {\n  width: 100%;\n  padding: 8px 0;\n  text-align: center;\n  font-size: 1.2em;\n}\n\n.algolia-autocomplete .algolia-docsearch-suggestion--no-results::before {\n  display: none;\n}\n\n.algolia-autocomplete .algolia-docsearch-suggestion code {\n  padding: 1px 5px;\n  font-size: 90%;\n  border: none;\n  color: #222222;\n  background-color: #ebebeb;\n  border-radius: 3px;\n  font-family: Menlo, Monaco, Consolas, 'Courier New', monospace;\n}\n\n.algolia-autocomplete .algolia-docsearch-suggestion code .algolia-docsearch-suggestion--highlight {\n  background: none;\n}\n\n.algolia-autocomplete .algolia-docsearch-suggestion.algolia-docsearch-suggestion__main .algolia-docsearch-suggestion--category-header {\n  display: block;\n}\n\n.algolia-autocomplete .algolia-docsearch-suggestion.algolia-docsearch-suggestion__secondary {\n  display: block;\n}\n\n@media all and (min-width: 768px) {\n  .algolia-autocomplete .algolia-docsearch-suggestion .algolia-docsearch-suggestion--subcategory-column {\n    display: block;\n  }\n}\n\n@media all and (max-width: 768px) {\n  .algolia-autocomplete .algolia-docsearch-suggestion .algolia-docsearch-suggestion--subcategory-column {\n    display: inline-block;\n    width: auto;\n    text-align: left;\n    float: left;\n    padding: 0;\n    color: #02060c;\n    font-size: 0.9em;\n    font-weight: bold;\n    text-align: left;\n    opacity: 0.5;\n  }\n  .algolia-autocomplete .algolia-docsearch-suggestion .algolia-docsearch-suggestion--subcategory-column:before {\n    display: none;\n  }\n  .algolia-autocomplete .algolia-docsearch-suggestion .algolia-docsearch-suggestion--subcategory-column:after {\n    content: '|';\n  }\n  .algolia-autocomplete .algolia-docsearch-suggestion .algolia-docsearch-suggestion--content {\n    display: inline-block;\n    width: 
auto;\n    text-align: left;\n    float: left;\n    padding: 0;\n  }\n  .algolia-autocomplete .algolia-docsearch-suggestion .algolia-docsearch-suggestion--content:before {\n    display: none;\n  }\n}\n\n.algolia-autocomplete .suggestion-layout-simple.algolia-docsearch-suggestion {\n  border-bottom: solid 1px #eee;\n  padding: 8px;\n  margin: 0;\n}\n\n.algolia-autocomplete .suggestion-layout-simple .algolia-docsearch-suggestion--content {\n  width: 100%;\n  padding: 0;\n}\n\n.algolia-autocomplete .suggestion-layout-simple .algolia-docsearch-suggestion--content::before {\n  display: none;\n}\n\n.algolia-autocomplete .suggestion-layout-simple .algolia-docsearch-suggestion--category-header {\n  margin: 0;\n  padding: 0;\n  display: block;\n  width: 100%;\n  border: none;\n}\n\n.algolia-autocomplete .suggestion-layout-simple .algolia-docsearch-suggestion--category-header-lvl0 {\n  opacity: 0.6;\n  font-size: 0.85em;\n}\n\n.algolia-autocomplete .suggestion-layout-simple .algolia-docsearch-suggestion--category-header-lvl1 {\n  opacity: 0.6;\n  font-size: 0.85em;\n}\n\n.algolia-autocomplete .suggestion-layout-simple .algolia-docsearch-suggestion--category-header-lvl1::before {\n  background-image: url('data:image/svg+xml;utf8,<svg width=\"10\" height=\"10\" viewBox=\"0 0 20 38\" xmlns=\"http://www.w3.org/2000/svg\"><path d=\"M1.49 4.31l14 16.126.002-2.624-14 16.074-1.314 1.51 3.017 2.626 1.313-1.508 14-16.075 1.142-1.313-1.14-1.313-14-16.125L3.2.18.18 2.8l1.31 1.51z\" fill-rule=\"evenodd\" fill=\"%231D3657\" /></svg>');\n  content: '';\n  width: 10px;\n  height: 10px;\n  display: inline-block;\n}\n\n.algolia-autocomplete .suggestion-layout-simple .algolia-docsearch-suggestion--wrapper {\n  width: 100%;\n  float: left;\n  margin: 0;\n  padding: 0;\n}\n\n.algolia-autocomplete .suggestion-layout-simple .algolia-docsearch-suggestion--duplicate-content, .algolia-autocomplete .suggestion-layout-simple .algolia-docsearch-suggestion--subcategory-inline {\n  display: none !important;\n}\n\n.algolia-autocomplete .suggestion-layout-simple .algolia-docsearch-suggestion--title {\n  margin: 0;\n  color: #458ee1;\n  font-size: 0.9em;\n  font-weight: normal;\n}\n\n.algolia-autocomplete .suggestion-layout-simple .algolia-docsearch-suggestion--title::before {\n  content: '#';\n  font-weight: bold;\n  color: #458ee1;\n  display: inline-block;\n}\n\n.algolia-autocomplete .suggestion-layout-simple .algolia-docsearch-suggestion--text {\n  margin: 4px 0 0;\n  display: block;\n  line-height: 1.4em;\n  padding: 5.33333px 8px;\n  background: #f8f8f8;\n  font-size: 0.85em;\n  opacity: 0.8;\n}\n\n.algolia-autocomplete .suggestion-layout-simple .algolia-docsearch-suggestion--text .algolia-docsearch-suggestion--highlight {\n  color: #3f4145;\n  font-weight: bold;\n  box-shadow: none;\n}\n\n.algolia-autocomplete .algolia-docsearch-footer {\n  width: 134px;\n  height: 20px;\n  z-index: 2000;\n  margin-top: 10.66667px;\n  float: right;\n  font-size: 0;\n  line-height: 0;\n}\n\n.algolia-autocomplete .algolia-docsearch-footer--logo {\n  background-image: url(\"data:image/svg+xml,%3Csvg width='168' height='24' xmlns='http://www.w3.org/2000/svg'%3E%3Cg fill='none' fill-rule='evenodd'%3E%3Cpath d='M78.988.938h16.594a2.968 2.968 0 0 1 2.966 2.966V20.5a2.967 2.967 0 0 1-2.966 2.964H78.988a2.967 2.967 0 0 1-2.966-2.964V3.897A2.961 2.961 0 0 1 78.988.938zm41.937 17.866c-4.386.02-4.386-3.54-4.386-4.106l-.007-13.336 2.675-.424v13.254c0 .322 0 2.358 1.718 2.364v2.248zm-10.846-2.18c.821 0 1.43-.047 1.855-.129v-2.719a6.334 6.334 0 0 
0-1.574-.199c-.295 0-.596.021-.897.069a2.699 2.699 0 0 0-.814.24c-.24.116-.439.28-.582.491-.15.212-.219.335-.219.656 0 .628.219.991.616 1.23s.938.362 1.615.362zm-.233-9.7c.883 0 1.629.109 2.231.328.602.218 1.088.525 1.444.915.363.396.609.922.76 1.483.157.56.232 1.175.232 1.85v6.874c-.41.089-1.034.19-1.868.314-.834.123-1.772.185-2.813.185-.69 0-1.327-.069-1.895-.198a4.001 4.001 0 0 1-1.471-.636 3.085 3.085 0 0 1-.951-1.134c-.226-.465-.343-1.12-.343-1.803 0-.656.13-1.073.384-1.525.26-.45.608-.819 1.047-1.106.445-.287.95-.492 1.532-.615a8.8 8.8 0 0 1 1.82-.185 8.404 8.404 0 0 1 1.972.24v-.438c0-.307-.035-.6-.11-.874a1.88 1.88 0 0 0-.384-.73 1.784 1.784 0 0 0-.724-.493 3.164 3.164 0 0 0-1.143-.205c-.616 0-1.177.075-1.69.164a7.735 7.735 0 0 0-1.26.307l-.321-2.192c.335-.117.834-.233 1.478-.349a10.98 10.98 0 0 1 2.073-.178zm52.842 9.626c.822 0 1.43-.048 1.854-.13V13.7a6.347 6.347 0 0 0-1.574-.199c-.294 0-.595.021-.896.069a2.7 2.7 0 0 0-.814.24 1.46 1.46 0 0 0-.582.491c-.15.212-.218.335-.218.656 0 .628.218.991.615 1.23.404.245.938.362 1.615.362zm-.226-9.694c.883 0 1.629.108 2.231.327.602.219 1.088.526 1.444.915.355.39.609.923.759 1.483.158.56.233 1.175.233 1.852v6.873c-.41.088-1.034.19-1.868.314-.834.123-1.772.184-2.813.184-.69 0-1.327-.068-1.895-.198a4.001 4.001 0 0 1-1.471-.635 3.085 3.085 0 0 1-.951-1.134c-.226-.465-.343-1.12-.343-1.804 0-.656.13-1.073.384-1.524.26-.45.608-.82 1.047-1.107.445-.286.95-.491 1.532-.614a8.803 8.803 0 0 1 2.751-.13c.329.034.671.096 1.04.185v-.437a3.3 3.3 0 0 0-.109-.875 1.873 1.873 0 0 0-.384-.731 1.784 1.784 0 0 0-.724-.492 3.165 3.165 0 0 0-1.143-.205c-.616 0-1.177.075-1.69.164-.514.089-.938.191-1.26.307l-.321-2.193c.335-.116.834-.232 1.478-.348a11.633 11.633 0 0 1 2.073-.177zm-8.034-1.271a1.626 1.626 0 0 1-1.628-1.62c0-.895.725-1.62 1.628-1.62.904 0 1.63.725 1.63 1.62 0 .895-.733 1.62-1.63 1.62zm1.348 13.22h-2.689V7.27l2.69-.423v11.956zm-4.714 0c-4.386.02-4.386-3.54-4.386-4.107l-.008-13.336 2.676-.424v13.254c0 .322 0 2.358 1.718 2.364v2.248zm-8.698-5.903c0-1.156-.253-2.119-.746-2.788-.493-.677-1.183-1.01-2.067-1.01-.882 0-1.574.333-2.065 1.01-.493.676-.733 1.632-.733 2.788 0 1.168.246 1.953.74 2.63.492.683 1.183 1.018 2.066 1.018.882 0 1.574-.342 2.067-1.019.492-.683.738-1.46.738-2.63zm2.737-.007c0 .902-.13 1.584-.397 2.33a5.52 5.52 0 0 1-1.128 1.906 4.986 4.986 0 0 1-1.752 1.223c-.685.286-1.739.45-2.265.45-.528-.006-1.574-.157-2.252-.45a5.096 5.096 0 0 1-1.744-1.223c-.487-.527-.863-1.162-1.137-1.906a6.345 6.345 0 0 1-.41-2.33c0-.902.123-1.77.397-2.508a5.554 5.554 0 0 1 1.15-1.892 5.133 5.133 0 0 1 1.75-1.216c.679-.287 1.425-.423 2.232-.423.808 0 1.553.142 2.237.423.685.286 1.274.69 1.753 1.216a5.644 5.644 0 0 1 1.135 1.892c.287.738.431 1.606.431 2.508zm-20.138 0c0 1.12.246 2.363.738 2.882.493.52 1.13.78 1.91.78.424 0 .828-.062 1.204-.178.377-.116.677-.253.917-.417V9.33a10.476 10.476 0 0 0-1.766-.226c-.971-.028-1.71.37-2.23 1.004-.513.636-.773 1.75-.773 2.788zm7.438 5.274c0 1.824-.466 3.156-1.404 4.004-.936.846-2.367 1.27-4.296 1.27-.705 0-2.17-.137-3.34-.396l.431-2.118c.98.205 2.272.26 2.95.26 1.074 0 1.84-.219 2.299-.656.459-.437.684-1.086.684-1.948v-.437a8.07 8.07 0 0 1-1.047.397c-.43.13-.93.198-1.492.198-.739 0-1.41-.116-2.018-.349a4.206 4.206 0 0 1-1.567-1.025c-.431-.45-.774-1.017-1.013-1.694-.24-.677-.363-1.885-.363-2.773 0-.834.13-1.88.384-2.577.26-.696.629-1.298 1.129-1.796.493-.498 1.095-.881 1.8-1.162a6.605 6.605 0 0 1 2.428-.457c.87 0 1.67.109 2.45.24.78.129 1.444.265 1.985.415V18.17z' fill='%235468FF'/%3E%3Cpath d='M6.972 
6.677v1.627c-.712-.446-1.52-.67-2.425-.67-.585 0-1.045.13-1.38.391a1.24 1.24 0 0 0-.502 1.03c0 .425.164.765.494 1.02.33.256.835.532 1.516.83.447.192.795.356 1.045.495.25.138.537.332.862.582.324.25.563.548.718.894.154.345.23.741.23 1.188 0 .947-.334 1.691-1.004 2.234-.67.542-1.537.814-2.601.814-1.18 0-2.16-.229-2.936-.686v-1.708c.84.628 1.814.942 2.92.942.585 0 1.048-.136 1.388-.407.34-.271.51-.646.51-1.125 0-.287-.1-.55-.302-.79-.203-.24-.42-.42-.655-.542-.234-.123-.585-.29-1.053-.503-.276-.127-.47-.218-.582-.271a13.67 13.67 0 0 1-.55-.287 4.275 4.275 0 0 1-.567-.351 6.92 6.92 0 0 1-.455-.4c-.18-.17-.31-.34-.39-.51-.08-.17-.155-.37-.224-.598a2.553 2.553 0 0 1-.104-.742c0-.915.333-1.638.998-2.17.664-.532 1.523-.798 2.576-.798.968 0 1.793.17 2.473.51zm7.468 5.696v-.287c-.022-.607-.187-1.088-.495-1.444-.309-.357-.75-.535-1.324-.535-.532 0-.99.194-1.373.583-.382.388-.622.949-.717 1.683h3.909zm1.005 2.792v1.404c-.596.34-1.383.51-2.362.51-1.255 0-2.255-.377-3-1.132-.744-.755-1.116-1.744-1.116-2.968 0-1.297.34-2.316 1.021-3.055.68-.74 1.548-1.11 2.6-1.11 1.033 0 1.852.323 2.458.966.606.644.91 1.572.91 2.784 0 .33-.033.676-.096 1.038h-5.314c.107.702.405 1.239.894 1.611.49.372 1.106.558 1.85.558.862 0 1.58-.202 2.155-.606zm6.605-1.77h-1.212c-.596 0-1.045.116-1.349.35-.303.234-.454.532-.454.894 0 .372.117.664.35.877.235.213.575.32 1.022.32.51 0 .912-.142 1.204-.424.293-.281.44-.651.44-1.108v-.91zm-4.068-2.554V9.325c.627-.361 1.457-.542 2.489-.542 2.116 0 3.175 1.026 3.175 3.08V17h-1.548v-.957c-.415.68-1.143 1.02-2.186 1.02-.766 0-1.38-.22-1.843-.661-.462-.442-.694-1.003-.694-1.684 0-.776.293-1.38.878-1.81.585-.431 1.404-.647 2.457-.647h1.34V11.8c0-.554-.133-.971-.399-1.253-.266-.282-.707-.423-1.324-.423a4.07 4.07 0 0 0-2.345.718zm9.333-1.93v1.42c.394-1 1.101-1.5 2.123-1.5.148 0 .313.016.494.048v1.531a1.885 1.885 0 0 0-.75-.143c-.542 0-.989.24-1.34.718-.351.479-.527 1.048-.527 1.707V17h-1.563V8.91h1.563zm5.01 4.084c.022.82.272 1.492.75 2.019.479.526 1.15.79 2.01.79.639 0 1.235-.176 1.788-.527v1.404c-.521.319-1.186.479-1.995.479-1.265 0-2.276-.4-3.031-1.197-.755-.798-1.133-1.792-1.133-2.984 0-1.16.38-2.151 1.14-2.975.761-.825 1.79-1.237 3.088-1.237.702 0 1.346.149 1.93.447v1.436a3.242 3.242 0 0 0-1.77-.495c-.84 0-1.513.266-2.019.798-.505.532-.758 1.213-.758 2.042zM40.24 5.72v4.579c.458-1 1.293-1.5 2.505-1.5.787 0 1.42.245 1.899.734.479.49.718 1.17.718 2.042V17h-1.564v-5.106c0-.553-.14-.98-.422-1.284-.282-.303-.652-.455-1.11-.455-.531 0-1.002.202-1.411.606-.41.405-.615 1.022-.615 1.851V17h-1.563V5.72h1.563zm14.966 10.02c.596 0 1.096-.253 1.5-.758.404-.506.606-1.157.606-1.955 0-.915-.202-1.62-.606-2.114-.404-.495-.92-.742-1.548-.742-.553 0-1.05.224-1.491.67-.442.447-.662 1.133-.662 2.058 0 .958.212 1.67.638 2.138.425.469.946.703 1.563.703zM53.004 5.72v4.42c.574-.894 1.388-1.341 2.44-1.341 1.022 0 1.857.383 2.506 1.149.649.766.973 1.781.973 3.047 0 1.138-.309 2.109-.925 2.912-.617.803-1.463 1.205-2.537 1.205-1.075 0-1.894-.447-2.457-1.34V17h-1.58V5.72h1.58zm9.908 11.104l-3.223-7.913h1.739l1.005 2.632 1.26 3.415c.096-.32.48-1.458 1.15-3.415l.909-2.632h1.66l-2.92 7.866c-.777 2.074-1.963 3.11-3.559 3.11a2.92 2.92 0 0 1-.734-.079v-1.34c.17.042.351.064.543.064 1.032 0 1.755-.57 2.17-1.708z' fill='%235D6494'/%3E%3Cpath d='M89.632 5.967v-.772a.978.978 0 0 0-.978-.977h-2.28a.978.978 0 0 0-.978.977v.793c0 .088.082.15.171.13a7.127 7.127 0 0 1 1.984-.28c.65 0 1.295.088 1.917.259.082.02.164-.04.164-.13m-6.248 1.01l-.39-.389a.977.977 0 0 0-1.382 0l-.465.465a.973.973 0 0 0 0 
1.38l.383.383c.062.061.15.047.205-.014.226-.307.472-.601.746-.874.281-.28.568-.526.883-.751.068-.042.075-.137.02-.2m4.16 2.453v3.341c0 .096.104.165.192.117l2.97-1.537c.068-.034.089-.117.055-.184a3.695 3.695 0 0 0-3.08-1.866c-.068 0-.136.054-.136.13m0 8.048a4.489 4.489 0 0 1-4.49-4.482 4.488 4.488 0 0 1 4.49-4.482 4.488 4.488 0 0 1 4.489 4.482 4.484 4.484 0 0 1-4.49 4.482m0-10.85a6.363 6.363 0 1 0 0 12.729c3.518 0 6.372-2.85 6.372-6.368a6.358 6.358 0 0 0-6.371-6.36' fill='%23FFF'/%3E%3C/g%3E%3C/svg%3E%0A\");\n  background-repeat: no-repeat;\n  background-position: center;\n  background-size: 100%;\n  overflow: hidden;\n  text-indent: -9000px;\n  padding: 0 !important;\n  width: 100%;\n  height: 100%;\n  display: block;\n}\n"]} */ \ No newline at end of file diff --git a/website/css/highlight.css b/website/css/highlight.css index 52f65bfc74e..e69de29bb2d 100644 --- a/website/css/highlight.css +++ b/website/css/highlight.css @@ -1,76 +0,0 @@ -/* - Name: Base16 Eighties Dark - Author: Chris Kempson (http://chriskempson.com) - Pygments template by Jan T. Sott (https://github.com/idleberg) - Created with Base16 Builder by Chris Kempson (https://github.com/chriskempson/base16-builder) -*/ - -@media (prefers-color-scheme: dark) { - -.syntax .hll { background-color: #515151 } -.syntax { background: #2d2d2d; color: #f2f0ec } -.syntax .c { color: #747369 } /* Comment */ -.syntax .err { color: #f2777a } /* Error */ -.syntax .k { color: #cc99cc } /* Keyword */ -.syntax .l { color: #f99157 } /* Literal */ -.syntax .n { color: #f2f0ec } /* Name */ -.syntax .o { color: #66cccc } /* Operator */ -.syntax .p { color: #f2f0ec } /* Punctuation */ -.syntax .cm { color: #747369 } /* Comment.Multiline */ -.syntax .cp { color: #747369 } /* Comment.Preproc */ -.syntax .c1 { color: #747369 } /* Comment.Single */ -.syntax .cs { color: #747369 } /* Comment.Special */ -.syntax .gd { color: #f2777a } /* Generic.Deleted */ -.syntax .ge { font-style: italic } /* Generic.Emph */ -.syntax .gh { color: #f2f0ec; font-weight: bold } /* Generic.Heading */ -.syntax .gi { color: #99cc99 } /* Generic.Inserted */ -.syntax .gp { color: #747369; font-weight: bold } /* Generic.Prompt */ -.syntax .gs { font-weight: bold } /* Generic.Strong */ -.syntax .gu { color: #66cccc; font-weight: bold } /* Generic.Subheading */ -.syntax .kc { color: #cc99cc } /* Keyword.Constant */ -.syntax .kd { color: #cc99cc } /* Keyword.Declaration */ -.syntax .kn { color: #66cccc } /* Keyword.Namespace */ -.syntax .kp { color: #cc99cc } /* Keyword.Pseudo */ -.syntax .kr { color: #cc99cc } /* Keyword.Reserved */ -.syntax .kt { color: #ffcc66 } /* Keyword.Type */ -.syntax .ld { color: #99cc99 } /* Literal.Date */ -.syntax .m { color: #f99157 } /* Literal.Number */ -.syntax .s { color: #99cc99 } /* Literal.String */ -.syntax .na { color: #6699cc } /* Name.Attribute */ -.syntax .nb { color: #f2f0ec } /* Name.Builtin */ -.syntax .nc { color: #ffcc66 } /* Name.Class */ -.syntax .no { color: #f2777a } /* Name.Constant */ -.syntax .nd { color: #66cccc } /* Name.Decorator */ -.syntax .ni { color: #f2f0ec } /* Name.Entity */ -.syntax .ne { color: #f2777a } /* Name.Exception */ -.syntax .nf { color: #6699cc } /* Name.Function */ -.syntax .nl { color: #f2f0ec } /* Name.Label */ -.syntax .nn { color: #ffcc66 } /* Name.Namespace */ -.syntax .nx { color: #6699cc } /* Name.Other */ -.syntax .py { color: #f2f0ec } /* Name.Property */ -.syntax .nt { color: #66cccc } /* Name.Tag */ -.syntax .nv { color: #f2777a } /* Name.Variable */ -.syntax .ow { color: #66cccc } /* 
Operator.Word */ -.syntax .w { color: #f2f0ec } /* Text.Whitespace */ -.syntax .mf { color: #f99157 } /* Literal.Number.Float */ -.syntax .mh { color: #f99157 } /* Literal.Number.Hex */ -.syntax .mi { color: #f99157 } /* Literal.Number.Integer */ -.syntax .mo { color: #f99157 } /* Literal.Number.Oct */ -.syntax .sb { color: #99cc99 } /* Literal.String.Backtick */ -.syntax .sc { color: #f2f0ec } /* Literal.String.Char */ -.syntax .sd { color: #747369 } /* Literal.String.Doc */ -.syntax .s2 { color: #99cc99 } /* Literal.String.Double */ -.syntax .se { color: #f99157 } /* Literal.String.Escape */ -.syntax .sh { color: #99cc99 } /* Literal.String.Heredoc */ -.syntax .si { color: #f99157 } /* Literal.String.Interpol */ -.syntax .sx { color: #99cc99 } /* Literal.String.Other */ -.syntax .sr { color: #99cc99 } /* Literal.String.Regex */ -.syntax .s1 { color: #99cc99 } /* Literal.String.Single */ -.syntax .ss { color: #99cc99 } /* Literal.String.Symbol */ -.syntax .bp { color: #f2f0ec } /* Name.Builtin.Pseudo */ -.syntax .vc { color: #f2777a } /* Name.Variable.Class */ -.syntax .vg { color: #f2777a } /* Name.Variable.Global */ -.syntax .vi { color: #f2777a } /* Name.Variable.Instance */ -.syntax .il { color: #f99157 } /* Literal.Number.Integer.Long */ - -} diff --git a/website/css/main.css b/website/css/main.css index cd8ac0a8810..1b4f7c48830 100644 --- a/website/css/main.css +++ b/website/css/main.css @@ -1 +1 @@ -@media screen and (max-width:978.98px){.btn{padding:8px 16px}}@media screen and (max-width:978.98px){.btn-lg{padding:12px 24px}}.btn-primary,.btn-primary:active,.btn-primary:hover{color:#212529}.btn-outline-primary{background:#fffaf0;border-color:#fc0;color:#212529}.btn-outline-primary:active,.btn-outline-primary:hover{background:#fc0;border-color:#fc0;color:#212529}.btn-secondary{border-color:#212529;color:#fff}.btn-outline-secondary,.btn-secondary:active,.btn-secondary:hover{background:#fff;border-color:#212529;color:#212529}.btn-outline-secondary:active,.btn-outline-secondary:hover{background:#212529;border-color:#212529;color:#fff}.btn-tertiary{border-color:#257af4;color:#fff}.btn-tertiary:active,.btn-tertiary:hover{background:#257af4;border-color:#257af4;color:#fff}.btn-outline-tertiary{background:#e3f1fe;color:#257af4}.btn-outline-tertiary:active,.btn-outline-tertiary:hover{background:#257af4;color:#fff}.btns{align-items:center;display:grid;-moz-column-gap:24px;column-gap:24px;row-gap:16px;grid-auto-flow:column;justify-content:center}@media screen and (max-width:767.98px){.btns{grid-auto-flow:row}}.btns.btns-lg{-moz-column-gap:40px;column-gap:40px}.btns.is-2{grid-template-columns:1fr 1fr}@media screen and (max-width:767.98px){.btns.is-2{grid-template-columns:1fr}}.btns.is-3{grid-template-columns:1fr 1fr 1fr}@media screen and (max-width:767.98px){.btns.is-3{grid-template-columns:1fr}}.card{box-shadow:0 8px 20px rgba(108,117,125,.2);overflow:hidden;transition:box-shadow .2s,transform .2s;width:100%}.card,.card-body{position:relative}.card-body{z-index:10}.card.is-large .card-body{padding:40px}.card.bg-primary-light{border-color:#fc0}.card.has-dark-border{border-color:#6c757d}.card.has-pattern:after,.card.has-pattern:before{background-repeat:no-repeat;background-size:auto 100%;bottom:0;content:"";display:block;position:absolute;top:0;width:72px}.card.has-pattern:before{background-image:url(../images/backgrounds/bg-card-pattern-blue-1.png);background-position:0 
0;left:0}.card.has-pattern:after{background-image:url(../images/backgrounds/bg-card-pattern-blue-2.png);background-position:100% 0;right:0}.card.has-hover:active,.card.has-hover:hover,a.card:active,a.card:hover{box-shadow:0 12px 32px rgba(108,117,125,.2);transform:translateY(-8px)}.card.has-highlight:after,.card.has-hover:after,a.card:after{content:"";display:block;height:8px;margin-top:auto;transition:background .2s;width:100%}.card.has-highlight:after,.card.has-hover:active:after,.card.has-hover:hover:after,a.card:active:after,a.card:hover:after{background:#e3f1fe}.case-study-cards{-moz-column-gap:40px;column-gap:40px;display:grid;grid-template-columns:1fr;row-gap:40px;padding-bottom:40px;position:relative}.case-study-cards>div{align-items:stretch;display:flex}.case-study-cards:before{background:#d6dbdf;bottom:0;content:"";display:block;left:20px;position:absolute;top:40px;width:100vw}@media screen and (min-width:980px){.case-study-cards{grid-template-columns:repeat(2,minmax(0,1fr));row-gap:80px;padding-bottom:120px}.case-study-cards:before{left:-40px;top:120px}}.case-study-card{align-items:stretch;flex-direction:row;flex-shrink:0;left:0;transition:box-shadow .2s,left .4s,width .4s,z-index 0s;transition-delay:0s,.6s,.6s,0s;width:100%;z-index:2}@media screen and (max-width:979.98px){.case-study-card .row{min-height:0!important}}@media screen and (min-width:980px){.case-study-card:active,.case-study-card:hover{box-shadow:0 12px 32px rgba(108,117,125,.2)}.case-study-card:not(.is-open){cursor:pointer}.case-study-card.is-open{transform:none!important;transition-delay:0s,0s,0s,0s;width:calc(200% + 40px);z-index:10}.case-study-card.is-closing{z-index:10}.case-study-card.open-left.is-open{left:calc(-100% - 40px)}.case-study-card:before{background:no-repeat url(../images/backgrounds/bg-card-pattern-red.png);background-position:100%;background-size:contain;content:"";display:block;height:calc(100% - 80px);max-height:224px;max-width:234px;position:absolute;right:0;top:40px;transform:translateX(30%);transition:transform .4s;transition-delay:.6s;width:100%;z-index:1}}@media screen and (min-width:980px)and (min-width:1240px){.case-study-card:before{transform:translateX(50%)}}@media screen and (min-width:980px){.case-study-card.is-open:before{transform:translateX(70%);transition-delay:0s}}@media screen and (min-width:980px){.case-study-card-wrap{align-items:stretch;display:flex;flex-shrink:0;min-height:304px;position:relative;transition:width .4s;transition-delay:.6s;width:calc(200% + 42px);z-index:2}}@media screen and (min-width:980px){.case-study-card.is-open .case-study-card-wrap{transition-delay:0s;width:100%}}@media screen and (min-width:980px){.case-study-card-body{display:flex;flex-direction:column;padding-right:80px!important}.case-study-card-body>.row{align-self:stretch}}@media screen and (min-width:980px){.case-study-card-toggle{background:#fff;box-shadow:0 8px 20px rgba(108,117,125,.2);border-radius:100%;cursor:pointer;height:56px;position:relative;width:56px}.case-study-card-toggle:after,.case-study-card-toggle:before{background:#257af4;content:"";display:block;height:4px;left:calc(50% - 15px);position:absolute;top:calc(50% - 2px);transition:opacity .2s,transform .2s;width:30px}.case-study-card-toggle:after{transform:rotate(90deg)}}@media screen and (min-width:980px){.case-study-card.is-open .case-study-card-toggle:before{opacity:0;transform:rotate(-90deg)}}@media screen and (min-width:980px){.case-study-card.is-open .case-study-card-toggle:after{transform:rotate(0)}}@media screen and 
(min-width:980px){.case-study-card .col-lg-3{left:-60%;position:relative;transition:left .4s;transition-delay:.6s}}@media screen and (min-width:980px)and (min-width:980px){.case-study-card .col-lg-3{flex:0 0 250px;max-width:250px;width:250px}}@media screen and (min-width:980px){.case-study-card.is-open .col-lg-3{left:0;transition-delay:0s}}@media screen and (min-width:980px){.case-study-card .col-lg-auto{opacity:0;transform:translateX(24px);transition:opacity .4s,transform .4s;transition-delay:.2s}}@media screen and (min-width:980px)and (min-width:980px){.case-study-card .col-lg-auto{max-width:605px;width:calc(100% - 319px)}}@media screen and (min-width:980px){.case-study-card.is-open .col-lg-auto{opacity:1;transform:none;transition-delay:.2s}}.footer-copy,.footer-links{white-space:nowrap}form .form-group{position:relative}form .form-group.is-select:before{border-left:6px solid transparent;border-right:6px solid transparent;border-top:8px solid #6c757d;content:"";display:block;position:absolute;right:33px;top:calc(50% - 4px);z-index:10}form .form-control{border:1px solid #6c757d;border-radius:6px;height:auto;line-height:20px;min-height:44px;padding:12px 16px;width:100%}form .form-control,form .form-control:focus{box-shadow:0 8px 20px rgba(108,117,125,.2);color:#212529}form .form-control:focus{border-color:#212529}form .form-control::-moz-placeholder{color:#6c757d}form .form-control:-ms-input-placeholder{color:#6c757d}form .form-control::placeholder{color:#6c757d}form select.form-control{-webkit-appearance:none;-moz-appearance:none;appearance:none;padding-right:24px;white-space:pre-wrap}form select.form-control:not([data-chosen]){color:#6c757d}form .btn-secondary:active,form .btn-secondary:hover{color:#212529;background:#fc0;border-color:#fc0}.hero{overflow:visible;position:relative}.hero,.hero-bg{background-repeat:no-repeat;background-position:50%;background-size:cover}.hero-bg{display:block;height:100%;left:50%;position:absolute;top:0;transform:translateX(-50%);z-index:1}.hero>.container{position:relative;z-index:2}.hero.has-offset{margin-bottom:-160px;padding-bottom:160px}.base-hero{height:22.5vw;max-height:324px;min-height:280px}.index-hero{background-image:url(/images/backgrounds/bg-hero-home.svg);height:68vw;max-height:980px}.index-hero,.other-hero{max-width:2448px;width:170vw}.other-hero{background-image:url(/images/backgrounds/bg-hero.svg)}.bg-footer-cta{background-image:url(/images/backgrounds/bg-footer-cta.svg);width:2448px}.quickstart-bg{background-image:url(/images/backgrounds/bg-quick-start.svg);height:40vw;top:220px;width:170vw}hr{background:#f1f6f9;border:0;display:block;height:4px;margin:0;width:100%}hr.is-small{height:2px}hr.is-large{height:8px}hr.is-medium{background:#d6dbdf}hr.is-dark{background:#495057}hr.is-yellow{background:linear-gradient(90deg,#ff8c00,#ff8c00 8px,#fc0 16px,rgba(255,204,0,0));-webkit-clip-path:polygon(8px 100%,0 100%,0 0,8px 0,8px 100%,16px 100%,16px 0,100% 0,100% 100%);clip-path:polygon(8px 100%,0 100%,0 0,8px 0,8px 100%,16px 100%,16px 0,100% 0,100% 100%);height:8px}.icon{display:block;height:48px;margin-bottom:24px;-o-object-fit:contain;object-fit:contain;-o-object-position:center;object-position:center}@media screen and (min-width:576px){.icon{height:64px}}@media screen and (min-width:980px){.icon{height:80px}}img{max-width:100%}.kicker{color:#6c757d;font-family:Hind Siliguri,sans-serif;font-size:.875rem;font-weight:600;letter-spacing:1px;margin:0}@media screen and 
(max-width:978.98px){.lead{font-size:1.125rem}}.logo{display:block;height:36px;max-width:220px;-o-object-fit:contain;object-fit:contain;-o-object-position:center;object-position:center;width:100%}.navbar-clickhouse{border-bottom:4px solid #f1f6f9;height:142px}.navbar-clickhouse>.container{flex-wrap:wrap}.navbar-super{flex-shrink:0;width:100%}.navbar-super ul{list-style:none}.navbar-super li:not(:last-child){margin-bottom:0;margin-right:24px}.navbar-super a{align-items:center;color:#212529;display:flex;font-size:.875rem}.navbar-super a:active,.navbar-super a:hover{color:#257af4;text-decoration:none}.navbar-super img{flex-shrink:0;margin-right:4px}.navbar-brand-clickhouse{background:no-repeat url(../images/logo-clickhouse.svg);background-size:contain;flex-shrink:0;height:28px;margin-right:48px;padding:0;width:180px}.navbar-nav{align-items:center;height:46px}.navbar .nav-item:not(:last-child){margin-bottom:0;margin-right:24px}.navbar .nav-link{color:#212529}.navbar .nav-link:active,.navbar .nav-link:hover{color:#257af4}.navbar .navbar-nav{flex-direction:row}@media screen and (max-width:978.98px){.navbar>.container{padding-left:20px;padding-right:20px}.navbar .navbar-toggler{height:24px;padding:0;width:24px}.navbar .navbar-toggler:focus{outline:none}.navbar .navbar-toggler-icon{background:no-repeat url(../images/icons/icon-menu.svg);background-position:50%;background-size:contain;height:24px;width:24px}.navbar .navbar-collapse{background:#fff;border-bottom:4px solid #f1f6f9;height:56px;left:0;padding:0 20px 16px;position:absolute;right:0;top:100%}.navbar .nav-link{font-size:.875rem;white-space:nowrap}}@media screen and (max-width:615.98px){.navbar .navbar-collapse{height:auto}.navbar .navbar-nav{flex-direction:column;height:auto}.navbar .nav-item:not(:last-child){margin-bottom:16px;margin-right:0}}@media screen and (max-width:399.98px){.navbar{height:80px}}@media screen and (min-width:616px){.navbar.py-1+div .anchor-fixer :target{scroll-margin-top:62px}}@media screen and (min-width:616px){.navbar.py-2+div .anchor-fixer :target{scroll-margin-top:78px}}@media screen and (min-width:616px){.navbar.py-3+div .anchor-fixer :target{scroll-margin-top:94px}}@media screen and (min-width:616px){.navbar.py-4+div .anchor-fixer :target{scroll-margin-top:110px}}@media screen and (min-width:616px){.navbar.py-5+div .anchor-fixer :target{scroll-margin-top:126px}}@media screen and (min-width:616px){.navbar.py-6+div .anchor-fixer :target{scroll-margin-top:142px}}@media screen and (min-width:616px){.navbar.py-7+div .anchor-fixer :target{scroll-margin-top:158px}}@media screen and (min-width:616px){.navbar.py-8+div .anchor-fixer :target{scroll-margin-top:174px}}@media screen and (max-width:615.98px){.navbar+div .anchor-fixer :target{scroll-margin-top:73px}}@media screen and (max-width:399.98px){.navbar+div .anchor-fixer :target{scroll-margin-top:80px}}.page,.photo-frame{overflow:hidden;width:100%}.photo-frame{background:hsla(0,0%,100%,.6);border-radius:100%;box-shadow:0 8px 20px rgba(108,117,125,.2);display:block;margin-bottom:24px;max-width:160px;position:relative}.photo-frame:before{content:"";display:block;padding-bottom:100%;width:100%}.photo-frame img{display:block;height:100%;left:0;-o-object-fit:contain;object-fit:contain;-o-object-position:center;object-position:center;position:absolute;top:0;width:100%}.pullquote{position:relative;width:70%}.pullquote:before{background:no-repeat 
url(../images/backgrounds/bg-quotes.svg);background-position:50%;background-size:100%;content:"";mix-blend-mode:multiply;right:56px;width:calc(100% - 16px);z-index:2}.pullquote-bg,.pullquote:before{bottom:0;display:block;position:absolute;top:0}.pullquote-bg{right:0;width:calc(50vw + 28.57143%);z-index:1}.pullquote-body{padding:64px 40px 64px 0;position:relative;z-index:3}.pullquote-quote{font-family:Hind Siliguri,sans-serif;font-size:32px;font-weight:700}.pullquote-citation{font-size:1.125rem}.section{overflow:visible;position:relative}.section,.section-bg{background-repeat:no-repeat;background-position:50%;background-size:cover}.section-bg{display:block;height:100%;left:50%;position:absolute;top:0;transform:translateX(-50%);z-index:1}.section>.container{position:relative;z-index:2}.severity-table th{background:#f1f6f9;font-size:.875rem;padding:8px 16px}.severity-table td{border-top:1px solid #d6dbdf;padding:16px}.social-icons{align-items:center;display:flex}.social-icons>a{aspect-ratio:24/24;background:#6c757d;display:block;height:24px;width:24px;-webkit-mask-position:center;mask-position:center;-webkit-mask-repeat:no-repeat;mask-repeat:no-repeat;-webkit-mask-size:contain;mask-size:contain;transition:background .2s}.social-icons>a:active,.social-icons>a:hover{background:#212529}.social-icons>a+a{margin-left:32px}.social-icons-facebook{-webkit-mask-image:url(/images/icons/icon-facebook-gray.svg);mask-image:url(/images/icons/icon-facebook-gray.svg)}.social-icons-twitter{-webkit-mask-image:url(/images/icons/icon-twitter-gray.svg);mask-image:url(/images/icons/icon-twitter-gray.svg);width:31px}.social-icons-linkedin{-webkit-mask-image:url(/images/icons/icon-linkedin-gray.svg);mask-image:url(/images/icons/icon-linkedin-gray.svg)}.social-icons-linkedin-alt{-webkit-mask-image:url(/images/icons/icon-linkedin-alt-gray.svg);mask-image:url(/images/icons/icon-linkedin-alt-gray.svg)}.social-icons.size-small>a{height:20px;width:20px}.social-icons.size-small>a:active,.social-icons.size-small>a:hover{background:#212529}.social-icons.size-small>a+a{margin-left:16px}.tabs{position:relative}.tabs:before{background:#fff;border-radius:7px 7px 0 0;content:"";display:block;height:8px;left:1px;position:absolute;right:1px;top:68px;z-index:10}@media screen and (min-width:1240px){.tabs:before{top:76px}}.tabs-body{background:#fff;border-radius:8px;border:1px solid #6c757d;box-shadow:0 8px 20px rgba(108,117,125,.2);padding:24px}@media screen and (min-width:980px){.tabs-body{padding:32px}}@media screen and (min-width:1240px){.tabs-body{padding:40px}}.tabs .nav-tabs{border-bottom:0;flex-wrap:nowrap;height:76px;margin:-20px -20px -9px;-webkit-mask-image:linear-gradient(90deg,transparent,#000 20px,#000 calc(100% - 20px),transparent);mask-image:linear-gradient(90deg,transparent,#000 20px,#000 calc(100% - 20px),transparent);overflow:scroll;overflow-x:scroll;overflow-y:visible;padding:20px 20px 0;position:relative}@media screen and (min-width:940px){.tabs .nav-tabs{overflow:visible}}@media screen and (min-width:1240px){.tabs .nav-tabs{height:84px}}.tabs .nav-link{align-items:center;border-bottom:0;color:#6c757d;display:flex;font-size:.875rem;flex-shrink:0;height:56px;justify-content:center;padding:0 12px 8px;text-align:center;white-space:nowrap}@media screen and (min-width:1240px){.tabs .nav-link{height:64px;padding:0 16px 8px}}.tabs .nav-link.active{background:#fff;box-shadow:0 -4px 8px rgba(108,117,125,.1);font-weight:700;padding:0 16px 8px}@media screen and (min-width:980px){.tabs .nav-link.active{padding:0 24px 
8px}}@media screen and (min-width:1240px){.tabs .nav-link.active{padding:0 32px 8px}}.tab-pane pre{background:#212529;border-radius:16px;color:#fff;padding:24px 16px}@media screen and (min-width:1240px){.tab-pane pre{padding:32px 24px}}.trailing-link{align-items:center;color:#212529;display:flex;font-size:.875rem;font-weight:700}.trailing-link:after{background:no-repeat url(../images/icons/icon-arrow.svg);background-position:100%;background-size:contain;content:"";display:block;height:12px;transition:transform .2s;width:20px}.trailing-link:active,.trailing-link:hover{color:#212529;text-decoration:none}.trailing-link:active:after,.trailing-link:hover:after{transform:translateX(8px)}.trailing-link.span-full:after{margin-left:auto}ul{list-style-type:square;padding-left:1.25em}ul li:not(:last-child){margin-bottom:16px}ul li::marker{color:#ff3939}ul.has-separators{list-style:none;padding:0}ul.has-separators li:not(:last-child){border-bottom:4px solid #f1f6f9;margin-bottom:24px;padding-bottom:24px}.bg-gradient-secondary{background-image:linear-gradient(58deg,#ff6443 3%,#fe561d 24%,#e32f0d 93%)}.bg-gradient-light-orange{background-image:linear-gradient(90deg,rgba(255,203,128,0),#ffcb80)}.bg-offset-right{bottom:0;left:-24px;position:absolute;top:0;width:calc(100vw + 24px);z-index:-1}@media screen and (min-width:1240px){.bg-offset-right{left:-96px;width:calc(100vw + 96px)}}.bg-inset-right{bottom:0;left:40px;position:absolute;top:0;width:calc(100vw - 40px);z-index:-1}@media screen and (min-width:980px){.bg-inset-right{left:96px;width:calc(100vw - 96px)}}.has-border-left{border-left:8px solid #f1f6f9;padding-left:16px}.font-xl{font-size:1.25rem}.font-lg{font-size:1.125rem}.font-sm{font-size:.875rem}.font-xs{font-size:.625rem}.font-weight-semibold{font-weight:600}.display-5{color:#212529;font-size:20px;font-weight:500}.display-6{color:#212529;font-size:14px;font-weight:700}.overflow-auto{overflow:auto}.text-decoration-underline{text-decoration:underline}.text-upper{text-transform:uppercase} \ No newline at end of file +@media screen and (max-width:978.98px){.btn{padding:8px 16px}}@media screen and (max-width:978.98px){.btn-lg{padding:12px 24px}}.btn-primary,.btn-primary:active,.btn-primary:hover{color:#212529}.btn-outline-primary{background:#fffaf0;border-color:#fc0;color:#212529}.btn-outline-primary:active,.btn-outline-primary:hover{background:#fc0;border-color:#fc0;color:#212529}.btn-secondary{border-color:#212529;color:#fff}.btn-outline-secondary,.btn-secondary:active,.btn-secondary:hover{background:#fff;border-color:#212529;color:#212529}.btn-outline-secondary:active,.btn-outline-secondary:hover{background:#212529;border-color:#212529;color:#fff}.btn-tertiary{border-color:#257af4;color:#fff}.btn-tertiary:active,.btn-tertiary:hover{background:#257af4;border-color:#257af4;color:#fff}.btn-outline-tertiary{background:#e3f1fe;color:#257af4}.btn-outline-tertiary:active,.btn-outline-tertiary:hover{background:#257af4;color:#fff}.btns{align-items:center;display:grid;-moz-column-gap:24px;column-gap:24px;row-gap:16px;grid-auto-flow:column;justify-content:center}@media screen and (max-width:767.98px){.btns{grid-auto-flow:row}}.btns.btns-lg{-moz-column-gap:40px;column-gap:40px}.btns.is-2{grid-template-columns:1fr 1fr}@media screen and (max-width:767.98px){.btns.is-2{grid-template-columns:1fr}}.btns.is-3{grid-template-columns:1fr 1fr 1fr}@media screen and (max-width:767.98px){.btns.is-3{grid-template-columns:1fr}}.card{box-shadow:0 8px 20px rgba(108,117,125,.2);overflow:hidden;transition:box-shadow 
.2s,transform .2s;width:100%}.card,.card-body{position:relative}.card-body{z-index:10}.card.is-large .card-body{padding:40px}.card.bg-primary-light{border-color:#fc0}.card.has-dark-border{border-color:#6c757d}.card.has-pattern:after,.card.has-pattern:before{background-repeat:no-repeat;background-size:auto 100%;bottom:0;content:"";display:block;position:absolute;top:0;width:72px}.card.has-pattern:before{background-image:url(../images/backgrounds/bg-card-pattern-blue-1.png);background-position:0 0;left:0}.card.has-pattern:after{background-image:url(../images/backgrounds/bg-card-pattern-blue-2.png);background-position:100% 0;right:0}.card.has-hover:active,.card.has-hover:hover,a.card:active,a.card:hover{box-shadow:0 12px 32px rgba(108,117,125,.2);transform:translateY(-8px)}.card.has-highlight:after,.card.has-hover:after,a.card:after{content:"";display:block;height:8px;margin-top:auto;transition:background .2s;width:100%}.card.has-highlight:after,.card.has-hover:active:after,.card.has-hover:hover:after,a.card:active:after,a.card:hover:after{background:#e3f1fe}.case-study-cards{-moz-column-gap:40px;column-gap:40px;display:grid;grid-template-columns:1fr;row-gap:40px;padding-bottom:40px;position:relative}.case-study-cards>div{align-items:stretch;display:flex}.case-study-cards:before{background:#d6dbdf;bottom:0;content:"";display:block;left:20px;position:absolute;top:40px;width:100vw}@media screen and (min-width:980px){.case-study-cards{grid-template-columns:repeat(2,minmax(0,1fr));row-gap:80px;padding-bottom:120px}.case-study-cards:before{left:-40px;top:120px}}.case-study-card{align-items:stretch;flex-direction:row;flex-shrink:0;left:0;transition:box-shadow .2s,left .4s,width .4s,z-index 0s;transition-delay:0s,.6s,.6s,0s;width:100%;z-index:2}@media screen and (max-width:979.98px){.case-study-card .row{min-height:0!important}}@media screen and (min-width:980px){.case-study-card:active,.case-study-card:hover{box-shadow:0 12px 32px rgba(108,117,125,.2)}.case-study-card:not(.is-open){cursor:pointer}.case-study-card.is-open{transform:none!important;transition-delay:0s,0s,0s,0s;width:calc(200% + 40px);z-index:10}.case-study-card.is-closing{z-index:10}.case-study-card.open-left.is-open{left:calc(-100% - 40px)}.case-study-card:before{background:no-repeat url(../images/backgrounds/bg-card-pattern-red.png);background-position:100%;background-size:contain;content:"";display:block;height:calc(100% - 80px);max-height:224px;max-width:234px;position:absolute;right:0;top:40px;transform:translateX(30%);transition:transform .4s;transition-delay:.6s;width:100%;z-index:1}}@media screen and (min-width:980px)and (min-width:1240px){.case-study-card:before{transform:translateX(50%)}}@media screen and (min-width:980px){.case-study-card.is-open:before{transform:translateX(70%);transition-delay:0s}}@media screen and (min-width:980px){.case-study-card-wrap{align-items:stretch;display:flex;flex-shrink:0;min-height:304px;position:relative;transition:width .4s;transition-delay:.6s;width:calc(200% + 42px);z-index:2}}@media screen and (min-width:980px){.case-study-card.is-open .case-study-card-wrap{transition-delay:0s;width:100%}}@media screen and (min-width:980px){.case-study-card-body{display:flex;flex-direction:column;padding-right:80px!important}.case-study-card-body>.row{align-self:stretch}}@media screen and (min-width:980px){.case-study-card-toggle{background:#fff;box-shadow:0 8px 20px 
rgba(108,117,125,.2);border-radius:100%;cursor:pointer;height:56px;position:relative;width:56px}.case-study-card-toggle:after,.case-study-card-toggle:before{background:#257af4;content:"";display:block;height:4px;left:calc(50% - 15px);position:absolute;top:calc(50% - 2px);transition:opacity .2s,transform .2s;width:30px}.case-study-card-toggle:after{transform:rotate(90deg)}}@media screen and (min-width:980px){.case-study-card.is-open .case-study-card-toggle:before{opacity:0;transform:rotate(-90deg)}}@media screen and (min-width:980px){.case-study-card.is-open .case-study-card-toggle:after{transform:rotate(0)}}@media screen and (min-width:980px){.case-study-card .col-lg-3{left:-60%;position:relative;transition:left .4s;transition-delay:.6s}}@media screen and (min-width:980px)and (min-width:980px){.case-study-card .col-lg-3{flex:0 0 250px;max-width:250px;width:250px}}@media screen and (min-width:980px){.case-study-card.is-open .col-lg-3{left:0;transition-delay:0s}}@media screen and (min-width:980px){.case-study-card .col-lg-auto{opacity:0;transform:translateX(24px);transition:opacity .4s,transform .4s;transition-delay:.2s}}@media screen and (min-width:980px)and (min-width:980px){.case-study-card .col-lg-auto{max-width:605px;width:calc(100% - 319px)}}@media screen and (min-width:980px){.case-study-card.is-open .col-lg-auto{opacity:1;transform:none;transition-delay:.2s}}.footer-copy,.footer-links{white-space:nowrap}form .form-group{position:relative}form .form-group.is-select:before{border-left:6px solid transparent;border-right:6px solid transparent;border-top:8px solid #6c757d;content:"";display:block;position:absolute;right:33px;top:calc(50% - 4px);z-index:10}form .form-control{border:1px solid #6c757d;border-radius:6px;height:auto;line-height:20px;min-height:44px;padding:12px 16px;width:100%}form .form-control,form .form-control:focus{box-shadow:0 8px 20px rgba(108,117,125,.2);color:#212529}form .form-control:focus{border-color:#212529}form .form-control::-moz-placeholder{color:#6c757d}form .form-control:-ms-input-placeholder{color:#6c757d}form .form-control::placeholder{color:#6c757d}form select.form-control{-webkit-appearance:none;-moz-appearance:none;appearance:none;padding-right:24px;white-space:pre-wrap}form select.form-control:not([data-chosen]){color:#6c757d}form .btn-secondary:active,form .btn-secondary:hover{color:#212529;background:#fc0;border-color:#fc0}.hero{overflow:visible;position:relative}.hero,.hero-bg{background-repeat:no-repeat;background-position:50%;background-size:cover}.hero-bg{display:block;height:100%;left:50%;position:absolute;top:0;transform:translateX(-50%);z-index:1}.hero>.container{position:relative;z-index:2}.hero.has-offset{margin-bottom:-160px;padding-bottom:160px}.base-hero{height:22.5vw;max-height:324px;min-height:280px}.index-hero{background-image:url(/images/backgrounds/bg-hero-home.svg);height:68vw;max-height:980px}.index-hero,.other-hero{max-width:2448px;width:170vw}.other-hero{background-image:url(/images/backgrounds/bg-hero.svg)}.bg-footer-cta{background-image:url(/images/backgrounds/bg-footer-cta.svg);width:2448px}.quickstart-bg{background-image:url(/images/backgrounds/bg-quick-start.svg);height:40vw;top:220px;width:170vw}hr{background:#f1f6f9;border:0;display:block;height:4px;margin:0;width:100%}hr.is-small{height:2px}hr.is-large{height:8px}hr.is-medium{background:#d6dbdf}hr.is-dark{background:#495057}hr.is-yellow{background:linear-gradient(90deg,#ff8c00,#ff8c00 8px,#fc0 16px,rgba(255,204,0,0));-webkit-clip-path:polygon(8px 100%,0 100%,0 0,8px 0,8px 
100%,16px 100%,16px 0,100% 0,100% 100%);clip-path:polygon(8px 100%,0 100%,0 0,8px 0,8px 100%,16px 100%,16px 0,100% 0,100% 100%);height:8px}.icon{display:block;height:48px;margin-bottom:24px;-o-object-fit:contain;object-fit:contain;-o-object-position:center;object-position:center}@media screen and (min-width:576px){.icon{height:64px}}@media screen and (min-width:980px){.icon{height:80px}}img{max-width:100%}.kicker{color:#6c757d;font-family:Hind Siliguri,sans-serif;font-size:.875rem;font-weight:600;letter-spacing:1px;margin:0}@media screen and (max-width:978.98px){.lead{font-size:1.125rem}}.logo{display:block;height:36px;max-width:220px;-o-object-fit:contain;object-fit:contain;-o-object-position:center;object-position:center;width:100%}.navbar-clickhouse{border-bottom:4px solid #f1f6f9;height:142px}.navbar-clickhouse>.container{flex-wrap:wrap}.navbar-super{flex-shrink:0;width:100%}.navbar-super ul{list-style:none}.navbar-super li:not(:last-child){margin-bottom:0;margin-right:24px}.navbar-super a{align-items:center;color:#212529;display:flex;font-size:.875rem}.navbar-super a:active,.navbar-super a:hover{color:#257af4;text-decoration:none}.navbar-super img{flex-shrink:0;margin-right:4px}.navbar-brand-clickhouse{background:no-repeat url(../images/logo-clickhouse.svg);background-size:contain;flex-shrink:0;height:28px;margin-right:48px;padding:0;width:180px}.navbar-nav{align-items:center;height:46px}.navbar .nav-item:not(:last-child){margin-bottom:0;margin-right:24px}.navbar .nav-link{color:#212529}.navbar .nav-link:active,.navbar .nav-link:hover{color:#257af4}.navbar .navbar-nav{flex-direction:row}@media screen and (max-width:978.98px){.navbar>.container{padding-left:20px;padding-right:20px}.navbar .navbar-toggler{height:24px;padding:0;width:24px}.navbar .navbar-toggler:focus{outline:none}.navbar .navbar-toggler-icon{background:no-repeat url(../images/icons/icon-menu.svg);background-position:50%;background-size:contain;height:24px;width:24px}.navbar .navbar-collapse{background:#fff;border-bottom:4px solid #f1f6f9;height:56px;left:0;padding:0 20px 16px;position:absolute;right:0;top:100%}.navbar .nav-link{font-size:.875rem;white-space:nowrap}}@media screen and (max-width:615.98px){.navbar .navbar-collapse{height:auto}.navbar .navbar-nav{flex-direction:column;height:auto}.navbar .nav-item:not(:last-child){margin-bottom:16px;margin-right:0}}@media screen and (max-width:399.98px){.navbar{height:80px}}@media screen and (min-width:616px){.navbar.py-1+div .anchor-fixer :target{scroll-margin-top:62px}}@media screen and (min-width:616px){.navbar.py-2+div .anchor-fixer :target{scroll-margin-top:78px}}@media screen and (min-width:616px){.navbar.py-3+div .anchor-fixer :target{scroll-margin-top:94px}}@media screen and (min-width:616px){.navbar.py-4+div .anchor-fixer :target{scroll-margin-top:110px}}@media screen and (min-width:616px){.navbar.py-5+div .anchor-fixer :target{scroll-margin-top:126px}}@media screen and (min-width:616px){.navbar.py-6+div .anchor-fixer :target{scroll-margin-top:142px}}@media screen and (min-width:616px){.navbar.py-7+div .anchor-fixer :target{scroll-margin-top:158px}}@media screen and (min-width:616px){.navbar.py-8+div .anchor-fixer :target{scroll-margin-top:174px}}@media screen and (max-width:615.98px){.navbar+div .anchor-fixer :target{scroll-margin-top:73px}}@media screen and (max-width:399.98px){.navbar+div .anchor-fixer :target{scroll-margin-top:80px}}.photo-frame{background:hsla(0,0%,100%,.6);border-radius:100%;box-shadow:0 8px 20px 
rgba(108,117,125,.2);display:block;margin-bottom:24px;max-width:160px;position:relative}.photo-frame:before{content:"";display:block;padding-bottom:100%;width:100%}.photo-frame img{display:block;height:100%;left:0;-o-object-fit:contain;object-fit:contain;-o-object-position:center;object-position:center;position:absolute;top:0;width:100%}.pullquote{position:relative;width:70%}.pullquote:before{background:no-repeat url(../images/backgrounds/bg-quotes.svg);background-position:50%;background-size:100%;content:"";mix-blend-mode:multiply;right:56px;width:calc(100% - 16px);z-index:2}.pullquote-bg,.pullquote:before{bottom:0;display:block;position:absolute;top:0}.pullquote-bg{right:0;width:calc(50vw + 28.57143%);z-index:1}.pullquote-body{padding:64px 40px 64px 0;position:relative;z-index:3}.pullquote-quote{font-family:Hind Siliguri,sans-serif;font-size:32px;font-weight:700}.pullquote-citation{font-size:1.125rem}.section{overflow:visible;position:relative}.section,.section-bg{background-repeat:no-repeat;background-position:50%;background-size:cover}.section-bg{display:block;height:100%;left:50%;position:absolute;top:0;transform:translateX(-50%);z-index:1}.section>.container{position:relative;z-index:2}.severity-table th{background:#f1f6f9;font-size:.875rem;padding:8px 16px}.severity-table td{border-top:1px solid #d6dbdf;padding:16px}.social-icons{align-items:center;display:flex}.social-icons>a{aspect-ratio:24/24;background:#6c757d;display:block;height:24px;width:24px;-webkit-mask-position:center;mask-position:center;-webkit-mask-repeat:no-repeat;mask-repeat:no-repeat;-webkit-mask-size:contain;mask-size:contain;transition:background .2s}.social-icons>a:active,.social-icons>a:hover{background:#212529}.social-icons>a+a{margin-left:32px}.social-icons-facebook{-webkit-mask-image:url(/images/icons/icon-facebook-gray.svg);mask-image:url(/images/icons/icon-facebook-gray.svg)}.social-icons-twitter{-webkit-mask-image:url(/images/icons/icon-twitter-gray.svg);mask-image:url(/images/icons/icon-twitter-gray.svg);width:31px}.social-icons-linkedin{-webkit-mask-image:url(/images/icons/icon-linkedin-gray.svg);mask-image:url(/images/icons/icon-linkedin-gray.svg)}.social-icons-linkedin-alt{-webkit-mask-image:url(/images/icons/icon-linkedin-alt-gray.svg);mask-image:url(/images/icons/icon-linkedin-alt-gray.svg)}.social-icons.size-small>a{height:20px;width:20px}.social-icons.size-small>a:active,.social-icons.size-small>a:hover{background:#212529}.social-icons.size-small>a+a{margin-left:16px}.tabs{position:relative}.tabs:before{background:#fff;border-radius:7px 7px 0 0;content:"";display:block;height:8px;left:1px;position:absolute;right:1px;top:68px;z-index:10}@media screen and (min-width:1240px){.tabs:before{top:76px}}.tabs-body{background:#fff;border-radius:8px;border:1px solid #6c757d;box-shadow:0 8px 20px rgba(108,117,125,.2);padding:24px}@media screen and (min-width:980px){.tabs-body{padding:32px}}@media screen and (min-width:1240px){.tabs-body{padding:40px}}.tabs .nav-tabs{border-bottom:0;flex-wrap:nowrap;height:76px;margin:-20px -20px -9px;-webkit-mask-image:linear-gradient(90deg,transparent,#000 20px,#000 calc(100% - 20px),transparent);mask-image:linear-gradient(90deg,transparent,#000 20px,#000 calc(100% - 20px),transparent);overflow:scroll;overflow-x:scroll;overflow-y:visible;padding:20px 20px 0;position:relative}@media screen and (min-width:940px){.tabs .nav-tabs{overflow:visible}}@media screen and (min-width:1240px){.tabs .nav-tabs{height:84px}}.tabs 
.nav-link{align-items:center;border-bottom:0;color:#6c757d;display:flex;font-size:.875rem;flex-shrink:0;height:56px;justify-content:center;padding:0 12px 8px;text-align:center;white-space:nowrap}@media screen and (min-width:1240px){.tabs .nav-link{height:64px;padding:0 16px 8px}}.tabs .nav-link.active{background:#fff;box-shadow:0 -4px 8px rgba(108,117,125,.1);font-weight:700;padding:0 16px 8px}@media screen and (min-width:980px){.tabs .nav-link.active{padding:0 24px 8px}}@media screen and (min-width:1240px){.tabs .nav-link.active{padding:0 32px 8px}}.tab-pane pre{background:#212529;border-radius:16px;color:#fff;padding:24px 16px}@media screen and (min-width:1240px){.tab-pane pre{padding:32px 24px}}.trailing-link{align-items:center;color:#212529;display:flex;font-size:.875rem;font-weight:700}.trailing-link:after{background:no-repeat url(../images/icons/icon-arrow.svg);background-position:100%;background-size:contain;content:"";display:block;height:12px;transition:transform .2s;width:20px}.trailing-link:active,.trailing-link:hover{color:#212529;text-decoration:none}.trailing-link:active:after,.trailing-link:hover:after{transform:translateX(8px)}.trailing-link.span-full:after{margin-left:auto}ul{list-style-type:square;padding-left:1.25em}ul li:not(:last-child){margin-bottom:16px}ul li::marker{color:#ff3939}ul.has-separators{list-style:none;padding:0}ul.has-separators li:not(:last-child){border-bottom:4px solid #f1f6f9;margin-bottom:24px;padding-bottom:24px}.bg-gradient-secondary{background-image:linear-gradient(58deg,#ff6443 3%,#fe561d 24%,#e32f0d 93%)}.bg-gradient-light-orange{background-image:linear-gradient(90deg,rgba(255,203,128,0),#ffcb80)}.bg-offset-right{bottom:0;left:-24px;position:absolute;top:0;width:calc(100vw + 24px);z-index:-1}@media screen and (min-width:1240px){.bg-offset-right{left:-96px;width:calc(100vw + 96px)}}.bg-inset-right{bottom:0;left:40px;position:absolute;top:0;width:calc(100vw - 40px);z-index:-1}@media screen and (min-width:980px){.bg-inset-right{left:96px;width:calc(100vw - 96px)}}.has-border-left{border-left:8px solid #f1f6f9;padding-left:16px}.font-xl{font-size:1.25rem}.font-lg{font-size:1.125rem}.font-sm{font-size:.875rem}.font-xs{font-size:.625rem}.font-weight-semibold{font-weight:600}.display-5{color:#212529;font-size:20px;font-weight:500}.display-6{color:#212529;font-size:14px;font-weight:700}.overflow-auto{overflow:auto}.text-decoration-underline{text-decoration:underline}.text-upper{text-transform:uppercase}
\ No newline at end of file